https://bosungtea9416.tistory.com/entry/AWS-ALB-Access-Log-활성화-및-분석하기

{
  "Version": "2012-10-17",
  "Statement": [
    {
      "Effect": "Allow",
      "Principal": {
        "AWS": "arn:aws:iam::600734575887:root"
      },
      "Action": "s3:PutObject",
      "Resource": "arn:aws:s3:::<버킷명>/*"
    }
  ]
}

https://inpa.tistory.com/entry/AWS-📚-ELBALB-로그-활성화-S3에-로그-저장하기

s3://sksmsansdj/logs/alb/AWSLogs/511256179815/elasticloadbalancing/ap-northeast-2/2023/10/08/

s3://<버킷명>/logs/alb/AWSLogs/<ELB_AWS_ID>/elasticloadbalancing/<리전>/yyyy/mm/dd/

[type] [time] [elb] [client:port] [target:port] [request_processing_time] [target_processing_time] [response_processing_time] [elb_status_code] [target_status_code] [received_bytes] [sent_bytes] ["request"] ["user_agent"] [ssl_cipher] [ssl_protocol] [target_group_arn] ["trace_id"] ["domain_name"] ["chosen_cert_arn"] [matched_rule_priority] [request_creation_time] ["actions_executed"] ["redirect_url"] ["error_reason"] ["target:port_list"] ["target_status_code_list"] ["classification"] ["classification_reason"]

활성화 후 s3://sksmsansdj/logs/alb/AWSLogs/511256179815/ 에 생성된 ELBAccessLogTestFile 을 삭제한다.

테이블을 생성한다.

CREATE EXTERNAL TABLE IF NOT EXISTS alb_logs (
            type string,
            time string,
            elb string,
            client_ip string,
            client_port int,
            target_ip string,
            target_port int,
            request_processing_time double,
            target_processing_time double,
            response_processing_time double,
            elb_status_code int,
            target_status_code string,
            received_bytes bigint,
            sent_bytes bigint,
            request_verb string,
            request_url string,
            request_proto string,
            user_agent string,
            ssl_cipher string,
            ssl_protocol string,
            target_group_arn string,
            trace_id string,
            domain_name string,
            chosen_cert_arn string,
            matched_rule_priority string,
            request_creation_time string,
            actions_executed string,
            redirect_url string,
            lambda_error_reason string,
            target_port_list string,
            target_status_code_list string,
            classification string,
            classification_reason string
            )
            PARTITIONED BY
            (
              year int,
              month int,
              day int
            )
            ROW FORMAT SERDE 'org.apache.hadoop.hive.serde2.RegexSerDe'
            WITH SERDEPROPERTIES (
            'serialization.format' = '1',
            'input.regex' = 
        '([^ ]*) ([^ ]*) ([^ ]*) ([^ ]*):([0-9]*) ([^ ]*)[:-]([0-9]*) ([-.0-9]*) ([-.0-9]*) ([-.0-9]*) (|[-0-9]*) (-|[-0-9]*) ([-0-9]*) ([-0-9]*) \\"([^ ]*) (.*) (- |[^ ]*)\\" \\"([^\\"]*)\\" ([A-Z0-9-_]+) ([A-Za-z0-9.-]*) ([^ ]*) \\"([^\\"]*)\\" \\"([^\\"]*)\\" \\"([^\\"]*)\\" ([-.0-9]*) ([^ ]*) \\"([^\\"]*)\\" \\"([^\\"]*)\\" \\"([^ ]*)\\" \\"([^\\s]+?)\\" \\"([^\\s]+)\\" \\"([^ ]*)\\" \\"([^ ]*)\\"')
            LOCATION 's3://sksmsansdj/logs/alb/AWSLogs/511256179815/elasticloadbalancing/ap-northeast-2/'
            TBLPROPERTIES
            (
             "projection.enabled" = "true",
             'projection.day.digits'='2',
             'projection.day.range'='01,31',
             'projection.day.type'='integer',
             'projection.month.digits'='2',
             'projection.month.range'='01,12',
             'projection.month.type'='integer',
             'projection.year.digits'='4',
             'projection.year.range'='2022,2023', 
             'projection.year.type'='integer', 
             'projection.hour.digits'='2', 
             'projection.hour.range'='01,23',
             'projection.hour.type'='integer',
             "storage.location.template" = "s3://sksmsansdj/logs/alb/AWSLogs/511256179815/elasticloadbalancing/ap-northeast-2/${year}/${month}/${day}"
            )