/year=yyyy/month=mm/day=dd/hour=hh/
파티션, S3 데이터 JSON
CREATE EXTERNAL TABLE carrysb (
`timestamp` timestamp,
`data` string
)
PARTITIONED BY (
year int,
month int,
day int,
hour int
)
ROW FORMAT SERDE
'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES (
'case.insensitive'='TRUE',
'dots.in.keys'='FALSE',
'ignore.malformed.json'='FALSE',
'mapping'='TRUE')
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION 's3://sksmsansdj/'
TBLPROPERTIES (
'skip.header.line.count'='1',
'projection.enabled'='true',
'projection.day.digits'='2',
'projection.day.range'='01,31',
'projection.day.type'='integer',
'projection.month.digits'='2',
'projection.month.range'='01,12',
'projection.month.type'='integer',
'projection.year.digits'='4',
'projection.year.range'='2022,2023',
'projection.year.type'='integer',
'projection.hour.digits'='2',
'projection.hour.range'='01,23',
'projection.hour.type'='integer',
"storage.location.template" = "s3://sksmsansdj/year=${year}/month=${month}/day=${day}/hour=${hour}"
);
/year=yyyy/month=mm/day=dd/hour=hh/
파티션, S3 데이터 parquet
CREATE EXTERNAL TABLE onevsall (
`timestamp` timestamp,
`data` string
)
PARTITIONED BY (
year int,
month int,
day int,
hour int
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION 's3://sksmsansdj/'
TBLPROPERTIES (
'projection.enabled'='true',
'projection.day.digits'='2',
'projection.day.range'='01,31',
'projection.day.type'='integer',
'projection.month.digits'='2',
'projection.month.range'='01,12',
'projection.month.type'='integer',
'projection.year.digits'='4',
'projection.year.range'='2022,2023',
'projection.year.type'='integer',
'projection.hour.digits'='2',
'projection.hour.range'='01,23',
'projection.hour.type'='integer',
"storage.location.template" = "s3://sksmsansdj/year=${year}/month=${month}/day=${day}/hour=${hour}"
);
STRING 형이 있을 때
CREATE EXTERNAL TABLE IF NOT EXISTS `z` (
`time` string,
status_code string,
request_ip string,
method string,
`path` string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES (
'ignore.malformed.json' = 'FALSE',
'dots.in.keys' = 'FALSE',
'case.insensitive' = 'TRUE',
'mapping' = 'TRUE'
)
STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION 's3://sksmsansdj/'
TBLPROPERTIES (
'classification' = 'json',
'projection.enabled'='true',
'projection.day.range'='01,31',
'projection.day.type'='integer',
'projection.month.range'='01,12',
'projection.month.type'='integer',
'projection.year.range'='2022,2023',
'projection.year.type'='integer',
'projection.hour.range'='01,23',
'projection.hour.type'='integer',
'projection.method_part.type'='enum',
'projection.method_part.values'='GET,POST,PUT,DELETE,HEAD,TRACE,OPTION,PATCH',
"storage.location.template" = "s3://sksmsansdj/${month}/${day}/${year}/${method_part}"
);
PARQUET + STRING
CREATE EXTERNAL TABLE app_log (
`time` double,
status_code string,
request_ip string,
method string,
`path` string
)
PARTITIONED BY (
month int,
day int,
year int,
method_part string
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION 's3://sksmsansdj/'
TBLPROPERTIES (
'projection.enabled'='true',
'projection.day.digits'='2',
'projection.day.range'='01,31',
'projection.day.type'='integer',
'projection.month.range'='01,12',
'projection.month.type'='integer',
'projection.year.range'='2022,2023',
'projection.year.type'='integer',
'projection.hour.range'='01,23',
'projection.hour.type'='integer',
'projection.method_part.type'='enum',
'projection.method_part.values'='GET,POST,PUT,PATCH,DELETE,HEAD,OPTION,TRACE',
"storage.location.template" = "s3://sksmsansdj/output/${month}/${day}/${year}/${method_part}"
);