예제

/year=yyyy/month=mm/day=dd/hour=hh/ 파티션, S3 데이터 JSON

CREATE EXTERNAL TABLE carrysb (
  `timestamp` timestamp,
  `data` string
)
PARTITIONED BY (
  year int,
  month int,
  day int,
  hour int 
)
ROW FORMAT SERDE 
  'org.openx.data.jsonserde.JsonSerDe' 
WITH SERDEPROPERTIES ( 
  'case.insensitive'='TRUE', 
  'dots.in.keys'='FALSE', 
  'ignore.malformed.json'='FALSE', 
  'mapping'='TRUE') 
STORED AS INPUTFORMAT 
  'org.apache.hadoop.mapred.TextInputFormat' 
OUTPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION 's3://sksmsansdj/'
TBLPROPERTIES (
'skip.header.line.count'='1',
'projection.enabled'='true',
'projection.day.digits'='2',
'projection.day.range'='01,31',
'projection.day.type'='integer',
'projection.month.digits'='2',
'projection.month.range'='01,12',
'projection.month.type'='integer',
'projection.year.digits'='4',
'projection.year.range'='2022,2023', 
'projection.year.type'='integer', 
'projection.hour.digits'='2', 
'projection.hour.range'='01,23',
'projection.hour.type'='integer',
"storage.location.template" = "s3://sksmsansdj/year=${year}/month=${month}/day=${day}/hour=${hour}"
);

예제 2

/year=yyyy/month=mm/day=dd/hour=hh/ 파티션, S3 데이터 parquet

CREATE EXTERNAL TABLE onevsall (
  `timestamp` timestamp,
  `data` string
)
PARTITIONED BY (
  year int,
  month int,
  day int,
  hour int 
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION 's3://sksmsansdj/'
TBLPROPERTIES (
'projection.enabled'='true',
'projection.day.digits'='2',
'projection.day.range'='01,31',
'projection.day.type'='integer',
'projection.month.digits'='2',
'projection.month.range'='01,12',
'projection.month.type'='integer',
'projection.year.digits'='4',
'projection.year.range'='2022,2023', 
'projection.year.type'='integer', 
'projection.hour.digits'='2', 
'projection.hour.range'='01,23',
'projection.hour.type'='integer',
"storage.location.template" = "s3://sksmsansdj/year=${year}/month=${month}/day=${day}/hour=${hour}"
);

예제3

STRING 형이 있을 때

CREATE EXTERNAL TABLE IF NOT EXISTS `z` (
  `time` string,
  status_code string,
  request_ip string,
  method string,
  `path` string
)
ROW FORMAT SERDE 'org.openx.data.jsonserde.JsonSerDe'
WITH SERDEPROPERTIES (
  'ignore.malformed.json' = 'FALSE',
  'dots.in.keys' = 'FALSE',
  'case.insensitive' = 'TRUE',
  'mapping' = 'TRUE'
)
STORED AS INPUTFORMAT 'org.apache.hadoop.mapred.TextInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION 's3://sksmsansdj/'
TBLPROPERTIES (
'classification' = 'json',
'projection.enabled'='true',
'projection.day.range'='01,31',
'projection.day.type'='integer',
'projection.month.range'='01,12',
'projection.month.type'='integer',
'projection.year.range'='2022,2023', 
'projection.year.type'='integer', 
'projection.hour.range'='01,23',
'projection.hour.type'='integer',
'projection.method_part.type'='enum',
'projection.method_part.values'='GET,POST,PUT,DELETE,HEAD,TRACE,OPTION,PATCH',
"storage.location.template" = "s3://sksmsansdj/${month}/${day}/${year}/${method_part}"
);

예제4

PARQUET + STRING

CREATE EXTERNAL TABLE app_log (
  `time` double,
  status_code string,
  request_ip string,
  method string,
  `path` string
)
PARTITIONED BY (
  month int,
  day int,
  year int,
  method_part string
)
ROW FORMAT SERDE 'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat' OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION 's3://sksmsansdj/'
TBLPROPERTIES (
'projection.enabled'='true',
'projection.day.digits'='2',
'projection.day.range'='01,31',
'projection.day.type'='integer',
'projection.month.range'='01,12',
'projection.month.type'='integer',
'projection.year.range'='2022,2023', 
'projection.year.type'='integer', 
'projection.hour.range'='01,23',
'projection.hour.type'='integer',
'projection.method_part.type'='enum',
'projection.method_part.values'='GET,POST,PUT,PATCH,DELETE,HEAD,OPTION,TRACE',
"storage.location.template" = "s3://sksmsansdj/output/${month}/${day}/${year}/${method_part}"
);