
-- external table over the raw CSV in HDFS; the header row is skipped via TBLPROPERTIES
create external table data_table (
key string
,fare_amount float
,pickup_datetime string
,pickup_longitude float
,pickup_latitude float
,dropoff_longitude float
,dropoff_latitude float
,passenger_count int)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION '/user/split_dataset'
TBLPROPERTIES ('skip.header.line.count'='1')
;
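A quick sanity check, not part of the original notes, to confirm the CSV is readable and the header row is skipped (names follow the DDL above):

SELECT key, fare_amount, pickup_datetime, passenger_count
FROM data_table
LIMIT 5;

SELECT COUNT(*) FROM data_table;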
-----------------------------------------
-- managed table, partitioned by the year-month string taken from key
create table partition_table (
fare_amount float
,pickup_datetime string
,pickup_longitude float
,pickup_latitude float
,dropoff_longitude float
,dropoff_latitude float
,passenger_count float
)
PARTITIONED BY(key string)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
STORED AS TEXTFILE
;
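To confirm that key was registered as a partition column rather than a regular column (a hedged addition, not in the original notes):

DESCRIBE FORMATTED partition_table;
-- key should appear under the "# Partition Information" section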
-------------------------------------------------------------
-- requires the dynamic-partition settings listed below to be set first
INSERT overwrite TABLE partition_table PARTITION(key)
SELECT
fare_amount
,from_unixtime(unix_timestamp(pickup_datetime, 'yyyy-MM-dd HH:mm:ss'))
,pickup_longitude
,pickup_latitude
,dropoff_longitude
,dropoff_latitude
,passenger_count
,SUBSTRING(key,1,7) AS key -- first 7 characters (year-month) become the partition value
FROM data_table;
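After the INSERT, the year-month partitions can be verified (an added sketch; table and column names follow the DDL above):

SHOW PARTITIONS partition_table;

-- row count per year-month partition
SELECT key, COUNT(*) AS cnt
FROM partition_table
GROUP BY key
ORDER BY key;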
-------------------------------------------------------------
-- session settings for the dynamic-partition INSERT above (set these before running it)
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions=1000;
set hive.exec.max.dynamic.partitions.pernode=1000;
set hive.support.concurrency=true;
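Issuing set with only the property name echoes its current value in the Hive CLI/Beeline, which is a quick way to confirm the settings took effect (added as a usage note):

set hive.exec.dynamic.partition;
set hive.exec.dynamic.partition.mode;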
<!-- hdfs-site.xml: local directory the DataNode uses for block storage -->
<property>
  <name>dfs.datanode.data.dir</name>
  <value>/usr/local/hadoop/data/datanode</value>
</property>

- Created the external table & dynamic partitions, then ran the INSERT; results above
- key keeps only the year and month → (see the example below)
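For illustration (the literal key below is a made-up value, assuming keys start with a yyyy-MM-dd timestamp), SUBSTRING(key,1,7) keeps just the year-month prefix used as the partition value:

SELECT SUBSTRING('2015-01-27 13:08:24.0000002', 1, 7);
-- returns '2015-01'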
