Hive: external table over the fare CSV and dynamic partitioning by month

-- External table over the raw CSV files already sitting in /user/split_dataset.
-- fare_amount and the coordinates are decimal values in the source data; declaring
-- them as int would make Hive read them as NULL, so they are typed as float here
-- to match partition_table below.
create external table data_table (
key string
,fare_amount float
,pickup_datetime string
,pickup_longitude float
,pickup_latitude float
,dropoff_longitude float
,dropoff_latitude float
,passenger_count int)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
STORED AS TEXTFILE
LOCATION '/user/split_dataset'
TBLPROPERTIES ('skip.header.line.count'='1')
;
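-- Optional sanity check (a minimal sketch, assuming the CSV files are already under
-- /user/split_dataset): a few non-NULL rows confirm the delimiter, the header skip,
-- and the column types line up with the file.
SELECT * FROM data_table LIMIT 5;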
-----------------------------------------
-- Managed table partitioned on key; the partition column is declared in
-- PARTITIONED BY, not in the regular column list.
create table partition_table (
fare_amount float
,pickup_datetime string
,pickup_longitude float
,pickup_latitude float
,dropoff_longitude float
,dropoff_latitude float
,passenger_count float
)
PARTITIONED BY(key string)
ROW FORMAT DELIMITED
FIELDS TERMINATED BY ','
LINES TERMINATED BY '\n'
STORED AS TEXTFILE
;
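-- Quick structural check: DESCRIBE lists key under "# Partition Information"
-- rather than as a regular column.
DESCRIBE partition_table;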
-------------------------------------------------------------
-- Dynamic-partition load: the partition column (key) must be the last column in the
-- SELECT. SUBSTRING(key,1,7) keeps the first seven characters of key (the yyyy-MM
-- prefix when key starts with a timestamp), so rows are grouped into monthly partitions.
-- The SET statements further below must be run first, in the same session.
INSERT overwrite TABLE partition_table PARTITION(key)
SELECT
fare_amount
,from_unixtime(unix_timestamp(pickup_datetime, 'yyyy-MM-dd HH:mm:ss'))  -- re-parse the timestamp; drops anything after the seconds field
,pickup_longitude
,pickup_latitude
,dropoff_longitude
,dropoff_latitude
,passenger_count
,SUBSTRING(key,1,7) AS key
FROM data_table;
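-- Each distinct SUBSTRING(key,1,7) value becomes its own partition directory under the
-- table location; listing the partitions is the quickest way to verify the load.
SHOW PARTITIONS partition_table;

-- Filtering on the partition column prunes the scan to a single directory
-- ('2014-01' is only an example value, not necessarily present in the data).
SELECT COUNT(*) FROM partition_table WHERE key = '2014-01';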

-------------------------------------------------------------

-- Session settings required for the dynamic-partition INSERT above
-- (run them before the INSERT, in the same session):
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;      -- allow all partitions to be created dynamically
set hive.exec.max.dynamic.partitions=1000;           -- cap on total dynamic partitions per statement
set hive.exec.max.dynamic.partitions.pernode=1000;   -- cap on dynamic partitions per mapper/reducer
set hive.support.concurrency=true;                   -- enables the Hive lock manager; not required for dynamic partitioning

<!-- hdfs-site.xml: local directory where the DataNode stores HDFS blocks -->
<property>
        <name>dfs.datanode.data.dir</name>
        <value>/usr/local/hadoop/data/datanode</value>
</property>
