1. orc+snappy
-- Event detail fact table, partitioned by day (dt), stored as ORC with
-- Snappy compression ("orc.compress"="SNAPPY").
-- FIX: the original DDL also declared
--   row format delimited fields terminated by '\t'
-- That clause only applies to text-based storage formats; for STORED AS ORC
-- it is ignored at best (ORC defines its own serialization), so it has been
-- removed to avoid implying the data is tab-delimited text.
create table if not exists dwd.dwd_event_detail (
    visit_user_id string,
    session_id    string,
    user_id       string,
    event         string,
    app_version   string,
    platform      string
)
comment 'event明细表'
partitioned by (dt string)
stored as orc
tblproperties ("orc.compress" = "SNAPPY");
2. 读写hdfs上json文件,使用外部表
-- External table over raw JSON files on HDFS: each file line is loaded whole
-- into the single string column "doc", to be parsed downstream (e.g. with
-- get_json_object / json_tuple).
-- NOTE(review): '存储地址' ("storage path") is a placeholder — replace it with
-- the real HDFS directory before running.
create external table if not exists ods.ods_backup_log(doc string)
location '存储地址';
3. 读取hdfs上txt文件数据,按照','分割
-- External table over comma-separated text files under /data/day/search/.
-- OpenCSVSerDe splits on ',' (separatorChar) and also handles quoted fields.
-- NOTE: with OpenCSVSerde every column is surfaced as string, which matches
-- the declarations below; cast in downstream queries if numeric types are needed.
-- (Keyword casing normalized to the file's lowercase convention; behavior unchanged.)
create external table if not exists log.log_search_log (
    user_id    string,
    keyword    string,
    channel    string,
    app_type   string,
    hotkey_set string,
    result_num string,
    time_stamp string,
    created_at string
)
row format serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
with serdeproperties ("separatorChar" = ",")
location '/data/day/search/';
4. hive读取HBase数据
-- External Hive table mapped onto the HBase table "test_model" via the
-- HBase storage handler.
-- BUG FIX: "hbase.columns.mapping" must be one comma-separated string with NO
-- whitespace between entries — the original spread the mapping across indented
-- lines inside the string literal, and that embedded whitespace/newlines become
-- part of the family:qualifier names, breaking the column mapping. The mapping
-- is collapsed onto a single line here.
-- ":key" binds the first Hive column to the HBase row key; the "#b" suffix on
-- playTime tells the handler to decode the cell as a binary-encoded bigint.
-- Also added "if not exists" for idempotency, matching the other statements.
create external table if not exists src.src_study_log (
    key              string,
    b1_course_id     string,
    b1_lesson_id     string,
    b1_play_time     bigint,
    b1_play_from     string,
    b1_play_position string,
    b1_play_source   string,
    b1_user_id       string
)
stored by 'org.apache.hadoop.hive.hbase.HBaseStorageHandler'
with serdeproperties (
    "hbase.columns.mapping" = ":key,basic_info:courseId,basic_info:lessonId,basic_info:playTime#b,basic_info:playFrom,basic_info:playPosition,basic_info:playSource,basic_info:userId"
)
tblproperties ("hbase.table.name" = "test_model");
-- select * from dwd_play_model limit 10;
-- drop table dwd.dwd_play_model;
备注
1. 外部表数据存储位置由hdfs管理,存储位置可以由自己指定,存放在hdfs上任意目录;内部表默认存储在/user/hive/warehouse/
删除外部表,仅仅删除元数据,hdfs上的文件并不会被删除;
删除内部表,直接删除元数据和存储数据。
2. TBLPROPERTIES用途:向表中添加自定义或预定义的元数据属性