hive shell
建立外表,指定目錄
CREATE EXTERNAL TABLE IF NOT EXISTS songs2 (
sid string,
aid string,
ptime string,
sinit int,
language int,
gender int)
COMMENT 'This is the staging page view table'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION '/bs/music/songs/';
導入HDFS檔案(原檔案消失)
LOAD DATA INPATH '/bs/music/input/mars_tianchi_songs.csv' OVERWRITE INTO TABLE songs2;
檢視前10條資料
select * from songs2 limit 10;
建立外表,指定目錄
CREATE EXTERNAL TABLE IF NOT EXISTS useraction (
uid string,
sid string,
btime string,
atype int,
ds string)
COMMENT 'This is the staging page view table'
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION '/bs/music/useraction/';
導入HDFS檔案(原檔案消失)
LOAD DATA INPATH '/bs/music/input/mars_tianchi_user_actions.csv' OVERWRITE INTO TABLE useraction;
select * from useraction limit 10;
表連接配接 小表在前
SELECT t1.*,t2.* FROM songs2 t1 JOIN useraction t2 on t1.sid=t2.sid;
Map join連接配接
SELECT t1.*,t2.* FROM songs2 t1 JOIN useraction t2 on t1.sid=t2.sid;
導出查詢資料到hdfs
INSERT OVERWRITE DIRECTORY '/bs/music/data'
SELECT t1.*,t2.* FROM songs2 t1 JOIN useraction t2 on t1.sid=t2.sid;
查詢結果儲存到表
CREATE EXTERNAL TABLE IF NOT EXISTS usersongs (
sid string,
aid string,
ptime string,
sinit int,
language int,
gender int,
uid string,
sid2 string,
btime string,
atype int,
ds string)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
LOCATION '/bs/music/data/';
INSERT OVERWRITE TABLE usersongs
SELECT t1.*,t2.* FROM songs2 t1 JOIN useraction t2 on t1.sid=t2.sid;
查詢結果儲存到本地
hive -e "select * from usersongs limit 10" >> /opt/tools/test.csv
檢視HDFS上檔案的前5行
hadoop fs -text /bs/music/data/000000_0 |head -n 5
去重分組查詢 同一aid的uid去重總量,sid的去重總量
select count(distinct uid),count(distinct sid),aid from usersongs where atype=1 group by aid;