一般而言,存储格式分为列式存储和行式存储。对于Hadoop来说,列式存储有Parquet、RCFile、ORCFile等,行式存储有SequenceFile、MapFile、Avro Datafile等。
hive中的使用
1.orc
-- Template for a Hive table stored in ORC format.
-- The column list is elided ("..."); fill it in before running.
-- The table is partitioned by an integer `day` column, keeps its data under
-- /test/test_orc/, and sets the `orc.compress` table property to SNAPPY.
CREATE TABLE test_orc (
...
)
PARTITIONED BY (day INT)
STORED AS ORC
LOCATION '/test/test_orc/'
TBLPROPERTIES ("orc.compress" = "SNAPPY");
默认为tblproperties ("orc.compress"="ZLIB");
2.parquet
-- Template for a Hive table stored in Parquet format.
-- The column list is elided ("..."); fill it in before running.
-- The table is partitioned by an integer `day` column and keeps its data
-- under /test/test_parquet/. No table properties are set here.
CREATE TABLE test_parquet (
...
)
PARTITIONED BY (day INT)
STORED AS PARQUET
LOCATION '/test/test_parquet/';
指定ORC格式的三种方式:
CREATE TABLE ... STORED AS ORC
ALTER TABLE ... SET FILEFORMAT ORC
SET hive.default.fileformat=ORC
参考:
http://blog.csdn.net/bingduanlbd/article/details/52088520
https://www.cnblogs.com/zhenjing/archive/2012/11/02/File-Format.html