greenplum 分区表,索引
greenplum 分区按照类型可以分为
#列表分表
create table gh_par_list(
id1 integer,
id2 varchar(10))
distributed by (id1)
partition by list(id2)
(
partition p1 values ('1','2') tablespace ts_gh,
partition p2 values ('3','0')
tablespace ts_gh,
default partition pd
tablespace ts_gh
);
#范围分区
create table gh_par_range
(id1 integer,
id2 varchar(10),
id3 date)
distributed by (id1)
partition by range(id3)
(partition p1 start ('2011-01-01'::date) end ('2012-01-01'::date) tablespace ts_gh,
partition p2 start('2020-01-01'::date) end ('2021-01-01'::date) tablespace ts_gh);
##简便分区
create table gh_par_range_every
partition by range(id3)
(partition p2011 start ('2011-01-01'::date) end ('2030-01-01'::date) every ('1 year'::interval) tablespace ts_gh);
#表和分区的联系
tutorial=> \d pg_partition;
Table "pg_catalog.pg_partition"
Column | Type | Modifiers
---------------+------------+-----------
parrelid | oid | not null
parkind | "char" | not null
parlevel | smallint | not null
paristemplate | boolean | not null
parnatts | smallint | not null
paratts | int2vector | not null
parclass | oidvector | not null
Indexes:
"pg_partition_oid_index" UNIQUE, btree (oid)
"pg_partition_parrelid_index" btree (parrelid)
"pg_partition_parrelid_parlevel_istemplate_index" btree (parrelid, parlevel, paristemplate)
tutorial=>
**pg_partition中的parrelid关联pg_class的oid**
tutorial=> \d pg_partition_rule;
Table "pg_catalog.pg_partition_rule"
Column | Type | Modifiers
-------------------+----------+-----------
paroid | oid | not null
parchildrelid | oid | not null
parparentrule | oid | not null
parname | name | not null
parisdefault | boolean | not null
parruleord | smallint | not null
parrangestartincl | boolean | not null
parrangeendincl | boolean | not null
parrangestart | text |
parrangeend | text |
parrangeevery | text |
parlistvalues | text |
parreloptions | text[] |
partemplatespace | oid |
"pg_partition_rule_oid_index" UNIQUE, btree (oid)
"pg_partition_rule_parchildrelid_index" btree (parchildrelid)
"pg_partition_rule_parchildrelid_parparentrule_parruleord_index" btree (parchildrelid, parp
arentrule, parruleord)
"pg_partition_rule_paroid_parentrule_ruleord_index" btree (paroid, parparentrule, parruleor
d)
**pg_partition_rule中的paroid关联pg_partition的oid**
##创建视图直接查询
create view vw_partition as
select pp.parrelid tableoid ,prl.parchildrelid,prl.parname as partitionname
from pg_partition pp,pg_partition_rule prl
where pp.paristemplate=false and prl.paroid=pp.oid;
select * from vw_partition t where tableoid='public.gh_par_range'::regclass;
#分区操作
##增加
alter table gh_par_range
add partition p3 start ('1991-01-01'::date) end ('1992-01-01'::date);
##删除
alter table gh_par_range drop partition p3;
##清空
alter table gh_par_range truncate partition p3;
##分离
alter table gh_par_range
split partition p3
at ('1991-07-01'::date)
into (partition p4,partition p5);
##交换
2016-12-01
greenplum 支持对表进行分区
分区表
逻辑上的一个大表分割为物理上的几块
greenplum 来自于 postgresql
postgresql 创建分区表 步骤
1)创建主表,所有分区都从它继承
2)创建几个子表,每个都从主表上继承
3)为分区表增加约束,定义每个分区允许的键值
4)对于每个分区,在关键字字段上创建一个索引,以及其他想创建的索引。
5)定义一个规则或触发器,把对主表的修改重定向到合适的分区表
greenplum 来说,分区表的实现原理与上面介绍的一样
分区表目前支持范围分区和列表分区
范围分区
创建范围分区
tutorial=> create table gh_partition_range(id int,name varchar(32),dw_end_date date)
tutorial-> distributed by (id)
tutorial-> partition by range (dw_end_date)
tutorial-> (
tutorial(> partition p2015 start ('2015-01-01'::date) end ('2016-01-01'::date),
tutorial(> partition p2016 start ('2016-01-01'::date) end ('2017-01-01'::date)
tutorial(> );
NOTICE: CREATE TABLE will create partition "gh_partition_range_1_prt_p2015" for table "gh_partitio
n_range"NOTICE: CREATE TABLE will create partition "gh_partition_range_1_prt_p2016" for table "gh_partitio
n_range"CREATE TABLE
插入数据
tutorial=> insert into gh_partition_range values(1,23,'2015-01-01'::date);
INSERT 0 1
tutorial=> select * from gh_partition_range;
id | name | dw_end_date
----+------+-------------
1 | 23 | 2015-01-01
(1 row)
tutorial=> insert into gh_partition_range values(1,23,'2016-01-01'::date);
插入数值大于分区范围报错
tutorial=> insert into gh_partition_range values(1,23,'2018-01-01'::date);
ERROR: no partition for partitioning key (seg0 slave1:40000 pid=1875)
查询数据
1 | 23 | 2016-01-01
(2 rows)
查询分区数据
tutorial=> select * from gh_partition_range_1_prt_p2015;
tutorial=> select * from gh_partition_range_1_prt_p2016;
添加分区
tutorial=> alter table gh_partition_range add partition p2017 start ('2017-01-01'::date) end ('2018-01-01'::date);
NOTICE: CREATE TABLE will create partition "gh_partition_range_1_prt_p2017" for table "gh_partitio
n_range"ALTER TABLE
tutorial=> insert into gh_partition_range values(1,23,'2017-01-01'::date);
tutorial=>
tutorial=> select * from gh_partition_range_1_prt_p2017;
1 | 23 | 2017-01-01
删除分区
tutorial=> alter table gh_partition_range drop partition p2017;
ALTER TABLE
清空分区
tutorial=> alter table gh_partition_range truncate partition p2015;
分离分区
tutorial=> alter table gh_partition_range split partition p2016 at (('2016-06-01'::date)) into (partition p2016s, partition p2016x);
NOTICE: exchanged partition "p2016" of relation "gh_partition_range" with relation "pg_temp_85849"
NOTICE: dropped partition "p2016" for relation "gh_partition_range"
NOTICE: CREATE TABLE will create partition "gh_partition_range_1_prt_p2016s" for table "gh_partiti
on_range"NOTICE: CREATE TABLE will create partition "gh_partition_range_1_prt_p2016x" for table "gh_partiti
on_range"ALTER TABLE
1 | 25 | 2016-01-01
1 | 25 | 2016-02-01
1 | 25 | 2016-06-01
1 | 25 | 2016-07-01
(5 rows)
tutorial=> select * from gh_partition_range_1_prt_p2016s;
(3 rows)
tutorial=> select * from gh_partition_range_1_prt_p2016x;
交换分区(表字段名称,顺序,类型,字段长度必须一致,就像一张另外自己的表,且交换分区,一张表不能为分区表)
tutorial=> create table gh_one_partition (id int,name varchar(35),dw_date date)
partition by range (dw_date) (partition p2015 start ('2015-01-01'::date) end ('2016-01-01'::date), partition p2016 start ('2016-01-01'::date) end ('2017-01-01'::date));NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Greenplum Dat
abase data distribution key for this table.HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen
are the optimal data distribution key to minimize skew.NOTICE: CREATE TABLE will create partition "gh_one_partition_1_prt_p2015" for table "gh_one_partit
ion"NOTICE: CREATE TABLE will create partition "gh_one_partition_1_prt_p2016" for table "gh_one_partit
ion"CREATE TABLE
tutorial=> insert into gh_one_partition values(1,26,'2015-01-01'::date);
tutorial=> insert into gh_one_partition values(1,26,'2015-02-01'::date);
tutorial=> select * from gh_one_partition;
id | name | dw_date
----+------+------------
1 | 26 | 2015-01-01
1 | 26 | 2015-02-01
(0 rows)
tutorial=> insert into gh_partition_range_1_prt_p2015 values(1,21,'2015-01-01'::date);
1 | 21 | 2015-01-01
tutorial=> select * from gh_one_partition_1_prt_p2015;
tutorial=> alter table gh_partition_range exchange partition p2015 with table gh_one_partition;
ERROR: relation "gh_one_partition" must have the same column names and column order as "gh_partition_range"
tutorial=> ALTER TABLE public.gh_one_partition RENAME dw_date TO dw_end_date;
ERROR: cannot EXCHANGE table "gh_one_partition" as it has child table(s)
tutorial=> alter table gh_one_partition drop partition p2016;
tutorial=> create table gh_one_partition2 as select * from gh_one_partition;
NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Greenplum
Database data distribution key for this table.HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen
are the optimal data distribution key to minimize skew.SELECT 2
tutorial=> alter table gh_partition_range exchange partition p2015 with table gh_one_partition2;
ERROR: child table "gh_one_partition2" has different type for column "name"
tutorial=> select * from gh_one_partition2;
tutorial=> truncate tabel gh_one_partition2;
ERROR: syntax error at or near "gh_one_partition2"
LINE 1: truncate tabel gh_one_partition2;
^
tutorial=> truncate table gh_one_partition2;
TRUNCATE TABLE
范围分区每个间隔都要写,可以简化写法
every
tutorial=> create table gh_partition_every (id int,name varchar(32),dw_end_date date)
distributed by (id) partition by range (dw_end_date) ( partition p2015 start ('2015-01-01'::date) end ('2019-01-01'::date) every ('1 years'::interval) );
NOTICE: CREATE TABLE will create partition "gh_partition_every_1_prt_p2015_1" for table "gh_partit
ion_every"NOTICE: CREATE TABLE will create partition "gh_partition_every_1_prt_p2015_2" for table "gh_partit
ion_every"NOTICE: CREATE TABLE will create partition "gh_partition_every_1_prt_p2015_3" for table "gh_partit
ion_every"NOTICE: CREATE TABLE will create partition "gh_partition_every_1_prt_p2015_4" for table "gh_partit
ion_every"CREATE TABLE
tutorial=> insert into gh_partition_every values(1,1,'2018-01-01'::date);
tutorial=> insert into gh_partition_every values(1,1,'2017-01-01'::date);
tutorial=> insert into gh_partition_every values(1,1,'2016-01-01'::date);
tutorial=> insert into gh_partition_every values(1,1,'2015-01-01'::date);
tutorial=> insert into gh_partition_every values(1,1,'2014-01-01'::date);
ERROR: no partition for partitioning key (seg0 slave1:40000 pid=2464)
tutorial=> insert into gh_partition_every values(1,1,'2019-01-01'::date);
tutorial=> select * from gh_partition_every;
1 | 1 | 2015-01-01
1 | 1 | 2016-01-01
1 | 1 | 2017-01-01
1 | 1 | 2018-01-01
(4 rows)
list分区
tutorial=> create table gh_partition_list(id int,name varchar(32),status varchar(10))
tutorial-> partition by list (status)
tutorial(> partition p_list_1 values ('weifang','qingdao'),
tutorial(> partition p_list_2 values ('taiyuan','datong'),
tutorial(> default partition other_city);
NOTICE: CREATE TABLE will create partition "gh_partition_list_1_prt_p_list_1" for table "gh_partit
ion_list"NOTICE: CREATE TABLE will create partition "gh_partition_list_1_prt_p_list_2" for table "gh_partit
ion_list"NOTICE: CREATE TABLE will create partition "gh_partition_list_1_prt_other_city" for table "gh_part
ition_list"CREATE TABLE
tutorial=> insert into gh_partition_list values (1,1,'shanghai'),(2,2,'weifang');
INSERT 0 2
tutorial=> select * from gh_partition_list_1_prt_p_list_1;
id | name | status
----+------+---------
2 | 2 | weifang
tutorial=> select * from gh_partition_list_1_prt_p_list_2;
id | name | status
----+------+--------
tutorial=> select * from gh_partition_list_1_prt_other_city ;
id | name | status
----+------+----------
1 | 1 | shanghai
分区表参数
pg_partition:分区主表
Column | Type | Modifiers
parrelid | oid | not null
parkind | "char" | not null
parlevel | smallint | not null
paristemplate | boolean | not null
parnatts | smallint | not null
paratts | int2vector | not null
parclass | oidvector | not null
tutorial=> select parrelid,parkind from pg_partition;
parrelid | parkind
----------+---------
85849 | r
86189 | r
86445 | r
86613 | l
tutorial=> select relname from pg_class where oid ='public.gh_partition_range'::regclass;
relname
--------------------
gh_partition_range
tutorial=> select relname from pg_class where oid =85849;
Table "pg_catalog.pg_partition_rule"
Column | Type | Modifiers
paroid | oid | not null
parchildrelid | oid | not null
parparentrule | oid | not null
parname | name | not null
parisdefault | boolean | not null
parruleord | smallint | not null
parrangestartincl | boolean | not null
parrangeendincl | boolean | not null
parrangestart | text |
parrangeend | text |
parrangeevery | text |
parlistvalues | text |
parreloptions | text[] |
partemplatespace | oid |
"pg_partition_rule_parchildrelid_parparentrule_parruleord_index" btree (parchildrelid, parparen
trule, parruleord) "pg_partition_rule_paroid_parentrule_ruleord_index" btree (paroid, parparentrule, parruleord)
tutorial=> select paroid, parchildrelid , parname from pg_partition_rule ;
paroid | parchildrelid | parname
--------+---------------+------------
85920 | 86106 | p2016s
85920 | 86153 | p2016x
86260 | 86212 | p2015
85920 | 85872 | p2015
86564 | 86468 | p2015_1
86564 | 86492 | p2015_2
86564 | 86516 | p2015_3
86564 | 86540 | p2015_4
86707 | 86636 | p_list_1
86707 | 86660 | p_list_2
86707 | 86684 | other_city
(11 rows)
tutorial=> select relname from pg_class where oid=86106;
relname
---------------------------------
gh_partition_range_1_prt_p2016s
范围分区支持max吗?
目前没有sql语法,无法测试,不过按照语句start end 应该不行
范围分区支持split吗?
范围分区支持split
范围分区各个分区索引可以不同吗?
Command: CREATE INDEX
Description: define a new index
Syntax:
CREATE [UNIQUE] INDEX name ON table
[USING btree|bitmap|gist]
( {column | (expression)} [opclass] [, ...] )
[ WITH ( FILLFACTOR = value ) ]
[TABLESPACE tablespace]
[WHERE predicate]
查询创建索引语句没有所谓全局索引,本地索引的创建语句
删除分区会对其他分区的索引有影响吗?
既然为本地索引,那么就可以认为删除分区对其他分区索引无影响