greenplum partition table

greenplum 分区表，索引

greenplum 分区按照类型可以分为

#列表分表

create table gh_par_list(

id1 integer,

id2 varchar(10))

distributed by (id1)

partition by list(id2)

(

partition p1 values ('1','2') tablespace ts_gh,

partition p2 values ('3','0')

tablespace ts_gh,

default partition pd

tablespace ts_gh

);

#范围分区

create table gh_par_range

(id1 integer,

id2 varchar(10),

id3 date)

distributed by (id1)

partition by range(id3)

(partition p1 start ('2011-01-01'::date) end ('2012-01-01'::date) tablespace ts_gh,

partition p2 start('2020-01-01'::date) end ('2021-01-01'::date) tablespace ts_gh);

##简便分区

create table gh_par_range_every

partition by range(id3)

(partition p2011 start ('2011-01-01'::date) end ('2030-01-01'::date) every ('1 year'::interval) tablespace ts_gh);

#表和分区的联系

tutorial=> \d pg_partition;

Table "pg_catalog.pg_partition"

Column | Type | Modifiers

---------------+------------+-----------

parrelid | oid | not null

parkind | "char" | not null

parlevel | smallint | not null

paristemplate | boolean | not null

parnatts | smallint | not null

paratts | int2vector | not null

parclass | oidvector | not null

Indexes:

"pg_partition_oid_index" UNIQUE, btree (oid)

"pg_partition_parrelid_index" btree (parrelid)

"pg_partition_parrelid_parlevel_istemplate_index" btree (parrelid, parlevel, paristemplate)

tutorial=>

**pg_partition中的parrelid关联pg_class的oid**

tutorial=> \d pg_partition_rule;

Table "pg_catalog.pg_partition_rule"

Column | Type | Modifiers

-------------------+----------+-----------

paroid | oid | not null

parchildrelid | oid | not null

parparentrule | oid | not null

parname | name | not null

parisdefault | boolean | not null

parruleord | smallint | not null

parrangestartincl | boolean | not null

parrangeendincl | boolean | not null

parrangestart | text |

parrangeend | text |

parrangeevery | text |

parlistvalues | text |

parreloptions | text[] |

partemplatespace | oid |

"pg_partition_rule_oid_index" UNIQUE, btree (oid)

"pg_partition_rule_parchildrelid_index" btree (parchildrelid)

"pg_partition_rule_parchildrelid_parparentrule_parruleord_index" btree (parchildrelid, parp

arentrule, parruleord)

"pg_partition_rule_paroid_parentrule_ruleord_index" btree (paroid, parparentrule, parruleor

**pg_partition_rule中的paroid关联pg_partition的oid**

##创建视图直接查询

create view vw_partition as

select pp.parrelid tableoid ,prl.parchildrelid,prl.parname as partitionname

from pg_partition pp,pg_partition_rule prl

where pp.paristemplate=false and prl.paroid=pp.oid;

select * from vw_partition t where tableoid='public.gh_par_range'::regclass;

#分区操作

##增加

alter table gh_par_range

add partition p3 start ('1991-01-01'::date) end ('1992-01-01'::date);

##删除

alter table gh_par_range drop partition p3;

##清空

alter table gh_par_range truncate partition p3;

##分离

alter table gh_par_range

split partition p3

at ('1991-07-01'::date)

into (partition p4,partition p5);

##交换

2016-12-01

greenplum 支持对表进行分区

分区表

逻辑上的一个大表分割为物理上的几块

greenplum 来自于 postgresql

postgresql 创建分区表步骤

1）创建主表，所有分区都从它继承

2）创建几个子表，每个都从主表上继承

3）为分区表增加约束，定义每个分区允许的键值

4）对于每个分区，在关键字字段上创建一个索引，以及其他想创建的索引。

5）定义一个规则或触发器，把对主表的修改重定向到合适的分区表

greenplum 来说，分区表的实现原理与上面介绍的一样

分区表目前支持范围分区和列表分区

范围分区

创建范围分区

tutorial=> create table gh_partition_range(id int,name varchar(32),dw_end_date date)

tutorial-> distributed by (id)

tutorial-> partition by range (dw_end_date)

tutorial-> (

tutorial(> partition p2015 start ('2015-01-01'::date) end ('2016-01-01'::date),

tutorial(> partition p2016 start ('2016-01-01'::date) end ('2017-01-01'::date)

tutorial(> );

NOTICE: CREATE TABLE will create partition "gh_partition_range_1_prt_p2015" for table "gh_partitio

n_range"NOTICE: CREATE TABLE will create partition "gh_partition_range_1_prt_p2016" for table "gh_partitio

n_range"CREATE TABLE

插入数据

tutorial=> insert into gh_partition_range values(1,23,'2015-01-01'::date);

INSERT 0 1

tutorial=> select * from gh_partition_range;

id | name | dw_end_date

----+------+-------------

1 | 23 | 2015-01-01

(1 row)

tutorial=> insert into gh_partition_range values(1,23,'2016-01-01'::date);

插入数值大于分区范围报错

tutorial=> insert into gh_partition_range values(1,23,'2018-01-01'::date);

ERROR: no partition for partitioning key (seg0 slave1:40000 pid=1875)

查询数据

1 | 23 | 2016-01-01

(2 rows)

查询分区数据

tutorial=> select * from gh_partition_range_1_prt_p2015;

tutorial=> select * from gh_partition_range_1_prt_p2016;

添加分区

tutorial=> alter table gh_partition_range add partition p2017 start ('2017-01-01'::date) end ('2018-01-01'::date);

NOTICE: CREATE TABLE will create partition "gh_partition_range_1_prt_p2017" for table "gh_partitio

n_range"ALTER TABLE

tutorial=> insert into gh_partition_range values(1,23,'2017-01-01'::date);

tutorial=>

tutorial=> select * from gh_partition_range_1_prt_p2017;

1 | 23 | 2017-01-01

删除分区

tutorial=> alter table gh_partition_range drop partition p2017;

ALTER TABLE

清空分区

tutorial=> alter table gh_partition_range truncate partition p2015;

分离分区

tutorial=> alter table gh_partition_range split partition p2016 at (('2016-06-01'::date)) into (partition p2016s, partition p2016x);

NOTICE: exchanged partition "p2016" of relation "gh_partition_range" with relation "pg_temp_85849"

NOTICE: dropped partition "p2016" for relation "gh_partition_range"

NOTICE: CREATE TABLE will create partition "gh_partition_range_1_prt_p2016s" for table "gh_partiti

on_range"NOTICE: CREATE TABLE will create partition "gh_partition_range_1_prt_p2016x" for table "gh_partiti

on_range"ALTER TABLE

1 | 25 | 2016-01-01

1 | 25 | 2016-02-01

1 | 25 | 2016-06-01

1 | 25 | 2016-07-01

(5 rows)

tutorial=> select * from gh_partition_range_1_prt_p2016s;

(3 rows)

tutorial=> select * from gh_partition_range_1_prt_p2016x;

交换分区(表字段名称，顺序，类型，字段长度必须一致，就像一张另外自己的表，且交换分区，一张表不能为分区表)

tutorial=> create table gh_one_partition (id int,name varchar(35),dw_date date)

partition by range (dw_date) (partition p2015 start ('2015-01-01'::date) end ('2016-01-01'::date), partition p2016 start ('2016-01-01'::date) end ('2017-01-01'::date));NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column named 'id' as the Greenplum Dat

abase data distribution key for this table.HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen

are the optimal data distribution key to minimize skew.NOTICE: CREATE TABLE will create partition "gh_one_partition_1_prt_p2015" for table "gh_one_partit

ion"NOTICE: CREATE TABLE will create partition "gh_one_partition_1_prt_p2016" for table "gh_one_partit

ion"CREATE TABLE

tutorial=> insert into gh_one_partition values(1,26,'2015-01-01'::date);

tutorial=> insert into gh_one_partition values(1,26,'2015-02-01'::date);

tutorial=> select * from gh_one_partition;

id | name | dw_date

----+------+------------

1 | 26 | 2015-01-01

1 | 26 | 2015-02-01

(0 rows)

tutorial=> insert into gh_partition_range_1_prt_p2015 values(1,21,'2015-01-01'::date);

1 | 21 | 2015-01-01

tutorial=> select * from gh_one_partition_1_prt_p2015;

tutorial=> alter table gh_partition_range exchange partition p2015 with table gh_one_partition;

ERROR: relation "gh_one_partition" must have the same column names and column order as "gh_partition_range"

tutorial=> ALTER TABLE public.gh_one_partition RENAME dw_date TO dw_end_date;

ERROR: cannot EXCHANGE table "gh_one_partition" as it has child table(s)

tutorial=> alter table gh_one_partition drop partition p2016;

tutorial=> create table gh_one_partition2 as select * from gh_one_partition;

NOTICE: Table doesn't have 'DISTRIBUTED BY' clause -- Using column(s) named 'id' as the Greenplum

Database data distribution key for this table.HINT: The 'DISTRIBUTED BY' clause determines the distribution of data. Make sure column(s) chosen

are the optimal data distribution key to minimize skew.SELECT 2

tutorial=> alter table gh_partition_range exchange partition p2015 with table gh_one_partition2;

ERROR: child table "gh_one_partition2" has different type for column "name"

tutorial=> select * from gh_one_partition2;

tutorial=> truncate tabel gh_one_partition2;

ERROR: syntax error at or near "gh_one_partition2"

LINE 1: truncate tabel gh_one_partition2;

tutorial=> truncate table gh_one_partition2;

TRUNCATE TABLE

范围分区每个间隔都要写，可以简化写法

every

tutorial=> create table gh_partition_every (id int,name varchar(32),dw_end_date date)

distributed by (id) partition by range (dw_end_date) ( partition p2015 start ('2015-01-01'::date) end ('2019-01-01'::date) every ('1 years'::interval) );

NOTICE: CREATE TABLE will create partition "gh_partition_every_1_prt_p2015_1" for table "gh_partit

ion_every"NOTICE: CREATE TABLE will create partition "gh_partition_every_1_prt_p2015_2" for table "gh_partit

ion_every"NOTICE: CREATE TABLE will create partition "gh_partition_every_1_prt_p2015_3" for table "gh_partit

ion_every"NOTICE: CREATE TABLE will create partition "gh_partition_every_1_prt_p2015_4" for table "gh_partit

ion_every"CREATE TABLE

tutorial=> insert into gh_partition_every values(1,1,'2018-01-01'::date);

tutorial=> insert into gh_partition_every values(1,1,'2017-01-01'::date);

tutorial=> insert into gh_partition_every values(1,1,'2016-01-01'::date);

tutorial=> insert into gh_partition_every values(1,1,'2015-01-01'::date);

tutorial=> insert into gh_partition_every values(1,1,'2014-01-01'::date);

ERROR: no partition for partitioning key (seg0 slave1:40000 pid=2464)

tutorial=> insert into gh_partition_every values(1,1,'2019-01-01'::date);

tutorial=> select * from gh_partition_every;

1 | 1 | 2015-01-01

1 | 1 | 2016-01-01

1 | 1 | 2017-01-01

1 | 1 | 2018-01-01

(4 rows)

list分区

tutorial=> create table gh_partition_list(id int,name varchar(32),status varchar(10))

tutorial-> partition by list (status)

tutorial(> partition p_list_1 values ('weifang','qingdao'),

tutorial(> partition p_list_2 values ('taiyuan','datong'),

tutorial(> default partition other_city);

NOTICE: CREATE TABLE will create partition "gh_partition_list_1_prt_p_list_1" for table "gh_partit

ion_list"NOTICE: CREATE TABLE will create partition "gh_partition_list_1_prt_p_list_2" for table "gh_partit

ion_list"NOTICE: CREATE TABLE will create partition "gh_partition_list_1_prt_other_city" for table "gh_part

ition_list"CREATE TABLE

tutorial=> insert into gh_partition_list values (1,1,'shanghai'),(2,2,'weifang');

INSERT 0 2

tutorial=> select * from gh_partition_list_1_prt_p_list_1;

id | name | status

----+------+---------

2 | 2 | weifang

tutorial=> select * from gh_partition_list_1_prt_p_list_2;

id | name | status

----+------+--------

tutorial=> select * from gh_partition_list_1_prt_other_city ;

id | name | status

----+------+----------

1 | 1 | shanghai

分区表参数

pg_partition:分区主表

Column | Type | Modifiers

parrelid | oid | not null

parkind | "char" | not null

parlevel | smallint | not null

paristemplate | boolean | not null

parnatts | smallint | not null

paratts | int2vector | not null

parclass | oidvector | not null

tutorial=> select parrelid,parkind from pg_partition;

parrelid | parkind

----------+---------

85849 | r

86189 | r

86445 | r

86613 | l

tutorial=> select relname from pg_class where oid ='public.gh_partition_range'::regclass;

relname

--------------------

gh_partition_range

tutorial=> select relname from pg_class where oid =85849;

Table "pg_catalog.pg_partition_rule"

Column | Type | Modifiers

paroid | oid | not null

parchildrelid | oid | not null

parparentrule | oid | not null

parname | name | not null

parisdefault | boolean | not null

parruleord | smallint | not null

parrangestartincl | boolean | not null

parrangeendincl | boolean | not null

parrangestart | text |

parrangeend | text |

parrangeevery | text |

parlistvalues | text |

parreloptions | text[] |

partemplatespace | oid |

"pg_partition_rule_parchildrelid_parparentrule_parruleord_index" btree (parchildrelid, parparen

trule, parruleord) "pg_partition_rule_paroid_parentrule_ruleord_index" btree (paroid, parparentrule, parruleord)

tutorial=> select paroid, parchildrelid , parname from pg_partition_rule ;

paroid | parchildrelid | parname

--------+---------------+------------

85920 | 86106 | p2016s

85920 | 86153 | p2016x

86260 | 86212 | p2015

85920 | 85872 | p2015

86564 | 86468 | p2015_1

86564 | 86492 | p2015_2

86564 | 86516 | p2015_3

86564 | 86540 | p2015_4

86707 | 86636 | p_list_1

86707 | 86660 | p_list_2

86707 | 86684 | other_city

(11 rows)

tutorial=> select relname from pg_class where oid=86106;

relname

---------------------------------

gh_partition_range_1_prt_p2016s

范围分区支持max吗?

目前没有sql语法，无法测试，不过按照语句start end 应该不行

范围分区支持split吗？

范围分区支持split

范围分区各个分区索引可以不同吗？

Command: CREATE INDEX

Description: define a new index

Syntax:

CREATE [UNIQUE] INDEX name ON table

[USING btree|bitmap|gist]

( {column | (expression)} [opclass] [, ...] )

[ WITH ( FILLFACTOR = value ) ]

[TABLESPACE tablespace]

[WHERE predicate]

查询创建索引语句没有所谓全局索引，本地索引的创建语句

删除分区会对其他分区的索引有影响吗？

既然为本地索引，那么就可以认为删除分区对其他分区索引无影响

greenplum partition table

继续阅读

Oracle索引内部结构研究

MySQL和Lucene索引对比分析1. MySQL索引实现2. Lucene索引实现3. MySQL与Lucence对比参考：

一文搞定 MySQL 索引

MySQL（第五篇）—数据的完整性约束（2）（索引、引用完整性）数据的完整性约束（2）三、索引四、引用完整性

【MySQL索引】MySQL索引分类，90%的开发都不知道一、根据底层数据结构划分二、根据索引字段个数划分三、根据是否是在主键上建立的索引进行划分四、根据数据与索引的存储关联性划分五、其他分类六、总结

JS--------for循环删除数组元素

Python中字符串常见操作总结

Lucence的基本原理

sql执行计划 explain 以及结合索引优化sql

MySQL - order by排序优化

2021-08-08 mysql索引

SQLServer 提升查询速度

详解SQL中几种常用的表连接方式

oracle 中不使用已有的索引解决办法

对first_name创建唯一索引uniq_idx_firstname问题描述Sql语句

记一次因MySQL编码问题导致的慢查询排查