天天看点

【重新发现PostgreSQL之美】- 44 摩斯电码

背景

场景:

需要先对某些字段的值进行计算, 再按计算结果过滤, 例如:

json里面的内容包含经纬度, 我们需要对经纬度进行地理信息空间查询过滤.

a,b,c,d分别代表语、数、英、科四科的分数, 按总分进行等值查询或范围查询时, 需要先计算总分再搜索.

挑战:

大多数数据库无法使用表达式索引, 只能全表扫描, 逐条计算. 效率低下.

PG解决方案:

支持表达式索引(也可以叫函数索引), 性能指数级提升.

支持对表达式收集统计信息(高频值、柱状图等), 供优化器估算选择性和执行代价.

例子:

create table a (id int, info jsonb);    

create index idx_a on a using gist (ST_SetSRID(ST_MakePoint((info ->> 'lon')::numeric, (info ->> 'lat')::numeric),4326));    

explain select * from a order by ST_SetSRID(ST_MakePoint((info ->> 'lon')::numeric, (info ->> 'lat')::numeric),4326) <->    

ST_SetSRID(ST_MakePoint(120,70),4326) limit 10;    

 Limit (cost=0.14..0.69 rows=10 width=44)   

   -> Index Scan using idx_a on a (cost=0.14..69.40 rows=1270 width=44)   

         Order By: (st_setsrid(st_makepoint((((info ->> 'lon'::text))::numeric)::double precision, (((info ->> 'lat'::text))::numeric)::double precision), 4326) <-> '0101000020E61000000000000000005E400000000000805140'::geometry)    

create table t (id int, a float4, b float4, c float4, d float4);    

create index idx_t on t ((a+b+c+d));    

explain select * from t where a+b+c+d=400;    

 Index Scan using idx_t on t  (cost=0.15..7.99 rows=8 width=20)    

   Index Cond: ((((a + b) + c) + d) = '400'::double precision)    

postgres=# select * from pg_stats where tablename='idx_t'; 

-[ RECORD 1 ]----------+-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------  

schemaname             | public  

tablename              | idx_t  

attname                | expr  

inherited              | f  

null_frac              | 0 

avg_width              | 4  

n_distinct             | -0.9994  

most_common_vals       | {151.73206,180.91998,197.2688,200.11456,204.47366,223.13992}  

most_common_freqs      | {0.0002,0.0002,0.0002,0.0002,0.0002,0.0002} 

histogram_bounds       | {25.474722,71.74933,85.48342,93.977295,99.26418,104.8926,110.16269,114.38039,118.075554,121.30721,124.80748,127.81897,130.67479,133.32335,136.02103,138.41626,140.85258,143.05424,145.51877,147.9408,149.95238,151.72961,153.72885,155.82372,157.6345,159.48929,161.0307,162.76514,164.57907,166.19772,167.8121,169.29343,171.28735,173.25894,174.89429,176.23984,177.65022,179.2883,180.66162,182.22772,183.88147,185.28021,186.64587,188.12837,189.66924,191.4691,192.80214,194.05939,195.64655,197.10524,198.36841,199.72656,201.35751,203.02931,204.50558,205.91415,207.49933,209.28078,210.977,212.39197,214.18248,215.5002,217.03229,218.55179,220.12622,221.61935,223.03786,224.73047,226.53156,228.12646,229.62404,231.14334,232.95035,234.51816,236.07428,237.84808,239.52545,241.77795,243.91528,246.18135,248.33812,250.06604,252.14948,254.52863,257.24,260.0845,262.6031,265.53894,268.5458,271.4497,275.00317,278.2635,281.67947,286.42548,290.9062,295.2775,301.70978,307.6002,317.32483,328.39,375.6833}  

correlation            | -0.026684083  

most_common_elems      |  

most_common_elem_freqs |   

elem_count_histogram   |