CREATE TABLE T1 ( ID INTNOT NULLPRIMARY KEY, val INTNOT NULL, col1 UUID NOT NULL, col2 UUID NOT NULL, col3 UUID NOT NULL, col4 UUID NOT NULL, col5 UUID NOT NULL, col6 UUID NOT NULL );
INSERT INTO T1 SELECT i, RANDOM() *1000000, md5(random()::text || clock_timestamp()::text)::uuid, md5(random()::text || clock_timestamp()::text)::uuid, md5(random()::text || clock_timestamp()::text)::uuid, md5(random()::text || clock_timestamp()::text)::uuid, md5(random()::text || clock_timestamp()::text)::uuid, md5(random()::text || clock_timestamp()::text)::uuid FROM generate_series(1,20000000) i;
CREATE TABLE T2 ( ID INTNOT NULLPRIMARY KEY, val INTNOT NULL, col1 UUID NOT NULL, col2 UUID NOT NULL, col3 UUID NOT NULL, col4 UUID NOT NULL, col5 UUID NOT NULL, col6 UUID NOT NULL );
INSERT INTO T2 SELECT i, RANDOM() *1000000, md5(random()::text || clock_timestamp()::text)::uuid, md5(random()::text || clock_timestamp()::text)::uuid, md5(random()::text || clock_timestamp()::text)::uuid, md5(random()::text || clock_timestamp()::text)::uuid, md5(random()::text || clock_timestamp()::text)::uuid, md5(random()::text || clock_timestamp()::text)::uuid FROM generate_series(1,1000000) i;
vacuum ANALYZE T1; vacuum ANALYZE T2;
查詢 sample code
1 2 3 4 5
EXPLAIN (ANALYZE,TIMING ON,BUFFERS ON) SELECT t1.* FROM T1 INNERJOIN T2 ON t1.id = t2.id WHERE t1.id <1000000
select name,setting from pg_settings where name in ('autovacuum_vacuum_scale_factor','autovacuum_analyze_scale_factor','autovacuum_analyze_threshold','autovacuum_vacuum_threshold');
AutoVacuum 主要是由下面兩個公式判斷是否需要執行 vacuum 或 analyze,會有一個類似累積池概念,累績目前資料表 dead tuple 數量
1 2
觸發 auto_analyze = autovacuum_analyze_scale_factor * dead tuple count + autovacuum_analyze_threshold 觸發 auto_vacuum = autovacuum_vacuum_scale_factor * dead tuple count + autovacuum_vacuum_threshold
ALTER TABLE t2 SET (autovacuum_vacuum_scale_factor =0.001); ALTER TABLE t2 SET (autovacuum_vacuum_threshold =0); ALTER TABLE t2 SET (autovacuum_analyze_scale_factor =0.001); ALTER TABLE t2 SET (autovacuum_analyze_threshold =0);
autovacuum_analyze = 0.001 * 1000000 + 0 = 1000
autovacuum_vacuum = 0.001 * 1000000 + 0 = 1000
我利用下面語法查詢,了解某張資料表 dead tuple 數量
1 2 3 4 5 6 7 8
select relname,schemaname,n_dead_tup as "死元組數", (casewhen n_live_tup >0then n_dead_tup::float8/n_live_tup::float8 else 0 end) as "死/活元組的比例" from pg_stat_all_tables where relname ='t2'
換句話說 t2 資料表只需要有超過 1000 個 dead tuple 就會出發更新 (就類似所謂的校正回歸XDD)
1 2 3
UPDATE t2 SET val =20000 WHERE id <1002
在過一陣子後 postgrsql console log 會在背景執行 autovacuum 並把那些資料 mark
假如我們資料量大到一定程度,需要讀寫分離.在 Read DB 建議在每天離峰時做一次 VACUUM and ANALYZE.
A common strategy for read-mostly databases is to run VACUUM and ANALYZE once a day during a low-usage time of day. (This will not be sufficient if there is heavy update activity.)