Tab sis feem ntau tsis muaj nws, qhov kev thov hloov mus ua kom muaj txiaj ntsig ntau dua li nws. Yog li hnub no peb mam li sim tshem tawm cov peev txheej-intensive JOIN - siv phau ntawv txhais lus.
Pib nrog PostgreSQL 12, qee qhov xwm txheej tau piav qhia hauv qab no tuaj yeem tsim tawm me ntsis txawv vim default non-materialization CTE. Tus cwj pwm no tuaj yeem thim rov qab los ntawm kev qhia tus yuam sij MATERIALIZED.
Muaj ntau "qhov tseeb" nyob rau hauv cov lus txwv
Cia peb ua ib qho haujlwm tiag tiag - peb yuav tsum tso cov npe cov lus tuaj los yog ua haujlwm nquag nrog cov neeg xa khoom:
Nyob rau hauv lub ntiaj teb paub daws teeb meem, cov neeg sau ntawv ua hauj lwm yuav tsum tau muab faib sib npaug ntawm txhua tus neeg ua haujlwm ntawm peb lub koom haum, tab sis qhov tseeb cov dej num tuaj, raws li txoj cai, los ntawm cov neeg tsawg tsawg - "los ntawm kev tswj hwm" nce qib siab lossis "los ntawm cov neeg ua haujlwm ua haujlwm" los ntawm cov chaw nyob sib ze (cov kws tshuaj ntsuam, tus tsim qauv, kev lag luam, ...).
Cia peb lees txais tias hauv peb lub koom haum ntawm 1000 tus neeg, tsuas yog 20 tus kws sau ntawv (feem ntau txawm tsawg dua) teeb tsa cov haujlwm rau txhua tus neeg ua yeeb yam tshwj xeeb thiab cia peb siv txoj kev paub no kom ceev cov lus nug "ib txwm".
Script generator
-- Employees: 1000 synthetic rows.
--   name       - one letter (chosen by id % 26) repeated (id % 32) + 1 times
--   birth_date - 2000-01-01 minus a random number of days in 0..10000
CREATE TABLE person AS
SELECT
  g.id
, repeat(chr(ascii('a') + (g.id % 26)), (g.id % 32) + 1) AS "name"
, '2000-01-01'::date - (random() * 1e4)::integer AS birth_date
FROM
  generate_series(1, 1000) AS g(id);

ALTER TABLE person
  ADD PRIMARY KEY (id);
-- Tasks with the required distribution: every owner has a pool of 20 randomly
-- drawn author ids (duplicates possible), and each of the 100000 tasks takes
-- its author from the pool of its owner.
CREATE TABLE task AS
WITH aid AS (
  -- one row per owner id 1..1000 carrying an array of 20 random ids in 1..1000
  SELECT
    pool_owner.id
  , array_agg((random() * 999)::integer + 1) AS aids
  FROM
    generate_series(1, 1000) AS pool_owner(id)
  CROSS JOIN
    generate_series(1, 20)
  GROUP BY
    pool_owner.id
)
SELECT
  T.id
, T.task_date
, T.owner_id
, a.author_id
FROM
  (
    -- task skeleton: random date within 1000 days before 2020-01-01, random owner
    SELECT
      g.id
    , '2020-01-01'::date - (random() * 1e3)::integer AS task_date
    , (random() * 999)::integer + 1 AS owner_id
    FROM
      generate_series(1, 100000) AS g(id)
  ) AS T
CROSS JOIN LATERAL (
  -- pick a random element of the owner's author pool
  SELECT
    aid.aids[(random() * (array_length(aid.aids, 1) - 1))::integer + 1] AS author_id
  FROM
    aid
  WHERE
    aid.id = T.owner_id
  LIMIT 1
) AS a;

ALTER TABLE task
  ADD PRIMARY KEY (id);

CREATE INDEX ON task (owner_id, task_date);
CREATE INDEX ON task (author_id);
Cia peb qhia 100 lub luag haujlwm kawg rau ib tus neeg ua haujlwm tshwj xeeb:
-- Baseline: the 100 most recent tasks owned by employee 777, with the author's
-- name attached through a plain LEFT JOIN (one person lookup per task row).
SELECT
  t.*
, p.name
FROM
  task AS t
LEFT JOIN
  person AS p
    ON p.id = t.author_id
WHERE
  t.owner_id = 777
ORDER BY
  t.task_date DESC
LIMIT 100;
Nws hloov tawm ntawd 1/3 tag nrho lub sij hawm thiab 3/4 nyeem nplooj ntawv ntawm cov ntaub ntawv tsuas yog ua los tshawb nrhiav tus sau 100 zaug - rau txhua qhov haujlwm tso tawm. Tab sis peb paub tias ntawm cov pua pua tsuas yog 20 txawv - Puas muaj peev xwm siv qhov kev paub no?
hstore- phau ntawv txhais lus
Cia peb ua kom zoo dua hstore yam los tsim ib qho "dictionary" key-value:
-- hstore supplies the key/value type used as the in-query dictionary below.
-- IF NOT EXISTS keeps the script re-runnable; the terminating semicolon keeps
-- this statement from running into whatever follows it in a script.
CREATE EXTENSION IF NOT EXISTS hstore;
Peb tsuas yog yuav tsum muab tus sau tus ID thiab nws lub npe rau hauv phau ntawv txhais lus kom peb thiaj li tuaj yeem rho tawm siv tus yuam sij no:
-- build the target selection: the 100 most recent tasks of owner 777
WITH recent_task AS (
  SELECT
    *
  FROM
    task
  WHERE
    owner_id = 777
  ORDER BY
    task_date DESC
  LIMIT 100
)
-- build the dictionary (author id -> name) for the distinct authors only
, author_dict AS (
  SELECT
    hstore( -- hstore(keys::text[], values::text[])
      array_agg(p.id)::text[]
    , array_agg(p.name)::text[]
    )
  FROM
    person AS p
  WHERE
    p.id = ANY(ARRAY(
      SELECT DISTINCT
        rt.author_id
      FROM
        recent_task AS rt
    ))
)
-- fetch the dictionary value that belongs to each task's author
SELECT
  rt.*
, (TABLE author_dict) -> rt.author_id::text -- hstore -> key
FROM
  recent_task AS rt;
Siv rau kev tau txais cov ntaub ntawv hais txog tib neeg 2 zaug tsawg lub sij hawm thiab 7 zaug tsawg cov ntaub ntawv nyeem! Ntxiv nrog rau "cov ntsiab lus", dab tsi tseem pab peb ua tiav cov txiaj ntsig no bulk ntaub ntawv retrieval los ntawm lub rooj nyob rau hauv ib tug pass siv = ANY(ARRAY(...)).
Cov lus nkag: Serialization thiab Deserialization
Tab sis yuav ua li cas yog tias peb xav tau txuag tsis yog ib qho ntawv sau xwb, tab sis tag nrho cov ntawv nkag hauv phau ntawv txhais lus? Hauv qhov no, PostgreSQL lub peev xwm yuav pab tau peb kho lub rooj nkag raws li ib qho nqi:
-- Excerpt only: the "..." lines stand for parts omitted from this listing
-- (they are not SQL), so this fragment is not runnable as written.
-- It stores whole person rows, serialized to text, as the dictionary values.
...
, dict AS (
SELECT
hstore(
array_agg(id)::text[]
, array_agg(p)::text[] -- magic #1: p is the whole person row; the row array is cast to text[]
)
FROM
person p
WHERE
...
)
SELECT
*
, (((TABLE dict) -> author_id::text)::person).* -- magic #2: cast the text value back to person and expand it into columns
FROM
T;
Cia peb saib dab tsi tshwm sim ntawm no:
Peb coj p raws li ib tug alias rau tag nrho cov neeg lub rooj nkag thiab sib sau ua ke ib array ntawm lawv.
no qhov array ntawm cov ntaub ntawv kaw tau rov ua dua rau ib qho array ntawm cov kab ntawv (tus neeg[]::text[]) muab tso rau hauv phau ntawv txhais lus hstore ua ib qho array ntawm qhov tseem ceeb.
Thaum peb tau txais cov ntaub ntawv ntsig txog, peb rub los ntawm phau ntawv txhais lus los ntawm qhov tseem ceeb raws li ib txoj hlua ntawv.
Peb xav tau ntawv tig mus rau hauv lub rooj hom nqi tus neeg (rau txhua lub rooj ib hom ntawm tib lub npe yog cia li tsim).
"Expand" cov ntaub ntawv sau rau hauv kab siv (...).*.
json phau ntawv txhais lus
Tab sis xws li ib tug ua kom yuam kev raws li peb thov saum toj no yuav tsis ua hauj lwm yog hais tias tsis muaj ib tug coj lub rooj hom los ua tus "casting". Raws nraim tib qhov xwm txheej yuav tshwm sim, thiab yog tias peb sim siv ib kab CTE, tsis yog lub rooj "tiag".
Yog li, peb tau txais ob txoj hauv kev los txheeb xyuas cov ntaub ntawv rau hauv phau ntawv txhais lus - hstore/json_object. Tsis tas li ntawd, cov arrays ntawm cov yuam sij thiab qhov tseem ceeb ntawm lawv tus kheej kuj tuaj yeem tsim tau ob txoj hauv kev, nrog kev hloov pauv sab hauv lossis sab nraud rau cov ntawv: array_agg(i::text) / array_agg(i)::text[].
Cia peb tshawb xyuas qhov ua tau zoo ntawm ntau hom serialization siv cov piv txwv zoo li hluavtaws - serialize ntau tus lej ntawm cov yuam sij:
-- Synthetic serialization case: build a dictionary whose keys and values are
-- both the text form of the same generated integers, then return it.
-- NOTE: the "..." upper bound of generate_series is a placeholder for the
-- number of keys; the statement is not runnable until it is replaced.
WITH dict AS (
SELECT
hstore(
array_agg(i::text) -- keys: each integer cast to text before aggregation
, array_agg(i::text) -- values: the same integers, cast the same way
)
FROM
generate_series(1, ...) i
)
TABLE dict;
Kev ntsuam xyuas tsab ntawv: serialization
-- Benchmark: time needed to build (serialize) the dictionary.
-- For every v in 0..19 the inner query, which builds an hstore of 2^v keys, is
-- executed 7 times under EXPLAIN ANALYZE through dblink against the current
-- database on the current port. The "Execution Time" figure is taken from the
-- last line of each returned plan and averaged per v.
-- Requires the dblink extension (and hstore for the inner query).
WITH T AS (
  SELECT
    *
  , (
      SELECT
        -- The backslashes are essential: \d+\.\d+ captures the number and \1
        -- substitutes it. Without them the pattern never matches the plan line
        -- and the cast to real fails on the unchanged text.
        regexp_replace(ea[array_length(ea, 1)], '^Execution Time: (\d+\.\d+) ms$', '\1')::real et
      FROM
        (
          -- collect all plan lines; the last one is the "Execution Time" line
          SELECT
            array_agg(el) ea
          FROM
            dblink('port= ' || current_setting('port') || ' dbname=' || current_database(), $$
explain analyze
WITH dict AS (
SELECT
hstore(
array_agg(i::text)
, array_agg(i::text)
)
FROM
generate_series(1, $$ || (1 << v) || $$) i
)
TABLE dict
$$) plan_line(el text)
        ) plan
    ) et
  FROM
    generate_series(0, 19) v
  , LATERAL generate_series(1, 7) i -- 7 repetitions per dictionary size
  ORDER BY
    v, i
)
SELECT
  v
, avg(et)::numeric(32,3)
FROM
  T
GROUP BY
  v
ORDER BY
  v;
Ntawm PostgreSQL 11, mus txog kwv yees li phau ntawv txhais lus loj ntawm 2^12 tus yuam sij serialization rau json yuav siv sij hawm tsawg dua. Hauv qhov no, qhov ua tau zoo tshaj plaws yog kev sib xyaw ntawm json_object thiab "internal" hom hloov dua siab tshiab array_agg(i::text).
Tam sim no cia peb sim nyeem tus nqi ntawm txhua tus yuam sij 8 zaug - tom qab tag nrho, yog tias koj tsis nkag mus rau phau ntawv txhais lus, ces vim li cas nws thiaj xav tau?
-- Benchmark: time needed to build the dictionary and read from it.
-- For every v in 0..19 the inner query builds a json_object dictionary of 2^v
-- keys and then performs 2^(v+3) lookups (8 per key, keys chosen by i % 2^v + 1).
-- Each inner query is executed 7 times under EXPLAIN ANALYZE through dblink
-- against the current database on the current port. The "Execution Time"
-- figure is taken from the last line of each plan and averaged per v.
-- Requires the dblink extension.
WITH T AS (
  SELECT
    *
  , (
      SELECT
        -- The backslashes are essential: \d+\.\d+ captures the number and \1
        -- substitutes it. Without them the pattern never matches the plan line
        -- and the cast to real fails on the unchanged text.
        regexp_replace(ea[array_length(ea, 1)], '^Execution Time: (\d+\.\d+) ms$', '\1')::real et
      FROM
        (
          -- collect all plan lines; the last one is the "Execution Time" line
          SELECT
            array_agg(el) ea
          FROM
            dblink('port= ' || current_setting('port') || ' dbname=' || current_database(), $$
explain analyze
WITH dict AS (
SELECT
json_object(
array_agg(i::text)
, array_agg(i::text)
)
FROM
generate_series(1, $$ || (1 << v) || $$) i
)
SELECT
(TABLE dict) -> (i % ($$ || (1 << v) || $$) + 1)::text
FROM
generate_series(1, $$ || (1 << (v + 3)) || $$) i
$$) plan_line(el text)
        ) plan
    ) et
  FROM
    generate_series(0, 19) v
  , LATERAL generate_series(1, 7) i -- 7 repetitions per dictionary size
  ORDER BY
    v, i
)
SELECT
  v
, avg(et)::numeric(32,3)
FROM
  T
GROUP BY
  v
ORDER BY
  v;