Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save MaherSaif/10f683c750695dd15e6ea49895e39d8d to your computer and use it in GitHub Desktop.
Save MaherSaif/10f683c750695dd15e6ea49895e39d8d to your computer and use it in GitHub Desktop.
-- Carefull setup script is slow
-- SETUP SCRIPT :: START --
DROP TABLE IF EXISTS docs;
CREATE TABLE docs (
id SERIAL PRIMARY KEY,
type varchar(40) DEFAULT 'pdf' NOT NULL,
status varchar(40) NOT NULL,
sender_reference varchar(40) NOT NULL,
sent_at TIMESTAMPTZ,
created_at TIMESTAMPTZ DEFAULT now() NOT NULL
);
INSERT INTO
docs (type, status, sender_reference, sent_at)
SELECT
('{pdf,doc,raw}'::text[])[ceil(random()*3)],
('{sent,draft,suspended}'::text[])[ceil(random()*3)],
('{Custom,Client}'::text[])[ceil(random()*2)] || '/' || floor(random() * 2000),
(LOCALTIMESTAMP - interval '2 years' * random())::timestamptz
FROM generate_series(1, 60000000) g;
CREATE INDEX docs_sent_at_idx ON docs USING btree (sender_reference, status, sent_at DESC NULLS LAST);
CREATE INDEX docs_sent_at_idx_1 ON docs USING btree (sent_at DESC NULLS LAST, sender_reference, status);
CREATE INDEX docs_sent_at_idx_2 ON docs USING btree (sender_reference, sent_at DESC NULLS LAST);
VACUUM(ANALYZE) docs;
-- SETUP SCRIPT :: END --
SELECT sender_reference, COUNT(*) FROM docs GROUP BY sender_reference HAVING COUNT(*) > 1000 ORDER BY 2 LIMIT 10; -- grab sender_reference with some documents if needed
EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS)
SELECT * FROM docs
WHERE status IN ('draft', 'sent') AND sender_reference IN ('Custom/1175', 'Client/362', 'Custom/280') ORDER BY sent_at DESC NULLS LAST LIMIT 20 OFFSET 0;
/*
Limit (cost=0.56..26727.94 rows=20 width=38) (actual time=116.926..218.789 rows=20 loops=1)
Output: id, type, status, sender_reference, sent_at, created_at
Buffers: shared hit=1421 read=45046
-> Index Scan using docs_sent_at_idx_1 on public.docs (cost=0.56..39748945.58 rows=29744 width=38) (actual time=116.924..218.784 rows=20 loops=1)
Output: id, type, status, sender_reference, sent_at, created_at
Filter: (((docs.status)::text = ANY ('{draft,sent}'::text[])) AND ((docs.sender_reference)::text = ANY ('{Custom/1175,Client/362,Custom/280}'::text[])))
Rows Removed by Filter: 46354
Buffers: shared hit=1421 read=45046
Query Identifier: -4773896564660341574
Planning Time: 0.282 ms
Execution Time: 218.817 ms
*/
DROP INDEX docs_sent_at_idx_1;
-- redo the query without the index used
EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS)
SELECT * FROM docs
WHERE status IN ('draft', 'sent') AND sender_reference IN ('Custom/1175', 'Client/362', 'Custom/280') ORDER BY sent_at DESC NULLS LAST LIMIT 20 OFFSET 0;
-- with warm cache, very expensive read, all rows returned then sorted
/*
Limit (cost=97013.42..97013.47 rows=20 width=38) (actual time=98.236..98.237 rows=20 loops=1)
Output: id, type, status, sender_reference, sent_at, created_at
Buffers: shared hit=15 read=29030
-> Sort (cost=97013.42..97087.78 rows=29744 width=38) (actual time=98.234..98.235 rows=20 loops=1)
Output: id, type, status, sender_reference, sent_at, created_at
Sort Key: docs.sent_at DESC NULLS LAST
Sort Method: top-N heapsort Memory: 27kB
Buffers: shared hit=15 read=29030
-> Bitmap Heap Scan on public.docs (cost=524.25..96221.94 rows=29744 width=38) (actual time=22.013..95.987 rows=29834 loops=1)
Output: id, type, status, sender_reference, sent_at, created_at
Recheck Cond: (((docs.sender_reference)::text = ANY ('{Custom/1175,Client/362,Custom/280}'::text[])) AND ((docs.status)::text = ANY ('{draft,sent}'::text[])))
Heap Blocks: exact=28973
Buffers: shared hit=15 read=29030
-> Bitmap Index Scan on docs_sent_at_idx_3 (cost=0.00..516.81 rows=29744 width=0) (actual time=13.608..13.608 rows=29834 loops=1)
Index Cond: (((docs.sender_reference)::text = ANY ('{Custom/1175,Client/362,Custom/280}'::text[])) AND ((docs.status)::text = ANY ('{draft,sent}'::text[])))
Buffers: shared hit=15 read=57
Query Identifier: -4773896564660341574
Planning Time: 0.312 ms
Execution Time: 98.320 ms
*/
-- Recreate previous deleted index
CREATE INDEX docs_sent_at_idx_1 ON docs USING btree (sent_at DESC NULLS LAST, sender_reference, status);
VACUUM(ANALYZE) docs;
-- Decompose using UNION ALL. Result will be a little bit different (I think because of sent_at insertion :thinking:)
-- but it is the same amount of docs than the previous query.
EXPLAIN (ANALYZE, COSTS, VERBOSE, BUFFERS)
SELECT *
FROM
(
(
(
SELECT * FROM docs
WHERE status = 'draft'
AND sender_reference = 'Custom/1175'
ORDER BY
sent_at DESC NULLS LAST
LIMIT
20 OFFSET 0
)
UNION ALL
(
SELECT * FROM docs
WHERE status = 'sent'
AND sender_reference = 'Custom/1175'
ORDER BY
sent_at DESC NULLS LAST
LIMIT
20 OFFSET 0
)
UNION ALL
(
SELECT * FROM docs
WHERE status = 'draft'
AND sender_reference = 'Client/362'
ORDER BY
sent_at DESC NULLS LAST
LIMIT
20 OFFSET 0
)
UNION ALL
(
SELECT * FROM docs
WHERE status = 'sent'
AND sender_reference = 'Client/362'
ORDER BY
sent_at DESC NULLS LAST
LIMIT
20 OFFSET 0
)
UNION ALL
(
SELECT * FROM docs
WHERE status = 'draft'
AND sender_reference = 'Custom/280'
ORDER BY
sent_at DESC NULLS LAST
LIMIT
20 OFFSET 0
)
UNION ALL
(
SELECT * FROM docs
WHERE status = 'sent'
AND sender_reference = 'Custom/280'
ORDER BY
sent_at DESC NULLS LAST
LIMIT
20 OFFSET 0
)
)
) docs
ORDER BY
sent_at DESC NULLS LAST
LIMIT
20 OFFSET 0;
-- Fast query with very low usage of IO or disk read. Good usage of index.
/*
Limit (cost=3.47..84.20 rows=20 width=38) (actual time=4.340..4.398 rows=20 loops=1)
Output: docs_1.id, docs_1.type, docs_1.status, docs_1.sender_reference, docs_1.sent_at, docs_1.created_at
Buffers: shared hit=36 read=13
-> Merge Append (cost=3.47..487.86 rows=120 width=38) (actual time=4.338..4.393 rows=20 loops=1)
Sort Key: docs_1.sent_at DESC NULLS LAST
Buffers: shared hit=36 read=13
-> Limit (cost=0.56..80.74 rows=20 width=38) (actual time=2.081..2.101 rows=9 loops=1)
Output: docs_1.id, docs_1.type, docs_1.status, docs_1.sender_reference, docs_1.sent_at, docs_1.created_at
Buffers: shared hit=9 read=4
-> Index Scan using docs_sent_at_idx on public.docs docs_1 (cost=0.56..19763.16 rows=4930 width=38) (actual time=2.079..2.098 rows=9 loops=1)
Output: docs_1.id, docs_1.type, docs_1.status, docs_1.sender_reference, docs_1.sent_at, docs_1.created_at
Index Cond: (((docs_1.sender_reference)::text = 'Custom/1175'::text) AND ((docs_1.status)::text = 'draft'::text))
Buffers: shared hit=9 read=4
-> Limit (cost=0.56..80.74 rows=20 width=38) (actual time=0.204..0.215 rows=6 loops=1)
Output: docs_2.id, docs_2.type, docs_2.status, docs_2.sender_reference, docs_2.sent_at, docs_2.created_at
Buffers: shared hit=9 read=1
-> Index Scan using docs_sent_at_idx on public.docs docs_2 (cost=0.56..20032.50 rows=4997 width=38) (actual time=0.202..0.212 rows=6 loops=1)
Output: docs_2.id, docs_2.type, docs_2.status, docs_2.sender_reference, docs_2.sent_at, docs_2.created_at
Index Cond: (((docs_2.sender_reference)::text = 'Custom/1175'::text) AND ((docs_2.status)::text = 'sent'::text))
Buffers: shared hit=9 read=1
-> Limit (cost=0.56..80.74 rows=20 width=38) (actual time=0.755..0.756 rows=1 loops=1)
Output: docs_3.id, docs_3.type, docs_3.status, docs_3.sender_reference, docs_3.sent_at, docs_3.created_at
Buffers: shared hit=2 read=3
-> Index Scan using docs_sent_at_idx on public.docs docs_3 (cost=0.56..19763.16 rows=4930 width=38) (actual time=0.754..0.754 rows=1 loops=1)
Output: docs_3.id, docs_3.type, docs_3.status, docs_3.sender_reference, docs_3.sent_at, docs_3.created_at
Index Cond: (((docs_3.sender_reference)::text = 'Client/362'::text) AND ((docs_3.status)::text = 'draft'::text))
Buffers: shared hit=2 read=3
-> Limit (cost=0.56..80.74 rows=20 width=38) (actual time=0.195..0.200 rows=3 loops=1)
Output: docs_4.id, docs_4.type, docs_4.status, docs_4.sender_reference, docs_4.sent_at, docs_4.created_at
Buffers: shared hit=6 read=1
-> Index Scan using docs_sent_at_idx on public.docs docs_4 (cost=0.56..20032.50 rows=4997 width=38) (actual time=0.194..0.198 rows=3 loops=1)
Output: docs_4.id, docs_4.type, docs_4.status, docs_4.sender_reference, docs_4.sent_at, docs_4.created_at
Index Cond: (((docs_4.sender_reference)::text = 'Client/362'::text) AND ((docs_4.status)::text = 'sent'::text))
Buffers: shared hit=6 read=1
-> Limit (cost=0.56..80.74 rows=20 width=38) (actual time=0.888..0.897 rows=5 loops=1)
Output: docs_5.id, docs_5.type, docs_5.status, docs_5.sender_reference, docs_5.sent_at, docs_5.created_at
Buffers: shared hit=6 read=3
-> Index Scan using docs_sent_at_idx on public.docs docs_5 (cost=0.56..19763.16 rows=4930 width=38) (actual time=0.887..0.895 rows=5 loops=1)
Output: docs_5.id, docs_5.type, docs_5.status, docs_5.sender_reference, docs_5.sent_at, docs_5.created_at
Index Cond: (((docs_5.sender_reference)::text = 'Custom/280'::text) AND ((docs_5.status)::text = 'draft'::text))
Buffers: shared hit=6 read=3
-> Limit (cost=0.56..80.74 rows=20 width=38) (actual time=0.207..0.208 rows=1 loops=1)
Output: docs_6.id, docs_6.type, docs_6.status, docs_6.sender_reference, docs_6.sent_at, docs_6.created_at
Buffers: shared hit=4 read=1
-> Index Scan using docs_sent_at_idx on public.docs docs_6 (cost=0.56..20032.50 rows=4997 width=38) (actual time=0.206..0.206 rows=1 loops=1)
Output: docs_6.id, docs_6.type, docs_6.status, docs_6.sender_reference, docs_6.sent_at, docs_6.created_at
Index Cond: (((docs_6.sender_reference)::text = 'Custom/280'::text) AND ((docs_6.status)::text = 'sent'::text))
Buffers: shared hit=4 read=1
Query Identifier: -998395605132531303
Planning:
Buffers: shared hit=5
Planning Time: 1.633 ms
Execution Time: 4.498 ms
*/
SELECT version();
-- PostgreSQL 14.8 (Homebrew) on aarch64-apple-darwin22.4.0, compiled by Apple clang version 14.0.3 (clang-1403.0.22.14.1), 64-bit
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment