wip: try partitioning by date

main
Max Ignatenko 2024-09-07 13:29:50 +01:00
parent 19a21ad3a9
commit a41f6aa75c
8 changed files with 149 additions and 41 deletions

View File

@ -0,0 +1,5 @@
# DB migrations
WARNING: due to partitioning schema changes (which require re-creating the
tables from scratch), some migrations were **edited**. Their previous versions
have been copied to `migrations/obsolete` folder.

View File

@ -1,3 +1,7 @@
\timing
CREATE EXTENSION pg_partman SCHEMA public;
alter table records rename to records_like;
create table records
@ -6,48 +10,47 @@ partition by list (collection);
drop index idx_repo_record_key;
drop index idx_repo_rev;
alter table records_like drop constraint records_pkey;
create unique index records_pkey on records (id, collection);
alter sequence records_id_seq owned by records.id;
create table records_default
partition of records default;
drop table records_like;
create table records_post
partition of records for values in ('app.bsky.feed.post');
create table records_follow
partition of records for values in ('app.bsky.graph.follow');
create table records_block
partition of records for values in ('app.bsky.graph.block');
create table records_repost
partition of records for values in ('app.bsky.feed.repost');
create table records_profile
partition of records for values in ('app.bsky.actor.profile');
create index on records (collection, repo, rkey);
ALTER TABLE records_like
ADD CHECK (collection in ('app.bsky.feed.like'));
CREATE OR REPLACE FUNCTION setup_partition(in collection text, in suffix text) RETURNS boolean AS $$
BEGIN
EXECUTE 'CREATE TABLE records_' || suffix ||
' PARTITION OF records FOR VALUES IN (' || quote_literal(collection) || ')
PARTITION BY RANGE (created_at)';
EXECUTE 'CREATE INDEX ON records_' || suffix || ' (created_at)';
EXECUTE 'alter table records_' || suffix || ' add check (collection = ' || quote_literal(collection) || ')';
ALTER TABLE records_post
ADD CHECK (collection in ('app.bsky.feed.post'));
PERFORM public.create_parent('public.records_' || suffix, 'created_at', '1 month',
p_start_partition := '2024-02-01');
RETURN true;
END;
$$ LANGUAGE plpgsql;
ALTER TABLE records_follow
ADD CHECK (collection in ('app.bsky.graph.follow'));
ALTER TABLE records_repost
ADD CHECK (collection in ('app.bsky.feed.repost'));
select setup_partition('app.bsky.feed.like', 'like');
select setup_partition('app.bsky.feed.post', 'post');
select setup_partition('app.bsky.graph.follow', 'follow');
select setup_partition('app.bsky.graph.block', 'block');
select setup_partition('app.bsky.feed.repost', 'repost');
select setup_partition('app.bsky.actor.profile', 'profile');
select setup_partition('app.bsky.graph.list', 'list');
select setup_partition('app.bsky.graph.listblock', 'listblock');
select setup_partition('app.bsky.graph.listitem', 'listitem');
ALTER TABLE records_profile
ADD CHECK (collection in ('app.bsky.actor.profile'));
-- SLOW, can run overnight
with moved_rows as (
delete from records_like r
where collection <> 'app.bsky.feed.like'
returning r.*
)
insert into records select * from moved_rows;
CREATE TABLE records_default
PARTITION OF records DEFAULT
PARTITION BY RANGE (created_at);
CREATE INDEX ON records_default (created_at);
SELECT public.create_parent('public.records_default', 'created_at', '1 month',
p_start_partition := '2024-02-01');
-- ULTRA SLOW, DO NOT RUN on large DB
alter table records attach partition records_like for values in ('app.bsky.feed.like');
create index idx_like_subject
on records_like
@ -60,3 +63,31 @@ on records_follow
create index idx_reply_subject
on records_post
(split_part(jsonb_extract_path_text(content, 'reply', 'parent', 'uri'), '/', 3));
create index listitem_uri_subject
on records_listitem
(
jsonb_extract_path_text(content, 'list'),
jsonb_extract_path_text(content, 'subject'))
include (deleted);
create index listitem_subject_uri
on records_listitem
(
jsonb_extract_path_text(content, 'subject'),
jsonb_extract_path_text(content, 'list'))
include (deleted);
create view listitems as
select *, jsonb_extract_path_text(content, 'list') as list,
jsonb_extract_path_text(content, 'subject') as subject
from records_listitem;
create view lists as
select records_list.*,
jsonb_extract_path_text(content, 'name') as name,
jsonb_extract_path_text(content, 'description') as description,
jsonb_extract_path_text(content, 'purpose') as purpose,
'at://' || repos.did || '/app.bsky.graph.list/' || rkey as uri
from records_list join repos on records_list.repo = repos.id;

View File

@ -0,0 +1,62 @@
alter table records rename to records_like;
create table records
(like records_like including defaults)
partition by list (collection);
drop index idx_repo_record_key;
drop index idx_repo_rev;
alter table records_like drop constraint records_pkey;
create unique index records_pkey on records (id, collection);
create table records_default
partition of records default;
create table records_post
partition of records for values in ('app.bsky.feed.post');
create table records_follow
partition of records for values in ('app.bsky.graph.follow');
create table records_block
partition of records for values in ('app.bsky.graph.block');
create table records_repost
partition of records for values in ('app.bsky.feed.repost');
create table records_profile
partition of records for values in ('app.bsky.actor.profile');
ALTER TABLE records_like
ADD CHECK (collection in ('app.bsky.feed.like'));
ALTER TABLE records_post
ADD CHECK (collection in ('app.bsky.feed.post'));
ALTER TABLE records_follow
ADD CHECK (collection in ('app.bsky.graph.follow'));
ALTER TABLE records_repost
ADD CHECK (collection in ('app.bsky.feed.repost'));
ALTER TABLE records_profile
ADD CHECK (collection in ('app.bsky.actor.profile'));
-- SLOW, can run overnight
with moved_rows as (
delete from records_like r
where collection <> 'app.bsky.feed.like'
returning r.*
)
insert into records select * from moved_rows;
-- ULTRA SLOW, DO NOT RUN on large DB
alter table records attach partition records_like for values in ('app.bsky.feed.like');
create index idx_like_subject
on records_like
(split_part(jsonb_extract_path_text(content, 'subject', 'uri'), '/', 3));
create index idx_follow_subject
on records_follow
(jsonb_extract_path_text(content, 'subject'));
create index idx_reply_subject
on records_post
(split_part(jsonb_extract_path_text(content, 'reply', 'parent', 'uri'), '/', 3));

View File

@ -2,7 +2,12 @@ version: '3.8'
services:
postgres:
image: "postgres:16"
# image: "postgres:16"
build:
context: ./docker
dockerfile_inline: |
FROM postgres:16
RUN apt update && apt install -y postgresql-16-partman
volumes:
- "${DATA_DIR:?specify data dir in .env file}/postgres:/var/lib/postgresql/data:rw"
restart: always
@ -23,7 +28,11 @@ services:
"-c", "shared_buffers=8GB",
"-c", "work_mem=2GB",
"-c", "max_parallel_workers_per_gather=8",
"-c", "max_wal_size=8GB"
"-c", "max_wal_size=8GB",
"-c", "shared_preload_libraries=pg_partman_bgw",
"-c", "pg_partman_bgw.interval=3600",
"-c", "pg_partman_bgw.role=postgres",
"-c", "pg_partman_bgw.dbname=bluesky",
]
shm_size: '16gb'
stop_grace_period: 24h

View File

@ -0,0 +1 @@
*

View File

@ -32,12 +32,12 @@ type Repo struct {
}
type Record struct {
ID models.ID `gorm:"primarykey"`
CreatedAt time.Time
ID models.ID
CreatedAt time.Time `gorm:"not null"`
UpdatedAt time.Time `gorm:"autoUpdateTime:false"`
Repo models.ID `gorm:"index:idx_repo_record_key,unique,priority:1;not null;index:idx_repo_rev"`
Collection string `gorm:"index:idx_repo_record_key,unique,priority:2;not null"`
Rkey string `gorm:"index:idx_repo_record_key,unique,priority:3"`
Repo models.ID `gorm:"index:idx_repo_record_key,priority:1;not null;index:idx_repo_rev"`
Collection string `gorm:"index:idx_repo_record_key,priority:2;not null"`
Rkey string `gorm:"index:idx_repo_record_key,priority:3"`
AtRev string `gorm:"index:idx_repo_rev"`
Content json.RawMessage `gorm:"type:JSONB"`
Deleted bool `gorm:"default:false"`