Export the list of repos from indexer

main
Max Ignatenko 2024-02-20 15:36:52 +00:00
parent d9147940aa
commit b7458a18f4
2 changed files with 37 additions and 0 deletions

View File

@@ -39,3 +39,27 @@ echo "Dumping posts..."
# Export the result of ${posts_query} as CSV (with a header row) into
# post_counts.csv, then print the resulting file's size.
docker compose exec -it postgres psql -U postgres -d bluesky \
  -c "copy (${posts_query}) to stdout with csv header;" > post_counts.csv
echo "Done: $(ls -lh post_counts.csv)"
# Register every DID that is referenced by a reply, a like or a follow in the
# `repos` table, so that the export below also covers those repos.
#
# - For replies and likes the DID is the third '/'-separated field of the
#   referenced URI; for follows the `subject` value is used as-is.
# - jsonb_extract_path_text() yields NULL when the path is absent (e.g. a post
#   that is not a reply) and split_part() yields '' when the URI has fewer
#   than three fields, so both are filtered out instead of being inserted.
# - `union` already removes duplicates, so no extra `distinct` is needed.
# - The derived table is aliased because PostgreSQL releases before 16 reject
#   a subquery in FROM without an alias.
dids_query="$(cat <<- EOF
insert into repos (did)
select did
from (
    select split_part(jsonb_extract_path_text(content, 'reply', 'parent', 'uri'), '/', 3) as did
    from records_post
    where collection = 'app.bsky.feed.post'
    union
    select split_part(jsonb_extract_path_text(content, 'subject', 'uri'), '/', 3)
    from records
    where collection = 'app.bsky.feed.like'
    union
    select jsonb_extract_path_text(content, 'subject')
    from records
    where collection = 'app.bsky.graph.follow'
) as referenced_dids
where did is not null
  and did <> ''
on conflict (did) do nothing;
EOF
)"

# Kept as a separate statement so that only the CSV produced by `copy` ends up
# in dids.csv.
dids_export_query='copy (select did as "did:ID" from repos) to stdout with csv header;'

echo "Dumping DIDs..."
# -q suppresses the "INSERT 0 n" status line of the first command, which would
# otherwise be written to stdout and end up at the top of dids.csv.
# NOTE(review): `-t` allocates a pseudo-TTY, which may turn line endings in the
# redirected CSV into CRLF - confirm whether plain `exec -T` / `exec -i` is
# preferable here and in the other dumps.
docker compose exec -it postgres psql -q -U postgres -d bluesky \
  -c "${dids_query}" \
  -c "${dids_export_query}" > dids.csv
echo "Done: $(ls -lh dids.csv)"

# Export handle -> DID pairs from the PLC database; `force_quote` keeps the
# handle column quoted in every row.
docker exec -it plc-postgres-1 psql -U postgres -d plc \
  -c 'copy (select handle, did as "did:ID" from actors) to stdout with (format csv , header, force_quote ("handle"));' > handles.csv

View File

@@ -31,3 +31,16 @@ with moved_rows as (
insert into records select * from moved_rows; insert into records select * from moved_rows;
-- Attach records_like to records as the partition for the value 'app.bsky.feed.like'.
alter table records attach partition records_like for values in ('app.bsky.feed.like');
-- Expression indexes over the DID that each record refers to. The indexed
-- expressions are the same ones the DID export script selects, written
-- identically so that they can be matched against those queries.

-- Likes: third '/'-separated field of the liked record's URI.
create index idx_like_subject on records_like using btree (
    split_part(jsonb_extract_path_text(content, 'subject', 'uri'), '/', 3)
);

-- Follows: the `subject` value itself.
create index idx_follow_subject on records_follow using btree (
    jsonb_extract_path_text(content, 'subject')
);

-- Replies: third '/'-separated field of the parent post's URI.
create index idx_reply_subject on records_post using btree (
    split_part(jsonb_extract_path_text(content, 'reply', 'parent', 'uri'), '/', 3)
);