Add export_blocks view. Add iexport + audit table.
parent
e1ec43def7
commit
75386c0235
11
Makefile
11
Makefile
|
@ -59,12 +59,19 @@ sqldu:
|
|||
|
||||
# NOT RECOMMENDED TO RUN for the firts time on hot live db, will chomp all available IO. stop services first
|
||||
csv-export:
|
||||
@docker compose up -d postgres
|
||||
@sleep 10
|
||||
@nohup ./csv_export.sh > csv_export.out &
|
||||
|
||||
csv-iexport:
|
||||
@docker compose up -d postgres
|
||||
@sleep 10
|
||||
@nohup ./csv_iexport.sh > csv_iexport.out &
|
||||
|
||||
kill-csv-export:
|
||||
@kill -9 `pgrep csv_export.sh`
|
||||
|
||||
csv-compress:
|
||||
@tar cvzf csv_export.tgz dids.csv handles.csv post_counts.csv follows.csv like_counts.csv
|
||||
kill-csv-iexport:
|
||||
@kill -9 `pgrep csv_iexport.sh`
|
||||
|
||||
# ---------------------------- CSV Export ----------------------------
|
||||
|
|
|
@ -17,6 +17,8 @@ refresh materialized view export_follows;
|
|||
refresh materialized view export_likes;
|
||||
\echo Refreshing reply counts...
|
||||
refresh materialized view export_replies;
|
||||
\echo Refreshing block list...
|
||||
refresh materialized view export_blocks;
|
||||
\echo Refreshing DID list...
|
||||
refresh materialized view export_dids;
|
||||
EOF
|
||||
|
@ -30,6 +32,8 @@ docker compose exec -it postgres psql -U postgres -d bluesky \
|
|||
-c "copy (select * from export_likes) to stdout with csv header;" > like_counts.csv
|
||||
docker compose exec -it postgres psql -U postgres -d bluesky \
|
||||
-c "copy (select * from export_replies) to stdout with csv header;" > post_counts.csv
|
||||
docker compose exec -it postgres psql -U postgres -d bluesky \
|
||||
-c "copy (select * from export_blocks) to stdout with csv header;" > blocks.csv
|
||||
docker compose exec -it postgres psql -U postgres -d bluesky \
|
||||
-c "copy (select * from export_dids) to stdout with csv header;" > dids.csv
|
||||
|
||||
|
@ -40,6 +44,7 @@ docker compose exec -iT postgres psql -U postgres -d bluesky <<- EOF
|
|||
refresh materialized view export_follows with no data;
|
||||
refresh materialized view export_likes with no data;
|
||||
refresh materialized view export_replies with no data;
|
||||
refresh materialized view export_blocks with no data;
|
||||
refresh materialized view export_dids with no data;
|
||||
EOF
|
||||
|
||||
|
|
|
@ -0,0 +1,102 @@
|
|||
#!/bin/bash
|
||||
|
||||
set -e
|
||||
|
||||
# ------------------------------ Write data timestamp ----------------------------------
|
||||
|
||||
to_timestamp=$(date -Iseconds --utc)
|
||||
echo "export_start" > timestamp.csv
|
||||
echo "${to_timestamp}" >> timestamp.csv
|
||||
|
||||
# ------------------------------ Refresh views ----------------------------------
|
||||
|
||||
docker compose exec -iT postgres psql -U postgres -d bluesky <<- EOF
|
||||
\timing
|
||||
\echo Refreshing follows...
|
||||
refresh materialized view export_follows;
|
||||
\echo Refreshing like counts...
|
||||
refresh materialized view export_likes;
|
||||
\echo Refreshing reply counts...
|
||||
refresh materialized view export_replies;
|
||||
\echo Refreshing block list...
|
||||
refresh materialized view export_blocks;
|
||||
\echo Refreshing DID list...
|
||||
refresh materialized view export_dids;
|
||||
EOF
|
||||
|
||||
# ------------------------------ Dump views into .csv ----------------------------------
|
||||
|
||||
echo "Writing .csv files..."
|
||||
|
||||
echo "Starting follows export..."
|
||||
folows_started=$(date -Iseconds --utc)
|
||||
docker compose exec -it postgres psql -U postgres -d bluesky \
|
||||
-c "insert into incremental_export_log (started, to_tsmp, collection) values ('$folows_started', '$to_timestamp', 'app.bsky.graph.follow')"
|
||||
docker compose exec -it postgres psql -U postgres -d bluesky \
|
||||
-c "copy (select * from export_follows) to stdout with csv header;" > follows.csv
|
||||
echo "Finishing follows export..."
|
||||
folows_finished=$(date -Iseconds --utc)
|
||||
docker compose exec -it postgres psql -U postgres -d bluesky \
|
||||
-c "update incremental_export_log set finished='$folows_finished' where started='$folows_started' and to_tsmp='$to_timestamp' and collection = 'app.bsky.graph.follow'"
|
||||
|
||||
echo "Starting blocks export..."
|
||||
block_started=$(date -Iseconds --utc)
|
||||
docker compose exec -it postgres psql -U postgres -d bluesky \
|
||||
-c "insert into incremental_export_log (started, to_tsmp, collection) values ('$block_started', '$to_timestamp', 'app.bsky.graph.block')"
|
||||
docker compose exec -it postgres psql -U postgres -d bluesky \
|
||||
-c "copy (select * from export_blocks) to stdout with csv header;" > blocks.csv
|
||||
echo "Finishing blocks export..."
|
||||
block_finished=$(date -Iseconds --utc)
|
||||
docker compose exec -it postgres psql -U postgres -d bluesky \
|
||||
-c "update incremental_export_log set finished='$block_finished' where started='$block_started' and to_tsmp='$to_timestamp' and collection = 'app.bsky.graph.follow'"
|
||||
|
||||
|
||||
echo "Starting likes export..."
|
||||
likes_started=$(date -Iseconds --utc)
|
||||
docker compose exec -it postgres psql -U postgres -d bluesky \
|
||||
-c "insert into incremental_export_log (started, to_tsmp, collection) values ('$likes_started', '$to_timestamp', 'app.bsky.feed.like')"
|
||||
docker compose exec -it postgres psql -U postgres -d bluesky \
|
||||
-c "copy (select * from export_likes) to stdout with csv header;" > like_counts.csv
|
||||
echo "Finishing likes export..."
|
||||
likes_finished=$(date -Iseconds --utc)
|
||||
docker compose exec -it postgres psql -U postgres -d bluesky \
|
||||
-c "update incremental_export_log set finished='$likes_finished' where started='$likes_started' and to_tsmp='$to_timestamp' and collection = 'app.bsky.feed.like'"
|
||||
|
||||
echo "Starting posts export..."
|
||||
posts_started=$(date -Iseconds --utc)
|
||||
docker compose exec -it postgres psql -U postgres -d bluesky \
|
||||
-c "insert into incremental_export_log (started, to_tsmp, collection) values ('$posts_started', '$to_timestamp', 'app.bsky.feed.post')"
|
||||
docker compose exec -it postgres psql -U postgres -d bluesky \
|
||||
-c "copy (select * from export_replies) to stdout with csv header;" > post_counts.csv
|
||||
echo "Finishing posts export..."
|
||||
posts_finished=$(date -Iseconds --utc)
|
||||
docker compose exec -it postgres psql -U postgres -d bluesky \
|
||||
-c "update incremental_export_log set finished='$posts_finished' where started='$posts_started' and to_tsmp='$to_timestamp' and collection = 'app.bsky.feed.post'"
|
||||
|
||||
echo "Starting dids export..."
|
||||
dids_started=$(date -Iseconds --utc)
|
||||
docker compose exec -it postgres psql -U postgres -d bluesky \
|
||||
-c "insert into incremental_export_log (started, to_tsmp, collection) values ('$dids_started', '$to_timestamp', 'did')"
|
||||
docker compose exec -it postgres psql -U postgres -d bluesky \
|
||||
-c "copy (select * from export_dids) to stdout with csv header;" > dids.csv
|
||||
echo "Finishing dids export..."
|
||||
dids_finished=$(date -Iseconds --utc)
|
||||
docker compose exec -it postgres psql -U postgres -d bluesky \
|
||||
-c "update incremental_export_log set finished='$dids_finished' where started='$dids_started' and to_tsmp='$to_timestamp' and collection = 'did'"
|
||||
|
||||
|
||||
# ------------------------------ DO NOT Free up space used by materialized views for incremental refresh ----------------------------------
|
||||
|
||||
# ------------------------------ Dump handles from plc-mirror ----------------------------------
|
||||
|
||||
echo "Starting handles export..."
|
||||
handles_started=$(date -Iseconds --utc)
|
||||
docker compose exec -it postgres psql -U postgres -d bluesky \
|
||||
-c "insert into incremental_export_log (started, to_tsmp, collection) values ('$handles_started', '$to_timestamp', 'handle')"
|
||||
docker exec -t plc-postgres-1 psql -U postgres -d plc \
|
||||
-c 'copy (select handle, did as "did:ID" from actors) to stdout with (format csv , header, force_quote ("handle"));' | sed -E -e 's/([^\\])\\",/\1\\\\",/g' > handles.csv
|
||||
echo "Finishing dids export..."
|
||||
handles_finished=$(date -Iseconds --utc)
|
||||
docker compose exec -it postgres psql -U postgres -d bluesky \
|
||||
-c "update incremental_export_log set finished='$handles_finished' where started='$handles_started' and to_tsmp='$to_timestamp' and collection = 'handle'"
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
-- Create a block materialized view, don't populate right away.
|
||||
|
||||
create materialized view export_blocks
|
||||
as select repos.did as ":START_ID",
|
||||
records.content ->> 'subject' as ":END_ID"
|
||||
from repos join records on repos.id = records.repo
|
||||
where records.collection = 'app.bsky.graph.block'
|
||||
and records.content ->> 'subject' <> repos.did
|
||||
with no data;
|
||||
create index export_block_subject on export_blocks (":END_ID");
|
|
@ -0,0 +1,16 @@
|
|||
CREATE TABLE incremental_export_log (
|
||||
id SERIAL PRIMARY KEY,
|
||||
collection text NOT NULL,
|
||||
to_tsmp TIMESTAMP NOT NULL,
|
||||
started TIMESTAMP,
|
||||
finished TIMESTAMP,
|
||||
);
|
||||
|
||||
CREATE UNIQUE INDEX incremental_export_log_idx on incremental_export_log ("collection", "to_tsmp");
|
||||
|
||||
-- manually insert your latest snapshot here
|
||||
-- insert into incremental_export_log (started, finished, to_tsmp, collection) values ('2024-02-27T05:53:30+00:00', '2024-02-27T07:23:30+00:00', '2024-02-27T05:53:30+00:00', 'app.bsky.graph.follow');
|
||||
-- insert into incremental_export_log (started, finished, to_tsmp, collection) values ('2024-02-27T05:53:30+00:00', '2024-02-27T07:23:30+00:00', '2024-02-27T05:53:30+00:00', 'app.bsky.feed.like');
|
||||
-- insert into incremental_export_log (started, finished, to_tsmp, collection) values ('2024-02-27T05:53:30+00:00', '2024-02-27T07:23:30+00:00', '2024-02-27T05:53:30+00:00', 'app.bsky.feed.post');
|
||||
-- insert into incremental_export_log (started, finished, to_tsmp, collection) values ('2024-02-27T05:53:30+00:00', '2024-02-27T07:23:30+00:00', '2024-02-27T05:53:30+00:00', 'did');
|
||||
-- insert into incremental_export_log (started, finished, to_tsmp, collection) values ('2024-02-27T05:53:30+00:00', '2024-02-27T07:23:30+00:00', '2024-02-27T05:53:30+00:00', 'handle');
|
Loading…
Reference in New Issue