Fix view in migration. Add by-language metric to the consumer.

main
mathan 2024-02-22 18:54:29 -08:00
parent 600dac7694
commit db425b1d5f
11 changed files with 101 additions and 33 deletions

View File

@ -39,11 +39,11 @@ logs:
psql: psql:
@docker compose exec -it postgres psql -U postgres -d bluesky @docker compose exec -it postgres psql -U postgres -d bluesky
init-db: init.sql init-db:
@docker compose up -d --build lister @docker compose up -d --build lister
@sleep 10 @sleep 10
@docker compose stop lister @docker compose stop lister
@cat db-migration/init.sql | docker exec -i "$$(docker compose ps --format '{{.Names}}' postgres)" psql -U postgres -d bluesky @cat ./db-migration/init.sql | docker exec -i "$$(docker compose ps --format '{{.Names}}' postgres)" psql -U postgres -d bluesky
# ---------------------------- Database ---------------------------- # ---------------------------- Database ----------------------------

View File

@ -295,6 +295,14 @@ func (c *Consumer) processMessage(ctx context.Context, typ string, r io.Reader,
log.Warn().Msgf("Unexpected key format: %q", k) log.Warn().Msgf("Unexpected key format: %q", k)
continue continue
} }
langs, _, err := repo.GetLang(ctx, v)
if err == nil {
lang := ""
if len(langs) != 0 {
lang = langs[0]
}
postsByLanguageIndexed.WithLabelValues(c.remote.Host, lang).Inc()
}
recs = append(recs, repo.Record{ recs = append(recs, repo.Record{
Repo: models.ID(repoInfo.ID), Repo: models.ID(repoInfo.ID),
Collection: parts[0], Collection: parts[0],

View File

@ -39,6 +39,25 @@ type Config struct {
var config Config var config Config
// TODO: figure out how to register a collector that emits metrics with custom timestamps
// type LangTimestampCollector struct {
// metric *prometheus.Desc
// }
// func (c *LangTimestampCollector) Describe(ch chan<- *prometheus.Desc) {
// ch <- c.metric
// }
// func (c *LangTimestampCollector) Collect(ch chan<- prometheus.Metric) {
// // your logic should be placed here
// t := time.Date(2009, time.November, 10, 23, 0, 0, 12345678, time.UTC)
// s := prometheus.NewMetricWithTimestamp(t, prometheus.MustNewConstMetric(c.metric, prometheus.CounterValue, 123))
// ch <- s
// }
func runMain(ctx context.Context) error { func runMain(ctx context.Context) error {
ctx = setupLogging(ctx) ctx = setupLogging(ctx)
log := zerolog.Ctx(ctx) log := zerolog.Ctx(ctx)
@ -69,6 +88,16 @@ func runMain(ctx context.Context) error {
} }
} }
// collector := &LangTimestampCollector{
// metric: prometheus.NewDesc(
// "indexer_posts_by_language_timestamp_count",
// "Language metric with custom TS",
// nil,
// nil,
// ),
// }
// prometheus.MustRegister(collector)
log.Info().Msgf("Starting HTTP listener on %q...", config.MetricsPort) log.Info().Msgf("Starting HTTP listener on %q...", config.MetricsPort)
http.Handle("/metrics", promhttp.Handler()) http.Handle("/metrics", promhttp.Handler())
srv := &http.Server{Addr: fmt.Sprintf(":%s", config.MetricsPort)} srv := &http.Server{Addr: fmt.Sprintf(":%s", config.MetricsPort)}

View File

@ -19,3 +19,8 @@ var reposDiscovered = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "repo_discovered_counter", Name: "repo_discovered_counter",
Help: "Counter of newly discovered repos", Help: "Counter of newly discovered repos",
}, []string{"remote"}) }, []string{"remote"})
// postsByLanguageIndexed counts indexed posts, labeled by the remote
// host they were consumed from and by the post's first declared
// language (empty string when the post declares no langs).
var postsByLanguageIndexed = promauto.NewCounterVec(prometheus.CounterOpts{
Name: "indexer_posts_by_language_count",
Help: "Number of posts by language",
}, []string{"remote", "lang"})

View File

@ -35,11 +35,6 @@ var recordsInserted = promauto.NewCounter(prometheus.CounterOpts{
Help: "Number of records inserted into DB", Help: "Number of records inserted into DB",
}) })
// var postsByLanguageIndexed = promauto.NewCounterVec(prometheus.CounterOpts{
// Name: "indexer_posts_by_language_inserted_count",
// Help: "Number of posts inserted into DB by language",
// }, []string{"lang"})
var workerPoolSize = promauto.NewGauge(prometheus.GaugeOpts{ var workerPoolSize = promauto.NewGauge(prometheus.GaugeOpts{
Name: "indexer_workers_count", Name: "indexer_workers_count",
Help: "Current number of workers running", Help: "Current number of workers running",

View File

@ -221,10 +221,7 @@ retry:
continue continue
} }
v = regexp.MustCompile(`[^\\](\\\\)*(\\u0000)`).ReplaceAll(v, []byte(`$1<0x00>`)) v = regexp.MustCompile(`[^\\](\\\\)*(\\u0000)`).ReplaceAll(v, []byte(`$1<0x00>`))
// lang, err := repo.GetLang(ctx, v)
// if err == nil {
// postsByLanguageIndexed.WithLabelValues(u.String(), lang).Inc()
// }
recs = append(recs, repo.Record{ recs = append(recs, repo.Record{
Repo: models.ID(work.Repo.ID), Repo: models.ID(work.Repo.ID),
Collection: parts[0], Collection: parts[0],

View File

@ -23,8 +23,22 @@ partition of records for values in ('app.bsky.feed.repost');
create table records_profile create table records_profile
partition of records for values in ('app.bsky.actor.profile'); partition of records for values in ('app.bsky.actor.profile');
ALTER TABLE records_like
ADD CHECK (collection in ('app.bsky.feed.like'));
-- SLOW, can run overnight, make sure to run in tmux or eternal terminal ALTER TABLE records_post
ADD CHECK (collection in ('app.bsky.feed.post'));
ALTER TABLE records_follow
ADD CHECK (collection in ('app.bsky.graph.follow'));
ALTER TABLE records_repost
ADD CHECK (collection in ('app.bsky.feed.repost'));
ALTER TABLE records_profile
ADD CHECK (collection in ('app.bsky.actor.profile'));
-- SLOW, can run overnight
with moved_rows as ( with moved_rows as (
delete from records_like r delete from records_like r
where collection <> 'app.bsky.feed.like' where collection <> 'app.bsky.feed.like'
@ -32,9 +46,9 @@ with moved_rows as (
) )
insert into records select * from moved_rows; insert into records select * from moved_rows;
-- ULTRA SLOW, DO NOT RUN on large DB
alter table records attach partition records_like for values in ('app.bsky.feed.like'); alter table records attach partition records_like for values in ('app.bsky.feed.like');
create index idx_like_subject create index idx_like_subject
on records_like on records_like
(split_part(jsonb_extract_path_text(content, 'subject', 'uri'), '/', 3)); (split_part(jsonb_extract_path_text(content, 'subject', 'uri'), '/', 3));

View File

@ -22,7 +22,7 @@ create index post_created_at on records_post (parse_timestamp(jsonb_extract_path
create view posts as create view posts as
select *, jsonb_extract_path(content, 'langs') as langs, select *, jsonb_extract_path(content, 'langs') as langs,
parse_timestamp(jsonb_extract_path_text(content, 'createdAt')) as created_at parse_timestamp(jsonb_extract_path_text(content, 'createdAt')) as content_created_at
from records_post; from records_post;
explain select count(*) from posts where langs ? 'uk' and content_created_at > now() - interval '1 day'; explain select count(*) from posts where langs ? 'uk' and content_created_at > now() - interval '1 day';

View File

@ -7,6 +7,15 @@ partition of records for values in ('app.bsky.graph.listblock');
create table records_listitem create table records_listitem
partition of records for values in ('app.bsky.graph.listitem'); partition of records for values in ('app.bsky.graph.listitem');
-- Constrain each partition to the single collection value it is meant
-- to hold, mirroring the partition-list definitions above.
ALTER TABLE records_list
ADD CHECK (collection in ('app.bsky.graph.list'));
ALTER TABLE records_listblock
ADD CHECK (collection in ('app.bsky.graph.listblock'));
ALTER TABLE records_listitem
ADD CHECK (collection in ('app.bsky.graph.listitem'));
with moved_rows as ( with moved_rows as (
delete from records_default r delete from records_default r
where collection in ('app.bsky.graph.list', 'app.bsky.graph.listblock', 'app.bsky.graph.listitem') where collection in ('app.bsky.graph.list', 'app.bsky.graph.listblock', 'app.bsky.graph.listitem')

View File

@ -56,6 +56,11 @@ Restart errors
`update repos set failed_attempts=0, last_error='' where failed_attempts >0;` `update repos set failed_attempts=0, last_error='' where failed_attempts >0;`
# MONITORING
More verbose logging for queries DEBUG1-DEBUG5
`set client_min_messages = 'DEBUG5';`
Take a look at slow queries Take a look at slow queries
``` ```
SELECT pid, age(clock_timestamp(), query_start), state, query SELECT pid, age(clock_timestamp(), query_start), state, query
@ -64,6 +69,9 @@ WHERE query != '<IDLE>' AND query NOT ILIKE '%pg_stat_activity%'
ORDER BY query_start asc; ORDER BY query_start asc;
``` ```
Monitor index progress
`select * from pg_stat_progress_create_index;`
Explore new collection types Explore new collection types
``` ```

View File

@ -7,6 +7,7 @@ import (
"errors" "errors"
"fmt" "fmt"
"io" "io"
"time"
"github.com/ipfs/go-cid" "github.com/ipfs/go-cid"
"github.com/ipld/go-car" "github.com/ipld/go-car"
@ -295,24 +296,26 @@ func GetRev(ctx context.Context, b io.Reader) (string, error) {
return s, nil return s, nil
} }
// func GetLang(ctx context.Context, value json.RawMessage) (string, error) { func GetLang(ctx context.Context, value json.RawMessage) ([]string, time.Time, error) {
// var content map[string]interface{} var content struct {
// var lang string Type string `json:"$type"`
// err := json.Unmarshal([]byte(value), &content) Langs []string `json:"langs"`
Time string `json:"createdAt"`
}
err := json.Unmarshal([]byte(value), &content)
// if err != nil { if err != nil {
// return "", fmt.Errorf("failed to extract lang from content") return nil, time.Now(), fmt.Errorf("failed to extract lang from content: %w", err)
// } }
if content.Type != "app.bsky.feed.post" {
return nil, time.Now(), errors.New("not a post")
}
// if content["$type"] != "app.bsky.feed.post" || var timestamp time.Time
// content["langs"] == nil || if t, err := time.Parse(time.RFC3339, content.Time); err != nil {
// content["langs"].([]string) == nil || return nil, time.Now(), fmt.Errorf("failed to extract time from content: %w", err)
// len(content["langs"].([]string)) == 0 { } else {
// return "", errors.New("not a post") timestamp = t
// } }
return content.Langs, timestamp, nil
// //todo: do something a bit less dumb than that }
// lang = content["langs"].([]string)[0]
// return lang, nil
// }