Merge branch 'main' of github.com:uabluerail/indexer
This commit is contained in:
commit
78a17bf238
11 changed files with 154 additions and 46 deletions
26
cmd/record-indexer/postgres_json_test.go
Normal file
26
cmd/record-indexer/postgres_json_test.go
Normal file
|
@ -0,0 +1,26 @@
|
|||
package main
|
||||
|
||||
import (
|
||||
"testing"
|
||||
)
|
||||
|
||||
// TestPostgresFix is a table-driven check of escapeNullCharForPostgres: each
// case supplies raw input bytes and the exact output expected back. Every
// mismatch is reported with t.Errorf, so one failing case does not hide others.
func TestPostgresFix(t *testing.T) {
|
||||
type testCase struct{ input, want string }
|
||||
|
||||
// All literals below are raw strings, so every backslash shown is a literal
// byte in the input, not a Go escape.
cases := []testCase{
|
||||
// No escape sequence at all: expected back unchanged.
{`"a"`, `"a"`},
|
||||
// A single \u0000 escape is expected to become the literal text <0x00>.
{`"\u0000"`, `"<0x00>"`},
|
||||
{`"description":"\u0000"`, `"description":"<0x00>"`},
|
||||
// Two backslashes followed by u0000: expected back unchanged.
{`"\\u0000"`, `"\\u0000"`},
|
||||
// Two backslashes followed by a \u0000 escape: only the escape is expected to change.
{`"\\\u0000"`, `"\\<0x00>"`},
|
||||
// Literal \n sequences stay as they are; every \u0000 in a consecutive run is expected to be replaced.
{`\n\n\u0000\u0000 \u0000\u0000\u0000\u0000 \u0000\u0000\u0000\u0000\u0000`,
|
||||
`\n\n<0x00><0x00> <0x00><0x00><0x00><0x00> <0x00><0x00><0x00><0x00><0x00>`},
|
||||
}
|
||||
|
||||
for _, tc := range cases {
|
||||
got := escapeNullCharForPostgres([]byte(tc.input))
|
||||
if string(got) != tc.want {
|
||||
t.Errorf("escapeNullCharForPostgres(%s) = %s, want %s", tc.input, string(got), tc.want)
|
||||
}
|
||||
|
||||
}
|
||||
}
|
|
@ -3,6 +3,7 @@ package main
|
|||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"slices"
|
||||
"time"
|
||||
|
||||
"github.com/rs/zerolog"
|
||||
|
@ -101,7 +102,11 @@ func (s *Scheduler) fillQueue(ctx context.Context) error {
|
|||
if err := s.db.Find(&remotes).Error; err != nil {
|
||||
return fmt.Errorf("failed to get the list of PDSs: %w", err)
|
||||
}
|
||||
perPDSLimit := 0
|
||||
|
||||
remotes = slices.DeleteFunc(remotes, func(pds pds.PDS) bool {
|
||||
return pds.Disabled
|
||||
})
|
||||
perPDSLimit := maxQueueLen
|
||||
if len(remotes) > 0 {
|
||||
perPDSLimit = maxQueueLen * 2 / len(remotes)
|
||||
}
|
||||
|
|
|
@ -4,7 +4,6 @@ import (
|
|||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
@ -133,37 +132,32 @@ func (p *WorkerPool) worker(ctx context.Context, signal chan struct{}) {
|
|||
}
|
||||
}
|
||||
|
||||
// postgresFixRegexp matches exactly one backslash escape sequence at a time:
// either the six-byte sequence \u0000, or a backslash followed by any single
// character ((?s) lets "." cover newlines too). The \u0000 alternative is
// listed first so that it wins over the generic one.
//
// Consuming every escape as a unit is what keeps an escaped backslash (\\)
// from being mistaken for the start of \u0000, without needing to anchor on a
// preceding non-backslash byte. Anchoring that way misses a \u0000 at the
// very start of the input and one that follows "\u0000\\" directly.
var postgresFixRegexp = regexp.MustCompile(`(?s)\\u0000|\\.`)

// escapeNullCharForPostgres returns a copy of b in which every \u0000 escape
// sequence is replaced with the literal text <0x00>. All other bytes,
// including other escape sequences such as \\ or \n, are copied through
// unchanged, so `\\u0000` (escaped backslash followed by "u0000") is left
// alone. The input slice is not modified.
//
// NOTE(review): judging by the name, this exists because PostgreSQL cannot
// store \u0000 in its JSON text values — confirm against the caller.
func escapeNullCharForPostgres(b []byte) []byte {
	return postgresFixRegexp.ReplaceAllFunc(b, func(seq []byte) []byte {
		if string(seq) == `\u0000` {
			return []byte(`<0x00>`)
		}
		// Any other escape sequence is passed through as-is.
		return seq
	})
}
|
||||
|
||||
func (p *WorkerPool) doWork(ctx context.Context, work WorkItem) error {
|
||||
log := zerolog.Ctx(ctx)
|
||||
defer close(work.signal)
|
||||
|
||||
doc, err := resolver.GetDocument(ctx, work.Repo.DID)
|
||||
u, err := resolver.GetPDSEndpoint(ctx, work.Repo.DID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("resolving did %q: %w", work.Repo.DID, err)
|
||||
return err
|
||||
}
|
||||
|
||||
pdsHost := ""
|
||||
for _, srv := range doc.Service {
|
||||
if srv.Type != "AtprotoPersonalDataServer" {
|
||||
continue
|
||||
}
|
||||
pdsHost = srv.ServiceEndpoint
|
||||
}
|
||||
if pdsHost == "" {
|
||||
return fmt.Errorf("did not find any PDS in DID Document")
|
||||
}
|
||||
u, err := url.Parse(pdsHost)
|
||||
remote, err := pds.EnsureExists(ctx, p.db, u.String())
|
||||
if err != nil {
|
||||
return fmt.Errorf("PDS endpoint (%q) is an invalid URL: %w", pdsHost, err)
|
||||
return fmt.Errorf("failed to get PDS records for %q: %w", u, err)
|
||||
}
|
||||
if u.Host == "" {
|
||||
return fmt.Errorf("PDS endpoint (%q) doesn't have a host part", pdsHost)
|
||||
if work.Repo.PDS != remote.ID {
|
||||
if err := p.db.Model(&work.Repo).Where(&repo.Repo{ID: work.Repo.ID}).Updates(&repo.Repo{PDS: remote.ID}).Error; err != nil {
|
||||
return fmt.Errorf("failed to update repo's PDS to %q: %w", u, err)
|
||||
}
|
||||
work.Repo.PDS = remote.ID
|
||||
}
|
||||
|
||||
client := xrpcauth.NewAnonymousClient(ctx)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue