commit 63a767d890 (parent 2b6abac607)
Max Ignatenko, 2024-02-15 16:10:39 +00:00
25 changed files with 3027 additions and 0 deletions

repo/mst.go (Normal file, 296 lines added)

@@ -0,0 +1,296 @@
package repo

import (
	"bytes"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"

	"github.com/ipfs/go-cid"
	"github.com/ipld/go-car"
	"github.com/ipld/go-ipld-prime/codec/dagcbor"
	"github.com/ipld/go-ipld-prime/codec/dagjson"
	"github.com/ipld/go-ipld-prime/datamodel"
	"github.com/ipld/go-ipld-prime/node/basicnode"
)
func ExtractRecords(ctx context.Context, b io.Reader) (map[string]json.RawMessage, error) {
	r, err := car.NewCarReader(b)
	if err != nil {
		return nil, fmt.Errorf("failed to construct CAR reader: %w", err)
	}

	// Index all blocks by CID, keeping only those whose content actually
	// hashes to the CID they were announced under.
	blocks := map[cid.Cid][]byte{}
	for {
		block, err := r.Next()
		if errors.Is(err, io.EOF) {
			break
		}
		if err != nil {
			return nil, fmt.Errorf("reading next block: %w", err)
		}
		c, err := block.Cid().Prefix().Sum(block.RawData())
		if err != nil {
			return nil, fmt.Errorf("failed to calculate CID from content: %w", err)
		}
		if c.Equals(block.Cid()) {
			blocks[block.Cid()] = block.RawData()
		}
	}

	records := map[string]cid.Cid{}
	for _, root := range r.Header.Roots {
		// TODO: verify that a root is a commit record and validate signature
		cids, err := findRecords(blocks, root, nil, nil, 0)
		if err != nil {
			return nil, err
		}
		for k, v := range cids {
			records[k] = v
		}
	}

	// Re-encode each discovered record from DAG-CBOR to DAG-JSON.
	res := map[string]json.RawMessage{}
	for k, c := range records {
		builder := basicnode.Prototype.Any.NewBuilder()
		if err := (&dagcbor.DecodeOptions{AllowLinks: true}).Decode(builder, bytes.NewReader(blocks[c])); err != nil {
			return nil, fmt.Errorf("unmarshaling %q: %w", c.String(), err)
		}
		w := bytes.NewBuffer(nil)
		if err := (dagjson.EncodeOptions{EncodeLinks: true, EncodeBytes: true}).Encode(builder.Build(), w); err != nil {
			return nil, fmt.Errorf("marshaling %q as JSON: %w", c.String(), err)
		}
		res[k] = w.Bytes()
	}
	return res, nil
}
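// Hypothetical usage sketch, not part of the original commit: it assumes
// additional "log" and "os" imports and a local "repo.car" file, and prints
// every record key (typically "collection/rkey") with its DAG-JSON body.
func exampleExtractRecords() {
	f, err := os.Open("repo.car")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	records, err := ExtractRecords(context.Background(), f)
	if err != nil {
		log.Fatal(err)
	}
	for key, raw := range records {
		fmt.Printf("%s: %s\n", key, raw)
	}
}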
const maxDepth = 128

func findRecords(blocks map[cid.Cid][]byte, root cid.Cid, key []byte, visited map[cid.Cid]bool, depth int) (map[string]cid.Cid, error) {
	if depth > maxDepth {
		return nil, fmt.Errorf("reached maximum depth at %q", root.String())
	}
	if visited == nil {
		visited = map[cid.Cid]bool{}
	}
	visited[root] = true

	builder := basicnode.Prototype.Any.NewBuilder()
	if err := (&dagcbor.DecodeOptions{AllowLinks: true}).Decode(builder, bytes.NewReader(blocks[root])); err != nil {
		return nil, fmt.Errorf("unmarshaling %q: %w", root.String(), err)
	}
	node := builder.Build()
	if node.Kind() != datamodel.Kind_Map {
		return nil, nil
	}
	m, err := parseMap(node)
	if err != nil {
		return nil, err
	}

	if _, ok := m["$type"]; ok {
		// A record: records carry a "$type" field, unlike commit and MST nodes.
		return map[string]cid.Cid{string(key): root}, nil
	}

	if d, ok := m["data"]; ok {
		// Commit record
		if d.Kind() == datamodel.Kind_Link {
			l, _ := d.AsLink()
			if l != nil {
				c, err := cid.Parse([]byte(l.Binary()))
				if err != nil {
					return nil, fmt.Errorf("failed to parse %q as CID: %w", l.String(), err)
				}
				if _, ok := blocks[c]; ok && !visited[c] {
					return findRecords(blocks, c, nil, visited, depth+1)
				}
			}
		}
		return nil, nil
	}
if entries, ok := m["e"]; ok {
// MST node
r := map[string]cid.Cid{}
iter := entries.ListIterator()
key = []byte{}
for !iter.Done() {
_, item, err := iter.Next()
if err != nil {
return nil, fmt.Errorf("failed to read the next list item in block %q: %w", root.String(), err)
}
if item.Kind() != datamodel.Kind_Map {
continue
}
m, err := parseMap(item)
if err != nil {
return nil, err
}
for _, field := range []string{"k", "p", "v", "t"} {
if _, ok := m[field]; !ok {
return nil, fmt.Errorf("TreeEntry is missing field %q", field)
}
}
prefixLen, err := m["p"].AsInt()
if err != nil {
return nil, fmt.Errorf("m[\"p\"].AsInt(): %w", err)
}
prefixPart, err := m["k"].AsBytes()
if err != nil {
return nil, fmt.Errorf("m[\"k\"].AsBytes(): %w", err)
}
val, err := m["v"].AsLink()
if err != nil {
return nil, fmt.Errorf("m[\"v\"].AsLink(): %w", err)
}
c, err := cid.Parse([]byte(val.Binary()))
if err != nil {
return nil, fmt.Errorf("failed to parse %q as CID: %w", val.String(), err)
}
if len(key) == 0 {
// First entry, must have a full key.
if prefixLen != 0 {
return nil, fmt.Errorf("incomplete key in the first entry")
}
key = prefixPart
}
if prefixLen > int64(len(key)) {
return nil, fmt.Errorf("specified prefix length is larger than the key length: %d > %d", prefixLen, len(key))
}
key = append(key[:prefixLen], prefixPart...)
if _, ok := blocks[c]; ok && !visited[c] {
results, err := findRecords(blocks, c, key, visited, depth+1)
if err != nil {
return nil, err
}
for k, v := range results {
r[k] = v
}
}
if m["t"] != nil && m["t"].Kind() == datamodel.Kind_Link {
subtree, err := m["t"].AsLink()
if err != nil {
return nil, fmt.Errorf("m[\"t\"].AsLink(): %w", err)
}
subtreeCid, err := cid.Parse([]byte(subtree.Binary()))
if err != nil {
return nil, fmt.Errorf("failed to parse %q as CID: %w", val.String(), err)
}
if _, ok := blocks[subtreeCid]; ok && !visited[subtreeCid] {
results, err := findRecords(blocks, subtreeCid, key, visited, depth+1)
if err != nil {
return nil, err
}
for k, v := range results {
r[k] = v
}
}
}
}
left, ok := m["l"]
if ok && left.Kind() == datamodel.Kind_Link {
l, _ := left.AsLink()
if l != nil {
c, err := cid.Parse([]byte(l.Binary()))
if err != nil {
return nil, fmt.Errorf("failed to parse %q as CID: %w", l.String(), err)
}
if _, ok := blocks[c]; ok && !visited[c] {
results, err := findRecords(blocks, c, nil, visited, depth+1)
if err != nil {
return nil, err
}
for k, v := range results {
r[k] = v
}
}
}
}
return r, nil
}
return nil, fmt.Errorf("unrecognized block %q", root.String())
}
// parseMap converts a map node into a native Go map, skipping non-string keys.
func parseMap(node datamodel.Node) (map[string]datamodel.Node, error) {
	if node.Kind() != datamodel.Kind_Map {
		return nil, fmt.Errorf("not a map")
	}
	m := map[string]datamodel.Node{}
	iter := node.MapIterator()
	for !iter.Done() {
		k, v, err := iter.Next()
		if err != nil {
			return nil, fmt.Errorf("iterating over map fields: %w", err)
		}
		if k.Kind() != datamodel.Kind_String {
			continue
		}
		ks, _ := k.AsString()
		m[ks] = v
	}
	return m, nil
}
// GetRev returns the revision ("rev" field) of the commit found at the first
// root of the CAR stream.
func GetRev(ctx context.Context, b io.Reader) (string, error) {
	r, err := car.NewCarReader(b)
	if err != nil {
		return "", fmt.Errorf("failed to construct CAR reader: %w", err)
	}
	if len(r.Header.Roots) == 0 {
		return "", fmt.Errorf("no roots specified in CAR header")
	}
	blocks := map[cid.Cid][]byte{}
	for {
		block, err := r.Next()
		if errors.Is(err, io.EOF) {
			break
		}
		if err != nil {
			return "", fmt.Errorf("reading next block: %w", err)
		}
		c, err := block.Cid().Prefix().Sum(block.RawData())
		if err != nil {
			return "", fmt.Errorf("failed to calculate CID from content: %w", err)
		}
		if c.Equals(block.Cid()) {
			blocks[block.Cid()] = block.RawData()
		}
	}
	builder := basicnode.Prototype.Any.NewBuilder()
	if err := (&dagcbor.DecodeOptions{AllowLinks: true}).Decode(builder, bytes.NewReader(blocks[r.Header.Roots[0]])); err != nil {
		return "", fmt.Errorf("unmarshaling %q: %w", r.Header.Roots[0].String(), err)
	}
	node := builder.Build()
	v, err := node.LookupByString("rev")
	if err != nil {
		return "", fmt.Errorf("looking up 'rev' field: %w", err)
	}
	s, err := v.AsString()
	if err != nil {
		return "", fmt.Errorf("rev.AsString(): %w", err)
	}
	return s, nil
}
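// Hypothetical usage sketch, not part of the original commit: it assumes
// additional "log" and "os" imports and a local "repo.car" file. Note that
// GetRev consumes the reader, so a fresh reader is needed if ExtractRecords
// is to be called on the same CAR afterwards.
func exampleGetRev() {
	f, err := os.Open("repo.car")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	rev, err := GetRev(context.Background(), f)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println("repo rev:", rev)
}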

repo/repo.go (Normal file, 103 lines added)

@@ -0,0 +1,103 @@
package repo

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"net/url"
	"time"

	"gorm.io/gorm"

	"github.com/uabluerail/indexer/models"
	"github.com/uabluerail/indexer/pds"
	"github.com/uabluerail/indexer/util/resolver"
)
type Repo struct {
	gorm.Model
	PDS                   models.ID `gorm:"index:rev_state_index,priority:2"`
	DID                   string    `gorm:"uniqueIndex;column:did"`
	LastIndexedRev        string    `gorm:"index:rev_state_index,expression:(last_indexed_rev < first_rev_since_reset),priority:1"`
	FirstRevSinceReset    string
	FirstCursorSinceReset int64
	TombstonedAt          time.Time
	LastIndexAttempt      time.Time
	LastError             string
}

type Record struct {
	gorm.Model
	Repo       models.ID       `gorm:"index:idx_repo_record_key,unique,priority:1;not null"`
	Collection string          `gorm:"index:idx_repo_record_key,unique,priority:2;not null"`
	Rkey       string          `gorm:"index:idx_repo_record_key,unique,priority:3"`
	Content    json.RawMessage `gorm:"type:JSONB"`
	Deleted    bool
}
func AutoMigrate(db *gorm.DB) error {
	return db.AutoMigrate(&Repo{}, &Record{})
}
// EnsureExists returns the Repo row for the given DID, creating it (along with
// the corresponding PDS row) if it doesn't exist yet.
func EnsureExists(ctx context.Context, db *gorm.DB, did string) (*Repo, error) {
	r := Repo{}
	if err := db.Model(&r).Where(&Repo{DID: did}).Take(&r).Error; err == nil {
		// Already have a row, just return it.
		return &r, nil
	} else if !errors.Is(err, gorm.ErrRecordNotFound) {
		return nil, fmt.Errorf("querying DB: %w", err)
	}

	// No row yet, so we need to create one (keeping in mind that it can be created
	// concurrently by someone else).
	// 1) resolve did (i.e., query PLC)
	// 2) get PDS address from didDoc and ensure we have a record for it
	// 3) in a transaction, check if we have a record for the repo
	//    if we don't - just create a record
	//    if we do - compare PDS IDs
	//       if they don't match - also reset FirstRevSinceReset
	doc, err := resolver.GetDocument(ctx, did)
	if err != nil {
		return nil, fmt.Errorf("fetching DID Document: %w", err)
	}

	pdsHost := ""
	for _, srv := range doc.Service {
		if srv.Type != "AtprotoPersonalDataServer" {
			continue
		}
		pdsHost = srv.ServiceEndpoint
	}
	if pdsHost == "" {
		return nil, fmt.Errorf("did not find any PDS in DID Document")
	}
	u, err := url.Parse(pdsHost)
	if err != nil {
		return nil, fmt.Errorf("PDS endpoint (%q) is an invalid URL: %w", pdsHost, err)
	}
	if u.Host == "" {
		return nil, fmt.Errorf("PDS endpoint (%q) doesn't have a host part", pdsHost)
	}
	remote := pds.PDS{Host: u.String()}
	if err := db.Model(&remote).Where(&pds.PDS{Host: remote.Host}).FirstOrCreate(&remote).Error; err != nil {
		return nil, fmt.Errorf("failed to get PDS record from DB for %q: %w", remote.Host, err)
	}

	r = Repo{DID: did, PDS: models.ID(remote.ID)}
	err = db.Transaction(func(tx *gorm.DB) error {
		if err := tx.Model(&r).Where(&Repo{DID: r.DID}).FirstOrCreate(&r).Error; err != nil {
			return fmt.Errorf("looking for repo: %w", err)
		}
		if r.PDS != models.ID(remote.ID) {
			return tx.Model(&r).Select("FirstRevSinceReset").Updates(&Repo{FirstRevSinceReset: ""}).Error
		}
		return nil
	})
	if err != nil {
		return nil, fmt.Errorf("upserting repo record: %w", err)
	}
	return &r, nil
}
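// Hypothetical usage sketch, not part of the original commit: it assumes an
// already opened *gorm.DB and uses a made-up DID purely for illustration.
func exampleEnsureExists(ctx context.Context, db *gorm.DB) error {
	if err := AutoMigrate(db); err != nil {
		return fmt.Errorf("migrating schema: %w", err)
	}
	r, err := EnsureExists(ctx, db, "did:plc:example0000000000000000")
	if err != nil {
		return fmt.Errorf("ensuring repo row: %w", err)
	}
	fmt.Printf("repo %d (DID %s) is stored on PDS %d\n", r.ID, r.DID, r.PDS)
	return nil
}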