Import
This commit is contained in:
parent
2b6abac607
commit
63a767d890
25 changed files with 3027 additions and 0 deletions
296
repo/mst.go
Normal file
296
repo/mst.go
Normal file
|
@ -0,0 +1,296 @@
|
|||
package repo
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"github.com/ipfs/go-cid"
|
||||
"github.com/ipld/go-car"
|
||||
"github.com/ipld/go-ipld-prime/codec/dagcbor"
|
||||
"github.com/ipld/go-ipld-prime/codec/dagjson"
|
||||
"github.com/ipld/go-ipld-prime/datamodel"
|
||||
"github.com/ipld/go-ipld-prime/node/basicnode"
|
||||
)
|
||||
|
||||
func ExtractRecords(ctx context.Context, b io.Reader) (map[string]json.RawMessage, error) {
|
||||
r, err := car.NewCarReader(b)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to construct CAR reader: %w", err)
|
||||
}
|
||||
|
||||
blocks := map[cid.Cid][]byte{}
|
||||
for {
|
||||
block, err := r.Next()
|
||||
if errors.Is(err, io.EOF) {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("reading next block: %w", err)
|
||||
}
|
||||
c, err := block.Cid().Prefix().Sum(block.RawData())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to calculate CID from content")
|
||||
}
|
||||
if c.Equals(block.Cid()) {
|
||||
blocks[block.Cid()] = block.RawData()
|
||||
}
|
||||
}
|
||||
|
||||
records := map[string]cid.Cid{}
|
||||
for _, root := range r.Header.Roots {
|
||||
// TODO: verify that a root is a commit record and validate signature
|
||||
|
||||
cids, err := findRecords(blocks, root, nil, nil, 0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for k, v := range cids {
|
||||
records[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
res := map[string]json.RawMessage{}
|
||||
for k, c := range records {
|
||||
builder := basicnode.Prototype.Any.NewBuilder()
|
||||
if err := (&dagcbor.DecodeOptions{AllowLinks: true}).Decode(builder, bytes.NewReader(blocks[c])); err != nil {
|
||||
return nil, fmt.Errorf("unmarshaling %q: %w", c.String(), err)
|
||||
}
|
||||
w := bytes.NewBuffer(nil)
|
||||
if err := (dagjson.EncodeOptions{EncodeLinks: true, EncodeBytes: true}).Encode(builder.Build(), w); err != nil {
|
||||
return nil, fmt.Errorf("marshaling %q as JSON: %w", c.String(), err)
|
||||
}
|
||||
res[k] = w.Bytes()
|
||||
}
|
||||
return res, nil
|
||||
}
|
||||
|
||||
// maxDepth is the recursion depth beyond which findRecords gives up with an error.
const maxDepth = 128
|
||||
|
||||
func findRecords(blocks map[cid.Cid][]byte, root cid.Cid, key []byte, visited map[cid.Cid]bool, depth int) (map[string]cid.Cid, error) {
|
||||
if depth > maxDepth {
|
||||
return nil, fmt.Errorf("reached maximum depth at %q", root.String())
|
||||
}
|
||||
|
||||
if visited == nil {
|
||||
visited = map[cid.Cid]bool{}
|
||||
}
|
||||
|
||||
visited[root] = true
|
||||
|
||||
builder := basicnode.Prototype.Any.NewBuilder()
|
||||
if err := (&dagcbor.DecodeOptions{AllowLinks: true}).Decode(builder, bytes.NewReader(blocks[root])); err != nil {
|
||||
return nil, fmt.Errorf("unmarshaling %q: %w", root.String(), err)
|
||||
}
|
||||
node := builder.Build()
|
||||
|
||||
if node.Kind() != datamodel.Kind_Map {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
m, err := parseMap(node)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
if _, ok := m["$type"]; ok {
|
||||
return map[string]cid.Cid{string(key): root}, nil
|
||||
}
|
||||
|
||||
if d, ok := m["data"]; ok {
|
||||
// Commit record
|
||||
if d.Kind() == datamodel.Kind_Link {
|
||||
l, _ := d.AsLink()
|
||||
if l != nil {
|
||||
c, err := cid.Parse([]byte(l.Binary()))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse %q as CID: %w", l.String(), err)
|
||||
}
|
||||
if _, ok := blocks[c]; ok && !visited[c] {
|
||||
return findRecords(blocks, c, nil, visited, depth+1)
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
if entries, ok := m["e"]; ok {
|
||||
// MST node
|
||||
r := map[string]cid.Cid{}
|
||||
iter := entries.ListIterator()
|
||||
key = []byte{}
|
||||
for !iter.Done() {
|
||||
_, item, err := iter.Next()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read the next list item in block %q: %w", root.String(), err)
|
||||
}
|
||||
if item.Kind() != datamodel.Kind_Map {
|
||||
continue
|
||||
}
|
||||
|
||||
m, err := parseMap(item)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, field := range []string{"k", "p", "v", "t"} {
|
||||
if _, ok := m[field]; !ok {
|
||||
return nil, fmt.Errorf("TreeEntry is missing field %q", field)
|
||||
}
|
||||
}
|
||||
prefixLen, err := m["p"].AsInt()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("m[\"p\"].AsInt(): %w", err)
|
||||
}
|
||||
prefixPart, err := m["k"].AsBytes()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("m[\"k\"].AsBytes(): %w", err)
|
||||
}
|
||||
val, err := m["v"].AsLink()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("m[\"v\"].AsLink(): %w", err)
|
||||
}
|
||||
c, err := cid.Parse([]byte(val.Binary()))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse %q as CID: %w", val.String(), err)
|
||||
}
|
||||
|
||||
if len(key) == 0 {
|
||||
// First entry, must have a full key.
|
||||
if prefixLen != 0 {
|
||||
return nil, fmt.Errorf("incomplete key in the first entry")
|
||||
}
|
||||
key = prefixPart
|
||||
}
|
||||
|
||||
if prefixLen > int64(len(key)) {
|
||||
return nil, fmt.Errorf("specified prefix length is larger than the key length: %d > %d", prefixLen, len(key))
|
||||
}
|
||||
key = append(key[:prefixLen], prefixPart...)
|
||||
|
||||
if _, ok := blocks[c]; ok && !visited[c] {
|
||||
results, err := findRecords(blocks, c, key, visited, depth+1)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for k, v := range results {
|
||||
r[k] = v
|
||||
}
|
||||
}
|
||||
|
||||
if m["t"] != nil && m["t"].Kind() == datamodel.Kind_Link {
|
||||
subtree, err := m["t"].AsLink()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("m[\"t\"].AsLink(): %w", err)
|
||||
}
|
||||
subtreeCid, err := cid.Parse([]byte(subtree.Binary()))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse %q as CID: %w", val.String(), err)
|
||||
}
|
||||
if _, ok := blocks[subtreeCid]; ok && !visited[subtreeCid] {
|
||||
results, err := findRecords(blocks, subtreeCid, key, visited, depth+1)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for k, v := range results {
|
||||
r[k] = v
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
left, ok := m["l"]
|
||||
if ok && left.Kind() == datamodel.Kind_Link {
|
||||
l, _ := left.AsLink()
|
||||
if l != nil {
|
||||
c, err := cid.Parse([]byte(l.Binary()))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse %q as CID: %w", l.String(), err)
|
||||
}
|
||||
if _, ok := blocks[c]; ok && !visited[c] {
|
||||
results, err := findRecords(blocks, c, nil, visited, depth+1)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
for k, v := range results {
|
||||
r[k] = v
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return r, nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("unrecognized block %q", root.String())
|
||||
}
|
||||
|
||||
func parseMap(node datamodel.Node) (map[string]datamodel.Node, error) {
|
||||
if node.Kind() != datamodel.Kind_Map {
|
||||
return nil, fmt.Errorf("not a map")
|
||||
}
|
||||
|
||||
m := map[string]datamodel.Node{}
|
||||
iter := node.MapIterator()
|
||||
for !iter.Done() {
|
||||
k, v, err := iter.Next()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("iterating over map fields: %w", err)
|
||||
}
|
||||
if k.Kind() != datamodel.Kind_String {
|
||||
continue
|
||||
}
|
||||
ks, _ := k.AsString()
|
||||
m[ks] = v
|
||||
}
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func GetRev(ctx context.Context, b io.Reader) (string, error) {
|
||||
r, err := car.NewCarReader(b)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to construct CAR reader: %w", err)
|
||||
}
|
||||
|
||||
if len(r.Header.Roots) == 0 {
|
||||
return "", fmt.Errorf("no roots specified in CAR header")
|
||||
}
|
||||
|
||||
blocks := map[cid.Cid][]byte{}
|
||||
for {
|
||||
block, err := r.Next()
|
||||
if errors.Is(err, io.EOF) {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("reading next block: %w", err)
|
||||
}
|
||||
c, err := block.Cid().Prefix().Sum(block.RawData())
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("failed to calculate CID from content")
|
||||
}
|
||||
if c.Equals(block.Cid()) {
|
||||
blocks[block.Cid()] = block.RawData()
|
||||
}
|
||||
}
|
||||
|
||||
builder := basicnode.Prototype.Any.NewBuilder()
|
||||
if err := (&dagcbor.DecodeOptions{AllowLinks: true}).Decode(builder, bytes.NewReader(blocks[r.Header.Roots[0]])); err != nil {
|
||||
return "", fmt.Errorf("unmarshaling %q: %w", r.Header.Roots[0].String(), err)
|
||||
}
|
||||
node := builder.Build()
|
||||
|
||||
v, err := node.LookupByString("rev")
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("looking up 'rev' field: %w", err)
|
||||
}
|
||||
|
||||
s, err := v.AsString()
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("rev.AsString(): %w", err)
|
||||
}
|
||||
return s, nil
|
||||
}
|
103
repo/repo.go
Normal file
103
repo/repo.go
Normal file
|
@ -0,0 +1,103 @@
|
|||
package repo
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"time"
|
||||
|
||||
"gorm.io/gorm"
|
||||
|
||||
"github.com/uabluerail/indexer/models"
|
||||
"github.com/uabluerail/indexer/pds"
|
||||
"github.com/uabluerail/indexer/util/resolver"
|
||||
)
|
||||
|
||||
type Repo struct {
|
||||
gorm.Model
|
||||
PDS models.ID `gorm:"index:rev_state_index,priority:2"`
|
||||
DID string `gorm:"uniqueIndex;column:did"`
|
||||
LastIndexedRev string `gorm:"index:rev_state_index,expression:(last_indexed_rev < first_rev_since_reset),priority:1"`
|
||||
FirstRevSinceReset string
|
||||
FirstCursorSinceReset int64
|
||||
TombstonedAt time.Time
|
||||
LastIndexAttempt time.Time
|
||||
LastError string
|
||||
}
|
||||
|
||||
type Record struct {
|
||||
gorm.Model
|
||||
Repo models.ID `gorm:"index:idx_repo_record_key,unique,priority:1;not null"`
|
||||
Collection string `gorm:"index:idx_repo_record_key,unique,priority:2;not null"`
|
||||
Rkey string `gorm:"index:idx_repo_record_key,unique,priority:3"`
|
||||
Content json.RawMessage `gorm:"type:JSONB"`
|
||||
Deleted bool
|
||||
}
|
||||
|
||||
func AutoMigrate(db *gorm.DB) error {
|
||||
return db.AutoMigrate(&Repo{}, &Record{})
|
||||
}
|
||||
|
||||
func EnsureExists(ctx context.Context, db *gorm.DB, did string) (*Repo, error) {
|
||||
r := Repo{}
|
||||
if err := db.Model(&r).Where(&Repo{DID: did}).Take(&r).Error; err == nil {
|
||||
// Already have a row, just return it.
|
||||
return &r, nil
|
||||
} else {
|
||||
if !errors.Is(err, gorm.ErrRecordNotFound) {
|
||||
return nil, fmt.Errorf("querying DB: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// No row yet, so we need to create one (keeping in mind that it can be created
|
||||
// concurrently by someone else).
|
||||
// 1) resolve did (i.e., query PLC)
|
||||
// 2) get PDS address from didDoc and ensure we have a record for it
|
||||
// 3) in a transaction, check if we have a record for the repo
|
||||
// if we don't - just create a record
|
||||
// if we do - compare PDS IDs
|
||||
// if they don't match - also reset FirstRevSinceReset
|
||||
|
||||
doc, err := resolver.GetDocument(ctx, did)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("fetching DID Document: %w", err)
|
||||
}
|
||||
|
||||
pdsHost := ""
|
||||
for _, srv := range doc.Service {
|
||||
if srv.Type != "AtprotoPersonalDataServer" {
|
||||
continue
|
||||
}
|
||||
pdsHost = srv.ServiceEndpoint
|
||||
}
|
||||
if pdsHost == "" {
|
||||
return nil, fmt.Errorf("did not find any PDS in DID Document")
|
||||
}
|
||||
u, err := url.Parse(pdsHost)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("PDS endpoint (%q) is an invalid URL: %w", pdsHost, err)
|
||||
}
|
||||
if u.Host == "" {
|
||||
return nil, fmt.Errorf("PDS endpoint (%q) doesn't have a host part", pdsHost)
|
||||
}
|
||||
remote := pds.PDS{Host: u.String()}
|
||||
if err := db.Model(&remote).Where(&pds.PDS{Host: remote.Host}).FirstOrCreate(&remote).Error; err != nil {
|
||||
return nil, fmt.Errorf("failed to get PDS record from DB for %q: %w", remote.Host, err)
|
||||
}
|
||||
r = Repo{DID: did, PDS: models.ID(remote.ID)}
|
||||
err = db.Transaction(func(tx *gorm.DB) error {
|
||||
if err := tx.Model(&r).Where(&Repo{DID: r.DID}).FirstOrCreate(&r).Error; err != nil {
|
||||
return fmt.Errorf("looking for repo: %w", err)
|
||||
}
|
||||
if r.PDS != models.ID(remote.ID) {
|
||||
return tx.Model(&r).Select("FirstRevSinceReset").Updates(&Repo{FirstRevSinceReset: ""}).Error
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("upserting repo record: %w", err)
|
||||
}
|
||||
return &r, nil
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue