Add plc mirror

If you want to avoid sending all requests to https://plc.directory while
it's catching up for the first time, do
`docker compose up -d --build plc` and wait for it to catch up before
updating all other containers.
This commit is contained in:
Max Ignatenko 2024-04-13 16:45:02 +01:00
parent c17730c11f
commit d6b5850827
8 changed files with 562 additions and 60 deletions

14
cmd/plc-mirror/Dockerfile Normal file
View file

@ -0,0 +1,14 @@
FROM golang:1.21.1 as builder
WORKDIR /app
COPY go.mod go.sum ./
RUN go mod download
COPY . ./
RUN go build -trimpath ./cmd/plc-mirror
FROM alpine:latest as certs
RUN apk --update add ca-certificates
FROM debian:stable-slim
COPY --from=certs /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/ca-certificates.crt
COPY --from=builder /app/plc-mirror .
ENTRYPOINT ["./plc-mirror"]

180
cmd/plc-mirror/main.go Normal file
View file

@ -0,0 +1,180 @@
package main
import (
"context"
"flag"
"fmt"
"io"
"log"
"net/http"
_ "net/http/pprof"
"os"
"os/signal"
"path/filepath"
"runtime"
"runtime/debug"
"strings"
"syscall"
"time"
_ "github.com/joho/godotenv/autoload"
"github.com/kelseyhightower/envconfig"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/rs/zerolog"
"gorm.io/driver/postgres"
"gorm.io/gorm"
"gorm.io/gorm/logger"
"github.com/uabluerail/indexer/util/gormzerolog"
)
type Config struct {
LogFile string
LogFormat string `default:"text"`
LogLevel int64 `default:"1"`
MetricsPort string `split_words:"true"`
DBUrl string `envconfig:"POSTGRES_URL"`
Upstream string `default:"https://plc.directory"`
}
var config Config
func runMain(ctx context.Context) error {
ctx = setupLogging(ctx)
log := zerolog.Ctx(ctx)
log.Debug().Msgf("Starting up...")
db, err := gorm.Open(postgres.Open(config.DBUrl), &gorm.Config{
Logger: gormzerolog.New(&logger.Config{
SlowThreshold: 1 * time.Second,
IgnoreRecordNotFoundError: true,
}, nil),
})
if err != nil {
return fmt.Errorf("connecting to the database: %w", err)
}
log.Debug().Msgf("DB connection established")
if err := db.AutoMigrate(&PLCLogEntry{}); err != nil {
return fmt.Errorf("auto-migrating DB schema: %w", err)
}
log.Debug().Msgf("DB schema updated")
mirror, err := NewMirror(ctx, config.Upstream, db)
if err != nil {
return fmt.Errorf("failed to create mirroring worker: %w", err)
}
if err := mirror.Start(ctx); err != nil {
return fmt.Errorf("failed to start mirroring worker: %w", err)
}
server, err := NewServer(ctx, db, mirror)
if err != nil {
return fmt.Errorf("failed to create server: %w", err)
}
http.Handle("/", server)
log.Info().Msgf("Starting HTTP listener on %q...", config.MetricsPort)
http.Handle("/metrics", promhttp.Handler())
srv := &http.Server{Addr: fmt.Sprintf(":%s", config.MetricsPort)}
errCh := make(chan error)
go func() {
errCh <- srv.ListenAndServe()
}()
select {
case <-ctx.Done():
if err := srv.Shutdown(context.Background()); err != nil {
return fmt.Errorf("HTTP server shutdown failed: %w", err)
}
}
return <-errCh
}
func main() {
flag.StringVar(&config.LogFile, "log", "", "Path to the log file. If empty, will log to stderr")
flag.StringVar(&config.LogFormat, "log-format", "text", "Logging format. 'text' or 'json'")
flag.Int64Var(&config.LogLevel, "log-level", 1, "Log level. -1 - trace, 0 - debug, 1 - info, 5 - panic")
if err := envconfig.Process("plc", &config); err != nil {
log.Fatalf("envconfig.Process: %s", err)
}
flag.Parse()
ctx, _ := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM)
if err := runMain(ctx); err != nil {
fmt.Fprintln(os.Stderr, err)
os.Exit(1)
}
}
func setupLogging(ctx context.Context) context.Context {
logFile := os.Stderr
if config.LogFile != "" {
f, err := os.OpenFile(config.LogFile, os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
if err != nil {
log.Fatalf("Failed to open the specified log file %q: %s", config.LogFile, err)
}
logFile = f
}
var output io.Writer
switch config.LogFormat {
case "json":
output = logFile
case "text":
prefixList := []string{}
info, ok := debug.ReadBuildInfo()
if ok {
prefixList = append(prefixList, info.Path+"/")
}
basedir := ""
_, sourceFile, _, ok := runtime.Caller(0)
if ok {
basedir = filepath.Dir(sourceFile)
}
if basedir != "" && strings.HasPrefix(basedir, "/") {
prefixList = append(prefixList, basedir+"/")
head, _ := filepath.Split(basedir)
for head != "/" {
prefixList = append(prefixList, head)
head, _ = filepath.Split(strings.TrimSuffix(head, "/"))
}
}
output = zerolog.ConsoleWriter{
Out: logFile,
NoColor: true,
TimeFormat: time.RFC3339,
PartsOrder: []string{
zerolog.LevelFieldName,
zerolog.TimestampFieldName,
zerolog.CallerFieldName,
zerolog.MessageFieldName,
},
FormatFieldName: func(i interface{}) string { return fmt.Sprintf("%s:", i) },
FormatFieldValue: func(i interface{}) string { return fmt.Sprintf("%s", i) },
FormatCaller: func(i interface{}) string {
s := i.(string)
for _, p := range prefixList {
s = strings.TrimPrefix(s, p)
}
return s
},
}
default:
log.Fatalf("Invalid log format specified: %q", config.LogFormat)
}
logger := zerolog.New(output).Level(zerolog.Level(config.LogLevel)).With().Caller().Timestamp().Logger()
ctx = logger.WithContext(ctx)
zerolog.DefaultContextLogger = &logger
log.SetOutput(logger)
return ctx
}

173
cmd/plc-mirror/mirror.go Normal file
View file

@ -0,0 +1,173 @@
package main
import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"sync"
"time"
"github.com/rs/zerolog"
"github.com/uabluerail/indexer/models"
"github.com/uabluerail/indexer/util/plc"
"golang.org/x/time/rate"
"gorm.io/gorm"
"gorm.io/gorm/clause"
)
type PLCLogEntry struct {
ID models.ID `gorm:"primarykey"`
CreatedAt time.Time
DID string `gorm:"column:did;index:did_timestamp;uniqueIndex:did_cid"`
CID string `gorm:"column:cid;uniqueIndex:did_cid"`
PLCTimestamp string `gorm:"column:plc_timestamp;index:did_timestamp,sort:desc;index:,sort:desc"`
Nullified bool `gorm:"default:false"`
Operation plc.Operation `gorm:"type:JSONB;serializer:json"`
}
type Mirror struct {
db *gorm.DB
upstream *url.URL
limiter *rate.Limiter
mu sync.RWMutex
lastSuccessTimestamp time.Time
}
func NewMirror(ctx context.Context, upstream string, db *gorm.DB) (*Mirror, error) {
u, err := url.Parse(upstream)
if err != nil {
return nil, err
}
u.Path, err = url.JoinPath(u.Path, "export")
if err != nil {
return nil, err
}
return &Mirror{
db: db,
upstream: u,
limiter: rate.NewLimiter(rate.Limit(2), 4),
}, nil
}
func (m *Mirror) Start(ctx context.Context) error {
go m.run(ctx)
return nil
}
func (m *Mirror) run(ctx context.Context) {
log := zerolog.Ctx(ctx).With().Str("module", "mirror").Logger()
for {
select {
case <-ctx.Done():
log.Info().Msgf("PLC mirror stopped")
return
default:
if err := m.runOnce(ctx); err != nil {
if ctx.Err() == nil {
log.Error().Err(err).Msgf("Failed to get new log entries from PLC: %s", err)
}
} else {
now := time.Now()
m.mu.Lock()
m.lastSuccessTimestamp = now
m.mu.Unlock()
}
time.Sleep(10 * time.Second)
}
}
}
func (m *Mirror) LastSuccess() time.Time {
m.mu.RLock()
defer m.mu.RUnlock()
return m.lastSuccessTimestamp
}
func (m *Mirror) runOnce(ctx context.Context) error {
log := zerolog.Ctx(ctx)
cursor := ""
err := m.db.Model(&PLCLogEntry{}).Select("plc_timestamp").Order("plc_timestamp desc").Limit(1).Take(&cursor).Error
if err != nil && !errors.Is(err, gorm.ErrRecordNotFound) {
return fmt.Errorf("failed to get the cursor: %w", err)
}
u := *m.upstream
for {
params := u.Query()
params.Set("count", "1000")
if cursor != "" {
params.Set("after", cursor)
}
u.RawQuery = params.Encode()
req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil)
if err != nil {
return fmt.Errorf("constructing request: %w", err)
}
_ = m.limiter.Wait(ctx)
log.Info().Msgf("Listing PLC log entries with cursor %q...", cursor)
log.Debug().Msgf("Request URL: %s", u.String())
resp, err := http.DefaultClient.Do(req)
if err != nil {
return fmt.Errorf("sending request: %w", err)
}
if resp.StatusCode != http.StatusOK {
resp.Body.Close()
return fmt.Errorf("unexpected status code: %d", resp.StatusCode)
}
newEntries := 0
decoder := json.NewDecoder(resp.Body)
oldCursor := cursor
for {
var entry plc.OperationLogEntry
err := decoder.Decode(&entry)
if errors.Is(err, io.EOF) {
break
}
if err != nil {
return fmt.Errorf("parsing log entry: %w", err)
}
err = m.db.Clauses(
clause.OnConflict{
Columns: []clause.Column{{Name: "did"}, {Name: "cid"}},
DoNothing: true,
},
).Create(FromOperationLogEntry(entry)).Error
if err != nil {
return fmt.Errorf("inserting log entry into database: %w", err)
}
cursor = entry.CreatedAt
newEntries++
}
if newEntries == 0 || cursor == oldCursor {
break
}
log.Info().Msgf("Got %d log entries. New cursor: %q", newEntries, cursor)
}
return nil
}
func FromOperationLogEntry(op plc.OperationLogEntry) *PLCLogEntry {
return &PLCLogEntry{
DID: op.DID,
CID: op.CID,
PLCTimestamp: op.CreatedAt,
Nullified: op.Nullified,
Operation: op.Operation,
}
}

134
cmd/plc-mirror/serve.go Normal file
View file

@ -0,0 +1,134 @@
package main
import (
"context"
"errors"
"fmt"
"net/http"
"slices"
"strings"
"time"
"github.com/Jille/convreq"
"github.com/Jille/convreq/respond"
"github.com/rs/zerolog"
"gorm.io/gorm"
ssi "github.com/nuts-foundation/go-did"
"github.com/nuts-foundation/go-did/did"
"github.com/bluesky-social/indigo/atproto/crypto"
"github.com/uabluerail/indexer/util/plc"
)
type Server struct {
db *gorm.DB
mirror *Mirror
MaxDelay time.Duration
handler http.HandlerFunc
}
func NewServer(ctx context.Context, db *gorm.DB, mirror *Mirror) (*Server, error) {
s := &Server{
db: db,
mirror: mirror,
MaxDelay: 5 * time.Minute,
}
s.handler = convreq.Wrap(s.serve)
return s, nil
}
func (s *Server) ServeHTTP(w http.ResponseWriter, req *http.Request) {
s.handler(w, req)
}
func (s *Server) serve(ctx context.Context, req *http.Request) convreq.HttpResponse {
delay := time.Since(s.mirror.LastSuccess())
if delay > s.MaxDelay {
return respond.ServiceUnavailable(fmt.Sprintf("mirror is %s behind", delay))
}
log := zerolog.Ctx(ctx)
requestedDid := strings.ToLower(strings.TrimPrefix(req.URL.Path, "/"))
var entry PLCLogEntry
err := s.db.Model(&entry).Where("did = ? AND (NOT nullified)", requestedDid).Order("plc_timestamp desc").Limit(1).Take(&entry).Error
if errors.Is(err, gorm.ErrRecordNotFound) {
return respond.NotFound("unknown DID")
}
if err != nil {
log.Error().Err(err).Str("did", requestedDid).Msgf("Failed to get the last log entry for %q: %s", requestedDid, err)
return respond.InternalServerError("failed to get the last log entry")
}
if _, ok := entry.Operation.Value.(plc.Tombstone); ok {
return respond.NotFound("DID deleted")
}
var op plc.Op
switch v := entry.Operation.Value.(type) {
case plc.Op:
op = v
case plc.LegacyCreateOp:
op = v.AsUnsignedOp()
}
didValue := did.DID{
Method: "plc",
ID: strings.TrimPrefix(entry.DID, "did:plc:"),
}
r := did.Document{
Context: []interface{}{
"https://www.w3.org/ns/did/v1",
"https://w3id.org/security/multikey/v1"},
ID: didValue,
AlsoKnownAs: mapSlice(op.AlsoKnownAs, ssi.MustParseURI),
}
for id, s := range op.Services {
r.Service = append(r.Service, did.Service{
ID: ssi.MustParseURI("#" + id),
Type: s.Type,
ServiceEndpoint: s.Endpoint,
})
}
for id, m := range op.VerificationMethods {
idValue := did.DIDURL{
DID: didValue,
Fragment: id,
}
r.VerificationMethod.Add(&did.VerificationMethod{
ID: idValue,
Type: "Multikey",
Controller: didValue,
PublicKeyMultibase: strings.TrimPrefix(m, "did:key:"),
})
key, err := crypto.ParsePublicDIDKey(m)
if err == nil {
context := ""
switch key.(type) {
case *crypto.PublicKeyK256:
context = "https://w3id.org/security/suites/secp256k1-2019/v1"
case *crypto.PublicKeyP256:
context = "https://w3id.org/security/suites/ecdsa-2019/v1"
}
if context != "" && !slices.Contains(r.Context, interface{}(context)) {
r.Context = append(r.Context, context)
}
}
}
return respond.JSON(r)
}
func mapSlice[A any, B any](s []A, fn func(A) B) []B {
r := make([]B, 0, len(s))
for _, v := range s {
r = append(r, fn(v))
}
return r
}