Add AtRev column to only overwrite records with a newer version
parent
1d3c6edf0a
commit
1038ca3bea
|
@ -10,6 +10,7 @@ import (
|
|||
"net/http"
|
||||
"net/url"
|
||||
"path"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
|
@ -177,6 +178,12 @@ func (c *Consumer) updateCursor(ctx context.Context, seq int64) error {
|
|||
|
||||
}
|
||||
|
||||
// postgresFixRegexp matches a single JSON `\u0000` escape together with the
// text that shows its backslash is not itself escaped: one non-backslash byte
// followed by zero or more escaped backslashes (`\\`). That prefix is captured
// as group 1 so the replacement can put it back unchanged.
var postgresFixRegexp = regexp.MustCompile(`([^\\](?:\\\\)*)\\u0000`)

// escapeNullCharForPostgres returns a copy of b in which every JSON `\u0000`
// escape sequence is replaced with the literal text `<0x00>`. All other bytes,
// including the byte in front of the escape and any escaped backslashes
// (`\\`), are preserved, and `\\u0000` (an escaped backslash followed by the
// plain text "u0000") is left alone.
//
// This is a textual rewrite, not a JSON-aware one. An escape sitting at the
// very start of b (with no preceding byte) is not matched; that cannot happen
// inside a JSON document, where escapes only occur within quoted strings.
func escapeNullCharForPostgres(b []byte) []byte {
	replacement := []byte(`${1}<0x00>`)

	// ReplaceAll always returns a fresh slice, so b is never modified.
	out := postgresFixRegexp.ReplaceAll(b, replacement)

	// Matches cannot overlap, so within a run of adjacent escapes
	// ("\u0000\u0000...") the ones whose prefix byte was already consumed by
	// the previous match are skipped in a single pass. Re-apply until none
	// are left; every pass removes at least one escape, so this terminates.
	for postgresFixRegexp.Match(out) {
		out = postgresFixRegexp.ReplaceAll(out, replacement)
	}
	return out
}
|
||||
|
||||
func (c *Consumer) processMessage(ctx context.Context, typ string, r io.Reader, first bool) error {
|
||||
log := zerolog.Ctx(ctx)
|
||||
|
||||
|
@ -253,7 +260,11 @@ func (c *Consumer) processMessage(ctx context.Context, typ string, r io.Reader,
|
|||
Repo: models.ID(repoInfo.ID),
|
||||
Collection: parts[0],
|
||||
Rkey: parts[1],
|
||||
Content: v,
|
||||
// XXX: proper replacement of \u0000 would require full parsing of JSON
|
||||
// and recursive iteration over all string values, but this
|
||||
// should work well enough for now.
|
||||
Content: escapeNullCharForPostgres(v),
|
||||
AtRev: payload.Rev,
|
||||
})
|
||||
}
|
||||
if len(recs) == 0 && expectRecords {
|
||||
|
@ -261,8 +272,16 @@ func (c *Consumer) processMessage(ctx context.Context, typ string, r io.Reader,
|
|||
}
|
||||
if len(recs) > 0 || expectRecords {
|
||||
err = c.db.Model(&repo.Record{}).
|
||||
Clauses(clause.OnConflict{DoUpdates: clause.AssignmentColumns([]string{"content"}),
|
||||
Columns: []clause.Column{{Name: "repo"}, {Name: "collection"}, {Name: "rkey"}}}).
|
||||
Clauses(clause.OnConflict{
|
||||
Where: clause.Where{Exprs: []clause.Expression{clause.Or(
|
||||
clause.Eq{Column: clause.Column{Name: "at_rev", Table: "records"}, Value: nil},
|
||||
clause.Eq{Column: clause.Column{Name: "at_rev", Table: "records"}, Value: ""},
|
||||
clause.Lt{
|
||||
Column: clause.Column{Name: "at_rev", Table: "records"},
|
||||
Value: clause.Column{Name: "at_rev", Table: "excluded"}},
|
||||
)}},
|
||||
DoUpdates: clause.AssignmentColumns([]string{"content", "at_rev"}),
|
||||
Columns: []clause.Column{{Name: "repo"}, {Name: "collection"}, {Name: "rkey"}}}).
|
||||
Create(recs).Error
|
||||
if err != nil {
|
||||
return fmt.Errorf("inserting records into the database: %w", err)
|
||||
|
|
|
@ -5,6 +5,7 @@ import (
|
|||
"context"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
|
@ -131,6 +132,12 @@ func (p *WorkerPool) worker(ctx context.Context, signal chan struct{}) {
|
|||
}
|
||||
}
|
||||
|
||||
// postgresFixRegexp matches a single JSON `\u0000` escape together with the
// text that shows its backslash is not itself escaped: one non-backslash byte
// followed by zero or more escaped backslashes (`\\`). That prefix is captured
// as group 1 so the replacement can put it back unchanged.
var postgresFixRegexp = regexp.MustCompile(`([^\\](?:\\\\)*)\\u0000`)

// escapeNullCharForPostgres returns a copy of b in which every JSON `\u0000`
// escape sequence is replaced with the literal text `<0x00>`. All other bytes,
// including the byte in front of the escape and any escaped backslashes
// (`\\`), are preserved, and `\\u0000` (an escaped backslash followed by the
// plain text "u0000") is left alone.
//
// This is a textual rewrite, not a JSON-aware one. An escape sitting at the
// very start of b (with no preceding byte) is not matched; that cannot happen
// inside a JSON document, where escapes only occur within quoted strings.
func escapeNullCharForPostgres(b []byte) []byte {
	replacement := []byte(`${1}<0x00>`)

	// ReplaceAll always returns a fresh slice, so b is never modified.
	out := postgresFixRegexp.ReplaceAll(b, replacement)

	// Matches cannot overlap, so within a run of adjacent escapes
	// ("\u0000\u0000...") the ones whose prefix byte was already consumed by
	// the previous match are skipped in a single pass. Re-apply until none
	// are left; every pass removes at least one escape, so this terminates.
	for postgresFixRegexp.Match(out) {
		out = postgresFixRegexp.ReplaceAll(out, replacement)
	}
	return out
}
|
||||
|
||||
func (p *WorkerPool) doWork(ctx context.Context, work WorkItem) error {
|
||||
log := zerolog.Ctx(ctx)
|
||||
defer close(work.signal)
|
||||
|
@ -201,19 +208,32 @@ retry:
|
|||
log.Warn().Msgf("Unexpected key format: %q", k)
|
||||
continue
|
||||
}
|
||||
v = regexp.MustCompile(`[^\\](\\\\)*(\\u0000)`).ReplaceAll(v, []byte(`$1<0x00>`))
|
||||
recs = append(recs, repo.Record{
|
||||
Repo: models.ID(work.Repo.ID),
|
||||
Collection: parts[0],
|
||||
Rkey: parts[1],
|
||||
Content: v,
|
||||
// XXX: proper replacement of \u0000 would require full parsing of JSON
|
||||
// and recursive iteration over all string values, but this
|
||||
// should work well enough for now.
|
||||
Content: escapeNullCharForPostgres(v),
|
||||
AtRev: newRev,
|
||||
})
|
||||
}
|
||||
recordsFetched.Add(float64(len(recs)))
|
||||
if len(recs) > 0 {
|
||||
for _, batch := range splitInBatshes(recs, 500) {
|
||||
result := p.db.Model(&repo.Record{}).
|
||||
Clauses(clause.OnConflict{DoUpdates: clause.AssignmentColumns([]string{"content"}),
|
||||
Columns: []clause.Column{{Name: "repo"}, {Name: "collection"}, {Name: "rkey"}}}).
|
||||
Clauses(clause.OnConflict{
|
||||
Where: clause.Where{Exprs: []clause.Expression{clause.Or(
|
||||
clause.Eq{Column: clause.Column{Name: "at_rev", Table: "records"}, Value: nil},
|
||||
clause.Eq{Column: clause.Column{Name: "at_rev", Table: "records"}, Value: ""},
|
||||
clause.Lt{
|
||||
Column: clause.Column{Name: "at_rev", Table: "records"},
|
||||
Value: clause.Column{Name: "at_rev", Table: "excluded"}},
|
||||
)}},
|
||||
DoUpdates: clause.AssignmentColumns([]string{"content", "at_rev"}),
|
||||
Columns: []clause.Column{{Name: "repo"}, {Name: "collection"}, {Name: "rkey"}}}).
|
||||
Create(batch)
|
||||
if err := result.Error; err != nil {
|
||||
return fmt.Errorf("inserting records into the database: %w", err)
|
||||
|
@ -229,6 +249,9 @@ retry:
|
|||
return fmt.Errorf("updating repo rev: %w", err)
|
||||
}
|
||||
|
||||
// TODO: check for records that are missing in the repo download
|
||||
// and mark them as deleted.
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
|
|
|
@ -34,9 +34,10 @@ type Record struct {
|
|||
ID models.ID `gorm:"primarykey"`
|
||||
CreatedAt time.Time
|
||||
UpdatedAt time.Time
|
||||
Repo models.ID `gorm:"index:idx_repo_record_key,unique,priority:1;not null"`
|
||||
Repo models.ID `gorm:"index:idx_repo_record_key,unique,priority:1;not null;index:idx_repo_rev"`
|
||||
Collection string `gorm:"index:idx_repo_record_key,unique,priority:2;not null"`
|
||||
Rkey string `gorm:"index:idx_repo_record_key,unique,priority:3"`
|
||||
AtRev string `gorm:"index:idx_repo_rev"`
|
||||
Content json.RawMessage `gorm:"type:JSONB"`
|
||||
Deleted bool
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue