Properly escape null character in the consumer too

This commit is contained in:
Max Ignatenko 2024-03-24 12:43:52 +00:00
parent 693ae1ba0a
commit ffa2faa420
4 changed files with 20 additions and 19 deletions

View file

@ -10,7 +10,6 @@ import (
"net/http"
"net/url"
"path"
"regexp"
"strings"
"time"
@ -28,6 +27,7 @@ import (
"github.com/uabluerail/indexer/models"
"github.com/uabluerail/indexer/pds"
"github.com/uabluerail/indexer/repo"
"github.com/uabluerail/indexer/util/fix"
"github.com/uabluerail/indexer/util/resolver"
)
@ -211,12 +211,6 @@ func (c *Consumer) updateCursor(ctx context.Context, seq int64) error {
}
var (
	// postgresFixRegexp matches a run of one or more JSON `\u0000` escapes
	// together with the text that must precede it: a single non-backslash
	// character followed by any number of escaped backslashes (`\\`).
	// Requiring that prefix guarantees the backslash that opens the run is
	// not itself escaped, so `\\u0000` (an escaped backslash followed by
	// the plain text "u0000") is left alone.
	postgresFixRegexp = regexp.MustCompile(`[^\\](\\\\)*(\\u0000)+`)

	// postgresNullEscape matches a single `\u0000` escape. It is only ever
	// applied to text already matched by postgresFixRegexp.
	postgresNullEscape = regexp.MustCompile(`\\u0000`)
)

// escapeNullCharForPostgres returns a copy of b in which every unescaped
// `\u0000` JSON escape is replaced with the literal text `<0x00>`
// (presumably because PostgreSQL cannot store a null character in JSON
// text). All other bytes, including the character and the escaped
// backslashes directly in front of the escape, are preserved.
//
// This is a textual approximation rather than a real JSON rewrite: an
// escape located at the very first byte of b is not matched, because the
// pattern needs one preceding character to prove the backslash is not
// escaped.
func escapeNullCharForPostgres(b []byte) []byte {
	return postgresFixRegexp.ReplaceAllFunc(b, func(match []byte) []byte {
		// match is "<prefix><\u0000 repeated>"; the prefix is one
		// character plus backslashes only, so the only `\u0000`
		// occurrences inside it are the ones that must be rewritten.
		return postgresNullEscape.ReplaceAllLiteral(match, []byte(`<0x00>`))
	})
}
func (c *Consumer) processMessage(ctx context.Context, typ string, r io.Reader, first bool) error {
log := zerolog.Ctx(ctx)
@ -331,7 +325,7 @@ func (c *Consumer) processMessage(ctx context.Context, typ string, r io.Reader,
// XXX: proper replacement of \u0000 would require full parsing of JSON
// and recursive iteration over all string values, but this
// should work well enough for now.
Content: escapeNullCharForPostgres(v),
Content: fix.EscapeNullCharForPostgres(v),
AtRev: payload.Rev,
})
}

View file

@ -1,26 +0,0 @@
package main
import (
"testing"
)
// TestPostgresFix checks that escapeNullCharForPostgres rewrites unescaped
// `\u0000` sequences to `<0x00>` while leaving escaped backslashes and all
// surrounding text untouched.
func TestPostgresFix(t *testing.T) {
	tests := []struct {
		input string
		want  string
	}{
		// Nothing to replace.
		{input: `"a"`, want: `"a"`},
		// A lone escape, bare and inside a key/value pair.
		{input: `"\u0000"`, want: `"<0x00>"`},
		{input: `"description":"\u0000"`, want: `"description":"<0x00>"`},
		// An escaped backslash followed by "u0000" is not a null escape.
		{input: `"\\u0000"`, want: `"\\u0000"`},
		// An escaped backslash followed by a real null escape.
		{input: `"\\\u0000"`, want: `"\\<0x00>"`},
		// Runs of consecutive escapes separated by other text.
		{
			input: `\n\n\u0000\u0000 \u0000\u0000\u0000\u0000 \u0000\u0000\u0000\u0000\u0000`,
			want:  `\n\n<0x00><0x00> <0x00><0x00><0x00><0x00> <0x00><0x00><0x00><0x00><0x00>`,
		},
	}

	for i := range tests {
		in, expected := tests[i].input, tests[i].want
		actual := string(escapeNullCharForPostgres([]byte(in)))
		if actual == expected {
			continue
		}
		t.Errorf("escapeNullCharForPostgres(%s) = %s, want %s", in, actual, expected)
	}
}

View file

@ -21,6 +21,7 @@ import (
"github.com/uabluerail/indexer/models"
"github.com/uabluerail/indexer/pds"
"github.com/uabluerail/indexer/repo"
"github.com/uabluerail/indexer/util/fix"
"github.com/uabluerail/indexer/util/resolver"
)
@ -132,14 +133,6 @@ func (p *WorkerPool) worker(ctx context.Context, signal chan struct{}) {
}
}
// postgresFixRegexp matches a run of one or more JSON `\u0000` escapes
// together with the text that must precede it: a single non-backslash
// character followed by any number of escaped backslashes (`\\`). Requiring
// that prefix ensures the backslash opening the run is not itself escaped.
var postgresFixRegexp = regexp.MustCompile(`([^\\](\\\\)*)(\\u0000)+`)

// escapeNullCharForPostgres returns a copy of b in which every unescaped
// `\u0000` JSON escape is replaced with the literal text `<0x00>`; all other
// bytes are preserved.
//
// The rewrite is purely textual. An escape at the very first byte of b is
// not matched, because the pattern requires one preceding character.
func escapeNullCharForPostgres(b []byte) []byte {
	var (
		escapedNull = []byte(`\u0000`)
		placeholder = []byte(`<0x00>`)
	)

	// Each match is "<prefix><\u0000 repeated>"; only the repeated escapes
	// can contain the escapedNull byte sequence, so the prefix survives.
	rewriteRun := func(match []byte) []byte {
		return bytes.ReplaceAll(match, escapedNull, placeholder)
	}

	return postgresFixRegexp.ReplaceAllFunc(b, rewriteRun)
}
func (p *WorkerPool) doWork(ctx context.Context, work WorkItem) error {
log := zerolog.Ctx(ctx)
defer close(work.signal)
@ -244,7 +237,7 @@ retry:
// XXX: proper replacement of \u0000 would require full parsing of JSON
// and recursive iteration over all string values, but this
// should work well enough for now.
Content: escapeNullCharForPostgres(v),
Content: fix.EscapeNullCharForPostgres(v),
AtRev: newRev,
})
}