Fix the fucking regexp
parent
a28199fb92
commit
a20ddf0717
|
@ -0,0 +1,26 @@
|
||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestPostgresFix(t *testing.T) {
|
||||||
|
type testCase struct{ input, want string }
|
||||||
|
|
||||||
|
cases := []testCase{
|
||||||
|
{`"a"`, `"a"`},
|
||||||
|
{`"\u0000"`, `"<0x00>"`},
|
||||||
|
{`"description":"\u0000"`, `"description":"<0x00>"`},
|
||||||
|
{`"\\u0000"`, `"\\u0000"`},
|
||||||
|
{`"\\\u0000"`, `"\\<0x00>"`},
|
||||||
|
{`\n\n\u0000\u0000 \u0000\u0000\u0000\u0000 \u0000\u0000\u0000\u0000\u0000`,
|
||||||
|
`\n\n<0x00><0x00> <0x00><0x00><0x00><0x00> <0x00><0x00><0x00><0x00><0x00>`},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tc := range cases {
|
||||||
|
got := escapeNullCharForPostgres([]byte(tc.input))
|
||||||
|
if string(got) != tc.want {
|
||||||
|
t.Errorf("escapeNullCharForPostgres(%s) = %s, want %s", tc.input, string(got), tc.want)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -132,10 +132,12 @@ func (p *WorkerPool) worker(ctx context.Context, signal chan struct{}) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// postgresFixRegexp matches one or more JSON-escaped NUL characters
// (`\u0000`) together with the context needed to prove that each leading
// backslash is not itself escaped:
//
//   - `(?:^|[^\\])` anchors the match at the start of the input or on a
//     single non-backslash byte, so backslash pairs are always counted from
//     the beginning of a backslash run;
//   - `(?:\\\\)*` skips complete escaped-backslash pairs;
//   - `(?:\\u0000)+` consumes a run of NUL escapes;
//   - the outer `(...)+` lets further "pairs, then NULs" groups follow
//     directly, because the anchor byte they would otherwise need has
//     already been consumed by this same match.
var postgresFixRegexp = regexp.MustCompile(`(?:^|[^\\])(?:(?:\\\\)*(?:\\u0000)+)+`)

// escapeNullCharForPostgres returns a copy of b in which every JSON-escaped
// NUL character (`\u0000`) is replaced with the literal text `<0x00>`.
// NOTE(review): presumably needed because PostgreSQL cannot store \u0000 in
// its text/jsonb types — confirm against the caller.
//
// A backslash that is itself escaped does not start a NUL escape, so the
// six bytes `\\u0000` (escaped backslash followed by "u0000") are left
// unchanged. The input slice is not modified.
func escapeNullCharForPostgres(b []byte) []byte {
	return postgresFixRegexp.ReplaceAllFunc(b, func(match []byte) []byte {
		// Inside a match every "\u" belongs to a genuine NUL escape: any
		// backslash pairs in the match are immediately followed by one, so
		// a plain substring replacement is safe here.
		return bytes.ReplaceAll(match, []byte(`\u0000`), []byte(`<0x00>`))
	})
}
|
||||||
|
|
||||||
func (p *WorkerPool) doWork(ctx context.Context, work WorkItem) error {
|
func (p *WorkerPool) doWork(ctx context.Context, work WorkItem) error {
|
||||||
|
@ -212,7 +214,6 @@ retry:
|
||||||
log.Warn().Msgf("Unexpected key format: %q", k)
|
log.Warn().Msgf("Unexpected key format: %q", k)
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
v = regexp.MustCompile(`[^\\](\\\\)*(\\u0000)`).ReplaceAll(v, []byte(`$1<0x00>`))
|
|
||||||
// lang, err := repo.GetLang(ctx, v)
|
// lang, err := repo.GetLang(ctx, v)
|
||||||
// if err == nil {
|
// if err == nil {
|
||||||
// postsByLanguageIndexed.WithLabelValues(u.String(), lang).Inc()
|
// postsByLanguageIndexed.WithLabelValues(u.String(), lang).Inc()
|
||||||
|
|
Loading…
Reference in New Issue