ee04d4a74d
Postgres on Mac OS X has a bug in how it parses hstore text values
that causes it to misinterpret some Unicode values as spaces. This
causes values sent by pgx to be misinterpreted. To avoid this, always
quote hstore values, which is how Postgres serializes them itself.
The test change fails on Mac OS X without this fix.
While I suspect this should not be performance critical for any
application, I added a quick benchmark to test the performance of the
encoding. This change actually makes encoding slightly faster on my
M1 Pro. The output from the benchstat program on this benchmark is:
goos: darwin
goarch: arm64
pkg: github.com/jackc/pgx/v5/pgtype
│ orig.txt │ new-quotes.txt │
│ sec/op │ sec/op vs base │
HstoreSerialize/text-10 207.1n ± 0% 142.3n ± 1% -31.31% (p=0.000 n=10)
HstoreSerialize/binary-10 100.10n ± 0% 99.64n ± 1% -0.45% (p=0.013 n=10)
geomean 144.0n 119.1n -17.31%
I have also attempted to fix the Postgres bug, but it will take a
long time for this fix to get upstream:
https://www.postgresql.org/message-id/CA%2BHWA9awUW0%2BRV_gO9r1ABZwGoZxPztcJxPy8vMFSTbTfi4jig%40mail.gmail.com
464 lines
9.5 KiB
Go
464 lines
9.5 KiB
Go
package pgtype
|
|
|
|
import (
|
|
"bytes"
|
|
"database/sql/driver"
|
|
"encoding/binary"
|
|
"errors"
|
|
"fmt"
|
|
"unicode"
|
|
"unicode/utf8"
|
|
|
|
"github.com/jackc/pgx/v5/internal/pgio"
|
|
)
|
|
|
|
// HstoreScanner is implemented by scan targets that can receive a decoded
// Hstore. The hstore scan plans in this file hand their result to ScanHstore.
type HstoreScanner interface {
|
|
ScanHstore(v Hstore) error
|
|
}
|
|
|
|
// HstoreValuer is implemented by values that can be converted to an Hstore.
// The hstore encode plans in this file call HstoreValue to obtain the pairs
// to serialize.
type HstoreValuer interface {
|
|
HstoreValue() (Hstore, error)
|
|
}
|
|
|
|
// Hstore represents an hstore column that can be null or have null values
|
|
// associated with its keys. A nil map is the null column (Value returns nil
// for it). A nil *string marks a null value for its key: the text encoder
// writes it as NULL and the binary encoder writes it as length -1.
|
|
type Hstore map[string]*string
|
|
|
|
// ScanHstore implements HstoreScanner by replacing *h with v. The map is
// stored as-is, not copied. The returned error is always nil.
func (h *Hstore) ScanHstore(v Hstore) error {
|
|
*h = v
|
|
return nil
|
|
}
|
|
|
|
// HstoreValue implements HstoreValuer by returning h itself. The returned
// error is always nil.
func (h Hstore) HstoreValue() (Hstore, error) {
|
|
return h, nil
|
|
}
|
|
|
|
// Scan implements the database/sql Scanner interface.
|
|
func (h *Hstore) Scan(src any) error {
|
|
if src == nil {
|
|
*h = nil
|
|
return nil
|
|
}
|
|
|
|
switch src := src.(type) {
|
|
case string:
|
|
return scanPlanTextAnyToHstoreScanner{}.Scan([]byte(src), h)
|
|
}
|
|
|
|
return fmt.Errorf("cannot scan %T", src)
|
|
}
|
|
|
|
// Value implements the database/sql/driver Valuer interface.
|
|
func (h Hstore) Value() (driver.Value, error) {
|
|
if h == nil {
|
|
return nil, nil
|
|
}
|
|
|
|
buf, err := HstoreCodec{}.PlanEncode(nil, 0, TextFormatCode, h).Encode(h, nil)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return string(buf), err
|
|
}
|
|
|
|
// HstoreCodec is the codec for hstore values. Its methods plan encoding and
// scanning in both the text and binary formats.
type HstoreCodec struct{}
|
|
|
|
func (HstoreCodec) FormatSupported(format int16) bool {
|
|
return format == TextFormatCode || format == BinaryFormatCode
|
|
}
|
|
|
|
// PreferredFormat returns BinaryFormatCode.
func (HstoreCodec) PreferredFormat() int16 {
|
|
return BinaryFormatCode
|
|
}
|
|
|
|
func (HstoreCodec) PlanEncode(m *Map, oid uint32, format int16, value any) EncodePlan {
|
|
if _, ok := value.(HstoreValuer); !ok {
|
|
return nil
|
|
}
|
|
|
|
switch format {
|
|
case BinaryFormatCode:
|
|
return encodePlanHstoreCodecBinary{}
|
|
case TextFormatCode:
|
|
return encodePlanHstoreCodecText{}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
type encodePlanHstoreCodecBinary struct{}
|
|
|
|
func (encodePlanHstoreCodecBinary) Encode(value any, buf []byte) (newBuf []byte, err error) {
|
|
hstore, err := value.(HstoreValuer).HstoreValue()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if hstore == nil {
|
|
return nil, nil
|
|
}
|
|
|
|
buf = pgio.AppendInt32(buf, int32(len(hstore)))
|
|
|
|
for k, v := range hstore {
|
|
buf = pgio.AppendInt32(buf, int32(len(k)))
|
|
buf = append(buf, k...)
|
|
|
|
if v == nil {
|
|
buf = pgio.AppendInt32(buf, -1)
|
|
} else {
|
|
buf = pgio.AppendInt32(buf, int32(len(*v)))
|
|
buf = append(buf, (*v)...)
|
|
}
|
|
}
|
|
|
|
return buf, nil
|
|
}
|
|
|
|
type encodePlanHstoreCodecText struct{}
|
|
|
|
func (encodePlanHstoreCodecText) Encode(value any, buf []byte) (newBuf []byte, err error) {
|
|
hstore, err := value.(HstoreValuer).HstoreValue()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if hstore == nil {
|
|
return nil, nil
|
|
}
|
|
|
|
firstPair := true
|
|
|
|
for k, v := range hstore {
|
|
if firstPair {
|
|
firstPair = false
|
|
} else {
|
|
buf = append(buf, ',')
|
|
}
|
|
|
|
// unconditionally quote hstore keys/values like Postgres does
|
|
// this avoids a Mac OS X Postgres hstore parsing bug:
|
|
// https://www.postgresql.org/message-id/CA%2BHWA9awUW0%2BRV_gO9r1ABZwGoZxPztcJxPy8vMFSTbTfi4jig%40mail.gmail.com
|
|
buf = append(buf, '"')
|
|
buf = append(buf, quoteArrayReplacer.Replace(k)...)
|
|
buf = append(buf, '"')
|
|
buf = append(buf, "=>"...)
|
|
|
|
if v == nil {
|
|
buf = append(buf, "NULL"...)
|
|
} else {
|
|
buf = append(buf, '"')
|
|
buf = append(buf, quoteArrayReplacer.Replace(*v)...)
|
|
buf = append(buf, '"')
|
|
}
|
|
}
|
|
|
|
return buf, nil
|
|
}
|
|
|
|
func (HstoreCodec) PlanScan(m *Map, oid uint32, format int16, target any) ScanPlan {
|
|
|
|
switch format {
|
|
case BinaryFormatCode:
|
|
switch target.(type) {
|
|
case HstoreScanner:
|
|
return scanPlanBinaryHstoreToHstoreScanner{}
|
|
}
|
|
case TextFormatCode:
|
|
switch target.(type) {
|
|
case HstoreScanner:
|
|
return scanPlanTextAnyToHstoreScanner{}
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
type scanPlanBinaryHstoreToHstoreScanner struct{}
|
|
|
|
func (scanPlanBinaryHstoreToHstoreScanner) Scan(src []byte, dst any) error {
|
|
scanner := (dst).(HstoreScanner)
|
|
|
|
if src == nil {
|
|
return scanner.ScanHstore(Hstore(nil))
|
|
}
|
|
|
|
rp := 0
|
|
|
|
if len(src[rp:]) < 4 {
|
|
return fmt.Errorf("hstore incomplete %v", src)
|
|
}
|
|
pairCount := int(int32(binary.BigEndian.Uint32(src[rp:])))
|
|
rp += 4
|
|
|
|
hstore := make(Hstore, pairCount)
|
|
|
|
for i := 0; i < pairCount; i++ {
|
|
if len(src[rp:]) < 4 {
|
|
return fmt.Errorf("hstore incomplete %v", src)
|
|
}
|
|
keyLen := int(int32(binary.BigEndian.Uint32(src[rp:])))
|
|
rp += 4
|
|
|
|
if len(src[rp:]) < keyLen {
|
|
return fmt.Errorf("hstore incomplete %v", src)
|
|
}
|
|
key := string(src[rp : rp+keyLen])
|
|
rp += keyLen
|
|
|
|
if len(src[rp:]) < 4 {
|
|
return fmt.Errorf("hstore incomplete %v", src)
|
|
}
|
|
valueLen := int(int32(binary.BigEndian.Uint32(src[rp:])))
|
|
rp += 4
|
|
|
|
var valueBuf []byte
|
|
if valueLen >= 0 {
|
|
valueBuf = src[rp : rp+valueLen]
|
|
rp += valueLen
|
|
}
|
|
|
|
var value Text
|
|
err := scanPlanTextAnyToTextScanner{}.Scan(valueBuf, &value)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if value.Valid {
|
|
hstore[key] = &value.String
|
|
} else {
|
|
hstore[key] = nil
|
|
}
|
|
}
|
|
|
|
return scanner.ScanHstore(hstore)
|
|
}
|
|
|
|
type scanPlanTextAnyToHstoreScanner struct{}
|
|
|
|
func (scanPlanTextAnyToHstoreScanner) Scan(src []byte, dst any) error {
|
|
scanner := (dst).(HstoreScanner)
|
|
|
|
if src == nil {
|
|
return scanner.ScanHstore(Hstore(nil))
|
|
}
|
|
|
|
keys, values, err := parseHstore(string(src))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
m := make(Hstore, len(keys))
|
|
for i := range keys {
|
|
if values[i].Valid {
|
|
m[keys[i]] = &values[i].String
|
|
} else {
|
|
m[keys[i]] = nil
|
|
}
|
|
}
|
|
|
|
return scanner.ScanHstore(m)
|
|
}
|
|
|
|
// DecodeDatabaseSQLValue returns whatever codecDecodeToTextFormat produces
// for src using this codec.
// NOTE(review): presumably that is the hstore's text representation; confirm
// against codecDecodeToTextFormat, which is defined outside this file.
func (c HstoreCodec) DecodeDatabaseSQLValue(m *Map, oid uint32, format int16, src []byte) (driver.Value, error) {
|
|
return codecDecodeToTextFormat(c, m, oid, format, src)
|
|
}
|
|
|
|
func (c HstoreCodec) DecodeValue(m *Map, oid uint32, format int16, src []byte) (any, error) {
|
|
if src == nil {
|
|
return nil, nil
|
|
}
|
|
|
|
var hstore Hstore
|
|
err := codecScan(c, m, oid, format, src, &hstore)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return hstore, nil
|
|
}
|
|
|
|
// States of the parseHstore state machine.
const (
|
|
// hsPre: at the start of input, expecting the opening quote of the first key.
hsPre = iota
|
|
// hsKey: inside a quoted key.
hsKey
|
|
// hsSep: after a key's closing quote, expecting "=>".
hsSep
|
|
// hsVal: inside a quoted value.
hsVal
|
|
// hsNul: after the 'N' that follows "=>", expecting the rest of NULL.
hsNul
|
|
// hsNext: after a complete value, expecting ',' or end of input.
hsNext
|
|
)
|
|
|
|
// hstoreParser is a rune cursor over a string.
type hstoreParser struct {
	str string // input being read
	pos int    // byte offset of the next rune to decode
}

// newHSP returns a parser positioned at the start of in.
func newHSP(in string) *hstoreParser {
	return &hstoreParser{str: in}
}

// Consume decodes the rune at the current position and advances past it.
// At end of input it returns end == true and a zero rune without moving.
func (p *hstoreParser) Consume() (r rune, end bool) {
	if p.pos >= len(p.str) {
		return 0, true
	}

	r, width := utf8.DecodeRuneInString(p.str[p.pos:])
	p.pos += width
	return r, false
}

// Peek decodes the rune at the current position without advancing.
// At end of input it returns end == true and a zero rune.
func (p *hstoreParser) Peek() (r rune, end bool) {
	if p.pos >= len(p.str) {
		return 0, true
	}

	r, _ = utf8.DecodeRuneInString(p.str[p.pos:])
	return r, false
}
|
|
|
|
// parseHstore parses the string representation of an hstore column (the same
|
|
// you would get from an ordinary SELECT) into two slices of keys and values. it
|
|
// is used internally in the default parsing of hstores.
|
|
func parseHstore(s string) (k []string, v []Text, err error) {
|
|
if s == "" {
|
|
return
|
|
}
|
|
|
|
buf := bytes.Buffer{}
|
|
keys := []string{}
|
|
values := []Text{}
|
|
p := newHSP(s)
|
|
|
|
r, end := p.Consume()
|
|
state := hsPre
|
|
|
|
for !end {
|
|
switch state {
|
|
case hsPre:
|
|
if r == '"' {
|
|
state = hsKey
|
|
} else {
|
|
err = errors.New("String does not begin with \"")
|
|
}
|
|
case hsKey:
|
|
switch r {
|
|
case '"': //End of the key
|
|
keys = append(keys, buf.String())
|
|
buf = bytes.Buffer{}
|
|
state = hsSep
|
|
case '\\': //Potential escaped character
|
|
n, end := p.Consume()
|
|
switch {
|
|
case end:
|
|
err = errors.New("Found EOS in key, expecting character or \"")
|
|
case n == '"', n == '\\':
|
|
buf.WriteRune(n)
|
|
default:
|
|
buf.WriteRune(r)
|
|
buf.WriteRune(n)
|
|
}
|
|
default: //Any other character
|
|
buf.WriteRune(r)
|
|
}
|
|
case hsSep:
|
|
if r == '=' {
|
|
r, end = p.Consume()
|
|
switch {
|
|
case end:
|
|
err = errors.New("Found EOS after '=', expecting '>'")
|
|
case r == '>':
|
|
r, end = p.Consume()
|
|
switch {
|
|
case end:
|
|
err = errors.New("Found EOS after '=>', expecting '\"' or 'NULL'")
|
|
case r == '"':
|
|
state = hsVal
|
|
case r == 'N':
|
|
state = hsNul
|
|
default:
|
|
err = fmt.Errorf("Invalid character '%c' after '=>', expecting '\"' or 'NULL'", r)
|
|
}
|
|
default:
|
|
err = fmt.Errorf("Invalid character after '=', expecting '>'")
|
|
}
|
|
} else {
|
|
err = fmt.Errorf("Invalid character '%c' after value, expecting '='", r)
|
|
}
|
|
case hsVal:
|
|
switch r {
|
|
case '"': //End of the value
|
|
values = append(values, Text{String: buf.String(), Valid: true})
|
|
buf = bytes.Buffer{}
|
|
state = hsNext
|
|
case '\\': //Potential escaped character
|
|
n, end := p.Consume()
|
|
switch {
|
|
case end:
|
|
err = errors.New("Found EOS in key, expecting character or \"")
|
|
case n == '"', n == '\\':
|
|
buf.WriteRune(n)
|
|
default:
|
|
buf.WriteRune(r)
|
|
buf.WriteRune(n)
|
|
}
|
|
default: //Any other character
|
|
buf.WriteRune(r)
|
|
}
|
|
case hsNul:
|
|
nulBuf := make([]rune, 3)
|
|
nulBuf[0] = r
|
|
for i := 1; i < 3; i++ {
|
|
r, end = p.Consume()
|
|
if end {
|
|
err = errors.New("Found EOS in NULL value")
|
|
return
|
|
}
|
|
nulBuf[i] = r
|
|
}
|
|
if nulBuf[0] == 'U' && nulBuf[1] == 'L' && nulBuf[2] == 'L' {
|
|
values = append(values, Text{})
|
|
state = hsNext
|
|
} else {
|
|
err = fmt.Errorf("Invalid NULL value: 'N%s'", string(nulBuf))
|
|
}
|
|
case hsNext:
|
|
if r == ',' {
|
|
r, end = p.Consume()
|
|
switch {
|
|
case end:
|
|
err = errors.New("Found EOS after ',', expecting space")
|
|
case (unicode.IsSpace(r)):
|
|
// after space is a doublequote to start the key
|
|
r, end = p.Consume()
|
|
if end {
|
|
err = errors.New("Found EOS after space, expecting \"")
|
|
return
|
|
}
|
|
if r != '"' {
|
|
err = fmt.Errorf("Invalid character '%c' after space, expecting \"", r)
|
|
return
|
|
}
|
|
state = hsKey
|
|
default:
|
|
err = fmt.Errorf("Invalid character '%c' after ',', expecting space", r)
|
|
}
|
|
} else {
|
|
err = fmt.Errorf("Invalid character '%c' after value, expecting ','", r)
|
|
}
|
|
}
|
|
|
|
if err != nil {
|
|
return
|
|
}
|
|
r, end = p.Consume()
|
|
}
|
|
if state != hsNext {
|
|
err = errors.New("Improperly formatted hstore")
|
|
return
|
|
}
|
|
k = keys
|
|
v = values
|
|
return
|
|
}
|