// Copyright (c) 2012-2020 Ugorji Nwoke. All rights reserved. // Use of this source code is governed by a MIT license found in the LICENSE file. package codec import ( "strconv" ) // Per go spec, floats are represented in memory as // IEEE single or double precision floating point values. // // We also looked at the source for stdlib math/modf.go, // reviewed https://github.com/chewxy/math32 // and read wikipedia documents describing the formats. // // It became clear that we could easily look at the bits to determine // whether any fraction exists. func parseFloat32(b []byte) (f float32, err error) { return parseFloat32_custom(b) } func parseFloat64(b []byte) (f float64, err error) { return parseFloat64_custom(b) } func parseFloat32_strconv(b []byte) (f float32, err error) { f64, err := strconv.ParseFloat(stringView(b), 32) f = float32(f64) return } func parseFloat64_strconv(b []byte) (f float64, err error) { return strconv.ParseFloat(stringView(b), 64) } // ------ parseFloat custom below -------- // JSON really supports decimal numbers in base 10 notation, with exponent support. // // We assume the following: // - a lot of floating point numbers in json files will have defined precision // (in terms of number of digits after decimal point), etc. // - these (referenced above) can be written in exact format. // // strconv.ParseFloat has some unnecessary overhead which we can do without // for the common case: // // - expensive char-by-char check to see if underscores are in right place // - testing for and skipping underscores // - check if the string matches ignorecase +/- inf, +/- infinity, nan // - support for base 16 (0xFFFF...) // // The functions below will try a fast-path for floats which can be decoded // without any loss of precision, meaning they: // // - fits within the significand bits of the 32-bits or 64-bits // - exponent fits within the exponent value // - there is no truncation (any extra numbers are all trailing zeros) // // To figure out what the values are for maxMantDigits, use this idea below: // // 2^23 = 838 8608 (between 10^ 6 and 10^ 7) (significand bits of uint32) // 2^32 = 42 9496 7296 (between 10^ 9 and 10^10) (full uint32) // 2^52 = 4503 5996 2737 0496 (between 10^15 and 10^16) (significand bits of uint64) // 2^64 = 1844 6744 0737 0955 1616 (between 10^19 and 10^20) (full uint64) // // Note: we only allow for up to what can comfortably fit into the significand // ignoring the exponent, and we only try to parse iff significand fits. const ( fMaxMultiplierForExactPow10_64 = 1e15 fMaxMultiplierForExactPow10_32 = 1e7 fUint64Cutoff = (1<<64-1)/10 + 1 // fUint32Cutoff = (1<<32-1)/10 + 1 fBase = 10 ) const ( thousand = 1000 million = thousand * thousand billion = thousand * million trillion = thousand * billion quadrillion = thousand * trillion quintillion = thousand * quadrillion ) // Exact powers of 10. var uint64pow10 = [...]uint64{ 1, 10, 100, 1 * thousand, 10 * thousand, 100 * thousand, 1 * million, 10 * million, 100 * million, 1 * billion, 10 * billion, 100 * billion, 1 * trillion, 10 * trillion, 100 * trillion, 1 * quadrillion, 10 * quadrillion, 100 * quadrillion, 1 * quintillion, 10 * quintillion, } var float64pow10 = [...]float64{ 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22, } var float32pow10 = [...]float32{ 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, } type floatinfo struct { mantbits uint8 // expbits uint8 // (unused) // bias int16 // (unused) is32bit bool exactPow10 int8 // Exact powers of ten are <= 10^N (32: 10, 64: 22) exactInts int8 // Exact integers are <= 10^N (for non-float, set to 0) // maxMantDigits int8 // 10^19 fits in uint64, while 10^9 fits in uint32 mantCutoffIsUint64Cutoff bool mantCutoff uint64 } var fi32 = floatinfo{23, true, 10, 7, false, 1<<23 - 1} var fi64 = floatinfo{52, false, 22, 15, false, 1<<52 - 1} var fi64u = floatinfo{0, false, 19, 0, true, fUint64Cutoff} var fi64i = floatinfo{0, false, 19, 0, true, fUint64Cutoff} func strconvParseErr(b []byte, fn string) error { return &strconv.NumError{ Func: fn, Err: strconv.ErrSyntax, Num: string(b), } } func parseFloat32_reader(r readFloatResult) (f float32, fail bool) { f = float32(r.mantissa) if r.exp == 0 { } else if r.exp < 0 { // int / 10^k f /= float32pow10[uint8(-r.exp)] } else { // exp > 0 if r.exp > fi32.exactPow10 { f *= float32pow10[r.exp-fi32.exactPow10] if f > fMaxMultiplierForExactPow10_32 { // exponent too large - outside range fail = true return // ok = false } f *= float32pow10[fi32.exactPow10] } else { f *= float32pow10[uint8(r.exp)] } } if r.neg { f = -f } return } func parseFloat32_custom(b []byte) (f float32, err error) { r := readFloat(b, fi32) if r.bad { return 0, strconvParseErr(b, "ParseFloat") } if r.ok { f, r.bad = parseFloat32_reader(r) if !r.bad { return } } return parseFloat32_strconv(b) } func parseFloat64_reader(r readFloatResult) (f float64, fail bool) { f = float64(r.mantissa) if r.exp == 0 { } else if r.exp < 0 { // int / 10^k f /= float64pow10[-uint8(r.exp)] } else { // exp > 0 if r.exp > fi64.exactPow10 { f *= float64pow10[r.exp-fi64.exactPow10] if f > fMaxMultiplierForExactPow10_64 { // exponent too large - outside range fail = true return } f *= float64pow10[fi64.exactPow10] } else { f *= float64pow10[uint8(r.exp)] } } if r.neg { f = -f } return } func parseFloat64_custom(b []byte) (f float64, err error) { r := readFloat(b, fi64) if r.bad { return 0, strconvParseErr(b, "ParseFloat") } if r.ok { f, r.bad = parseFloat64_reader(r) if !r.bad { return } } return parseFloat64_strconv(b) } func parseUint64_simple(b []byte) (n uint64, ok bool) { var i int var n1 uint64 var c uint8 LOOP: if i < len(b) { c = b[i] // unsigned integers don't overflow well on multiplication, so check cutoff here // e.g. (maxUint64-5)*10 doesn't overflow well ... // if n >= fUint64Cutoff || !isDigitChar(b[i]) { // if c < '0' || c > '9' { if n >= fUint64Cutoff || c < '0' || c > '9' { return } else if c == '0' { n *= fBase } else { n1 = n n = n*fBase + uint64(c-'0') if n < n1 { return } } i++ goto LOOP } ok = true return } func parseUint64_reader(r readFloatResult) (f uint64, fail bool) { f = r.mantissa if r.exp == 0 { } else if r.exp < 0 { // int / 10^k if f%uint64pow10[uint8(-r.exp)] != 0 { fail = true } else { f /= uint64pow10[uint8(-r.exp)] } } else { // exp > 0 f *= uint64pow10[uint8(r.exp)] } return } func parseInt64_reader(r readFloatResult) (v int64, fail bool) { if r.exp == 0 { } else if r.exp < 0 { // int / 10^k if r.mantissa%uint64pow10[uint8(-r.exp)] != 0 { // fail = true return 0, true } r.mantissa /= uint64pow10[uint8(-r.exp)] } else { // exp > 0 r.mantissa *= uint64pow10[uint8(r.exp)] } if chkOvf.Uint2Int(r.mantissa, r.neg) { fail = true } else if r.neg { v = -int64(r.mantissa) } else { v = int64(r.mantissa) } return } // parseNumber will return an integer if only composed of [-]?[0-9]+ // Else it will return a float. func parseNumber(b []byte, z *fauxUnion, preferSignedInt bool) (err error) { var ok, neg bool var f uint64 if len(b) == 0 { return } if b[0] == '-' { neg = true f, ok = parseUint64_simple(b[1:]) } else { f, ok = parseUint64_simple(b) } if ok { if neg { z.v = valueTypeInt if chkOvf.Uint2Int(f, neg) { return strconvParseErr(b, "ParseInt") } z.i = -int64(f) } else if preferSignedInt { z.v = valueTypeInt if chkOvf.Uint2Int(f, neg) { return strconvParseErr(b, "ParseInt") } z.i = int64(f) } else { z.v = valueTypeUint z.u = f } return } z.v = valueTypeFloat z.f, err = parseFloat64_custom(b) return } type readFloatResult struct { mantissa uint64 exp int8 neg, sawdot, sawexp, trunc, bad bool ok bool _ byte // padding } func readFloat(s []byte, y floatinfo) (r readFloatResult) { var i uint // uint, so that we eliminate bounds checking var slen = uint(len(s)) if slen == 0 { // read an empty string as the zero value // r.bad = true r.ok = true return } if s[0] == '-' { r.neg = true i++ } // we considered punting early if string has length > maxMantDigits, but this doesn't account // for trailing 0's e.g. 700000000000000000000 can be encoded exactly as it is 7e20 var nd, ndMant, dp int8 var xu uint64 LOOP: for ; i < slen; i++ { switch s[i] { case '.': if r.sawdot { r.bad = true return } r.sawdot = true dp = nd case 'e', 'E': r.sawexp = true break LOOP case '0': if nd == 0 { dp-- continue LOOP } nd++ if r.mantissa < y.mantCutoff { r.mantissa *= fBase ndMant++ } case '1', '2', '3', '4', '5', '6', '7', '8', '9': nd++ if y.mantCutoffIsUint64Cutoff && r.mantissa < fUint64Cutoff { r.mantissa *= fBase xu = r.mantissa + uint64(s[i]-'0') if xu < r.mantissa { r.trunc = true return } r.mantissa = xu } else if r.mantissa < y.mantCutoff { // mantissa = (mantissa << 1) + (mantissa << 3) + uint64(c-'0') r.mantissa = r.mantissa*fBase + uint64(s[i]-'0') } else { r.trunc = true return } ndMant++ default: r.bad = true return } } if !r.sawdot { dp = nd } if r.sawexp { i++ if i < slen { var eneg bool if s[i] == '+' { i++ } else if s[i] == '-' { i++ eneg = true } if i < slen { // for exact match, exponent is 1 or 2 digits (float64: -22 to 37, float32: -1 to 17). // exit quick if exponent is more than 2 digits. if i+2 < slen { return } var e int8 if s[i] < '0' || s[i] > '9' { // !isDigitChar(s[i]) { // r.bad = true return } e = int8(s[i] - '0') i++ if i < slen { if s[i] < '0' || s[i] > '9' { // !isDigitChar(s[i]) { // r.bad = true return } e = e*fBase + int8(s[i]-'0') // (e << 1) + (e << 3) + int8(s[i]-'0') i++ } if eneg { dp -= e } else { dp += e } } } } if r.mantissa != 0 { r.exp = dp - ndMant // do not set ok=true for cases we cannot handle if r.exp < -y.exactPow10 || r.exp > y.exactInts+y.exactPow10 || (y.mantbits != 0 && r.mantissa>>y.mantbits != 0) { return } } r.ok = true return }