Files
c3c/lib/std/core/string_to_real.c3
Pierre Curto 2437573a8f lib/std/io: add Stream.read_all (#843)
* lib/std/io: add Stream.read_all

Signed-off-by: Pierre Curto <pierre.curto@gmail.com>

* lib/std/core: use shortened receiver notation

Signed-off-by: Pierre Curto <pierre.curto@gmail.com>

---------

Signed-off-by: Pierre Curto <pierre.curto@gmail.com>
2023-07-10 20:13:31 +02:00

488 lines
11 KiB
C

module std::core::string;
import std::math;
// Float parsing based on code in Musl floatscan.c by Rich Felker.
// Musl uses the MIT license, copied below:
// ----------------------------------------------------------------------
// Copyright © 2005-2014 Rich Felker, et al.
//
// Permission is hereby granted, free of charge, to any person obtaining
// a copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, sublicense, and/or sell copies of the Software, and to
// permit persons to whom the Software is furnished to do so, subject to
// the following conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
// IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
// CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
// TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
// SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
// ----------------------------------------------------------------------
const KMAX = 128;
const MASK = KMAX - 1;
const B1B_DIG = 2;
const uint[2] B1B_MAX = { 9007199, 254740991 };
/**
* @require chars.len > 0
**/
macro double! decfloat(char[] chars, int $bits, int $emin, int sign)
{
uint[KMAX] x;
const uint[2] TH = B1B_MAX;
int emax = - $emin - $bits + 3;
const int[*] P10S = { 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000 };
usz index;
bool got_digit = chars[0] == '0';
bool got_rad;
long lrp, dc;
int k, j, lnz;
usz len = chars.len;
usz last_char = len - 1;
assert(len);
char c @noinit;
// Skip past first characters
while ((c = chars[index]) == '0')
{
if (index == last_char) return sign * 0.0;
index++;
}
if (c == '.')
{
got_rad = true;
if (index == last_char)
{
if (!got_digit) return NumberConversion.MALFORMED_FLOAT?;
return sign * 0.0;
}
if (index != last_char && (c = chars[++index]) == '0')
{
lrp--;
got_digit = true;
while (last_char != index && (c = chars[++index]) == '0')
{
lrp--;
}
}
}
while (c - '0' < 10u || c == '.')
{
switch
{
case c == '.':
if (got_rad) return NumberConversion.MALFORMED_FLOAT?;
got_rad = true;
lrp = dc;
case k < KMAX - 3:
dc++;
if (c != '0') lnz = (int)dc;
if (j)
{
x[k] = x[k] * 10 + c - '0';
}
else
{
x[k] = c - '0';
}
if (++j == 9)
{
k++;
j = 0;
}
got_digit = true;
default:
dc++;
if (c != '0') x[KMAX - 4] |= 1;
}
if (index == last_char) break;
assert(index < last_char);
c = chars[++index];
}
if (!got_rad) lrp = dc;
if (!got_digit) return NumberConversion.MALFORMED_FLOAT?;
if ((c | 32) == 'e')
{
if (last_char == index) return NumberConversion.MALFORMED_FLOAT?;
long e10 = String.to_long((String)chars[index + 1..]) ?? NumberConversion.MALFORMED_FLOAT?!;
lrp += e10;
}
else if (index != last_char)
{
return NumberConversion.MALFORMED_FLOAT?;
}
// Handle zero specially to avoid nasty special cases later
if (!x[0]) return sign * 0.0;
// Optimize small integers (w/no exponent) and over/under-flow
if (lrp == dc && dc < 10 && ($bits > 30 || (ulong)x[0] >> $bits == 0)) return sign * (double)x[0];
if (lrp > - $emin / 2) return NumberConversion.FLOAT_OUT_OF_RANGE?;
if (lrp < $emin - 2 * math::DOUBLE_MANT_DIG) return NumberConversion.FLOAT_OUT_OF_RANGE?;
// Align incomplete final B1B digit
if (j)
{
for (; j < 9; j++) x[k] *= 10;
k++;
j = 0;
}
int a;
int z = k;
int e2;
long rp = lrp;
// Optimize small to mid-size integers (even in exp. notation)
if (lnz < 9 && lnz <= rp && rp < 18)
{
if (rp == 9) return sign * (double)x[0];
if (rp < 9) return sign * (double)x[0] / P10S[8 - rp];
int bitlim = $bits - 3 * (int)(rp - 9);
if (bitlim > 30 || x[0] >> bitlim == 0) return sign * (double)x[0] * P10S[rp - 10];
}
// Align radix point to B1B digit boundary
if (rp % 9)
{
long rpm9 = rp >= 0 ? rp % 9 : rp % 9 + 9;
int p10 = P10S[8 - rpm9];
uint carry = 0;
for (k = a; k != z; k++)
{
uint tmp = x[k] % p10;
x[k] = x[k] / p10 + carry;
carry = 1000000000 / p10 * tmp;
if (k == a && !x[k])
{
a = (a + 1) & MASK;
rp -= 9;
}
}
if (carry) x[z++] = carry;
rp += 9 - rpm9;
}
// Upscale until desired number of bits are left of radix point
while (rp < 9 * B1B_DIG || (rp == 9 * B1B_DIG && x[a] < TH[0]))
{
uint carry = 0;
e2 -= 29;
for (k = (z - 1) & MASK; ; k = (k - 1) & MASK)
{
ulong tmp = (ulong)x[k] << 29 + carry;
if (tmp > 1000000000)
{
carry = (uint)(tmp / 1000000000);
x[k] = (uint)(tmp % 1000000000);
}
else
{
carry = 0;
x[k] = (uint)tmp;
}
if (k == (z - 1) & MASK && k != a && !x[k]) z = k;
if (k == a) break;
}
if (carry)
{
rp += 9;
a = (a - 1) & MASK;
if (a == z)
{
z = (z - 1) & MASK;
x[(z - 1) & MASK] |= x[z];
}
x[a] = carry;
}
}
// Downscale until exactly number of bits are left of radix point
while (true)
{
uint carry = 0;
int sh = 1;
int i;
for (i = 0; i < B1B_DIG; i++)
{
k = (a + i) & MASK;
if (k == z || x[k] < TH[i])
{
i = B1B_DIG;
break;
}
if (x[(a + i) & MASK] > TH[i]) break;
}
if (i == B1B_DIG && rp == 9 * B1B_DIG) break;
if (rp > 9 + 9 * B1B_DIG) sh = 9;
e2 += sh;
for (k = a; k != z; k = (k+1) & MASK)
{
uint tmp = x[k] & (1 << sh - 1);
x[k] = x[k] >> sh + carry;
carry = (1000000000 >> sh) * tmp;
if (k == a && !x[k])
{
a = (a + 1) & MASK;
i--;
rp -= 9;
}
}
if (carry)
{
if ((z + 1) & MASK != a)
{
x[z] = carry;
z = (z + 1) & MASK;
}
else
{
x[(z - 1) & MASK] |= 1;
}
}
}
// Assemble desired bits into floating point variable
double y;
int i;
for (i = 0; i < B1B_DIG; i++)
{
if ((a + i) & MASK == z) x[(z = (z + 1) & MASK) - 1] = 0;
y = 1000000000.0 * y + x[(a + i) & MASK];
}
y *= sign;
bool denormal;
/* Limit precision for denormal results */
uint bits = $bits;
if (bits > math::DOUBLE_MANT_DIG + e2 - $emin)
{
bits = math::DOUBLE_MANT_DIG + e2 - $emin;
if (bits < 0) bits = 0;
denormal = true;
}
// Calculate bias term to force rounding, move out lower bits
double bias;
double frac;
if (bits < math::DOUBLE_MANT_DIG)
{
bias = math::copysign(math::scalbn(1, 2 * math::DOUBLE_MANT_DIG - bits - 1), y);
frac = y % math::scalbn(1, math::DOUBLE_MANT_DIG - bits);
y -= frac;
y += bias;
}
// Process tail of decimal input so it can affect rounding
if ((a + i) & MASK != z)
{
uint t = x[(a + i) & MASK];
switch
{
case t < 500000000 && (t || (a + i + 1) & MASK != z):
frac += 0.25 * sign;
case t > 500000000:
frac += 0.75 * sign;
case t == 500000000:
if ((a + i + 1) & MASK == z)
{
frac += 0.5 * sign;
}
else
{
frac += 0.75 * sign;
}
}
if (math::DOUBLE_MANT_DIG - bits >= 2 && !(frac % 1)) frac++;
}
y += frac;
y -= bias;
if (((e2 + math::DOUBLE_MANT_DIG) & int.max) > emax - 5)
{
if (math::abs(y) >= 0x1p53)
{
if (denormal && bits == math::DOUBLE_MANT_DIG + e2 - $emin) denormal = false;
y *= 0.5;
e2++;
}
if (e2 + math::DOUBLE_MANT_DIG > emax || (denormal && frac)) return NumberConversion.MALFORMED_FLOAT?;
}
return math::scalbn(y, e2);
}
macro double! hexfloat(char[] chars, int $bits, int $emin, int sign)
{
double scale = 1;
uint x;
long rp;
long dc;
char c @noinit;
bool got_rad;
bool got_digit;
bool got_tail;
usz len = chars.len;
usz last_char = len - 1;
usz index;
double y;
// Skip past first characters
while ((c = chars[index]) == '0')
{
if (index == last_char) return 0.0;
index++;
}
if (c == '.')
{
got_rad = true;
if (index == last_char)
{
if (!got_digit) return NumberConversion.MALFORMED_FLOAT?;
return sign * 0.0;
}
if (index != last_char && (c = chars[++index]) == '0')
{
rp--;
got_digit = true;
while (last_char != index && (c = chars[++index]) == '0')
{
rp--;
}
}
}
while ((c - '0') < 10u || ((c | 32) - 'a') < 6u || c == '.')
{
if (c == '.')
{
if (got_rad) return NumberConversion.MALFORMED_FLOAT?;
got_rad = true;
rp = dc;
}
else
{
got_digit = true;
int d = {|
if (c > '9') return (c | 32) + 10 - 'a';
return c - '0';
|};
switch
{
case dc < 8:
x = x * 16 + d;
case dc < math::DOUBLE_MANT_DIG / 4 + 1:
y += d * (scale /= 16);
got_tail = true;
case d && !got_tail:
y += 0.5 * scale;
got_tail = true;
}
dc++;
}
if (index == last_char) break;
c = chars[++index];
}
if (!got_digit) return NumberConversion.MALFORMED_FLOAT?;
if (!got_rad) rp = dc;
for (; dc < 8; dc++) x *= 16;
long e2;
if ((c | 32) == 'p')
{
long e2val = String.to_long((String)chars[index + 1..]) ?? (NumberConversion.MALFORMED_FLOAT?)!;
e2 = e2val;
}
e2 += 4 * rp - 32;
if (!x) return sign * 0.0;
if (e2 > -$emin) return NumberConversion.FLOAT_OUT_OF_RANGE?;
if (e2 < $emin - 2 * math::DOUBLE_MANT_DIG) return NumberConversion.FLOAT_OUT_OF_RANGE?;
while (x < 0x80000000)
{
if (y >= 0.5)
{
x += x + 1;
y += y - 1;
}
else
{
x += x;
y += y;
}
e2--;
}
int bits = $bits;
if ($bits > 32 + e2 - $emin)
{
bits = (int)(32 + e2 - $emin);
if (bits < 0) bits = 0;
}
double bias;
if (bits < math::DOUBLE_MANT_DIG)
{
bias = math::copysign(math::scalbn(1, 32 + math::DOUBLE_MANT_DIG - bits - 1), (double)sign);
}
if (bits < 32 && y && !(x & 1))
{
x++;
y = 0;
}
y = bias + sign * (double)x + sign * y;
y -= bias;
if (!y) return NumberConversion.FLOAT_OUT_OF_RANGE?;
return math::scalbn(y, (int)e2);
}
macro String.to_real(chars, $Type) @private
{
int sign = 1;
$switch ($Type)
$case float:
const int BITS = math::FLOAT_MANT_DIG;
const int EMIN = math::FLOAT_MIN_EXP - BITS;
$case double:
const int BITS = math::DOUBLE_MANT_DIG;
const int EMIN = math::DOUBLE_MIN_EXP - BITS;
$case float128:
$error "Not yet supported";
$default:
$error "Unexpected type";
$endswitch
while (chars.len && chars[0] == ' ') chars = chars[1..];
if (!chars.len) return NumberConversion.MALFORMED_FLOAT?;
switch (chars[0])
{
case '-':
sign = -1;
nextcase;
case '+':
chars = chars[1..];
}
if (chars == "infinity" || chars == "INFINITY") return sign * $Type.inf;
if (chars == "NAN" || chars == "nan") return $Type.nan;
if (chars.len > 2 && chars[0] == '0' && (chars[1] | 32) == 'x')
{
return ($Type)hexfloat((char[])chars[2..], BITS, EMIN, sign);
}
return ($Type)decfloat((char[])chars, BITS, EMIN, sign);
}