mirror of
https://github.com/c3lang/c3c.git
synced 2026-02-27 12:01:16 +00:00
142 lines
4.4 KiB
Plaintext
142 lines
4.4 KiB
Plaintext
module std::math::nolibc @if(env::NO_LIBC || $feature(C3_MATH));
|
|
|
|
macro uint _top12f(float x) @private => bitcast(x, uint) >> 20;
|
|
|
|
|
|
fn float _exp2f(float x) @extern("exp2f") @weak @nostrip
|
|
{
|
|
double xd = x;
|
|
uint abstop = _top12f(x) & 0x7ff;
|
|
if (abstop >= _top12f(128.0f)) /* @unlikely */
|
|
{
|
|
switch
|
|
{
|
|
// |x| >= 128 or x is nan.
|
|
case bitcast(x, uint) == bitcast(-float.inf, uint):
|
|
return 0;
|
|
case abstop >= _top12f(float.inf):
|
|
return x + x;
|
|
case x > 0:
|
|
return __math_oflowf(0);
|
|
case x <= -150.0f:
|
|
return __math_uflowf(0);
|
|
}
|
|
}
|
|
const SHIFT = __EXP2F_DATA.shift_scaled;
|
|
const uint N = 1U << EXP2F_TABLE_BITS;
|
|
// x = k/N + r with r in [-1/(2N), 1/(2N)] and int k.
|
|
double kd = xd + SHIFT;
|
|
ulong ki = bitcast(kd, ulong);
|
|
kd -= SHIFT; /* k/N for int k. */
|
|
double r = xd - kd;
|
|
|
|
// exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1)
|
|
ulong t = __EXP2F_DATA.tab[ki % N];
|
|
t += ki << (52 - EXP2F_TABLE_BITS);
|
|
double s = bitcast(t, double);
|
|
double z = __EXP2F_DATA.poly[0] * r + __EXP2F_DATA.poly[1];
|
|
double r2 = r * r;
|
|
double y = __EXP2F_DATA.poly[2] * r + 1;
|
|
y = z * r2 + y;
|
|
y = y * s;
|
|
return (float)y;
|
|
}
|
|
|
|
fn double _exp2_specialcase(double tmp, ulong sbits, ulong ki) @private
|
|
{
|
|
if (ki & 0x80000000 == 0)
|
|
{
|
|
// k > 0, the exponent of scale might have overflowed by 1.
|
|
sbits -= 1UL << 52;
|
|
double scale = bitcast(sbits, double);
|
|
double y = 2 * (scale + scale * tmp);
|
|
return y;
|
|
}
|
|
// k < 0, need special care in the subnormal range.
|
|
sbits += 1022UL << 52;
|
|
double scale = bitcast(sbits, double);
|
|
double y = scale + scale * tmp;
|
|
if (y >= 1.0)
|
|
{
|
|
return 0x1p-1022 * y;
|
|
}
|
|
// Round y to the right precision before scaling it into the subnormal
|
|
// range to avoid double rounding that can cause 0.5+E/2 ulp error where
|
|
// E is the worst-case ulp error outside the subnormal range. So this
|
|
// is only useful if the goal is better than 1 ulp worst-case error.
|
|
double lo = scale - y + scale * tmp;
|
|
double hi = 1.0 + y;
|
|
lo = 1.0 - hi + y + lo;
|
|
y = hi + lo - 1.0;
|
|
/* Avoid -0.0 with downward rounding. */
|
|
if (WANT_ROUNDING && y == 0.0) y = 0.0;
|
|
/* The underflow exception needs to be signaled explicitly. */
|
|
return __math_xflow(0, 0x1p-1022);
|
|
}
|
|
|
|
|
|
macro uint _top12d(double x) @private
|
|
{
|
|
return (uint)(bitcast(x, ulong) >> 52);
|
|
}
|
|
|
|
fn double _exp2(double x) @extern("exp2") @weak @nostrip
|
|
{
|
|
uint abstop = _top12d(x) & 0x7ff;
|
|
ulong u = bitcast(x, ulong);
|
|
if (abstop - _top12d(0x1p-54) >= _top12d(512.0) - _top12d(0x1p-54)) /* @unlikely */
|
|
{
|
|
if (abstop - _top12d(0x1p-54) >= 0x80000000)
|
|
{
|
|
// Avoid spurious underflow for tiny x.
|
|
// Note: 0 is common input.
|
|
return WANT_ROUNDING ? 1.0 + x : 1.0;
|
|
}
|
|
if (abstop >= _top12d(1024.0))
|
|
{
|
|
switch
|
|
{
|
|
case u == bitcast(-double.inf, ulong):
|
|
return 0.0;
|
|
case abstop >= _top12d(double.inf):
|
|
return 1.0 + x;
|
|
case !(u >> 63):
|
|
return __math_oflow(0);
|
|
case u >= bitcast(-1075.0, ulong):
|
|
return __math_uflow(0);
|
|
}
|
|
}
|
|
// Large x is special cased below.
|
|
if (2 * u > 2 * bitcast(928.0, ulong)) abstop = 0;
|
|
}
|
|
const SHIFT = __EXP2_DATA.exp2_shift;
|
|
// exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)].
|
|
// x = k/N + r, with int k and r in [-1/2N, 1/2N].
|
|
double kd = x + SHIFT;
|
|
ulong ki = bitcast(kd, ulong); /* k. */
|
|
kd -= SHIFT; /* k/N for int k. */
|
|
double r = x - kd;
|
|
/* 2^(k/N) ~= scale * (1 + tail). */
|
|
ulong idx = 2 * (ki % EXP_DATA_WIDTH);
|
|
ulong top = ki << (52 - EXP_TABLE_BITS);
|
|
double tail = __EXP2_DATA.tab[idx];
|
|
/* This is only a valid scale when -1023*N < k < 1024*N. */
|
|
ulong sbits = __EXP2_DATA.tab[idx + 1] + top;
|
|
/* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */
|
|
/* Evaluation is optimized assuming superscalar pipelined execution. */
|
|
double r2 = r * r;
|
|
const C1 = __EXP2_DATA.exp2_poly[0];
|
|
const C2 = __EXP2_DATA.exp2_poly[1];
|
|
const C3 = __EXP2_DATA.exp2_poly[2];
|
|
const C4 = __EXP2_DATA.exp2_poly[3];
|
|
const C5 = __EXP2_DATA.exp2_poly[4];
|
|
/* Without fma the worst case error is 0.5/N ulp larger. */
|
|
/* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */
|
|
double tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
|
|
if (abstop == 0 /* @unlikely */) return _exp2_specialcase(tmp, sbits, ki);
|
|
double scale = bitcast(sbits, double);
|
|
/* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there
|
|
is no spurious underflow here even without fma. */
|
|
return scale + scale * tmp;
|
|
}
|