diff --git a/lib/std/math/math_nolibc/__cos.c3 b/lib/std/math/math_nolibc/__cos.c3 new file mode 100644 index 000000000..da169620c --- /dev/null +++ b/lib/std/math/math_nolibc/__cos.c3 @@ -0,0 +1,34 @@ +module std::math::nolibc; + +$if (!env::COMPILER_LIBC_AVAILABLE): + +/* origin: FreeBSD /usr/src/lib/msun/src/k_cos.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +fn double __cos(double x, double y) @extern("__cos") @weak +{ + const C1 = 4.16666666666666019037e-02; /* 0x3FA55555, 0x5555554C */ + const C2 = -1.38888888888741095749e-03; /* 0xBF56C16C, 0x16C15177 */ + const C3 = 2.48015872894767294178e-05; /* 0x3EFA01A0, 0x19CB1590 */ + const C4 = -2.75573143513906633035e-07; /* 0xBE927E4F, 0x809C52AD */ + const C5 = 2.08757232129817482790e-09; /* 0x3E21EE9E, 0xBDB4B1C4 */ + const C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */ + + double z = x * x; + double w = z * z; + double r = z * (C1 + z * (C2 + z * C3)) + w * w * (C4 + z * (C5 + z * C6)); + double hz = 0.5 * z; + w = 1.0 - hz; + return w + (((1.0 - w) - hz) + (z * r - x * y)); +} + +$endif; \ No newline at end of file diff --git a/lib/std/math/math_nolibc/__cosdf.c3 b/lib/std/math/math_nolibc/__cosdf.c3 new file mode 100644 index 000000000..55f00a215 --- /dev/null +++ b/lib/std/math/math_nolibc/__cosdf.c3 @@ -0,0 +1,36 @@ +module std::math::nolibc; + +$if (!env::COMPILER_LIBC_AVAILABLE): + +/* origin: FreeBSD /usr/src/lib/msun/src/k_cosf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Debugged and optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +/* |cos(x) - c(x)| < 2**-34.1 (~[-5.37e-11, 5.295e-11]). */ + +private const double C0 = -0x1ffffffd0c5e81.0p-54; /* -0.499999997251031003120 */ +private const double C1 = 0x155553e1053a42.0p-57; /* 0.0416666233237390631894 */ +private const double C2 = -0x16c087e80f1e27.0p-62; /* -0.00138867637746099294692 */ +private const double C3 = 0x199342e0ee5069.0p-68; /* 0.0000243904487962774090654 */ + +fn float __cosdf(double x) @extern("__cosdf") @weak +{ + /* Try to optimize for parallel evaluation as in __tandf.c. */ + double z = x * x; + double w = z * z; + double r = C2 + z * C3; + return (float)(((1.0f + z * C0) + w * C1) + (w * z) * r); +} + +$endif; \ No newline at end of file diff --git a/lib/std/math/math_nolibc/__sin.c3 b/lib/std/math/math_nolibc/__sin.c3 new file mode 100644 index 000000000..298bd4899 --- /dev/null +++ b/lib/std/math/math_nolibc/__sin.c3 @@ -0,0 +1,35 @@ +module std::math::nolibc; + +$if (!env::COMPILER_LIBC_AVAILABLE): + +/* origin: FreeBSD /usr/src/lib/msun/src/k_sin.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +fn double __sin(double x, double y, int iy) @extern("__sin") @weak +{ + + const S1 = -1.66666666666666324348e-01; /* 0xBFC55555, 0x55555549 */ + const S2 = 8.33333333332248946124e-03; /* 0x3F811111, 0x1110F8A6 */ + const S3 = -1.98412698298579493134e-04; /* 0xBF2A01A0, 0x19C161D5 */ + const S4 = 2.75573137070700676789e-06; /* 0x3EC71DE3, 0x57B1FE7D */ + const S5 = -2.50507602534068634195e-08; /* 0xBE5AE5E6, 0x8A2B9CEB */ + const S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */ + + double z = x * x; + double w = z * z; + double r = S2 + z * (S3 + z * S4) + z * w * (S5 + z * S6); + double v = z * x; + return iy == 0 + ? x + v * (S1 + z * r) + : x - ((z * (0.5 * y - v * r) - y) - v * S1); +} + +$endif; \ No newline at end of file diff --git a/lib/std/math/math_nolibc/__sindf.c3 b/lib/std/math/math_nolibc/__sindf.c3 new file mode 100644 index 000000000..7a0c95225 --- /dev/null +++ b/lib/std/math/math_nolibc/__sindf.c3 @@ -0,0 +1,34 @@ +module std::math::nolibc; + +$if (!env::COMPILER_LIBC_AVAILABLE): + +/* origin: FreeBSD /usr/src/lib/msun/src/k_sinf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ +// |sin(x)/x - s(x)| < 2**-37.5 (~[-4.89e-12, 4.824e-12]). +fn float __sindf(double x) @extern("__sindf") @weak +{ + const S1F = -0x15555554cbac77.0p-55; /* -0.166666666416265235595 */ + const S2F = 0x111110896efbb2.0p-59; /* 0.0083333293858894631756 */ + const S3F = -0x1a00f9e2cae774.0p-65; /* -0.000198393348360966317347 */ + const S4F = 0x16cd878c3b46a7.0p-71; /* 0.0000027183114939898219064 */ + double z = x * x; + double w = z * z; + double r = S3F + z * S4F; + double s = z * x; + return (float)((x + s * (S1F + z * S2F)) + s * w * r); +} + +$endif; diff --git a/lib/std/math/math_nolibc/ceil.c3 b/lib/std/math/math_nolibc/ceil.c3 index 497fd62cc..91645ab5d 100644 --- a/lib/std/math/math_nolibc/ceil.c3 +++ b/lib/std/math/math_nolibc/ceil.c3 @@ -12,7 +12,7 @@ fn double _ceil(double x) @weak @extname("ceil") // special case because of non-nearest rounding modes if (e <= 0x3ff - 1) { - @force_eval_add(y, 0); + force_eval_add(y, 0); return ui >> 63 ? -0.0 : 1; } return y < 0 ? x + y + 1 : x + y; @@ -30,11 +30,11 @@ fn float _ceilf(float x) @weak @extname("ceilf") case e >= 0: uint m = 0x007fffff >> e; if (u & m == 0) return x; - @force_eval_add(x, 0x1p120f); + force_eval_add(x, 0x1p120f); if (u >> 31 == 0) u += m; u &= ~m; case u >> 31 != 0: - @force_eval_add(x, 0x1p120f); + force_eval_add(x, 0x1p120f); return -0.0f; case u << 1 != 0: return 1; diff --git a/lib/std/math/math_nolibc/cos.c3 b/lib/std/math/math_nolibc/cos.c3 new file mode 100644 index 000000000..078474f86 --- /dev/null +++ b/lib/std/math/math_nolibc/cos.c3 @@ -0,0 +1,91 @@ +module std::math::nolibc; + +$if (!env::COMPILER_LIBC_AVAILABLE): + +fn float _cosf(float x) @extern("cosf") @weak +{ + uint ix = bitcast(x, uint); + uint sign = ix >> 31; + ix &= 0x7fffffff; + + switch + { + case (ix <= 0x3f490fda): + // |x| < 2**-12 + if (ix < 0x39800000) + { + /* raise inexact if x != 0 */ + force_eval_add(x, 0x1p120f); + return 1.0f; + } + return __cosdf(x); + // |x| ~<= 5*pi/4 + case (ix <= 0x407b53d1): + // |x| ~> 3*pi/4 + if (ix > 0x4016cbe3) return -__cosdf(sign ? x + S2PI2 : x - S2PI2); + return sign ? __sindf(x + S1PI2) : __sindf(S1PI2 - x); + // |x| ~<= 9*pi/4 + case (ix <= 0x40e231d5): + if (ix > 0x40afeddf) return __cosdf(sign ? x + S4PI2 : x - S4PI2); + return sign ? __sindf((double)(-x) - S3PI2) : __sindf(x - S3PI2); + } + + // general argument reduction needed + double y = void; + switch (__rem_pio2f(x, &y) & 3) + { + case 0: return __cosdf(y); + case 1: return __sindf(-y); + case 2: return -__cosdf(y); + default: return __sindf(y); + } + unreachable(); +} + +/* origin: FreeBSD /usr/src/lib/msun/src/s_cos.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +fn double _cos(double x) @extern("cos") @weak +{ + // High word of x. + uint ix = (uint)(bitcast(x, ulong) >> 32); + ix &= 0x7fffffff; + + switch + { + // |x| ~< pi/4 + case ix <= 0x3fe921fb: + if (ix < 0x3e46a09e) + { + // |x| < 2**-27 * sqrt(2) + // raise inexact if x!=0 + force_eval_add(x, 0x1p120f); + return 1.0; + } + return __cos(x, 0); + case ix >= 0x7ff00000: + // cos(Inf or NaN) is NaN + return x - x; + default: + // argument reduction + double[2] y = void; + switch (__rem_pio2(x, &y) & 3) + { + case 0: return __cos(y[0], y[1]); + case 1: return -__sin(y[0], y[1], 1); + case 2: return -__cos(y[0], y[1]); + default: return __sin(y[0], y[1], 1); + } + } +} + +$endif; \ No newline at end of file diff --git a/lib/std/math/math_nolibc/exp2.c3 b/lib/std/math/math_nolibc/exp2.c3 index 61cfb025c..6c6ef20ad 100644 --- a/lib/std/math/math_nolibc/exp2.c3 +++ b/lib/std/math/math_nolibc/exp2.c3 @@ -2,14 +2,143 @@ module std::math::nolibc; $if (!env::COMPILER_LIBC_AVAILABLE): -fn float exp2f_broken(float x) @extname("exp2f") @weak +private macro uint _top12f(float x) => bitcast(x, uint) >> 20; + + +fn float _exp2f(float x) @extern("exp2f") @weak { - unreachable("'exp2f' not supported"); + double xd = x; + uint abstop = _top12f(x) & 0x7ff; + if (abstop >= _top12f(128.0f)) /* @unlikely */ + { + switch + { + // |x| >= 128 or x is nan. + case bitcast(x, uint) == bitcast(-float.inf, uint): + return 0; + case abstop >= _top12f(float.inf): + return x + x; + case x > 0: + return __math_oflowf(0); + case x <= -150.0f: + return __math_uflowf(0); + } + } + const SHIFT = __EXP2F_DATA.shift_scaled; + const uint N = 1U << EXP2F_TABLE_BITS; + // x = k/N + r with r in [-1/(2N), 1/(2N)] and int k. + double kd = xd + SHIFT; + ulong ki = bitcast(kd, ulong); + kd -= SHIFT; /* k/N for int k. */ + double r = xd - kd; + + // exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1) + ulong t = __EXP2F_DATA.tab[ki % N]; + t += ki << (52 - EXP2F_TABLE_BITS); + double s = bitcast(t, double); + double z = __EXP2F_DATA.poly[0] * r + __EXP2F_DATA.poly[1]; + double r2 = r * r; + double y = __EXP2F_DATA.poly[2] * r + 1; + y = z * r2 + y; + y = y * s; + return (float)y; } -fn float exp2_broken(float x) @extname("exp2") @weak +private fn double _exp2_specialcase(double tmp, ulong sbits, ulong ki) { - unreachable("'exp2' not supported"); + if (ki & 0x80000000 == 0) + { + // k > 0, the exponent of scale might have overflowed by 1. + sbits -= 1u64 << 52; + double scale = bitcast(sbits, double); + double y = 2 * (scale + scale * tmp); + return y; + } + // k < 0, need special care in the subnormal range. + sbits += 1022u64 << 52; + double scale = bitcast(sbits, double); + double y = scale + scale * tmp; + if (y >= 1.0) + { + return 0x1p-1022 * y; + } + // Round y to the right precision before scaling it into the subnormal + // range to avoid double rounding that can cause 0.5+E/2 ulp error where + // E is the worst-case ulp error outside the subnormal range. So this + // is only useful if the goal is better than 1 ulp worst-case error. + double lo = scale - y + scale * tmp; + double hi = 1.0 + y; + lo = 1.0 - hi + y + lo; + y = hi + lo - 1.0; + /* Avoid -0.0 with downward rounding. */ + if (WANT_ROUNDING && y == 0.0) y = 0.0; + /* The underflow exception needs to be signaled explicitly. */ + return __math_xflow(0, 0x1p-1022); } + +private macro uint _top12d(double x) +{ + return (uint)(bitcast(x, ulong) >> 52); +} + +fn double _exp2(double x) @extern("exp2") @weak +{ + uint abstop = _top12d(x) & 0x7ff; + ulong u = bitcast(x, ulong); + if (abstop - _top12d(0x1p-54) >= _top12d(512.0) - _top12d(0x1p-54)) /* @unlikely */ + { + if (abstop - _top12d(0x1p-54) >= 0x80000000) + { + // Avoid spurious underflow for tiny x. */ + // Note: 0 is common input. */ + return WANT_ROUNDING ? 1.0 + x : 1.0; + } + if (abstop >= _top12d(1024.0)) + { + switch + { + case u == bitcast(-double.inf, ulong): + return 0.0; + case abstop >= _top12d(double.inf): + return 1.0 + x; + case !(u >> 63): + return __math_oflow(0); + case u >= bitcast(-1075.0, ulong): + return __math_uflow(0); + } + } + // Large x is special cased below. + if (2 * u > 2 * bitcast(928.0, ulong)) abstop = 0; + } + const SHIFT = __EXP2_DATA.exp2_shift; + // exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)]. */ + // x = k/N + r, with int k and r in [-1/2N, 1/2N]. */ + double kd = x + SHIFT; + ulong ki = bitcast(kd, ulong); /* k. */ + kd -= SHIFT; /* k/N for int k. */ + double r = x - kd; + /* 2^(k/N) ~= scale * (1 + tail). */ + ulong idx = 2 * (ki % EXP_DATA_WIDTH); + ulong top = ki << (52 - EXP_TABLE_BITS); + double tail = __EXP2_DATA.tab[idx]; + /* This is only a valid scale when -1023*N < k < 1024*N. */ + ulong sbits = __EXP2_DATA.tab[idx + 1] + top; + /* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1). */ + /* Evaluation is optimized assuming superscalar pipelined execution. */ + double r2 = r * r; + const C1 = __EXP2_DATA.exp2_poly[0]; + const C2 = __EXP2_DATA.exp2_poly[1]; + const C3 = __EXP2_DATA.exp2_poly[2]; + const C4 = __EXP2_DATA.exp2_poly[3]; + const C5 = __EXP2_DATA.exp2_poly[4]; + /* Without fma the worst case error is 0.5/N ulp larger. */ + /* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp. */ + double tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5); + if (abstop == 0 /* @unlikely */) return _exp2_specialcase(tmp, sbits, ki); + double scale = bitcast(sbits, double); + /* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there + is no spurious underflow here even without fma. */ + return scale + scale * tmp; +} $endif; \ No newline at end of file diff --git a/lib/std/math/math_nolibc/floor.c3 b/lib/std/math/math_nolibc/floor.c3 index 1b49d6626..f79a31d92 100644 --- a/lib/std/math/math_nolibc/floor.c3 +++ b/lib/std/math/math_nolibc/floor.c3 @@ -12,7 +12,7 @@ fn double _floor(double x) @weak @extname("floor") // special case because of non-nearest rounding modes if (e <= 0x3ff - 1) { - @force_eval_add(y, 0); + force_eval_add(y, 0); return ui >> 63 ? -1 : 0; } return y > 0 ? x + y - 1 : x + y; @@ -30,11 +30,11 @@ fn float _floorf(float x) @weak @extname("floorf") case e >= 0: uint m = 0x007fffff >> e; if (u & m == 0) return x; - @force_eval_add(x, 0x1p120f); + force_eval_add(x, 0x1p120f); if (u >> 31) u += m; u &= ~m; case u >> 31 == 0: - @force_eval_add(x, 0x1p120f); + force_eval_add(x, 0x1p120f); u = 0; case (u << 1 != 0): return -1; diff --git a/lib/std/math/math_nolibc/math_nolibc.c3 b/lib/std/math/math_nolibc/math_nolibc.c3 index 48e9d03e7..2ed1c084b 100644 --- a/lib/std/math/math_nolibc/math_nolibc.c3 +++ b/lib/std/math/math_nolibc/math_nolibc.c3 @@ -1,9 +1,256 @@ module std::math::nolibc; const double TOINT = 1 / math::DOUBLE_EPSILON; +const double TOINT15 = 1.5 / math::DOUBLE_EPSILON; const float TOINTF = (float)(1 / math::FLOAT_EPSILON); +private const double S1PI2 = math::PI_2; /* 0x3FF921FB, 0x54442D18 */ +private const double S2PI2 = math::PI; /* 0x400921FB, 0x54442D18 */ +private const double S3PI2 = math::PI + math::PI_2; /* 0x4012D97C, 0x7F3321D2 */ +private const double S4PI2 = math::PI + math::PI; /* 0x401921FB, 0x54442D18 */ -macro @force_eval_add(&x, v) +// Shared between expf, exp2f and powf. +private const EXP2F_TABLE_BITS = 5; +private const EXP2F_POLY_ORDER = 3; +private struct Exp2fData { - @volatile_store(x, @volatile_load(x) + v); + ulong[1 << EXP2F_TABLE_BITS] tab; + double shift_scaled; + double[EXP2F_POLY_ORDER] poly; + double shift; + double invln2_scaled; + double[EXP2F_POLY_ORDER] poly_scaled; +} + +private const Exp2fData __EXP2F_DATA = { + .tab = { + 0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51, + 0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1, + 0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d, + 0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, 0x3feea47eb03a5585, + 0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, 0x3feea11473eb0187, 0x3feea589994cce13, + 0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d, + 0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069, + 0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540, + }, + .shift_scaled = 0x1.8p+52 / (1 << 5), + .poly = { + 0x1.c6af84b912394p-5, 0x1.ebfce50fac4f3p-3, 0x1.62e42ff0c52d6p-1, + }, + .shift = 0x1.8p+52, + .invln2_scaled = 0x1.71547652b82fep+0 * (1 << 5), + .poly_scaled = { + 0x1.c6af84b912394p-5 / (1 << 5) / (1 << 5) / (1 << 5), + 0x1.ebfce50fac4f3p-3 / (1 << 5) / (1 << 5), + 0x1.62e42ff0c52d6p-1 / (1 << 5), + }, +}; + +const EXP_TABLE_BITS = 7; +const EXP_POLY_ORDER = 5; +const EXP2_POLY_ORDER = 5; +const EXP_DATA_WIDTH = 1 << EXP_TABLE_BITS; +private struct Exp2Data +{ + double invln2N; + double shift; + double negln2hiN; + double negln2loN; + double[4] poly; // Last four coefficients. + double exp2_shift; + double[EXP2_POLY_ORDER] exp2_poly; + ulong[2 * EXP_DATA_WIDTH] tab; +} + +/* + * Shared data between exp, exp2 and pow. + * + * Copyright (c) 2018, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + + +const Exp2Data __EXP2_DATA = { + // N/ln2 + .invln2N = 0x1.71547652b82fep0 * EXP_DATA_WIDTH, + // -ln2/N + .negln2hiN = -0x1.62e42fefa0000p-8, + .negln2loN = -0x1.cf79abc9e3b3ap-47, + .shift = 0x1.8p52, + // exp polynomial coefficients. + .poly = { + // abs error: 1.555*2^-66 + // ulp error: 0.509 (0.511 without fma) + // if |x| < ln2/256+eps + // abs error if |x| < ln2/256+0x1p-15: 1.09*2^-65 + // abs error if |x| < ln2/128: 1.7145*2^-56 + 0x1.ffffffffffdbdp-2, + 0x1.555555555543cp-3, + 0x1.55555cf172b91p-5, + 0x1.1111167a4d017p-7, + }, + .exp2_shift = 0x1.8p52 / EXP_DATA_WIDTH, + // exp2 polynomial coefficients. + .exp2_poly = { + // abs error: 1.2195*2^-65 + // ulp error: 0.507 (0.511 without fma) + // if |x| < 1/256 + // abs error if |x| < 1/128: 1.9941*2^-56 + 0x1.62e42fefa39efp-1, + 0x1.ebfbdff82c424p-3, + 0x1.c6b08d70cf4b5p-5, + 0x1.3b2abd24650ccp-7, + 0x1.5d7e09b4e3a84p-10, + }, + // 2^(k/N) ~= H[k]*(1 + T[k]) for int k in [0,N) + // tab[2*k] = asuint64(T[k]) + // tab[2*k+1] = asuint64(H[k]) - (k << 52)/N + .tab = { + 0x0, 0x3ff0000000000000, + 0x3c9b3b4f1a88bf6e, 0x3feff63da9fb3335, + 0xbc7160139cd8dc5d, 0x3fefec9a3e778061, + 0xbc905e7a108766d1, 0x3fefe315e86e7f85, + 0x3c8cd2523567f613, 0x3fefd9b0d3158574, + 0xbc8bce8023f98efa, 0x3fefd06b29ddf6de, + 0x3c60f74e61e6c861, 0x3fefc74518759bc8, + 0x3c90a3e45b33d399, 0x3fefbe3ecac6f383, + 0x3c979aa65d837b6d, 0x3fefb5586cf9890f, + 0x3c8eb51a92fdeffc, 0x3fefac922b7247f7, + 0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2, + 0xbc6a033489906e0b, 0x3fef9b66affed31b, + 0xbc9556522a2fbd0e, 0x3fef9301d0125b51, + 0xbc5080ef8c4eea55, 0x3fef8abdc06c31cc, + 0xbc91c923b9d5f416, 0x3fef829aaea92de0, + 0x3c80d3e3e95c55af, 0x3fef7a98c8a58e51, + 0xbc801b15eaa59348, 0x3fef72b83c7d517b, + 0xbc8f1ff055de323d, 0x3fef6af9388c8dea, + 0x3c8b898c3f1353bf, 0x3fef635beb6fcb75, + 0xbc96d99c7611eb26, 0x3fef5be084045cd4, + 0x3c9aecf73e3a2f60, 0x3fef54873168b9aa, + 0xbc8fe782cb86389d, 0x3fef4d5022fcd91d, + 0x3c8a6f4144a6c38d, 0x3fef463b88628cd6, + 0x3c807a05b0e4047d, 0x3fef3f49917ddc96, + 0x3c968efde3a8a894, 0x3fef387a6e756238, + 0x3c875e18f274487d, 0x3fef31ce4fb2a63f, + 0x3c80472b981fe7f2, 0x3fef2b4565e27cdd, + 0xbc96b87b3f71085e, 0x3fef24dfe1f56381, + 0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1, + 0xbc3d219b1a6fbffa, 0x3fef187fd0dad990, + 0x3c8b3782720c0ab4, 0x3fef1285a6e4030b, + 0x3c6e149289cecb8f, 0x3fef0cafa93e2f56, + 0x3c834d754db0abb6, 0x3fef06fe0a31b715, + 0x3c864201e2ac744c, 0x3fef0170fc4cd831, + 0x3c8fdd395dd3f84a, 0x3feefc08b26416ff, + 0xbc86a3803b8e5b04, 0x3feef6c55f929ff1, + 0xbc924aedcc4b5068, 0x3feef1a7373aa9cb, + 0xbc9907f81b512d8e, 0x3feeecae6d05d866, + 0xbc71d1e83e9436d2, 0x3feee7db34e59ff7, + 0xbc991919b3ce1b15, 0x3feee32dc313a8e5, + 0x3c859f48a72a4c6d, 0x3feedea64c123422, + 0xbc9312607a28698a, 0x3feeda4504ac801c, + 0xbc58a78f4817895b, 0x3feed60a21f72e2a, + 0xbc7c2c9b67499a1b, 0x3feed1f5d950a897, + 0x3c4363ed60c2ac11, 0x3feece086061892d, + 0x3c9666093b0664ef, 0x3feeca41ed1d0057, + 0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0, + 0x3c93ff8e3f0f1230, 0x3feec32af0d7d3de, + 0x3c7690cebb7aafb0, 0x3feebfdad5362a27, + 0x3c931dbdeb54e077, 0x3feebcb299fddd0d, + 0xbc8f94340071a38e, 0x3feeb9b2769d2ca7, + 0xbc87deccdc93a349, 0x3feeb6daa2cf6642, + 0xbc78dec6bd0f385f, 0x3feeb42b569d4f82, + 0xbc861246ec7b5cf6, 0x3feeb1a4ca5d920f, + 0x3c93350518fdd78e, 0x3feeaf4736b527da, + 0x3c7b98b72f8a9b05, 0x3feead12d497c7fd, + 0x3c9063e1e21c5409, 0x3feeab07dd485429, + 0x3c34c7855019c6ea, 0x3feea9268a5946b7, + 0x3c9432e62b64c035, 0x3feea76f15ad2148, + 0xbc8ce44a6199769f, 0x3feea5e1b976dc09, + 0xbc8c33c53bef4da8, 0x3feea47eb03a5585, + 0xbc845378892be9ae, 0x3feea34634ccc320, + 0xbc93cedd78565858, 0x3feea23882552225, + 0x3c5710aa807e1964, 0x3feea155d44ca973, + 0xbc93b3efbf5e2228, 0x3feea09e667f3bcd, + 0xbc6a12ad8734b982, 0x3feea012750bdabf, + 0xbc6367efb86da9ee, 0x3fee9fb23c651a2f, + 0xbc80dc3d54e08851, 0x3fee9f7df9519484, + 0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74, + 0xbc86ee4ac08b7db0, 0x3fee9f9a48a58174, + 0xbc8619321e55e68a, 0x3fee9feb564267c9, + 0x3c909ccb5e09d4d3, 0x3feea0694fde5d3f, + 0xbc7b32dcb94da51d, 0x3feea11473eb0187, + 0x3c94ecfd5467c06b, 0x3feea1ed0130c132, + 0x3c65ebe1abd66c55, 0x3feea2f336cf4e62, + 0xbc88a1c52fb3cf42, 0x3feea427543e1a12, + 0xbc9369b6f13b3734, 0x3feea589994cce13, + 0xbc805e843a19ff1e, 0x3feea71a4623c7ad, + 0xbc94d450d872576e, 0x3feea8d99b4492ed, + 0x3c90ad675b0e8a00, 0x3feeaac7d98a6699, + 0x3c8db72fc1f0eab4, 0x3feeace5422aa0db, + 0xbc65b6609cc5e7ff, 0x3feeaf3216b5448c, + 0x3c7bf68359f35f44, 0x3feeb1ae99157736, + 0xbc93091fa71e3d83, 0x3feeb45b0b91ffc6, + 0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5, + 0xbc6c23f97c90b959, 0x3feeba44cbc8520f, + 0xbc92434322f4f9aa, 0x3feebd829fde4e50, + 0xbc85ca6cd7668e4b, 0x3feec0f170ca07ba, + 0x3c71affc2b91ce27, 0x3feec49182a3f090, + 0x3c6dd235e10a73bb, 0x3feec86319e32323, + 0xbc87c50422622263, 0x3feecc667b5de565, + 0x3c8b1c86e3e231d5, 0x3feed09bec4a2d33, + 0xbc91bbd1d3bcbb15, 0x3feed503b23e255d, + 0x3c90cc319cee31d2, 0x3feed99e1330b358, + 0x3c8469846e735ab3, 0x3feede6b5579fdbf, + 0xbc82dfcd978e9db4, 0x3feee36bbfd3f37a, + 0x3c8c1a7792cb3387, 0x3feee89f995ad3ad, + 0xbc907b8f4ad1d9fa, 0x3feeee07298db666, + 0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb, + 0xbc90a40e3da6f640, 0x3feef9728de5593a, + 0xbc68d6f438ad9334, 0x3feeff76f2fb5e47, + 0xbc91eee26b588a35, 0x3fef05b030a1064a, + 0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2, + 0xbc91bdfbfa9298ac, 0x3fef12c25bd71e09, + 0x3c736eae30af0cb3, 0x3fef199bdd85529c, + 0x3c8ee3325c9ffd94, 0x3fef20ab5fffd07a, + 0x3c84e08fd10959ac, 0x3fef27f12e57d14b, + 0x3c63cdaf384e1a67, 0x3fef2f6d9406e7b5, + 0x3c676b2c6c921968, 0x3fef3720dcef9069, + 0xbc808a1883ccb5d2, 0x3fef3f0b555dc3fa, + 0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c, + 0xbc900dae3875a949, 0x3fef4f87080d89f2, + 0x3c74a385a63d07a7, 0x3fef5818dcfba487, + 0xbc82919e2040220f, 0x3fef60e316c98398, + 0x3c8e5a50d5c192ac, 0x3fef69e603db3285, + 0x3c843a59ac016b4b, 0x3fef7321f301b460, + 0xbc82d52107b43e1f, 0x3fef7c97337b9b5f, + 0xbc892ab93b470dc9, 0x3fef864614f5a129, + 0x3c74b604603a88d3, 0x3fef902ee78b3ff6, + 0x3c83c5ec519d7271, 0x3fef9a51fbc74c83, + 0xbc8ff7128fd391f0, 0x3fefa4afa2a490da, + 0xbc8dae98e223747d, 0x3fefaf482d8e67f1, + 0x3c8ec3bc41aa2008, 0x3fefba1bee615a27, + 0x3c842b94c3a9eb32, 0x3fefc52b376bba97, + 0x3c8a64a931d185ee, 0x3fefd0765b6e4540, + 0xbc8e37bae43be3ed, 0x3fefdbfdad9cbe14, + 0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8, + 0x3c5305c14160cc89, 0x3feff3c22b8f71f1, + } +}; + +const bool WANT_ROUNDING = true; +macro float __math_uflowf(uint sign) => __math_xflow(sign, 0x1p97f); +macro double __math_uflow(ulong sign) => __math_xflow(sign, 0x1p769); +macro float __math_oflowf(uint sign) => __math_xflow(sign, 0x1p-95f); +macro double __math_oflow(ulong sign) => __math_xflow(sign, 0x1p-767); + +macro __math_xflow(sign, v) +{ + $typeof(v) temp; + @volatile_store(temp, (sign ? -v : v) * v); + return temp; +} + +macro force_eval_add(x, v) +{ + $typeof(x) temp; + @volatile_store(temp, x + v); } \ No newline at end of file diff --git a/lib/std/math/math_nolibc/rempi.c3 b/lib/std/math/math_nolibc/rempi.c3 index 9b045c076..50b7f8773 100644 --- a/lib/std/math/math_nolibc/rempi.c3 +++ b/lib/std/math/math_nolibc/rempi.c3 @@ -1,5 +1,8 @@ -module std::math::nolibc::rempi; +module std::math::nolibc; import std::math; + +$if (!env::COMPILER_LIBC_AVAILABLE): + /* origin: FreeBSD /usr/src/lib/msun/src/e_rem_pio2f.c */ /* * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. @@ -22,21 +25,18 @@ import std::math; * use __rem_pio2_large() for large x */ -$if (!env::COMPILER_LIBC_AVAILABLE): - /* * invpio2: 53 bits of 2/pi * pio2_1: first 25 bits of pi/2 * pio2_1t: pi/2 - pio2_1 */ -private const double TOINT = 1.5 / math::DOUBLE_EPSILON; -private const double PIO4 = 0x1.921fb6p-1; -private const double INVPIO2 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */ -private const double PIO2_1 = 1.57079631090164184570e+00; /* 0x3FF921FB, 0x50000000 */ -private const double PIO2_1T = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */ - fn int __rem_pio2f(float x, double *y) { + const double PIO4 = 0x1.921fb6p-1; + const double INVPIO2 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */ + const double PIO2_1 = 1.57079631090164184570e+00; /* 0x3FF921FB, 0x50000000 */ + const double PIO2_1T = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */ + uint ux = bitcast(x, uint); uint ix = ux & 0x7fffffff; /* 25+53 bit pi is good enough for medium size */ @@ -44,7 +44,7 @@ fn int __rem_pio2f(float x, double *y) { // |x| ~< 2^28*(pi/2), medium size // Use a specialized rint() to get f. - uint f = (uint)(((double)x * INVPIO2 + TOINT) - TOINT); + uint f = (uint)(((double)x * INVPIO2 + TOINT15) - TOINT15); int n = (int)f; *y = x - f * PIO2_1 - f * PIO2_1T; /* Matters with directed rounding. */ @@ -265,7 +265,7 @@ fn int __rem_pio2_large(double* x, double* y, int e0, int nx, int prec) else { /* break z into 24-bit if necessary */ - z = nolibc::_scalbn(z,-q0); + z = _scalbn(z,-q0); if (z >= 0x1p24) { fw = (double)(int)(0x1p-24 * z); @@ -344,4 +344,197 @@ fn int __rem_pio2_large(double* x, double* y, int e0, int nx, int prec) return n & 7; } +/* origin: FreeBSD /usr/src/lib/msun/src/e_rem_pio2.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunSoft, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + * + * Optimized by Bruce D. Evans. + */ +/* + * invpio2: 53 bits of 2/pi + * pio2_1: first 33 bit of pi/2 + * pio2_1t: pi/2 - pio2_1 + * pio2_2: second 33 bit of pi/2 + * pio2_2t: pi/2 - (pio2_1+pio2_2) + * pio2_3: third 33 bit of pi/2 + * pio2_3t: pi/2 - (pio2_1+pio2_2+pio2_3) + */ + +/* caller must handle the case when reduction is not needed: |x| ~<= pi/4 */ +fn int __rem_pio2(double x, double *y) +{ + const double PIO4 = 0x1.921fb54442d18p-1; + const double INVPIO2 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */ + const double PIO2_1 = 1.57079632673412561417e+00; /* 0x3FF921FB, 0x54400000 */ + const double PIO2_1T = 6.07710050650619224932e-11; /* 0x3DD0B461, 0x1A626331 */ + const double PIO2_2 = 6.07710050630396597660e-11; /* 0x3DD0B461, 0x1A600000 */ + const double PIO2_2T = 2.02226624879595063154e-21; /* 0x3BA3198A, 0x2E037073 */ + const double PIO2_3 = 2.02226624871116645580e-21; /* 0x3BA3198A, 0x2E000000 */ + const double PIO2_3T = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */ + + ulong u = bitcast(x, ulong); + int sign = (int)(u >> 63); + uint ix = (uint)(u >> 32 & 0x7fffffff); + switch + { + // |x| ~<= 5pi/4 + case ix <= 0x400f6a7a: + // |x| ~= pi/2 or 2pi/2 + if ((ix & 0xfffff) == 0x921fb) + { + // cancellation -- use medium case + break; + } + // |x| ~<= 3pi/4 + if (ix <= 0x4002d97c) + { + if (!sign) + { + double z = x - PIO2_1; /* one round good to 85 bits */ + y[0] = z - PIO2_1T; + y[1] = (z - y[0]) - PIO2_1T; + return 1; + } + double z = x + PIO2_1; + y[0] = z + PIO2_1T; + y[1] = (z - y[0]) + PIO2_1T; + return -1; + } + if (!sign) + { + double z = x - 2 * PIO2_1; + y[0] = z - 2 * PIO2_1T; + y[1] = (z - y[0]) - 2 * PIO2_1T; + return 2; + } + double z = x + 2 * PIO2_1; + y[0] = z + 2 * PIO2_1T; + y[1] = (z - y[0]) + 2 * PIO2_1T; + return -2; + // |x| ~<= 9pi/4 + case ix <= 0x401c463b: + // |x| ~<= 7pi/4 + if (ix <= 0x4015fdbc) + { + // |x| ~= 3pi/2 + if (ix == 0x4012d97c) break; // Use medium + + if (!sign) + { + double z = x - 3 * PIO2_1; + y[0] = z - 3 * PIO2_1T; + y[1] = (z - y[0]) - 3 * PIO2_1T; + return 3; + } + double z = x + 3 * PIO2_1; + y[0] = z + 3 * PIO2_1T; + y[1] = (z - y[0]) + 3 * PIO2_1T; + return -3; + } + // |x| ~= 4pi/2 + if (ix == 0x401921fb) break; // Use medium + if (!sign) + { + double z = x - 4 * PIO2_1; + y[0] = z - 4 * PIO2_1T; + y[1] = (z - y[0]) - 4 * PIO2_1T; + return 4; + } + double z = x + 4*PIO2_1; + y[0] = z + 4 * PIO2_1T; + y[1] = (z - y[0]) + 4 * PIO2_1T; + return -4; + case ix < 0x413921fb: + break; // medium + case ix >= 0x7ff00000: + // x is inf or NaN */ + y[0] = y[1] = x - x; + return 0; + default: + // all other (large) arguments + // set z = scalbn(|x|,-ilogb(x)+23) + u = bitcast(x, ulong); + u &= ((ulong)-1) >> 12; + u |= (ulong)(0x3ff + 23) << 52; + double z = bitcast(u, double); + double[3] tx; + int i; + for (i = 0; i < 2; i++) + { + tx[i] = (double)(int)z; + z = (z - tx[i]) * 0x1p24; + } + tx[i] = z; + // skip zero terms, first term is non-zero + while (tx[i] == 0.0) i--; + double[2] ty; + int n = __rem_pio2_large(&tx, &ty, (int)(ix >> 20) - (0x3ff + 23), i + 1, 1); + if (sign) + { + y[0] = -ty[0]; + y[1] = -ty[1]; + return -n; + } + y[0] = ty[0]; + y[1] = ty[1]; + return n; + } + + // |x| ~< 2^20*(pi/2), medium size + // rint(x/(pi/2)) + double fnn = (double)x * INVPIO2 + TOINT15 - TOINT15; + int n = (int)fnn; + double r = x - fnn * PIO2_1; + double w = fnn * PIO2_1T; // 1st round, good to 85 bits + // Matters with directed rounding. + if (r - w < -PIO4) /*@unlikely*/ + { + n--; + fnn--; + r = x - fnn * PIO2_1; + w = fnn * PIO2_1T; + } + else if (r - w > PIO4) /*@unlikely*/ + { + n++; + fnn++; + r = x - fnn * PIO2_1; + w = fnn * PIO2_1T; + } + y[0] = r - w; + u = bitcast(y[0], ulong); + int ey = (int)(u >> 52 & 0x7ff); + int ex = (int)(ix >> 20); + if (ex - ey > 16) + { + // 2nd round, good to 118 bits */ + double t = r; + w = fnn * PIO2_2; + r = t - w; + w = fnn * PIO2_2T - ((t - r) - w); + y[0] = r - w; + u = bitcast(y[0], ulong); + ey = (int)(u >> 52 & 0x7ff); + if (ex - ey > 49) + { + // 3rd round, good to 151 bits, covers all cases + t = r; + w = fnn * PIO2_3; + r = t - w; + w = fnn * PIO2_3T - ((t - r) - w); + y[0] = r - w; + } + } + y[1] = (r - y[0]) - w; + return n; + + +} $endif; \ No newline at end of file diff --git a/lib/std/math/math_nolibc/round.c3 b/lib/std/math/math_nolibc/round.c3 index 1a5b47d26..a527f019b 100644 --- a/lib/std/math/math_nolibc/round.c3 +++ b/lib/std/math/math_nolibc/round.c3 @@ -11,7 +11,7 @@ fn double _round(double x) @extname("round") @weak if (e < 0x3ff - 1) { /* raise inexact if x!=0 */ - @force_eval_add(x, TOINT); + force_eval_add(x, TOINT); return 0 * x; } double y = (x + TOINT) - TOINT - x; @@ -28,7 +28,7 @@ fn double _round(double x) @extname("round") @weak return y; } -fn float roundf(float x) @extname("roundf") @weak +fn float _roundf(float x) @extname("roundf") @weak { uint u = bitcast(x, uint); int e = (u >> 23) & 0xff; @@ -36,7 +36,7 @@ fn float roundf(float x) @extname("roundf") @weak if (u >> 31) x = -x; if (e < 0x7f - 1) { - @force_eval_add(x, TOINTF); + force_eval_add(x, TOINTF); return 0 * x; } float y = (x + TOINTF) - TOINTF - x; diff --git a/lib/std/math/math_nolibc/sin.c3 b/lib/std/math/math_nolibc/sin.c3 new file mode 100644 index 000000000..2e642fb6c --- /dev/null +++ b/lib/std/math/math_nolibc/sin.c3 @@ -0,0 +1,122 @@ +module std::math::nolibc; + +$if (!env::COMPILER_LIBC_AVAILABLE): + +/* origin: FreeBSD /usr/src/lib/msun/src/s_sinf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +fn float _sinf(float x) @weak @extname("sinf") +{ + uint ix = bitcast(x, uint); + int sign = ix >> 31; + ix &= 0x7fffffff; + switch + { + case ix <= 0x3f490fda: + // |x| ~<= pi/4 + if (ix < 0x39800000) + { + /* |x| < 2**-12 */ + // raise inexact if x!=0 and underflow if subnormal + if (ix < 0x00800000) + { + @volatile_load(x) / 0x1p120f; + } + else + { + @volatile_load(x) + 0x1p120f; + } + return x; + } + return __sindf(x); + case ix <= 0x407b53d1: + // |x| ~<= 5*pi/4 + if (ix <= 0x4016cbe3) + { + // |x| ~<= 3pi/4 + if (sign) return -__cosdf(x + S1PI2); + return __cosdf(x - S1PI2); + } + return __sindf(sign ? -(x + S2PI2) : -(x - S2PI2)); + case ix <= 0x40e231d5: + // |x| ~<= 9*pi/4 + if (ix <= 0x40afeddf) + { + // |x| ~<= 7*pi/4 + return sign ? __cosdf(x + S3PI2) : -__cosdf(x - S3PI2); + } + return __sindf(sign ? x + S4PI2 : x - S4PI2); + case ix >= 0x7f800000: + // sin(Inf or NaN) is NaN + return x - x; + } + /* general argument reduction needed */ + double y = void; + switch (__rem_pio2f(x, &y) & 3) + { + case 0: return __sindf(y); + case 1: return __cosdf(y); + case 2: return __sindf(-y); + default: return -__cosdf(y); + } + unreachable(); +} + +/* origin: FreeBSD /usr/src/lib/msun/src/s_sin.c */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +fn double sin(double x) @extern("sin") @weak +{ + // High word of x. + uint ix = (uint)(bitcast(x, ulong) >> 32); + ix &= 0x7fffffff; + switch + { + // |x| ~< pi/4 + case ix <= 0x3fe921fb: + // |x| < 2**-26 + if (ix < 0x3e500000) + { + // raise inexact if x != 0 and underflow if subnormal + force_eval_add(x, ix < 0x00100000 ? -0x1p120f : 0x1p120f); + return x; + } + return __sin(x, 0.0, 0); + // sin(Inf or NaN) is NaN + case ix >= 0x7ff00000: + return x - x; + default: + // argument reduction needed + double[2] y; + switch (__rem_pio2(x, &y) & 3) + { + case 0: return __sin(y[0], y[1], 1); + case 1: return __cos(y[0], y[1]); + case 2: return -__sin(y[0], y[1], 1); + default: return -__cos(y[0], y[1]); + } + } +} +$endif; \ No newline at end of file diff --git a/lib/std/math/math_nolibc/sincosf.c3 b/lib/std/math/math_nolibc/sincosf.c3 new file mode 100644 index 000000000..11f7b839c --- /dev/null +++ b/lib/std/math/math_nolibc/sincosf.c3 @@ -0,0 +1,112 @@ +module std::math::nolibc; + +$if (!env::COMPILER_LIBC_AVAILABLE): + +/* origin: FreeBSD /usr/src/lib/msun/src/s_sinf.c */ +/* + * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. + * Optimized by Bruce D. Evans. + */ +/* + * ==================================================== + * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. + * + * Developed at SunPro, a Sun Microsystems, Inc. business. + * Permission to use, copy, modify, and distribute this + * software is freely granted, provided that this notice + * is preserved. + * ==================================================== + */ + +fn void sincosf(float x, float *sin, float *cos) @extern("sincosf") @weak +{ + + uint ix = bitcast(x, uint); + uint sign = ix >> 31; + ix &= 0x7fffffff; + + switch + { + // |x| ~<= pi/4 + case ix <= 0x3f490fda: + // |x| < 2**-12 + if (ix < 0x39800000) + { + // raise inexact if x!=0 and underflow if subnormal + float z; + @volatile_store(z, ix < 0x00100000 ? @volatile_load(x) / 0x1p120f : @volatile_load(x) + 0x1p120f); + *sin = x; + *cos = 1.0f; + return; + } + *sin = __sindf(x); + *cos = __cosdf(x); + return; + // |x| ~<= 5*pi/4 + case ix <= 0x407b53d1: + // |x| ~<= 3pi/4 + if (ix <= 0x4016cbe3) + { + if (sign) + { + *sin = -__cosdf(x + S1PI2); + *cos = __sindf(x + S1PI2); + return; + } + *sin = __cosdf(S1PI2 - x); + *cos = __sindf(S1PI2 - x); + return; + } + /* -sin(x+c) is not correct if x+c could be 0: -0 vs +0 */ + *sin = -__sindf(sign ? x + S2PI2 : x - S2PI2); + *cos = -__cosdf(sign ? x + S2PI2 : x - S2PI2); + return; + case ix <= 0x40e231d5: + // |x| ~<= 9*pi/4 + if (ix <= 0x40afeddf) + { + // |x| ~<= 7*pi/4 + if (sign) + { + *sin = __cosdf(x + S3PI2); + *cos = -__sindf(x + S3PI2); + return; + } + *sin = -__cosdf(x - S3PI2); + *cos = __sindf(x - S3PI2); + return; + } + *sin = __sindf(sign ? x + S4PI2 : x - S4PI2); + *cos = __cosdf(sign ? x + S4PI2 : x - S4PI2); + return; + case ix >= 0x7f800000: + // sin(Inf or NaN) is NaN + *sin = *cos = x - x; + return; + default: + // general argument reduction needed + double y = void; + uint n = __rem_pio2f(x, &y); + float s = __sindf(y); + float c = __cosdf(y); + switch (n & 3) + { + case 0: + *sin = s; + *cos = c; + case 1: + *sin = c; + *cos = -s; + case 2: + *sin = -s; + *cos = -c; + case 3: + default: + *sin = -c; + *cos = s; + } + } + +} + +$endif; \ No newline at end of file diff --git a/lib/std/math/math_nolibc/trig.c3 b/lib/std/math/math_nolibc/trig.c3 index 33acb118c..77b46caff 100644 --- a/lib/std/math/math_nolibc/trig.c3 +++ b/lib/std/math/math_nolibc/trig.c3 @@ -2,218 +2,6 @@ module std::math::nolibc::trig; $if (!env::COMPILER_LIBC_AVAILABLE): - -/* origin: FreeBSD /usr/src/lib/msun/src/s_sinf.c */ -/* - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - * Optimized by Bruce D. Evans. - */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - -private const double S1PI2_F = math::PI_2; /* 0x3FF921FB, 0x54442D18 */ -private const double S2PI2_F = math::PI; /* 0x400921FB, 0x54442D18 */ -private const double S3PI2_F = math::PI + math::PI_2; /* 0x4012D97C, 0x7F3321D2 */ -private const double S4PI2_F = math::PI + math::PI; /* 0x401921FB, 0x54442D18 */ - - -fn float _sinf(float x) @weak @extname("sinf") -{ - uint ix = bitcast(x, uint); - int sign = ix >> 31; - ix &= 0x7fffffff; - switch - { - case ix <= 0x3f490fda: - // |x| ~<= pi/4 - if (ix < 0x39800000) - { - /* |x| < 2**-12 */ - // raise inexact if x!=0 and underflow if subnormal - if (ix < 0x00800000) - { - @volatile_load(x) / 0x1p120f; - } - else - { - @volatile_load(x) + 0x1p120f; - } - return x; - } - return __sindf(x); - case ix <= 0x407b53d1: - // |x| ~<= 5*pi/4 - if (ix <= 0x4016cbe3) - { - // |x| ~<= 3pi/4 - if (sign) return -__cosdf(x + S1PI2_F); - return __cosdf(x - S1PI2_F); - } - return __sindf(sign ? -(x + S2PI2_F) : -(x - S2PI2_F)); - case ix <= 0x40e231d5: - // |x| ~<= 9*pi/4 - if (ix <= 0x40afeddf) - { - // |x| ~<= 7*pi/4 - return sign ? __cosdf(x + S3PI2_F) : -__cosdf(x - S3PI2_F); - } - return __sindf(sign ? x + S4PI2_F : x - S4PI2_F); - case ix >= 0x7f800000: - // sin(Inf or NaN) is NaN - return x - x; - } - /* general argument reduction needed */ - double y = void; - int n = rempi::__rem_pio2f(x, &y); - switch (n & 3) - { - case 0: return __sindf(y); - case 1: return __cosdf(y); - case 2: return __sindf(-y); - default: return -__cosdf(y); - } - unreachable(); -} - -/* origin: FreeBSD /usr/src/lib/msun/src/k_sinf.c */ -/* - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - * Optimized by Bruce D. Evans. - */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ - - -/* |sin(x)/x - s(x)| < 2**-37.5 (~[-4.89e-12, 4.824e-12]). */ -private const double S1 = -0x15555554cbac77.0p-55; /* -0.166666666416265235595 */ -private const double S2 = 0x111110896efbb2.0p-59; /* 0.0083333293858894631756 */ -private const double S3 = -0x1a00f9e2cae774.0p-65; /* -0.000198393348360966317347 */ -private const double S4 = 0x16cd878c3b46a7.0p-71; /* 0.0000027183114939898219064 */ - -fn float __sindf(double x) -{ - double z = x * x; - double w = z * z; - double r = S3 + z * S4; - double s = z * x; - return (float)((x + s * (S1 + z * S2)) + s * w * r); -} - -/* origin: FreeBSD /usr/src/lib/msun/src/k_cosf.c */ -/* - * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com. - * Debugged and optimized by Bruce D. Evans. - */ -/* - * ==================================================== - * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. - * - * Developed at SunPro, a Sun Microsystems, Inc. business. - * Permission to use, copy, modify, and distribute this - * software is freely granted, provided that this notice - * is preserved. - * ==================================================== - */ -/* |cos(x) - c(x)| < 2**-34.1 (~[-5.37e-11, 5.295e-11]). */ - -private const double C0 = -0x1ffffffd0c5e81.0p-54; /* -0.499999997251031003120 */ -private const double C1 = 0x155553e1053a42.0p-57; /* 0.0416666233237390631894 */ -private const double C2 = -0x16c087e80f1e27.0p-62; /* -0.00138867637746099294692 */ -private const double C3 = 0x199342e0ee5069.0p-68; /* 0.0000243904487962774090654 */ - -fn float __cosdf(double x) -{ - /* Try to optimize for parallel evaluation as in __tandf.c. */ - double z = x * x; - double w = z * z; - double r = C2 + z * C3; - return (float)(((1.0f + z * C0) + w * C1) + (w * z) * r); -} - -fn float _cosf(float x) @extname("cosf") @weak -{ - uint ix = bitcast(x, uint); - uint sign = ix >> 31; - ix &= 0x7fffffff; - - switch - { - case (ix <= 0x3f490fda): - // |x| < 2**-12 - if (ix < 0x39800000) { - /* raise inexact if x != 0 */ - @volatile_load(x) + 0x1p120f; - return 1.0f; - } - return __cosdf(x); - - // |x| ~<= 5*pi/4 - case (ix <= 0x407b53d1): - // |x| ~> 3*pi/4 - if (ix > 0x4016cbe3) - { - return -__cosdf(sign ? x + S2PI2_F : x - S2PI2_F); - } - else - { - if (sign) return __sindf(x + S1PI2_F); - return __sindf(S1PI2_F - x); - } - // |x| ~<= 9*pi/4 - case (ix <= 0x40e231d5): - if (ix > 0x40afeddf) - { - return __cosdf(sign ? x + S4PI2_F : x - S4PI2_F); - } - else - { - if (sign) return __sindf((double)(-x) - S3PI2_F); - return __sindf(x - S3PI2_F); - } - } - - /* general argument reduction needed */ - double y = void; - int n = rempi::__rem_pio2f(x, &y); - switch (n & 3) - { - case 0: return __cosdf(y); - case 1: return __sindf(-y); - case 2: return -__cosdf(y); - default: return __sindf(y); - } - unreachable(); -} - -fn float sincosf_broken(float x) @extname("sincosf") @weak -{ - unreachable("'sincosf' not supported"); -} - -fn double sin_broken(double x) @extname("sin") @weak -{ - unreachable("'sin' not supported"); -} -fn double cos_broken(double x) @extname("cos") @weak -{ - unreachable("'cos' not supported"); -} fn double sincos_broken(double x) @extname("sincos") @weak { unreachable("'sinccos' not supported"); diff --git a/lib/std/math/math_nolibc/trunc.c3 b/lib/std/math/math_nolibc/trunc.c3 index eb909633d..c72a9c7e0 100644 --- a/lib/std/math/math_nolibc/trunc.c3 +++ b/lib/std/math/math_nolibc/trunc.c3 @@ -10,7 +10,7 @@ fn double _trunc(double x) @weak @extname("trunc") if (e < 12) e = 1; ulong m = ((ulong)-1) >> e; if (i & m == 0) return x; - @force_eval_add(x, 0x1p120f); + force_eval_add(x, 0x1p120f); i &= ~m; return bitcast(i, double); } @@ -23,7 +23,7 @@ fn float _truncf(float x) @weak @extname("truncf") if (e < 9) e = 1; uint m = ((uint)-1) >> e; if (i & m == 0) return x; - @force_eval_add(x, 0x1p120f); + force_eval_add(x, 0x1p120f); i &= ~m; return bitcast(i, float); } diff --git a/src/compiler/float.c b/src/compiler/float.c index e6fbbeefc..944c9e627 100644 --- a/src/compiler/float.c +++ b/src/compiler/float.c @@ -243,13 +243,13 @@ Float float_from_hex(const char *string, char **error) } switch (i) { + case 0: case 32: kind = TYPE_F32; break; case 16: kind = TYPE_F16; break; - case 0: case 64: kind = TYPE_F64; break; diff --git a/src/version.h b/src/version.h index a111b76f6..c4ceee62f 100644 --- a/src/version.h +++ b/src/version.h @@ -1 +1 @@ -#define COMPILER_VERSION "0.4.39" \ No newline at end of file +#define COMPILER_VERSION "0.4.40" \ No newline at end of file