Fix issue with hex floats with f being assumed to be double. Added cos sincos sincosf pow2 pow2f to nolibc.

2026-02-27 12:01:16 +00:00 · 2023-02-02 13:20:52 +01:00
parent 1d8e341572
commit 0f4d20f168
17 changed files with 1063 additions and 242 deletions
--- a/lib/std/math/math_nolibc/__cos.c3
+++ b/lib/std/math/math_nolibc/__cos.c3
@@ -0,0 +1,34 @@
+module std::math::nolibc;
+
+$if (!env::COMPILER_LIBC_AVAILABLE):
+
+/* origin: FreeBSD /usr/src/lib/msun/src/k_cos.c */
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunSoft, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+
+fn double __cos(double x, double y) @extern("__cos") @weak
+{
+	const C1 =  4.16666666666666019037e-02; /* 0x3FA55555, 0x5555554C */
+	const C2 = -1.38888888888741095749e-03; /* 0xBF56C16C, 0x16C15177 */
+	const C3 =  2.48015872894767294178e-05; /* 0x3EFA01A0, 0x19CB1590 */
+	const C4 = -2.75573143513906633035e-07; /* 0xBE927E4F, 0x809C52AD */
+	const C5 =  2.08757232129817482790e-09; /* 0x3E21EE9E, 0xBDB4B1C4 */
+	const C6 = -1.13596475577881948265e-11; /* 0xBDA8FAE9, 0xBE8838D4 */
+
+	double z = x * x;
+	double w = z * z;
+	double r = z * (C1 + z * (C2 + z * C3)) + w * w * (C4 + z * (C5 + z * C6));
+	double hz = 0.5 * z;
+	w = 1.0 - hz;
+	return w + (((1.0 - w) - hz) + (z * r - x * y));
+}
+
+$endif;
--- a/lib/std/math/math_nolibc/__cosdf.c3
+++ b/lib/std/math/math_nolibc/__cosdf.c3
@@ -0,0 +1,36 @@
+module std::math::nolibc;
+
+$if (!env::COMPILER_LIBC_AVAILABLE):
+
+/* origin: FreeBSD /usr/src/lib/msun/src/k_cosf.c */
+/*
+ * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
+ * Debugged and optimized by Bruce D. Evans.
+ */
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunPro, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+/* |cos(x) - c(x)| < 2**-34.1 (~[-5.37e-11, 5.295e-11]). */
+
+private const double C0  = -0x1ffffffd0c5e81.0p-54; /* -0.499999997251031003120 */
+private const double C1  =  0x155553e1053a42.0p-57; /*  0.0416666233237390631894 */
+private const double C2  = -0x16c087e80f1e27.0p-62; /* -0.00138867637746099294692 */
+private const double C3  =  0x199342e0ee5069.0p-68; /*  0.0000243904487962774090654 */
+
+fn float __cosdf(double x) @extern("__cosdf") @weak
+{
+	/* Try to optimize for parallel evaluation as in __tandf.c. */
+	double z = x * x;
+	double w = z * z;
+	double r = C2 + z * C3;
+	return (float)(((1.0f + z * C0) + w * C1) + (w * z) * r);
+}
+
+$endif;
--- a/lib/std/math/math_nolibc/__sin.c3
+++ b/lib/std/math/math_nolibc/__sin.c3
@@ -0,0 +1,35 @@
+module std::math::nolibc;
+
+$if (!env::COMPILER_LIBC_AVAILABLE):
+
+/* origin: FreeBSD /usr/src/lib/msun/src/k_sin.c */
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunSoft, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+fn double __sin(double x, double y, int iy) @extern("__sin") @weak
+{
+
+	const S1 = -1.66666666666666324348e-01; /* 0xBFC55555, 0x55555549 */
+	const S2 = 8.33333333332248946124e-03; /* 0x3F811111, 0x1110F8A6 */
+	const S3 = -1.98412698298579493134e-04; /* 0xBF2A01A0, 0x19C161D5 */
+	const S4 = 2.75573137070700676789e-06; /* 0x3EC71DE3, 0x57B1FE7D */
+	const S5 = -2.50507602534068634195e-08; /* 0xBE5AE5E6, 0x8A2B9CEB */
+	const S6 = 1.58969099521155010221e-10; /* 0x3DE5D93A, 0x5ACFD57C */
+
+	double z = x * x;
+	double w = z * z;
+	double r = S2 + z * (S3 + z * S4) + z * w * (S5 + z * S6);
+	double v = z * x;
+	return iy == 0
+		? x + v * (S1 + z * r)
+		: x - ((z * (0.5 * y - v * r) - y) - v * S1);
+}
+
+$endif;
--- a/lib/std/math/math_nolibc/__sindf.c3
+++ b/lib/std/math/math_nolibc/__sindf.c3
@@ -0,0 +1,34 @@
+module std::math::nolibc;
+
+$if (!env::COMPILER_LIBC_AVAILABLE):
+
+/* origin: FreeBSD /usr/src/lib/msun/src/k_sinf.c */
+/*
+ * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
+ * Optimized by Bruce D. Evans.
+ */
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunPro, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+// |sin(x)/x - s(x)| < 2**-37.5 (~[-4.89e-12, 4.824e-12]).
+fn float __sindf(double x) @extern("__sindf") @weak
+{
+	const S1F = -0x15555554cbac77.0p-55; /* -0.166666666416265235595 */
+	const S2F =  0x111110896efbb2.0p-59; /*  0.0083333293858894631756 */
+	const S3F = -0x1a00f9e2cae774.0p-65; /* -0.000198393348360966317347 */
+	const S4F =  0x16cd878c3b46a7.0p-71; /*  0.0000027183114939898219064 */
+	double z = x * x;
+	double w = z * z;
+	double r = S3F + z * S4F;
+	double s = z * x;
+	return (float)((x + s * (S1F + z * S2F)) +  s * w * r);
+}
+
+$endif;
--- a/lib/std/math/math_nolibc/ceil.c3
+++ b/lib/std/math/math_nolibc/ceil.c3
@@ -12,7 +12,7 @@ fn double _ceil(double x) @weak @extname("ceil")
 	// special case because of non-nearest rounding modes
 	if (e <= 0x3ff - 1)
 	{
-		@force_eval_add(y, 0);
+		force_eval_add(y, 0);
 		return ui >> 63 ? -0.0 : 1;
 	}
 	return y < 0 ? x + y + 1 : x + y;
@@ -30,11 +30,11 @@ fn float _ceilf(float x) @weak @extname("ceilf")
 		case e >= 0:
 			uint m = 0x007fffff >> e;
 			if (u & m == 0) return x;
-			@force_eval_add(x, 0x1p120f);
+			force_eval_add(x, 0x1p120f);
 			if (u >> 31 == 0) u += m;
 			u &= ~m;
 		case u >> 31 != 0:
-			@force_eval_add(x, 0x1p120f);
+			force_eval_add(x, 0x1p120f);
 			return -0.0f;
 		case u << 1 != 0:
 			return 1;
--- a/lib/std/math/math_nolibc/cos.c3
+++ b/lib/std/math/math_nolibc/cos.c3
@@ -0,0 +1,91 @@
+module std::math::nolibc;
+
+$if (!env::COMPILER_LIBC_AVAILABLE):
+
+fn float _cosf(float x) @extern("cosf") @weak
+{
+	uint ix = bitcast(x, uint);
+	uint sign = ix >> 31;
+	ix &= 0x7fffffff;
+
+	switch
+	{
+		case (ix <= 0x3f490fda):
+			// |x| < 2**-12
+			if (ix < 0x39800000)
+			{
+				/* raise inexact if x != 0 */
+				force_eval_add(x, 0x1p120f);
+				return 1.0f;
+			}
+			return __cosdf(x);
+		// |x| ~<= 5*pi/4
+		case (ix <= 0x407b53d1):
+			// |x|  ~> 3*pi/4
+			if (ix > 0x4016cbe3) return -__cosdf(sign ? x + S2PI2 : x - S2PI2);
+			return sign ? __sindf(x + S1PI2) : __sindf(S1PI2 - x);
+		// |x| ~<= 9*pi/4
+		case (ix <= 0x40e231d5):
+			if (ix > 0x40afeddf) return __cosdf(sign ? x + S4PI2 : x - S4PI2);
+			return sign ? __sindf((double)(-x) - S3PI2) : __sindf(x - S3PI2);
+	}
+
+	// general argument reduction needed
+	double y = void;
+	switch (__rem_pio2f(x, &y) & 3)
+	{
+		case 0: return  __cosdf(y);
+		case 1: return  __sindf(-y);
+		case 2: return  -__cosdf(y);
+		default: return __sindf(y);
+	}
+	unreachable();
+}
+
+/* origin: FreeBSD /usr/src/lib/msun/src/s_cos.c */
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunPro, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+
+fn double _cos(double x) @extern("cos") @weak
+{
+	// High word of x.
+	uint ix = (uint)(bitcast(x, ulong) >> 32);
+	ix &= 0x7fffffff;
+
+	switch
+	{
+		// |x| ~< pi/4
+		case ix <= 0x3fe921fb:
+		    if (ix < 0x3e46a09e)
+		    {
+		        // |x| < 2**-27 * sqrt(2)
+			    // raise inexact if x!=0
+			    force_eval_add(x, 0x1p120f);
+			    return 1.0;
+		    }
+		    return __cos(x, 0);
+		case ix >= 0x7ff00000:
+			// cos(Inf or NaN) is NaN
+			return x - x;
+		default:
+			// argument reduction
+			double[2] y = void;
+			switch (__rem_pio2(x, &y) & 3)
+			{
+				case 0: return __cos(y[0], y[1]);
+				case 1: return -__sin(y[0], y[1], 1);
+				case 2: return -__cos(y[0], y[1]);
+				default: return __sin(y[0], y[1], 1);
+			}
+	}
+}
+
+$endif;
--- a/lib/std/math/math_nolibc/exp2.c3
+++ b/lib/std/math/math_nolibc/exp2.c3
@@ -2,14 +2,143 @@ module std::math::nolibc;

 $if (!env::COMPILER_LIBC_AVAILABLE):

-fn float exp2f_broken(float x) @extname("exp2f") @weak
+private macro uint _top12f(float x) => bitcast(x, uint) >> 20;
+
+
+fn float _exp2f(float x) @extern("exp2f") @weak
 {
-	unreachable("'exp2f' not supported");
+	double xd = x;
+	uint abstop = _top12f(x) & 0x7ff;
+	if (abstop >= _top12f(128.0f)) /* @unlikely */
+	{
+		switch
+		{
+			// |x| >= 128 or x is nan.
+			case bitcast(x, uint) == bitcast(-float.inf, uint):
+				return 0;
+			case abstop >= _top12f(float.inf):
+				return x + x;
+			case x > 0:
+				return __math_oflowf(0);
+			case x <= -150.0f:
+				return __math_uflowf(0);
+		}
+	}
+	const SHIFT = __EXP2F_DATA.shift_scaled;
+	const uint N = 1U << EXP2F_TABLE_BITS;
+	// x = k/N + r with r in [-1/(2N), 1/(2N)] and int k.
+	double kd = xd + SHIFT;
+	ulong ki = bitcast(kd, ulong);
+	kd -= SHIFT; /* k/N for int k.  */
+	double r = xd - kd;
+
+	// exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1)
+	ulong t = __EXP2F_DATA.tab[ki % N];
+	t += ki << (52 - EXP2F_TABLE_BITS);
+	double s = bitcast(t, double);
+	double z = __EXP2F_DATA.poly[0] * r + __EXP2F_DATA.poly[1];
+	double r2 = r * r;
+	double y = __EXP2F_DATA.poly[2] * r + 1;
+	y = z * r2 + y;
+	y = y * s;
+	return (float)y;
 }

-fn float exp2_broken(float x) @extname("exp2") @weak
+private fn double _exp2_specialcase(double tmp, ulong sbits, ulong ki)
 {
-	unreachable("'exp2' not supported");
+	if (ki & 0x80000000 == 0)
+	{
+		// k > 0, the exponent of scale might have overflowed by 1.
+		sbits -= 1u64 << 52;
+		double scale = bitcast(sbits, double);
+		double y = 2 * (scale + scale * tmp);
+		return y;
+	}
+	// k < 0, need special care in the subnormal range.
+	sbits += 1022u64 << 52;
+	double scale = bitcast(sbits, double);
+	double y = scale + scale * tmp;
+	if (y >= 1.0)
+	{
+		return 0x1p-1022 * y;
+	}
+	// Round y to the right precision before scaling it into the subnormal
+	// range to avoid double rounding that can cause 0.5+E/2 ulp error where
+	// E is the worst-case ulp error outside the subnormal range.  So this
+	// is only useful if the goal is better than 1 ulp worst-case error.
+	double lo = scale - y + scale * tmp;
+	double hi = 1.0 + y;
+	lo = 1.0 - hi + y + lo;
+	y = hi + lo - 1.0;
+	/* Avoid -0.0 with downward rounding.  */
+	if (WANT_ROUNDING && y == 0.0) y = 0.0;
+	/* The underflow exception needs to be signaled explicitly.  */
+	return __math_xflow(0, 0x1p-1022);
 }

+
+private macro uint _top12d(double x)
+{
+	return (uint)(bitcast(x, ulong) >> 52);
+}
+
+fn double _exp2(double x) @extern("exp2") @weak
+{
+	uint abstop = _top12d(x) & 0x7ff;
+	ulong u = bitcast(x, ulong);
+	if (abstop - _top12d(0x1p-54) >= _top12d(512.0) - _top12d(0x1p-54)) /* @unlikely */
+	{
+		if (abstop - _top12d(0x1p-54) >= 0x80000000)
+		{
+			// Avoid spurious underflow for tiny x.  */
+			// Note: 0 is common input.  */
+			return WANT_ROUNDING ? 1.0 + x : 1.0;
+		}
+		if (abstop >= _top12d(1024.0))
+		{
+			switch
+			{
+				case u == bitcast(-double.inf, ulong):
+					return 0.0;
+				case abstop >= _top12d(double.inf):
+					return 1.0 + x;
+				case !(u >> 63):
+					return __math_oflow(0);
+				case u >= bitcast(-1075.0, ulong):
+					return __math_uflow(0);
+			}
+		}
+		// Large x is special cased below.
+		if (2 * u > 2 * bitcast(928.0, ulong)) abstop = 0;
+	}
+	const SHIFT = __EXP2_DATA.exp2_shift;
+	// exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)].  */
+	// x = k/N + r, with int k and r in [-1/2N, 1/2N].  */
+	double kd = x + SHIFT;
+	ulong ki = bitcast(kd, ulong); /* k.  */
+	kd -= SHIFT; /* k/N for int k.  */
+	double r = x - kd;
+	/* 2^(k/N) ~= scale * (1 + tail).  */
+	ulong idx = 2 * (ki % EXP_DATA_WIDTH);
+	ulong top = ki << (52 - EXP_TABLE_BITS);
+	double tail = __EXP2_DATA.tab[idx];
+	/* This is only a valid scale when -1023*N < k < 1024*N.  */
+	ulong sbits = __EXP2_DATA.tab[idx + 1] + top;
+	/* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1).  */
+	/* Evaluation is optimized assuming superscalar pipelined execution.  */
+	double r2 = r * r;
+	const C1 = __EXP2_DATA.exp2_poly[0];
+    const C2 = __EXP2_DATA.exp2_poly[1];
+    const C3 = __EXP2_DATA.exp2_poly[2];
+    const C4 = __EXP2_DATA.exp2_poly[3];
+    const C5 = __EXP2_DATA.exp2_poly[4];
+	/* Without fma the worst case error is 0.5/N ulp larger.  */
+	/* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp.  */
+	double tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
+	if (abstop == 0 /* @unlikely */) return _exp2_specialcase(tmp, sbits, ki);
+	double scale = bitcast(sbits, double);
+	/* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there
+	   is no spurious underflow here even without fma.  */
+	return scale + scale * tmp;
+}
 $endif;
--- a/lib/std/math/math_nolibc/floor.c3
+++ b/lib/std/math/math_nolibc/floor.c3
@@ -12,7 +12,7 @@ fn double _floor(double x) @weak @extname("floor")
 	// special case because of non-nearest rounding modes
 	if (e <= 0x3ff - 1)
 	{
-		@force_eval_add(y, 0);
+		force_eval_add(y, 0);
 		return ui >> 63 ? -1 : 0;
 	}
 	return y > 0 ? x + y - 1 : x + y;
@@ -30,11 +30,11 @@ fn float _floorf(float x) @weak @extname("floorf")
 		case e >= 0:
 			uint m = 0x007fffff >> e;
 			if (u & m == 0) return x;
-			@force_eval_add(x, 0x1p120f);
+			force_eval_add(x, 0x1p120f);
 			if (u >> 31) u += m;
 			u &= ~m;
 		case u >> 31 == 0:
-			@force_eval_add(x, 0x1p120f);
+			force_eval_add(x, 0x1p120f);
 			u = 0;
 		case (u << 1 != 0):
 			return -1;
--- a/lib/std/math/math_nolibc/math_nolibc.c3
+++ b/lib/std/math/math_nolibc/math_nolibc.c3
@@ -1,9 +1,256 @@
 module std::math::nolibc;

 const double TOINT = 1 / math::DOUBLE_EPSILON;
+const double TOINT15 = 1.5 / math::DOUBLE_EPSILON;
 const float TOINTF = (float)(1 / math::FLOAT_EPSILON);
+private const double S1PI2 = math::PI_2; /* 0x3FF921FB, 0x54442D18 */
+private const double S2PI2 = math::PI; /* 0x400921FB, 0x54442D18 */
+private const double S3PI2 = math::PI + math::PI_2; /* 0x4012D97C, 0x7F3321D2 */
+private const double S4PI2 = math::PI + math::PI; /* 0x401921FB, 0x54442D18 */

-macro @force_eval_add(&x, v)
+// Shared between expf, exp2f and powf.
+private const EXP2F_TABLE_BITS = 5;
+private const EXP2F_POLY_ORDER = 3;
+private struct Exp2fData
 {
-	@volatile_store(x, @volatile_load(x) + v);
+	ulong[1 << EXP2F_TABLE_BITS] tab;
+	double shift_scaled;
+	double[EXP2F_POLY_ORDER] poly;
+	double shift;
+	double invln2_scaled;
+	double[EXP2F_POLY_ORDER] poly_scaled;
+}
+
+private const Exp2fData __EXP2F_DATA = {
+	.tab = {
+		0x3ff0000000000000, 0x3fefd9b0d3158574, 0x3fefb5586cf9890f, 0x3fef9301d0125b51,
+		0x3fef72b83c7d517b, 0x3fef54873168b9aa, 0x3fef387a6e756238, 0x3fef1e9df51fdee1,
+		0x3fef06fe0a31b715, 0x3feef1a7373aa9cb, 0x3feedea64c123422, 0x3feece086061892d,
+		0x3feebfdad5362a27, 0x3feeb42b569d4f82, 0x3feeab07dd485429, 0x3feea47eb03a5585,
+		0x3feea09e667f3bcd, 0x3fee9f75e8ec5f74, 0x3feea11473eb0187, 0x3feea589994cce13,
+		0x3feeace5422aa0db, 0x3feeb737b0cdc5e5, 0x3feec49182a3f090, 0x3feed503b23e255d,
+		0x3feee89f995ad3ad, 0x3feeff76f2fb5e47, 0x3fef199bdd85529c, 0x3fef3720dcef9069,
+		0x3fef5818dcfba487, 0x3fef7c97337b9b5f, 0x3fefa4afa2a490da, 0x3fefd0765b6e4540,
+    },
+    .shift_scaled = 0x1.8p+52 / (1 <<  5),
+    .poly = {
+        0x1.c6af84b912394p-5, 0x1.ebfce50fac4f3p-3, 0x1.62e42ff0c52d6p-1,
+    },
+    .shift = 0x1.8p+52,
+    .invln2_scaled = 0x1.71547652b82fep+0 * (1 << 5),
+    .poly_scaled = {
+        0x1.c6af84b912394p-5 / (1 <<  5) / (1 <<  5) / (1 <<  5),
+        0x1.ebfce50fac4f3p-3 / (1 <<  5) / (1 <<  5),
+        0x1.62e42ff0c52d6p-1 / (1 <<  5),
+    },
+};
+
+const EXP_TABLE_BITS = 7;
+const EXP_POLY_ORDER = 5;
+const EXP2_POLY_ORDER = 5;
+const EXP_DATA_WIDTH = 1 << EXP_TABLE_BITS;
+private struct Exp2Data
+{
+	double invln2N;
+	double shift;
+	double negln2hiN;
+	double negln2loN;
+	double[4] poly; // Last four coefficients.
+	double exp2_shift;
+	double[EXP2_POLY_ORDER] exp2_poly;
+	ulong[2 * EXP_DATA_WIDTH] tab;
+}
+
+/*
+ * Shared data between exp, exp2 and pow.
+ *
+ * Copyright (c) 2018, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+
+const Exp2Data __EXP2_DATA = {
+	// N/ln2
+	.invln2N = 0x1.71547652b82fep0 * EXP_DATA_WIDTH,
+	// -ln2/N
+	.negln2hiN = -0x1.62e42fefa0000p-8,
+	.negln2loN = -0x1.cf79abc9e3b3ap-47,
+	.shift = 0x1.8p52,
+	// exp polynomial coefficients.
+	.poly = {
+		// abs error: 1.555*2^-66
+		// ulp error: 0.509 (0.511 without fma)
+		// if |x| < ln2/256+eps
+		// abs error if |x| < ln2/256+0x1p-15: 1.09*2^-65
+		// abs error if |x| < ln2/128: 1.7145*2^-56
+		0x1.ffffffffffdbdp-2,
+		0x1.555555555543cp-3,
+		0x1.55555cf172b91p-5,
+		0x1.1111167a4d017p-7,
+	},
+	.exp2_shift = 0x1.8p52 / EXP_DATA_WIDTH,
+	// exp2 polynomial coefficients.
+	.exp2_poly = {
+		// abs error: 1.2195*2^-65
+		// ulp error: 0.507 (0.511 without fma)
+		// if |x| < 1/256
+		// abs error if |x| < 1/128: 1.9941*2^-56
+		0x1.62e42fefa39efp-1,
+		0x1.ebfbdff82c424p-3,
+		0x1.c6b08d70cf4b5p-5,
+		0x1.3b2abd24650ccp-7,
+		0x1.5d7e09b4e3a84p-10,
+	},
+	// 2^(k/N) ~= H[k]*(1 + T[k]) for int k in [0,N)
+	// tab[2*k] = asuint64(T[k])
+	// tab[2*k+1] = asuint64(H[k]) - (k << 52)/N
+	.tab = {
+		0x0, 0x3ff0000000000000,
+		0x3c9b3b4f1a88bf6e, 0x3feff63da9fb3335,
+		0xbc7160139cd8dc5d, 0x3fefec9a3e778061,
+		0xbc905e7a108766d1, 0x3fefe315e86e7f85,
+		0x3c8cd2523567f613, 0x3fefd9b0d3158574,
+		0xbc8bce8023f98efa, 0x3fefd06b29ddf6de,
+		0x3c60f74e61e6c861, 0x3fefc74518759bc8,
+		0x3c90a3e45b33d399, 0x3fefbe3ecac6f383,
+		0x3c979aa65d837b6d, 0x3fefb5586cf9890f,
+		0x3c8eb51a92fdeffc, 0x3fefac922b7247f7,
+		0x3c3ebe3d702f9cd1, 0x3fefa3ec32d3d1a2,
+		0xbc6a033489906e0b, 0x3fef9b66affed31b,
+		0xbc9556522a2fbd0e, 0x3fef9301d0125b51,
+		0xbc5080ef8c4eea55, 0x3fef8abdc06c31cc,
+		0xbc91c923b9d5f416, 0x3fef829aaea92de0,
+		0x3c80d3e3e95c55af, 0x3fef7a98c8a58e51,
+		0xbc801b15eaa59348, 0x3fef72b83c7d517b,
+		0xbc8f1ff055de323d, 0x3fef6af9388c8dea,
+		0x3c8b898c3f1353bf, 0x3fef635beb6fcb75,
+		0xbc96d99c7611eb26, 0x3fef5be084045cd4,
+		0x3c9aecf73e3a2f60, 0x3fef54873168b9aa,
+		0xbc8fe782cb86389d, 0x3fef4d5022fcd91d,
+		0x3c8a6f4144a6c38d, 0x3fef463b88628cd6,
+		0x3c807a05b0e4047d, 0x3fef3f49917ddc96,
+		0x3c968efde3a8a894, 0x3fef387a6e756238,
+		0x3c875e18f274487d, 0x3fef31ce4fb2a63f,
+		0x3c80472b981fe7f2, 0x3fef2b4565e27cdd,
+		0xbc96b87b3f71085e, 0x3fef24dfe1f56381,
+		0x3c82f7e16d09ab31, 0x3fef1e9df51fdee1,
+		0xbc3d219b1a6fbffa, 0x3fef187fd0dad990,
+		0x3c8b3782720c0ab4, 0x3fef1285a6e4030b,
+		0x3c6e149289cecb8f, 0x3fef0cafa93e2f56,
+		0x3c834d754db0abb6, 0x3fef06fe0a31b715,
+		0x3c864201e2ac744c, 0x3fef0170fc4cd831,
+		0x3c8fdd395dd3f84a, 0x3feefc08b26416ff,
+		0xbc86a3803b8e5b04, 0x3feef6c55f929ff1,
+		0xbc924aedcc4b5068, 0x3feef1a7373aa9cb,
+		0xbc9907f81b512d8e, 0x3feeecae6d05d866,
+		0xbc71d1e83e9436d2, 0x3feee7db34e59ff7,
+		0xbc991919b3ce1b15, 0x3feee32dc313a8e5,
+		0x3c859f48a72a4c6d, 0x3feedea64c123422,
+		0xbc9312607a28698a, 0x3feeda4504ac801c,
+		0xbc58a78f4817895b, 0x3feed60a21f72e2a,
+		0xbc7c2c9b67499a1b, 0x3feed1f5d950a897,
+		0x3c4363ed60c2ac11, 0x3feece086061892d,
+		0x3c9666093b0664ef, 0x3feeca41ed1d0057,
+		0x3c6ecce1daa10379, 0x3feec6a2b5c13cd0,
+		0x3c93ff8e3f0f1230, 0x3feec32af0d7d3de,
+		0x3c7690cebb7aafb0, 0x3feebfdad5362a27,
+		0x3c931dbdeb54e077, 0x3feebcb299fddd0d,
+		0xbc8f94340071a38e, 0x3feeb9b2769d2ca7,
+		0xbc87deccdc93a349, 0x3feeb6daa2cf6642,
+		0xbc78dec6bd0f385f, 0x3feeb42b569d4f82,
+		0xbc861246ec7b5cf6, 0x3feeb1a4ca5d920f,
+		0x3c93350518fdd78e, 0x3feeaf4736b527da,
+		0x3c7b98b72f8a9b05, 0x3feead12d497c7fd,
+		0x3c9063e1e21c5409, 0x3feeab07dd485429,
+		0x3c34c7855019c6ea, 0x3feea9268a5946b7,
+		0x3c9432e62b64c035, 0x3feea76f15ad2148,
+		0xbc8ce44a6199769f, 0x3feea5e1b976dc09,
+		0xbc8c33c53bef4da8, 0x3feea47eb03a5585,
+		0xbc845378892be9ae, 0x3feea34634ccc320,
+		0xbc93cedd78565858, 0x3feea23882552225,
+		0x3c5710aa807e1964, 0x3feea155d44ca973,
+		0xbc93b3efbf5e2228, 0x3feea09e667f3bcd,
+		0xbc6a12ad8734b982, 0x3feea012750bdabf,
+		0xbc6367efb86da9ee, 0x3fee9fb23c651a2f,
+		0xbc80dc3d54e08851, 0x3fee9f7df9519484,
+		0xbc781f647e5a3ecf, 0x3fee9f75e8ec5f74,
+		0xbc86ee4ac08b7db0, 0x3fee9f9a48a58174,
+		0xbc8619321e55e68a, 0x3fee9feb564267c9,
+		0x3c909ccb5e09d4d3, 0x3feea0694fde5d3f,
+		0xbc7b32dcb94da51d, 0x3feea11473eb0187,
+		0x3c94ecfd5467c06b, 0x3feea1ed0130c132,
+		0x3c65ebe1abd66c55, 0x3feea2f336cf4e62,
+		0xbc88a1c52fb3cf42, 0x3feea427543e1a12,
+		0xbc9369b6f13b3734, 0x3feea589994cce13,
+		0xbc805e843a19ff1e, 0x3feea71a4623c7ad,
+		0xbc94d450d872576e, 0x3feea8d99b4492ed,
+		0x3c90ad675b0e8a00, 0x3feeaac7d98a6699,
+		0x3c8db72fc1f0eab4, 0x3feeace5422aa0db,
+		0xbc65b6609cc5e7ff, 0x3feeaf3216b5448c,
+		0x3c7bf68359f35f44, 0x3feeb1ae99157736,
+		0xbc93091fa71e3d83, 0x3feeb45b0b91ffc6,
+		0xbc5da9b88b6c1e29, 0x3feeb737b0cdc5e5,
+		0xbc6c23f97c90b959, 0x3feeba44cbc8520f,
+		0xbc92434322f4f9aa, 0x3feebd829fde4e50,
+		0xbc85ca6cd7668e4b, 0x3feec0f170ca07ba,
+		0x3c71affc2b91ce27, 0x3feec49182a3f090,
+		0x3c6dd235e10a73bb, 0x3feec86319e32323,
+		0xbc87c50422622263, 0x3feecc667b5de565,
+		0x3c8b1c86e3e231d5, 0x3feed09bec4a2d33,
+		0xbc91bbd1d3bcbb15, 0x3feed503b23e255d,
+		0x3c90cc319cee31d2, 0x3feed99e1330b358,
+		0x3c8469846e735ab3, 0x3feede6b5579fdbf,
+		0xbc82dfcd978e9db4, 0x3feee36bbfd3f37a,
+		0x3c8c1a7792cb3387, 0x3feee89f995ad3ad,
+		0xbc907b8f4ad1d9fa, 0x3feeee07298db666,
+		0xbc55c3d956dcaeba, 0x3feef3a2b84f15fb,
+		0xbc90a40e3da6f640, 0x3feef9728de5593a,
+		0xbc68d6f438ad9334, 0x3feeff76f2fb5e47,
+		0xbc91eee26b588a35, 0x3fef05b030a1064a,
+		0x3c74ffd70a5fddcd, 0x3fef0c1e904bc1d2,
+		0xbc91bdfbfa9298ac, 0x3fef12c25bd71e09,
+		0x3c736eae30af0cb3, 0x3fef199bdd85529c,
+		0x3c8ee3325c9ffd94, 0x3fef20ab5fffd07a,
+		0x3c84e08fd10959ac, 0x3fef27f12e57d14b,
+		0x3c63cdaf384e1a67, 0x3fef2f6d9406e7b5,
+		0x3c676b2c6c921968, 0x3fef3720dcef9069,
+		0xbc808a1883ccb5d2, 0x3fef3f0b555dc3fa,
+		0xbc8fad5d3ffffa6f, 0x3fef472d4a07897c,
+		0xbc900dae3875a949, 0x3fef4f87080d89f2,
+		0x3c74a385a63d07a7, 0x3fef5818dcfba487,
+		0xbc82919e2040220f, 0x3fef60e316c98398,
+		0x3c8e5a50d5c192ac, 0x3fef69e603db3285,
+		0x3c843a59ac016b4b, 0x3fef7321f301b460,
+		0xbc82d52107b43e1f, 0x3fef7c97337b9b5f,
+		0xbc892ab93b470dc9, 0x3fef864614f5a129,
+		0x3c74b604603a88d3, 0x3fef902ee78b3ff6,
+		0x3c83c5ec519d7271, 0x3fef9a51fbc74c83,
+		0xbc8ff7128fd391f0, 0x3fefa4afa2a490da,
+		0xbc8dae98e223747d, 0x3fefaf482d8e67f1,
+		0x3c8ec3bc41aa2008, 0x3fefba1bee615a27,
+		0x3c842b94c3a9eb32, 0x3fefc52b376bba97,
+		0x3c8a64a931d185ee, 0x3fefd0765b6e4540,
+		0xbc8e37bae43be3ed, 0x3fefdbfdad9cbe14,
+		0x3c77893b4d91cd9d, 0x3fefe7c1819e90d8,
+		0x3c5305c14160cc89, 0x3feff3c22b8f71f1,
+	}
+};
+
+const bool WANT_ROUNDING = true;
+macro float __math_uflowf(uint sign) => __math_xflow(sign, 0x1p97f);
+macro double __math_uflow(ulong sign) => __math_xflow(sign, 0x1p769);
+macro float __math_oflowf(uint sign) => __math_xflow(sign, 0x1p-95f);
+macro double __math_oflow(ulong sign) => __math_xflow(sign, 0x1p-767);
+
+macro __math_xflow(sign, v)
+{
+	$typeof(v) temp;
+	@volatile_store(temp, (sign ? -v : v) * v);
+	return temp;
+}
+
+macro force_eval_add(x, v)
+{
+	$typeof(x) temp;
+	@volatile_store(temp, x + v);
 }
--- a/lib/std/math/math_nolibc/rempi.c3
+++ b/lib/std/math/math_nolibc/rempi.c3
@@ -1,5 +1,8 @@
-module std::math::nolibc::rempi;
+module std::math::nolibc;
 import std::math;
+
+$if (!env::COMPILER_LIBC_AVAILABLE):
+
 /* origin: FreeBSD /usr/src/lib/msun/src/e_rem_pio2f.c */
 /*
 * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
@@ -22,21 +25,18 @@ import std::math;
 * use __rem_pio2_large() for large x
 */

-$if (!env::COMPILER_LIBC_AVAILABLE):
-
 /*
 * invpio2:  53 bits of 2/pi
 * pio2_1:   first 25 bits of pi/2
 * pio2_1t:  pi/2 - pio2_1
 */
-private const double TOINT = 1.5 / math::DOUBLE_EPSILON;
-private const double PIO4 = 0x1.921fb6p-1;
-private const double INVPIO2 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */
-private const double PIO2_1 = 1.57079631090164184570e+00; /* 0x3FF921FB, 0x50000000 */
-private const double PIO2_1T = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */
-
 fn int __rem_pio2f(float x, double *y)
 {
+	const double PIO4 = 0x1.921fb6p-1;
+	const double INVPIO2 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */
+	const double PIO2_1 = 1.57079631090164184570e+00; /* 0x3FF921FB, 0x50000000 */
+	const double PIO2_1T = 1.58932547735281966916e-08; /* 0x3E5110b4, 0x611A6263 */
+
 	uint ux = bitcast(x, uint);
 	uint ix = ux & 0x7fffffff;
 	/* 25+53 bit pi is good enough for medium size */
@@ -44,7 +44,7 @@ fn int __rem_pio2f(float x, double *y)
 	{
 		// |x| ~< 2^28*(pi/2), medium size
 		// Use a specialized rint() to get f.
-		uint f = (uint)(((double)x * INVPIO2 + TOINT) - TOINT);
+		uint f = (uint)(((double)x * INVPIO2 + TOINT15) - TOINT15);
 		int n  = (int)f;
 		*y = x - f * PIO2_1 - f * PIO2_1T;
 		/* Matters with directed rounding. */
@@ -265,7 +265,7 @@ fn int __rem_pio2_large(double* x, double* y, int e0, int nx, int prec)
 	else
 	{
 		/* break z into 24-bit if necessary */
-		z = nolibc::_scalbn(z,-q0);
+		z = _scalbn(z,-q0);
 		if (z >= 0x1p24)
 		{
 			fw = (double)(int)(0x1p-24 * z);
@@ -344,4 +344,197 @@ fn int __rem_pio2_large(double* x, double* y, int e0, int nx, int prec)
 	return n & 7;
 }

+/* origin: FreeBSD /usr/src/lib/msun/src/e_rem_pio2.c */
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunSoft, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ *
+ * Optimized by Bruce D. Evans.
+ */
+/*
+ * invpio2:  53 bits of 2/pi
+ * pio2_1:   first  33 bit of pi/2
+ * pio2_1t:  pi/2 - pio2_1
+ * pio2_2:   second 33 bit of pi/2
+ * pio2_2t:  pi/2 - (pio2_1+pio2_2)
+ * pio2_3:   third  33 bit of pi/2
+ * pio2_3t:  pi/2 - (pio2_1+pio2_2+pio2_3)
+ */
+
+/* caller must handle the case when reduction is not needed: |x| ~<= pi/4 */
+fn int __rem_pio2(double x, double *y)
+{
+	const double PIO4 = 0x1.921fb54442d18p-1;
+    const double INVPIO2 = 6.36619772367581382433e-01; /* 0x3FE45F30, 0x6DC9C883 */
+    const double PIO2_1 = 1.57079632673412561417e+00; /* 0x3FF921FB, 0x54400000 */
+    const double PIO2_1T = 6.07710050650619224932e-11; /* 0x3DD0B461, 0x1A626331 */
+    const double PIO2_2 = 6.07710050630396597660e-11; /* 0x3DD0B461, 0x1A600000 */
+    const double PIO2_2T = 2.02226624879595063154e-21; /* 0x3BA3198A, 0x2E037073 */
+    const double PIO2_3 = 2.02226624871116645580e-21; /* 0x3BA3198A, 0x2E000000 */
+    const double PIO2_3T = 8.47842766036889956997e-32; /* 0x397B839A, 0x252049C1 */
+
+	ulong u = bitcast(x, ulong);
+	int sign = (int)(u >> 63);
+	uint ix = (uint)(u >> 32 & 0x7fffffff);
+	switch
+	{
+		// |x| ~<= 5pi/4
+		case ix <= 0x400f6a7a:
+			// |x| ~= pi/2 or 2pi/2
+			if ((ix & 0xfffff) == 0x921fb)
+			{
+				// cancellation -- use medium case
+				break;
+			}
+			// |x| ~<= 3pi/4
+			if (ix <= 0x4002d97c)
+			{
+				if (!sign)
+				{
+					double z = x - PIO2_1;  /* one round good to 85 bits */
+					y[0] = z - PIO2_1T;
+					y[1] = (z - y[0]) - PIO2_1T;
+					return 1;
+				}
+				double z = x + PIO2_1;
+				y[0] = z + PIO2_1T;
+				y[1] = (z - y[0]) + PIO2_1T;
+				return -1;
+			}
+			if (!sign)
+			{
+				double z = x - 2 * PIO2_1;
+				y[0] = z - 2 * PIO2_1T;
+				y[1] = (z - y[0]) - 2 * PIO2_1T;
+				return 2;
+			}
+			double z = x + 2 * PIO2_1;
+			y[0] = z + 2 * PIO2_1T;
+			y[1] = (z - y[0]) + 2 * PIO2_1T;
+			return -2;
+		// |x| ~<= 9pi/4
+		case ix <= 0x401c463b:
+			// |x| ~<= 7pi/4
+			if (ix <= 0x4015fdbc)
+			{
+				// |x| ~= 3pi/2
+				if (ix == 0x4012d97c) break; // Use medium
+
+				if (!sign)
+				{
+					double z = x - 3 * PIO2_1;
+					y[0] = z - 3 * PIO2_1T;
+					y[1] = (z - y[0]) - 3 * PIO2_1T;
+					return 3;
+				}
+				double z = x + 3 * PIO2_1;
+				y[0] = z + 3 * PIO2_1T;
+				y[1] = (z - y[0]) + 3 * PIO2_1T;
+				return -3;
+			}
+			// |x| ~= 4pi/2
+			if (ix == 0x401921fb) break; // Use medium
+			if (!sign)
+			{
+				double z = x - 4 * PIO2_1;
+				y[0] = z - 4 * PIO2_1T;
+				y[1] = (z - y[0]) - 4 * PIO2_1T;
+				return 4;
+			}
+			double z = x + 4*PIO2_1;
+			y[0] = z + 4 * PIO2_1T;
+			y[1] = (z - y[0]) + 4 * PIO2_1T;
+			return -4;
+		case ix < 0x413921fb:
+			break; // medium
+		case ix >= 0x7ff00000:
+		    // x is inf or NaN */
+            y[0] = y[1] = x - x;
+            return 0;
+		default:
+			// all other (large) arguments
+        	// set z = scalbn(|x|,-ilogb(x)+23)
+        	u = bitcast(x, ulong);
+        	u &= ((ulong)-1) >> 12;
+        	u |= (ulong)(0x3ff + 23) << 52;
+        	double z = bitcast(u, double);
+        	double[3] tx;
+        	int i;
+        	for (i = 0; i < 2; i++)
+        	{
+        		tx[i] = (double)(int)z;
+        		z = (z - tx[i]) * 0x1p24;
+        	}
+        	tx[i] = z;
+        	// skip zero terms, first term is non-zero
+        	while (tx[i] == 0.0) i--;
+        	double[2] ty;
+        	int n = __rem_pio2_large(&tx, &ty, (int)(ix >> 20) - (0x3ff + 23), i + 1, 1);
+        	if (sign)
+        	{
+        		y[0] = -ty[0];
+        		y[1] = -ty[1];
+        		return -n;
+        	}
+        	y[0] = ty[0];
+        	y[1] = ty[1];
+        	return n;
+	}
+
+	// |x| ~< 2^20*(pi/2), medium size
+	// rint(x/(pi/2))
+	double fnn = (double)x * INVPIO2 + TOINT15 - TOINT15;
+	int n = (int)fnn;
+	double r = x - fnn * PIO2_1;
+	double w = fnn * PIO2_1T;  // 1st round, good to 85 bits
+	// Matters with directed rounding.
+	if (r - w < -PIO4) /*@unlikely*/
+	{
+		n--;
+		fnn--;
+		r = x - fnn * PIO2_1;
+		w = fnn * PIO2_1T;
+	}
+	else if (r - w > PIO4) /*@unlikely*/
+	{
+		n++;
+		fnn++;
+		r = x - fnn * PIO2_1;
+		w = fnn * PIO2_1T;
+	}
+	y[0] = r - w;
+	u = bitcast(y[0], ulong);
+	int ey = (int)(u >> 52 & 0x7ff);
+	int ex = (int)(ix >> 20);
+	if (ex - ey > 16)
+	{
+		// 2nd round, good to 118 bits */
+		double t = r;
+		w = fnn * PIO2_2;
+		r = t - w;
+		w = fnn * PIO2_2T - ((t - r) - w);
+		y[0] = r - w;
+		u = bitcast(y[0], ulong);
+		ey = (int)(u >> 52 & 0x7ff);
+		if (ex - ey > 49)
+		{
+			// 3rd round, good to 151 bits, covers all cases
+			t = r;
+			w = fnn * PIO2_3;
+			r = t - w;
+			w = fnn * PIO2_3T - ((t - r) - w);
+			y[0] = r - w;
+		}
+	}
+	y[1] = (r - y[0]) - w;
+	return n;
+
+
+}
 $endif;
--- a/lib/std/math/math_nolibc/round.c3
+++ b/lib/std/math/math_nolibc/round.c3
@@ -11,7 +11,7 @@ fn double _round(double x) @extname("round") @weak
 	if (e < 0x3ff - 1)
 	{
 		/* raise inexact if x!=0 */
-		@force_eval_add(x, TOINT);
+		force_eval_add(x, TOINT);
 		return 0 * x;
 	}
 	double y = (x + TOINT) - TOINT - x;
@@ -28,7 +28,7 @@ fn double _round(double x) @extname("round") @weak
 	return y;
 }

-fn float roundf(float x) @extname("roundf") @weak
+fn float _roundf(float x) @extname("roundf") @weak
 {
 	uint u = bitcast(x, uint);
 	int e = (u >> 23) & 0xff;
@@ -36,7 +36,7 @@ fn float roundf(float x) @extname("roundf") @weak
 	if (u >> 31) x = -x;
 	if (e < 0x7f - 1)
 	{
-		@force_eval_add(x, TOINTF);
+		force_eval_add(x, TOINTF);
 		return 0 * x;
 	}
 	float y = (x + TOINTF) - TOINTF - x;
--- a/lib/std/math/math_nolibc/sin.c3
+++ b/lib/std/math/math_nolibc/sin.c3
@@ -0,0 +1,122 @@
+module std::math::nolibc;
+
+$if (!env::COMPILER_LIBC_AVAILABLE):
+
+/* origin: FreeBSD /usr/src/lib/msun/src/s_sinf.c */
+/*
+ * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
+ * Optimized by Bruce D. Evans.
+ */
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunPro, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+
+fn float _sinf(float x) @weak @extname("sinf")
+{
+	uint ix = bitcast(x, uint);
+	int sign = ix >> 31;
+	ix &= 0x7fffffff;
+	switch
+	{
+		case ix <= 0x3f490fda:
+			// |x| ~<= pi/4
+			if (ix < 0x39800000)
+			{
+				/* |x| < 2**-12 */
+				// raise inexact if x!=0 and underflow if subnormal
+				if (ix < 0x00800000)
+				{
+					@volatile_load(x) / 0x1p120f;
+				}
+				else
+				{
+					@volatile_load(x) + 0x1p120f;
+				}
+				return x;
+			}
+			return __sindf(x);
+		case ix <= 0x407b53d1:
+		    // |x| ~<= 5*pi/4
+			if (ix <= 0x4016cbe3)
+			{
+				// |x| ~<= 3pi/4
+				if (sign) return -__cosdf(x + S1PI2);
+				return __cosdf(x - S1PI2);
+			}
+			return __sindf(sign ? -(x + S2PI2) : -(x - S2PI2));
+		case ix <= 0x40e231d5:
+		    // |x| ~<= 9*pi/4
+			if (ix <= 0x40afeddf)
+			{
+				// |x| ~<= 7*pi/4
+				return sign ? __cosdf(x + S3PI2) : -__cosdf(x - S3PI2);
+			}
+			return __sindf(sign ? x + S4PI2 : x - S4PI2);
+		case ix >= 0x7f800000:
+			// sin(Inf or NaN) is NaN
+			return x - x;
+	}
+	/* general argument reduction needed */
+	double y = void;
+	switch (__rem_pio2f(x, &y) & 3)
+	{
+		case 0: return  __sindf(y);
+		case 1: return  __cosdf(y);
+		case 2: return  __sindf(-y);
+		default: return -__cosdf(y);
+	}
+	unreachable();
+}
+
+/* origin: FreeBSD /usr/src/lib/msun/src/s_sin.c */
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunPro, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+
+fn double sin(double x) @extern("sin") @weak
+{
+	// High word of x.
+	uint ix = (uint)(bitcast(x, ulong) >> 32);
+	ix &= 0x7fffffff;
+	switch
+	{
+		// |x| ~< pi/4
+		case ix <= 0x3fe921fb:
+			// |x| < 2**-26
+			if (ix < 0x3e500000)
+			{
+				// raise inexact if x != 0 and underflow if subnormal
+				force_eval_add(x, ix < 0x00100000 ? -0x1p120f : 0x1p120f);
+				return x;
+			}
+			return __sin(x, 0.0, 0);
+		// sin(Inf or NaN) is NaN
+		case ix >= 0x7ff00000:
+			return x - x;
+		default:
+			// argument reduction needed
+			double[2] y;
+			switch (__rem_pio2(x, &y) & 3)
+			{
+				case 0: return __sin(y[0], y[1], 1);
+				case 1: return __cos(y[0], y[1]);
+				case 2: return -__sin(y[0], y[1], 1);
+				default: return -__cos(y[0], y[1]);
+			}
+	}
+}
+$endif;
--- a/lib/std/math/math_nolibc/sincosf.c3
+++ b/lib/std/math/math_nolibc/sincosf.c3
@@ -0,0 +1,112 @@
+module std::math::nolibc;
+
+$if (!env::COMPILER_LIBC_AVAILABLE):
+
+/* origin: FreeBSD /usr/src/lib/msun/src/s_sinf.c */
+/*
+ * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
+ * Optimized by Bruce D. Evans.
+ */
+/*
+ * ====================================================
+ * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
+ *
+ * Developed at SunPro, a Sun Microsystems, Inc. business.
+ * Permission to use, copy, modify, and distribute this
+ * software is freely granted, provided that this notice
+ * is preserved.
+ * ====================================================
+ */
+
+fn void sincosf(float x, float *sin, float *cos) @extern("sincosf") @weak
+{
+
+	uint ix = bitcast(x, uint);
+	uint sign = ix >> 31;
+	ix &= 0x7fffffff;
+
+	switch
+	{
+		// |x| ~<= pi/4
+		case ix <= 0x3f490fda:
+			// |x| < 2**-12
+			if (ix < 0x39800000)
+			{
+				// raise inexact if x!=0 and underflow if subnormal
+				float z;
+				@volatile_store(z, ix < 0x00100000 ? @volatile_load(x) / 0x1p120f : @volatile_load(x) + 0x1p120f);
+				*sin = x;
+				*cos = 1.0f;
+				return;
+			}
+			*sin = __sindf(x);
+			*cos = __cosdf(x);
+			return;
+		// |x| ~<= 5*pi/4
+		case ix <= 0x407b53d1:
+			// |x| ~<= 3pi/4
+			if (ix <= 0x4016cbe3)
+			{
+				if (sign)
+				{
+					*sin = -__cosdf(x + S1PI2);
+					*cos = __sindf(x + S1PI2);
+					return;
+				}
+				*sin = __cosdf(S1PI2 - x);
+				*cos = __sindf(S1PI2 - x);
+				return;
+			}
+			/* -sin(x+c) is not correct if x+c could be 0: -0 vs +0 */
+			*sin = -__sindf(sign ? x + S2PI2 : x - S2PI2);
+			*cos = -__cosdf(sign ? x + S2PI2 : x - S2PI2);
+			return;
+		case ix <= 0x40e231d5:
+			// |x| ~<= 9*pi/4
+			if (ix <= 0x40afeddf)
+			{
+				// |x| ~<= 7*pi/4
+        		if (sign)
+        		{
+        		    *sin = __cosdf(x + S3PI2);
+        			*cos = -__sindf(x + S3PI2);
+        			return;
+        		}
+        		*sin = -__cosdf(x - S3PI2);
+        		*cos = __sindf(x - S3PI2);
+        		return;
+        	}
+        	*sin = __sindf(sign ? x + S4PI2 : x - S4PI2);
+        	*cos = __cosdf(sign ? x + S4PI2 : x - S4PI2);
+        	return;
+        case ix >= 0x7f800000:
+            // sin(Inf or NaN) is NaN
+			*sin = *cos = x - x;
+			return;
+		default:
+			// general argument reduction needed
+			double y = void;
+			uint n = __rem_pio2f(x, &y);
+			float s = __sindf(y);
+			float c = __cosdf(y);
+			switch (n & 3)
+			{
+				case 0:
+					*sin = s;
+					*cos = c;
+				case 1:
+					*sin = c;
+					*cos = -s;
+				case 2:
+					*sin = -s;
+					*cos = -c;
+				case 3:
+				default:
+					*sin = -c;
+					*cos = s;
+			}
+	}
+
+}
+
+$endif;
--- a/lib/std/math/math_nolibc/trig.c3
+++ b/lib/std/math/math_nolibc/trig.c3
@@ -2,218 +2,6 @@ module std::math::nolibc::trig;

 $if (!env::COMPILER_LIBC_AVAILABLE):

-
-/* origin: FreeBSD /usr/src/lib/msun/src/s_sinf.c */
-/*
- * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
- * Optimized by Bruce D. Evans.
- */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
-private const double S1PI2_F = math::PI_2; /* 0x3FF921FB, 0x54442D18 */
-private const double S2PI2_F = math::PI; /* 0x400921FB, 0x54442D18 */
-private const double S3PI2_F = math::PI + math::PI_2; /* 0x4012D97C, 0x7F3321D2 */
-private const double S4PI2_F = math::PI + math::PI; /* 0x401921FB, 0x54442D18 */
-
-
-fn float _sinf(float x) @weak @extname("sinf")
-{
-	uint ix = bitcast(x, uint);
-	int sign = ix >> 31;
-	ix &= 0x7fffffff;
-	switch
-	{
-		case ix <= 0x3f490fda:
-			// |x| ~<= pi/4
-			if (ix < 0x39800000)
-			{
-				/* |x| < 2**-12 */
-				// raise inexact if x!=0 and underflow if subnormal
-				if (ix < 0x00800000)
-				{
-					@volatile_load(x) / 0x1p120f;
-				}
-				else
-				{
-					@volatile_load(x) + 0x1p120f;
-				}
-				return x;
-			}
-			return __sindf(x);
-		case ix <= 0x407b53d1:
-		    // |x| ~<= 5*pi/4
-			if (ix <= 0x4016cbe3)
-			{
-				// |x| ~<= 3pi/4
-				if (sign) return -__cosdf(x + S1PI2_F);
-				return __cosdf(x - S1PI2_F);
-			}
-			return __sindf(sign ? -(x + S2PI2_F) : -(x - S2PI2_F));
-		case ix <= 0x40e231d5:
-		    // |x| ~<= 9*pi/4
-			if (ix <= 0x40afeddf)
-			{
-				// |x| ~<= 7*pi/4
-				return sign ? __cosdf(x + S3PI2_F) : -__cosdf(x - S3PI2_F);
-			}
-			return __sindf(sign ? x + S4PI2_F : x - S4PI2_F);
-		case ix >= 0x7f800000:
-			// sin(Inf or NaN) is NaN
-			return x - x;
-	}
-	/* general argument reduction needed */
-	double y = void;
-	int n = rempi::__rem_pio2f(x, &y);
-	switch (n & 3)
-	{
-		case 0: return  __sindf(y);
-		case 1: return  __cosdf(y);
-		case 2: return  __sindf(-y);
-		default: return -__cosdf(y);
-	}
-	unreachable();
-}
-
-/* origin: FreeBSD /usr/src/lib/msun/src/k_sinf.c */
-/*
- * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
- * Optimized by Bruce D. Evans.
- */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-
-
-/* |sin(x)/x - s(x)| < 2**-37.5 (~[-4.89e-12, 4.824e-12]). */
-private const double S1 = -0x15555554cbac77.0p-55; /* -0.166666666416265235595 */
-private const double S2 =  0x111110896efbb2.0p-59; /*  0.0083333293858894631756 */
-private const double S3 = -0x1a00f9e2cae774.0p-65; /* -0.000198393348360966317347 */
-private const double S4 =  0x16cd878c3b46a7.0p-71; /*  0.0000027183114939898219064 */
-
-fn float __sindf(double x)
-{
-	double z = x * x;
-	double w = z * z;
-	double r = S3 + z * S4;
-	double s = z * x;
-	return (float)((x + s * (S1 + z * S2)) +  s * w * r);
-}
-
-/* origin: FreeBSD /usr/src/lib/msun/src/k_cosf.c */
-/*
- * Conversion to float by Ian Lance Taylor, Cygnus Support, ian@cygnus.com.
- * Debugged and optimized by Bruce D. Evans.
- */
-/*
- * ====================================================
- * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved.
- *
- * Developed at SunPro, a Sun Microsystems, Inc. business.
- * Permission to use, copy, modify, and distribute this
- * software is freely granted, provided that this notice
- * is preserved.
- * ====================================================
- */
-/* |cos(x) - c(x)| < 2**-34.1 (~[-5.37e-11, 5.295e-11]). */
-
-private const double C0  = -0x1ffffffd0c5e81.0p-54; /* -0.499999997251031003120 */
-private const double C1  =  0x155553e1053a42.0p-57; /*  0.0416666233237390631894 */
-private const double C2  = -0x16c087e80f1e27.0p-62; /* -0.00138867637746099294692 */
-private const double C3  =  0x199342e0ee5069.0p-68; /*  0.0000243904487962774090654 */
-
-fn float __cosdf(double x)
-{
-	/* Try to optimize for parallel evaluation as in __tandf.c. */
-	double z = x * x;
-	double w = z * z;
-	double r = C2 + z * C3;
-	return (float)(((1.0f + z * C0) + w * C1) + (w * z) * r);
-}
-
-fn float _cosf(float x) @extname("cosf") @weak
-{
-	uint ix = bitcast(x, uint);
-	uint sign = ix >> 31;
-	ix &= 0x7fffffff;
-
-	switch 
-	{
-		case (ix <= 0x3f490fda):
-			// |x| < 2**-12
-			if (ix < 0x39800000) {
-			/* raise inexact if x != 0 */
-			@volatile_load(x) + 0x1p120f;
-			return 1.0f;
-			}
-			return __cosdf(x);
-
-		// |x| ~<= 5*pi/4
-		case (ix <= 0x407b53d1):
-			// |x|  ~> 3*pi/4
-			if (ix > 0x4016cbe3)  
-			{
-				return -__cosdf(sign ? x + S2PI2_F : x - S2PI2_F);
-			}
-			else 
-			{
-			if (sign) return __sindf(x + S1PI2_F);
-			return __sindf(S1PI2_F - x);
-		}
-		// |x| ~<= 9*pi/4
-		case (ix <= 0x40e231d5):
-			if (ix > 0x40afeddf) 
-			{
- 				return __cosdf(sign ? x + S4PI2_F : x - S4PI2_F);
-			}
-			else 
-			{
-				if (sign) return __sindf((double)(-x) - S3PI2_F);
-				return __sindf(x - S3PI2_F);
-			}
-	}
-
-	/* general argument reduction needed */
-	double y = void;
-	int n = rempi::__rem_pio2f(x, &y);
-	switch (n & 3)
-	{
-		case 0: return  __cosdf(y);
-		case 1: return  __sindf(-y);
-		case 2: return  -__cosdf(y);
-		default: return __sindf(y);
-	}
-	unreachable();
-}
-
-fn float sincosf_broken(float x) @extname("sincosf") @weak
-{
-	unreachable("'sincosf' not supported");
-}
-
-fn double sin_broken(double x) @extname("sin") @weak
-{
-	unreachable("'sin' not supported");
-}
-fn double cos_broken(double x) @extname("cos") @weak
-{
-	unreachable("'cos' not supported");
-}
 fn double sincos_broken(double x) @extname("sincos") @weak
 {
 	unreachable("'sinccos' not supported");
--- a/lib/std/math/math_nolibc/trunc.c3
+++ b/lib/std/math/math_nolibc/trunc.c3
@@ -10,7 +10,7 @@ fn double _trunc(double x) @weak @extname("trunc")
 	if (e < 12) e = 1;
 	ulong m = ((ulong)-1) >> e;
 	if (i & m == 0) return x;
-	@force_eval_add(x, 0x1p120f);
+	force_eval_add(x, 0x1p120f);
 	i &= ~m;
 	return bitcast(i, double);
 }
@@ -23,7 +23,7 @@ fn float _truncf(float x) @weak @extname("truncf")
 	if (e < 9) e = 1;
 	uint m = ((uint)-1) >> e;
 	if (i & m == 0) return x;
-	@force_eval_add(x, 0x1p120f);
+	force_eval_add(x, 0x1p120f);
 	i &= ~m;
 	return bitcast(i, float);
 }