module std::math::nolibc @if(env::NO_LIBC || $feature(C3_MATH));

macro uint _top12f(float x) @private => bitcast(x, uint) >> 20;


fn float _exp2f(float x) @extern("exp2f") @weak @nostrip
{
	double xd = x;
	uint abstop = _top12f(x) & 0x7ff;
	if (abstop >= _top12f(128.0f)) /* @unlikely */
	{
		switch
		{
			// |x| >= 128 or x is nan.
			case bitcast(x, uint) == bitcast(-float.inf, uint):
				return 0;
			case abstop >= _top12f(float.inf):
				return x + x;
			case x > 0:
				return __math_oflowf(0);
			case x <= -150.0f:
				return __math_uflowf(0);
		}
	}
	const SHIFT = __EXP2F_DATA.shift_scaled;
	const uint N = 1U << EXP2F_TABLE_BITS;
	// x = k/N + r with r in [-1/(2N), 1/(2N)] and int k.
	double kd = xd + SHIFT;
	ulong ki = bitcast(kd, ulong);
	kd -= SHIFT; /* k/N for int k.  */
	double r = xd - kd;

	// exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1)
	ulong t = __EXP2F_DATA.tab[ki % N];
	t += ki << (52 - EXP2F_TABLE_BITS);
	double s = bitcast(t, double);
	double z = __EXP2F_DATA.poly[0] * r + __EXP2F_DATA.poly[1];
	double r2 = r * r;
	double y = __EXP2F_DATA.poly[2] * r + 1;
	y = z * r2 + y;
	y = y * s;
	return (float)y;
}

fn double _exp2_specialcase(double tmp, ulong sbits, ulong ki) @private
{
	if (ki & 0x80000000 == 0)
	{
		// k > 0, the exponent of scale might have overflowed by 1.
		sbits -= 1UL << 52;
		double scale = bitcast(sbits, double);
		double y = 2 * (scale + scale * tmp);
		return y;
	}
	// k < 0, need special care in the subnormal range.
	sbits += 1022UL << 52;
	double scale = bitcast(sbits, double);
	double y = scale + scale * tmp;
	if (y >= 1.0)
	{
		return 0x1p-1022 * y;
	}
	// Round y to the right precision before scaling it into the subnormal
	// range to avoid double rounding that can cause 0.5+E/2 ulp error where
	// E is the worst-case ulp error outside the subnormal range.  So this
	// is only useful if the goal is better than 1 ulp worst-case error.
	double lo = scale - y + scale * tmp;
	double hi = 1.0 + y;
	lo = 1.0 - hi + y + lo;
	y = hi + lo - 1.0;
	/* Avoid -0.0 with downward rounding.  */
	if (WANT_ROUNDING && y == 0.0) y = 0.0;
	/* The underflow exception needs to be signaled explicitly.  */
	return __math_xflow(0, 0x1p-1022);
}


macro uint _top12d(double x) @private
{
	return (uint)(bitcast(x, ulong) >> 52);
}

fn double _exp2(double x) @extern("exp2") @weak @nostrip
{
	uint abstop = _top12d(x) & 0x7ff;
	ulong u = bitcast(x, ulong);
	if (abstop - _top12d(0x1p-54) >= _top12d(512.0) - _top12d(0x1p-54)) /* @unlikely */
	{
		if (abstop - _top12d(0x1p-54) >= 0x80000000)
		{
			// Avoid spurious underflow for tiny x.
			// Note: 0 is common input.
			return WANT_ROUNDING ? 1.0 + x : 1.0;
		}
		if (abstop >= _top12d(1024.0))
		{
			switch
			{
				case u == bitcast(-double.inf, ulong):
					return 0.0;
				case abstop >= _top12d(double.inf):
					return 1.0 + x;
				case !(u >> 63):
					return __math_oflow(0);
				case u >= bitcast(-1075.0, ulong):
					return __math_uflow(0);
			}
		}
		// Large x is special cased below.
		if (2 * u > 2 * bitcast(928.0, ulong)) abstop = 0;
	}
	const SHIFT = __EXP2_DATA.exp2_shift;
	// exp2(x) = 2^(k/N) * 2^r, with 2^r in [2^(-1/2N),2^(1/2N)].
	// x = k/N + r, with int k and r in [-1/2N, 1/2N].
	double kd = x + SHIFT;
	ulong ki = bitcast(kd, ulong); /* k.  */
	kd -= SHIFT; /* k/N for int k.  */
	double r = x - kd;
	/* 2^(k/N) ~= scale * (1 + tail).  */
	ulong idx = 2 * (ki % EXP_DATA_WIDTH);
	ulong top = ki << (52 - EXP_TABLE_BITS);
	double tail = __EXP2_DATA.tab[idx];
	/* This is only a valid scale when -1023*N < k < 1024*N.  */
	ulong sbits = __EXP2_DATA.tab[idx + 1] + top;
	/* exp2(x) = 2^(k/N) * 2^r ~= scale + scale * (tail + 2^r - 1).  */
	/* Evaluation is optimized assuming superscalar pipelined execution.  */
	double r2 = r * r;
	const C1 = __EXP2_DATA.exp2_poly[0];
	const C2 = __EXP2_DATA.exp2_poly[1];
	const C3 = __EXP2_DATA.exp2_poly[2];
	const C4 = __EXP2_DATA.exp2_poly[3];
	const C5 = __EXP2_DATA.exp2_poly[4];
	/* Without fma the worst case error is 0.5/N ulp larger.  */
	/* Worst case error is less than 0.5+0.86/N+(abs poly error * 2^53) ulp.  */
	double tmp = tail + r * C1 + r2 * (C2 + r * C3) + r2 * r2 * (C4 + r * C5);
	if (abstop == 0 /* @unlikely */) return _exp2_specialcase(tmp, sbits, ki);
	double scale = bitcast(sbits, double);
	/* Note: tmp == 0 or |tmp| > 2^-65 and scale > 2^-928, so there
	   is no spurious underflow here even without fma.  */
	return scale + scale * tmp;
}