//
// XM6i
// Copyright (c) 2013 Y.Sugahara
//
// [ fpn → BCD 変換 ]
//

#include "fpu_emulate.h"
#include "fpu_bcd.h"
#include <string.h>
#include <stdint.h>
#include <stdio.h>
#include <math.h>
#include <stdlib.h>

//#define FPU_BCD_DEBUG

/*
 * Debug helpers.
 * With FPU_BCD_DEBUG defined, DPRINTF() forwards to printf() and dump()
 * forwards to debug_dump().  Otherwise both expand to nothing, so their
 * arguments are not evaluated at all.
 */
#if defined(FPU_BCD_DEBUG)
#define DPRINTF(msg, ...)	printf(msg, ## __VA_ARGS__)
#define dump(s, a)	debug_dump(s, a)
#else
#define DPRINTF(msg, ...)	/**/
#define dump(s, a)	/**/
#endif

/*
 * Forward declarations of the helpers defined in this file.
 * All of them have internal linkage except fpu_bcd_shl().
 */
static int fpu_bcd_iszero(const BCD *a);
static uint8_t fpu_bcd_shr(BCD *a);
static uint8_t fpu_bcd_shr_n(BCD *a, int n);
       uint8_t fpu_bcd_shl(BCD *a);
static uint8_t fpu_bcd_shl_n(BCD *a, int n);
static uint8_t fpu_bcd_add_d(BCD *a, const BCD *b);
static void fpu_bcd_add_last_n(BCD *a, uint8_t n);
static void fpu_bcd_round_cy(BCD *a, uint8_t cy);
static void fpu_bcd_round(BCD *a);
static void fpu_bcd_add(BCD *a, const BCD *b);
static void fpu_bcd_mul(BCD *a, const BCD *b);
static void fpu_bcd_exp2(BCD *a, int n);
static int getbit(struct fpn *fp, int n);
static void fpu_itobcd(BCD *a, int x);
static void fpu_fpntobcd(BCD *res, struct fpn *fp);

#if defined(FPU_BCD_DEBUG)
/*
 * Print one BCD value on a single line for debugging:
 * the label s, the exponent, then the digits with a decimal point
 * placed after the first digit.
 */
static void __attribute__((__unused__))
debug_dump(const char *s, const BCD *a)
{
	int pos = 0;

	printf("%10s ", s);
	printf("e=%+5d ", a->e);
	/* leading digit, followed by the decimal point */
	printf("%d.", a->d[pos]);
	while (++pos < FPU_BCD_DIGIT) {
		printf("%d", a->d[pos]);
	}
	printf("\n");
}
#endif

/*
 * (a == 0)
 * Only the digit array is inspected; the exponent is ignored.
 * return: 1 when every digit is zero, 0 otherwise
 */
static int
fpu_bcd_iszero(const BCD *a)
{
	int pos = FPU_BCD_DIGIT;

	while (pos-- > 0) {
		if (a->d[pos] != 0) {
			return 0;
		}
	}
	return 1;
}

/*
 * a >>= 1 (one decimal digit)
 * Every digit moves one position towards the end of the array and the
 * first digit becomes 0.  The exponent is left untouched.
 * return: the digit that fell off the end
 */
static uint8_t
fpu_bcd_shr(BCD *a)
{
	const uint8_t dropped = a->d[FPU_BCD_DIGIT - 1];

	/* overlapping move: d[0..DIGIT-2] -> d[1..DIGIT-1] */
	memmove(&a->d[1], &a->d[0], (FPU_BCD_DIGIT - 1) * sizeof(a->d[0]));
	a->d[0] = 0;
dump("shr", a);
	return dropped;
}

/*
 * a >>= n (n decimal digits)
 * The exponent is left untouched.
 * n <= 0 is a no-op that returns 0.  n larger than FPU_BCD_DIGIT clears
 * every digit and also returns 0.
 * return: the most significant of the digits shifted out
 */
static uint8_t
fpu_bcd_shr_n(BCD *a, int n)
{
	uint8_t dropped;
	int keep;

	if (n <= 0) {
		return 0;
	}

	/* number of digits that survive the shift */
	keep = FPU_BCD_DIGIT - n;
	if (keep < 0) {
		memset(a->d, 0, FPU_BCD_DIGIT * sizeof(a->d[0]));
		return 0;
	}

	/* d[keep] is the first digit that does not fit any more */
	dropped = a->d[keep];
	memmove(&a->d[n], &a->d[0], keep * sizeof(a->d[0]));
	memset(&a->d[0], 0, n * sizeof(a->d[0]));
	return dropped;
}

/*
 * a <<= 1 (one decimal digit)
 * Every digit moves one position towards the start of the array and the
 * last digit becomes 0.  The exponent is left untouched.
 * return: the digit that fell off the front
 */
/* Not used in this file; kept for symmetry with fpu_bcd_shr(). */
uint8_t
fpu_bcd_shl(BCD *a)
{
	const uint8_t dropped = a->d[0];

	/* overlapping move: d[1..DIGIT-1] -> d[0..DIGIT-2] */
	memmove(&a->d[0], &a->d[1], (FPU_BCD_DIGIT - 1) * sizeof(a->d[0]));
	a->d[FPU_BCD_DIGIT - 1] = 0;
dump("shl", a);
	return dropped;
}

/*
 * a <<= n (n decimal digits)
 * The exponent is left untouched.
 * n <= 0 is a no-op that returns 0.  n >= FPU_BCD_DIGIT clears every
 * digit and also returns 0.
 * return: d[n - 1], i.e. the last (lowest) of the digits shifted out
 */
static uint8_t
fpu_bcd_shl_n(BCD *a, int n)
{
	uint8_t dropped;
	int keep;

	if (n <= 0) {
		return 0;
	}
	if (n >= FPU_BCD_DIGIT) {
		memset(a->d, 0, FPU_BCD_DIGIT * sizeof(a->d[0]));
		return 0;
	}

	dropped = a->d[n - 1];

	/* number of digits that survive the shift */
	keep = FPU_BCD_DIGIT - n;
	memmove(&a->d[0], &a->d[n], keep * sizeof(a->d[0]));
	memset(&a->d[keep], 0, n * sizeof(a->d[0]));
	return dropped;
}

/*
 * a = a + b, digits only
 * The two digit arrays are added position by position starting from the
 * last digit; the exponents are neither read nor written, so the caller
 * is responsible for aligning them beforehand.
 * return: carry out of the first digit (0 or 1)
 */
static uint8_t
fpu_bcd_add_d(BCD *a, const BCD *b)
{
	uint8_t carry = 0;
	int pos = FPU_BCD_DIGIT;

	while (pos-- > 0) {
		a->d[pos] = a->d[pos] + b->d[pos] + carry;
		carry = (a->d[pos] >= 10) ? 1 : 0;
		if (carry) {
			a->d[pos] -= 10;
		}
	}
	return carry;
}

/*
 * Add n to the last digit of a and propagate the carry upwards.
 * Used for rounding.
 * If the carry runs out of the first digit, the digits are shifted right
 * by one, the first digit is set to 1, the exponent is incremented and
 * the digit pushed out by that shift is rounded via fpu_bcd_round_cy().
 */
static void
fpu_bcd_add_last_n(BCD *a, uint8_t n)
{
	uint8_t carry = n;
	int pos = FPU_BCD_DIGIT - 1;

	while (pos >= 0) {
		a->d[pos] += carry;
		if (a->d[pos] < 10) {
			/* carry absorbed; nothing more to propagate */
			carry = 0;
			break;
		}
		a->d[pos] -= 10;
		carry = 1;
		pos--;
	}

	if (carry != 0) {
		/* overflow past the first digit: renormalize */
		uint8_t dropped = fpu_bcd_shr(a);

		a->d[0] = 1;
		a->e++;
		fpu_bcd_round_cy(a, dropped);
	}
dump("add_last_n", a);
}

/*
 * Banker's rounding driven by a shifted-out digit.
 * cy is the digit just below the last kept digit of a.
 *   cy < 5               : leave a alone
 *   cy == 5, last even   : leave a alone
 *   cy == 5, last odd    : add 1 to the last digit
 *   cy > 5               : add 1 to the last digit
 * Only this single digit is considered; anything below it is ignored.
 */
static void
fpu_bcd_round_cy(BCD *a, uint8_t cy)
{
	if (cy < 5) {
		return;
	}
	if (cy == 5 && a->d[FPU_BCD_DIGIT - 1] % 2 == 0) {
		return;
	}
	fpu_bcd_add_last_n(a, 1);
}

/*
 * Round half up at the last digit.
 * When the last digit is 5 or more, a carry is pushed into the digit
 * above it (by adding 10 to the last digit) and the last digit is then
 * cleared.  When it is below 5 nothing is changed, so the last digit
 * keeps its value.
 */
static void
fpu_bcd_round(BCD *a)
{
	if (a->d[FPU_BCD_DIGIT - 1] < 5) {
		return;
	}
	fpu_bcd_add_last_n(a, 10);
	a->d[FPU_BCD_DIGIT - 1] = 0;
}

/*
 * a = a + b
 *
 * If either operand has all-zero digits the other one is the result.
 * Otherwise the operand with the smaller exponent is shifted right one
 * digit at a time until both exponents match, the last digit shifted out
 * is used for banker's rounding, and the digit arrays are added.  A carry
 * out of the first digit renormalizes the result (shift right, leading 1,
 * exponent + 1) and rounds once more on the digit that was pushed out.
 *
 * b is never modified; a working copy is used.
 */
static void
fpu_bcd_add(BCD *a, const BCD *b)
{
	uint8_t cy = 0;
	BCD t0;
	BCD *t = &t0;
	BCD *shifted = a;	/* operand that was shifted for alignment */

	if (fpu_bcd_iszero(a)) {
		*a = *b;
		return;
	}
	if (fpu_bcd_iszero(b)) {
		return;
	}

	*t = *b;

	/* Align exponents; cy ends up as the last digit shifted out. */
	while (a->e != t->e) {
		if (a->e > t->e) {
			t->e++;
			cy = fpu_bcd_shr(t);
			shifted = t;
		} else {
			a->e++;
			cy = fpu_bcd_shr(a);
		}
	}

	/*
	 * Banker's rounding for the digit lost during alignment (same
	 * decision as fpu_bcd_round_cy(a, cy)).  The increment is applied to
	 * the operand that was shifted: its first digit is 0, so the
	 * increment can never overflow and change its exponent.  Applying it
	 * to an unshifted a whose digits are all 9 would bump a->e and leave
	 * the two operands misaligned for the digit-wise addition below.
	 * The sum is the same either way.
	 */
	if (cy > 5
	 || (cy == 5 && a->d[FPU_BCD_DIGIT - 1] % 2 != 0)) {
		fpu_bcd_add_last_n(shifted, 1);
	}

dump("add:a", a);
dump("add:t", t);
	cy = fpu_bcd_add_d(a, t);

	if (cy) {
		/* carry out of the first digit: renormalize and round */
		cy = fpu_bcd_shr(a);
		a->d[0] = 1;
		a->e++;
		fpu_bcd_round_cy(a, cy);
	}
dump("add", a);
}

/*
 * a = a * b
 *
 * Schoolbook multiplication of the two digit arrays into a scratch
 * buffer, followed by banker's rounding on the first digit that does not
 * fit into the result.  The exponents are added; one more is added when
 * the product needs an extra leading digit.
 * Both operands are fully read before a is written, so a and b may be
 * the same object.
 */
static void
fpu_bcd_mul(BCD *a, const BCD *b)
{
	/*
	 * DIGIT * 2 + 1 digits for the product and rounding digit, plus one
	 * extra slot in front so that index -1 can receive the top carry.
	 */
	uint8_t work[FPU_BCD_DIGIT * 2 + 1 + 1];
	uint8_t *prod = &work[1];
	const uint8_t *src;
	int i, j;

dump("mul:a", a);
dump("mul:b", b);
	memset(work, 0, sizeof(work));

	for (i = FPU_BCD_DIGIT; i-- > 0; ) {
		for (j = FPU_BCD_DIGIT; j-- > 0; ) {
			int pos = i + j;

			prod[pos] += a->d[i] * b->d[j];
			/* push the carry upwards; prod[-1] is a valid slot */
			while (pos >= 0 && prod[pos] >= 10) {
				prod[pos - 1] += prod[pos] / 10;
				prod[pos] %= 10;
				pos--;
			}
		}
	}

	a->e += b->e;

	/* Start one digit earlier when the product gained a leading digit. */
	src = prod;
	if (prod[-1] != 0) {
		a->e++;
		src = prod - 1;
	}
	memcpy(a->d, src, sizeof(a->d));
	/* the digit right after the copied ones decides the rounding */
	fpu_bcd_round_cy(a, src[FPU_BCD_DIGIT]);
dump("mul", a);
}

/*
 * a = pow(2, n)
 *
 * Computed by binary exponentiation: starting from a = 1, the base
 * (2 for n > 0, 0.5 for n < 0) is repeatedly squared and multiplied into
 * a for every set bit of |n|.  Each multiplication rounds to
 * FPU_BCD_DIGIT digits.
 */
static void
fpu_bcd_exp2(BCD *a, int n)
{
	BCD base;
	int bit;
#if defined(FPU_BCD_DEBUG)
int save_n = n;
#endif
DPRINTF("exp2 n=%d\n", n);
	/* a = 1 */
	memset(a, 0, sizeof(*a));
	a->d[0] = 1;

	if (n != 0) {
		memset(&base, 0, sizeof(base));
		if (n < 0) {
			/* base = 0.5, exponent made positive */
			n = -n;
			base.e = -1;
			base.d[0] = 5;
		} else {
			/* base = 2 */
			base.d[0] = 2;
		}

		for (bit = 0; bit < 32; bit++) {
			const int mask = 1 << bit;

			if ((n & mask) != 0) {
				n ^= mask;
				fpu_bcd_mul(a, &base);
			}
			/* stop before squaring once all bits are consumed */
			if (n == 0) {
				break;
			}
			fpu_bcd_mul(&base, &base);
		}
	}
dump("exp2", a);
#if defined(__NetBSD__)	/* for now */
DPRINTF("%18s %.*e\n", "pow", FPU_BCD_DIGIT-1, pow(2, save_n));
#else
DPRINTF("%18s %.*Le\n", "powl", FPU_BCD_DIGIT-1, powl(2, save_n));
#endif
}

/*
 * Test one mantissa bit, addressed relative to bit FP_LG of fp_mant[0]:
 * n = 0 selects that bit and larger n selects progressively lower bits,
 * continuing into the following fp_mant[] words.
 * n is expected to be non-negative.
 * return: 1 if the bit is set, 0 otherwise
 */
static int
getbit(struct fpn *fp, int n)
{
	int t;
	uint32_t word;
	uint32_t mask;
	int b;

	/* bit index counted from the top bit (bit 31) of fp_mant[0] */
	t = n + (32 - FP_LG) - 1;
	word = fp->fp_mant[t / 32];
	/*
	 * Build the mask as unsigned: when t is a multiple of 32 the shift
	 * count is 31, and 1 << 31 on a signed int is undefined behavior.
	 */
	mask = UINT32_C(1) << (31 - (t % 32));
	b = (word & mask) != 0;
DPRINTF("n=%d %d\n", n, b);
	return b;
}

/*
 * a = |x|
 * Builds the BCD value by summing pow(2, bit) for every set bit of the
 * absolute value of x.  x == 0 leaves a entirely zeroed.
 */
static void
fpu_itobcd(BCD *a, int x)
{
	BCD term;
	int bit;

	memset(a, 0, sizeof(*a));

	x = abs(x);

	for (bit = 0; bit < 32 && x != 0; bit++) {
		if ((x & (1 << bit)) == 0) {
			continue;
		}
		x ^= (1 << bit);
		fpu_bcd_exp2(&term, bit);
		fpu_bcd_add(a, &term);
	}
}

/*
 * Convert the magnitude of an fpn to the internal BCD structure.
 *
 * For every set mantissa bit i, pow(2, fp_exp - i) is computed in BCD
 * and accumulated, walking from the least significant bit upwards.  The
 * sum is finally rounded at its last digit.  Only fp_exp and the
 * mantissa bits are read here.
 */
static void
fpu_fpntobcd(BCD *res, struct fpn *fp)
{
	BCD term;
	BCD sum;
	int bit;

	memset(&term, 0, sizeof(term));
	memset(&sum, 0, sizeof(sum));

	/* take the bits starting from the least significant one */
	bit = FP_NMANT;
	while (bit-- > 0) {
		if (!getbit(fp, bit)) {
			continue;
		}

		/* weight of this bit: 2^(exp - bit) */
		fpu_bcd_exp2(&term, fp->fp_exp - bit);
dump("r", &term);

		fpu_bcd_add(&sum, &term);
dump("b", &sum);
	}
dump("b", &sum);

	fpu_bcd_round(&sum);
dump("b", &sum);

	/* for now */
	*res = sum;
}

/*
 * fpn -> 96-bit packed BCD(space)
 *
 * fe:      emulator state; FPSR_OPERR is set in fe_fpsr when kfactor > 17
 * fp:      value to convert
 * space:   receives the three 32-bit words of the packed result
 * kfactor: > 0 selects the number of significant digits (1..17),
 *          <= 0 selects -kfactor digits after the decimal point
 *
 * Layout written to space[0]: bit 31 is set from fp_sign, bit 30 is set
 * for a negative decimal exponent, bits 27-16 hold three exponent
 * digits, bits 15-12 a fourth (most significant) exponent digit and the
 * low 4 bits the integer digit.  space[1] and space[2] hold the 16
 * fraction digits.  Infinity is encoded as 0x7fff0000 plus the sign.
 * NaN is not special-cased here.
 */
void
fpu_ftop(struct fpemu *fe, struct fpn *fp, uint32_t *space, int kfactor)
{
	BCD b0, *b;
	BCD e0, *e;
	int i;
	int shift = 0;
	uint8_t cy;

	b = &b0;
	e = &e0;

	space[0] = 0;
	space[1] = 0;
	space[2] = 0;

	if (fp->fp_sign) {
		space[0] |= 0x80000000;
	}

	if (ISINF(fp)) {
		space[0] |= 0x7fff0000;
		return;
	}

	/* convert fp to unpacked BCD */
	fpu_fpntobcd(b, fp);

	/*
	 * k-factor
	 */
DPRINTF("kfactor=%d\n", kfactor);
	if (kfactor <= 0) {
		/*
		 * FORTRAN "F" format.
		 * Output up to -k digits after the decimal point.
		 */
		if (b->e >= 17) {
			/*
			 * For values of 10^17 or more, 17 digits never reach
			 * the fraction, so this is equivalent to k=+17.
			 */
			kfactor = 17;
		} else {
			shift = FPU_BCD_DIGIT - b->e + kfactor - 2;
		}
	}
	if (kfactor > 0) {
		if (kfactor > 17) {
			fe->fe_fpsr |= FPSR_OPERR;
			kfactor = 17;
		}
		/*
		 * FORTRAN "E" format.
		 * k is the number of digits to output regardless of where
		 * the decimal point is.  1 means a single digit (integer
		 * part only), 17 means all digits.
		 */
		shift = FPU_BCD_DIGIT - kfactor - 1;
	}

	/*
	 * Round at the requested position: move the first discarded digit
	 * into the last slot, round there, then move everything back.
	 */
dump("k:0", b);
	fpu_bcd_shr_n(b, shift);
dump("k:1", b);
	fpu_bcd_round(b);
	/* the last digit was only needed for rounding */
	b->d[FPU_BCD_DIGIT - 1] = 0;
dump("k:2", b);
	cy = fpu_bcd_shl_n(b, shift);
	if (cy != 0) {
		/*
		 * Rounding carried past the original first digit (e.g. 9.5
		 * rounded to one digit).  The carry sat in d[shift - 1] and
		 * was just shifted out, leaving all digits zero; put it back
		 * as the leading digit and bump the exponent, otherwise the
		 * result would collapse to zero.
		 */
		b->d[0] = cy;
		b->e++;
	}
dump("k:3", b);

	/*
	 * From here on, convert to the 68881 packed BCD format.
	 */
	/* exponent conversion */
	/* the EXP field of the P format is base 10 */
	if (b->e < 0) {
		space[0] |= 0x40000000;
	}

	fpu_itobcd(e, abs(b->e));
	/* right-align the exponent digits into d[0..3] */
	fpu_bcd_shr_n(e, 3 - e->e);
	space[0] |= e->d[0] << 12;	/* EXP3 */
	space[0] |= e->d[1] << 24;	/* EXP2 */
	space[0] |= e->d[2] << 20;	/* EXP1 */
	space[0] |= e->d[3] << 16;	/* EXP0 */

	/* integer digit */
	space[0] |= b->d[0];
DPRINTF("BCD[0]=%08x\n", space[0]);

	/* fraction part */
	for (i = 1; i < 9; i++) {
		space[1] <<= 4;
		space[1] |= b->d[i];
	}

	for (i = 9; i < 17; i++) {
		space[2] <<= 4;
		space[2] |= b->d[i];
	}
}
