[net-next-2.6.git] / arch / sh / kernel / cpu / sh4 / fpu.c

/*
 * Save/restore floating point context for signal handlers.
 *
 * This file is subject to the terms and conditions of the GNU General Public
 * License.  See the file "COPYING" in the main directory of this archive
 * for more details.
 *
 * Copyright (C) 1999, 2000  Kaz Kojima & Niibe Yutaka
 * Copyright (C) 2006  ST Microelectronics Ltd. (denorm support)
 *
 * FIXME! These routines have not been tested for big endian case.
 */
#include <linux/sched.h>
#include <linux/signal.h>
#include <linux/io.h>
#include <cpu/fpu.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/fpu.h>

/* The PR (precision) bit in the FP Status Register must be clear when
 * an frchg instruction is executed, otherwise the instruction is undefined.
 * Executing frchg with PR set causes a trap on some SH4 implementations.
 */

/*
 * FPSCR value loaded immediately before the frchg sequences in this file.
 * It is all zeroes, so every FPSCR bit (PR included) is clear.
 */
#define FPSCR_RCHG 0x00000000

/*
 * Arithmetic helpers defined outside this file.  They take and return raw
 * integer bit patterns rather than C floating point types; ieee_fpe_handler()
 * uses them to compute results for operations that trapped on denormal
 * operands.  (Presumably a software floating point library - TODO confirm
 * where they are implemented.)
 */
extern unsigned long long float64_div(unsigned long long a,
				      unsigned long long b);
extern unsigned long int float32_div(unsigned long int a, unsigned long int b);
extern unsigned long long float64_mul(unsigned long long a,
				      unsigned long long b);
extern unsigned long int float32_mul(unsigned long int a, unsigned long int b);
extern unsigned long long float64_add(unsigned long long a,
				      unsigned long long b);
extern unsigned long int float32_add(unsigned long int a, unsigned long int b);
extern unsigned long long float64_sub(unsigned long long a,
				      unsigned long long b);
extern unsigned long int float32_sub(unsigned long int a, unsigned long int b);
extern unsigned long int float64_to_float32(unsigned long long a);

/*
 * Flags accumulated through float_raise().  The fpu_error trap handler
 * zeroes this before calling ieee_fpe_handler() and afterwards merges it
 * into the task's saved FPSCR.  It is a single file-scope variable, not
 * per-task state.
 */
static unsigned int fpu_exception_flags;

/*
 * Save FPU registers onto task structure.
 * Assume called with FPU enabled (SR.FD=0).
 *
 * @tsk:  task whose thread.fpu.hard area receives the register contents
 * @regs: trap-time register frame, only passed on to release_fpu()
 *
 * Sequence performed:
 *  - clear TIF_USEDFPU on @tsk and call enable_fpu();
 *  - starting at &tsk->thread.fpu.hard.status and moving DOWN in memory
 *    (pre-decrement stores), write fpul, then fpscr;
 *  - load FPSCR_RCHG into fpscr, execute frchg and store fr15..fr0, then
 *    execute frchg again and store fr15..fr0 of the other bank;
 *  - load FPSCR_INIT into fpscr;
 *  - call disable_fpu() and release_fpu(regs).
 *
 * Because the stores walk downwards from `status`, this relies on the
 * two 16-register arrays, fpscr and fpul sitting directly below `status`
 * in struct sh_fpu_hard_struct, in that order - TODO confirm against the
 * structure definition.
 *
 * Note that on return the hardware FPSCR holds FPSCR_INIT, not the task's
 * value; the task's FPSCR only survives in tsk->thread.fpu.hard.  Calling
 * this function a second time without an intervening restore therefore
 * stores FPSCR_INIT over the saved FPSCR.
 */
void save_fpu(struct task_struct *tsk, struct pt_regs *regs)
{
	/* Receives the final (decremented) pointer; the value is not used. */
	unsigned long dummy;

	clear_tsk_thread_flag(tsk, TIF_USEDFPU);
	enable_fpu();
	/*
	 * %0 is both the output `dummy` and the input pointer (matching
	 * constraint "0"), since every @-%0 store modifies the register.
	 * %2 = FPSCR_RCHG, %3 = FPSCR_INIT.
	 */
	asm volatile ("sts.l	fpul, @-%0\n\t"
		      "sts.l	fpscr, @-%0\n\t"
		      "lds	%2, fpscr\n\t"
		      "frchg\n\t"
		      "fmov.s	fr15, @-%0\n\t"
		      "fmov.s	fr14, @-%0\n\t"
		      "fmov.s	fr13, @-%0\n\t"
		      "fmov.s	fr12, @-%0\n\t"
		      "fmov.s	fr11, @-%0\n\t"
		      "fmov.s	fr10, @-%0\n\t"
		      "fmov.s	fr9, @-%0\n\t"
		      "fmov.s	fr8, @-%0\n\t"
		      "fmov.s	fr7, @-%0\n\t"
		      "fmov.s	fr6, @-%0\n\t"
		      "fmov.s	fr5, @-%0\n\t"
		      "fmov.s	fr4, @-%0\n\t"
		      "fmov.s	fr3, @-%0\n\t"
		      "fmov.s	fr2, @-%0\n\t"
		      "fmov.s	fr1, @-%0\n\t"
		      "fmov.s	fr0, @-%0\n\t"
		      "frchg\n\t"
		      "fmov.s	fr15, @-%0\n\t"
		      "fmov.s	fr14, @-%0\n\t"
		      "fmov.s	fr13, @-%0\n\t"
		      "fmov.s	fr12, @-%0\n\t"
		      "fmov.s	fr11, @-%0\n\t"
		      "fmov.s	fr10, @-%0\n\t"
		      "fmov.s	fr9, @-%0\n\t"
		      "fmov.s	fr8, @-%0\n\t"
		      "fmov.s	fr7, @-%0\n\t"
		      "fmov.s	fr6, @-%0\n\t"
		      "fmov.s	fr5, @-%0\n\t"
		      "fmov.s	fr4, @-%0\n\t"
		      "fmov.s	fr3, @-%0\n\t"
		      "fmov.s	fr2, @-%0\n\t"
		      "fmov.s	fr1, @-%0\n\t"
		      "fmov.s	fr0, @-%0\n\t"
		      "lds	%3, fpscr\n\t":"=r" (dummy)
		      :"0"((char *)(&tsk->thread.fpu.hard.status)),
		      "r"(FPSCR_RCHG), "r"(FPSCR_INIT)
		      :"memory");

	disable_fpu();
	release_fpu(regs);
}

/*
 * Reload the FPU from the state stored in @tsk->thread.fpu.
 *
 * Mirror image of save_fpu(): the save area is read in ASCENDING address
 * order (post-increment loads) starting at &tsk->thread.fpu.  After
 * loading FPSCR_RCHG into fpscr, fr0..fr15 are filled, frchg switches
 * banks, fr0..fr15 are filled again, frchg switches back, and finally
 * fpscr and fpul are loaded from memory, so the task's own FPSCR becomes
 * live last.
 *
 * The function brackets the sequence with enable_fpu()/disable_fpu();
 * callers in this file invoke grab_fpu(regs) beforehand (presumably so
 * the FPU is enabled again on return to the task - TODO confirm).
 */
static void restore_fpu(struct task_struct *tsk)
{
	/* Receives the final (incremented) pointer; the value is not used. */
	unsigned long dummy;

	enable_fpu();
	/*
	 * %0 is both the output `dummy` and the input pointer (matching
	 * constraint "0"), since every @%0+ load modifies the register.
	 * %2 = FPSCR_RCHG.
	 */
	asm volatile ("lds	%2, fpscr\n\t"
		      "fmov.s	@%0+, fr0\n\t"
		      "fmov.s	@%0+, fr1\n\t"
		      "fmov.s	@%0+, fr2\n\t"
		      "fmov.s	@%0+, fr3\n\t"
		      "fmov.s	@%0+, fr4\n\t"
		      "fmov.s	@%0+, fr5\n\t"
		      "fmov.s	@%0+, fr6\n\t"
		      "fmov.s	@%0+, fr7\n\t"
		      "fmov.s	@%0+, fr8\n\t"
		      "fmov.s	@%0+, fr9\n\t"
		      "fmov.s	@%0+, fr10\n\t"
		      "fmov.s	@%0+, fr11\n\t"
		      "fmov.s	@%0+, fr12\n\t"
		      "fmov.s	@%0+, fr13\n\t"
		      "fmov.s	@%0+, fr14\n\t"
		      "fmov.s	@%0+, fr15\n\t"
		      "frchg\n\t"
		      "fmov.s	@%0+, fr0\n\t"
		      "fmov.s	@%0+, fr1\n\t"
		      "fmov.s	@%0+, fr2\n\t"
		      "fmov.s	@%0+, fr3\n\t"
		      "fmov.s	@%0+, fr4\n\t"
		      "fmov.s	@%0+, fr5\n\t"
		      "fmov.s	@%0+, fr6\n\t"
		      "fmov.s	@%0+, fr7\n\t"
		      "fmov.s	@%0+, fr8\n\t"
		      "fmov.s	@%0+, fr9\n\t"
		      "fmov.s	@%0+, fr10\n\t"
		      "fmov.s	@%0+, fr11\n\t"
		      "fmov.s	@%0+, fr12\n\t"
		      "fmov.s	@%0+, fr13\n\t"
		      "fmov.s	@%0+, fr14\n\t"
		      "fmov.s	@%0+, fr15\n\t"
		      "frchg\n\t"
		      "lds.l	@%0+, fpscr\n\t"
		      "lds.l	@%0+, fpul\n\t"
		      :"=r" (dummy)
		      :"0"(&tsk->thread.fpu), "r"(FPSCR_RCHG)
		      :"memory");
	disable_fpu();
}

/*
 * Load the FPU with signalling NANS.  This bit pattern we're using
 * has the property that no matter whether considered as single or as
 * double precision represents signaling NANS.
 *
 * NOTE(review): the value actually placed in FPUL below is the constant
 * 0, and fsts copies FPUL into each register, so as written every
 * register in both banks ends up holding an all-zero bit pattern rather
 * than a NaN pattern.  Confirm which behaviour is intended.
 *
 * Sequence: enable_fpu(); fpul = 0; fpscr = FPSCR_RCHG; fill fr0..fr15;
 * frchg; fill fr0..fr15 of the other bank; frchg; fpscr = FPSCR_INIT;
 * disable_fpu().
 */

static void fpu_init(void)
{
	enable_fpu();
	/* %0 = 0 (fill value), %1 = FPSCR_RCHG, %2 = FPSCR_INIT */
	asm volatile (	"lds	%0, fpul\n\t"
			"lds	%1, fpscr\n\t"
			"fsts	fpul, fr0\n\t"
			"fsts	fpul, fr1\n\t"
			"fsts	fpul, fr2\n\t"
			"fsts	fpul, fr3\n\t"
			"fsts	fpul, fr4\n\t"
			"fsts	fpul, fr5\n\t"
			"fsts	fpul, fr6\n\t"
			"fsts	fpul, fr7\n\t"
			"fsts	fpul, fr8\n\t"
			"fsts	fpul, fr9\n\t"
			"fsts	fpul, fr10\n\t"
			"fsts	fpul, fr11\n\t"
			"fsts	fpul, fr12\n\t"
			"fsts	fpul, fr13\n\t"
			"fsts	fpul, fr14\n\t"
			"fsts	fpul, fr15\n\t"
			"frchg\n\t"
			"fsts	fpul, fr0\n\t"
			"fsts	fpul, fr1\n\t"
			"fsts	fpul, fr2\n\t"
			"fsts	fpul, fr3\n\t"
			"fsts	fpul, fr4\n\t"
			"fsts	fpul, fr5\n\t"
			"fsts	fpul, fr6\n\t"
			"fsts	fpul, fr7\n\t"
			"fsts	fpul, fr8\n\t"
			"fsts	fpul, fr9\n\t"
			"fsts	fpul, fr10\n\t"
			"fsts	fpul, fr11\n\t"
			"fsts	fpul, fr12\n\t"
			"fsts	fpul, fr13\n\t"
			"fsts	fpul, fr14\n\t"
			"fsts	fpul, fr15\n\t"
			"frchg\n\t"
			"lds	%2, fpscr\n\t"
			:	/* no output */
			:"r" (0), "r"(FPSCR_RCHG), "r"(FPSCR_INIT));
	disable_fpu();
}

/**
 *      denormal_to_double - Given denormalized float number,
 *                           store double float
 *
 *      @fpu: Pointer to sh_fpu_hard structure
 *      @n: Index to FP register
 *
 *      The single-precision source is taken from @fpu->fpul.  If it is a
 *      denormal (exponent field zero, mantissa non-zero) it is normalised
 *      and written as a double: the high word (sign, exponent, top
 *      mantissa bits) to @fpu->fp_regs[@n] and the low word to
 *      @fpu->fp_regs[@n + 1].  For any other FPUL value, including +0 and
 *      -0, nothing is written.
 *
 *      The shifts below rely on unsigned long being 32 bits wide (bits
 *      shifted past bit 31 are discarded) - TODO confirm for any new port.
 */
static void denormal_to_double(struct sh_fpu_hard_struct *fpu, int n)
{
	unsigned long du, dl;
	unsigned long x = fpu->fpul;
	int exp = 1023 - 126;	/* double bias minus single denormal exponent */

	/*
	 * Require a non-zero mantissa rather than just x != 0: for -0.0f
	 * (0x80000000) the sign bit alone would pass an x != 0 test, the
	 * first shift would leave x == 0, and the normalisation loop below
	 * would never terminate.
	 */
	if ((x & 0x007fffff) != 0 && (x & 0x7f800000) == 0) {
		du = (x & 0x80000000);	/* keep the sign */

		/* Shift the leading 1 up to the implicit-bit position. */
		while ((x & 0x00800000) == 0) {
			x <<= 1;
			exp--;
		}
		x &= 0x007fffff;	/* drop the now-implicit leading 1 */

		/* 23 mantissa bits: top 20 go in the high word, low 3 below. */
		du |= (exp << 20) | (x >> 3);
		dl = x << 29;

		fpu->fp_regs[n] = du;
		fpu->fp_regs[n + 1] = dl;
	}
}

/**
 *	ieee_fpe_handler - Handle denormalized number exception
 *
 *	@regs: Pointer to register structure
 *
 *	Decodes the instruction at @regs->pc.  If it is a delayed branch,
 *	the instruction examined is the one in its delay slot (pc + 2) and
 *	the resume address is the branch target; otherwise the instruction
 *	itself is examined and the resume address is the next instruction.
 *
 *	For fcnvsd, fmul, fadd, fsub, fdiv and fcnvds the result is computed
 *	in software from the register image in current->thread.fpu.hard and
 *	written back to that image, and @regs->pc is advanced.  This is only
 *	done when the saved FPSCR has FPSCR_CAUSE_ERROR set and (for the
 *	arithmetic cases) an operand is in the denormal range.
 *
 *	The caller must already have saved the FPU context with save_fpu()
 *	(fpu_error does); this function works purely on the saved image and
 *	must not save again.
 *
 *	NOTE(review): @regs->pc is dereferenced directly, without any
 *	user-access helper - confirm that is acceptable for this trap path.
 *	NOTE(review): regs->pr is updated for bsr/jsr before it is known
 *	whether emulation succeeds, and bsrf is not covered by that update.
 *
 *	Returns 1 when it's handled (should not cause exception).
 */
static int ieee_fpe_handler(struct pt_regs *regs)
{
	unsigned short insn = *(unsigned short *)regs->pc;
	unsigned short finsn;
	unsigned long nextpc;
	int nib[4] = {
		(insn >> 12) & 0xf,
		(insn >> 8) & 0xf,
		(insn >> 4) & 0xf,
		insn & 0xf
	};

	if (nib[0] == 0xb || (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb))
		regs->pr = regs->pc + 4;  /* bsr & jsr */

	if (nib[0] == 0xa || nib[0] == 0xb) {
		/* bra & bsr: 12-bit signed displacement, in units of 2 bytes */
		nextpc = regs->pc + 4 + ((short)((insn & 0xfff) << 4) >> 3);
		finsn = *(unsigned short *)(regs->pc + 2);
	} else if (nib[0] == 0x8 && nib[1] == 0xd) {
		/*
		 * bt/s: 8-bit signed displacement, in units of 2 bytes.
		 * signed char is spelled out so the sign extension does not
		 * depend on whether plain char is signed.
		 */
		if (regs->sr & 1)
			nextpc = regs->pc + 4 +
				 ((signed char)(insn & 0xff) * 2);
		else
			nextpc = regs->pc + 4;
		finsn = *(unsigned short *)(regs->pc + 2);
	} else if (nib[0] == 0x8 && nib[1] == 0xf) {
		/* bf/s: as bt/s with the T-bit sense inverted */
		if (regs->sr & 1)
			nextpc = regs->pc + 4;
		else
			nextpc = regs->pc + 4 +
				 ((signed char)(insn & 0xff) * 2);
		finsn = *(unsigned short *)(regs->pc + 2);
	} else if (nib[0] == 0x4 && nib[3] == 0xb &&
		   (nib[2] == 0x0 || nib[2] == 0x2)) {
		/* jmp & jsr */
		nextpc = regs->regs[nib[1]];
		finsn = *(unsigned short *)(regs->pc + 2);
	} else if (nib[0] == 0x0 && nib[3] == 0x3 &&
		   (nib[2] == 0x0 || nib[2] == 0x2)) {
		/* braf & bsrf */
		nextpc = regs->pc + 4 + regs->regs[nib[1]];
		finsn = *(unsigned short *)(regs->pc + 2);
	} else if (insn == 0x000b) {
		/* rts */
		nextpc = regs->pr;
		finsn = *(unsigned short *)(regs->pc + 2);
	} else {
		/* Not a delayed branch: the instruction itself trapped. */
		nextpc = regs->pc + instruction_size(insn);
		finsn = insn;
	}

	if ((finsn & 0xf1ff) == 0xf0ad) {
		/* fcnvsd */
		struct task_struct *tsk = current;

		/*
		 * Do NOT call save_fpu() here.  The caller has already saved
		 * the context, and save_fpu() leaves FPSCR_INIT in the live
		 * FPSCR; saving a second time would store that value over the
		 * task's trapping FPSCR, so the cause test below would no
		 * longer see the task's state.
		 */
		if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR))
			/* FPU error */
			denormal_to_double(&tsk->thread.fpu.hard,
					   (finsn >> 8) & 0xf);
		else
			return 0;

		regs->pc = nextpc;
		return 1;
	} else if ((finsn & 0xf00f) == 0xf002) {
		/* fmul */
		struct task_struct *tsk = current;
		int fpscr;
		int n, m, prec;
		unsigned int hx, hy;

		n = (finsn >> 8) & 0xf;
		m = (finsn >> 4) & 0xf;
		hx = tsk->thread.fpu.hard.fp_regs[n];
		hy = tsk->thread.fpu.hard.fp_regs[m];
		fpscr = tsk->thread.fpu.hard.fpscr;
		prec = fpscr & FPSCR_DBL_PRECISION;

		if ((fpscr & FPSCR_CAUSE_ERROR)
		    && (prec && ((hx & 0x7fffffff) < 0x00100000
				 || (hy & 0x7fffffff) < 0x00100000))) {
			long long llx, lly;

			/* FPU error because of denormal (doubles) */
			llx = ((long long)hx << 32)
			    | tsk->thread.fpu.hard.fp_regs[n + 1];
			lly = ((long long)hy << 32)
			    | tsk->thread.fpu.hard.fp_regs[m + 1];
			llx = float64_mul(llx, lly);
			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
			tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
		} else if ((fpscr & FPSCR_CAUSE_ERROR)
			   && (!prec && ((hx & 0x7fffffff) < 0x00800000
					 || (hy & 0x7fffffff) < 0x00800000))) {
			/* FPU error because of denormal (floats) */
			hx = float32_mul(hx, hy);
			tsk->thread.fpu.hard.fp_regs[n] = hx;
		} else
			return 0;

		regs->pc = nextpc;
		return 1;
	} else if ((finsn & 0xf00e) == 0xf000) {
		/* fadd, fsub (low nibble 0 = fadd, 1 = fsub) */
		struct task_struct *tsk = current;
		int fpscr;
		int n, m, prec;
		unsigned int hx, hy;

		n = (finsn >> 8) & 0xf;
		m = (finsn >> 4) & 0xf;
		hx = tsk->thread.fpu.hard.fp_regs[n];
		hy = tsk->thread.fpu.hard.fp_regs[m];
		fpscr = tsk->thread.fpu.hard.fpscr;
		prec = fpscr & FPSCR_DBL_PRECISION;

		if ((fpscr & FPSCR_CAUSE_ERROR)
		    && (prec && ((hx & 0x7fffffff) < 0x00100000
				 || (hy & 0x7fffffff) < 0x00100000))) {
			long long llx, lly;

			/* FPU error because of denormal (doubles) */
			llx = ((long long)hx << 32)
			    | tsk->thread.fpu.hard.fp_regs[n + 1];
			lly = ((long long)hy << 32)
			    | tsk->thread.fpu.hard.fp_regs[m + 1];
			if ((finsn & 0xf00f) == 0xf000)
				llx = float64_add(llx, lly);
			else
				llx = float64_sub(llx, lly);
			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
			tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
		} else if ((fpscr & FPSCR_CAUSE_ERROR)
			   && (!prec && ((hx & 0x7fffffff) < 0x00800000
					 || (hy & 0x7fffffff) < 0x00800000))) {
			/* FPU error because of denormal (floats) */
			if ((finsn & 0xf00f) == 0xf000)
				hx = float32_add(hx, hy);
			else
				hx = float32_sub(hx, hy);
			tsk->thread.fpu.hard.fp_regs[n] = hx;
		} else
			return 0;

		regs->pc = nextpc;
		return 1;
	} else if ((finsn & 0xf00f) == 0xf003) {
		/*
		 * fdiv.  The whole low nibble is compared: testing only the
		 * two low bits would also accept the unrelated 0xf..7,
		 * 0xf..b and 0xf..f encodings.
		 */
		struct task_struct *tsk = current;
		int fpscr;
		int n, m, prec;
		unsigned int hx, hy;

		n = (finsn >> 8) & 0xf;
		m = (finsn >> 4) & 0xf;
		hx = tsk->thread.fpu.hard.fp_regs[n];
		hy = tsk->thread.fpu.hard.fp_regs[m];
		fpscr = tsk->thread.fpu.hard.fpscr;
		prec = fpscr & FPSCR_DBL_PRECISION;

		if ((fpscr & FPSCR_CAUSE_ERROR)
		    && (prec && ((hx & 0x7fffffff) < 0x00100000
				 || (hy & 0x7fffffff) < 0x00100000))) {
			long long llx, lly;

			/* FPU error because of denormal (doubles) */
			llx = ((long long)hx << 32)
			    | tsk->thread.fpu.hard.fp_regs[n + 1];
			lly = ((long long)hy << 32)
			    | tsk->thread.fpu.hard.fp_regs[m + 1];

			llx = float64_div(llx, lly);

			tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
			tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
		} else if ((fpscr & FPSCR_CAUSE_ERROR)
			   && (!prec && ((hx & 0x7fffffff) < 0x00800000
					 || (hy & 0x7fffffff) < 0x00800000))) {
			/* FPU error because of denormal (floats) */
			hx = float32_div(hx, hy);
			tsk->thread.fpu.hard.fp_regs[n] = hx;
		} else
			return 0;

		regs->pc = nextpc;
		return 1;
	} else if ((finsn & 0xf1ff) == 0xf0bd) {
		/*
		 * fcnvds - double to single precision convert.
		 * Matched the same way as fcnvsd above (bit 8 must be 0), so
		 * bits 11..8 are the even index of the source register pair.
		 * All four bits are kept: masking with 0x7 would alias
		 * DR8..DR14 onto DR0..DR6.
		 */
		struct task_struct *tsk = current;
		int m;
		unsigned int hx;

		m = (finsn >> 8) & 0xf;
		hx = tsk->thread.fpu.hard.fp_regs[m];

		if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR)
			&& ((hx & 0x7fffffff) < 0x00100000)) {
			/* subnormal double to float conversion */
			long long llx;

			llx = ((long long)tsk->thread.fpu.hard.fp_regs[m] << 32)
			    | tsk->thread.fpu.hard.fp_regs[m + 1];

			tsk->thread.fpu.hard.fpul = float64_to_float32(llx);
		} else
			return 0;

		regs->pc = nextpc;
		return 1;
	}

	return 0;
}

/*
 * Record exception flags: OR @flags into the file-scope
 * fpu_exception_flags accumulator.  Bits already set stay set.
 */
void float_raise(unsigned int flags)
{
	fpu_exception_flags = fpu_exception_flags | flags;
}

/*
 * Return the rounding mode that FPSCR_ROUNDING_MODE() extracts from the
 * current task's saved FPSCR (thread.fpu.hard.fpscr), not from the live
 * hardware register.
 */
int float_rounding_mode(void)
{
	return FPSCR_ROUNDING_MODE(current->thread.fpu.hard.fpscr);
}

/*
 * FPU error trap handler.
 *
 * Saves the FPU context, clears the accumulated exception flags and lets
 * ieee_fpe_handler() try to emulate the trapping instruction.  If that
 * fails the task gets SIGFPE.  If it succeeds, the cause and flag fields
 * of the saved FPSCR are rebuilt from fpu_exception_flags, the context is
 * reloaded into the FPU, and SIGFPE is sent only when one of the raised
 * exceptions is enabled in the saved FPSCR.
 */
BUILD_TRAP_HANDLER(fpu_error)
{
	struct task_struct *tsk = current;
	TRAP_HANDLER_DECL;

	save_fpu(tsk, regs);
	fpu_exception_flags = 0;

	if (!ieee_fpe_handler(regs)) {
		/* Not something we can emulate. */
		force_sig(SIGFPE, tsk);
		return;
	}

	/* Drop stale cause/flag bits, then install the freshly raised ones. */
	tsk->thread.fpu.hard.fpscr &= ~(FPSCR_CAUSE_MASK | FPSCR_FLAG_MASK);
	/* Set the FPSCR flag as well as cause bits - simply
	 * replicate the cause */
	tsk->thread.fpu.hard.fpscr |= fpu_exception_flags |
				      (fpu_exception_flags >> 10);

	grab_fpu(regs);
	restore_fpu(tsk);
	set_tsk_thread_flag(tsk, TIF_USEDFPU);

	/* Signal only if a raised exception lines up with an enable bit. */
	if (((tsk->thread.fpu.hard.fpscr & FPSCR_ENABLE_MASK) >> 7) &
	    (fpu_exception_flags >> 2))
		force_sig(SIGFPE, tsk);
}

/*
 * Give the current task a usable FPU context.
 *
 * @regs: trap-time register frame; passed to grab_fpu() and used to tell
 *        user mode from kernel mode.
 *
 * A frame that is not from user mode is reported and hits BUG().
 * Otherwise a task that has used the FPU before gets its saved context
 * reloaded, and a first-time user gets a freshly initialised FPU and is
 * marked as having used math.  In both cases TIF_USEDFPU is set and the
 * task's fpu_counter is incremented.
 */
void fpu_state_restore(struct pt_regs *regs)
{
	struct task_struct *tsk = current;

	grab_fpu(regs);

	if (unlikely(!user_mode(regs))) {
		printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
		BUG();
		return;
	}

	if (unlikely(!used_math())) {
		/* First time FPU user.  */
		fpu_init();
		set_used_math();
	} else {
		/* Using the FPU again.  */
		restore_fpu(tsk);
	}

	set_tsk_thread_flag(tsk, TIF_USEDFPU);
	tsk->fpu_counter += 1;
}

/*
 * Trap-handler wrapper that simply forwards to fpu_state_restore().
 * `regs` is not declared in this block; it presumably comes from
 * BUILD_TRAP_HANDLER / TRAP_HANDLER_DECL - TODO confirm against their
 * definitions.
 */
BUILD_TRAP_HANDLER(fpu_state_restore)
{
	TRAP_HANDLER_DECL;

	fpu_state_restore(regs);
}
Commit	Line	Data
c8c0a1ab	1	/*
1da177e4 LT	2	* Save/restore floating point context for signal handlers.
	3	*
	4	* This file is subject to the terms and conditions of the GNU General Public
	5	* License. See the file "COPYING" in the main directory of this archive
	6	* for more details.
	7	*
	8	* Copyright (C) 1999, 2000 Kaz Kojima & Niibe Yutaka
c8c0a1ab	9	* Copyright (C) 2006 ST Microelectronics Ltd. (denorm support)
1da177e4	10	*
c8c0a1ab	11	* FIXME! These routines have not been tested for big endian case.
1da177e4	12	*/
1da177e4 LT	13	#include <linux/sched.h>
1da177e4 LT	14	#include <linux/signal.h>
c8c0a1ab	15	#include <linux/io.h>
f15cbe6f	16	#include <cpu/fpu.h>
1da177e4	17	#include <asm/processor.h>
53f983a9	18	#include <asm/system.h>
9bbafce2	19	#include <asm/fpu.h>
1da177e4 LT	20
	21	/* The PR (precision) bit in the FP Status Register must be clear when
	22	* an frchg instruction is executed, otherwise the instruction is undefined.
	23	* Executing frchg with PR set causes a trap on some SH4 implementations.
	24	*/
	25
	26	#define FPSCR_RCHG 0x00000000
c8c0a1ab SM	27	extern unsigned long long float64_div(unsigned long long a,
	28	unsigned long long b);
	29	extern unsigned long int float32_div(unsigned long int a, unsigned long int b);
	30	extern unsigned long long float64_mul(unsigned long long a,
	31	unsigned long long b);
	32	extern unsigned long int float32_mul(unsigned long int a, unsigned long int b);
	33	extern unsigned long long float64_add(unsigned long long a,
	34	unsigned long long b);
	35	extern unsigned long int float32_add(unsigned long int a, unsigned long int b);
	36	extern unsigned long long float64_sub(unsigned long long a,
	37	unsigned long long b);
	38	extern unsigned long int float32_sub(unsigned long int a, unsigned long int b);
b6ad1e8c	39	extern unsigned long int float64_to_float32(unsigned long long a);
c8c0a1ab	40	static unsigned int fpu_exception_flags;
1da177e4 LT	41
	42	/*
	43	* Save FPU registers onto task structure.
	44	* Assume called with FPU enabled (SR.FD=0).
	45	*/
c8c0a1ab	46	void save_fpu(struct task_struct tsk, struct pt_regs regs)
1da177e4 LT	47	{
	48	unsigned long dummy;
	49
	50	clear_tsk_thread_flag(tsk, TIF_USEDFPU);
	51	enable_fpu();
c8c0a1ab SM	52	asm volatile ("sts.l fpul, @-%0\n\t"
	53	"sts.l fpscr, @-%0\n\t"
	54	"lds %2, fpscr\n\t"
	55	"frchg\n\t"
	56	"fmov.s fr15, @-%0\n\t"
	57	"fmov.s fr14, @-%0\n\t"
	58	"fmov.s fr13, @-%0\n\t"
	59	"fmov.s fr12, @-%0\n\t"
	60	"fmov.s fr11, @-%0\n\t"
	61	"fmov.s fr10, @-%0\n\t"
	62	"fmov.s fr9, @-%0\n\t"
	63	"fmov.s fr8, @-%0\n\t"
	64	"fmov.s fr7, @-%0\n\t"
	65	"fmov.s fr6, @-%0\n\t"
	66	"fmov.s fr5, @-%0\n\t"
	67	"fmov.s fr4, @-%0\n\t"
	68	"fmov.s fr3, @-%0\n\t"
	69	"fmov.s fr2, @-%0\n\t"
	70	"fmov.s fr1, @-%0\n\t"
	71	"fmov.s fr0, @-%0\n\t"
	72	"frchg\n\t"
	73	"fmov.s fr15, @-%0\n\t"
	74	"fmov.s fr14, @-%0\n\t"
	75	"fmov.s fr13, @-%0\n\t"
	76	"fmov.s fr12, @-%0\n\t"
	77	"fmov.s fr11, @-%0\n\t"
	78	"fmov.s fr10, @-%0\n\t"
	79	"fmov.s fr9, @-%0\n\t"
	80	"fmov.s fr8, @-%0\n\t"
	81	"fmov.s fr7, @-%0\n\t"
	82	"fmov.s fr6, @-%0\n\t"
	83	"fmov.s fr5, @-%0\n\t"
	84	"fmov.s fr4, @-%0\n\t"
	85	"fmov.s fr3, @-%0\n\t"
	86	"fmov.s fr2, @-%0\n\t"
	87	"fmov.s fr1, @-%0\n\t"
	88	"fmov.s fr0, @-%0\n\t"
	89	"lds %3, fpscr\n\t":"=r" (dummy)
	90	:"0"((char *)(&tsk->thread.fpu.hard.status)),
	91	"r"(FPSCR_RCHG), "r"(FPSCR_INIT)
	92	:"memory");
1da177e4	93
74d99a5e PM	94	disable_fpu();
74d99a5e PM	95	release_fpu(regs);
1da177e4 LT	96	}
1da177e4 LT	97
c8c0a1ab	98	static void restore_fpu(struct task_struct *tsk)
1da177e4 LT	99	{
	100	unsigned long dummy;
	101
74d99a5e	102	enable_fpu();
c8c0a1ab SM	103	asm volatile ("lds %2, fpscr\n\t"
	104	"fmov.s @%0+, fr0\n\t"
	105	"fmov.s @%0+, fr1\n\t"
	106	"fmov.s @%0+, fr2\n\t"
	107	"fmov.s @%0+, fr3\n\t"
	108	"fmov.s @%0+, fr4\n\t"
	109	"fmov.s @%0+, fr5\n\t"
	110	"fmov.s @%0+, fr6\n\t"
	111	"fmov.s @%0+, fr7\n\t"
	112	"fmov.s @%0+, fr8\n\t"
	113	"fmov.s @%0+, fr9\n\t"
	114	"fmov.s @%0+, fr10\n\t"
	115	"fmov.s @%0+, fr11\n\t"
	116	"fmov.s @%0+, fr12\n\t"
	117	"fmov.s @%0+, fr13\n\t"
	118	"fmov.s @%0+, fr14\n\t"
	119	"fmov.s @%0+, fr15\n\t"
	120	"frchg\n\t"
	121	"fmov.s @%0+, fr0\n\t"
	122	"fmov.s @%0+, fr1\n\t"
	123	"fmov.s @%0+, fr2\n\t"
	124	"fmov.s @%0+, fr3\n\t"
	125	"fmov.s @%0+, fr4\n\t"
	126	"fmov.s @%0+, fr5\n\t"
	127	"fmov.s @%0+, fr6\n\t"
	128	"fmov.s @%0+, fr7\n\t"
	129	"fmov.s @%0+, fr8\n\t"
	130	"fmov.s @%0+, fr9\n\t"
	131	"fmov.s @%0+, fr10\n\t"
	132	"fmov.s @%0+, fr11\n\t"
	133	"fmov.s @%0+, fr12\n\t"
	134	"fmov.s @%0+, fr13\n\t"
	135	"fmov.s @%0+, fr14\n\t"
	136	"fmov.s @%0+, fr15\n\t"
	137	"frchg\n\t"
	138	"lds.l @%0+, fpscr\n\t"
	139	"lds.l @%0+, fpul\n\t"
	140	:"=r" (dummy)
	141	:"0"(&tsk->thread.fpu), "r"(FPSCR_RCHG)
	142	:"memory");
1da177e4 LT	143	disable_fpu();
	144	}
	145
	146	/*
	147	* Load the FPU with signalling NANS. This bit pattern we're using
	148	* has the property that no matter wether considered as single or as
74d99a5e	149	* double precision represents signaling NANS.
1da177e4 LT	150	*/
1da177e4 LT	151
c8c0a1ab	152	static void fpu_init(void)
1da177e4 LT	153	{
1da177e4 LT	154	enable_fpu();
c8c0a1ab SM	155	asm volatile ( "lds %0, fpul\n\t"
	156	"lds %1, fpscr\n\t"
	157	"fsts fpul, fr0\n\t"
	158	"fsts fpul, fr1\n\t"
	159	"fsts fpul, fr2\n\t"
	160	"fsts fpul, fr3\n\t"
	161	"fsts fpul, fr4\n\t"
	162	"fsts fpul, fr5\n\t"
	163	"fsts fpul, fr6\n\t"
	164	"fsts fpul, fr7\n\t"
	165	"fsts fpul, fr8\n\t"
	166	"fsts fpul, fr9\n\t"
	167	"fsts fpul, fr10\n\t"
	168	"fsts fpul, fr11\n\t"
	169	"fsts fpul, fr12\n\t"
	170	"fsts fpul, fr13\n\t"
	171	"fsts fpul, fr14\n\t"
	172	"fsts fpul, fr15\n\t"
	173	"frchg\n\t"
	174	"fsts fpul, fr0\n\t"
	175	"fsts fpul, fr1\n\t"
	176	"fsts fpul, fr2\n\t"
	177	"fsts fpul, fr3\n\t"
	178	"fsts fpul, fr4\n\t"
	179	"fsts fpul, fr5\n\t"
	180	"fsts fpul, fr6\n\t"
	181	"fsts fpul, fr7\n\t"
	182	"fsts fpul, fr8\n\t"
	183	"fsts fpul, fr9\n\t"
	184	"fsts fpul, fr10\n\t"
	185	"fsts fpul, fr11\n\t"
	186	"fsts fpul, fr12\n\t"
	187	"fsts fpul, fr13\n\t"
	188	"fsts fpul, fr14\n\t"
	189	"fsts fpul, fr15\n\t"
	190	"frchg\n\t"
	191	"lds %2, fpscr\n\t"
	192	: /* no output */
	193	:"r" (0), "r"(FPSCR_RCHG), "r"(FPSCR_INIT));
74d99a5e	194	disable_fpu();
1da177e4 LT	195	}
	196
	197	/**
c8c0a1ab SM	198	* denormal_to_double - Given denormalized float number,
c8c0a1ab SM	199	* store double float
1da177e4	200	*
c8c0a1ab SM	201	* @fpu: Pointer to sh_fpu_hard structure
c8c0a1ab SM	202	* @n: Index to FP register
1da177e4	203	*/
c8c0a1ab	204	static void denormal_to_double(struct sh_fpu_hard_struct *fpu, int n)
1da177e4 LT	205	{
	206	unsigned long du, dl;
	207	unsigned long x = fpu->fpul;
	208	int exp = 1023 - 126;
	209
	210	if (x != 0 && (x & 0x7f800000) == 0) {
	211	du = (x & 0x80000000);
	212	while ((x & 0x00800000) == 0) {
	213	x <<= 1;
	214	exp--;
	215	}
	216	x &= 0x007fffff;
	217	du \|= (exp << 20) \| (x >> 3);
	218	dl = x << 29;
	219
	220	fpu->fp_regs[n] = du;
c8c0a1ab	221	fpu->fp_regs[n + 1] = dl;
1da177e4 LT	222	}
	223	}
	224
	225	/**
	226	* ieee_fpe_handler - Handle denormalized number exception
	227	*
	228	* @regs: Pointer to register structure
	229	*
	230	* Returns 1 when it's handled (should not cause exception).
	231	*/
c8c0a1ab	232	static int ieee_fpe_handler(struct pt_regs *regs)
1da177e4	233	{
c8c0a1ab	234	unsigned short insn = (unsigned short )regs->pc;
1da177e4 LT	235	unsigned short finsn;
	236	unsigned long nextpc;
	237	int nib[4] = {
	238	(insn >> 12) & 0xf,
	239	(insn >> 8) & 0xf,
	240	(insn >> 4) & 0xf,
c8c0a1ab SM	241	insn & 0xf
	242	};
	243
	244	if (nib[0] == 0xb \|\| (nib[0] == 0x4 && nib[2] == 0x0 && nib[3] == 0xb))
	245	regs->pr = regs->pc + 4; /* bsr & jsr */
	246
	247	if (nib[0] == 0xa \|\| nib[0] == 0xb) {
	248	/* bra & bsr */
	249	nextpc = regs->pc + 4 + ((short)((insn & 0xfff) << 4) >> 3);
	250	finsn = (unsigned short )(regs->pc + 2);
	251	} else if (nib[0] == 0x8 && nib[1] == 0xd) {
	252	/* bt/s */
1da177e4	253	if (regs->sr & 1)
c8c0a1ab	254	nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
1da177e4 LT	255	else
1da177e4 LT	256	nextpc = regs->pc + 4;
c8c0a1ab SM	257	finsn = (unsigned short )(regs->pc + 2);
	258	} else if (nib[0] == 0x8 && nib[1] == 0xf) {
	259	/* bf/s */
1da177e4 LT	260	if (regs->sr & 1)
	261	nextpc = regs->pc + 4;
	262	else
c8c0a1ab SM	263	nextpc = regs->pc + 4 + ((char)(insn & 0xff) << 1);
c8c0a1ab SM	264	finsn = (unsigned short )(regs->pc + 2);
1da177e4	265	} else if (nib[0] == 0x4 && nib[3] == 0xb &&
c8c0a1ab SM	266	(nib[2] == 0x0 \|\| nib[2] == 0x2)) {
c8c0a1ab SM	267	/* jmp & jsr */
1da177e4	268	nextpc = regs->regs[nib[1]];
c8c0a1ab	269	finsn = (unsigned short )(regs->pc + 2);
1da177e4	270	} else if (nib[0] == 0x0 && nib[3] == 0x3 &&
c8c0a1ab SM	271	(nib[2] == 0x0 \|\| nib[2] == 0x2)) {
c8c0a1ab SM	272	/* braf & bsrf */
1da177e4	273	nextpc = regs->pc + 4 + regs->regs[nib[1]];
c8c0a1ab SM	274	finsn = (unsigned short )(regs->pc + 2);
	275	} else if (insn == 0x000b) {
	276	/* rts */
1da177e4	277	nextpc = regs->pr;
c8c0a1ab	278	finsn = (unsigned short )(regs->pc + 2);
1da177e4	279	} else {
53f983a9	280	nextpc = regs->pc + instruction_size(insn);
1da177e4 LT	281	finsn = insn;
	282	}
	283
c8c0a1ab SM	284	if ((finsn & 0xf1ff) == 0xf0ad) {
c8c0a1ab SM	285	/* fcnvsd */
1da177e4 LT	286	struct task_struct *tsk = current;
	287
	288	save_fpu(tsk, regs);
c8c0a1ab	289	if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR))
1da177e4	290	/* FPU error */
c8c0a1ab SM	291	denormal_to_double(&tsk->thread.fpu.hard,
	292	(finsn >> 8) & 0xf);
	293	else
	294	return 0;
	295
	296	regs->pc = nextpc;
	297	return 1;
	298	} else if ((finsn & 0xf00f) == 0xf002) {
	299	/* fmul */
	300	struct task_struct *tsk = current;
	301	int fpscr;
	302	int n, m, prec;
	303	unsigned int hx, hy;
	304
	305	n = (finsn >> 8) & 0xf;
	306	m = (finsn >> 4) & 0xf;
	307	hx = tsk->thread.fpu.hard.fp_regs[n];
	308	hy = tsk->thread.fpu.hard.fp_regs[m];
	309	fpscr = tsk->thread.fpu.hard.fpscr;
	310	prec = fpscr & FPSCR_DBL_PRECISION;
	311
	312	if ((fpscr & FPSCR_CAUSE_ERROR)
	313	&& (prec && ((hx & 0x7fffffff) < 0x00100000
	314	\|\| (hy & 0x7fffffff) < 0x00100000))) {
	315	long long llx, lly;
	316
	317	/* FPU error because of denormal (doubles) */
	318	llx = ((long long)hx << 32)
	319	\| tsk->thread.fpu.hard.fp_regs[n + 1];
	320	lly = ((long long)hy << 32)
	321	\| tsk->thread.fpu.hard.fp_regs[m + 1];
	322	llx = float64_mul(llx, lly);
	323	tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
	324	tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
	325	} else if ((fpscr & FPSCR_CAUSE_ERROR)
	326	&& (!prec && ((hx & 0x7fffffff) < 0x00800000
	327	\|\| (hy & 0x7fffffff) < 0x00800000))) {
	328	/* FPU error because of denormal (floats) */
	329	hx = float32_mul(hx, hy);
	330	tsk->thread.fpu.hard.fp_regs[n] = hx;
	331	} else
	332	return 0;
	333
	334	regs->pc = nextpc;
	335	return 1;
	336	} else if ((finsn & 0xf00e) == 0xf000) {
	337	/* fadd, fsub */
	338	struct task_struct *tsk = current;
	339	int fpscr;
	340	int n, m, prec;
	341	unsigned int hx, hy;
	342
	343	n = (finsn >> 8) & 0xf;
	344	m = (finsn >> 4) & 0xf;
	345	hx = tsk->thread.fpu.hard.fp_regs[n];
	346	hy = tsk->thread.fpu.hard.fp_regs[m];
	347	fpscr = tsk->thread.fpu.hard.fpscr;
	348	prec = fpscr & FPSCR_DBL_PRECISION;
	349
	350	if ((fpscr & FPSCR_CAUSE_ERROR)
	351	&& (prec && ((hx & 0x7fffffff) < 0x00100000
	352	\|\| (hy & 0x7fffffff) < 0x00100000))) {
	353	long long llx, lly;
	354
355	/* FPU error because of denormal (doubles) */
356	llx = ((long long)hx << 32)
357	\| tsk->thread.fpu.hard.fp_regs[n + 1];
358	lly = ((long long)hy << 32)
359	\| tsk->thread.fpu.hard.fp_regs[m + 1];
360	if ((finsn & 0xf00f) == 0xf000)
361	llx = float64_add(llx, lly);
362	else
363	llx = float64_sub(llx, lly);
364	tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
365	tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
366	} else if ((fpscr & FPSCR_CAUSE_ERROR)
367	&& (!prec && ((hx & 0x7fffffff) < 0x00800000
368	\|\| (hy & 0x7fffffff) < 0x00800000))) {
369	/* FPU error because of denormal (floats) */
370	if ((finsn & 0xf00f) == 0xf000)
371	hx = float32_add(hx, hy);
372	else
373	hx = float32_sub(hx, hy);
374	tsk->thread.fpu.hard.fp_regs[n] = hx;
375	} else
376	return 0;
377
378	regs->pc = nextpc;
379	return 1;
380	} else if ((finsn & 0xf003) == 0xf003) {
381	/* fdiv */
382	struct task_struct *tsk = current;
383	int fpscr;
384	int n, m, prec;
385	unsigned int hx, hy;
386
387	n = (finsn >> 8) & 0xf;
388	m = (finsn >> 4) & 0xf;
389	hx = tsk->thread.fpu.hard.fp_regs[n];
390	hy = tsk->thread.fpu.hard.fp_regs[m];
391	fpscr = tsk->thread.fpu.hard.fpscr;
392	prec = fpscr & FPSCR_DBL_PRECISION;
393
394	if ((fpscr & FPSCR_CAUSE_ERROR)
395	&& (prec && ((hx & 0x7fffffff) < 0x00100000
396	\|\| (hy & 0x7fffffff) < 0x00100000))) {
397	long long llx, lly;
398
399	/* FPU error because of denormal (doubles) */
400	llx = ((long long)hx << 32)
401	\| tsk->thread.fpu.hard.fp_regs[n + 1];
402	lly = ((long long)hy << 32)
403	\| tsk->thread.fpu.hard.fp_regs[m + 1];
404
405	llx = float64_div(llx, lly);
406
407	tsk->thread.fpu.hard.fp_regs[n] = llx >> 32;
408	tsk->thread.fpu.hard.fp_regs[n + 1] = llx & 0xffffffff;
409	} else if ((fpscr & FPSCR_CAUSE_ERROR)
410	&& (!prec && ((hx & 0x7fffffff) < 0x00800000
411	\|\| (hy & 0x7fffffff) < 0x00800000))) {
412	/* FPU error because of denormal (floats) */
413	hx = float32_div(hx, hy);
414	tsk->thread.fpu.hard.fp_regs[n] = hx;
b5a1bcbe	415	} else
c8c0a1ab	416	return 0;
1da177e4	417
b6ad1e8c CS	418	regs->pc = nextpc;
	419	return 1;
	420	} else if ((finsn & 0xf0bd) == 0xf0bd) {
	421	/* fcnvds - double to single precision convert */
	422	struct task_struct *tsk = current;
	423	int m;
	424	unsigned int hx;
	425
0f6dee23	426	m = (finsn >> 8) & 0x7;
b6ad1e8c CS	427	hx = tsk->thread.fpu.hard.fp_regs[m];
	428
	429	if ((tsk->thread.fpu.hard.fpscr & FPSCR_CAUSE_ERROR)
	430	&& ((hx & 0x7fffffff) < 0x00100000)) {
	431	/* subnormal double to float conversion */
	432	long long llx;
	433
	434	llx = ((long long)tsk->thread.fpu.hard.fp_regs[m] << 32)
	435	\| tsk->thread.fpu.hard.fp_regs[m + 1];
	436
	437	tsk->thread.fpu.hard.fpul = float64_to_float32(llx);
	438	} else
	439	return 0;
	440
1da177e4 LT	441	regs->pc = nextpc;
	442	return 1;
	443	}
	444
	445	return 0;
	446	}
	447
c8c0a1ab SM	448	void float_raise(unsigned int flags)
	449	{
	450	fpu_exception_flags \|= flags;
	451	}
	452
	453	int float_rounding_mode(void)
	454	{
	455	struct task_struct *tsk = current;
	456	int roundingMode = FPSCR_ROUNDING_MODE(tsk->thread.fpu.hard.fpscr);
	457	return roundingMode;
	458	}
	459
74d99a5e	460	BUILD_TRAP_HANDLER(fpu_error)
1da177e4 LT	461	{
1da177e4 LT	462	struct task_struct *tsk = current;
74d99a5e	463	TRAP_HANDLER_DECL;
1da177e4	464
f0bc814c	465	save_fpu(tsk, regs);
c8c0a1ab SM	466	fpu_exception_flags = 0;
	467	if (ieee_fpe_handler(regs)) {
	468	tsk->thread.fpu.hard.fpscr &=
	469	~(FPSCR_CAUSE_MASK \| FPSCR_FLAG_MASK);
	470	tsk->thread.fpu.hard.fpscr \|= fpu_exception_flags;
	471	/* Set the FPSCR flag as well as cause bits - simply
	472	* replicate the cause */
	473	tsk->thread.fpu.hard.fpscr \|= (fpu_exception_flags >> 10);
	474	grab_fpu(regs);
	475	restore_fpu(tsk);
	476	set_tsk_thread_flag(tsk, TIF_USEDFPU);
	477	if ((((tsk->thread.fpu.hard.fpscr & FPSCR_ENABLE_MASK) >> 7) &
	478	(fpu_exception_flags >> 2)) == 0) {
	479	return;
	480	}
	481	}
	482
1da177e4 LT	483	force_sig(SIGFPE, tsk);
	484	}
	485
a0458b07	486	void fpu_state_restore(struct pt_regs *regs)
1da177e4 LT	487	{
	488	struct task_struct *tsk = current;
	489
f0bc814c	490	grab_fpu(regs);
a0458b07	491	if (unlikely(!user_mode(regs))) {
1da177e4	492	printk(KERN_ERR "BUG: FPU is used in kernel mode.\n");
a0458b07	493	BUG();
1da177e4 LT	494	return;
	495	}
	496
a0458b07	497	if (likely(used_math())) {
1da177e4 LT	498	/* Using the FPU again. */
1da177e4 LT	499	restore_fpu(tsk);
c8c0a1ab	500	} else {
1da177e4 LT	501	/* First time FPU user. */
	502	fpu_init();
	503	set_used_math();
	504	}
	505	set_tsk_thread_flag(tsk, TIF_USEDFPU);
a0458b07 GC	506	tsk->fpu_counter++;
	507	}
	508
	509	BUILD_TRAP_HANDLER(fpu_state_restore)
	510	{
	511	TRAP_HANDLER_DECL;
	512
	513	fpu_state_restore(regs);
1da177e4	514	}