4 files changed, 607 insertions, 0 deletions
diff --git a/arch/alpha/math-emu/Makefile b/arch/alpha/math-emu/Makefile
new file mode 100644
index 000000000000..359ef087e69e
--- /dev/null
+++ b/arch/alpha/math-emu/Makefile
@@ -0,0 +1,9 @@
+#
+# Makefile for the FPU instruction emulation.
+#
+EXTRA_CFLAGS := -w
+obj-$(CONFIG_MATHEMU) += math-emu.o
+math-emu-objs := math.o qrnnd.o
diff --git a/arch/alpha/math-emu/math.c b/arch/alpha/math-emu/math.c
new file mode 100644
index 000000000000..ae79dd970b02
--- /dev/null
+++ b/arch/alpha/math-emu/math.c
@@ -0,0 +1,400 @@
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <asm/uaccess.h>
+#include "sfp-util.h"
+#include <math-emu/soft-fp.h>
+#include <math-emu/single.h>
+#include <math-emu/double.h>
+/*
+ * Major opcodes, i.e. the value of (insn >> 26).
+ * alpha_fp_emul_imprecise() switches on these while scanning backwards:
+ * PAL/JSR (and 0x30..0x3f) end the scan, INT* clear bit rc of the write
+ * mask, FLT* clear bit rc + 32, and MISC is checked for TRAPB/EXCB.
+ */
+#define OPC_PAL         0x00
+#define OPC_INTA        0x10
+#define OPC_INTL        0x11
+#define OPC_INTS        0x12
+#define OPC_INTM        0x13
+#define OPC_FLTC        0x14
+#define OPC_FLTV        0x15
+#define OPC_FLTI        0x16
+#define OPC_FLTL        0x17
+#define OPC_MISC        0x18
+#define OPC_JSR         0x1a
+/*
+ * Source-format field, (insn >> 9) & 3, as decoded by alpha_fp_emul():
+ * S operands are read with alpha_read_fp_reg_s() and handled with the
+ * single-precision macros, T operands with alpha_read_fp_reg() and the
+ * double-precision macros, Q treats fb as a 64-bit integer.
+ */
+#define FOP_SRC_S       0
+#define FOP_SRC_T       2
+#define FOP_SRC_Q       3
+/*
+ * Function field, (insn >> 5) & 0xf.  ADDx and CVTQL share the value 0;
+ * CVTQL is only decoded when the source format is FOP_SRC_Q.
+ */
+#define FOP_FNC_ADDx    0
+#define FOP_FNC_CVTQL   0
+#define FOP_FNC_SUBx    1
+#define FOP_FNC_MULx    2
+#define FOP_FNC_DIVx    3
+#define FOP_FNC_CMPxUN  4
+#define FOP_FNC_CMPxEQ  5
+#define FOP_FNC_CMPxLT  6
+#define FOP_FNC_CMPxLE  7
+#define FOP_FNC_SQRTx   11
+#define FOP_FNC_CVTxS   12
+#define FOP_FNC_CVTxT   14
+#define FOP_FNC_CVTxQ   15
+/* Values of (insn & 0xffff) under OPC_MISC that end the backward scan. */
+#define MISC_TRAPB      0x0000
+#define MISC_EXCB       0x0400
+/*
+ * FP register accessors, defined outside this file.  The _s variants are
+ * the ones used below for single-precision operands and results.
+ */
+extern unsigned long alpha_read_fp_reg (unsigned long reg);
+extern void alpha_write_fp_reg (unsigned long reg, unsigned long val);
+extern unsigned long alpha_read_fp_reg_s (unsigned long reg);
+extern void alpha_write_fp_reg_s (unsigned long reg, unsigned long val);
+#ifdef MODULE
+MODULE_DESCRIPTION("FP Software completion module");
+/* Emulation hooks, defined outside this file, that this module takes over. */
+extern long (*alpha_fp_emul_imprecise)(struct pt_regs *, unsigned long);
+extern long (*alpha_fp_emul) (unsigned long pc);
+/* Hook values found at load time, put back again on unload. */
+static long (*save_emul_imprecise)(struct pt_regs *, unsigned long);
+static long (*save_emul) (unsigned long pc);
+/* The two emulators below, under the names they carry in a module build. */
+long do_alpha_fp_emul_imprecise(struct pt_regs *, unsigned long);
+long do_alpha_fp_emul(unsigned long);
+/* Module load: remember each current hook, then point it at our emulator. */
+int init_module(void)
+{
+        /* Precise-trap hook.  */
+        save_emul = alpha_fp_emul;
+        alpha_fp_emul = do_alpha_fp_emul;
+
+        /* Imprecise-trap hook.  */
+        save_emul_imprecise = alpha_fp_emul_imprecise;
+        alpha_fp_emul_imprecise = do_alpha_fp_emul_imprecise;
+
+        return 0;
+}
+/* Module unload: restore both hooks to the values saved by init_module().  */
+void cleanup_module(void)
+{
+        alpha_fp_emul = save_emul;
+        alpha_fp_emul_imprecise = save_emul_imprecise;
+}
+/*
+ * From here on the hook names stand for the functions themselves, so the
+ * definitions that follow are emitted as do_alpha_fp_emul{,_imprecise}.
+ */
+#undef  alpha_fp_emul_imprecise
+#define alpha_fp_emul_imprecise         do_alpha_fp_emul_imprecise
+#undef  alpha_fp_emul
+#define alpha_fp_emul                   do_alpha_fp_emul
+#endif /* MODULE */
+/*
+ * Emulate the floating point instruction at address PC.  Returns -1 if the
+ * instruction to be emulated is illegal (such as with the opDEC trap) or
+ * cannot be read from user space, else the SI_CODE for a SIGFPE signal,
+ * else 0 if everything's ok.
+ *
+ * Notice that the kernel does not and cannot use FP regs.  This is good
+ * because it means that instead of saving/restoring all fp regs, we simply
+ * stick the result of the operation into the appropriate register.
+ *
+ * Control flow: the decode switch jumps to pack_s/pack_d (pack a soft-fp
+ * result and write register fc), to done_d (write a ready-made 64-bit
+ * value to fc) or to bad_insn.  sfp-util.h defines abort() as
+ * "goto bad_insn", so a soft-fp macro that expands abort() lands there too.
+ */
+long
+alpha_fp_emul (unsigned long pc)
+{
+        FP_DECL_EX;
+        FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
+        FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR);
+        unsigned long fa, fb, fc, func, mode, src;
+        unsigned long res, va, vb, vc, swcr, fpcr;
+        __u32 insn;
+        long si_code;
+        /* Never decode a word we failed to fetch: doing so would emulate,
+           and write a destination register for, an instruction the user
+           program does not contain.  */
+        if (get_user(insn, (__u32 __user *)pc))
+                return -1;
+        fc     = (insn >>  0) & 0x1f;   /* destination register */
+        fb     = (insn >> 16) & 0x1f;
+        fa     = (insn >> 21) & 0x1f;
+        func   = (insn >>  5) & 0xf;
+        src    = (insn >>  9) & 0x3;
+        mode   = (insn >> 11) & 0x3;
+        fpcr = rdfpcr();
+        swcr = swcr_update_status(current_thread_info()->ieee_state, fpcr);
+        /* NOTE(review): `mode' is not referenced by name below; presumably
+           the soft-fp rounding macros pick it up -- confirm in the
+           sfp-machine.h this file is built against.  */
+        if (mode == 3) {
+                /* Dynamic -- get rounding mode from fpcr.  */
+                mode = (fpcr >> FPCR_DYN_SHIFT) & 3;
+        }
+        switch (src) {
+        case FOP_SRC_S:
+                va = alpha_read_fp_reg_s(fa);
+                vb = alpha_read_fp_reg_s(fb);
+                FP_UNPACK_SP(SA, &va);
+                FP_UNPACK_SP(SB, &vb);
+                switch (func) {
+                case FOP_FNC_SUBx:
+                        FP_SUB_S(SR, SA, SB);
+                        goto pack_s;
+                case FOP_FNC_ADDx:
+                        FP_ADD_S(SR, SA, SB);
+                        goto pack_s;
+                case FOP_FNC_MULx:
+                        FP_MUL_S(SR, SA, SB);
+                        goto pack_s;
+                case FOP_FNC_DIVx:
+                        FP_DIV_S(SR, SA, SB);
+                        goto pack_s;
+                case FOP_FNC_SQRTx:
+                        FP_SQRT_S(SR, SB);
+                        goto pack_s;
+                }
+                goto bad_insn;
+        case FOP_SRC_T:
+                va = alpha_read_fp_reg(fa);
+                vb = alpha_read_fp_reg(fb);
+                /* func 4..7: the four compares.  */
+                if ((func & ~3) == FOP_FNC_CMPxUN) {
+                        FP_UNPACK_RAW_DP(DA, &va);
+                        FP_UNPACK_RAW_DP(DB, &vb);
+                        /* Zero exponent with a non-zero fraction: flag the
+                           denormal operand, and flush it to zero when
+                           FP_DENORM_ZERO says so.  */
+                        if (!DA_e && !_FP_FRAC_ZEROP_1(DA)) {
+                                FP_SET_EXCEPTION(FP_EX_DENORM);
+                                if (FP_DENORM_ZERO)
+                                        _FP_FRAC_SET_1(DA, _FP_ZEROFRAC_1);
+                        }
+                        if (!DB_e && !_FP_FRAC_ZEROP_1(DB)) {
+                                FP_SET_EXCEPTION(FP_EX_DENORM);
+                                if (FP_DENORM_ZERO)
+                                        _FP_FRAC_SET_1(DB, _FP_ZEROFRAC_1);
+                        }
+                        /* res == 3 is handled below as "unordered".  */
+                        FP_CMP_D(res, DA, DB, 3);
+                        /* Result for a true compare (the IEEE double bit
+                           pattern of 2.0); cleared below when false.  */
+                        vc = 0x4000000000000000UL;
+                        /* CMPTEQ, CMPTUN don't trap on QNaN,
+                           while CMPTLT and CMPTLE do */
+                        if (res == 3
+                            && ((func & 3) >= 2
+                                || FP_ISSIGNAN_D(DA)
+                                || FP_ISSIGNAN_D(DB))) {
+                                FP_SET_EXCEPTION(FP_EX_INVALID);
+                        }
+                        switch (func) {
+                        case FOP_FNC_CMPxUN: if (res != 3) vc = 0; break;
+                        case FOP_FNC_CMPxEQ: if (res) vc = 0; break;
+                        case FOP_FNC_CMPxLT: if (res != -1) vc = 0; break;
+                        case FOP_FNC_CMPxLE: if ((long)res > 0) vc = 0; break;
+                        }
+                        goto done_d;
+                }
+                FP_UNPACK_DP(DA, &va);
+                FP_UNPACK_DP(DB, &vb);
+                switch (func) {
+                case FOP_FNC_SUBx:
+                        FP_SUB_D(DR, DA, DB);
+                        goto pack_d;
+                case FOP_FNC_ADDx:
+                        FP_ADD_D(DR, DA, DB);
+                        goto pack_d;
+                case FOP_FNC_MULx:
+                        FP_MUL_D(DR, DA, DB);
+                        goto pack_d;
+                case FOP_FNC_DIVx:
+                        FP_DIV_D(DR, DA, DB);
+                        goto pack_d;
+                case FOP_FNC_SQRTx:
+                        FP_SQRT_D(DR, DB);
+                        goto pack_d;
+                case FOP_FNC_CVTxS:
+                        /* It is irritating that DEC encoded CVTST with
+                           SRC == T_floating.  It is also interesting that
+                           the bit used to tell the two apart is /U... */
+                        if (insn & 0x2000) {
+                                FP_CONV(S,D,1,1,SR,DB);
+                                goto pack_s;
+                        } else {
+                                /* CVTST: class, sign and exponent come from
+                                   the double unpack of fb done above; the
+                                   fraction is the single-precision one,
+                                   moved up to the double's fraction width.  */
+                                vb = alpha_read_fp_reg_s(fb);
+                                FP_UNPACK_SP(SB, &vb);
+                                DR_c = DB_c;
+                                DR_s = DB_s;
+                                DR_e = DB_e;
+                                DR_f = SB_f << (52 - 23);
+                                goto pack_d;
+                        }
+                case FOP_FNC_CVTxQ:
+                        if (DB_c == FP_CLS_NAN
+                            && (_FP_FRAC_HIGH_RAW_D(DB) & _FP_QNANBIT_D)) {
+                          /* AAHB Table B-2 says QNaN should not trigger INV */
+                                vc = 0;
+                        } else
+                                FP_TO_INT_ROUND_D(vc, DB, 64, 2);
+                        goto done_d;
+                }
+                goto bad_insn;
+        case FOP_SRC_Q:
+                vb = alpha_read_fp_reg(fb);
+                switch (func) {
+                case FOP_FNC_CVTQL:
+                        /* Notice: We can get here only due to an integer
+                           overflow.  Such overflows are reported as invalid
+                           ops.  We return the result the hw would have
+                           computed.  */
+                        vc = ((vb & 0xc0000000) << 32 | /* sign and msb */
+                              (vb & 0x3fffffff) << 29); /* rest of the int */
+                        FP_SET_EXCEPTION (FP_EX_INVALID);
+                        goto done_d;
+                case FOP_FNC_CVTxS:
+                        FP_FROM_INT_S(SR, ((long)vb), 64, long);
+                        goto pack_s;
+                case FOP_FNC_CVTxT:
+                        FP_FROM_INT_D(DR, ((long)vb), 64, long);
+                        goto pack_d;
+                }
+                goto bad_insn;
+        }
+        goto bad_insn;
+pack_s:
+        FP_PACK_SP(&vc, SR);
+        /* Underflow with IEEE_MAP_UMZ set in swcr: the result becomes 0.  */
+        if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ))
+                vc = 0;
+        alpha_write_fp_reg_s(fc, vc);
+        goto done;
+pack_d:
+        FP_PACK_DP(&vc, DR);
+        if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ))
+                vc = 0;
+done_d:
+        alpha_write_fp_reg(fc, vc);
+        goto done;
+        /*
+         * Take the appropriate action for each possible
+         * floating-point result:
+         *
+         *      - Set the appropriate bits in the FPCR
+         *      - If the specified exception is enabled in the FPCR,
+         *        return.  The caller (entArith) will dispatch
+         *        the appropriate signal to the translated program.
+         *
+         * In addition, properly track the exception state in software
+         * as described in the Alpha Architecture Handbook section 4.7.7.3.
+         */
+done:
+        if (_fex) {
+                /* Record exceptions in software control word.  */
+                swcr |= (_fex << IEEE_STATUS_TO_EXCSUM_SHIFT);
+                current_thread_info()->ieee_state
+                  |= (_fex << IEEE_STATUS_TO_EXCSUM_SHIFT);
+                /* Update hardware control register.  */
+                fpcr &= (~FPCR_MASK | FPCR_DYN_MASK);
+                fpcr |= ieee_swcr_to_fpcr(swcr);
+                wrfpcr(fpcr);
+                /* Do we generate a signal?  */
+                _fex = _fex & swcr & IEEE_TRAP_ENABLE_MASK;
+                si_code = 0;
+                if (_fex) {
+                        /* Later assignments win, so when several enabled
+                           exceptions are raised at once INV has the highest
+                           priority and DNO the lowest.  */
+                        if (_fex & IEEE_TRAP_ENABLE_DNO) si_code = FPE_FLTUND;
+                        if (_fex & IEEE_TRAP_ENABLE_INE) si_code = FPE_FLTRES;
+                        if (_fex & IEEE_TRAP_ENABLE_UNF) si_code = FPE_FLTUND;
+                        if (_fex & IEEE_TRAP_ENABLE_OVF) si_code = FPE_FLTOVF;
+                        if (_fex & IEEE_TRAP_ENABLE_DZE) si_code = FPE_FLTDIV;
+                        if (_fex & IEEE_TRAP_ENABLE_INV) si_code = FPE_FLTINV;
+                }
+                return si_code;
+        }
+        /* We used to write the destination register here, but DEC FORTRAN
+           requires that the result *always* be written... so we do the write
+           immediately after the operations above.  */
+        return 0;
+bad_insn:
+        printk(KERN_ERR "alpha_fp_emul: Invalid FP insn %#x at %#lx\n",
+               insn, pc);
+        return -1;
+}
+/*
+ * Handle an arithmetic trap whose trigger instruction is not known exactly.
+ *
+ * Starting at the instruction before regs->pc, walk backwards one
+ * instruction at a time, clearing from WRITE_MASK the destination register
+ * of every operate instruction seen: bit rc for the integer-operate
+ * opcodes, bit rc + 32 for the FP-operate opcodes.  The instruction that
+ * empties the mask is taken as the trigger: regs->pc is set to the
+ * instruction after it and it is emulated with alpha_fp_emul().
+ *
+ * Returns the result of alpha_fp_emul() for the trigger, or 0 when the
+ * scan ends first (WRITE_MASK already empty, a trap-shadow boundary was
+ * reached, or an instruction could not be read from user space).
+ */
+long
+alpha_fp_emul_imprecise (struct pt_regs *regs, unsigned long write_mask)
+{
+        unsigned long trigger_pc = regs->pc - 4;
+        unsigned long insn, opcode, rc, si_code = 0;
+        /*
+         * Turn off the bits corresponding to registers that are the
+         * target of instructions that set bits in the exception
+         * summary register.  We have some slack doing this because a
+         * register that is the target of a trapping instruction can
+         * be written at most once in the trap shadow.
+         *
+         * Branches, jumps, TRAPBs, EXCBs and calls to PALcode all
+         * bound the trap shadow, so we need not look any further than
+         * up to the first occurrence of such an instruction.
+         */
+        while (write_mask) {
+                /* Stop as soon as an instruction cannot be fetched rather
+                   than classifying a word that was never read.  */
+                if (get_user(insn, (__u32 __user *)(trigger_pc)))
+                        goto egress;
+                opcode = insn >> 26;
+                rc = insn & 0x1f;
+                switch (opcode) {
+                      case OPC_PAL:
+                      case OPC_JSR:
+                      case 0x30 ... 0x3f:       /* branches */
+                        goto egress;
+                      case OPC_MISC:
+                        switch (insn & 0xffff) {
+                              case MISC_TRAPB:
+                              case MISC_EXCB:
+                                goto egress;
+                              default:
+                                break;
+                        }
+                        break;
+                      case OPC_INTA:
+                      case OPC_INTL:
+                      case OPC_INTS:
+                      case OPC_INTM:
+                        write_mask &= ~(1UL << rc);
+                        break;
+                      case OPC_FLTC:
+                      case OPC_FLTV:
+                      case OPC_FLTI:
+                      case OPC_FLTL:
+                        write_mask &= ~(1UL << (rc + 32));
+                        break;
+                      default:
+                        /* Any other opcode leaves the mask untouched.  */
+                        break;
+                }
+                if (!write_mask) {
+                        /* Re-execute insns in the trap-shadow.  */
+                        regs->pc = trigger_pc + 4;
+                        si_code = alpha_fp_emul(trigger_pc);
+                        goto egress;
+                }
+                trigger_pc -= 4;
+        }
+egress:
+        return si_code;
+}
diff --git a/arch/alpha/math-emu/qrnnd.S b/arch/alpha/math-emu/qrnnd.S
new file mode 100644
index 000000000000..d6373ec1bff9
--- /dev/null
+++ b/arch/alpha/math-emu/qrnnd.S
@@ -0,0 +1,163 @@
+ # Alpha 21064 __udiv_qrnnd
+ # Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
+ # This file is part of GCC.
+ # The GNU MP Library is free software; you can redistribute it and/or modify
+ # it under the terms of the GNU General Public License as published by
+ # the Free Software Foundation; either version 2 of the License, or (at your
+ # option) any later version.
+ # In addition to the permissions in the GNU General Public License, the
+ # Free Software Foundation gives you unlimited permission to link the
+ # compiled version of this file with other programs, and to distribute
+ # those programs without any restriction coming from the use of this
+ # file.  (The General Public License restrictions do apply in other
+ # respects; for example, they cover modification of the file, and
+ # distribution when not linked into another program.)
+ # This file is distributed in the hope that it will be useful, but
+ # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ # or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ # License for more details.
+ # You should have received a copy of the GNU General Public License
+ # along with GCC; see the file COPYING.  If not, write to the 
+ # Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ # MA 02111-1307, USA.
+        .set noreorder
+        .set noat
+        .text
+        .globl __udiv_qrnnd
+        .ent __udiv_qrnnd
+/*
+ * __udiv_qrnnd: divide the two-word value n1:n0 (n1 is the upper word)
+ * by d using shift-and-subtract.
+ *
+ * In:   $16 = rem_ptr, $17 = n1, $18 = n0, $19 = d
+ * Out:  quotient in $0, remainder stored at 0(rem_ptr)
+ * Uses: $2 (cnt), $3 (tmp), $4, $5, $6, $20 (qb) and $at as scratch.
+ *
+ * NOTE(review): as usual for udiv_qrnnd the caller is presumably expected
+ * to guarantee n1 < d so the quotient fits in one word -- confirm.
+ */
+__udiv_qrnnd:
+        .frame $30,0,$26,0
+        .prologue 0
+#define cnt     $2
+#define tmp     $3
+#define rem_ptr $16
+#define n1      $17
+#define n0      $18
+#define d       $19
+#define qb      $20
+#define AT      $at
+        /* 16 passes of a loop body that is unrolled four times.  A divisor
+           that is negative when read as signed (top bit set) takes the
+           separate path below.  */
+        ldiq    cnt,16
+        blt     d,$largedivisor
+        /* One step is eight instructions: shift n1:n0 left by one bit (the
+           top bit of n0 moves into the bottom of n1), set qb = (d <= n1)
+           unsigned, replace n1 by n1 - d when qb is set, and OR qb into
+           the bit of n0 that the shift just vacated.  */
+$loop1: cmplt   n0,0,tmp
+        addq    n1,n1,n1
+        bis     n1,tmp,n1
+        addq    n0,n0,n0
+        cmpule  d,n1,qb
+        subq    n1,d,tmp
+        cmovne  qb,tmp,n1
+        bis     n0,qb,n0
+        /* step 2 of 4 */
+        cmplt   n0,0,tmp
+        addq    n1,n1,n1
+        bis     n1,tmp,n1
+        addq    n0,n0,n0
+        cmpule  d,n1,qb
+        subq    n1,d,tmp
+        cmovne  qb,tmp,n1
+        bis     n0,qb,n0
+        /* step 3 of 4 */
+        cmplt   n0,0,tmp
+        addq    n1,n1,n1
+        bis     n1,tmp,n1
+        addq    n0,n0,n0
+        cmpule  d,n1,qb
+        subq    n1,d,tmp
+        cmovne  qb,tmp,n1
+        bis     n0,qb,n0
+        /* step 4 of 4 */
+        cmplt   n0,0,tmp
+        addq    n1,n1,n1
+        bis     n1,tmp,n1
+        addq    n0,n0,n0
+        cmpule  d,n1,qb
+        subq    n1,d,tmp
+        cmovne  qb,tmp,n1
+        bis     n0,qb,n0
+        subq    cnt,1,cnt
+        bgt     cnt,$loop1
+        /* Store n1 as the remainder and return n0 as the quotient.  */
+        stq     n1,0(rem_ptr)
+        bis     $31,n0,$0
+        ret     $31,($26),1
+        /* d has its top bit set.  Keep the low bit of n0 in $4, shift
+           n1:n0 right by one bit, and divide by $5 = (d >> 1) + (d & 1)
+           instead; $6 = d & 1 records whether d is odd.  */
+$largedivisor:
+        and     n0,1,$4
+        srl     n0,1,n0
+        sll     n1,63,tmp
+        or      tmp,n0,n0
+        srl     n1,1,n1
+        and     d,1,$6
+        srl     d,1,$5
+        addq    $5,$6,$5
+        /* Same step sequence as $loop1, with $5 in place of d.  */
+$loop2: cmplt   n0,0,tmp
+        addq    n1,n1,n1
+        bis     n1,tmp,n1
+        addq    n0,n0,n0
+        cmpule  $5,n1,qb
+        subq    n1,$5,tmp
+        cmovne  qb,tmp,n1
+        bis     n0,qb,n0
+        /* step 2 of 4 */
+        cmplt   n0,0,tmp
+        addq    n1,n1,n1
+        bis     n1,tmp,n1
+        addq    n0,n0,n0
+        cmpule  $5,n1,qb
+        subq    n1,$5,tmp
+        cmovne  qb,tmp,n1
+        bis     n0,qb,n0
+        /* step 3 of 4 */
+        cmplt   n0,0,tmp
+        addq    n1,n1,n1
+        bis     n1,tmp,n1
+        addq    n0,n0,n0
+        cmpule  $5,n1,qb
+        subq    n1,$5,tmp
+        cmovne  qb,tmp,n1
+        bis     n0,qb,n0
+        /* step 4 of 4 */
+        cmplt   n0,0,tmp
+        addq    n1,n1,n1
+        bis     n1,tmp,n1
+        addq    n0,n0,n0
+        cmpule  $5,n1,qb
+        subq    n1,$5,tmp
+        cmovne  qb,tmp,n1
+        bis     n0,qb,n0
+        subq    cnt,1,cnt
+        bgt     cnt,$loop2
+        /* Undo the pre-shift on the remainder: n1 = 2 * n1 + saved low bit
+           of n0.  An odd d needs the further correction at $Odd.  */
+        addq    n1,n1,n1
+        addq    $4,n1,n1
+        bne     $6,$Odd
+        stq     n1,0(rem_ptr)
+        bis     $31,n0,$0
+        ret     $31,($26),1
+$Odd:
+        /* q' in n0. r' in n1 */
+        /* Add q' to r'.  If that addition carries, subtract d from the sum
+           and add one to the quotient.  Then, if the remainder is still
+           not below d, subtract d once more and add one to the quotient
+           again.  */
+        addq    n1,n0,n1
+        cmpult  n1,n0,tmp       # tmp := carry from addq
+        subq    n1,d,AT
+        addq    n0,tmp,n0
+        cmovne  tmp,AT,n1
+        cmpult  n1,d,tmp
+        addq    n0,1,AT
+        cmoveq  tmp,AT,n0
+        subq    n1,d,AT
+        cmoveq  tmp,AT,n1
+        stq     n1,0(rem_ptr)
+        bis     $31,n0,$0
+        ret     $31,($26),1
+        .end    __udiv_qrnnd
diff --git a/arch/alpha/math-emu/sfp-util.h b/arch/alpha/math-emu/sfp-util.h
new file mode 100644
index 000000000000..f53707f77455
--- /dev/null
+++ b/arch/alpha/math-emu/sfp-util.h
@@ -0,0 +1,35 @@
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <asm/byteorder.h>
+#include <asm/fpu.h>
+/*
+ * Two-word add: (sh:sl) = (ah:al) + (bh:bl).  The carry out of the low
+ * word is detected as (sl < al) after the low words have been added.
+ */
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+  ((sl) = (al) + (bl), (sh) = (ah) + (bh) + ((sl) < (al)))
+/*
+ * Two-word subtract: (sh:sl) = (ah:al) - (bh:bl).  The borrow out of the
+ * low word is (al < bl).
+ */
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+  ((sl) = (al) - (bl), (sh) = (ah) - (bh) - ((al) < (bl)))
+/*
+ * Full product of u and v: mulq writes its result to wl, umulh writes its
+ * result to wh.  wl is marked early-clobber ("=&r") because it is written
+ * before umulh has read the inputs.
+ */
+#define umul_ppmm(wh, wl, u, v)                 \
+  __asm__ ("mulq %2,%3,%1; umulh %2,%3,%0"      \
+           : "=r" ((UDItype)(wh)),              \
+             "=&r" ((UDItype)(wl))              \
+           : "r" ((UDItype)(u)),                \
+             "r" ((UDItype)(v)))
+/*
+ * Divide n1:n0 by d through __udiv_qrnnd(): q receives the return value,
+ * r the value __udiv_qrnnd() stores through its first argument.
+ */
+#define udiv_qrnnd(q, r, n1, n0, d)                             \
+  do { unsigned long __r;                                       \
+    (q) = __udiv_qrnnd (&__r, (n1), (n0), (d));                 \
+    (r) = __r;                                                  \
+  } while (0)
+extern unsigned long __udiv_qrnnd (unsigned long *, unsigned long,
+                                   unsigned long , unsigned long);
+/* NOTE(review): presumably tells the generic soft-fp code that the divisor
+   passed to udiv_qrnnd must be normalized first -- confirm.  */
+#define UDIV_NEEDS_NORMALIZATION 1  
+/* abort() becomes a jump, so every function that expands a macro using
+   abort() must provide a `bad_insn' label.  */
+#define abort()                 goto bad_insn
+/* Supply a value for __LITTLE_ENDIAN when no header has defined one, then
+   declare this build little-endian.  */
+#ifndef __LITTLE_ENDIAN
+#define __LITTLE_ENDIAN -1
+#endif
+#define __BYTE_ORDER __LITTLE_ENDIAN

diff --git a/arch/alpha/math-emu/Makefile b/arch/alpha/math-emu/Makefile new file mode 100644 index 000000000000..359ef087e69e --- /dev/null +++ b/arch/alpha/math-emu/Makefile
@@ -0,0 +1,9 @@
	1	#
	2	# Makefile for the FPU instruction emulation.
	3	#
	4
	5	EXTRA_CFLAGS := -w
	6
	7	obj-$(CONFIG_MATHEMU) += math-emu.o
	8
	9	math-emu-objs := math.o qrnnd.o


diff --git a/arch/alpha/math-emu/math.c b/arch/alpha/math-emu/math.c new file mode 100644 index 000000000000..ae79dd970b02 --- /dev/null +++ b/arch/alpha/math-emu/math.c
@@ -0,0 +1,400 @@
	1	#include <linux/module.h>
	2	#include <linux/types.h>
	3	#include <linux/kernel.h>
	4	#include <linux/sched.h>
	5
	6	#include <asm/uaccess.h>
	7
	8	#include "sfp-util.h"
	9	#include <math-emu/soft-fp.h>
	10	#include <math-emu/single.h>
	11	#include <math-emu/double.h>
	12
	13	#define OPC_PAL 0x00
	14	#define OPC_INTA 0x10
	15	#define OPC_INTL 0x11
	16	#define OPC_INTS 0x12
	17	#define OPC_INTM 0x13
	18	#define OPC_FLTC 0x14
	19	#define OPC_FLTV 0x15
	20	#define OPC_FLTI 0x16
	21	#define OPC_FLTL 0x17
	22	#define OPC_MISC 0x18
	23	#define OPC_JSR 0x1a
	24
	25	#define FOP_SRC_S 0
	26	#define FOP_SRC_T 2
	27	#define FOP_SRC_Q 3
	28
	29	#define FOP_FNC_ADDx 0
	30	#define FOP_FNC_CVTQL 0
	31	#define FOP_FNC_SUBx 1
	32	#define FOP_FNC_MULx 2
	33	#define FOP_FNC_DIVx 3
	34	#define FOP_FNC_CMPxUN 4
	35	#define FOP_FNC_CMPxEQ 5
	36	#define FOP_FNC_CMPxLT 6
	37	#define FOP_FNC_CMPxLE 7
	38	#define FOP_FNC_SQRTx 11
	39	#define FOP_FNC_CVTxS 12
	40	#define FOP_FNC_CVTxT 14
	41	#define FOP_FNC_CVTxQ 15
	42
	43	#define MISC_TRAPB 0x0000
	44	#define MISC_EXCB 0x0400
	45
	46	extern unsigned long alpha_read_fp_reg (unsigned long reg);
	47	extern void alpha_write_fp_reg (unsigned long reg, unsigned long val);
	48	extern unsigned long alpha_read_fp_reg_s (unsigned long reg);
	49	extern void alpha_write_fp_reg_s (unsigned long reg, unsigned long val);
	50
	51
	52	#ifdef MODULE
	53
	54	MODULE_DESCRIPTION("FP Software completion module");
	55
	56	extern long (*alpha_fp_emul_imprecise)(struct pt_regs *, unsigned long);
	57	extern long (*alpha_fp_emul) (unsigned long pc);
	58
	59	static long (*save_emul_imprecise)(struct pt_regs *, unsigned long);
	60	static long (*save_emul) (unsigned long pc);
	61
	62	long do_alpha_fp_emul_imprecise(struct pt_regs *, unsigned long);
	63	long do_alpha_fp_emul(unsigned long);
	64
	65	int init_module(void)
	66	{
	67	save_emul_imprecise = alpha_fp_emul_imprecise;
	68	save_emul = alpha_fp_emul;
	69	alpha_fp_emul_imprecise = do_alpha_fp_emul_imprecise;
	70	alpha_fp_emul = do_alpha_fp_emul;
	71	return 0;
	72	}
	73
	74	void cleanup_module(void)
	75	{
	76	alpha_fp_emul_imprecise = save_emul_imprecise;
	77	alpha_fp_emul = save_emul;
	78	}
	79
	80	#undef alpha_fp_emul_imprecise
	81	#define alpha_fp_emul_imprecise do_alpha_fp_emul_imprecise
	82	#undef alpha_fp_emul
	83	#define alpha_fp_emul do_alpha_fp_emul
	84
	85	#endif /* MODULE */
	86
	87
	88	/*
	89	* Emulate the floating point instruction at address PC. Returns -1 if the
	90	* instruction to be emulated is illegal (such as with the opDEC trap), else
	91	* the SI_CODE for a SIGFPE signal, else 0 if everything's ok.
	92	*
	93	* Notice that the kernel does not and cannot use FP regs. This is good
	94	* because it means that instead of saving/restoring all fp regs, we simply
	95	* stick the result of the operation into the appropriate register.
	96	*/
	97	long
	98	alpha_fp_emul (unsigned long pc)
	99	{
	100	FP_DECL_EX;
	101	FP_DECL_S(SA); FP_DECL_S(SB); FP_DECL_S(SR);
	102	FP_DECL_D(DA); FP_DECL_D(DB); FP_DECL_D(DR);
	103
	104	unsigned long fa, fb, fc, func, mode, src;
	105	unsigned long res, va, vb, vc, swcr, fpcr;
	106	__u32 insn;
	107	long si_code;
	108
	109	get_user(insn, (__u32 __user *)pc);
	110	fc = (insn >> 0) & 0x1f; /* destination register */
	111	fb = (insn >> 16) & 0x1f;
	112	fa = (insn >> 21) & 0x1f;
	113	func = (insn >> 5) & 0xf;
	114	src = (insn >> 9) & 0x3;
	115	mode = (insn >> 11) & 0x3;
	116
	117	fpcr = rdfpcr();
	118	swcr = swcr_update_status(current_thread_info()->ieee_state, fpcr);
	119
	120	if (mode == 3) {
	121	/* Dynamic -- get rounding mode from fpcr. */
	122	mode = (fpcr >> FPCR_DYN_SHIFT) & 3;
	123	}
	124
	125	switch (src) {
	126	case FOP_SRC_S:
	127	va = alpha_read_fp_reg_s(fa);
	128	vb = alpha_read_fp_reg_s(fb);
	129
	130	FP_UNPACK_SP(SA, &va);
	131	FP_UNPACK_SP(SB, &vb);
	132
	133	switch (func) {
	134	case FOP_FNC_SUBx:
	135	FP_SUB_S(SR, SA, SB);
	136	goto pack_s;
	137
	138	case FOP_FNC_ADDx:
	139	FP_ADD_S(SR, SA, SB);
	140	goto pack_s;
	141
	142	case FOP_FNC_MULx:
	143	FP_MUL_S(SR, SA, SB);
	144	goto pack_s;
	145
	146	case FOP_FNC_DIVx:
	147	FP_DIV_S(SR, SA, SB);
	148	goto pack_s;
	149
	150	case FOP_FNC_SQRTx:
	151	FP_SQRT_S(SR, SB);
	152	goto pack_s;
	153	}
	154	goto bad_insn;
	155
	156	case FOP_SRC_T:
	157	va = alpha_read_fp_reg(fa);
	158	vb = alpha_read_fp_reg(fb);
	159
	160	if ((func & ~3) == FOP_FNC_CMPxUN) {
	161	FP_UNPACK_RAW_DP(DA, &va);
	162	FP_UNPACK_RAW_DP(DB, &vb);
	163	if (!DA_e && !_FP_FRAC_ZEROP_1(DA)) {
	164	FP_SET_EXCEPTION(FP_EX_DENORM);
	165	if (FP_DENORM_ZERO)
	166	_FP_FRAC_SET_1(DA, _FP_ZEROFRAC_1);
	167	}
	168	if (!DB_e && !_FP_FRAC_ZEROP_1(DB)) {
	169	FP_SET_EXCEPTION(FP_EX_DENORM);
	170	if (FP_DENORM_ZERO)
	171	_FP_FRAC_SET_1(DB, _FP_ZEROFRAC_1);
	172	}
	173	FP_CMP_D(res, DA, DB, 3);
	174	vc = 0x4000000000000000UL;
	175	/* CMPTEQ, CMPTUN don't trap on QNaN,
	176	while CMPTLT and CMPTLE do */
	177	if (res == 3
	178	&& ((func & 3) >= 2
	179	|| FP_ISSIGNAN_D(DA)
	180	|| FP_ISSIGNAN_D(DB))) {
	181	FP_SET_EXCEPTION(FP_EX_INVALID);
	182	}
	183	switch (func) {
	184	case FOP_FNC_CMPxUN: if (res != 3) vc = 0; break;
	185	case FOP_FNC_CMPxEQ: if (res) vc = 0; break;
	186	case FOP_FNC_CMPxLT: if (res != -1) vc = 0; break;
	187	case FOP_FNC_CMPxLE: if ((long)res > 0) vc = 0; break;
	188	}
	189	goto done_d;
	190	}
	191
	192	FP_UNPACK_DP(DA, &va);
	193	FP_UNPACK_DP(DB, &vb);
	194
	195	switch (func) {
	196	case FOP_FNC_SUBx:
	197	FP_SUB_D(DR, DA, DB);
	198	goto pack_d;
	199
	200	case FOP_FNC_ADDx:
	201	FP_ADD_D(DR, DA, DB);
	202	goto pack_d;
	203
	204	case FOP_FNC_MULx:
	205	FP_MUL_D(DR, DA, DB);
	206	goto pack_d;
	207
	208	case FOP_FNC_DIVx:
	209	FP_DIV_D(DR, DA, DB);
	210	goto pack_d;
	211
	212	case FOP_FNC_SQRTx:
	213	FP_SQRT_D(DR, DB);
	214	goto pack_d;
	215
	216	case FOP_FNC_CVTxS:
	217	/* It is irritating that DEC encoded CVTST with
	218	SRC == T_floating. It is also interesting that
	219	the bit used to tell the two apart is /U... */
	220	if (insn & 0x2000) {
	221	FP_CONV(S,D,1,1,SR,DB);
	222	goto pack_s;
	223	} else {
	224	vb = alpha_read_fp_reg_s(fb);
	225	FP_UNPACK_SP(SB, &vb);
	226	DR_c = DB_c;
	227	DR_s = DB_s;
	228	DR_e = DB_e;
	229	DR_f = SB_f << (52 - 23);
	230	goto pack_d;
	231	}
	232
	233	case FOP_FNC_CVTxQ:
	234	if (DB_c == FP_CLS_NAN
	235	&& (_FP_FRAC_HIGH_RAW_D(DB) & _FP_QNANBIT_D)) {
	236	/* AAHB Table B-2 says QNaN should not trigger INV */
	237	vc = 0;
	238	} else
	239	FP_TO_INT_ROUND_D(vc, DB, 64, 2);
	240	goto done_d;
	241	}
	242	goto bad_insn;
	243
	244	case FOP_SRC_Q:
	245	vb = alpha_read_fp_reg(fb);
	246
	247	switch (func) {
	248	case FOP_FNC_CVTQL:
	249	/* Notice: We can get here only due to an integer
	250	overflow. Such overflows are reported as invalid
	251	ops. We return the result the hw would have
	252	computed. */
	253	vc = ((vb & 0xc0000000) << 32 | /* sign and msb */
	254	(vb & 0x3fffffff) << 29); /* rest of the int */
	255	FP_SET_EXCEPTION (FP_EX_INVALID);
	256	goto done_d;
	257
	258	case FOP_FNC_CVTxS:
	259	FP_FROM_INT_S(SR, ((long)vb), 64, long);
	260	goto pack_s;
	261
	262	case FOP_FNC_CVTxT:
	263	FP_FROM_INT_D(DR, ((long)vb), 64, long);
	264	goto pack_d;
	265	}
	266	goto bad_insn;
	267	}
	268	goto bad_insn;
	269
	270	pack_s:
	271	FP_PACK_SP(&vc, SR);
	272	if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ))
	273	vc = 0;
	274	alpha_write_fp_reg_s(fc, vc);
	275	goto done;
	276
	277	pack_d:
	278	FP_PACK_DP(&vc, DR);
	279	if ((_fex & FP_EX_UNDERFLOW) && (swcr & IEEE_MAP_UMZ))
	280	vc = 0;
	281	done_d:
	282	alpha_write_fp_reg(fc, vc);
	283	goto done;
	284
	285	/*
	286	* Take the appropriate action for each possible
	287	* floating-point result:
	288	*
	289	* - Set the appropriate bits in the FPCR
	290	* - If the specified exception is enabled in the FPCR,
	291	* return. The caller (entArith) will dispatch
	292	* the appropriate signal to the translated program.
	293	*
	294	* In addition, properly track the exception state in software
	295	* as described in the Alpha Architecture Handbook section 4.7.7.3.
	296	*/
	297	done:
	298	if (_fex) {
	299	/* Record exceptions in software control word. */
	300	swcr |= (_fex << IEEE_STATUS_TO_EXCSUM_SHIFT);
	301	current_thread_info()->ieee_state
	302	|= (_fex << IEEE_STATUS_TO_EXCSUM_SHIFT);
	303
	304	/* Update hardware control register. */
	305	fpcr &= (~FPCR_MASK | FPCR_DYN_MASK);
	306	fpcr |= ieee_swcr_to_fpcr(swcr);
	307	wrfpcr(fpcr);
	308
	309	/* Do we generate a signal? */
	310	_fex = _fex & swcr & IEEE_TRAP_ENABLE_MASK;
	311	si_code = 0;
	312	if (_fex) {
	313	if (_fex & IEEE_TRAP_ENABLE_DNO) si_code = FPE_FLTUND;
	314	if (_fex & IEEE_TRAP_ENABLE_INE) si_code = FPE_FLTRES;
	315	if (_fex & IEEE_TRAP_ENABLE_UNF) si_code = FPE_FLTUND;
	316	if (_fex & IEEE_TRAP_ENABLE_OVF) si_code = FPE_FLTOVF;
	317	if (_fex & IEEE_TRAP_ENABLE_DZE) si_code = FPE_FLTDIV;
	318	if (_fex & IEEE_TRAP_ENABLE_INV) si_code = FPE_FLTINV;
	319	}
	320
	321	return si_code;
	322	}
	323
	324	/* We used to write the destination register here, but DEC FORTRAN
	325	requires that the result *always* be written... so we do the write
	326	immediately after the operations above. */
	327
	328	return 0;
	329
	330	bad_insn:
	331	printk(KERN_ERR "alpha_fp_emul: Invalid FP insn %#x at %#lx\n",
	332	insn, pc);
	333	return -1;
	334	}
	335
	336	long
	337	alpha_fp_emul_imprecise (struct pt_regs *regs, unsigned long write_mask)
	338	{
	339	unsigned long trigger_pc = regs->pc - 4;
	340	unsigned long insn, opcode, rc, si_code = 0;
	341
	342	/*
	343	* Turn off the bits corresponding to registers that are the
	344	* target of instructions that set bits in the exception
	345	* summary register. We have some slack doing this because a
	346	* register that is the target of a trapping instruction can
	347	* be written at most once in the trap shadow.
	348	*
	349	* Branches, jumps, TRAPBs, EXCBs and calls to PALcode all
	350	* bound the trap shadow, so we need not look any further than
	351	* up to the first occurrence of such an instruction.
	352	*/
	353	while (write_mask) {
	354	get_user(insn, (__u32 __user *)(trigger_pc));
	355	opcode = insn >> 26;
	356	rc = insn & 0x1f;
	357
	358	switch (opcode) {
	359	case OPC_PAL:
	360	case OPC_JSR:
	361	case 0x30 ... 0x3f: /* branches */
	362	goto egress;
	363
	364	case OPC_MISC:
	365	switch (insn & 0xffff) {
	366	case MISC_TRAPB:
	367	case MISC_EXCB:
	368	goto egress;
	369
	370	default:
	371	break;
	372	}
	373	break;
	374
	375	case OPC_INTA:
	376	case OPC_INTL:
	377	case OPC_INTS:
	378	case OPC_INTM:
	379	write_mask &= ~(1UL << rc);
	380	break;
	381
	382	case OPC_FLTC:
	383	case OPC_FLTV:
	384	case OPC_FLTI:
	385	case OPC_FLTL:
	386	write_mask &= ~(1UL << (rc + 32));
	387	break;
	388	}
	389	if (!write_mask) {
	390	/* Re-execute insns in the trap-shadow. */
	391	regs->pc = trigger_pc + 4;
	392	si_code = alpha_fp_emul(trigger_pc);
	393	goto egress;
	394	}
	395	trigger_pc -= 4;
	396	}
	397
	398	egress:
	399	return si_code;
	400	}


diff --git a/arch/alpha/math-emu/qrnnd.S b/arch/alpha/math-emu/qrnnd.S new file mode 100644 index 000000000000..d6373ec1bff9 --- /dev/null +++ b/arch/alpha/math-emu/qrnnd.S
@@ -0,0 +1,163 @@
	1	# Alpha 21064 __udiv_qrnnd
	2	# Copyright (C) 1992, 1994, 1995, 2000 Free Software Foundation, Inc.
	3
	4	# This file is part of GCC.
	5
	6	# The GNU MP Library is free software; you can redistribute it and/or modify
	7	# it under the terms of the GNU General Public License as published by
	8	# the Free Software Foundation; either version 2 of the License, or (at your
	9	# option) any later version.
	10
	11	# In addition to the permissions in the GNU General Public License, the
	12	# Free Software Foundation gives you unlimited permission to link the
	13	# compiled version of this file with other programs, and to distribute
	14	# those programs without any restriction coming from the use of this
	15	# file. (The General Public License restrictions do apply in other
	16	# respects; for example, they cover modification of the file, and
	17	# distribution when not linked into another program.)
	18
	19	# This file is distributed in the hope that it will be useful, but
	20	# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
	21	# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
	22	# License for more details.
	23
	24	# You should have received a copy of the GNU General Public License
	25	# along with GCC; see the file COPYING. If not, write to the
	26	# Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
	27	# MA 02111-1307, USA.
	28
	29	.set noreorder
	30	.set noat
	31
	32	.text
	33
	/*
	 * __udiv_qrnnd -- divide the two-word value n1:n0 by the one-word d.
	 *
	 * In:   $16 rem_ptr  address the remainder is stored to
	 *       $17 n1       high word of the dividend
	 *       $18 n0       low word of the dividend
	 *       $19 d        divisor
	 * Out:  $0           quotient; the remainder is written to 0(rem_ptr)
	 *
	 * Scratch registers used: $2, $3, $4, $5, $6, $20 and $at.
	 * No stack frame is set up and the routine returns through $26.
	 *
	 * NOTE(review): presumably the caller guarantees n1 < d so that the
	 * quotient fits in one word -- nothing in this routine checks it.
	 *
	 * Both loops run 16 times with four unrolled steps each, i.e. 64
	 * shift/compare/subtract steps, one per quotient bit.
	 */
	34	.globl __udiv_qrnnd
	35	.ent __udiv_qrnnd
	36	__udiv_qrnnd:
	37	.frame $30,0,$26,0
	38	.prologue 0
	39
	/*
	 * Symbolic register names.  cnt is the loop counter, tmp and AT are
	 * temporaries and qb holds the quotient bit produced by each step.
	 */
	40	#define cnt $2
	41	#define tmp $3
	42	#define rem_ptr $16
	43	#define n1 $17
	44	#define n0 $18
	45	#define d $19
	46	#define qb $20
	47	#define AT $at
	48
	/*
	 * 16 loop iterations.  A divisor that is negative when read as a signed
	 * number (top bit set) is handled by the separate path further down.
	 */
	49	ldiq cnt,16
	50	blt d,$largedivisor
	51
	/*
	 * Step 1 of 4.  Shift n1:n0 left by one bit: the top bit of n0 (found
	 * with a signed compare against 0) is OR-ed into the doubled n1.  Then
	 * qb = (d <= n1), unsigned; when qb is set d is subtracted from n1.  qb
	 * is OR-ed into the bit just vacated at the bottom of n0, so n0 turns
	 * into the quotient while the dividend bits are shifted out of it.
	 */
	52	$loop1: cmplt n0,0,tmp
	53	addq n1,n1,n1
	54	bis n1,tmp,n1
	55	addq n0,n0,n0
	56	cmpule d,n1,qb
	57	subq n1,d,tmp
	58	cmovne qb,tmp,n1
	59	bis n0,qb,n0
	/* Step 2 of 4: same sequence as step 1. */
	60	cmplt n0,0,tmp
	61	addq n1,n1,n1
	62	bis n1,tmp,n1
	63	addq n0,n0,n0
	64	cmpule d,n1,qb
	65	subq n1,d,tmp
	66	cmovne qb,tmp,n1
	67	bis n0,qb,n0
	/* Step 3 of 4: same sequence as step 1. */
	68	cmplt n0,0,tmp
	69	addq n1,n1,n1
	70	bis n1,tmp,n1
	71	addq n0,n0,n0
	72	cmpule d,n1,qb
	73	subq n1,d,tmp
	74	cmovne qb,tmp,n1
	75	bis n0,qb,n0
	/* Step 4 of 4: same sequence as step 1. */
	76	cmplt n0,0,tmp
	77	addq n1,n1,n1
	78	bis n1,tmp,n1
	79	addq n0,n0,n0
	80	cmpule d,n1,qb
	81	subq n1,d,tmp
	82	cmovne qb,tmp,n1
	83	bis n0,qb,n0
	/* Count down and repeat while cnt is still positive. */
	84	subq cnt,1,cnt
	85	bgt cnt,$loop1
	/* n1 now holds the remainder and n0 the quotient: store one, return the other. */
	86	stq n1,0(rem_ptr)
	87	bis $31,n0,$0
	88	ret $31,($26),1
	89
	/*
	 * Divisor with its top bit set.  The dividend is halved and divided by
	 * the divisor halved and rounded up; the result is corrected afterwards.
	 */
	90	$largedivisor:
	/* Remember the dividend bit that the right shift below discards. */
	91	and n0,1,$4
	92
	/* Shift n1:n0 right by one bit; bit 0 of n1 moves into bit 63 of n0. */
	93	srl n0,1,n0
	94	sll n1,63,tmp
	95	or tmp,n0,n0
	96	srl n1,1,n1
	97
	/* $6 = low bit of d, $5 = (d >> 1) + $6, i.e. d / 2 rounded up. */
	98	and d,1,$6
	99	srl d,1,$5
	100	addq $5,$6,$5
	101
	/* Same four-step loop as $loop1, but dividing by $5 instead of d. */
	102	$loop2: cmplt n0,0,tmp
	103	addq n1,n1,n1
	104	bis n1,tmp,n1
	105	addq n0,n0,n0
	106	cmpule $5,n1,qb
	107	subq n1,$5,tmp
	108	cmovne qb,tmp,n1
	109	bis n0,qb,n0
	/* Step 2 of 4. */
	110	cmplt n0,0,tmp
	111	addq n1,n1,n1
	112	bis n1,tmp,n1
	113	addq n0,n0,n0
	114	cmpule $5,n1,qb
	115	subq n1,$5,tmp
	116	cmovne qb,tmp,n1
	117	bis n0,qb,n0
	/* Step 3 of 4. */
	118	cmplt n0,0,tmp
	119	addq n1,n1,n1
	120	bis n1,tmp,n1
	121	addq n0,n0,n0
	122	cmpule $5,n1,qb
	123	subq n1,$5,tmp
	124	cmovne qb,tmp,n1
	125	bis n0,qb,n0
	/* Step 4 of 4. */
	126	cmplt n0,0,tmp
	127	addq n1,n1,n1
	128	bis n1,tmp,n1
	129	addq n0,n0,n0
	130	cmpule $5,n1,qb
	131	subq n1,$5,tmp
	132	cmovne qb,tmp,n1
	133	bis n0,qb,n0
	134	subq cnt,1,cnt
	135	bgt cnt,$loop2
	136
	/*
	 * Undo the halving of the dividend: remainder = 2 * n1 + the bit saved
	 * in $4.  If d was even ($6 == 0) this is the final remainder and n0 the
	 * final quotient; an odd d needs the correction at $Odd.
	 */
	137	addq n1,n1,n1
	138	addq $4,n1,n1
	139	bne $6,$Odd
	140	stq n1,0(rem_ptr)
	141	bis $31,n0,$0
	142	ret $31,($26),1
	143
	/*
	 * Odd d: the loop divided by (d + 1) / 2, so with quotient q' the value
	 * is q' * (d + 1) + n1 = q' * d + (n1 + q').  Add q' to the remainder
	 * and then take d out of it again wherever it no longer fits.
	 */
	144	$Odd:
	145	/* q' in n0. r' in n1 */
	146	addq n1,n0,n1
	147
	/*
	 * If that addition wrapped around, subtract d once and add the carry
	 * to the quotient.
	 */
	148	cmpult n1,n0,tmp # tmp := carry from addq
	149	subq n1,d,AT
	150	addq n0,tmp,n0
	151	cmovne tmp,AT,n1
	152
	/*
	 * Final adjustment: tmp = (n1 < d), unsigned.  When tmp is 0 the
	 * remainder is still >= d, so the quotient is incremented and d is
	 * subtracted from the remainder once more.
	 */
	153	cmpult n1,d,tmp
	154	addq n0,1,AT
	155	cmoveq tmp,AT,n0
	156	subq n1,d,AT
	157	cmoveq tmp,AT,n1
	158
	/* Store the remainder and return the quotient. */
	159	stq n1,0(rem_ptr)
	160	bis $31,n0,$0
	161	ret $31,($26),1
	162
	163	.end __udiv_qrnnd


diff --git a/arch/alpha/math-emu/sfp-util.h b/arch/alpha/math-emu/sfp-util.h new file mode 100644 index 000000000000..f53707f77455 --- /dev/null +++ b/arch/alpha/math-emu/sfp-util.h
@@ -0,0 +1,35 @@
	1	#include <linux/kernel.h>
	2	#include <linux/sched.h>
	3	#include <linux/types.h>
	4	#include <asm/byteorder.h>
	5	#include <asm/fpu.h>
	6
	/*
	 * add_ssaaaa(sh, sl, ah, al, bh, bl)
	 *
	 * Two-word addition: (sh:sl) = (ah:al) + (bh:bl), where the first name of
	 * each pair is the high word and the second the low word.  A carry out of
	 * the low words is detected by the low sum wrapping below al.
	 *
	 * The low sum is staged in a temporary and stored last, so the result is
	 * correct even when sl is the same object as al or bl (an in-place
	 * "x += y").  Storing sl first would make the carry test compare the sum
	 * with itself and lose the carry.
	 *
	 * Arguments are evaluated more than once and must be free of side
	 * effects.  The macro can still be used where an expression is expected.
	 */
	#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
	({ __typeof__(sl) __add_ssaaaa_lo = (al) + (bl); \
	   (sh) = (ah) + (bh) + (__add_ssaaaa_lo < (al)); \
	   (sl) = __add_ssaaaa_lo; })
	9
	/*
	 * sub_ddmmss(sh, sl, ah, al, bh, bl)
	 *
	 * Two-word subtraction: (sh:sl) = (ah:al) - (bh:bl), where the first name
	 * of each pair is the high word and the second the low word.  A borrow
	 * from the high word is needed exactly when al < bl.
	 *
	 * The low difference is staged in a temporary and stored last, so the
	 * borrow test still sees the original al and bl when sl is the same
	 * object as one of them (an in-place "x -= y").
	 *
	 * Arguments are evaluated more than once and must be free of side
	 * effects.  The macro can still be used where an expression is expected.
	 */
	#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
	({ __typeof__(sl) __sub_ddmmss_lo = (al) - (bl); \
	   (sh) = (ah) - (bh) - ((al) < (bl)); \
	   (sl) = __sub_ddmmss_lo; })
	12
	/*
	 * umul_ppmm(wh, wl, u, v)
	 *
	 * Multiplies u by v with two instructions: mulq stores its result in wl
	 * and umulh stores its result in wh (presumably the low and the high
	 * word of the full product -- confirm against the Alpha instruction
	 * reference).
	 *
	 * wl carries the early-clobber modifier ("=&r") because it is written by
	 * the first instruction while u and v are still read by the second one;
	 * wh is written last and needs no such marking.
	 */
	13	#define umul_ppmm(wh, wl, u, v) \
	14	__asm__ ("mulq %2,%3,%1; umulh %2,%3,%0" \
	15	: "=r" ((UDItype)(wh)), \
	16	"=&r" ((UDItype)(wl)) \
	17	: "r" ((UDItype)(u)), \
	18	"r" ((UDItype)(v)))
	19
	/*
	 * udiv_qrnnd(q, r, n1, n0, d)
	 *
	 * Statement-style wrapper around the out-of-line routine __udiv_qrnnd():
	 * the routine's return value is stored in q, and the word it writes
	 * through its first (pointer) argument is stored in r.  A block-local
	 * temporary receives that word, presumably so that r itself does not have
	 * to be addressable.
	 */
	20	#define udiv_qrnnd(q, r, n1, n0, d) \
	21	do { unsigned long __r; \
	22	(q) = __udiv_qrnnd (&__r, (n1), (n0), (d)); \
	23	(r) = __r; \
	24	} while (0)
	/*
	 * Prototype of the helper used above; arguments in order are the output
	 * pointer followed by the values passed as n1, n0 and d.
	 */
	25	extern unsigned long __udiv_qrnnd (unsigned long *, unsigned long,
	26	unsigned long , unsigned long);
	27
	/*
	 * NOTE(review): presumably read by the generic soft-fp code to learn
	 * that udiv_qrnnd() wants a normalised divisor -- confirm at the place
	 * where the macro is tested.
	 */
	28	#define UDIV_NEEDS_NORMALIZATION 1
	29
	/*
	 * Any abort() in code that includes this header becomes a jump to a
	 * label named bad_insn, so every function in which abort() can appear
	 * must define that label.
	 */
	30	#define abort() goto bad_insn
	31
	/*
	 * Give __LITTLE_ENDIAN a fallback value when no earlier header defined
	 * it, then declare the byte order to be little-endian.
	 */
	32	#ifndef __LITTLE_ENDIAN
	33	#define __LITTLE_ENDIAN -1
	34	#endif
	35	#define __BYTE_ORDER __LITTLE_ENDIAN