53 files changed, 4508 insertions, 0 deletions
diff --git a/arch/ppc/math-emu/Makefile b/arch/ppc/math-emu/Makefile
new file mode 100644
index 000000000000..754143e8936b
--- /dev/null
+++ b/arch/ppc/math-emu/Makefile
@@ -0,0 +1,13 @@
+# Objects that are always built: math.o plus the fmr, lfd and stfd handlers.
+obj-y                           := math.o fmr.o lfd.o stfd.o
+# The remaining handlers and support objects are appended to
+# obj-<value of CONFIG_MATH_EMULATION>, i.e. they only join obj-y when
+# that option is set to y.
+obj-$(CONFIG_MATH_EMULATION)    += fabs.o fadd.o fadds.o fcmpo.o fcmpu.o \
+                                        fctiw.o fctiwz.o fdiv.o fdivs.o \
+                                        fmadd.o fmadds.o fmsub.o fmsubs.o \
+                                        fmul.o fmuls.o fnabs.o fneg.o types.o \
+                                        fnmadd.o fnmadds.o fnmsub.o fnmsubs.o \
+                                        fres.o frsp.o frsqrte.o fsel.o lfs.o \
+                                        fsqrt.o fsqrts.o fsub.o fsubs.o \
+                                        mcrfs.o mffs.o mtfsb0.o mtfsb1.o \
+                                        mtfsf.o mtfsfi.o stfiwx.o stfs.o \
+                                        udivmodti4.o
diff --git a/arch/ppc/math-emu/double.h b/arch/ppc/math-emu/double.h
new file mode 100644
index 000000000000..ffba8b67f059
--- /dev/null
+++ b/arch/ppc/math-emu/double.h
@@ -0,0 +1,129 @@
+/*
+ * Definitions for IEEE Double Precision
+ */
+/* The machine word (_FP_W_TYPE_SIZE bits) must be at least 32 bits wide. */
+#if _FP_W_TYPE_SIZE < 32
+#error "Here's a nickel kid.  Go buy yourself a real computer."
+#endif
+/*
+ * Bits of storage used for the fraction: two words when a word is
+ * narrower than 64 bits, a single word otherwise.
+ */
+#if _FP_W_TYPE_SIZE < 64
+#define _FP_FRACTBITS_D         (2 * _FP_W_TYPE_SIZE)
+#else
+#define _FP_FRACTBITS_D         _FP_W_TYPE_SIZE
+#endif
+/* Significand width in bits. */
+#define _FP_FRACBITS_D          53
+/* Storage bits left over beyond the significand. */
+#define _FP_FRACXBITS_D         (_FP_FRACTBITS_D - _FP_FRACBITS_D)
+/* Significand width plus _FP_WORKBITS (which is defined outside this header). */
+#define _FP_WFRACBITS_D         (_FP_WORKBITS + _FP_FRACBITS_D)
+/* Storage bits left over beyond the working significand. */
+#define _FP_WFRACXBITS_D        (_FP_FRACTBITS_D - _FP_WFRACBITS_D)
+/* Exponent field width, exponent bias and largest exponent field value. */
+#define _FP_EXPBITS_D           11
+#define _FP_EXPBIAS_D           1023
+#define _FP_EXPMAX_D            2047
+/*
+ * Single-bit masks.  Each bit position is reduced modulo the word size,
+ * so every mask is expressed relative to one _FP_W_TYPE word.
+ */
+/* Bit (_FP_FRACBITS_D - 2) of the significand. */
+#define _FP_QNANBIT_D           \
+        ((_FP_W_TYPE)1 << ((_FP_FRACBITS_D-2) % _FP_W_TYPE_SIZE))
+/* Bit (_FP_FRACBITS_D - 1), the topmost bit of the significand. */
+#define _FP_IMPLBIT_D           \
+        ((_FP_W_TYPE)1 << ((_FP_FRACBITS_D-1) % _FP_W_TYPE_SIZE))
+/* Bit _FP_WFRACBITS_D, one position above the working significand. */
+#define _FP_OVERFLOW_D          \
+        ((_FP_W_TYPE)1 << (_FP_WFRACBITS_D % _FP_W_TYPE_SIZE))
+#if _FP_W_TYPE_SIZE < 64
+/*
+ * Overlay of a C double with its bit-fields, for word sizes below 64 bits.
+ * The fraction is split in two: frac0 is one full word and frac1 holds the
+ * remaining fraction bits.  The field order is mirrored between big-endian
+ * and little-endian builds.
+ */
+union _FP_UNION_D
+{
+  double flt;
+  struct {
+#if __BYTE_ORDER == __BIG_ENDIAN
+    unsigned sign  : 1;
+    unsigned exp   : _FP_EXPBITS_D;
+    /* one bit narrower than the significand when _FP_IMPLBIT_D is non-zero */
+    unsigned frac1 : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0) - _FP_W_TYPE_SIZE;
+    unsigned frac0 : _FP_W_TYPE_SIZE;
+#else
+    unsigned frac0 : _FP_W_TYPE_SIZE;
+    unsigned frac1 : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0) - _FP_W_TYPE_SIZE;
+    unsigned exp   : _FP_EXPBITS_D;
+    unsigned sign  : 1;
+#endif
+  } bits __attribute__((packed));
+};
+/*
+ * Double-precision front ends for the generic soft-fp macros, selecting
+ * the two-word variants (suffix _2 / word count 2).
+ */
+#define FP_DECL_D(X)            _FP_DECL(2,X)
+#define FP_UNPACK_RAW_D(X,val)  _FP_UNPACK_RAW_2(D,X,val)
+#define FP_PACK_RAW_D(val,X)    _FP_PACK_RAW_2(D,val,X)
+/* Raw unpack followed by _FP_UNPACK_CANONICAL. */
+#define FP_UNPACK_D(X,val)              \
+  do {                                  \
+    _FP_UNPACK_RAW_2(D,X,val);          \
+    _FP_UNPACK_CANONICAL(D,2,X);        \
+  } while (0)
+/* _FP_PACK_CANONICAL followed by a raw pack. */
+#define FP_PACK_D(val,X)                \
+  do {                                  \
+    _FP_PACK_CANONICAL(D,2,X);          \
+    _FP_PACK_RAW_2(D,val,X);            \
+  } while (0)
+/* Arithmetic, comparison and integer conversion wrappers. */
+#define FP_NEG_D(R,X)           _FP_NEG(D,2,R,X)
+#define FP_ADD_D(R,X,Y)         _FP_ADD(D,2,R,X,Y)
+#define FP_SUB_D(R,X,Y)         _FP_SUB(D,2,R,X,Y)
+#define FP_MUL_D(R,X,Y)         _FP_MUL(D,2,R,X,Y)
+#define FP_DIV_D(R,X,Y)         _FP_DIV(D,2,R,X,Y)
+#define FP_SQRT_D(R,X)          _FP_SQRT(D,2,R,X)
+#define FP_CMP_D(r,X,Y,un)      _FP_CMP(D,2,r,X,Y,un)
+#define FP_CMP_EQ_D(r,X,Y)      _FP_CMP_EQ(D,2,r,X,Y)
+#define FP_TO_INT_D(r,X,rsz,rsg)  _FP_TO_INT(D,2,r,X,rsz,rsg)
+#define FP_FROM_INT_D(X,r,rs,rt)  _FP_FROM_INT(D,2,X,r,rs,rt)
+#else
+/*
+ * Overlay of a C double with its bit-fields, for word sizes of 64 bits or
+ * more: the whole fraction fits in the single 'frac' field.  The field
+ * order is mirrored between big-endian and little-endian builds.
+ */
+union _FP_UNION_D
+{
+  double flt;
+  struct {
+#if __BYTE_ORDER == __BIG_ENDIAN
+    unsigned sign : 1;
+    unsigned exp  : _FP_EXPBITS_D;
+    /* one bit narrower than the significand when _FP_IMPLBIT_D is non-zero */
+    unsigned long frac : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0);
+#else
+    unsigned long frac : _FP_FRACBITS_D - (_FP_IMPLBIT_D != 0);
+    unsigned exp  : _FP_EXPBITS_D;
+    unsigned sign : 1;
+#endif
+  } bits __attribute__((packed));
+};
+/*
+ * Double-precision front ends for the generic soft-fp macros, selecting
+ * the one-word variants (suffix _1 / word count 1).
+ */
+#define FP_DECL_D(X)            _FP_DECL(1,X)
+#define FP_UNPACK_RAW_D(X,val)  _FP_UNPACK_RAW_1(D,X,val)
+#define FP_PACK_RAW_D(val,X)    _FP_PACK_RAW_1(D,val,X)
+/* Raw unpack followed by _FP_UNPACK_CANONICAL. */
+#define FP_UNPACK_D(X,val)              \
+  do {                                  \
+    _FP_UNPACK_RAW_1(D,X,val);          \
+    _FP_UNPACK_CANONICAL(D,1,X);        \
+  } while (0)
+/* _FP_PACK_CANONICAL followed by a raw pack. */
+#define FP_PACK_D(val,X)                \
+  do {                                  \
+    _FP_PACK_CANONICAL(D,1,X);          \
+    _FP_PACK_RAW_1(D,val,X);            \
+  } while (0)
+/* Arithmetic wrappers. */
+#define FP_NEG_D(R,X)           _FP_NEG(D,1,R,X)
+#define FP_ADD_D(R,X,Y)         _FP_ADD(D,1,R,X,Y)
+#define FP_SUB_D(R,X,Y)         _FP_SUB(D,1,R,X,Y)
+#define FP_MUL_D(R,X,Y)         _FP_MUL(D,1,R,X,Y)
+#define FP_DIV_D(R,X,Y)         _FP_DIV(D,1,R,X,Y)
+#define FP_SQRT_D(R,X)          _FP_SQRT(D,1,R,X)
+/* The implementation of _FP_MUL_D and _FP_DIV_D should be chosen by
+   the target machine.  */
+/* Comparison and integer conversion wrappers. */
+#define FP_CMP_D(r,X,Y,un)      _FP_CMP(D,1,r,X,Y,un)
+#define FP_CMP_EQ_D(r,X,Y)      _FP_CMP_EQ(D,1,r,X,Y)
+#define FP_TO_INT_D(r,X,rsz,rsg)  _FP_TO_INT(D,1,r,X,rsz,rsg)
+#define FP_FROM_INT_D(X,r,rs,rt)  _FP_FROM_INT(D,1,X,r,rs,rt)
+#endif /* W_TYPE_SIZE < 64 */
diff --git a/arch/ppc/math-emu/fabs.c b/arch/ppc/math-emu/fabs.c
new file mode 100644
index 000000000000..41f0617f3d3a
--- /dev/null
+++ b/arch/ppc/math-emu/fabs.c
@@ -0,0 +1,18 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+/*
+ * fabs - copy frB to frD with the top bit of word 0 cleared.
+ * @frD: destination, two 32-bit words
+ * @frB: source, two 32-bit words
+ *
+ * Word 1 is copied unchanged.  Always returns 0.
+ */
+int
+fabs(u32 *frD, u32 *frB)
+{
+        const u32 keep_mask = 0x7fffffff;       /* everything but bit 31 */
+        frD[0] = frB[0] & keep_mask;
+        frD[1] = frB[1];
+#ifdef DEBUG
+        printk("%s: D %p, B %p: ", __FUNCTION__, frD, frB);
+        dump_double(frD);
+        printk("\n");
+#endif
+        return 0;
+}
diff --git a/arch/ppc/math-emu/fadd.c b/arch/ppc/math-emu/fadd.c
new file mode 100644
index 000000000000..fc8836488b64
--- /dev/null
+++ b/arch/ppc/math-emu/fadd.c
@@ -0,0 +1,38 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+#include "double.h"
+/*
+ * fadd - double-precision add: the sum of frA and frB is packed into frD.
+ * @frD: destination operand
+ * @frA: first addend
+ * @frB: second addend
+ *
+ * Returns EFLAG_VXISI when both operands are infinities of opposite sign,
+ * OR-ed with the value produced by __FP_PACK_D().
+ */
+int
+fadd(void *frD, void *frA, void *frB)
+{
+        FP_DECL_D(R);
+        FP_DECL_D(A);
+        FP_DECL_D(B);
+        int ret = 0;
+#ifdef DEBUG
+        printk("%s: %p %p %p\n", __FUNCTION__, frD, frA, frB);
+#endif
+        __FP_UNPACK_D(A, frA);
+        __FP_UNPACK_D(B, frB);
+#ifdef DEBUG
+        printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+        printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+        /* infinity plus the oppositely signed infinity is invalid */
+        if (A_c == FP_CLS_INF && B_c == FP_CLS_INF && A_s != B_s) {
+                ret |= EFLAG_VXISI;
+        }
+        FP_ADD_D(R, A, B);
+#ifdef DEBUG
+        printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+        ret |= __FP_PACK_D(frD, R);
+        return ret;
+}
diff --git a/arch/ppc/math-emu/fadds.c b/arch/ppc/math-emu/fadds.c
new file mode 100644
index 000000000000..93025b6c8f3c
--- /dev/null
+++ b/arch/ppc/math-emu/fadds.c
@@ -0,0 +1,39 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+#include "double.h"
+#include "single.h"
+/*
+ * fadds - add frA and frB and pack the sum into frD with __FP_PACK_DS().
+ * @frD: destination operand
+ * @frA: first addend
+ * @frB: second addend
+ *
+ * The operands are unpacked and added as doubles.  Returns EFLAG_VXISI
+ * when both operands are infinities of opposite sign, OR-ed with the
+ * value produced by __FP_PACK_DS().
+ */
+int
+fadds(void *frD, void *frA, void *frB)
+{
+        FP_DECL_D(R);
+        FP_DECL_D(A);
+        FP_DECL_D(B);
+        int ret = 0;
+#ifdef DEBUG
+        printk("%s: %p %p %p\n", __FUNCTION__, frD, frA, frB);
+#endif
+        __FP_UNPACK_D(A, frA);
+        __FP_UNPACK_D(B, frB);
+#ifdef DEBUG
+        printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+        printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+        /* infinity plus the oppositely signed infinity is invalid */
+        if (A_c == FP_CLS_INF && B_c == FP_CLS_INF && A_s != B_s) {
+                ret |= EFLAG_VXISI;
+        }
+        FP_ADD_D(R, A, B);
+#ifdef DEBUG
+        printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+        ret |= __FP_PACK_DS(frD, R);
+        return ret;
+}
diff --git a/arch/ppc/math-emu/fcmpo.c b/arch/ppc/math-emu/fcmpo.c
new file mode 100644
index 000000000000..4efac394b4cb
--- /dev/null
+++ b/arch/ppc/math-emu/fcmpo.c
@@ -0,0 +1,46 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+#include "double.h"
+/*
+ * fcmpo - compare frA with frB and record the outcome.
+ * @ccr:  condition register image; the 4-bit field crfD is replaced
+ * @crfD: condition register field number; field 0 is the top nibble
+ * @frA:  first operand
+ * @frB:  second operand
+ *
+ * The 4-bit result code is stored in bits 12..15 of __FPU_FPSCR (bits
+ * 12..16 are cleared first) and in field crfD of *ccr.
+ *
+ * Returns EFLAG_VXVC when either operand is a NaN, otherwise 0.
+ */
+int
+fcmpo(u32 *ccr, int crfD, void *frA, void *frB)
+{
+        FP_DECL_D(A);
+        FP_DECL_D(B);
+        /*
+         * Indexed by (FP_CMP_D result + 1) & 3.  FP_CMP_D is expected to
+         * yield -1, 0 or 1, or the "unordered" value 2 passed below.
+         */
+        static const int code[4] = { (1 << 3), (1 << 1), (1 << 2), (1 << 0) };
+        /* bit offset of field crfD inside *ccr */
+        const unsigned int shift = (7 - crfD) << 2;
+        long cmp;
+        int ret = 0;
+#ifdef DEBUG
+        printk("%s: %p (%08x) %d %p %p\n", __FUNCTION__, ccr, *ccr, crfD, frA, frB);
+#endif
+        __FP_UNPACK_D(A, frA);
+        __FP_UNPACK_D(B, frB);
+#ifdef DEBUG
+        printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+        printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+        if (A_c == FP_CLS_NAN || B_c == FP_CLS_NAN)
+                ret |= EFLAG_VXVC;
+        FP_CMP_D(cmp, A, B, 2);
+        cmp = code[(cmp + 1) & 3];
+        __FPU_FPSCR &= ~(0x1f000);
+        __FPU_FPSCR |= (cmp << 12);
+        /*
+         * Shift unsigned values: for crfD == 0 the shift count is 28, and
+         * shifting a signed 15 (or a signed 8) that far overflows into the
+         * sign bit, which is undefined behaviour.
+         */
+        *ccr &= ~(15U << shift);
+        *ccr |= ((u32)cmp << shift);
+#ifdef DEBUG
+        printk("CR: %08x\n", *ccr);
+#endif
+        return ret;
+}
diff --git a/arch/ppc/math-emu/fcmpu.c b/arch/ppc/math-emu/fcmpu.c
new file mode 100644
index 000000000000..b7e33176e618
--- /dev/null
+++ b/arch/ppc/math-emu/fcmpu.c
@@ -0,0 +1,42 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+#include "double.h"
+/*
+ * fcmpu - compare frA with frB and record the outcome.
+ * @ccr:  condition register image; the 4-bit field crfD is replaced
+ * @crfD: condition register field number; field 0 is the top nibble
+ * @frA:  first operand
+ * @frB:  second operand
+ *
+ * The 4-bit result code is stored in bits 12..15 of __FPU_FPSCR (bits
+ * 12..16 are cleared first) and in field crfD of *ccr.
+ *
+ * Always returns 0.
+ */
+int
+fcmpu(u32 *ccr, int crfD, void *frA, void *frB)
+{
+        FP_DECL_D(A);
+        FP_DECL_D(B);
+        /*
+         * Indexed by (FP_CMP_D result + 1) & 3.  FP_CMP_D is expected to
+         * yield -1, 0 or 1, or the "unordered" value 2 passed below.
+         */
+        static const int code[4] = { (1 << 3), (1 << 1), (1 << 2), (1 << 0) };
+        /* bit offset of field crfD inside *ccr */
+        const unsigned int shift = (7 - crfD) << 2;
+        long cmp;
+#ifdef DEBUG
+        printk("%s: %p (%08x) %d %p %p\n", __FUNCTION__, ccr, *ccr, crfD, frA, frB);
+#endif
+        __FP_UNPACK_D(A, frA);
+        __FP_UNPACK_D(B, frB);
+#ifdef DEBUG
+        printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+        printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+        FP_CMP_D(cmp, A, B, 2);
+        cmp = code[(cmp + 1) & 3];
+        __FPU_FPSCR &= ~(0x1f000);
+        __FPU_FPSCR |= (cmp << 12);
+        /*
+         * Shift unsigned values: for crfD == 0 the shift count is 28, and
+         * shifting a signed 15 (or a signed 8) that far overflows into the
+         * sign bit, which is undefined behaviour.
+         */
+        *ccr &= ~(15U << shift);
+        *ccr |= ((u32)cmp << shift);
+#ifdef DEBUG
+        printk("CR: %08x\n", *ccr);
+#endif
+        return 0;
+}
diff --git a/arch/ppc/math-emu/fctiw.c b/arch/ppc/math-emu/fctiw.c
new file mode 100644
index 000000000000..3b3c98b840cf
--- /dev/null
+++ b/arch/ppc/math-emu/fctiw.c
@@ -0,0 +1,25 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+#include "double.h"
+/*
+ * fctiw - convert the double in frB to a signed 32-bit integer.
+ * @frD: destination, two 32-bit words; only word 1 is written
+ * @frB: source operand
+ *
+ * The conversion is done by FP_TO_INT_D(); word 0 of frD is left
+ * untouched.  Always returns 0.
+ */
+int
+fctiw(u32 *frD, void *frB)
+{
+        unsigned int r;
+        FP_DECL_D(B);
+        __FP_UNPACK_D(B, frB);
+        /* 32-bit result, signed */
+        FP_TO_INT_D(r, B, 32, 1);
+        frD[1] = (u32)r;
+#ifdef DEBUG
+        printk("%s: D %p, B %p: ", __FUNCTION__, frD, frB);
+        dump_double(frD);
+        printk("\n");
+#endif
+        return 0;
+}
diff --git a/arch/ppc/math-emu/fctiwz.c b/arch/ppc/math-emu/fctiwz.c
new file mode 100644
index 000000000000..7717eb6fcfb6
--- /dev/null
+++ b/arch/ppc/math-emu/fctiwz.c
@@ -0,0 +1,32 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+#include "double.h"
+/*
+ * fctiwz - convert the double in frB to a signed 32-bit integer with
+ * the rounding field of __FPU_FPSCR temporarily set to FP_RND_ZERO.
+ * @frD: destination, two 32-bit words; only word 1 is written
+ * @frB: source operand
+ *
+ * __FPU_FPSCR is restored to its value on entry once the conversion is
+ * done.  Always returns 0.
+ */
+int
+fctiwz(u32 *frD, void *frB)
+{
+        FP_DECL_D(B);
+        unsigned int r;
+        u32 saved_fpscr;
+        saved_fpscr = __FPU_FPSCR;
+        /* replace the two low-order bits with FP_RND_ZERO */
+        __FPU_FPSCR = (__FPU_FPSCR & ~(3)) | FP_RND_ZERO;
+        __FP_UNPACK_D(B, frB);
+        FP_TO_INT_D(r, B, 32, 1);
+        frD[1] = r;
+        __FPU_FPSCR = saved_fpscr;
+#ifdef DEBUG
+        printk("%s: D %p, B %p: ", __FUNCTION__, frD, frB);
+        dump_double(frD);
+        printk("\n");
+#endif
+        return 0;
+}
diff --git a/arch/ppc/math-emu/fdiv.c b/arch/ppc/math-emu/fdiv.c
new file mode 100644
index 000000000000..f2fba825b2d0
--- /dev/null
+++ b/arch/ppc/math-emu/fdiv.c
@@ -0,0 +1,53 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+#include "double.h"
+/*
+ * fdiv - double-precision divide: frA / frB is packed into frD.
+ * @frD: destination operand
+ * @frA: dividend
+ * @frB: divisor
+ *
+ * Flags raised before dividing:
+ *   EFLAG_VXZDZ   - zero divided by zero
+ *   EFLAG_VXIDI   - infinity divided by infinity
+ *   EFLAG_DIVZERO - zero divisor with a dividend that is not zero; when
+ *                   __FPU_TRAP_P(EFLAG_DIVZERO) is true the function
+ *                   returns at once and frD is not written
+ *
+ * Returns the flags above OR-ed with the value produced by __FP_PACK_D().
+ */
+int
+fdiv(void *frD, void *frA, void *frB)
+{
+        FP_DECL_D(R);
+        FP_DECL_D(A);
+        FP_DECL_D(B);
+        int ret = 0;
+#ifdef DEBUG
+        printk("%s: %p %p %p\n", __FUNCTION__, frD, frA, frB);
+#endif
+        __FP_UNPACK_D(A, frA);
+        __FP_UNPACK_D(B, frB);
+#ifdef DEBUG
+        printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+        printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+        if (B_c == FP_CLS_ZERO) {
+                if (A_c == FP_CLS_ZERO) {
+                        /* 0 / 0 */
+                        ret |= EFLAG_VXZDZ;
+#ifdef DEBUG
+                        printk("%s: FPSCR_VXZDZ raised\n", __FUNCTION__);
+#endif
+                } else {
+                        /* non-zero dividend over a zero divisor */
+                        ret |= EFLAG_DIVZERO;
+                        if (__FPU_TRAP_P(EFLAG_DIVZERO))
+                                return ret;
+                }
+        } else if (A_c == FP_CLS_INF && B_c == FP_CLS_INF) {
+                /* inf / inf */
+                ret |= EFLAG_VXIDI;
+#ifdef DEBUG
+                printk("%s: FPSCR_VXIDI raised\n", __FUNCTION__);
+#endif
+        }
+        FP_DIV_D(R, A, B);
+#ifdef DEBUG
+        printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+        ret |= __FP_PACK_D(frD, R);
+        return ret;
+}
diff --git a/arch/ppc/math-emu/fdivs.c b/arch/ppc/math-emu/fdivs.c
new file mode 100644
index 000000000000..b971196e3175
--- /dev/null
+++ b/arch/ppc/math-emu/fdivs.c
@@ -0,0 +1,55 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+#include "double.h"
+#include "single.h"
+/*
+ * fdivs - divide frA by frB and pack the quotient into frD with
+ * __FP_PACK_DS().
+ * @frD: destination operand
+ * @frA: dividend
+ * @frB: divisor
+ *
+ * The operands are unpacked and divided as doubles.  Flags raised before
+ * dividing:
+ *   EFLAG_VXZDZ   - zero divided by zero
+ *   EFLAG_VXIDI   - infinity divided by infinity
+ *   EFLAG_DIVZERO - zero divisor with a dividend that is not zero; when
+ *                   __FPU_TRAP_P(EFLAG_DIVZERO) is true the function
+ *                   returns at once and frD is not written
+ *
+ * Returns the flags above OR-ed with the value produced by __FP_PACK_DS().
+ */
+int
+fdivs(void *frD, void *frA, void *frB)
+{
+        FP_DECL_D(R);
+        FP_DECL_D(A);
+        FP_DECL_D(B);
+        int ret = 0;
+#ifdef DEBUG
+        printk("%s: %p %p %p\n", __FUNCTION__, frD, frA, frB);
+#endif
+        __FP_UNPACK_D(A, frA);
+        __FP_UNPACK_D(B, frB);
+#ifdef DEBUG
+        printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+        printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+        if (B_c == FP_CLS_ZERO) {
+                if (A_c == FP_CLS_ZERO) {
+                        /* 0 / 0 */
+                        ret |= EFLAG_VXZDZ;
+#ifdef DEBUG
+                        printk("%s: FPSCR_VXZDZ raised\n", __FUNCTION__);
+#endif
+                } else {
+                        /* non-zero dividend over a zero divisor */
+                        ret |= EFLAG_DIVZERO;
+                        if (__FPU_TRAP_P(EFLAG_DIVZERO))
+                                return ret;
+                }
+        } else if (A_c == FP_CLS_INF && B_c == FP_CLS_INF) {
+                /* inf / inf */
+                ret |= EFLAG_VXIDI;
+#ifdef DEBUG
+                printk("%s: FPSCR_VXIDI raised\n", __FUNCTION__);
+#endif
+        }
+        FP_DIV_D(R, A, B);
+#ifdef DEBUG
+        printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+        ret |= __FP_PACK_DS(frD, R);
+        return ret;
+}
diff --git a/arch/ppc/math-emu/fmadd.c b/arch/ppc/math-emu/fmadd.c
new file mode 100644
index 000000000000..0a1dbce793e9
--- /dev/null
+++ b/arch/ppc/math-emu/fmadd.c
@@ -0,0 +1,48 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+#include "double.h"
+/*
+ * fmadd - multiply-add: (frA * frC) + frB is packed into frD.
+ * @frD: destination operand
+ * @frA: multiplicand
+ * @frB: addend
+ * @frC: multiplier
+ *
+ * The product is formed with FP_MUL_D() and then added with FP_ADD_D(),
+ * i.e. as two separate soft-fp operations.
+ *
+ * Returns EFLAG_VXIMZ for infinity times zero and EFLAG_VXISI when the
+ * product and frB are infinities of opposite sign, OR-ed with the value
+ * produced by __FP_PACK_D().
+ */
+int
+fmadd(void *frD, void *frA, void *frB, void *frC)
+{
+        FP_DECL_D(A);
+        FP_DECL_D(B);
+        FP_DECL_D(C);
+        FP_DECL_D(T);
+        FP_DECL_D(R);
+        int ret = 0;
+#ifdef DEBUG
+        printk("%s: %p %p %p %p\n", __FUNCTION__, frD, frA, frB, frC);
+#endif
+        __FP_UNPACK_D(A, frA);
+        __FP_UNPACK_D(B, frB);
+        __FP_UNPACK_D(C, frC);
+#ifdef DEBUG
+        printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+        printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+        printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c);
+#endif
+        /* infinity times zero, in either operand order */
+        if ((A_c == FP_CLS_ZERO && C_c == FP_CLS_INF) ||
+            (A_c == FP_CLS_INF && C_c == FP_CLS_ZERO)) {
+                ret |= EFLAG_VXIMZ;
+        }
+        FP_MUL_D(T, A, C);
+        /* infinite product plus the oppositely signed infinity */
+        if (T_c == FP_CLS_INF && B_c == FP_CLS_INF && T_s != B_s) {
+                ret |= EFLAG_VXISI;
+        }
+        FP_ADD_D(R, T, B);
+#ifdef DEBUG
+        printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+        ret |= __FP_PACK_D(frD, R);
+        return ret;
+}
diff --git a/arch/ppc/math-emu/fmadds.c b/arch/ppc/math-emu/fmadds.c
new file mode 100644
index 000000000000..0f70bba9445e
--- /dev/null
+++ b/arch/ppc/math-emu/fmadds.c
@@ -0,0 +1,49 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+#include "double.h"
+#include "single.h"
+/*
+ * fmadds - multiply-add: (frA * frC) + frB is packed into frD with
+ * __FP_PACK_DS().
+ * @frD: destination operand
+ * @frA: multiplicand
+ * @frB: addend
+ * @frC: multiplier
+ *
+ * The operands are unpacked as doubles; the product is formed with
+ * FP_MUL_D() and then added with FP_ADD_D().
+ *
+ * Returns EFLAG_VXIMZ for infinity times zero and EFLAG_VXISI when the
+ * product and frB are infinities of opposite sign, OR-ed with the value
+ * produced by __FP_PACK_DS().
+ */
+int
+fmadds(void *frD, void *frA, void *frB, void *frC)
+{
+        FP_DECL_D(A);
+        FP_DECL_D(B);
+        FP_DECL_D(C);
+        FP_DECL_D(T);
+        FP_DECL_D(R);
+        int ret = 0;
+#ifdef DEBUG
+        printk("%s: %p %p %p %p\n", __FUNCTION__, frD, frA, frB, frC);
+#endif
+        __FP_UNPACK_D(A, frA);
+        __FP_UNPACK_D(B, frB);
+        __FP_UNPACK_D(C, frC);
+#ifdef DEBUG
+        printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+        printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+        printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c);
+#endif
+        /* infinity times zero, in either operand order */
+        if ((A_c == FP_CLS_ZERO && C_c == FP_CLS_INF) ||
+            (A_c == FP_CLS_INF && C_c == FP_CLS_ZERO)) {
+                ret |= EFLAG_VXIMZ;
+        }
+        FP_MUL_D(T, A, C);
+        /* infinite product plus the oppositely signed infinity */
+        if (T_c == FP_CLS_INF && B_c == FP_CLS_INF && T_s != B_s) {
+                ret |= EFLAG_VXISI;
+        }
+        FP_ADD_D(R, T, B);
+#ifdef DEBUG
+        printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+        ret |= __FP_PACK_DS(frD, R);
+        return ret;
+}
diff --git a/arch/ppc/math-emu/fmr.c b/arch/ppc/math-emu/fmr.c
new file mode 100644
index 000000000000..28df700c0c7e
--- /dev/null
+++ b/arch/ppc/math-emu/fmr.c
@@ -0,0 +1,18 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+/*
+ * fmr - copy the two 32-bit words of frB to frD unchanged.
+ * @frD: destination, two 32-bit words
+ * @frB: source, two 32-bit words
+ *
+ * Always returns 0.
+ */
+int
+fmr(u32 *frD, u32 *frB)
+{
+        int word;
+        /* word 0 first, then word 1 */
+        for (word = 0; word < 2; word++)
+                frD[word] = frB[word];
+#ifdef DEBUG
+        printk("%s: D %p, B %p: ", __FUNCTION__, frD, frB);
+        dump_double(frD);
+        printk("\n");
+#endif
+        return 0;
+}
diff --git a/arch/ppc/math-emu/fmsub.c b/arch/ppc/math-emu/fmsub.c
new file mode 100644
index 000000000000..203fd48a6fec
--- /dev/null
+++ b/arch/ppc/math-emu/fmsub.c
@@ -0,0 +1,51 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+#include "double.h"
+/*
+ * fmsub - multiply-subtract: (frA * frC) - frB is packed into frD.
+ * @frD: destination operand
+ * @frA: multiplicand
+ * @frB: subtrahend
+ * @frC: multiplier
+ *
+ * The subtraction is performed as an addition after flipping the sign of
+ * the unpacked frB, unless frB is a NaN.
+ *
+ * Returns EFLAG_VXIMZ for infinity times zero and EFLAG_VXISI when the
+ * product and the negated frB are infinities of opposite sign, OR-ed with
+ * the value produced by __FP_PACK_D().
+ */
+int
+fmsub(void *frD, void *frA, void *frB, void *frC)
+{
+        FP_DECL_D(A);
+        FP_DECL_D(B);
+        FP_DECL_D(C);
+        FP_DECL_D(T);
+        FP_DECL_D(R);
+        int ret = 0;
+#ifdef DEBUG
+        printk("%s: %p %p %p %p\n", __FUNCTION__, frD, frA, frB, frC);
+#endif
+        __FP_UNPACK_D(A, frA);
+        __FP_UNPACK_D(B, frB);
+        __FP_UNPACK_D(C, frC);
+#ifdef DEBUG
+        printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+        printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+        printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c);
+#endif
+        /* infinity times zero, in either operand order */
+        if ((A_c == FP_CLS_ZERO && C_c == FP_CLS_INF) ||
+            (A_c == FP_CLS_INF && C_c == FP_CLS_ZERO)) {
+                ret |= EFLAG_VXIMZ;
+        }
+        FP_MUL_D(T, A, C);
+        /* negate the subtrahend; a NaN keeps its sign */
+        if (B_c != FP_CLS_NAN) {
+                B_s = B_s ^ 1;
+        }
+        if (T_c == FP_CLS_INF && B_c == FP_CLS_INF && T_s != B_s) {
+                ret |= EFLAG_VXISI;
+        }
+        FP_ADD_D(R, T, B);
+#ifdef DEBUG
+        printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+        ret |= __FP_PACK_D(frD, R);
+        return ret;
+}
diff --git a/arch/ppc/math-emu/fmsubs.c b/arch/ppc/math-emu/fmsubs.c
new file mode 100644
index 000000000000..8ce68624c189
--- /dev/null
+++ b/arch/ppc/math-emu/fmsubs.c
@@ -0,0 +1,52 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+#include "double.h"
+#include "single.h"
+/*
+ * fmsubs - multiply-subtract: (frA * frC) - frB is packed into frD with
+ * __FP_PACK_DS().
+ * @frD: destination operand
+ * @frA: multiplicand
+ * @frB: subtrahend
+ * @frC: multiplier
+ *
+ * The operands are unpacked as doubles.  The subtraction is performed as
+ * an addition after flipping the sign of the unpacked frB, unless frB is
+ * a NaN.
+ *
+ * Returns EFLAG_VXIMZ for infinity times zero and EFLAG_VXISI when the
+ * product and the negated frB are infinities of opposite sign, OR-ed with
+ * the value produced by __FP_PACK_DS().
+ */
+int
+fmsubs(void *frD, void *frA, void *frB, void *frC)
+{
+        FP_DECL_D(A);
+        FP_DECL_D(B);
+        FP_DECL_D(C);
+        FP_DECL_D(T);
+        FP_DECL_D(R);
+        int ret = 0;
+#ifdef DEBUG
+        printk("%s: %p %p %p %p\n", __FUNCTION__, frD, frA, frB, frC);
+#endif
+        __FP_UNPACK_D(A, frA);
+        __FP_UNPACK_D(B, frB);
+        __FP_UNPACK_D(C, frC);
+#ifdef DEBUG
+        printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+        printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+        printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c);
+#endif
+        /* infinity times zero, in either operand order */
+        if ((A_c == FP_CLS_ZERO && C_c == FP_CLS_INF) ||
+            (A_c == FP_CLS_INF && C_c == FP_CLS_ZERO)) {
+                ret |= EFLAG_VXIMZ;
+        }
+        FP_MUL_D(T, A, C);
+        /* negate the subtrahend; a NaN keeps its sign */
+        if (B_c != FP_CLS_NAN) {
+                B_s = B_s ^ 1;
+        }
+        if (T_c == FP_CLS_INF && B_c == FP_CLS_INF && T_s != B_s) {
+                ret |= EFLAG_VXISI;
+        }
+        FP_ADD_D(R, T, B);
+#ifdef DEBUG
+        printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+        ret |= __FP_PACK_DS(frD, R);
+        return ret;
+}
diff --git a/arch/ppc/math-emu/fmul.c b/arch/ppc/math-emu/fmul.c
new file mode 100644
index 000000000000..66c7e79aae2e
--- /dev/null
+++ b/arch/ppc/math-emu/fmul.c
@@ -0,0 +1,42 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+#include "double.h"
+/*
+ * fmul - double-precision multiply: frA * frB is packed into frD.
+ * @frD: destination operand
+ * @frA: first factor
+ * @frB: second factor
+ *
+ * Returns EFLAG_VXIMZ for infinity times zero, OR-ed with the value
+ * produced by __FP_PACK_D().
+ */
+int
+fmul(void *frD, void *frA, void *frB)
+{
+        FP_DECL_D(R);
+        FP_DECL_D(A);
+        FP_DECL_D(B);
+        int ret = 0;
+#ifdef DEBUG
+        printk("%s: %p %p %p\n", __FUNCTION__, frD, frA, frB);
+#endif
+        __FP_UNPACK_D(A, frA);
+        __FP_UNPACK_D(B, frB);
+#ifdef DEBUG
+        printk("A: %ld %lu %lu %ld (%ld) [%08lx.%08lx %lx]\n",
+               A_s, A_f1, A_f0, A_e, A_c,
+               A_f1, A_f0, A_e + 1023);
+        printk("B: %ld %lu %lu %ld (%ld) [%08lx.%08lx %lx]\n",
+               B_s, B_f1, B_f0, B_e, B_c,
+               B_f1, B_f0, B_e + 1023);
+#endif
+        /* infinity times zero, in either operand order */
+        if ((A_c == FP_CLS_ZERO && B_c == FP_CLS_INF) ||
+            (A_c == FP_CLS_INF && B_c == FP_CLS_ZERO)) {
+                ret |= EFLAG_VXIMZ;
+        }
+        FP_MUL_D(R, A, B);
+#ifdef DEBUG
+        printk("D: %ld %lu %lu %ld (%ld) [%08lx.%08lx %lx]\n",
+               R_s, R_f1, R_f0, R_e, R_c,
+               R_f1, R_f0, R_e + 1023);
+#endif
+        ret |= __FP_PACK_D(frD, R);
+        return ret;
+}
diff --git a/arch/ppc/math-emu/fmuls.c b/arch/ppc/math-emu/fmuls.c
new file mode 100644
index 000000000000..26bc4278271c
--- /dev/null
+++ b/arch/ppc/math-emu/fmuls.c
@@ -0,0 +1,43 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+#include "double.h"
+#include "single.h"
+/*
+ * fmuls - multiply frA by frB and pack the product into frD with
+ * __FP_PACK_DS().
+ * @frD: destination operand
+ * @frA: first factor
+ * @frB: second factor
+ *
+ * The operands are unpacked and multiplied as doubles.  Returns
+ * EFLAG_VXIMZ for infinity times zero, OR-ed with the value produced by
+ * __FP_PACK_DS().
+ */
+int
+fmuls(void *frD, void *frA, void *frB)
+{
+        FP_DECL_D(R);
+        FP_DECL_D(A);
+        FP_DECL_D(B);
+        int ret = 0;
+#ifdef DEBUG
+        printk("%s: %p %p %p\n", __FUNCTION__, frD, frA, frB);
+#endif
+        __FP_UNPACK_D(A, frA);
+        __FP_UNPACK_D(B, frB);
+#ifdef DEBUG
+        printk("A: %ld %lu %lu %ld (%ld) [%08lx.%08lx %lx]\n",
+               A_s, A_f1, A_f0, A_e, A_c,
+               A_f1, A_f0, A_e + 1023);
+        printk("B: %ld %lu %lu %ld (%ld) [%08lx.%08lx %lx]\n",
+               B_s, B_f1, B_f0, B_e, B_c,
+               B_f1, B_f0, B_e + 1023);
+#endif
+        /* infinity times zero, in either operand order */
+        if ((A_c == FP_CLS_ZERO && B_c == FP_CLS_INF) ||
+            (A_c == FP_CLS_INF && B_c == FP_CLS_ZERO)) {
+                ret |= EFLAG_VXIMZ;
+        }
+        FP_MUL_D(R, A, B);
+#ifdef DEBUG
+        printk("D: %ld %lu %lu %ld (%ld) [%08lx.%08lx %lx]\n",
+               R_s, R_f1, R_f0, R_e, R_c,
+               R_f1, R_f0, R_e + 1023);
+#endif
+        ret |= __FP_PACK_DS(frD, R);
+        return ret;
+}
diff --git a/arch/ppc/math-emu/fnabs.c b/arch/ppc/math-emu/fnabs.c
new file mode 100644
index 000000000000..c6b913d179e0
--- /dev/null
+++ b/arch/ppc/math-emu/fnabs.c
@@ -0,0 +1,18 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+/*
+ * fnabs - copy frB to frD with the top bit of word 0 set.
+ * @frD: destination, two 32-bit words
+ * @frB: source, two 32-bit words
+ *
+ * Word 1 is copied unchanged.  Always returns 0.
+ */
+int
+fnabs(u32 *frD, u32 *frB)
+{
+        const u32 top_bit = 0x80000000;         /* bit 31 of word 0 */
+        frD[0] = frB[0] | top_bit;
+        frD[1] = frB[1];
+#ifdef DEBUG
+        printk("%s: D %p, B %p: ", __FUNCTION__, frD, frB);
+        dump_double(frD);
+        printk("\n");
+#endif
+        return 0;
+}
diff --git a/arch/ppc/math-emu/fneg.c b/arch/ppc/math-emu/fneg.c
new file mode 100644
index 000000000000..fe9a98deff69
--- /dev/null
+++ b/arch/ppc/math-emu/fneg.c
@@ -0,0 +1,18 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+/*
+ * fneg - copy frB to frD with the top bit of word 0 inverted.
+ * @frD: destination, two 32-bit words
+ * @frB: source, two 32-bit words
+ *
+ * Word 1 is copied unchanged.  Always returns 0.
+ */
+int
+fneg(u32 *frD, u32 *frB)
+{
+        const u32 top_bit = 0x80000000;         /* bit 31 of word 0 */
+        frD[0] = frB[0] ^ top_bit;
+        frD[1] = frB[1];
+#ifdef DEBUG
+        printk("%s: D %p, B %p: ", __FUNCTION__, frD, frB);
+        dump_double(frD);
+        printk("\n");
+#endif
+        return 0;
+}
diff --git a/arch/ppc/math-emu/fnmadd.c b/arch/ppc/math-emu/fnmadd.c
new file mode 100644
index 000000000000..7f312276d920
--- /dev/null
+++ b/arch/ppc/math-emu/fnmadd.c
@@ -0,0 +1,51 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+#include "double.h"
+/*
+ * fnmadd - negated multiply-add: -((frA * frC) + frB) is packed into frD.
+ * @frD: destination operand
+ * @frA: multiplicand
+ * @frB: addend
+ * @frC: multiplier
+ *
+ * The sign of the sum is flipped before packing unless the sum is a NaN.
+ *
+ * Returns EFLAG_VXIMZ for infinity times zero and EFLAG_VXISI when the
+ * product and frB are infinities of opposite sign, OR-ed with the value
+ * produced by __FP_PACK_D().
+ */
+int
+fnmadd(void *frD, void *frA, void *frB, void *frC)
+{
+        FP_DECL_D(A);
+        FP_DECL_D(B);
+        FP_DECL_D(C);
+        FP_DECL_D(T);
+        FP_DECL_D(R);
+        int ret = 0;
+#ifdef DEBUG
+        printk("%s: %p %p %p %p\n", __FUNCTION__, frD, frA, frB, frC);
+#endif
+        __FP_UNPACK_D(A, frA);
+        __FP_UNPACK_D(B, frB);
+        __FP_UNPACK_D(C, frC);
+#ifdef DEBUG
+        printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+        printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+        printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c);
+#endif
+        /* infinity times zero, in either operand order */
+        if ((A_c == FP_CLS_ZERO && C_c == FP_CLS_INF) ||
+            (A_c == FP_CLS_INF && C_c == FP_CLS_ZERO)) {
+                ret |= EFLAG_VXIMZ;
+        }
+        FP_MUL_D(T, A, C);
+        /* infinite product plus the oppositely signed infinity */
+        if (T_c == FP_CLS_INF && B_c == FP_CLS_INF && T_s != B_s) {
+                ret |= EFLAG_VXISI;
+        }
+        FP_ADD_D(R, T, B);
+        /* negate the result; a NaN keeps its sign */
+        if (R_c != FP_CLS_NAN) {
+                R_s = R_s ^ 1;
+        }
+#ifdef DEBUG
+        printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+        ret |= __FP_PACK_D(frD, R);
+        return ret;
+}
diff --git a/arch/ppc/math-emu/fnmadds.c b/arch/ppc/math-emu/fnmadds.c
new file mode 100644
index 000000000000..65454c9c70bc
--- /dev/null
+++ b/arch/ppc/math-emu/fnmadds.c
@@ -0,0 +1,52 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+#include "double.h"
+#include "single.h"
+/*
+ * fnmadds - negated multiply-add: -((frA * frC) + frB) is packed into
+ * frD with __FP_PACK_DS().
+ * @frD: destination operand
+ * @frA: multiplicand
+ * @frB: addend
+ * @frC: multiplier
+ *
+ * The operands are unpacked as doubles.  The sign of the sum is flipped
+ * before packing unless the sum is a NaN.
+ *
+ * Returns EFLAG_VXIMZ for infinity times zero and EFLAG_VXISI when the
+ * product and frB are infinities of opposite sign, OR-ed with the value
+ * produced by __FP_PACK_DS().
+ */
+int
+fnmadds(void *frD, void *frA, void *frB, void *frC)
+{
+        FP_DECL_D(A);
+        FP_DECL_D(B);
+        FP_DECL_D(C);
+        FP_DECL_D(T);
+        FP_DECL_D(R);
+        int ret = 0;
+#ifdef DEBUG
+        printk("%s: %p %p %p %p\n", __FUNCTION__, frD, frA, frB, frC);
+#endif
+        __FP_UNPACK_D(A, frA);
+        __FP_UNPACK_D(B, frB);
+        __FP_UNPACK_D(C, frC);
+#ifdef DEBUG
+        printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+        printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+        printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c);
+#endif
+        /* infinity times zero, in either operand order */
+        if ((A_c == FP_CLS_ZERO && C_c == FP_CLS_INF) ||
+            (A_c == FP_CLS_INF && C_c == FP_CLS_ZERO)) {
+                ret |= EFLAG_VXIMZ;
+        }
+        FP_MUL_D(T, A, C);
+        /* infinite product plus the oppositely signed infinity */
+        if (T_c == FP_CLS_INF && B_c == FP_CLS_INF && T_s != B_s) {
+                ret |= EFLAG_VXISI;
+        }
+        FP_ADD_D(R, T, B);
+        /* negate the result; a NaN keeps its sign */
+        if (R_c != FP_CLS_NAN) {
+                R_s = R_s ^ 1;
+        }
+#ifdef DEBUG
+        printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+        ret |= __FP_PACK_DS(frD, R);
+        return ret;
+}
diff --git a/arch/ppc/math-emu/fnmsub.c b/arch/ppc/math-emu/fnmsub.c
new file mode 100644
index 000000000000..f1ca7482b5f0
--- /dev/null
+++ b/arch/ppc/math-emu/fnmsub.c
@@ -0,0 +1,54 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+#include "double.h"
+/*
+ * fnmsub - negated multiply-subtract: -((frA * frC) - frB) is packed
+ * into frD.
+ * @frD: destination operand
+ * @frA: multiplicand
+ * @frB: subtrahend
+ * @frC: multiplier
+ *
+ * The subtraction is performed as an addition after flipping the sign of
+ * the unpacked frB (unless it is a NaN); the sign of the result is then
+ * flipped as well (unless it is a NaN).
+ *
+ * Returns EFLAG_VXIMZ for infinity times zero and EFLAG_VXISI when the
+ * product and the negated frB are infinities of opposite sign, OR-ed with
+ * the value produced by __FP_PACK_D().
+ */
+int
+fnmsub(void *frD, void *frA, void *frB, void *frC)
+{
+        FP_DECL_D(A);
+        FP_DECL_D(B);
+        FP_DECL_D(C);
+        FP_DECL_D(T);
+        FP_DECL_D(R);
+        int ret = 0;
+#ifdef DEBUG
+        printk("%s: %p %p %p %p\n", __FUNCTION__, frD, frA, frB, frC);
+#endif
+        __FP_UNPACK_D(A, frA);
+        __FP_UNPACK_D(B, frB);
+        __FP_UNPACK_D(C, frC);
+#ifdef DEBUG
+        printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+        printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+        printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c);
+#endif
+        /* infinity times zero, in either operand order */
+        if ((A_c == FP_CLS_ZERO && C_c == FP_CLS_INF) ||
+            (A_c == FP_CLS_INF && C_c == FP_CLS_ZERO)) {
+                ret |= EFLAG_VXIMZ;
+        }
+        FP_MUL_D(T, A, C);
+        /* negate the subtrahend; a NaN keeps its sign */
+        if (B_c != FP_CLS_NAN) {
+                B_s = B_s ^ 1;
+        }
+        if (T_c == FP_CLS_INF && B_c == FP_CLS_INF && T_s != B_s) {
+                ret |= EFLAG_VXISI;
+        }
+        FP_ADD_D(R, T, B);
+        /* negate the result; a NaN keeps its sign */
+        if (R_c != FP_CLS_NAN) {
+                R_s = R_s ^ 1;
+        }
+#ifdef DEBUG
+        printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+        ret |= __FP_PACK_D(frD, R);
+        return ret;
+}
diff --git a/arch/ppc/math-emu/fnmsubs.c b/arch/ppc/math-emu/fnmsubs.c
new file mode 100644
index 000000000000..5c9a09a87dc7
--- /dev/null
+++ b/arch/ppc/math-emu/fnmsubs.c
@@ -0,0 +1,55 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+#include "double.h"
+#include "single.h"
+/*
+ * fnmsubs - negated multiply-subtract: -((frA * frC) - frB) is packed
+ * into frD with __FP_PACK_DS().
+ * @frD: destination operand
+ * @frA: multiplicand
+ * @frB: subtrahend
+ * @frC: multiplier
+ *
+ * The operands are unpacked as doubles.  The subtraction is performed as
+ * an addition after flipping the sign of the unpacked frB (unless it is a
+ * NaN); the sign of the result is then flipped as well (unless it is a
+ * NaN).
+ *
+ * Returns EFLAG_VXIMZ for infinity times zero and EFLAG_VXISI when the
+ * product and the negated frB are infinities of opposite sign, OR-ed with
+ * the value produced by __FP_PACK_DS().
+ */
+int
+fnmsubs(void *frD, void *frA, void *frB, void *frC)
+{
+        FP_DECL_D(A);
+        FP_DECL_D(B);
+        FP_DECL_D(C);
+        FP_DECL_D(T);
+        FP_DECL_D(R);
+        int ret = 0;
+#ifdef DEBUG
+        printk("%s: %p %p %p %p\n", __FUNCTION__, frD, frA, frB, frC);
+#endif
+        __FP_UNPACK_D(A, frA);
+        __FP_UNPACK_D(B, frB);
+        __FP_UNPACK_D(C, frC);
+#ifdef DEBUG
+        printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+        printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+        printk("C: %ld %lu %lu %ld (%ld)\n", C_s, C_f1, C_f0, C_e, C_c);
+#endif
+        /* infinity times zero, in either operand order */
+        if ((A_c == FP_CLS_ZERO && C_c == FP_CLS_INF) ||
+            (A_c == FP_CLS_INF && C_c == FP_CLS_ZERO)) {
+                ret |= EFLAG_VXIMZ;
+        }
+        FP_MUL_D(T, A, C);
+        /* negate the subtrahend; a NaN keeps its sign */
+        if (B_c != FP_CLS_NAN) {
+                B_s = B_s ^ 1;
+        }
+        if (T_c == FP_CLS_INF && B_c == FP_CLS_INF && T_s != B_s) {
+                ret |= EFLAG_VXISI;
+        }
+        FP_ADD_D(R, T, B);
+        /* negate the result; a NaN keeps its sign */
+        if (R_c != FP_CLS_NAN) {
+                R_s = R_s ^ 1;
+        }
+#ifdef DEBUG
+        printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+        ret |= __FP_PACK_DS(frD, R);
+        return ret;
+}
diff --git a/arch/ppc/math-emu/fres.c b/arch/ppc/math-emu/fres.c
new file mode 100644
index 000000000000..ec11e46d20af
--- /dev/null
+++ b/arch/ppc/math-emu/fres.c
@@ -0,0 +1,12 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+/*
+ * fres - placeholder: no computation is performed and frD is not written.
+ * @frD: destination operand (unused apart from the debug trace)
+ * @frB: source operand (unused apart from the debug trace)
+ *
+ * Always returns -ENOSYS.
+ */
+int
+fres(void *frD, void *frB)
+{
+        const int status = -ENOSYS;
+#ifdef DEBUG
+        printk("%s: %p %p\n", __FUNCTION__, frD, frB);
+#endif
+        return status;
+}
diff --git a/arch/ppc/math-emu/frsp.c b/arch/ppc/math-emu/frsp.c
new file mode 100644
index 000000000000..d879b2a3d0c9
--- /dev/null
+++ b/arch/ppc/math-emu/frsp.c
@@ -0,0 +1,25 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+#include "double.h"
+#include "single.h"
+/*
+ * frsp - unpack frB as a double and pack it into frD with __FP_PACK_DS().
+ * @frD: destination operand
+ * @frB: source operand
+ *
+ * Returns the value produced by __FP_PACK_DS().
+ */
+int
+frsp(void *frD, void *frB)
+{
+        FP_DECL_D(B);
+        int status;
+#ifdef DEBUG
+        printk("%s: D %p, B %p\n", __FUNCTION__, frD, frB);
+#endif
+        __FP_UNPACK_D(B, frB);
+#ifdef DEBUG
+        printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+        status = __FP_PACK_DS(frD, B);
+        return status;
+}
diff --git a/arch/ppc/math-emu/frsqrte.c b/arch/ppc/math-emu/frsqrte.c
new file mode 100644
index 000000000000..a11ae1829850
--- /dev/null
+++ b/arch/ppc/math-emu/frsqrte.c
@@ -0,0 +1,12 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+/*
+ * frsqrte - placeholder for the optional reciprocal-square-root-estimate
+ * instruction.
+ *
+ * Neither frD nor frB is accessed and 0 is returned unconditionally.
+ * NOTE(review): the sibling stub fres() reports -ENOSYS instead; here the
+ * caller is told the instruction succeeded although frD was never written.
+ * Confirm which of the two behaviours is intended.
+ */
+int frsqrte(void *frD, void *frB)
+{
+#ifdef DEBUG
+        printk("%s: %p %p\n", __FUNCTION__, frD, frB);
+#endif
+
+        /* No estimate is produced. */
+        return 0;
+}
diff --git a/arch/ppc/math-emu/fsel.c b/arch/ppc/math-emu/fsel.c
new file mode 100644
index 000000000000..e36e6e72819a
--- /dev/null
+++ b/arch/ppc/math-emu/fsel.c
@@ -0,0 +1,38 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+#include "double.h"
+/*
+ * fsel - emulate "floating select".
+ *
+ * frA is unpacked and classified.  frD receives the two 32-bit words of
+ * frC when frA is a zero (either sign) or has its sign bit clear and is
+ * not a NaN; in every other case (NaN, or negative and non-zero) frD
+ * receives the two words of frB.  Always returns 0.
+ */
+int fsel(u32 *frD, void *frA, u32 *frB, u32 *frC)
+{
+        FP_DECL_D(A);
+        u32 *chosen;
+
+#ifdef DEBUG
+        printk("%s: %p %p %p %p\n", __FUNCTION__, frD, frA, frB, frC);
+#endif
+
+        __FP_UNPACK_D(A, frA);
+
+#ifdef DEBUG
+        printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+        printk("B: %08x %08x\n", frB[0], frB[1]);
+        printk("C: %08x %08x\n", frC[0], frC[1]);
+#endif
+
+        /* "frA >= 0" selects frC; a NaN never compares true. */
+        if (A_c != FP_CLS_NAN && (A_c == FP_CLS_ZERO || !A_s))
+                chosen = frC;
+        else
+                chosen = frB;
+
+        /* Raw copy of both words, no rounding or repacking. */
+        frD[0] = chosen[0];
+        frD[1] = chosen[1];
+
+#ifdef DEBUG
+        printk("D: %08x.%08x\n", frD[0], frD[1]);
+#endif
+
+        return 0;
+}
diff --git a/arch/ppc/math-emu/fsqrt.c b/arch/ppc/math-emu/fsqrt.c
new file mode 100644
index 000000000000..6f8319f64a8a
--- /dev/null
+++ b/arch/ppc/math-emu/fsqrt.c
@@ -0,0 +1,37 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+#include "double.h"
+/*
+ * fsqrt - emulate the double-precision square root: frD = sqrt(frB).
+ *
+ * frD: destination register image, written through __FP_PACK_D().
+ * frB: source register image, read through __FP_UNPACK_D().
+ *
+ * Returns the EFLAG_* bits raised here OR-ed with the value produced by
+ * __FP_PACK_D().
+ */
+int
+fsqrt(void *frD, void *frB)
+{
+        FP_DECL_D(B);
+        FP_DECL_D(R);
+        int ret = 0;
+
+#ifdef DEBUG
+        /* Exactly two pointers are passed, so only two %p conversions. */
+        printk("%s: %p %p\n", __FUNCTION__, frD, frB);
+#endif
+
+        __FP_UNPACK_D(B, frB);
+
+#ifdef DEBUG
+        printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+
+        /* Sign bit set on anything but a zero: invalid square root. */
+        if (B_s && B_c != FP_CLS_ZERO)
+                ret |= EFLAG_VXSQRT;
+        /*
+         * NOTE(review): raised for every operand classified as NaN, not
+         * only for signalling NaNs -- confirm this is intended.
+         */
+        if (B_c == FP_CLS_NAN)
+                ret |= EFLAG_VXSNAN;
+
+        FP_SQRT_D(R, B);
+
+#ifdef DEBUG
+        printk("R: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+        return (ret | __FP_PACK_D(frD, R));
+}
diff --git a/arch/ppc/math-emu/fsqrts.c b/arch/ppc/math-emu/fsqrts.c
new file mode 100644
index 000000000000..3b2b1cf55c12
--- /dev/null
+++ b/arch/ppc/math-emu/fsqrts.c
@@ -0,0 +1,38 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+#include "double.h"
+#include "single.h"
+/*
+ * fsqrts - emulate the single-precision square root: frD = sqrt(frB).
+ *
+ * The operand is unpacked and the root is computed in double format; the
+ * result is stored through __FP_PACK_DS().
+ *
+ * frD: destination register image.
+ * frB: source register image, read through __FP_UNPACK_D().
+ *
+ * Returns the EFLAG_* bits raised here OR-ed with the value produced by
+ * __FP_PACK_DS().
+ */
+int
+fsqrts(void *frD, void *frB)
+{
+        FP_DECL_D(B);
+        FP_DECL_D(R);
+        int ret = 0;
+
+#ifdef DEBUG
+        /* Exactly two pointers are passed, so only two %p conversions. */
+        printk("%s: %p %p\n", __FUNCTION__, frD, frB);
+#endif
+
+        __FP_UNPACK_D(B, frB);
+
+#ifdef DEBUG
+        printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+
+        /* Sign bit set on anything but a zero: invalid square root. */
+        if (B_s && B_c != FP_CLS_ZERO)
+                ret |= EFLAG_VXSQRT;
+        /*
+         * NOTE(review): raised for every operand classified as NaN, not
+         * only for signalling NaNs -- confirm this is intended.
+         */
+        if (B_c == FP_CLS_NAN)
+                ret |= EFLAG_VXSNAN;
+
+        FP_SQRT_D(R, B);
+
+#ifdef DEBUG
+        printk("R: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+        return (ret | __FP_PACK_DS(frD, R));
+}
diff --git a/arch/ppc/math-emu/fsub.c b/arch/ppc/math-emu/fsub.c
new file mode 100644
index 000000000000..956679042bb2
--- /dev/null
+++ b/arch/ppc/math-emu/fsub.c
@@ -0,0 +1,41 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+#include "double.h"
+/*
+ * fsub - emulate double-precision subtraction: frD = frA - frB.
+ *
+ * The difference is formed as frA + (-frB): the sign of the unpacked frB
+ * is inverted unless frB is a NaN, then FP_ADD_D() does the work.
+ *
+ * Returns EFLAG_VXISI when both operands are infinities whose signs
+ * differ after the inversion, OR-ed with the value of __FP_PACK_D().
+ */
+int fsub(void *frD, void *frA, void *frB)
+{
+        FP_DECL_D(A);
+        FP_DECL_D(B);
+        FP_DECL_D(R);
+        int flags = 0;
+
+#ifdef DEBUG
+        printk("%s: %p %p %p\n", __FUNCTION__, frD, frA, frB);
+#endif
+
+        __FP_UNPACK_D(A, frA);
+        __FP_UNPACK_D(B, frB);
+
+#ifdef DEBUG
+        printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+        printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+
+        /* Negate the subtrahend; a NaN keeps its sign. */
+        B_s ^= (B_c != FP_CLS_NAN);
+
+        /* Infinity minus a like-signed infinity is an invalid operation. */
+        if (A_c == FP_CLS_INF && B_c == FP_CLS_INF && A_s != B_s)
+                flags |= EFLAG_VXISI;
+
+        FP_ADD_D(R, A, B);
+
+#ifdef DEBUG
+        printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+        flags |= __FP_PACK_D(frD, R);
+        return flags;
+}
diff --git a/arch/ppc/math-emu/fsubs.c b/arch/ppc/math-emu/fsubs.c
new file mode 100644
index 000000000000..3428117dfe8c
--- /dev/null
+++ b/arch/ppc/math-emu/fsubs.c
@@ -0,0 +1,42 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+#include "double.h"
+#include "single.h"
+/*
+ * fsubs - emulate single-precision subtraction: frD = frA - frB.
+ *
+ * Both operands are unpacked as doubles and the difference is formed as
+ * frA + (-frB): the sign of the unpacked frB is inverted unless frB is a
+ * NaN.  The result is stored through __FP_PACK_DS().
+ *
+ * Returns EFLAG_VXISI when both operands are infinities whose signs
+ * differ after the inversion, OR-ed with the value of __FP_PACK_DS().
+ */
+int fsubs(void *frD, void *frA, void *frB)
+{
+        FP_DECL_D(A);
+        FP_DECL_D(B);
+        FP_DECL_D(R);
+        int flags = 0;
+
+#ifdef DEBUG
+        printk("%s: %p %p %p\n", __FUNCTION__, frD, frA, frB);
+#endif
+
+        __FP_UNPACK_D(A, frA);
+        __FP_UNPACK_D(B, frB);
+
+#ifdef DEBUG
+        printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+        printk("B: %ld %lu %lu %ld (%ld)\n", B_s, B_f1, B_f0, B_e, B_c);
+#endif
+
+        /* Negate the subtrahend; a NaN keeps its sign. */
+        B_s ^= (B_c != FP_CLS_NAN);
+
+        /* Infinity minus a like-signed infinity is an invalid operation. */
+        if (A_c == FP_CLS_INF && B_c == FP_CLS_INF && A_s != B_s)
+                flags |= EFLAG_VXISI;
+
+        FP_ADD_D(R, A, B);
+
+#ifdef DEBUG
+        printk("D: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+        flags |= __FP_PACK_DS(frD, R);
+        return flags;
+}
diff --git a/arch/ppc/math-emu/lfd.c b/arch/ppc/math-emu/lfd.c
new file mode 100644
index 000000000000..7d38101c329b
--- /dev/null
+++ b/arch/ppc/math-emu/lfd.c
@@ -0,0 +1,19 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "sfp-machine.h"
+#include "double.h"
+/*
+ * lfd - emulate "load floating-point double".
+ *
+ * Copies sizeof(double) bytes from the user address ea into frD with
+ * copy_from_user().  Returns 0 on success and -EFAULT when the copy
+ * reports a failure.
+ */
+int lfd(void *frD, void *ea)
+{
+        unsigned long uncopied = copy_from_user(frD, ea, sizeof(double));
+
+        if (uncopied != 0)
+                return -EFAULT;
+
+#ifdef DEBUG
+        printk("%s: D %p, ea %p: ", __FUNCTION__, frD, ea);
+        dump_double(frD);
+        printk("\n");
+#endif
+
+        return 0;
+}
diff --git a/arch/ppc/math-emu/lfs.c b/arch/ppc/math-emu/lfs.c
new file mode 100644
index 000000000000..c86dee3d7655
--- /dev/null
+++ b/arch/ppc/math-emu/lfs.c
@@ -0,0 +1,37 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+#include "double.h"
+#include "single.h"
+/*
+ * lfs - emulate "load floating-point single".
+ *
+ * Fetches sizeof(float) bytes from the user address ea, unpacks them as a
+ * single, converts to double with FP_CONV() and stores the result in frD
+ * through __FP_PACK_D().
+ *
+ * Returns -EFAULT when copy_from_user() reports a failure, otherwise the
+ * value produced by __FP_PACK_D().
+ */
+int lfs(void *frD, void *ea)
+{
+        FP_DECL_D(R);
+        FP_DECL_S(A);
+        float single;
+        int status;
+
+#ifdef DEBUG
+        printk("%s: D %p, ea %p\n", __FUNCTION__, frD, ea);
+#endif
+
+        if (copy_from_user(&single, ea, sizeof(float)) != 0)
+                return -EFAULT;
+
+        __FP_UNPACK_S(A, &single);
+
+#ifdef DEBUG
+        printk("A: %ld %lu %ld (%ld) [%08lx]\n", A_s, A_f, A_e, A_c,
+               *(unsigned long *)&single);
+#endif
+
+        /* Widen the unpacked single into the double working format. */
+        FP_CONV(D, S, 2, 1, R, A);
+
+#ifdef DEBUG
+        printk("R: %ld %lu %lu %ld (%ld)\n", R_s, R_f1, R_f0, R_e, R_c);
+#endif
+
+        status = __FP_PACK_D(frD, R);
+        return status;
+}
diff --git a/arch/ppc/math-emu/math.c b/arch/ppc/math-emu/math.c
new file mode 100644
index 000000000000..b7dff53a7103
--- /dev/null
+++ b/arch/ppc/math-emu/math.c
@@ -0,0 +1,485 @@
+/*
+ * arch/ppc/math-emu/math.c
+ *
+ * Copyright (C) 1999  Eddie C. Dost  (ecd@atecom.com)
+ */
+#include <linux/config.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <asm/uaccess.h>
+#include <asm/reg.h>
+#include "sfp-machine.h"
+#include "double.h"
+/*
+ * Declare an emulation handler.  Every handler is declared with the same
+ * four-pointer prototype so that do_mathemu() can call any of them through
+ * one function-pointer type, passing op0..op3.
+ *
+ * NOTE(review): the handler definitions visible in this patch take fewer
+ * and differently typed parameters (e.g. fres(void *, void *),
+ * mcrfs(u32 *, u32, u32), mtfsb0(int)), so these declarations do not match
+ * the definitions; the calls rely on the ABI tolerating that.
+ */
+#define FLOATFUNC(x)    extern int x(void *, void *, void *, void *)
+/* Arithmetic */
+FLOATFUNC(fadd);
+FLOATFUNC(fadds);
+FLOATFUNC(fdiv);
+FLOATFUNC(fdivs);
+FLOATFUNC(fmul);
+FLOATFUNC(fmuls);
+FLOATFUNC(fsub);
+FLOATFUNC(fsubs);
+/* Multiply-add family */
+FLOATFUNC(fmadd);
+FLOATFUNC(fmadds);
+FLOATFUNC(fmsub);
+FLOATFUNC(fmsubs);
+FLOATFUNC(fnmadd);
+FLOATFUNC(fnmadds);
+FLOATFUNC(fnmsub);
+FLOATFUNC(fnmsubs);
+/* Conversion and rounding */
+FLOATFUNC(fctiw);
+FLOATFUNC(fctiwz);
+FLOATFUNC(frsp);
+/* Compare */
+FLOATFUNC(fcmpo);
+FLOATFUNC(fcmpu);
+/* FPSCR access */
+FLOATFUNC(mcrfs);
+FLOATFUNC(mffs);
+FLOATFUNC(mtfsb0);
+FLOATFUNC(mtfsb1);
+FLOATFUNC(mtfsf);
+FLOATFUNC(mtfsfi);
+/* Loads and stores */
+FLOATFUNC(lfd);
+FLOATFUNC(lfs);
+FLOATFUNC(stfd);
+FLOATFUNC(stfs);
+FLOATFUNC(stfiwx);
+/* Register moves and sign manipulation */
+FLOATFUNC(fabs);
+FLOATFUNC(fmr);
+FLOATFUNC(fnabs);
+FLOATFUNC(fneg);
+/* Optional instructions */
+FLOATFUNC(fres);
+FLOATFUNC(frsqrte);
+FLOATFUNC(fsel);
+FLOATFUNC(fsqrt);
+FLOATFUNC(fsqrts);
+/*
+ * Primary opcodes: do_mathemu() compares these against insn >> 26.
+ * The trailing comment on each line is the same value in decimal.
+ */
+#define OP31            0x1f            /*   31 */
+#define LFS             0x30            /*   48 */
+#define LFSU            0x31            /*   49 */
+#define LFD             0x32            /*   50 */
+#define LFDU            0x33            /*   51 */
+#define STFS            0x34            /*   52 */
+#define STFSU           0x35            /*   53 */
+#define STFD            0x36            /*   54 */
+#define STFDU           0x37            /*   55 */
+#define OP59            0x3b            /*   59 */
+#define OP63            0x3f            /*   63 */
+/* Opcode 31: indexed loads and stores */
+/* X-Form: compared against (insn >> 1) & 0x3ff */
+#define LFSX            0x217           /*  535 */
+#define LFSUX           0x237           /*  567 */
+#define LFDX            0x257           /*  599 */
+#define LFDUX           0x277           /*  631 */
+#define STFSX           0x297           /*  663 */
+#define STFSUX          0x2b7           /*  695 */
+#define STFDX           0x2d7           /*  727 */
+#define STFDUX          0x2f7           /*  759 */
+#define STFIWX          0x3d7           /*  983 */
+/* Opcode 59: single-precision arithmetic */
+/* A-Form: compared against (insn >> 1) & 0x1f */
+#define FDIVS           0x012           /*   18 */
+#define FSUBS           0x014           /*   20 */
+#define FADDS           0x015           /*   21 */
+#define FSQRTS          0x016           /*   22 */
+#define FRES            0x018           /*   24 */
+#define FMULS           0x019           /*   25 */
+#define FMSUBS          0x01c           /*   28 */
+#define FMADDS          0x01d           /*   29 */
+#define FNMSUBS         0x01e           /*   30 */
+#define FNMADDS         0x01f           /*   31 */
+/* Opcode 63: double-precision arithmetic, compares, moves, FPSCR access */
+/* A-Form: used when (insn & 0x20) is set; compared against (insn >> 1) & 0x1f */
+#define FDIV            0x012           /*   18 */
+#define FSUB            0x014           /*   20 */
+#define FADD            0x015           /*   21 */
+#define FSQRT           0x016           /*   22 */
+#define FSEL            0x017           /*   23 */
+#define FMUL            0x019           /*   25 */
+#define FRSQRTE         0x01a           /*   26 */
+#define FMSUB           0x01c           /*   28 */
+#define FMADD           0x01d           /*   29 */
+#define FNMSUB          0x01e           /*   30 */
+#define FNMADD          0x01f           /*   31 */
+/* X-Form: used otherwise; compared against (insn >> 1) & 0x3ff */
+#define FCMPU           0x000           /*    0 */
+#define FRSP            0x00c           /*   12 */
+#define FCTIW           0x00e           /*   14 */
+#define FCTIWZ          0x00f           /*   15 */
+#define FCMPO           0x020           /*   32 */
+#define MTFSB1          0x026           /*   38 */
+#define FNEG            0x028           /*   40 */
+#define MCRFS           0x040           /*   64 */
+#define MTFSB0          0x046           /*   70 */
+#define FMR             0x048           /*   72 */
+#define MTFSFI          0x086           /*  134 */
+#define FNABS           0x088           /*  136 */
+#define FABS            0x108           /*  264 */
+#define MFFS            0x247           /*  583 */
+#define MTFSF           0x2c7           /*  711 */
+/*
+ * Operand-decoding classes.  do_mathemu() assigns one to every recognised
+ * instruction and uses it to fill op0..op3 before calling the handler.
+ * "FPR@n" below means &current->thread.fpr[(insn >> n) & 0x1f].
+ */
+#define AB      2       /* op0..op2 = FPR@21, FPR@16, FPR@11 */
+#define AC      3       /* op0..op2 = FPR@21, FPR@16, FPR@6 */
+#define ABC     4       /* op0..op3 = FPR@21, FPR@16, FPR@11, FPR@6 */
+#define D       5       /* op0 = FPR@21, op1 = GPR-based address + 16-bit displacement */
+#define DU      6       /* as D; the address is written back to the GPR afterwards */
+#define X       7       /* op0 = FPR@21 only */
+#define XA      8       /* op0 = FPR@21, op1 = FPR@16 */
+#define XB      9       /* op0 = FPR@21, op1 = FPR@11 */
+#define XCR     11      /* op0 = &regs->ccr, op1 = 3-bit field at bit 23, op2/op3 = FPR@16, FPR@11 */
+#define XCRB    12      /* op0 = 5-bit field at bit 21 */
+#define XCRI    13      /* op0 = 3-bit field at bit 23, op1 = 4-bit field at bit 12 */
+#define XCRL    16      /* op0 = &regs->ccr, op1 = 3-bit field at bit 23, op2 = 3-bit field at bit 18 */
+#define XE      14      /* op0 = FPR@21, op1 = address formed from two GPRs */
+#define XEU     15      /* as XE; the address is written back to the GPR afterwards */
+#define XFLB    10      /* op0 = 8-bit field at bit 17, op1 = FPR@11 */
+#ifdef CONFIG_MATH_EMULATION
+/*
+ * record_exception - merge the EFLAG_* result of one emulated instruction
+ * into the emulated FPSCR.
+ *
+ * regs:  not used.
+ * eflag: EFLAG_* bits returned by the instruction handler.
+ *
+ * A non-zero eflag sets FPSCR_FX plus the FPSCR bit paired with each
+ * EFLAG_* bit present.  FPSCR_VX is then recomputed from the individual
+ * VX* bits and FPSCR_FEX from every (exception, enable) pair.
+ *
+ * Returns 1 when FPSCR_FEX ends up set, 0 otherwise.
+ */
+static int
+record_exception(struct pt_regs *regs, int eflag)
+{
+        static const struct {
+                int     eflag;
+                u32     fpscr;
+        } sticky[] = {
+                { EFLAG_OVERFLOW,       FPSCR_OX },
+                { EFLAG_UNDERFLOW,      FPSCR_UX },
+                { EFLAG_DIVZERO,        FPSCR_ZX },
+                { EFLAG_INEXACT,        FPSCR_XX },
+                { EFLAG_VXSNAN,         FPSCR_VXSNAN },
+                { EFLAG_VXISI,          FPSCR_VXISI },
+                { EFLAG_VXIDI,          FPSCR_VXIDI },
+                { EFLAG_VXZDZ,          FPSCR_VXZDZ },
+                { EFLAG_VXIMZ,          FPSCR_VXIMZ },
+                { EFLAG_VXVC,           FPSCR_VXVC },
+                { EFLAG_VXSOFT,         FPSCR_VXSOFT },
+                { EFLAG_VXSQRT,         FPSCR_VXSQRT },
+                { EFLAG_VXCVI,          FPSCR_VXCVI },
+        };
+        u32 fpscr = __FPU_FPSCR;
+        unsigned int i;
+        int pending;
+
+        if (eflag) {
+                fpscr |= FPSCR_FX;
+                for (i = 0; i < sizeof(sticky) / sizeof(sticky[0]); i++) {
+                        if (eflag & sticky[i].eflag)
+                                fpscr |= sticky[i].fpscr;
+                }
+        }
+
+        /* VX summarises the individual invalid-operation causes. */
+        fpscr &= ~(FPSCR_VX);
+        if (fpscr & (FPSCR_VXSNAN | FPSCR_VXISI | FPSCR_VXIDI |
+                     FPSCR_VXZDZ | FPSCR_VXIMZ | FPSCR_VXVC |
+                     FPSCR_VXSOFT | FPSCR_VXSQRT | FPSCR_VXCVI))
+                fpscr |= FPSCR_VX;
+
+        /* FEX is set when any raised exception also has its enable bit. */
+        fpscr &= ~(FPSCR_FEX);
+        pending = ((fpscr & FPSCR_VX) && (fpscr & FPSCR_VE)) ||
+                  ((fpscr & FPSCR_OX) && (fpscr & FPSCR_OE)) ||
+                  ((fpscr & FPSCR_UX) && (fpscr & FPSCR_UE)) ||
+                  ((fpscr & FPSCR_ZX) && (fpscr & FPSCR_ZE)) ||
+                  ((fpscr & FPSCR_XX) && (fpscr & FPSCR_XE));
+        if (pending)
+                fpscr |= FPSCR_FEX;
+
+        __FPU_FPSCR = fpscr;
+
+        return pending ? 1 : 0;
+}
+#endif /* CONFIG_MATH_EMULATION */
+/*
+ * do_mathemu - decode and emulate the floating-point instruction found at
+ * regs->nip.
+ *
+ * Returns:
+ *   0        the instruction was emulated and regs->nip was advanced by 4;
+ *   1        record_exception() reported an enabled exception (nip is not
+ *            advanced; CONFIG_MATH_EMULATION only);
+ *   -EFAULT  the instruction word could not be read;
+ *   -ENOSYS  the instruction is not recognised or uses an invalid form;
+ *   < 0      any negative value returned by a handler is passed through
+ *            (lfd()/lfs() return -EFAULT, fres() returns -ENOSYS).
+ *
+ * Without CONFIG_MATH_EMULATION only lfd, lfdu, stfd and stfdu are
+ * decoded, and every primary-opcode-63 instruction is executed as fmr.
+ */
+int
+do_mathemu(struct pt_regs *regs)
+{
+        void *op0 = 0, *op1 = 0, *op2 = 0, *op3 = 0;
+        unsigned long pc = regs->nip;
+        signed short sdisp;
+        u32 insn = 0;
+        int idx = 0;
+#ifdef CONFIG_MATH_EMULATION
+        int (*func)(void *, void *, void *, void *);
+        int type = 0;
+        int eflag, trap;
+#else
+        int err;
+#endif
+
+        if (get_user(insn, (u32 *)pc))
+                return -EFAULT;
+
+#ifndef CONFIG_MATH_EMULATION
+        switch (insn >> 26) {
+        case LFD:
+                idx = (insn >> 16) & 0x1f;
+                sdisp = (insn & 0xffff);
+                op0 = (void *)&current->thread.fpr[(insn >> 21) & 0x1f];
+                op1 = (void *)((idx ? regs->gpr[idx] : 0) + sdisp);
+                err = lfd(op0, op1, op2, op3);
+                if (err < 0)
+                        return err;
+                break;
+        case LFDU:
+                idx = (insn >> 16) & 0x1f;
+                /* Update form with RA = 0 is invalid; never write gpr[0]. */
+                if (!idx)
+                        goto illegal;
+                sdisp = (insn & 0xffff);
+                op0 = (void *)&current->thread.fpr[(insn >> 21) & 0x1f];
+                op1 = (void *)(regs->gpr[idx] + sdisp);
+                err = lfd(op0, op1, op2, op3);
+                if (err < 0)
+                        return err;
+                regs->gpr[idx] = (unsigned long)op1;
+                break;
+        case STFD:
+                idx = (insn >> 16) & 0x1f;
+                sdisp = (insn & 0xffff);
+                op0 = (void *)&current->thread.fpr[(insn >> 21) & 0x1f];
+                op1 = (void *)((idx ? regs->gpr[idx] : 0) + sdisp);
+                err = stfd(op0, op1, op2, op3);
+                if (err < 0)
+                        return err;
+                break;
+        case STFDU:
+                idx = (insn >> 16) & 0x1f;
+                /* Update form with RA = 0 is invalid; never write gpr[0]. */
+                if (!idx)
+                        goto illegal;
+                sdisp = (insn & 0xffff);
+                op0 = (void *)&current->thread.fpr[(insn >> 21) & 0x1f];
+                op1 = (void *)(regs->gpr[idx] + sdisp);
+                err = stfd(op0, op1, op2, op3);
+                if (err < 0)
+                        return err;
+                regs->gpr[idx] = (unsigned long)op1;
+                break;
+        case OP63:
+                /* Every opcode-63 instruction is treated as a register move. */
+                op0 = (void *)&current->thread.fpr[(insn >> 21) & 0x1f];
+                op1 = (void *)&current->thread.fpr[(insn >> 11) & 0x1f];
+                fmr(op0, op1, op2, op3);
+                break;
+        default:
+                goto illegal;
+        }
+#else /* CONFIG_MATH_EMULATION */
+        /* Step 1: pick the handler and the operand-decoding class. */
+        switch (insn >> 26) {
+        case LFS:       func = lfs;     type = D;       break;
+        case LFSU:      func = lfs;     type = DU;      break;
+        case LFD:       func = lfd;     type = D;       break;
+        case LFDU:      func = lfd;     type = DU;      break;
+        case STFS:      func = stfs;    type = D;       break;
+        case STFSU:     func = stfs;    type = DU;      break;
+        case STFD:      func = stfd;    type = D;       break;
+        case STFDU:     func = stfd;    type = DU;      break;
+        case OP31:
+                switch ((insn >> 1) & 0x3ff) {
+                case LFSX:      func = lfs;     type = XE;      break;
+                case LFSUX:     func = lfs;     type = XEU;     break;
+                case LFDX:      func = lfd;     type = XE;      break;
+                case LFDUX:     func = lfd;     type = XEU;     break;
+                case STFSX:     func = stfs;    type = XE;      break;
+                case STFSUX:    func = stfs;    type = XEU;     break;
+                case STFDX:     func = stfd;    type = XE;      break;
+                case STFDUX:    func = stfd;    type = XEU;     break;
+                case STFIWX:    func = stfiwx;  type = XE;      break;
+                default:
+                        goto illegal;
+                }
+                break;
+        case OP59:
+                switch ((insn >> 1) & 0x1f) {
+                case FDIVS:     func = fdivs;   type = AB;      break;
+                case FSUBS:     func = fsubs;   type = AB;      break;
+                case FADDS:     func = fadds;   type = AB;      break;
+                /*
+                 * fsqrts and fres take (frD, frB): frB lives in bits 11-15,
+                 * so they need the XB operand layout, not AB.
+                 */
+                case FSQRTS:    func = fsqrts;  type = XB;      break;
+                case FRES:      func = fres;    type = XB;      break;
+                case FMULS:     func = fmuls;   type = AC;      break;
+                case FMSUBS:    func = fmsubs;  type = ABC;     break;
+                case FMADDS:    func = fmadds;  type = ABC;     break;
+                case FNMSUBS:   func = fnmsubs; type = ABC;     break;
+                case FNMADDS:   func = fnmadds; type = ABC;     break;
+                default:
+                        goto illegal;
+                }
+                break;
+        case OP63:
+                if (insn & 0x20) {
+                        switch ((insn >> 1) & 0x1f) {
+                        case FDIV:      func = fdiv;    type = AB;      break;
+                        case FSUB:      func = fsub;    type = AB;      break;
+                        case FADD:      func = fadd;    type = AB;      break;
+                        /* (frD, frB) handlers: see the note under OP59. */
+                        case FSQRT:     func = fsqrt;   type = XB;      break;
+                        case FSEL:      func = fsel;    type = ABC;     break;
+                        case FMUL:      func = fmul;    type = AC;      break;
+                        case FRSQRTE:   func = frsqrte; type = XB;      break;
+                        case FMSUB:     func = fmsub;   type = ABC;     break;
+                        case FMADD:     func = fmadd;   type = ABC;     break;
+                        case FNMSUB:    func = fnmsub;  type = ABC;     break;
+                        case FNMADD:    func = fnmadd;  type = ABC;     break;
+                        default:
+                                goto illegal;
+                        }
+                        break;
+                }
+                switch ((insn >> 1) & 0x3ff) {
+                case FCMPU:     func = fcmpu;   type = XCR;     break;
+                case FRSP:      func = frsp;    type = XB;      break;
+                case FCTIW:     func = fctiw;   type = XB;      break;
+                case FCTIWZ:    func = fctiwz;  type = XB;      break;
+                case FCMPO:     func = fcmpo;   type = XCR;     break;
+                case MTFSB1:    func = mtfsb1;  type = XCRB;    break;
+                case FNEG:      func = fneg;    type = XB;      break;
+                case MCRFS:     func = mcrfs;   type = XCRL;    break;
+                case MTFSB0:    func = mtfsb0;  type = XCRB;    break;
+                case FMR:       func = fmr;     type = XB;      break;
+                case MTFSFI:    func = mtfsfi;  type = XCRI;    break;
+                case FNABS:     func = fnabs;   type = XB;      break;
+                case FABS:      func = fabs;    type = XB;      break;
+                case MFFS:      func = mffs;    type = X;       break;
+                case MTFSF:     func = mtfsf;   type = XFLB;    break;
+                default:
+                        goto illegal;
+                }
+                break;
+        default:
+                goto illegal;
+        }
+
+        /* Step 2: decode the operands according to the class. */
+        switch (type) {
+        case AB:
+                op0 = (void *)&current->thread.fpr[(insn >> 21) & 0x1f];
+                op1 = (void *)&current->thread.fpr[(insn >> 16) & 0x1f];
+                op2 = (void *)&current->thread.fpr[(insn >> 11) & 0x1f];
+                break;
+        case AC:
+                op0 = (void *)&current->thread.fpr[(insn >> 21) & 0x1f];
+                op1 = (void *)&current->thread.fpr[(insn >> 16) & 0x1f];
+                op2 = (void *)&current->thread.fpr[(insn >>  6) & 0x1f];
+                break;
+        case ABC:
+                op0 = (void *)&current->thread.fpr[(insn >> 21) & 0x1f];
+                op1 = (void *)&current->thread.fpr[(insn >> 16) & 0x1f];
+                op2 = (void *)&current->thread.fpr[(insn >> 11) & 0x1f];
+                op3 = (void *)&current->thread.fpr[(insn >>  6) & 0x1f];
+                break;
+        case D:
+                idx = (insn >> 16) & 0x1f;
+                sdisp = (insn & 0xffff);
+                op0 = (void *)&current->thread.fpr[(insn >> 21) & 0x1f];
+                op1 = (void *)((idx ? regs->gpr[idx] : 0) + sdisp);
+                break;
+        case DU:
+                idx = (insn >> 16) & 0x1f;
+                if (!idx)
+                        goto illegal;
+                sdisp = (insn & 0xffff);
+                op0 = (void *)&current->thread.fpr[(insn >> 21) & 0x1f];
+                op1 = (void *)(regs->gpr[idx] + sdisp);
+                break;
+        case X:
+                op0 = (void *)&current->thread.fpr[(insn >> 21) & 0x1f];
+                break;
+        case XA:
+                op0 = (void *)&current->thread.fpr[(insn >> 21) & 0x1f];
+                op1 = (void *)&current->thread.fpr[(insn >> 16) & 0x1f];
+                break;
+        case XB:
+                op0 = (void *)&current->thread.fpr[(insn >> 21) & 0x1f];
+                op1 = (void *)&current->thread.fpr[(insn >> 11) & 0x1f];
+                break;
+        case XE:
+                /* Indexed, no update: RA = 0 means a base of zero. */
+                idx = (insn >> 16) & 0x1f;
+                op0 = (void *)&current->thread.fpr[(insn >> 21) & 0x1f];
+                op1 = (void *)((idx ? regs->gpr[idx] : 0)
+                                + regs->gpr[(insn >> 11) & 0x1f]);
+                break;
+        case XEU:
+                /* Indexed with update: RA = 0 is invalid, as for DU. */
+                idx = (insn >> 16) & 0x1f;
+                if (!idx)
+                        goto illegal;
+                op0 = (void *)&current->thread.fpr[(insn >> 21) & 0x1f];
+                op1 = (void *)(regs->gpr[idx] + regs->gpr[(insn >> 11) & 0x1f]);
+                break;
+        case XCR:
+                op0 = (void *)&regs->ccr;
+                op1 = (void *)((insn >> 23) & 0x7);
+                op2 = (void *)&current->thread.fpr[(insn >> 16) & 0x1f];
+                op3 = (void *)&current->thread.fpr[(insn >> 11) & 0x1f];
+                break;
+        case XCRL:
+                op0 = (void *)&regs->ccr;
+                op1 = (void *)((insn >> 23) & 0x7);
+                op2 = (void *)((insn >> 18) & 0x7);
+                break;
+        case XCRB:
+                op0 = (void *)((insn >> 21) & 0x1f);
+                break;
+        case XCRI:
+                op0 = (void *)((insn >> 23) & 0x7);
+                op1 = (void *)((insn >> 12) & 0xf);
+                break;
+        case XFLB:
+                op0 = (void *)((insn >> 17) & 0xff);
+                op1 = (void *)&current->thread.fpr[(insn >> 11) & 0x1f];
+                break;
+        default:
+                goto illegal;
+        }
+
+        /* Step 3: run the handler. */
+        eflag = func(op0, op1, op2, op3);
+
+        /*
+         * A negative value is an errno from the handler, not a set of
+         * exception flags; report it instead of feeding it to
+         * record_exception().
+         * NOTE(review): relies on no EFLAG_* mask using the sign bit --
+         * confirm against sfp-machine.h.
+         */
+        if (eflag < 0)
+                return eflag;
+
+        /* Record form: copy FPSCR bits 0-3 into CR field 1. */
+        if (insn & 1) {
+                regs->ccr &= ~(0x0f000000);
+                regs->ccr |= (__FPU_FPSCR >> 4) & 0x0f000000;
+        }
+
+        trap = record_exception(regs, eflag);
+        if (trap)
+                return 1;
+
+        /* Step 4: update forms write the effective address back to RA. */
+        switch (type) {
+        case DU:
+        case XEU:
+                regs->gpr[idx] = (unsigned long)op1;
+                break;
+        default:
+                break;
+        }
+#endif /* CONFIG_MATH_EMULATION */
+
+        regs->nip += 4;
+        return 0;
+
+illegal:
+        return -ENOSYS;
+}
diff --git a/arch/ppc/math-emu/mcrfs.c b/arch/ppc/math-emu/mcrfs.c
new file mode 100644
index 000000000000..106dd912914b
--- /dev/null
+++ b/arch/ppc/math-emu/mcrfs.c
@@ -0,0 +1,31 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+/*
+ * mcrfs - copy FPSCR field crfS into CR field crfD.
+ *
+ * ccr:  condition register image to update.
+ * crfD: destination CR field, 0..7 (field 0 is the most significant nibble).
+ * crfS: source FPSCR field, 0..7.
+ *
+ * After the copy only the exception bits of the source field are reset in
+ * the FPSCR (FX, OX, UX, ZX, XX and the VX* cause bits).  FEX, VX, the
+ * result flags, the enable bits and the rounding mode are left alone.
+ *
+ * Always returns 0.
+ */
+int
+mcrfs(u32 *ccr, u32 crfD, u32 crfS)
+{
+        /* FX | OX | UX..VXVC | VXSOFT | VXSQRT | VXCVI */
+        const u32 exception_bits = 0x9ff80700;
+        const unsigned int src_shift = (7 - crfS) << 2;
+        const unsigned int dst_shift = (7 - crfD) << 2;
+        u32 value, clear;
+
+#ifdef DEBUG
+        printk("%s: %p (%08x) %u %u\n", __FUNCTION__, ccr, *ccr, crfD, crfS);
+#endif
+
+        value = (__FPU_FPSCR >> src_shift) & 15;
+
+        /* Unsigned constant: 15 << 28 would overflow a signed int. */
+        clear = ((u32)15 << src_shift) & exception_bits;
+        __FPU_FPSCR &= ~(clear);
+
+        *ccr &= ~((u32)15 << dst_shift);
+        *ccr |= (value << dst_shift);
+
+#ifdef DEBUG
+        printk("%s: CR: %08x\n", __FUNCTION__, *ccr);
+#endif
+
+        return 0;
+}
diff --git a/arch/ppc/math-emu/mffs.c b/arch/ppc/math-emu/mffs.c
new file mode 100644
index 000000000000..f477c9170e75
--- /dev/null
+++ b/arch/ppc/math-emu/mffs.c
@@ -0,0 +1,17 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+/*
+ * mffs - emulate "move from FPSCR".
+ *
+ * Stores the emulated FPSCR in the second 32-bit word of frD; the first
+ * word is not modified.  Always returns 0.
+ */
+int mffs(u32 *frD)
+{
+        u32 *second_word = &frD[1];
+
+        *second_word = __FPU_FPSCR;
+
+#ifdef DEBUG
+        printk("%s: frD %p: %08x.%08x\n", __FUNCTION__, frD, frD[0], frD[1]);
+#endif
+
+        return 0;
+}
diff --git a/arch/ppc/math-emu/mtfsb0.c b/arch/ppc/math-emu/mtfsb0.c
new file mode 100644
index 000000000000..99bfd80f4af3
--- /dev/null
+++ b/arch/ppc/math-emu/mtfsb0.c
@@ -0,0 +1,18 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+/*
+ * mtfsb0 - clear one bit of the emulated FPSCR.
+ *
+ * crbD: bit number, counted from the most significant bit (0) to the
+ *       least significant bit (31).
+ *
+ * Bits 1 and 2 are never modified here.  Always returns 0.
+ */
+int
+mtfsb0(int crbD)
+{
+        /* 1UL keeps the shift defined when crbD is 0 (bit 31 of the word). */
+        if ((crbD != 1) && (crbD != 2))
+                __FPU_FPSCR &= ~(1UL << (31 - crbD));
+
+#ifdef DEBUG
+        printk("%s: %d %08lx\n", __FUNCTION__, crbD, __FPU_FPSCR);
+#endif
+
+        return 0;
+}
diff --git a/arch/ppc/math-emu/mtfsb1.c b/arch/ppc/math-emu/mtfsb1.c
new file mode 100644
index 000000000000..3d9e7ed92d2b
--- /dev/null
+++ b/arch/ppc/math-emu/mtfsb1.c
@@ -0,0 +1,18 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+/*
+ * mtfsb1 - set one bit of the emulated FPSCR.
+ *
+ * crbD: bit number, counted from the most significant bit (0) to the
+ *       least significant bit (31).
+ *
+ * Bits 1 and 2 are never modified here.  Always returns 0.
+ */
+int
+mtfsb1(int crbD)
+{
+        /* 1UL keeps the shift defined when crbD is 0 (bit 31 of the word). */
+        if ((crbD != 1) && (crbD != 2))
+                __FPU_FPSCR |= (1UL << (31 - crbD));
+
+#ifdef DEBUG
+        printk("%s: %d %08lx\n", __FUNCTION__, crbD, __FPU_FPSCR);
+#endif
+
+        return 0;
+}
diff --git a/arch/ppc/math-emu/mtfsf.c b/arch/ppc/math-emu/mtfsf.c
new file mode 100644
index 000000000000..d70cf714994c
--- /dev/null
+++ b/arch/ppc/math-emu/mtfsf.c
@@ -0,0 +1,45 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+/*
+ * mtfsf - emulate "move to FPSCR fields".
+ *
+ * FM:  8-bit field mask as extracted from the instruction.  Its most
+ *      significant bit (0x80) selects FPSCR field 0 (the top nibble) and
+ *      its least significant bit (0x01) selects field 7 (the bottom
+ *      nibble).
+ * frB: source register image; the second 32-bit word supplies the new
+ *      FPSCR contents.
+ *
+ * Bits 0x60000000 of the FPSCR are never written, so selecting field 0
+ * only replaces bits 0x90000000.  Always returns 0.
+ */
+int
+mtfsf(unsigned int FM, u32 *frB)
+{
+        u32 mask = 0;
+        int field;
+
+        if (FM == 0)
+                return 0;
+
+        /* Expand each selected FM bit into the nibble of its field. */
+        for (field = 0; field < 8; field++) {
+                if (FM & (0x80 >> field))
+                        mask |= (u32)0xf0000000 >> (field << 2);
+        }
+
+        /* Keep the two protected bits of field 0 out of the write. */
+        mask &= 0x9fffffff;
+
+        __FPU_FPSCR &= ~(mask);
+        __FPU_FPSCR |= (frB[1] & mask);
+
+#ifdef DEBUG
+        printk("%s: %02x %p: %08lx\n", __FUNCTION__, FM, frB, __FPU_FPSCR);
+#endif
+
+        return 0;
+}
diff --git a/arch/ppc/math-emu/mtfsfi.c b/arch/ppc/math-emu/mtfsfi.c
new file mode 100644
index 000000000000..71df854baa7e
--- /dev/null
+++ b/arch/ppc/math-emu/mtfsfi.c
@@ -0,0 +1,23 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+/*
+ * mtfsfi - emulate "move to FPSCR field immediate".
+ *
+ * crfD: FPSCR field to replace, 0..7 (field 0 is the top nibble).
+ * IMM:  4-bit immediate holding the new field contents.
+ *
+ * In field 0 only the outer two bits (mask 9) are writable; the same
+ * mask is applied to both the clear and the set step so the inner two
+ * bits keep their previous value.  Always returns 0.
+ */
+int
+mtfsfi(unsigned int crfD, unsigned int IMM)
+{
+        const unsigned int shift = (7 - crfD) << 2;
+        u32 mask = 0xf;
+
+        if (!crfD)
+                mask = 9;
+
+        __FPU_FPSCR &= ~(mask << shift);
+        __FPU_FPSCR |= (IMM & mask) << shift;
+
+#ifdef DEBUG
+        printk("%s: %d %x: %08lx\n", __FUNCTION__, crfD, IMM, __FPU_FPSCR);
+#endif
+
+        return 0;
+}
diff --git a/arch/ppc/math-emu/op-1.h b/arch/ppc/math-emu/op-1.h
new file mode 100644
index 000000000000..c92fa95f562e
--- /dev/null
+++ b/arch/ppc/math-emu/op-1.h
@@ -0,0 +1,245 @@
+/*
+ * Basic one-word fraction declaration and manipulation.
+ */
+#define _FP_FRAC_DECL_1(X)      _FP_W_TYPE X##_f        /* declare fraction word X_f */
+#define _FP_FRAC_COPY_1(D,S)    (D##_f = S##_f)         /* D_f = S_f */
+#define _FP_FRAC_SET_1(X,I)     (X##_f = I)             /* X_f = I */
+#define _FP_FRAC_HIGH_1(X)      (X##_f)                 /* one word only: high, */
+#define _FP_FRAC_LOW_1(X)       (X##_f)                 /* low and any indexed */
+#define _FP_FRAC_WORD_1(X,w)    (X##_f)                 /* word are X_f (w unused) */
+#define _FP_FRAC_ADDI_1(X,I)    (X##_f += I)            /* X_f += I */
+#define _FP_FRAC_SLL_1(X,N)                                     \
+  do {  /* X_f <<= N; a literal shift by 1 is done as an add */ \
+    if (!(__builtin_constant_p(N) && (N) == 1))                 \
+      X##_f <<= (N);                                            \
+    else                                                        \
+      X##_f += X##_f;                                           \
+  } while (0)
+#define _FP_FRAC_SRL_1(X,N)     (X##_f >>= N)   /* shifted-out bits are dropped */
+/* Right shift with sticky-lsb: a 1 shifted out is ORed into bit 0; sz is unused.  */
+#define _FP_FRAC_SRS_1(X,N,sz)  __FP_FRAC_SRS_1(X##_f, N, sz)
+#define __FP_FRAC_SRS_1(X,N,sz)                                         \
+   (X = (X >> (N) | (__builtin_constant_p(N) && (N) == 1                \
+                     ? X & 1 : (X << (_FP_W_TYPE_SIZE - (N))) != 0)))
+#define _FP_FRAC_ADD_1(R,X,Y)   (R##_f = X##_f + Y##_f)         /* R_f = X_f + Y_f */
+#define _FP_FRAC_SUB_1(R,X,Y)   (R##_f = X##_f - Y##_f)         /* R_f = X_f - Y_f */
+#define _FP_FRAC_CLZ_1(z, X)    __FP_CLZ(z, X##_f)      /* defers to __FP_CLZ */
+/* Predicates: each expands to a C condition on the fraction word.  */
+#define _FP_FRAC_NEGP_1(X)      ((_FP_WS_TYPE)X##_f < 0)  /* negative as _FP_WS_TYPE */
+#define _FP_FRAC_ZEROP_1(X)     (X##_f == 0)
+#define _FP_FRAC_OVERP_1(fs,X)  (X##_f & _FP_OVERFLOW_##fs)  /* nonzero, not 0/1 */
+#define _FP_FRAC_EQ_1(X, Y)     (X##_f == Y##_f)
+#define _FP_FRAC_GE_1(X, Y)     (X##_f >= Y##_f)
+#define _FP_FRAC_GT_1(X, Y)     (X##_f > Y##_f)
+#define _FP_ZEROFRAC_1          0       /* value list for _FP_FRAC_SET_1: 0 */
+#define _FP_MINFRAC_1           1       /* value list for _FP_FRAC_SET_1: 1 */
+/*
+ * Split a native fp value `val' of format fs into X_s, X_e and X_f
+ * exactly as stored; nothing is classified or normalized.
+ */
+#define _FP_UNPACK_RAW_1(fs, X, val)                            \
+  do {                                                          \
+    union _FP_UNION_##fs _raw;                                  \
+    _raw.flt = (val);                                           \
+    X##_s = _raw.bits.sign;                                     \
+    X##_e = _raw.bits.exp;                                      \
+    X##_f = _raw.bits.frac;                                     \
+  } while (0)
+/*
+ * Rebuild a native fp value of format fs in `val' from X_s, X_e and
+ * X_f exactly as they are; no rounding or range checking is done.
+ */
+#define _FP_PACK_RAW_1(fs, val, X)                              \
+  do {                                                          \
+    union _FP_UNION_##fs _raw;                                  \
+                                                                \
+    _raw.bits.sign = X##_s;                                     \
+    _raw.bits.exp  = X##_e;                                     \
+    _raw.bits.frac = X##_f;                                     \
+    (val) = _raw.flt;                                           \
+  } while (0)
+/*
+ * Multiplication algorithms:
+ */
+/* Basic.  Assuming the host word size is >= 2*FRACBITS, we can do the
+   multiplication immediately: R_f = X_f * Y_f in a single word.  */
+#define _FP_MUL_MEAT_1_imm(fs, R, X, Y)                                 \
+  do {                                                                  \
+    R##_f = X##_f * Y##_f;                                              \
+    /* Normalize: since we know where the msb of the multiplicands      \
+       was (bit B), we know that the msb of the product is at           \
+       either 2B or 2B-1.  */                                           \
+    _FP_FRAC_SRS_1(R, _FP_WFRACBITS_##fs-1, 2*_FP_WFRACBITS_##fs);      \
+  } while (0)
+/* Given a 1W * 1W => 2W primitive doit(hi, lo, a, b), do the extended multiplication.  */
+#define _FP_MUL_MEAT_1_wide(fs, R, X, Y, doit)                          \
+  do {                                                                  \
+    _FP_W_TYPE _Z_f0, _Z_f1;                                            \
+    doit(_Z_f1, _Z_f0, X##_f, Y##_f);                                   \
+    /* Normalize: since we know where the msb of the multiplicands      \
+       was (bit B), we know that the msb of the product is at           \
+       either 2B or 2B-1.  Only the low word is kept afterwards.  */    \
+    _FP_FRAC_SRS_2(_Z, _FP_WFRACBITS_##fs-1, 2*_FP_WFRACBITS_##fs);     \
+    R##_f = _Z_f0;                                                      \
+  } while (0)
+/* Widening multiply built from four half-word products; needs no 2W primitive.  */
+#define _FP_MUL_MEAT_1_hard(fs, R, X, Y)                                \
+  do {                                                                  \
+    _FP_W_TYPE _xh, _xl, _yh, _yl, _p_f0, _p_f1, _m_f0, _m_f1;          \
+                                                                        \
+    /* low and high halves of each operand */                           \
+    _xl = X##_f & (((_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2)) - 1);         \
+    _xh = X##_f >> (_FP_W_TYPE_SIZE/2);                                 \
+    _yl = Y##_f & (((_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2)) - 1);         \
+    _yh = Y##_f >> (_FP_W_TYPE_SIZE/2);                                 \
+                                                                        \
+    /* _p: outer products (hi*hi, lo*lo); _m: the two cross terms */    \
+    _p_f1 = _xh * _yh;                                                  \
+    _p_f0 = _xl * _yl;                                                  \
+    _m_f1 = _xl * _yh;                                                  \
+    _m_f0 = _xh * _yl;                                                  \
+                                                                        \
+    /* sum the cross terms; a wrap-around carries into the high word */ \
+    _m_f0 += _m_f1;                                                     \
+    if (_m_f0 < _m_f1)                                                  \
+      _p_f1 += (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE/2);                    \
+    _m_f1 = _m_f0 >> (_FP_W_TYPE_SIZE/2);                               \
+    _m_f0 <<= (_FP_W_TYPE_SIZE/2);                                      \
+    _FP_FRAC_ADD_2(_p, _p, _m);                                         \
+    /* normalize with a sticky bit, then hand back the low word */      \
+    _FP_FRAC_SRS_2(_p, _FP_WFRACBITS_##fs - 1, 2*_FP_WFRACBITS_##fs);   \
+    R##_f = _p_f0;                                                      \
+  } while (0)
+/*
+ * Division algorithms:
+ */
+/* Basic.  Assuming the host word size is >= 2*FRACBITS, we can do the
+   division immediately.  Give this macro either _FP_DIV_HELP_imm for
+   C primitives or _FP_DIV_HELP_ldiv for the ISO function.  Which you
+   choose will depend on what the compiler does with divrem4.  */
+#define _FP_DIV_MEAT_1_imm(fs, R, X, Y, doit)           \
+  do {  /* note: X_f is shifted in place */             \
+    _FP_W_TYPE _q, _r;                                  \
+    X##_f <<= (X##_f < Y##_f                            \
+               ? R##_e--, _FP_WFRACBITS_##fs            \
+               : _FP_WFRACBITS_##fs - 1);               \
+    doit(_q, _r, X##_f, Y##_f);                         \
+    R##_f = _q | (_r != 0);  /* remainder -> sticky */  \
+  } while (0)
+/* GCC's longlong.h defines a 2W / 1W => (1W,1W) primitive udiv_qrnnd
+   that may be useful in this situation.  This first is for a primitive
+   that requires normalization, the second for one that does not.  Look
+   for UDIV_NEEDS_NORMALIZATION to tell which your machine needs.  */
+#define _FP_DIV_MEAT_1_udiv_norm(fs, R, X, Y)                           \
+  do {                                                                  \
+    _FP_W_TYPE _nh, _nl, _q, _r;                                        \
+    /* NOTE(review): Y_f is shifted in place before the compare below */ \
+    /* Normalize Y -- i.e. make the most significant bit set.  */       \
+    Y##_f <<= _FP_WFRACXBITS_##fs - 1;                                  \
+                                                                        \
+    /* Shift X op correspondingly high, that is, up one full word.  */  \
+    if (X##_f <= Y##_f)                                                 \
+      {                                                                 \
+        _nl = 0;                                                        \
+        _nh = X##_f;                                                    \
+      }                                                                 \
+    else                                                                \
+      {                                                                 \
+        R##_e++;                                                        \
+        _nl = X##_f << (_FP_W_TYPE_SIZE-1);                             \
+        _nh = X##_f >> 1;                                               \
+      }                                                                 \
+    /* _nh:_nl is the two-word dividend handed to udiv_qrnnd */         \
+    udiv_qrnnd(_q, _r, _nh, _nl, Y##_f);                                \
+    R##_f = _q | (_r != 0);  /* nonzero remainder -> sticky bit 0 */    \
+  } while (0)
+#define _FP_DIV_MEAT_1_udiv(fs, R, X, Y)                \
+  do {  /* udiv_qrnnd form without normalizing Y */     \
+    _FP_W_TYPE _num_hi, _num_lo, _quo, _rem;            \
+    if (X##_f >= Y##_f)                                 \
+      {                                                 \
+        _num_lo = X##_f << (_FP_WFRACBITS_##fs - 1);    \
+        _num_hi = X##_f >> (_FP_WFRACXBITS_##fs + 1);   \
+      }                                                 \
+    else                                                \
+      {                                                 \
+        R##_e--;                                        \
+        _num_lo = X##_f << _FP_WFRACBITS_##fs;          \
+        _num_hi = X##_f >> _FP_WFRACXBITS_##fs;         \
+      }                                                 \
+    udiv_qrnnd(_quo, _rem, _num_hi, _num_lo, Y##_f);    \
+    R##_f = _quo | (_rem != 0);                         \
+  } while (0)
+/*
+ * Square root algorithms:
+ * Only a bit-at-a-time method is provided; a Newton approximation
+ * could be added for machines where division is fast.
+ */
+#define _FP_SQRT_MEAT_1(R, S, T, X, q)                  \
+  do {                                                  \
+    /* one candidate result bit q per pass, high to low */ \
+    for (; q; q >>= 1)                                  \
+      {                                                 \
+        T##_f = S##_f + q;                              \
+        if (T##_f <= X##_f)                             \
+          {                                             \
+            S##_f = T##_f + q;                          \
+            X##_f -= T##_f;                             \
+            R##_f += q;                                 \
+          }                                             \
+        _FP_FRAC_SLL_1(X, 1);                           \
+      }                                                 \
+  } while (0)
+/*
+ * Assembly/disassembly for converting to/from integral types.
+ * No shifting or overflow handled here; rsize is accepted but unused.
+ */
+#define _FP_FRAC_ASSEMBLE_1(r, X, rsize)        (r = X##_f)     /* r = X_f */
+#define _FP_FRAC_DISASSEMBLE_1(X, r, rsize)     (X##_f = r)     /* X_f = r */
+/*
+ * Convert FP values between word sizes
+ */
+#define _FP_FRAC_CONV_1_1(dfs, sfs, D, S)                               \
+  do {  /* one word to one word: rescale between working widths */     \
+    D##_f = S##_f;                                                      \
+    if (_FP_WFRACBITS_##sfs <= _FP_WFRACBITS_##dfs)                     \
+      D##_f <<= _FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs;              \
+    else                                                                \
+      _FP_FRAC_SRS_1(D, (_FP_WFRACBITS_##sfs-_FP_WFRACBITS_##dfs),      \
+                     _FP_WFRACBITS_##sfs);                              \
+  } while (0)
diff --git a/arch/ppc/math-emu/op-2.h b/arch/ppc/math-emu/op-2.h
new file mode 100644
index 000000000000..b9b06b4c6ea1
--- /dev/null
+++ b/arch/ppc/math-emu/op-2.h
@@ -0,0 +1,433 @@
+/*
+ * Basic two-word fraction declaration and manipulation.
+ */
+#define _FP_FRAC_DECL_2(X)      _FP_W_TYPE X##_f0, X##_f1       /* f0 low, f1 high */
+#define _FP_FRAC_COPY_2(D,S)    (D##_f0 = S##_f0, D##_f1 = S##_f1)
+#define _FP_FRAC_SET_2(X,I)     __FP_FRAC_SET_2(X, I)   /* I expands to "hi, lo" */
+#define _FP_FRAC_HIGH_2(X)      (X##_f1)
+#define _FP_FRAC_LOW_2(X)       (X##_f0)
+#define _FP_FRAC_WORD_2(X,w)    (X##_f##w)      /* w must be a literal 0 or 1 */
+#define _FP_FRAC_SLL_2(X,N)                                             \
+  do {  /* shift the two-word fraction X_f1:X_f0 left by N bits */      \
+    if ((N) >= _FP_W_TYPE_SIZE)                                         \
+      {                                                                 \
+        /* a word or more: the low word moves up, zeros come in */      \
+        X##_f1 = X##_f0 << ((N) - _FP_W_TYPE_SIZE);                     \
+        X##_f0 = 0;                                                     \
+      }                                                                 \
+    else if (__builtin_constant_p(N) && (N) == 1)                       \
+      {                                                                 \
+        /* doubling: f0 negative as _FP_WS_TYPE carries into f1 */      \
+        X##_f1 = X##_f1 + X##_f1 + (((_FP_WS_TYPE)(X##_f0)) < 0);       \
+        X##_f0 += X##_f0;                                               \
+      }                                                                 \
+    else                                                                \
+      {                                                                 \
+        /* general case: the top N bits of f0 move into f1 */           \
+        X##_f1 = X##_f1 << (N) | X##_f0 >> (_FP_W_TYPE_SIZE - (N));     \
+        X##_f0 <<= (N);                                                 \
+      }                                                                 \
+  } while (0)
+#define _FP_FRAC_SRL_2(X,N)                                             \
+  do {  /* shift X_f1:X_f0 right by N; shifted-out bits are dropped */  \
+    if ((N) >= _FP_W_TYPE_SIZE)                                         \
+      {                                                                 \
+        X##_f0 = X##_f1 >> ((N) - _FP_W_TYPE_SIZE);                     \
+        X##_f1 = 0;                                                     \
+      }                                                                 \
+    else                                                                \
+      {                                                                 \
+        X##_f0 = X##_f0 >> (N) | X##_f1 << (_FP_W_TYPE_SIZE - (N));     \
+        X##_f1 >>= (N);                                                 \
+      }                                                                 \
+  } while (0)
+/* Right shift with sticky-lsb: any 1 bit shifted out sets bit 0 of X_f0.  */
+#define _FP_FRAC_SRS_2(X,N,sz)                                          \
+  do {  /* sz is kept for interface compatibility; it is not used */    \
+    if ((N) < _FP_W_TYPE_SIZE)                                          \
+      {                                                                 \
+        X##_f0 = (X##_f1 << (_FP_W_TYPE_SIZE - (N)) | X##_f0 >> (N) |   \
+                  (__builtin_constant_p(N) && (N) == 1 ? X##_f0 & 1     \
+                   : (X##_f0 << (_FP_W_TYPE_SIZE - (N))) != 0));        \
+        X##_f1 >>= (N);                                                 \
+      }                                                                 \
+    else                                                                \
+      { /* lost bits: all of f0 plus the low N-W bits of f1 */          \
+        X##_f0 = (X##_f1 >> ((N) - _FP_W_TYPE_SIZE) |                   \
+                  ((((N) == _FP_W_TYPE_SIZE ? 0 : X##_f1 <<             \
+                     (2 * _FP_W_TYPE_SIZE - (N))) | X##_f0) != 0));     \
+        X##_f1 = 0;                                                     \
+      }                                                                 \
+  } while (0)
+#define _FP_FRAC_ADDI_2(X,I) \
+  __FP_FRAC_ADDI_2(X##_f1, X##_f0, I)   /* X += I */
+#define _FP_FRAC_ADD_2(R,X,Y) \
+  __FP_FRAC_ADD_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)  /* R = X + Y */
+#define _FP_FRAC_SUB_2(R,X,Y) \
+  __FP_FRAC_SUB_2(R##_f1, R##_f0, X##_f1, X##_f0, Y##_f1, Y##_f0)  /* R = X - Y */
+#define _FP_FRAC_CLZ_2(R,X)     \
+  do {  /* R = leading zeros of X_f1:X_f0 */ \
+    if (X##_f1 == 0)            \
+    {                           \
+      __FP_CLZ(R,X##_f0);       \
+      R += _FP_W_TYPE_SIZE;     \
+    }                           \
+    else                        \
+      __FP_CLZ(R,X##_f1);       \
+  } while(0)
+/* Predicates: each expands to a C condition on the two-word fraction.  */
+#define _FP_FRAC_NEGP_2(X)      ((_FP_WS_TYPE)X##_f1 < 0)  /* high word negative */
+#define _FP_FRAC_ZEROP_2(X)     ((X##_f1 | X##_f0) == 0)
+#define _FP_FRAC_OVERP_2(fs,X)  (X##_f1 & _FP_OVERFLOW_##fs)  /* nonzero, not 0/1 */
+#define _FP_FRAC_EQ_2(X, Y)     (X##_f1 == Y##_f1 && X##_f0 == Y##_f0)
+#define _FP_FRAC_GT_2(X, Y)     \
+  ((X##_f1 > Y##_f1) || (X##_f1 == Y##_f1 && X##_f0 > Y##_f0))  /* high word first */
+#define _FP_FRAC_GE_2(X, Y)     \
+  ((X##_f1 > Y##_f1) || (X##_f1 == Y##_f1 && X##_f0 >= Y##_f0))
+#define _FP_ZEROFRAC_2          0, 0    /* "hi, lo" list for _FP_FRAC_SET_2 */
+#define _FP_MINFRAC_2           0, 1    /* "hi, lo" list: high 0, low 1 */
+/*
+ * Internals: helpers that take the individual words as arguments.
+ */
+#define __FP_FRAC_SET_2(X,I1,I0)        (X##_f0 = I0, X##_f1 = I1)  /* I1 high, I0 low */
+#define __FP_CLZ_2(R, xh, xl)   \
+  do {  /* R = leading zeros of the two-word value xh:xl */ \
+    if (xh)                     \
+      __FP_CLZ(R,xh);           \
+    else                        \
+    {                           \
+      __FP_CLZ(R,xl);           \
+      R += _FP_W_TYPE_SIZE;     \
+    }                           \
+  } while(0)
+#if 0   /* disabled: plain-C carry/borrow versions of the primitives */
+#ifndef __FP_FRAC_ADDI_2
+#define __FP_FRAC_ADDI_2(xh, xl, i) \
+  (xh += ((xl += i) < i))
+#endif
+#ifndef __FP_FRAC_ADD_2
+#define __FP_FRAC_ADD_2(rh, rl, xh, xl, yh, yl) \
+  (rh = xh + yh + ((rl = xl + yl) < xl))
+#endif
+#ifndef __FP_FRAC_SUB_2
+#define __FP_FRAC_SUB_2(rh, rl, xh, xl, yh, yl) \
+  (rh = xh - yh - ((rl = xl - yl) > xl))
+#endif
+#else   /* in use: map onto add_ssaaaa / sub_ddmmss, defined elsewhere */
+#undef __FP_FRAC_ADDI_2
+#define __FP_FRAC_ADDI_2(xh, xl, i)     add_ssaaaa(xh, xl, xh, xl, 0, i)
+#undef __FP_FRAC_ADD_2
+#define __FP_FRAC_ADD_2                 add_ssaaaa
+#undef __FP_FRAC_SUB_2
+#define __FP_FRAC_SUB_2                 sub_ddmmss
+#endif
+/*
+ * Split a native fp value `val' of format fs into X_s, X_e and the
+ * two fraction words exactly as stored; nothing is classified.
+ */
+#define _FP_UNPACK_RAW_2(fs, X, val)                    \
+  do {                                                  \
+    union _FP_UNION_##fs _raw;                          \
+    _raw.flt = (val);                                   \
+    X##_s  = _raw.bits.sign;                            \
+    X##_e  = _raw.bits.exp;                             \
+    X##_f1 = _raw.bits.frac1;                           \
+    X##_f0 = _raw.bits.frac0;                           \
+  } while (0)
+/*
+ * Rebuild a native fp value of format fs in `val' from X_s, X_e and
+ * the two fraction words as they are; no rounding or range checking.
+ */
+#define _FP_PACK_RAW_2(fs, val, X)                      \
+  do {                                                  \
+    union _FP_UNION_##fs _raw;                          \
+                                                        \
+    _raw.bits.sign  = X##_s;                            \
+    _raw.bits.exp   = X##_e;                            \
+    _raw.bits.frac1 = X##_f1;                           \
+    _raw.bits.frac0 = X##_f0;                           \
+    (val) = _raw.flt;                                   \
+  } while (0)
+/*
+ * Multiplication algorithms:
+ */
+/* Given a 1W * 1W => 2W primitive doit(hi, lo, a, b), do the extended multiplication.  */
+#define _FP_MUL_MEAT_2_wide(fs, R, X, Y, doit)                          \
+  do {                                                                  \
+    _FP_FRAC_DECL_4(_z); _FP_FRAC_DECL_2(_b); _FP_FRAC_DECL_2(_c);      \
+    /* four partial products: lo*lo, the two cross terms, hi*hi */      \
+    doit(_FP_FRAC_WORD_4(_z,1), _FP_FRAC_WORD_4(_z,0), X##_f0, Y##_f0); \
+    doit(_b_f1, _b_f0, X##_f0, Y##_f1);                                 \
+    doit(_c_f1, _c_f0, X##_f1, Y##_f0);                                 \
+    doit(_FP_FRAC_WORD_4(_z,3), _FP_FRAC_WORD_4(_z,2), X##_f1, Y##_f1); \
+    /* add each cross term one word up: passed as (0, hi, lo, 0) */     \
+    __FP_FRAC_ADD_4(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
+                    _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0),        \
+                    0, _b_f1, _b_f0, 0,                                 \
+                    _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
+                    _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0));       \
+    __FP_FRAC_ADD_4(_FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
+                    _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0),        \
+                    0, _c_f1, _c_f0, 0,                                 \
+                    _FP_FRAC_WORD_4(_z,3),_FP_FRAC_WORD_4(_z,2),        \
+                    _FP_FRAC_WORD_4(_z,1),_FP_FRAC_WORD_4(_z,0));       \
+                                                                        \
+    /* Normalize: since we know where the msb of the multiplicands      \
+       was (bit B), we know that the msb of the product is at           \
+       either 2B or 2B-1.  Only the two low words are kept.  */         \
+    _FP_FRAC_SRS_4(_z, _FP_WFRACBITS_##fs-1, 2*_FP_WFRACBITS_##fs);     \
+    R##_f0 = _FP_FRAC_WORD_4(_z,0);                                     \
+    R##_f1 = _FP_FRAC_WORD_4(_z,1);                                     \
+  } while (0)
+/* Two-word multiply using GMP's mpn_mul_n for the 2W x 2W => 4W product.
+ * The product is held in a four-word fraction declared with
+ * _FP_FRAC_DECL_4 so that _FP_FRAC_SRS_4 can address it as _z_f[].
+ * NOTE(review): no user of this macro is visible here and it has not
+ * been built; it needs mpn_mul_n and op-4.h's _FP_FRAC_SRS_4.
+ */
+#define _FP_MUL_MEAT_2_gmp(fs, R, X, Y)                                 \
+  do {                                                                  \
+    _FP_FRAC_DECL_4(_z);                                                \
+    _FP_W_TYPE _x[2], _y[2];                                            \
+    _x[0] = X##_f0; _x[1] = X##_f1;                                     \
+    _y[0] = Y##_f0; _y[1] = Y##_f1;                                     \
+                                                                        \
+    mpn_mul_n(_z_f, _x, _y, 2);                                         \
+                                                                        \
+    /* Normalize: the msb of each multiplicand was at bit B, so the     \
+       msb of the product is at either 2B or 2B-1.  */                  \
+    _FP_FRAC_SRS_4(_z, _FP_WFRACBITS_##fs-1, 2*_FP_WFRACBITS_##fs);     \
+    R##_f0 = _z_f[0];                                                   \
+    R##_f1 = _z_f[1];                                                   \
+  } while (0)
+/*
+ * Division algorithms:
+ * The two-word case below was arrived at by experiment (PMM); it
+ * appears to work, but its derivation is not documented here and
+ * the result may not be properly normalised.
+ * NOTE(review): treat this macro as unverified and test any change
+ * to it against known-good results.
+ */
+#define _FP_DIV_MEAT_2_udiv_64(fs, R, X, Y)                             \
+  do {                                                                  \
+    extern void _fp_udivmodti4(_FP_W_TYPE q[2], _FP_W_TYPE r[2],        \
+                               _FP_W_TYPE n1, _FP_W_TYPE n0,            \
+                               _FP_W_TYPE d1, _FP_W_TYPE d0);           \
+    _FP_W_TYPE _n_f3, _n_f2, _n_f1, _n_f0, _r_f1, _r_f0;                \
+    _FP_W_TYPE _q_f1, _q_f0, _m_f1, _m_f0;                              \
+    _FP_W_TYPE _rmem[2], _qmem[2];                                      \
+    /* I think this check is to ensure that the result is normalised.   \
+     * Assuming X,Y normalised (ie in [1.0,2.0)) X/Y will be in         \
+     * [0.5,2.0). Furthermore, it will be less than 1.0 iff X < Y.      \
+     * In this case we tweak things. (this is based on comments in      \
+     * the NetBSD FPU emulation code. )                                 \
+     * We know X,Y are normalised because we ensure this as part of     \
+     * the unpacking process. -- PMM                                    \
+     */                                                                 \
+    if (_FP_FRAC_GT_2(X, Y))                                            \
+      {                                                                 \
+/*      R##_e++; */                                                     \
+        _n_f3 = X##_f1 >> 1;                                            \
+        _n_f2 = X##_f1 << (_FP_W_TYPE_SIZE - 1) | X##_f0 >> 1;          \
+        _n_f1 = X##_f0 << (_FP_W_TYPE_SIZE - 1);                        \
+        _n_f0 = 0;                                                      \
+      }                                                                 \
+    else                                                                \
+      {                                                                 \
+        R##_e--;                                                        \
+        _n_f3 = X##_f1;                                                 \
+        _n_f2 = X##_f0;                                                 \
+        _n_f1 = _n_f0 = 0;                                              \
+      }                                                                 \
+                                                                        \
+    /* Normalize, i.e. make the most significant bit of the             \
+       denominator set.  CHANGED: - 1 to nothing -- PMM */              \
+    _FP_FRAC_SLL_2(Y, _FP_WFRACXBITS_##fs /* -1 */);                    \
+                                                                        \
+    /* Do the division a word at a time with the _fp_udivmodti4         \
+       primitive snagged from libgcc2.c; high quotient word first.  */  \
+                                                                        \
+    _fp_udivmodti4(_qmem, _rmem, _n_f3, _n_f2, 0, Y##_f1);              \
+    _q_f1 = _qmem[0];                                                   \
+    umul_ppmm(_m_f1, _m_f0, _q_f1, Y##_f0);                             \
+    _r_f1 = _rmem[0];                                                   \
+    _r_f0 = _n_f1;                                                      \
+    if (_FP_FRAC_GT_2(_m, _r))                                          \
+      {                                                                 \
+        _q_f1--;                                                        \
+        _FP_FRAC_ADD_2(_r, _r, Y);                                      \
+        if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))              \
+          {                                                             \
+            _q_f1--;                                                    \
+            _FP_FRAC_ADD_2(_r, _r, Y);                                  \
+          }                                                             \
+      }                                                                 \
+    _FP_FRAC_SUB_2(_r, _r, _m);                                         \
+    /* same estimate-and-correct step for the low quotient word */      \
+    _fp_udivmodti4(_qmem, _rmem, _r_f1, _r_f0, 0, Y##_f1);              \
+    _q_f0 = _qmem[0];                                                   \
+    umul_ppmm(_m_f1, _m_f0, _q_f0, Y##_f0);                             \
+    _r_f1 = _rmem[0];                                                   \
+    _r_f0 = _n_f0;                                                      \
+    if (_FP_FRAC_GT_2(_m, _r))                                          \
+      {                                                                 \
+        _q_f0--;                                                        \
+        _FP_FRAC_ADD_2(_r, _r, Y);                                      \
+        if (_FP_FRAC_GE_2(_r, Y) && _FP_FRAC_GT_2(_m, _r))              \
+          {                                                             \
+            _q_f0--;                                                    \
+            _FP_FRAC_ADD_2(_r, _r, Y);                                  \
+          }                                                             \
+      }                                                                 \
+    _FP_FRAC_SUB_2(_r, _r, _m);                                         \
+    /* a nonzero final remainder becomes the sticky bit of R_f0 */      \
+    R##_f1 = _q_f1;                                                     \
+    R##_f0 = _q_f0 | ((_r_f1 | _r_f0) != 0);                            \
+    /* adjust so answer is normalized again. I'm not sure what the      \
+     * final sz param should be. In practice it's never used since      \
+     * N is 1 which is always going to be < _FP_W_TYPE_SIZE...          \
+     */                                                                 \
+    /* _FP_FRAC_SRS_2(R,1,_FP_WFRACBITS_##fs);  */                      \
+  } while (0)
+#define _FP_DIV_MEAT_2_gmp(fs, R, X, Y)                                 \
+  do {  /* two-word divide through GMP's mpn_divrem (unbuilt here) */   \
+    _FP_W_TYPE _x[4], _y[2], _z[4];                                     \
+    _y[0] = Y##_f0; _y[1] = Y##_f1;                                     \
+    _x[0] = _x[3] = 0;                                                  \
+    if (_FP_FRAC_GT_2(X, Y))                                            \
+      {                                                                 \
+        R##_e++;                                                        \
+        _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE) |   \
+                 X##_f1 >> (_FP_W_TYPE_SIZE -                           \
+                            (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE))); \
+        _x[2] = X##_f1 << (_FP_WFRACBITS_##fs-1 - _FP_W_TYPE_SIZE);     \
+      }                                                                 \
+    else                                                                \
+      {                                                                 \
+        _x[1] = (X##_f0 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE) |     \
+                 X##_f1 >> (_FP_W_TYPE_SIZE -                           \
+                            (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE)));   \
+        _x[2] = X##_f1 << (_FP_WFRACBITS_##fs - _FP_W_TYPE_SIZE);       \
+      }                                                                 \
+    /* quotient lands in _z; what is left in _x[0..1] feeds the sticky bit */ \
+    (void) mpn_divrem (_z, 0, _x, 4, _y, 2);                            \
+    R##_f1 = _z[1];                                                     \
+    R##_f0 = _z[0] | ((_x[0] | _x[1]) != 0);                            \
+  } while (0)
+/*
+ * Square root algorithms:
+ * We have just one right now, maybe Newton approximation
+ * should be added for those machines where division is fast.
+ */
+#define _FP_SQRT_MEAT_2(R, S, T, X, q)                  \
+  do {                                                  \
+    while (q)   /* pass 1: bits of the high word */     \
+      {                                                 \
+        T##_f1 = S##_f1 + q;                            \
+        if (T##_f1 <= X##_f1)                           \
+          {                                             \
+            S##_f1 = T##_f1 + q;                        \
+            X##_f1 -= T##_f1;                           \
+            R##_f1 += q;                                \
+          }                                             \
+        _FP_FRAC_SLL_2(X, 1);                           \
+        q >>= 1;                                        \
+      }                                                 \
+    q = (_FP_W_TYPE)1 << (_FP_W_TYPE_SIZE - 1);         \
+    while (q)   /* pass 2: bits of the low word */      \
+      {                                                 \
+        T##_f0 = S##_f0 + q;                            \
+        T##_f1 = S##_f1;                                \
+        if (T##_f1 < X##_f1 ||                          \
+            (T##_f1 == X##_f1 && T##_f0 < X##_f0))      \
+          {                                             \
+            S##_f0 = T##_f0 + q;                        \
+            if (((_FP_WS_TYPE)T##_f0) < 0 &&            \
+                ((_FP_WS_TYPE)S##_f0) >= 0)             \
+              S##_f1++;   /* S_f0 wrapped: carry up */  \
+            _FP_FRAC_SUB_2(X, X, T);                    \
+            R##_f0 += q;                                \
+          }                                             \
+        _FP_FRAC_SLL_2(X, 1);                           \
+        q >>= 1;                                        \
+      }                                                 \
+  } while (0)
+/*
+ * Assembly/disassembly for converting to/from integral types.
+ * No shifting or overflow handled here; rsize is r's width in bits.
+ */
+#define _FP_FRAC_ASSEMBLE_2(r, X, rsize)        \
+  do {                                          \
+    if (rsize > _FP_W_TYPE_SIZE)                \
+      {                                         \
+        r = X##_f1;                             \
+        r <<= _FP_W_TYPE_SIZE;                  \
+        r += X##_f0;                            \
+      }                                         \
+    else                                        \
+      r = X##_f0;                               \
+  } while (0)
+#define _FP_FRAC_DISASSEMBLE_2(X, r, rsize)                             \
+  do {  /* split r: low word to X_f0, anything above it to X_f1 */      \
+    X##_f1 = (rsize > _FP_W_TYPE_SIZE ? r >> _FP_W_TYPE_SIZE : 0);      \
+    X##_f0 = r;                                                         \
+  } while (0)
+/*
+ * Convert FP values between word sizes
+ */
+#define _FP_FRAC_CONV_1_2(dfs, sfs, D, S)                               \
+  do {  /* two words -> one; note the source S is shifted in place */   \
+    _FP_FRAC_SRS_2(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs),      \
+                   _FP_WFRACBITS_##sfs);                                \
+    D##_f = S##_f0;                                                     \
+  } while (0)
+#define _FP_FRAC_CONV_2_1(dfs, sfs, D, S)                               \
+  do {  /* one word -> two: copy into the low word, then shift up */    \
+    D##_f0 = S##_f;                                                     \
+    D##_f1 = 0;                                                         \
+    _FP_FRAC_SLL_2(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs));     \
+  } while (0)
diff --git a/arch/ppc/math-emu/op-4.h b/arch/ppc/math-emu/op-4.h
new file mode 100644
index 000000000000..fcdd6d064c54
--- /dev/null
+++ b/arch/ppc/math-emu/op-4.h
@@ -0,0 +1,297 @@
+/*
+ * Basic four-word fraction declaration and manipulation.
+ *
+ * When adding quadword support for 32 bit machines, we need
+ * to be a little careful as double multiply uses some of these
+ * macros: (in op-2.h)
+ * _FP_MUL_MEAT_2_wide() uses _FP_FRAC_DECL_4, _FP_FRAC_WORD_4,
+ * _FP_FRAC_ADD_4, _FP_FRAC_SRS_4
+ * _FP_MUL_MEAT_2_gmp() uses _FP_FRAC_SRS_4 (and should use
+ * _FP_FRAC_DECL_4: it appears to be broken and is not used
+ * anywhere anyway. )
+ *
+ * I've now fixed all the macros that were here from the sparc64 code.
+ * [*none* of the shift macros were correct!] -- PMM 02/1998
+ *
+ * The only quadword stuff that remains to be coded is:
+ * 1) the conversion to/from ints, which requires
+ * that we check (in op-common.h) that the following do the right thing
+ * for quadwords: _FP_TO_INT(Q,4,r,X,rsz,rsg), _FP_FROM_INT(Q,4,X,r,rs,rt)
+ * 2) multiply, divide and sqrt, which require:
+ * _FP_MUL_MEAT_4_*(R,X,Y), _FP_DIV_MEAT_4_*(R,X,Y), _FP_SQRT_MEAT_4(R,S,T,X,q),
+ * This also needs _FP_MUL_MEAT_Q and _FP_DIV_MEAT_Q to be defined to
+ * some suitable _FP_MUL_MEAT_4_* macros in sfp-machine.h.
+ * [we're free to choose whatever FP_MUL_MEAT_4_* macros we need for
+ * these; they are used nowhere else. ]
+ */
+/* Declare the storage of a four-word fraction named X: the array X##_f[4]. */
+#define _FP_FRAC_DECL_4(X)      _FP_W_TYPE X##_f[4]
+/* Copy all four fraction words of S into D; expands to one comma expression. */
+#define _FP_FRAC_COPY_4(D,S)                    \
+  (D##_f[0] = S##_f[0], D##_f[1] = S##_f[1],    \
+   D##_f[2] = S##_f[2], D##_f[3] = S##_f[3])
+/* The _FP_FRAC_SET_n(X,I) macro is intended for use with another
+ * macro such as _FP_ZEROFRAC_n which returns n comma separated values.
+ * The result is that we get an expansion of __FP_FRAC_SET_n(X,I3,I2,I1,I0)
+ * (most significant word first) which just assigns the In values to the
+ * array X##_f[].
+ * This is why the number of parameters doesn't appear to match
+ * at first glance...      -- PMM
+ */
+/* Assign the words of X from I, which must expand to four comma-separated
+ * values (for example _FP_ZEROFRAC_4). */
+#define _FP_FRAC_SET_4(X,I)     __FP_FRAC_SET_4(X, I)
+/* Most significant word of X (index 3). */
+#define _FP_FRAC_HIGH_4(X)      (X##_f[3])
+/* Least significant word of X (index 0). */
+#define _FP_FRAC_LOW_4(X)       (X##_f[0])
+/* Word number w of X; w = 0 is the least significant word. */
+#define _FP_FRAC_WORD_4(X,w)    (X##_f[w])
+/*
+ * Shift the four-word fraction X left by N bits, in place; the vacated
+ * low bits are zero-filled.  N is expected to lie in
+ * [0, 4*_FP_W_TYPE_SIZE).
+ *
+ * A shift by a whole number of words is handled on its own path: the
+ * general path would otherwise evaluate "word >> _FP_W_TYPE_SIZE", which
+ * is undefined behaviour in C and does not reliably yield zero.
+ */
+#define _FP_FRAC_SLL_4(X,N)                                             \
+  do {                                                                  \
+    _FP_I_TYPE _up, _down, _skip, _i;                                   \
+    _skip = (N) / _FP_W_TYPE_SIZE;      /* whole words to move */       \
+    _up = (N) % _FP_W_TYPE_SIZE;        /* remaining bit shift */       \
+    _down = _FP_W_TYPE_SIZE - _up;                                      \
+    if (!_up)                                                           \
+      {                                                                 \
+        /* pure word move: no bits cross a word boundary */             \
+        for (_i = 3; _i >= _skip; --_i)                                 \
+          X##_f[_i] = X##_f[_i-_skip];                                  \
+      }                                                                 \
+    else                                                                \
+      {                                                                 \
+        /* top word first, so sources are read before being overwritten */ \
+        for (_i = 3; _i > _skip; --_i)                                  \
+          X##_f[_i] = X##_f[_i-_skip] << _up | X##_f[_i-_skip-1] >> _down; \
+        /* bugfixed: was X##_f[_i] <<= _up;  -- PMM 02/1998 */          \
+        X##_f[_i] = X##_f[0] << _up;                                    \
+        --_i;                                                           \
+      }                                                                 \
+    /* everything below the lowest written word becomes zero */         \
+    for (; _i >= 0; --_i)                                               \
+      X##_f[_i] = 0;                                                    \
+  } while (0)
+/*
+ * Shift the four-word fraction X right by N bits, in place; the vacated
+ * high bits are zero-filled and the bits shifted out are discarded.
+ * N is expected to lie in [0, 4*_FP_W_TYPE_SIZE).
+ * (Historical note from the original author: this one was broken too.)
+ *
+ * A shift by a whole number of words is handled on its own path so that
+ * "word << _FP_W_TYPE_SIZE" (undefined behaviour in C) is never evaluated.
+ */
+#define _FP_FRAC_SRL_4(X,N)                                             \
+  do {                                                                  \
+    _FP_I_TYPE _up, _down, _skip, _i;                                   \
+    _skip = (N) / _FP_W_TYPE_SIZE;      /* whole words to move */       \
+    _down = (N) % _FP_W_TYPE_SIZE;      /* remaining bit shift */       \
+    _up = _FP_W_TYPE_SIZE - _down;                                      \
+    if (!_down)                                                         \
+      {                                                                 \
+        /* pure word move: no bits cross a word boundary */             \
+        for (_i = 0; _i <= 3-_skip; ++_i)                               \
+          X##_f[_i] = X##_f[_i+_skip];                                  \
+      }                                                                 \
+    else                                                                \
+      {                                                                 \
+        /* bottom word first, so sources are read before being overwritten */ \
+        for (_i = 0; _i < 3-_skip; ++_i)                                \
+          X##_f[_i] = X##_f[_i+_skip] >> _down | X##_f[_i+_skip+1] << _up; \
+        X##_f[_i] = X##_f[3] >> _down;                                  \
+        ++_i;                                                           \
+      }                                                                 \
+    /* everything above the highest written word becomes zero */        \
+    for (; _i < 4; ++_i)                                                \
+      X##_f[_i] = 0;                                                    \
+  } while (0)
+/* Right shift with sticky-lsb.
+ * What this actually means is that we do a standard right-shift,
+ * but that if any of the bits that fall off the right hand side
+ * were one then we always set the LSbit.
+ */
+/*
+ * X is shifted in place by N bits, with N expected to lie in
+ * [0, 4*_FP_W_TYPE_SIZE).  The size argument is not used by this
+ * implementation.
+ *
+ * When N is a whole number of words no partial word is lost, so neither
+ * the sticky computation nor the shift may evaluate
+ * "word << _FP_W_TYPE_SIZE" (undefined behaviour in C, and on some
+ * machines it would wrongly report lost bits).
+ */
+#define _FP_FRAC_SRS_4(X,N,size)                                        \
+  do {                                                                  \
+    _FP_I_TYPE _up, _down, _skip, _i;                                   \
+    _FP_W_TYPE _s;                                                      \
+    _skip = (N) / _FP_W_TYPE_SIZE;                                      \
+    _down = (N) % _FP_W_TYPE_SIZE;                                      \
+    _up = _FP_W_TYPE_SIZE - _down;                                      \
+    /* whole words that fall off the right hand side */                 \
+    for (_s = _i = 0; _i < _skip; ++_i)                                 \
+      _s |= X##_f[_i];                                                  \
+    if (!_down)                                                         \
+      {                                                                 \
+        for (_i = 0; _i <= 3-_skip; ++_i)                               \
+          X##_f[_i] = X##_f[_i+_skip];                                  \
+      }                                                                 \
+    else                                                                \
+      {                                                                 \
+        /* low _down bits of the first surviving word are lost too */   \
+        _s |= X##_f[_skip] << _up;                                      \
+        for (_i = 0; _i < 3-_skip; ++_i)                                \
+          X##_f[_i] = X##_f[_i+_skip] >> _down | X##_f[_i+_skip+1] << _up; \
+        X##_f[_i] = X##_f[3] >> _down;                                  \
+        ++_i;                                                           \
+      }                                                                 \
+    for (; _i < 4; ++_i)                                                \
+      X##_f[_i] = 0;                                                    \
+    /* s is now != 0 if we want to set the LSbit */                     \
+    /* don't fix the LSB until the very end when we're sure f[0] is stable */ \
+    X##_f[0] |= (_s != 0);                                              \
+  } while (0)
+/*
+ * Four-word arithmetic front ends.  Each one spells out the individual
+ * words of its operands, most significant word first, and hands them to
+ * the matching __FP_FRAC_*_4 primitive.
+ */
+/* R = X + Y */
+#define _FP_FRAC_ADD_4(R,X,Y)                                           \
+  __FP_FRAC_ADD_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0],               \
+                  X##_f[3], X##_f[2], X##_f[1], X##_f[0],               \
+                  Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
+/* R = X - Y */
+#define _FP_FRAC_SUB_4(R,X,Y)                                           \
+  __FP_FRAC_SUB_4(R##_f[3], R##_f[2], R##_f[1], R##_f[0],               \
+                  X##_f[3], X##_f[2], X##_f[1], X##_f[0],               \
+                  Y##_f[3], Y##_f[2], Y##_f[1], Y##_f[0])
+/* X += I, where I is a single-word value added to the lowest word */
+#define _FP_FRAC_ADDI_4(X,I)                                            \
+  __FP_FRAC_ADDI_4(X##_f[3], X##_f[2], X##_f[1], X##_f[0], I)
+/* Word lists for _FP_FRAC_SET_4, most significant word first: the all-zero
+ * fraction and the fraction whose only set bit is the lowest one. */
+#define _FP_ZEROFRAC_4  0,0,0,0
+#define _FP_MINFRAC_4   0,0,0,1
+/* True when all four words of X are zero. */
+#define _FP_FRAC_ZEROP_4(X)     ((X##_f[0] | X##_f[1] | X##_f[2] | X##_f[3]) == 0)
+/* True when the most significant word of X, reinterpreted as the signed
+ * type _FP_WS_TYPE, is negative. */
+#define _FP_FRAC_NEGP_4(X)      ((_FP_WS_TYPE)X##_f[3] < 0)
+/*
+ * Nonzero when the overflow bit of format fs is set in X.  The mask
+ * _FP_OVERFLOW_##fs is applied to the most significant word (X##_f[3]),
+ * the same word in which the unpack code places the implicit bit.  The
+ * original tested the least significant word X##_f[0].
+ * NOTE(review): assumes _FP_OVERFLOW_##fs is a mask relative to the top
+ * word, as the per-format bit masks are -- confirm against the format
+ * header.
+ */
+#define _FP_FRAC_OVERP_4(fs,X)  (X##_f[3] & _FP_OVERFLOW_##fs)
+/* True (1) when X and Y hold the same value in all four words, else 0.
+ * The word-wise differences are OR-ed together and tested once. */
+#define _FP_FRAC_EQ_4(X,Y)                              \
+ (((X##_f[3] ^ Y##_f[3]) | (X##_f[2] ^ Y##_f[2])        \
+   | (X##_f[1] ^ Y##_f[1]) | (X##_f[0] ^ Y##_f[0])) == 0)
+/* True when X > Y as unsigned four-word values: the first pair of words
+ * that differs, scanning from the most significant, decides. */
+#define _FP_FRAC_GT_4(X,Y)                                      \
+ (X##_f[3] != Y##_f[3] ? X##_f[3] > Y##_f[3]                    \
+  : X##_f[2] != Y##_f[2] ? X##_f[2] > Y##_f[2]                  \
+  : X##_f[1] != Y##_f[1] ? X##_f[1] > Y##_f[1]                  \
+  : X##_f[0] > Y##_f[0])
+/* True when X >= Y as unsigned four-word values: the first pair of words
+ * that differs, scanning from the most significant, decides; equal values
+ * compare true. */
+#define _FP_FRAC_GE_4(X,Y)                                      \
+ (X##_f[3] != Y##_f[3] ? X##_f[3] > Y##_f[3]                    \
+  : X##_f[2] != Y##_f[2] ? X##_f[2] > Y##_f[2]                  \
+  : X##_f[1] != Y##_f[1] ? X##_f[1] > Y##_f[1]                  \
+  : X##_f[0] >= Y##_f[0])
+/*
+ * Count the leading zero bits of the four-word fraction X and store the
+ * count in R.  Words are examined from the most significant (X##_f[3])
+ * downwards; the first non-zero word is handed to __FP_CLZ and every
+ * all-zero word skipped before it adds _FP_W_TYPE_SIZE to the result.
+ * If all four words are zero, __FP_CLZ is applied to a zero word and the
+ * result is whatever that primitive yields for zero.
+ */
+#define _FP_FRAC_CLZ_4(R,X)             \
+  do {                                  \
+    if (X##_f[3])                       \
+    {                                   \
+        __FP_CLZ(R,X##_f[3]);           \
+    }                                   \
+    else if (X##_f[2])                  \
+    {                                   \
+        __FP_CLZ(R,X##_f[2]);           \
+        R += _FP_W_TYPE_SIZE;           \
+    }                                   \
+    else if (X##_f[1])                  \
+    {                                   \
+        /* fixed: this branch used to count the zeros of X##_f[2] */ \
+        __FP_CLZ(R,X##_f[1]);           \
+        R += _FP_W_TYPE_SIZE*2;         \
+    }                                   \
+    else                                \
+    {                                   \
+        __FP_CLZ(R,X##_f[0]);           \
+        R += _FP_W_TYPE_SIZE*3;         \
+    }                                   \
+  } while(0)
+/*
+ * Split the native value val of format fs into its raw fields without any
+ * interpretation: val is stored through the flt member of
+ * union _FP_UNION_##fs and the sign, exponent and four fraction words are
+ * read back from its bits member into X##_s, X##_e and X##_f[0..3].
+ */
+#define _FP_UNPACK_RAW_4(fs, X, val)                            \
+  do {                                                          \
+    union _FP_UNION_##fs _raw;                                  \
+    _raw.flt = (val);                                           \
+    X##_s  = _raw.bits.sign;                                    \
+    X##_e  = _raw.bits.exp;                                     \
+    X##_f[3] = _raw.bits.frac3;                                 \
+    X##_f[2] = _raw.bits.frac2;                                 \
+    X##_f[1] = _raw.bits.frac1;                                 \
+    X##_f[0] = _raw.bits.frac0;                                 \
+  } while (0)
+/*
+ * Inverse of _FP_UNPACK_RAW_4: store X##_s, X##_e and X##_f[0..3] into the
+ * bits member of union _FP_UNION_##fs and assign the resulting flt member
+ * to val.  No rounding or range checking is done here.
+ */
+#define _FP_PACK_RAW_4(fs, val, X)                              \
+  do {                                                          \
+    union _FP_UNION_##fs _raw;                                  \
+    _raw.bits.sign  = X##_s;                                    \
+    _raw.bits.exp   = X##_e;                                    \
+    _raw.bits.frac3 = X##_f[3];                                 \
+    _raw.bits.frac2 = X##_f[2];                                 \
+    _raw.bits.frac1 = X##_f[1];                                 \
+    _raw.bits.frac0 = X##_f[0];                                 \
+    (val) = _raw.flt;                                           \
+  } while (0)
+/*
+ * Internals
+ */
+/* Assign the four words of X from the given values, listed most
+ * significant word first (I3 goes to X##_f[3], I0 to X##_f[0]). */
+#define __FP_FRAC_SET_4(X,I3,I2,I1,I0)                                  \
+  (X##_f[3] = I3, X##_f[2] = I2, X##_f[1] = I1, X##_f[0] = I0)
+#ifndef __FP_FRAC_ADD_4
+#define __FP_FRAC_ADD_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)            \
+  (r0 = x0 + y0,                                                        \
+   r1 = x1 + y1 + (r0 < x0),                                            \
+   r2 = x2 + y2 + (r1 < x1),                                            \
+   r3 = x3 + y3 + (r2 < x2))
+#endif
+#ifndef __FP_FRAC_SUB_4
+#define __FP_FRAC_SUB_4(r3,r2,r1,r0,x3,x2,x1,x0,y3,y2,y1,y0)            \
+  (r0 = x0 - y0,                                                        \
+   r1 = x1 - y1 - (r0 > x0),                                            \
+   r2 = x2 - y2 - (r1 > x1),                                            \
+   r3 = x3 - y3 - (r2 > x2))
+#endif
+#ifndef __FP_FRAC_ADDI_4
+/* I always wanted to be a lisp programmer :-> */
+#define __FP_FRAC_ADDI_4(x3,x2,x1,x0,i)                                 \
+  (x3 += ((x2 += ((x1 += ((x0 += i) < x0)) < x1) < x2)))
+#endif
+/* Convert FP values between word sizes. This appears to be more
+ * complicated than I'd have expected it to be, so these might be
+ * wrong... These macros are in any case somewhat bogus because they
+ * use information about what various FRAC_n variables look like
+ * internally [eg, that 2 word vars are X_f0 and x_f1]. But so do
+ * the ones in op-2.h and op-1.h.
+ */
+/*
+ * Narrow the four-word fraction of S (format sfs) into the one-word
+ * fraction of D (format dfs).  S is shifted right in place, with the
+ * sticky LSB, by the difference between the two formats' _FP_WFRACBITS
+ * values; its lowest word is then copied into D##_f.  S is modified.
+ */
+#define _FP_FRAC_CONV_1_4(dfs, sfs, D, S)                               \
+   do {                                                                 \
+     _FP_FRAC_SRS_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs),     \
+                        _FP_WFRACBITS_##sfs);                           \
+     D##_f = S##_f[0];                                                   \
+  } while (0)
+/*
+ * Narrow the four-word fraction of S (format sfs) into the two-word
+ * fraction of D (format dfs).  S is shifted right in place, with the
+ * sticky LSB, by the difference between the two formats' _FP_WFRACBITS
+ * values; its two lowest words are then copied into D##_f0 and D##_f1.
+ * S is modified.
+ */
+#define _FP_FRAC_CONV_2_4(dfs, sfs, D, S)                               \
+   do {                                                                 \
+     _FP_FRAC_SRS_4(S, (_FP_WFRACBITS_##sfs - _FP_WFRACBITS_##dfs),     \
+                        _FP_WFRACBITS_##sfs);                           \
+     D##_f0 = S##_f[0];                                                  \
+     D##_f1 = S##_f[1];                                                  \
+  } while (0)
+/* Assembly/disassembly for converting to/from integral types.
+ * No shifting or overflow handled here.
+ */
+/* Put the FP value X into r, which is an integer of size rsize. */
+/*
+ * rsize is the width of r in bits and selects how many words of X are
+ * packed: one, two, or all four.  Words are appended from the most
+ * significant downwards by repeated shift-and-add on r.
+ */
+#define _FP_FRAC_ASSEMBLE_4(r, X, rsize)                                \
+  do {                                                                  \
+    if (rsize > 2*_FP_W_TYPE_SIZE)                                      \
+    {                                                                   \
+      /* a three-word integer (implausible) and a four-word integer */  \
+      /* are deliberately handled as a single case.                 */  \
+      r = X##_f[3];                                                     \
+      r <<= _FP_W_TYPE_SIZE;                                            \
+      r += X##_f[2];                                                    \
+      r <<= _FP_W_TYPE_SIZE;                                            \
+      r += X##_f[1];                                                    \
+      r <<= _FP_W_TYPE_SIZE;                                            \
+      r += X##_f[0];                                                    \
+    }                                                                   \
+    else if (rsize > _FP_W_TYPE_SIZE)                                   \
+    {                                                                   \
+      r = X##_f[1];                                                     \
+      r <<= _FP_W_TYPE_SIZE;                                            \
+      r += X##_f[0];                                                    \
+    }                                                                   \
+    else                                                                \
+      r = X##_f[0];                                                     \
+  } while (0)
+/* "No disassemble Number Five!" */
+/* move an integer of size rsize into X's fractional part. We rely on
+ * the _f[] array consisting of words of size _FP_W_TYPE_SIZE to avoid
+ * having to mask the values we store into it.
+ */
+/*
+ * Word k of X receives r >> k*_FP_W_TYPE_SIZE when r (rsize bits wide)
+ * is wide enough to reach that word, and zero otherwise.
+ */
+#define _FP_FRAC_DISASSEMBLE_4(X, r, rsize)                             \
+  do {                                                                  \
+    X##_f[0] = r;                                                       \
+    if (rsize > _FP_W_TYPE_SIZE)                                        \
+      X##_f[1] = r >> _FP_W_TYPE_SIZE;                                  \
+    else                                                                \
+      X##_f[1] = 0;                                                     \
+    if (rsize > 2*_FP_W_TYPE_SIZE)                                      \
+      X##_f[2] = r >> 2*_FP_W_TYPE_SIZE;                                \
+    else                                                                \
+      X##_f[2] = 0;                                                     \
+    if (rsize > 3*_FP_W_TYPE_SIZE)                                      \
+      X##_f[3] = r >> 3*_FP_W_TYPE_SIZE;                                \
+    else                                                                \
+      X##_f[3] = 0;                                                     \
+  } while (0)
+/*
+ * Widen the one-word fraction of S (format sfs) into the four-word
+ * fraction of D (format dfs).  S##_f becomes the lowest word of D, the
+ * upper three words are cleared, and D is then shifted left by the
+ * difference between the two formats' _FP_WFRACBITS values.
+ */
+#define _FP_FRAC_CONV_4_1(dfs, sfs, D, S)                               \
+   do {                                                                 \
+     D##_f[0] = S##_f;                                                  \
+     D##_f[1] = D##_f[2] = D##_f[3] = 0;                                \
+     _FP_FRAC_SLL_4(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs));    \
+   } while (0)
+/*
+ * Widen the two-word fraction of S (format sfs) into the four-word
+ * fraction of D (format dfs).  S##_f0 and S##_f1 become the two lowest
+ * words of D, the upper two words are cleared, and D is then shifted left
+ * by the difference between the two formats' _FP_WFRACBITS values.
+ */
+#define _FP_FRAC_CONV_4_2(dfs, sfs, D, S)                               \
+   do {                                                                 \
+     D##_f[0] = S##_f0;                                                 \
+     D##_f[1] = S##_f1;                                                 \
+     D##_f[2] = D##_f[3] = 0;                                           \
+     _FP_FRAC_SLL_4(D, (_FP_WFRACBITS_##dfs - _FP_WFRACBITS_##sfs));    \
+   } while (0)
+/* FIXME! This has to be written.
+ * Until then the macro expands to nothing, so a four-word square root
+ * built on it performs no computation at all.
+ */
+#define _FP_SQRT_MEAT_4(R, S, T, X, q)
diff --git a/arch/ppc/math-emu/op-common.h b/arch/ppc/math-emu/op-common.h
new file mode 100644
index 000000000000..afb82b6498ce
--- /dev/null
+++ b/arch/ppc/math-emu/op-common.h
@@ -0,0 +1,688 @@
+/*
+ * Declare the working variables of an unpacked value named X whose
+ * fraction occupies wc words: X##_c (class), X##_s (sign), X##_e
+ * (exponent), plus the fraction storage declared by _FP_FRAC_DECL_<wc>.
+ */
+#define _FP_DECL(wc, X)                 \
+  _FP_I_TYPE X##_c, X##_s, X##_e;       \
+  _FP_FRAC_DECL_##wc(X)
+/*
+ * Finish truly unpacking a native fp value by classifying the kind
+ * of fp value and normalizing both the exponent and the fraction.
+ */
+/*
+ * fs is the format suffix, wc the fraction word count, X the value whose
+ * raw fields (X##_e, X##_f...) were already extracted.  On exit X##_c
+ * holds one of FP_CLS_NORMAL, FP_CLS_ZERO, FP_CLS_INF or FP_CLS_NAN; for
+ * FP_CLS_NORMAL the exponent has been unbiased and the fraction shifted
+ * left to make room for _FP_WORKBITS extra low bits.
+ */
+#define _FP_UNPACK_CANONICAL(fs, wc, X)                                 \
+do {                                                                    \
+  switch (X##_e)                                                        \
+  {                                                                     \
+  default:                                                              \
+    /* ordinary exponent: restore the implicit bit, then unbias */      \
+    _FP_FRAC_HIGH_##wc(X) |= _FP_IMPLBIT_##fs;                          \
+    _FP_FRAC_SLL_##wc(X, _FP_WORKBITS);                                 \
+    X##_e -= _FP_EXPBIAS_##fs;                                          \
+    X##_c = FP_CLS_NORMAL;                                              \
+    break;                                                              \
+                                                                        \
+  case 0:                                                               \
+    /* zero exponent field: either a true zero or a denormal */         \
+    if (_FP_FRAC_ZEROP_##wc(X))                                         \
+      X##_c = FP_CLS_ZERO;                                              \
+    else                                                                \
+      {                                                                 \
+        /* a denormalized number */                                     \
+        _FP_I_TYPE _shift;                                              \
+        /* shift left by the leading-zero count beyond _FP_FRACXBITS */ \
+        /* and lower the exponent by the same amount                 */ \
+        _FP_FRAC_CLZ_##wc(_shift, X);                                   \
+        _shift -= _FP_FRACXBITS_##fs;                                   \
+        _FP_FRAC_SLL_##wc(X, (_shift+_FP_WORKBITS));                    \
+        X##_e -= _FP_EXPBIAS_##fs - 1 + _shift;                         \
+        X##_c = FP_CLS_NORMAL;                                          \
+      }                                                                 \
+    break;                                                              \
+                                                                        \
+  case _FP_EXPMAX_##fs:                                                 \
+    /* maximum exponent field: infinity if the fraction is zero */      \
+    if (_FP_FRAC_ZEROP_##wc(X))                                         \
+      X##_c = FP_CLS_INF;                                               \
+    else                                                                \
+      /* we don't differentiate between signaling and quiet nans */     \
+      X##_c = FP_CLS_NAN;                                               \
+    break;                                                              \
+  }                                                                     \
+} while (0)
+/*
+ * Before packing the bits back into the native fp result, take care
+ * of such mundane things as rounding and overflow.  Also, for some
+ * kinds of fp values, the original parts may not have been fully
+ * extracted -- but that is ok, we can regenerate them now.
+ */
+/*
+ * fs is the format suffix, wc the fraction word count, X the classified
+ * value.  X##_e and the fraction of X are rewritten in place into the
+ * raw (biased exponent, packed fraction) form selected by X##_c.
+ *
+ * This is a statement expression; its value is an int accumulated from
+ * the return value of _FP_ROUND plus EFLAG_OVERFLOW and EFLAG_UNDERFLOW
+ * as they arise.
+ */
+#define _FP_PACK_CANONICAL(fs, wc, X)                           \
+({int __ret = 0;                                                \
+  switch (X##_c)                                                \
+  {                                                             \
+  case FP_CLS_NORMAL:                                           \
+    X##_e += _FP_EXPBIAS_##fs;                                  \
+    if (X##_e > 0)                                              \
+      {                                                         \
+        /* normal range: round, then drop the work bits */      \
+        __ret |= _FP_ROUND(wc, X);                              \
+        if (_FP_FRAC_OVERP_##wc(fs, X))                         \
+          {                                                     \
+            /* rounding carried out: one extra shift, bump exponent */ \
+            _FP_FRAC_SRL_##wc(X, (_FP_WORKBITS+1));             \
+            X##_e++;                                            \
+          }                                                     \
+        else                                                    \
+          _FP_FRAC_SRL_##wc(X, _FP_WORKBITS);                   \
+        if (X##_e >= _FP_EXPMAX_##fs)                           \
+          {                                                     \
+            /* overflow to infinity */                          \
+            X##_e = _FP_EXPMAX_##fs;                            \
+            _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc);            \
+            __ret |= EFLAG_OVERFLOW;                            \
+          }                                                     \
+      }                                                         \
+    else                                                        \
+      {                                                         \
+        /* we've got a denormalized number */                   \
+        /* X##_e now becomes the right-shift count */           \
+        /* NOTE(review): _FP_ROUND is not invoked on this path */ \
+        X##_e = -X##_e + 1;                                     \
+        if (X##_e <= _FP_WFRACBITS_##fs)                        \
+          {                                                     \
+            _FP_FRAC_SRS_##wc(X, X##_e, _FP_WFRACBITS_##fs);    \
+            _FP_FRAC_SLL_##wc(X, 1);                            \
+            if (_FP_FRAC_OVERP_##wc(fs, X))                     \
+              {                                                 \
+                X##_e = 1;                                      \
+                _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc);        \
+              }                                                 \
+            else                                                \
+              {                                                 \
+                X##_e = 0;                                      \
+                _FP_FRAC_SRL_##wc(X, _FP_WORKBITS+1);           \
+                __ret |= EFLAG_UNDERFLOW;                       \
+              }                                                 \
+          }                                                     \
+        else                                                    \
+          {                                                     \
+            /* underflow to zero */                             \
+            X##_e = 0;                                          \
+            _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc);            \
+            __ret |= EFLAG_UNDERFLOW;                           \
+          }                                                     \
+      }                                                         \
+    break;                                                      \
+                                                                \
+  case FP_CLS_ZERO:                                             \
+    X##_e = 0;                                                  \
+    _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc);                    \
+    break;                                                      \
+                                                                \
+  case FP_CLS_INF:                                              \
+    X##_e = _FP_EXPMAX_##fs;                                    \
+    _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc);                    \
+    break;                                                      \
+                                                                \
+  case FP_CLS_NAN:                                              \
+    X##_e = _FP_EXPMAX_##fs;                                    \
+    /* either substitute the default NaN fraction and clear the */ \
+    /* sign, or keep the fraction and set the quiet bit         */ \
+    if (!_FP_KEEPNANFRACP)                                      \
+      {                                                         \
+        _FP_FRAC_SET_##wc(X, _FP_NANFRAC_##fs);                 \
+        X##_s = 0;                                              \
+      }                                                         \
+    else                                                        \
+      _FP_FRAC_HIGH_##wc(X) |= _FP_QNANBIT_##fs;                \
+    break;                                                      \
+  }                                                             \
+  __ret;                                                        \
+})
+/*
+ * Main addition routine.  The input values should be cooked.
+ */
+#define _FP_ADD(fs, wc, R, X, Y)                                             \
+do {                                                                         \
+  switch (_FP_CLS_COMBINE(X##_c, Y##_c))                                     \
+  {                                                                          \
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NORMAL):                         \
+    {                                                                        \
+      /* shift the smaller number so that its exponent matches the larger */ \
+      _FP_I_TYPE diff = X##_e - Y##_e;                                       \
+                                                                             \
+      if (diff < 0)                                                          \
+        {                                                                    \
+          diff = -diff;                                                      \
+          if (diff <= _FP_WFRACBITS_##fs)                                    \
+            _FP_FRAC_SRS_##wc(X, diff, _FP_WFRACBITS_##fs);                  \
+          else if (!_FP_FRAC_ZEROP_##wc(X))                                  \
+            _FP_FRAC_SET_##wc(X, _FP_MINFRAC_##wc);                          \
+          else                                                               \
+            _FP_FRAC_SET_##wc(X, _FP_ZEROFRAC_##wc);                         \
+          R##_e = Y##_e;                                                     \
+        }                                                                    \
+      else                                                                   \
+        {                                                                    \
+          if (diff > 0)                                                      \
+            {                                                                \
+              if (diff <= _FP_WFRACBITS_##fs)                                \
+                _FP_FRAC_SRS_##wc(Y, diff, _FP_WFRACBITS_##fs);              \
+              else if (!_FP_FRAC_ZEROP_##wc(Y))                              \
+                _FP_FRAC_SET_##wc(Y, _FP_MINFRAC_##wc);                      \
+              else                                                           \
+                _FP_FRAC_SET_##wc(Y, _FP_ZEROFRAC_##wc);                     \
+            }                                                                \
+          R##_e = X##_e;                                                     \
+        }                                                                    \
+                                                                             \
+      R##_c = FP_CLS_NORMAL;                                                 \
+                                                                             \
+      if (X##_s == Y##_s)                                                    \
+        {                                                                    \
+          R##_s = X##_s;                                                     \
+          _FP_FRAC_ADD_##wc(R, X, Y);                                        \
+          if (_FP_FRAC_OVERP_##wc(fs, R))                                    \
+            {                                                                \
+              _FP_FRAC_SRS_##wc(R, 1, _FP_WFRACBITS_##fs);                   \
+              R##_e++;                                                       \
+            }                                                                \
+        }                                                                    \
+      else                                                                   \
+        {                                                                    \
+          R##_s = X##_s;                                                     \
+          _FP_FRAC_SUB_##wc(R, X, Y);                                        \
+          if (_FP_FRAC_ZEROP_##wc(R))                                        \
+            {                                                                \
+              /* return an exact zero */                                     \
+              if (FP_ROUNDMODE == FP_RND_MINF)                               \
+                R##_s |= Y##_s;                                              \
+              else                                                           \
+                R##_s &= Y##_s;                                              \
+              R##_c = FP_CLS_ZERO;                                           \
+            }                                                                \
+          else                                                               \
+            {                                                                \
+              if (_FP_FRAC_NEGP_##wc(R))                                     \
+                {                                                            \
+                  _FP_FRAC_SUB_##wc(R, Y, X);                                \
+                  R##_s = Y##_s;                                             \
+                }                                                            \
+                                                                             \
+              /* renormalize after subtraction */                            \
+              _FP_FRAC_CLZ_##wc(diff, R);                                    \
+              diff -= _FP_WFRACXBITS_##fs;                                   \
+              if (diff)                                                      \
+                {                                                            \
+                  R##_e -= diff;                                             \
+                  _FP_FRAC_SLL_##wc(R, diff);                                \
+                }                                                            \
+            }                                                                \
+        }                                                                    \
+      break;                                                                 \
+    }                                                                        \
+                                                                             \
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NAN):                               \
+    _FP_CHOOSENAN(fs, wc, R, X, Y);                                          \
+    break;                                                                   \
+                                                                             \
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_ZERO):                           \
+    R##_e = X##_e;                                                           \
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NORMAL):                            \
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_INF):                               \
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_ZERO):                              \
+    _FP_FRAC_COPY_##wc(R, X);                                                \
+    R##_s = X##_s;                                                           \
+    R##_c = X##_c;                                                           \
+    break;                                                                   \
+                                                                             \
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NORMAL):                           \
+    R##_e = Y##_e;                                                           \
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NAN):                            \
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NAN):                               \
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NAN):                              \
+    _FP_FRAC_COPY_##wc(R, Y);                                                \
+    R##_s = Y##_s;                                                           \
+    R##_c = Y##_c;                                                           \
+    break;                                                                   \
+                                                                             \
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_INF):                               \
+    if (X##_s != Y##_s)                                                      \
+      {                                                                      \
+        /* +INF + -INF => NAN */                                             \
+        _FP_FRAC_SET_##wc(R, _FP_NANFRAC_##fs);                              \
+        R##_s = X##_s ^ Y##_s;                                               \
+        R##_c = FP_CLS_NAN;                                                  \
+        break;                                                               \
+      }                                                                      \
+    /* FALLTHRU */                                                           \
+                                                                             \
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NORMAL):                            \
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_ZERO):                              \
+    R##_s = X##_s;                                                           \
+    R##_c = FP_CLS_INF;                                                      \
+    break;                                                                   \
+                                                                             \
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_INF):                            \
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_INF):                              \
+    R##_s = Y##_s;                                                           \
+    R##_c = FP_CLS_INF;                                                      \
+    break;                                                                   \
+                                                                             \
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_ZERO):                             \
+    /* make sure the sign is correct */                                      \
+    if (FP_ROUNDMODE == FP_RND_MINF)                                         \
+      R##_s = X##_s | Y##_s;                                                 \
+    else                                                                     \
+      R##_s = X##_s & Y##_s;                                                 \
+    R##_c = FP_CLS_ZERO;                                                     \
+    break;                                                                   \
+                                                                             \
+  default:                                                                   \
+    abort();                                                                 \
+  }                                                                          \
+} while (0)
+/*
+ * Negation: R gets X's class, exponent and fraction with the sign flipped.
+ * FIXME -- the fp class is never examined, so no exception bits are set.
+ */
+#define _FP_NEG(fs, wc, R, X)                           \
+do {                                                    \
+  R##_s = X##_s ^ 1;            /* toggle the sign */   \
+  R##_e = X##_e;                                        \
+  R##_c = X##_c;                                        \
+  _FP_FRAC_COPY_##wc(R, X);                             \
+} while (0)
+/*
+ * Main multiplication routine: R = X * Y.  The input values should be cooked.
+ */
+#define _FP_MUL(fs, wc, R, X, Y)                                        \
+do {                                                                    \
+  R##_s = X##_s ^ Y##_s;    /* default; a lone NaN overrides it */      \
+  switch (_FP_CLS_COMBINE(X##_c, Y##_c))                                \
+  {                                                                     \
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NORMAL):                    \
+    R##_c = FP_CLS_NORMAL;                                              \
+    R##_e = X##_e + Y##_e + 1;  /* the +1 is undone below if unused */  \
+    _FP_MUL_MEAT_##fs(R,X,Y);                                           \
+    if (_FP_FRAC_OVERP_##wc(fs, R))                                     \
+      _FP_FRAC_SRS_##wc(R, 1, _FP_WFRACBITS_##fs);                      \
+    else                                                                \
+      R##_e--;                                                          \
+    break;                                                              \
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NAN):                          \
+    _FP_CHOOSENAN(fs, wc, R, X, Y);                                     \
+    break;                                                              \
+  /* inf * 0 has no value: produce the default NaN fraction.  */        \
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_ZERO):                         \
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_INF):                         \
+    _FP_FRAC_SET_##wc(R, _FP_NANFRAC_##fs);                             \
+    R##_c = FP_CLS_NAN;                                                 \
+    break;                                                              \
+  /* A lone NaN operand is handed back with its own sign.  */           \
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NORMAL):                       \
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_INF):                          \
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_ZERO):                         \
+    R##_s = X##_s;                                                      \
+    _FP_FRAC_COPY_##wc(R, X);                                           \
+    R##_c = X##_c;                                                      \
+    break;                                                              \
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NAN):                       \
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NAN):                          \
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NAN):                         \
+    R##_s = Y##_s;                                                      \
+    _FP_FRAC_COPY_##wc(R, Y);                                           \
+    R##_c = Y##_c;                                                      \
+    break;                                                              \
+  /* Otherwise an inf or zero operand supplies class and fraction.  */  \
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_INF):                          \
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NORMAL):                       \
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NORMAL):                      \
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_ZERO):                        \
+    _FP_FRAC_COPY_##wc(R, X);                                           \
+    R##_c = X##_c;                                                      \
+    break;                                                              \
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_INF):                       \
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_ZERO):                      \
+    _FP_FRAC_COPY_##wc(R, Y);                                           \
+    R##_c = Y##_c;                                                      \
+    break;                                                              \
+  default:                                                              \
+    abort();                                                            \
+  }                                                                     \
+} while (0)
+/*
+ * Main division routine: R = X / Y.  The input values should be cooked.
+ */
+#define _FP_DIV(fs, wc, R, X, Y)                                        \
+do {                                                                    \
+  R##_s = X##_s ^ Y##_s;    /* default; a lone NaN overrides it */      \
+  switch (_FP_CLS_COMBINE(X##_c, Y##_c))                                \
+  {                                                                     \
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NORMAL):                    \
+    R##_e = X##_e - Y##_e;                                              \
+    R##_c = FP_CLS_NORMAL;                                              \
+    _FP_DIV_MEAT_##fs(R,X,Y);                                           \
+    break;                                                              \
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NAN):                          \
+    _FP_CHOOSENAN(fs, wc, R, X, Y);                                     \
+    break;                                                              \
+                                                                        \
+  /* A lone NaN operand is handed back with its own sign.  */           \
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_NORMAL):                       \
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_INF):                          \
+  case _FP_CLS_COMBINE(FP_CLS_NAN,FP_CLS_ZERO):                         \
+    _FP_FRAC_COPY_##wc(R, X);                                           \
+    R##_c = X##_c;                                                      \
+    R##_s = X##_s;                                                      \
+    break;                                                              \
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_NAN):                       \
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NAN):                          \
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NAN):                         \
+    _FP_FRAC_COPY_##wc(R, Y);                                           \
+    R##_c = Y##_c;                                                      \
+    R##_s = Y##_s;                                                      \
+    break;                                                              \
+                                                                        \
+  /* inf / inf and 0 / 0 have no value: produce the default NaN.  */    \
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_INF):                          \
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_ZERO):                        \
+    _FP_FRAC_SET_##wc(R, _FP_NANFRAC_##fs);                             \
+    R##_c = FP_CLS_NAN;                                                 \
+    break;                                                              \
+                                                                        \
+  /* x / 0 (x nonzero) and inf / normal give infinity.  */              \
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_ZERO):                      \
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_ZERO):                         \
+  case _FP_CLS_COMBINE(FP_CLS_INF,FP_CLS_NORMAL):                       \
+    R##_c = FP_CLS_INF;                                                 \
+    break;                                                              \
+                                                                        \
+  /* 0 / y (y nonzero) and normal / inf give zero.  */                  \
+  case _FP_CLS_COMBINE(FP_CLS_NORMAL,FP_CLS_INF):                       \
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_INF):                         \
+  case _FP_CLS_COMBINE(FP_CLS_ZERO,FP_CLS_NORMAL):                      \
+    R##_c = FP_CLS_ZERO;                                                \
+    break;                                                              \
+  default:                                                              \
+    abort();                                                            \
+  }                                                                     \
+} while (0)
+/*
+ * Three-way comparison of raw (not cooked) operands.  ret is set to -1, 0
+ * or 1 for X < Y, X == Y or X > Y, and to `un' when either one is a NaN.
+ */
+#define _FP_CMP(fs, wc, ret, X, Y, un)                                  \
+  do {                                                                  \
+    if ((X##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc(X))           \
+        || (Y##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc(Y)))       \
+      ret = un;         /* NaNs are unordered */                        \
+    else                                                                \
+      {                                                                 \
+        int __x_is0 = (!X##_e && _FP_FRAC_ZEROP_##wc(X));               \
+        int __y_is0 = (!Y##_e && _FP_FRAC_ZEROP_##wc(Y));               \
+                                                                        \
+        if (__x_is0)                                                    \
+          /* +0 and -0 are equal; otherwise only Y's sign matters.  */  \
+          ret = __y_is0 ? 0 : (Y##_s ? 1 : -1);                         \
+        else if (__y_is0 || X##_s != Y##_s)                             \
+          /* Y is zero or has the other sign: X's sign decides.  */     \
+          ret = X##_s ? -1 : 1;                                         \
+        else                                                            \
+          {                                                             \
+            /* Same sign: rank the magnitudes by exponent, then by      \
+               fraction, and mirror the answer for negative values.  */ \
+            int __mag;                                                  \
+            if (X##_e != Y##_e)                                         \
+              __mag = (X##_e > Y##_e) ? 1 : -1;                         \
+            else if (_FP_FRAC_GT_##wc(X, Y))                            \
+              __mag = 1;                                                \
+            else if (_FP_FRAC_GT_##wc(Y, X))                            \
+              __mag = -1;                                               \
+            else                                                        \
+              __mag = 0;                                                \
+            ret = X##_s ? -__mag : __mag;                               \
+          }                                                             \
+      }                                                                 \
+  } while (0)
+/* Equality-only comparison of raw operands: ret = 0 if equal, else 1.  */
+#define _FP_CMP_EQ(fs, wc, ret, X, Y)                                     \
+  do {                                                                    \
+    if ((X##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc(X))             \
+        || (Y##_e == _FP_EXPMAX_##fs && !_FP_FRAC_ZEROP_##wc(Y)))         \
+      ret = 1;          /* a NaN never compares equal */                  \
+    else                                                                  \
+      {                                                                   \
+        /* Unequal unless exponent and fraction match and the signs       \
+           agree -- or the value is zero, whose sign is ignored.  */      \
+        ret = (X##_e != Y##_e                                             \
+               || !_FP_FRAC_EQ_##wc(X, Y)                                 \
+               || (X##_s != Y##_s                                         \
+                   && (X##_e || !_FP_FRAC_ZEROP_##wc(X))));               \
+      }                                                                   \
+  } while (0)
+/*
+ * Main square root routine.  The input value should be cooked.  Every NaN
+ * result gets a defined fraction: X's own for a NaN input, else _FP_NANFRAC.
+ */
+#define _FP_SQRT(fs, wc, R, X)                                          \
+do {                                                                    \
+    _FP_FRAC_DECL_##wc(T); _FP_FRAC_DECL_##wc(S);                       \
+    _FP_W_TYPE q;                                                       \
+    switch (X##_c)                                                      \
+    {                                                                   \
+    case FP_CLS_NAN:                                                    \
+        R##_s = 0;                                                      \
+        R##_c = FP_CLS_NAN;                                             \
+        _FP_FRAC_COPY_##wc(R, X);       /* pass the NaN fraction on */  \
+        break;                                                          \
+    case FP_CLS_INF:                                                    \
+        R##_s = 0;                                                      \
+        if (X##_s)                                                      \
+          {                                                             \
+            R##_c = FP_CLS_NAN; /* sqrt(-inf) is invalid */             \
+            _FP_FRAC_SET_##wc(R, _FP_NANFRAC_##fs);                     \
+          }                                                             \
+        else                                                            \
+          R##_c = FP_CLS_INF; /* sqrt(+inf) = +inf */                   \
+        break;                                                          \
+    case FP_CLS_ZERO:                                                   \
+        R##_s = X##_s;                                                  \
+        R##_c = FP_CLS_ZERO; /* sqrt(+-0) = +-0 */                      \
+        break;                                                          \
+    case FP_CLS_NORMAL:                                                 \
+        R##_s = 0;                                                      \
+        if (X##_s)                                                      \
+          {                                                             \
+            R##_c = FP_CLS_NAN; /* sqrt(negative) is invalid */         \
+            _FP_FRAC_SET_##wc(R, _FP_NANFRAC_##fs);                     \
+            break;                                                      \
+          }                                                             \
+        R##_c = FP_CLS_NORMAL;                                          \
+        if (X##_e & 1)          /* odd exponent: double the fraction */ \
+          _FP_FRAC_SLL_##wc(X, 1);                                      \
+        R##_e = X##_e >> 1;     /* ...then halve the exponent */        \
+        _FP_FRAC_SET_##wc(S, _FP_ZEROFRAC_##wc);                        \
+        _FP_FRAC_SET_##wc(R, _FP_ZEROFRAC_##wc);                        \
+        q = _FP_OVERFLOW_##fs;                                          \
+        _FP_FRAC_SLL_##wc(X, 1);                                        \
+        _FP_SQRT_MEAT_##wc(R, S, T, X, q);                              \
+        _FP_FRAC_SRL_##wc(R, 1);                                        \
+    }                                                                   \
+  } while (0)
+/*
+ * Convert from FP to integer
+ */
+/* "When a NaN, infinity, large positive argument >= 2147483648.0, or
+ * large negative argument <= -2147483649.0 is converted to an integer,
+ * the invalid_current bit...should be set and fp_exception_IEEE_754 should
+ * be raised. If the floating point invalid trap is disabled, no trap occurs
+ * and a numerical result is generated: if the sign bit of the operand
+ * is 0, the result is 2147483647; if the sign bit of the operand is 1,
+ * the result is -2147483648."
+ * Similarly for conversion to extended ints, except that the boundaries
+ * are >= 2^63, <= -(2^63 + 1), and the results are 2^63 - 1 for s=0 and
+ * -2^63 for s=1.
+ * -- SPARC Architecture Manual V9, Appendix B, which specifies how
+ * SPARCs resolve implementation dependencies in the IEEE-754 spec.
+ * I don't believe that the code below follows this. I'm not even sure
+ * it's right!
+ * It doesn't cope with needing to convert to an n bit integer when there
+ * is no n bit integer type. Fortunately gcc provides long long so this
+ * isn't a problem for sparc32.
+ * I have, however, fixed its NaN handling to conform as above.
+ *         -- PMM 02/1998
+ * NB: rsigned is not 'is r declared signed?' but 'should the value stored
+ * in r be signed or unsigned?'. r is always(?) declared unsigned.
+ * Comments below are mine, BTW -- PMM
+ */
+#define _FP_TO_INT(fs, wc, r, X, rsize, rsigned)                        \
+  do { /* r = integer value of cooked X in rsize bits, saturating */    \
+    switch (X##_c) /* ZERO/NAN/INF labels sit inside NORMAL's arms */   \
+      {                                                                 \
+      case FP_CLS_NORMAL:                                               \
+        if (X##_e < 0)          /* negative exponent: result is 0 */    \
+          {                                                             \
+          /* case FP_CLS_NAN: see above! */                             \
+          case FP_CLS_ZERO:                                             \
+            r = 0;                                                      \
+          }                                                             \
+        else if (X##_e >= rsize - (rsigned != 0)) /* limit: rsize-1 if signed, else rsize */ \
+          {     /* overflow */                                          \
+          case FP_CLS_NAN:      /* NaN and Inf saturate like overflow */ \
+          case FP_CLS_INF:                                              \
+            if (rsigned)                                                \
+              {                                                         \
+                r = 1;                                                  \
+                r <<= rsize - 1;        /* r = 2^(rsize-1) */           \
+                r -= 1 - X##_s;         /* one less unless X##_s is 1 */ \
+              }                                                         \
+            else                                                        \
+              {                                                         \
+                r = 0;                  /* result when X##_s is set */  \
+                if (!X##_s)                                             \
+                  r = ~r;               /* otherwise all bits set */    \
+              }                                                         \
+          }                                                             \
+        else                                                            \
+          {                                                             \
+            if (_FP_W_TYPE_SIZE*wc < rsize) /* fraction words narrower than r */ \
+              {                                                         \
+                _FP_FRAC_ASSEMBLE_##wc(r, X, rsize);                    \
+                r <<= X##_e - _FP_WFRACBITS_##fs; /* NOTE(review): count is negative if X##_e < _FP_WFRACBITS -- confirm */ \
+              }                                                         \
+            else                                                        \
+              {                                                         \
+                if (X##_e >= _FP_WFRACBITS_##fs)                        \
+                  _FP_FRAC_SLL_##wc(X, (X##_e - _FP_WFRACBITS_##fs + 1));\
+                else                                                    \
+                  _FP_FRAC_SRL_##wc(X, (_FP_WFRACBITS_##fs - X##_e - 1));\
+                _FP_FRAC_ASSEMBLE_##wc(r, X, rsize);                    \
+              }                                                         \
+            if (rsigned && X##_s)       /* sign applied to signed results only */ \
+              r = -r;                                                   \
+          }                                                             \
+        break;                  /* single exit shared by every label */ \
+      }                                                                 \
+  } while (0)
+#define _FP_FROM_INT(fs, wc, X, r, rsize, rtype)                        \
+  do {                                                                  \
+    if (!(r))                                                           \
+      {                                                                 \
+        X##_c = FP_CLS_ZERO;                                            \
+        X##_s = 0;                                                      \
+      }                                                                 \
+    else                                                                \
+      {                                                                 \
+        X##_c = FP_CLS_NORMAL;                                          \
+        /* Take the sign, then work on the magnitude.  r counts as      \
+           unsigned from here, so the lone overflowing negation is fine.  */ \
+        X##_s = (r < 0);                                                \
+        if (X##_s)                                                      \
+          r = -r;                                                       \
+        /* Leading-zero count, turned into the index of r's top set bit.  */ \
+        if (rsize <= _FP_W_TYPE_SIZE)                                   \
+          __FP_CLZ(X##_e, r);                                           \
+        else                                                            \
+          __FP_CLZ_2(X##_e, (_FP_W_TYPE)(r >> _FP_W_TYPE_SIZE),         \
+                     (_FP_W_TYPE)r);                                    \
+        if (rsize < _FP_W_TYPE_SIZE)                                    \
+          X##_e -= (_FP_W_TYPE_SIZE - rsize);                           \
+        X##_e = rsize - X##_e - 1;                                      \
+        if (_FP_FRACBITS_##fs < rsize && _FP_WFRACBITS_##fs < X##_e)    \
+          __FP_FRAC_SRS_1(r, (X##_e - _FP_WFRACBITS_##fs), rsize);      \
+        r &= ~((_FP_W_TYPE)1 << X##_e);                                 \
+        _FP_FRAC_DISASSEMBLE_##wc(X, ((unsigned rtype)r), rsize);       \
+        _FP_FRAC_SLL_##wc(X, (_FP_WFRACBITS_##fs - X##_e - 1));         \
+      }                                                                 \
+  } while (0)
+#define FP_CONV(dfs,sfs,dwc,swc,D,S)                                    \
+do { /* fraction goes through _FP_FRAC_CONV; other fields are copied */ \
+  _FP_FRAC_CONV_##dwc##_##swc(dfs, sfs, D, S);                          \
+  D##_s = S##_s;                                                        \
+  D##_c = S##_c;                                                        \
+  D##_e = S##_e;                                                        \
+} while (0)
+/*
+ * Helper primitives.
+ */
+/* Count leading zeros: r = zero bits above x's top set bit (x == 0 gives _FP_W_TYPE_SIZE - 1).  */
+#ifndef __FP_CLZ
+#if _FP_W_TYPE_SIZE < 64
+/* no 32-bit step here: it only shuts the compiler up about shifts > word length -- PMM 02/1998 */
+#define __FP_CLZ(r, x)                          \
+  do {                                          \
+    _FP_W_TYPE _t = (x);                        \
+    r = _FP_W_TYPE_SIZE - 1;                    \
+    if (_t > 0xffff)                            \
+      { r -= 16; _t >>= 16; }                   \
+    if (_t > 0xff)                              \
+      { r -= 8; _t >>= 8; }                     \
+    if (_t & 0xf0)                              \
+      { r -= 4; _t >>= 4; }                     \
+    if (_t & 0xc)                               \
+      { r -= 2; _t >>= 2; }                     \
+    if (_t & 0x2) r -= 1;                       \
+  } while (0)
+#else /* not _FP_W_TYPE_SIZE < 64 */
+#define __FP_CLZ(r, x)                          \
+  do {                                          \
+    _FP_W_TYPE _t = (x);                        \
+    r = _FP_W_TYPE_SIZE - 1;                    \
+    if (_t > 0xffffffff)                        \
+      { r -= 32; _t >>= 32; }                   \
+    if (_t > 0xffff)                            \
+      { r -= 16; _t >>= 16; }                   \
+    if (_t > 0xff)                              \
+      { r -= 8; _t >>= 8; }                     \
+    if (_t & 0xf0)                              \
+      { r -= 4; _t >>= 4; }                     \
+    if (_t & 0xc)                               \
+      { r -= 2; _t >>= 2; }                     \
+    if (_t & 0x2) r -= 1;                       \
+  } while (0)
+#endif /* not _FP_W_TYPE_SIZE < 64 */
+#endif /* ndef __FP_CLZ */
+#define _FP_DIV_HELP_imm(q, r, n, d)            \
+  do { /* q = n / d and r = n % d; n and d are each evaluated twice */ \
+    (q) = (n) / (d), (r) = (n) % (d); /* parenthesized: arguments may be expressions */ \
+  } while (0)
diff --git a/arch/ppc/math-emu/sfp-machine.h b/arch/ppc/math-emu/sfp-machine.h
new file mode 100644
index 000000000000..686e06d29186
--- /dev/null
+++ b/arch/ppc/math-emu/sfp-machine.h
@@ -0,0 +1,377 @@
+/* Machine-dependent software floating-point definitions.  PPC version.
+   Copyright (C) 1997 Free Software Foundation, Inc.
+   This file is part of the GNU C Library.
+   The GNU C Library is free software; you can redistribute it and/or
+   modify it under the terms of the GNU Library General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+   The GNU C Library is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   Library General Public License for more details.
+   You should have received a copy of the GNU Library General Public
+   License along with the GNU C Library; see the file COPYING.LIB.  If
+   not, write to the Free Software Foundation, Inc.,
+   59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
+   Actually, this is a PPC (32bit) version, written based on the
+   i386, sparc, and sparc64 versions, by me,
+   Peter Maydell (pmaydell@chiark.greenend.org.uk).
+   Comments are by and large also mine, although they may be inaccurate.
+   In picking out asm fragments I've gone with the lowest common
+   denominator, which also happens to be the hardware I have :->
+   That is, a SPARC without hardware multiply and divide.
+ */
+/* Basic word size definitions: the working word is a 32-bit unsigned
+ * long, with signed and plain-integer companions of the same width.
+ */
+#define _FP_W_TYPE_SIZE         32
+#define _FP_W_TYPE              unsigned long
+#define _FP_WS_TYPE             signed long
+#define _FP_I_TYPE              long
+/* Half-word helpers used by udiv_qrnnd below: __ll_B is
+ * 2^(W_TYPE_SIZE/2), and __ll_lowpart/__ll_highpart extract the low and
+ * high half of a word.  UWtype and W_TYPE_SIZE are not defined in this
+ * file; they must be visible wherever these macros are expanded.
+ */
+#define __ll_B                  ((UWtype) 1 << (W_TYPE_SIZE / 2))
+#define __ll_lowpart(t)         ((UWtype) (t) & (__ll_B - 1))
+#define __ll_highpart(t)        ((UWtype) (t) >> (W_TYPE_SIZE / 2))
+/* You can optionally code some things like addition in asm. For
+ * example, i386 defines __FP_FRAC_ADD_2 as asm. If you don't
+ * then you get a fragment of C code [if you change an #ifdef 0
+ * in op-2.h] or a call to add_ssaaaa (see below).
+ * Good places to look for asm fragments to use are gcc and glibc.
+ * gcc's longlong.h is useful.
+ */
+/* We need to know how to multiply and divide. If the host word size
+ * is >= 2*fracbits you can use _FP_MUL_MEAT_n_imm(t,R,X,Y) which
+ * codes the multiply with whatever gcc does to 'a * b'.
+ * _FP_MUL_MEAT_n_wide(t,R,X,Y,f) is used when you have an asm
+ * function that can multiply two 1W values and get a 2W result.
+ * Otherwise you're stuck with _FP_MUL_MEAT_n_hard(t,R,X,Y) which
+ * does bitshifting to avoid overflow.
+ * For division there is _FP_DIV_MEAT_n_imm(t,R,X,Y,f) for word size
+ * >= 2*fracbits, where f is either _FP_DIV_HELP_imm or
+ * _FP_DIV_HELP_ldiv (see op-1.h).
+ * _FP_DIV_MEAT_n_udiv(t,R,X,Y) is for when you have asm to do
+ * 2W/1W => (1W, 1W).
+ * [GCC and glibc have longlong.h which has the asm macro udiv_qrnnd
+ * to do this.]
+ * In general, 'n' is the number of words required to hold the type,
+ * and 't' is either S, D or Q for single/double/quad.
+ *           -- PMM
+ */
+/* Example: SPARC64:
+ * #define _FP_MUL_MEAT_S(R,X,Y)        _FP_MUL_MEAT_1_imm(S,R,X,Y)
+ * #define _FP_MUL_MEAT_D(R,X,Y)        _FP_MUL_MEAT_1_wide(D,R,X,Y,umul_ppmm)
+ * #define _FP_MUL_MEAT_Q(R,X,Y)        _FP_MUL_MEAT_2_wide(Q,R,X,Y,umul_ppmm)
+ *
+ * #define _FP_DIV_MEAT_S(R,X,Y)        _FP_DIV_MEAT_1_imm(S,R,X,Y,_FP_DIV_HELP_imm)
+ * #define _FP_DIV_MEAT_D(R,X,Y)        _FP_DIV_MEAT_1_udiv(D,R,X,Y)
+ * #define _FP_DIV_MEAT_Q(R,X,Y)        _FP_DIV_MEAT_2_udiv_64(Q,R,X,Y)
+ *
+ * Example: i386:
+ * #define _FP_MUL_MEAT_S(R,X,Y)   _FP_MUL_MEAT_1_wide(S,R,X,Y,_i386_mul_32_64)
+ * #define _FP_MUL_MEAT_D(R,X,Y)   _FP_MUL_MEAT_2_wide(D,R,X,Y,_i386_mul_32_64)
+ *
+ * #define _FP_DIV_MEAT_S(R,X,Y)   _FP_DIV_MEAT_1_udiv(S,R,X,Y,_i386_div_64_32)
+ * #define _FP_DIV_MEAT_D(R,X,Y)   _FP_DIV_MEAT_2_udiv_64(D,R,X,Y)
+ */
+/* PPC selection: both multiplies use the "wide" form driven by the
+ * umul_ppmm fragment defined later in this file (one word for single,
+ * two words for double); both divides use the udiv forms, which
+ * presumably build on udiv_qrnnd -- confirm in op-1.h / op-2.h.
+ */
+#define _FP_MUL_MEAT_S(R,X,Y)   _FP_MUL_MEAT_1_wide(S,R,X,Y,umul_ppmm)
+#define _FP_MUL_MEAT_D(R,X,Y)   _FP_MUL_MEAT_2_wide(D,R,X,Y,umul_ppmm)
+#define _FP_DIV_MEAT_S(R,X,Y)   _FP_DIV_MEAT_1_udiv(S,R,X,Y)
+#define _FP_DIV_MEAT_D(R,X,Y)   _FP_DIV_MEAT_2_udiv_64(D,R,X,Y)
+/* These macros define what NaN looks like. They're supposed to expand to
+ * a comma-separated set of 32bit unsigned ints that encode NaN: the
+ * first word carries the quiet-NaN bit, the remaining words are zero.
+ * NOTE(review): _FP_KEEPNANFRACP presumably asks the generic code to
+ * keep an operand's NaN fraction instead of substituting the default
+ * above -- confirm against its users in op-common.h.
+ */
+#define _FP_NANFRAC_S           _FP_QNANBIT_S
+#define _FP_NANFRAC_D           _FP_QNANBIT_D, 0
+#define _FP_NANFRAC_Q           _FP_QNANBIT_Q, 0, 0, 0
+#define _FP_KEEPNANFRACP 1
+/* This macro appears to be called when both X and Y are NaNs, and
+ * has to choose one and copy it to R. i386 goes for the larger of the
+ * two, sparc64 just picks Y. I don't understand this at all so I'll
+ * go with sparc64 because it's shorter :->   -- PMM
+ *
+ * As written: R takes Y's sign and fraction (copied with the wc-word
+ * fraction copy) and is classified as NaN.  X and the format argument
+ * fs are not used.
+ */
+#define _FP_CHOOSENAN(fs, wc, R, X, Y)                  \
+  do {                                                  \
+    R##_s = Y##_s;                                      \
+    _FP_FRAC_COPY_##wc(R,Y);                            \
+    R##_c = FP_CLS_NAN;                                 \
+  } while (0)
+/* Out-of-line double-precision unpack/pack helpers.  The pointer
+ * arguments of fp_unpack_d receive, in order: sign, high fraction word,
+ * low fraction word, exponent and class.  The pack helpers take the same
+ * five values after the destination pointer and return exception flags.
+ */
+extern void fp_unpack_d(long *, unsigned long *, unsigned long *,
+                        long *, long *, void *);
+extern int  fp_pack_d(void *, long, unsigned long, unsigned long, long, long);
+extern int  fp_pack_ds(void *, long, unsigned long, unsigned long, long, long);
+/* Load the raw fraction, exponent and sign fields of the one-word
+ * format `fs' from the object at `val' into X##_f, X##_e and X##_s.
+ * No canonicalisation is performed.  `val' is parenthesized before the
+ * cast so that pointer expressions such as `p + n' are converted as a
+ * whole rather than having only `p' cast.
+ */
+#define __FP_UNPACK_RAW_1(fs, X, val)                   \
+  do {                                                  \
+    union _FP_UNION_##fs *_flo =                        \
+        (union _FP_UNION_##fs *)(val);                  \
+                                                        \
+    X##_f = _flo->bits.frac;                            \
+    X##_e = _flo->bits.exp;                             \
+    X##_s = _flo->bits.sign;                            \
+  } while (0)
+/* Load the raw fields of the two-word format `fs' from the object at
+ * `val' into X##_f0 (low fraction word), X##_f1 (high fraction word),
+ * X##_e and X##_s.  No canonicalisation is performed.  `val' is
+ * parenthesized before the cast so that pointer expressions are
+ * converted as a whole.
+ */
+#define __FP_UNPACK_RAW_2(fs, X, val)                   \
+  do {                                                  \
+    union _FP_UNION_##fs *_flo =                        \
+        (union _FP_UNION_##fs *)(val);                  \
+                                                        \
+    X##_f0 = _flo->bits.frac0;                          \
+    X##_f1 = _flo->bits.frac1;                          \
+    X##_e  = _flo->bits.exp;                            \
+    X##_s  = _flo->bits.sign;                           \
+  } while (0)
+/* Unpack a single from `val' into X: raw field load followed by
+ * canonicalisation, both expanded inline.
+ */
+#define __FP_UNPACK_S(X,val)            \
+  do {                                  \
+    __FP_UNPACK_RAW_1(S,X,val);         \
+    _FP_UNPACK_CANONICAL(S,1,X);        \
+  } while (0)
+/* Unpack a double from `val' into X via the out-of-line fp_unpack_d(),
+ * which fills X##_s, X##_f1, X##_f0, X##_e and X##_c through pointers.
+ */
+#define __FP_UNPACK_D(X,val)            \
+        fp_unpack_d(&X##_s, &X##_f1, &X##_f0, &X##_e, &X##_c, val)
+/* Store X##_f, X##_e and X##_s into the raw bit-fields of the one-word
+ * format `fs' at `val'.  The values are stored as they are; any
+ * canonical packing must already have been done.  `val' is
+ * parenthesized before the cast so that pointer expressions such as
+ * `p + n' are converted as a whole.
+ */
+#define __FP_PACK_RAW_1(fs, val, X)                     \
+  do {                                                  \
+    union _FP_UNION_##fs *_flo =                        \
+        (union _FP_UNION_##fs *)(val);                  \
+                                                        \
+    _flo->bits.frac = X##_f;                            \
+    _flo->bits.exp  = X##_e;                            \
+    _flo->bits.sign = X##_s;                            \
+  } while (0)
+/* Store X##_f0 (low fraction word), X##_f1 (high fraction word), X##_e
+ * and X##_s into the raw bit-fields of the two-word format `fs' at
+ * `val'.  The values are stored as they are.  `val' is parenthesized
+ * before the cast so that pointer expressions are converted as a whole.
+ */
+#define __FP_PACK_RAW_2(fs, val, X)                     \
+  do {                                                  \
+    union _FP_UNION_##fs *_flo =                        \
+        (union _FP_UNION_##fs *)(val);                  \
+                                                        \
+    _flo->bits.frac0 = X##_f0;                          \
+    _flo->bits.frac1 = X##_f1;                          \
+    _flo->bits.exp   = X##_e;                           \
+    _flo->bits.sign  = X##_s;                           \
+  } while (0)
+#include <linux/kernel.h>
+#include <linux/sched.h>
+/* The emulated FPSCR is kept in the current task's thread struct. */
+#define __FPU_FPSCR     (current->thread.fpscr)
+/* We only actually write to the destination register
+ * if exceptions signalled (if any) will not trap.
+ *
+ * __FPU_ENABLED_EXC moves FPSCR bits 3..7 down to bits 0..4 (presumably
+ * the five exception-enable bits -- confirm against the FPSCR layout).
+ * __FPU_TRAP_P(bits) is true when any of those bits is also set in
+ * `bits'.
+ *
+ * NOTE(review): the pack macros pass EFLAG_* values as `bits', and
+ * every EFLAG_* mask defined in this file has its bit at position 8 or
+ * higher, so the AND with a value confined to bits 0..4 can never be
+ * non-zero and the destination is always written.  If the comment above
+ * states the intent, `bits' probably needs shifting (by 25 for the five
+ * summary flags) before the test -- confirm the intended trap semantics
+ * before changing it.
+ */
+#define __FPU_ENABLED_EXC \
+({                                              \
+        (__FPU_FPSCR >> 3) & 0x1f;      \
+})
+#define __FPU_TRAP_P(bits) \
+        ((__FPU_ENABLED_EXC & (bits)) != 0)
+/* Pack X as a single into `val'.  X is first packed canonically; the raw
+ * store happens only when no exception was raised or none of the raised
+ * exceptions satisfies __FPU_TRAP_P().  The expression evaluates to the
+ * exception flags returned by the canonical pack.
+ */
+#define __FP_PACK_S(val,X)                      \
+({  int __exc = _FP_PACK_CANONICAL(S,1,X);      \
+    if(!__exc || !__FPU_TRAP_P(__exc))          \
+        __FP_PACK_RAW_1(S,val,X);               \
+    __exc;                                      \
+})
+/* Pack X as a double into `val' through the out-of-line fp_pack_d();
+ * evaluates to its return value (exception flags).
+ */
+#define __FP_PACK_D(val,X)                      \
+        fp_pack_d(val, X##_s, X##_f1, X##_f0, X##_e, X##_c)
+/* As __FP_PACK_D, but through fp_pack_ds(), which rounds the value to
+ * single precision before storing it in double format.
+ */
+#define __FP_PACK_DS(val,X)                     \
+        fp_pack_ds(val, X##_s, X##_f1, X##_f0, X##_e, X##_c)
+/* Obtain the current rounding mode: the low two bits of the emulated
+ * FPSCR.  The value is used directly as one of the FP_RND_* codes
+ * (0..3) by the rounding dispatch in soft-fp.h.
+ */
+#define FP_ROUNDMODE                    \
+({                                      \
+        __FPU_FPSCR & 0x3;              \
+})
+/* the asm fragments go here: all these are taken from glibc-2.0.5's
+ * stdlib/longlong.h
+ */
+#include <linux/types.h>
+#include <asm/byteorder.h>
+/* add_ssaaaa is used in op-2.h and should be equivalent to
+ * #define add_ssaaaa(sh,sl,ah,al,bh,bl) (sh = ah+bh+ (( sl = al+bl) < al))
+ * add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
+ * high_addend_2, low_addend_2) adds two two-word integers, composed of
+ * HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
+ * respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
+ * (i.e. carry out) is not stored anywhere, and is lost.
+ *
+ * Three variants are selected at compile time: when bh is the constant
+ * 0 the high word is ah plus carry (aze/addze); when bh is the constant
+ * all-ones it is ah minus one plus carry (ame/addme); otherwise the
+ * general add-with-carry (ae/adde) is used.  Each asm template gives two
+ * mnemonic spellings in {old|new} form for the assembler dialect in use.
+ */
+#define add_ssaaaa(sh, sl, ah, al, bh, bl)                              \
+  do {                                                                  \
+    if (__builtin_constant_p (bh) && (bh) == 0)                         \
+      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2"           \
+             : "=r" ((USItype)(sh)),                                    \
+               "=&r" ((USItype)(sl))                                    \
+             : "%r" ((USItype)(ah)),                                    \
+               "%r" ((USItype)(al)),                                    \
+               "rI" ((USItype)(bl)));                                   \
+    else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0)          \
+      __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2"           \
+             : "=r" ((USItype)(sh)),                                    \
+               "=&r" ((USItype)(sl))                                    \
+             : "%r" ((USItype)(ah)),                                    \
+               "%r" ((USItype)(al)),                                    \
+               "rI" ((USItype)(bl)));                                   \
+    else                                                                \
+      __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3"          \
+             : "=r" ((USItype)(sh)),                                    \
+               "=&r" ((USItype)(sl))                                    \
+             : "%r" ((USItype)(ah)),                                    \
+               "r" ((USItype)(bh)),                                     \
+               "%r" ((USItype)(al)),                                    \
+               "rI" ((USItype)(bl)));                                   \
+  } while (0)
+/* sub_ddmmss is used in op-2.h and udivmodti4.c and should be equivalent to
+ * #define sub_ddmmss(sh, sl, ah, al, bh, bl) (sh = ah-bh - ((sl = al-bl) > al))
+ * sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
+ * high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
+ * composed of HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and
+ * LOW_SUBTRAHEND respectively.  The result is placed in HIGH_DIFFERENCE
+ * and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
+ * and is lost.
+ *
+ * Five variants are selected at compile time, specialising the
+ * high-word instruction when ah or bh is the constant 0 or the constant
+ * all-ones; the final branch is the general subtract-with-carry.
+ */
+#define sub_ddmmss(sh, sl, ah, al, bh, bl)                              \
+  do {                                                                  \
+    if (__builtin_constant_p (ah) && (ah) == 0)                         \
+      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2"       \
+               : "=r" ((USItype)(sh)),                                  \
+                 "=&r" ((USItype)(sl))                                  \
+               : "r" ((USItype)(bh)),                                   \
+                 "rI" ((USItype)(al)),                                  \
+                 "r" ((USItype)(bl)));                                  \
+    else if (__builtin_constant_p (ah) && (ah) ==~(USItype) 0)          \
+      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2"       \
+               : "=r" ((USItype)(sh)),                                  \
+                 "=&r" ((USItype)(sl))                                  \
+               : "r" ((USItype)(bh)),                                   \
+                 "rI" ((USItype)(al)),                                  \
+                 "r" ((USItype)(bl)));                                  \
+    else if (__builtin_constant_p (bh) && (bh) == 0)                    \
+      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2"         \
+               : "=r" ((USItype)(sh)),                                  \
+                 "=&r" ((USItype)(sl))                                  \
+               : "r" ((USItype)(ah)),                                   \
+                 "rI" ((USItype)(al)),                                  \
+                 "r" ((USItype)(bl)));                                  \
+    else if (__builtin_constant_p (bh) && (bh) ==~(USItype) 0)          \
+      __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2"         \
+               : "=r" ((USItype)(sh)),                                  \
+                 "=&r" ((USItype)(sl))                                  \
+               : "r" ((USItype)(ah)),                                   \
+                 "rI" ((USItype)(al)),                                  \
+                 "r" ((USItype)(bl)));                                  \
+    else                                                                \
+      __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2"      \
+               : "=r" ((USItype)(sh)),                                  \
+                 "=&r" ((USItype)(sl))                                  \
+               : "r" ((USItype)(ah)),                                   \
+                 "r" ((USItype)(bh)),                                   \
+                 "rI" ((USItype)(al)),                                  \
+                 "r" ((USItype)(bl)));                                  \
+  } while (0)
+/* asm fragments for mul and div */
+/* umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two
+ * UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype
+ * word product in HIGH_PROD and LOW_PROD.
+ *
+ * The operands are copied into locals first, so each is evaluated once.
+ * The high word comes from the mulhwu instruction; the low word is the
+ * ordinary truncating C multiply.
+ */
+#define umul_ppmm(ph, pl, m0, m1)                                       \
+  do {                                                                  \
+    USItype __m0 = (m0), __m1 = (m1);                                   \
+    __asm__ ("mulhwu %0,%1,%2"                                          \
+             : "=r" ((USItype)(ph))                                     \
+             : "%r" (__m0),                                             \
+               "r" (__m1));                                             \
+    (pl) = __m0 * __m1;                                                 \
+  } while (0)
+/* udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+ * denominator) divides a UDWtype, composed by the UWtype integers
+ * HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
+ * in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
+ * than DENOMINATOR for correct operation.  If, in addition, the most
+ * significant bit of DENOMINATOR must be 1, then the pre-processor symbol
+ * UDIV_NEEDS_NORMALIZATION is defined to 1.
+ *
+ * This is a plain C implementation working in half-words: the divisor is
+ * split into __d1:__d0, a half-word quotient digit is estimated from the
+ * high half of the divisor, then corrected downwards at most twice.  The
+ * first pass yields the high digit __q1 (consuming n1 and the high half
+ * of n0), the second the low digit __q0 (consuming the low half of n0).
+ * The arguments n1, n0 and d are evaluated more than once.
+ */
+#define udiv_qrnnd(q, r, n1, n0, d)                                     \
+  do {                                                                  \
+    UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m;                     \
+    __d1 = __ll_highpart (d);                                           \
+    __d0 = __ll_lowpart (d);                                            \
+                                                                        \
+    __r1 = (n1) % __d1;                                                 \
+    __q1 = (n1) / __d1;                                                 \
+    __m = (UWtype) __q1 * __d0;                                         \
+    __r1 = __r1 * __ll_B | __ll_highpart (n0);                          \
+    if (__r1 < __m)                                                     \
+      {                                                                 \
+        __q1--, __r1 += (d);                                            \
+        if (__r1 >= (d)) /* we didn't get carry when adding to __r1 */  \
+          if (__r1 < __m)                                               \
+            __q1--, __r1 += (d);                                        \
+      }                                                                 \
+    __r1 -= __m;                                                        \
+                                                                        \
+    __r0 = __r1 % __d1;                                                 \
+    __q0 = __r1 / __d1;                                                 \
+    __m = (UWtype) __q0 * __d0;                                         \
+    __r0 = __r0 * __ll_B | __ll_lowpart (n0);                           \
+    if (__r0 < __m)                                                     \
+      {                                                                 \
+        __q0--, __r0 += (d);                                            \
+        if (__r0 >= (d))                                                \
+          if (__r0 < __m)                                               \
+            __q0--, __r0 += (d);                                        \
+      }                                                                 \
+    __r0 -= __m;                                                        \
+                                                                        \
+    (q) = (UWtype) __q1 * __ll_B | __q0;                                \
+    (r) = __r0;                                                         \
+  } while (0)
+/* The C udiv_qrnnd above needs the divisor's top bit set. */
+#define UDIV_NEEDS_NORMALIZATION 1
+/* abort() is redefined to make the enclosing function return 0.  It is
+ * therefore only usable as a statement inside a function that returns a
+ * value.
+ */
+#define abort()                                                         \
+        return 0
+/* Derive the glibc-style __BYTE_ORDER from whether __BIG_ENDIAN is
+ * defined; single.h compares __BYTE_ORDER with __BIG_ENDIAN to pick its
+ * bit-field order.
+ */
+#ifdef __BIG_ENDIAN
+#define __BYTE_ORDER __BIG_ENDIAN
+#else
+#define __BYTE_ORDER __LITTLE_ENDIAN
+#endif
+/* Exception flags returned by the pack and rounding macros.  Each is
+ * written as 1 << (31 - n), i.e. bit n counted from the most significant
+ * end of a 32-bit word; presumably n is the corresponding FPSCR bit
+ * number -- confirm against the FPSCR layout.
+ */
+#define EFLAG_INVALID           (1 << (31 - 2))
+#define EFLAG_OVERFLOW          (1 << (31 - 3))
+#define EFLAG_UNDERFLOW         (1 << (31 - 4))
+#define EFLAG_DIVZERO           (1 << (31 - 5))
+#define EFLAG_INEXACT           (1 << (31 - 6))
+#define EFLAG_VXSNAN            (1 << (31 - 7))
+#define EFLAG_VXISI             (1 << (31 - 8))
+#define EFLAG_VXIDI             (1 << (31 - 9))
+#define EFLAG_VXZDZ             (1 << (31 - 10))
+#define EFLAG_VXIMZ             (1 << (31 - 11))
+#define EFLAG_VXVC              (1 << (31 - 12))
+#define EFLAG_VXSOFT            (1 << (31 - 21))
+#define EFLAG_VXSQRT            (1 << (31 - 22))
+#define EFLAG_VXCVI             (1 << (31 - 23))
diff --git a/arch/ppc/math-emu/single.h b/arch/ppc/math-emu/single.h
new file mode 100644
index 000000000000..f19d99451815
--- /dev/null
+++ b/arch/ppc/math-emu/single.h
@@ -0,0 +1,66 @@
+/*
+ * Definitions for IEEE Single Precision
+ */
+#if _FP_W_TYPE_SIZE < 32
+#error "Here's a nickel kid.  Go buy yourself a real computer."
+#endif
+/* Single-precision layout: 24 fraction bits counting the implicit
+ * leading bit, 8 exponent bits, bias 127.  The W-prefixed values add the
+ * _FP_WORKBITS extra working bits; the X-suffixed values are the unused
+ * bits left in one working word.
+ */
+#define _FP_FRACBITS_S          24
+#define _FP_FRACXBITS_S         (_FP_W_TYPE_SIZE - _FP_FRACBITS_S)
+#define _FP_WFRACBITS_S         (_FP_WORKBITS + _FP_FRACBITS_S)
+#define _FP_WFRACXBITS_S        (_FP_W_TYPE_SIZE - _FP_WFRACBITS_S)
+#define _FP_EXPBITS_S           8
+#define _FP_EXPBIAS_S           127
+#define _FP_EXPMAX_S            255
+/* Masks within the fraction word: quiet-NaN bit, implicit leading bit,
+ * and the bit just above the working fraction.
+ */
+#define _FP_QNANBIT_S           ((_FP_W_TYPE)1 << (_FP_FRACBITS_S-2))
+#define _FP_IMPLBIT_S           ((_FP_W_TYPE)1 << (_FP_FRACBITS_S-1))
+#define _FP_OVERFLOW_S          ((_FP_W_TYPE)1 << (_FP_WFRACBITS_S))
+/* The implementation of _FP_MUL_MEAT_S and _FP_DIV_MEAT_S should be
+   chosen by the target machine.  */
+/* Overlay of a float with its sign / exponent / fraction bit-fields.
+   Field order is reversed between the two byte orders.  The stored
+   fraction is one bit narrower than _FP_FRACBITS_S because the implicit
+   leading bit is not stored.  */
+union _FP_UNION_S
+{
+  float flt;
+  struct {
+#if __BYTE_ORDER == __BIG_ENDIAN
+    unsigned sign : 1;
+    unsigned exp  : _FP_EXPBITS_S;
+    unsigned frac : _FP_FRACBITS_S - (_FP_IMPLBIT_S != 0);
+#else
+    unsigned frac : _FP_FRACBITS_S - (_FP_IMPLBIT_S != 0);
+    unsigned exp  : _FP_EXPBITS_S;
+    unsigned sign : 1;
+#endif
+  } bits __attribute__((packed));
+};
+/* Single-precision front ends: each forwards to the generic one-word
+ * macro with format S and word count 1.
+ */
+#define FP_DECL_S(X)            _FP_DECL(1,X)
+#define FP_UNPACK_RAW_S(X,val)  _FP_UNPACK_RAW_1(S,X,val)
+#define FP_PACK_RAW_S(val,X)    _FP_PACK_RAW_1(S,val,X)
+/* Raw field load followed by canonicalisation. */
+#define FP_UNPACK_S(X,val)              \
+  do {                                  \
+    _FP_UNPACK_RAW_1(S,X,val);          \
+    _FP_UNPACK_CANONICAL(S,1,X);        \
+  } while (0)
+/* Canonical pack followed by an unconditional raw store; the value of
+ * _FP_PACK_CANONICAL is discarded here, unlike in __FP_PACK_S.
+ */
+#define FP_PACK_S(val,X)                \
+  do {                                  \
+    _FP_PACK_CANONICAL(S,1,X);          \
+    _FP_PACK_RAW_1(S,val,X);            \
+  } while (0)
+#define FP_NEG_S(R,X)           _FP_NEG(S,1,R,X)
+#define FP_ADD_S(R,X,Y)         _FP_ADD(S,1,R,X,Y)
+#define FP_SUB_S(R,X,Y)         _FP_SUB(S,1,R,X,Y)
+#define FP_MUL_S(R,X,Y)         _FP_MUL(S,1,R,X,Y)
+#define FP_DIV_S(R,X,Y)         _FP_DIV(S,1,R,X,Y)
+#define FP_SQRT_S(R,X)          _FP_SQRT(S,1,R,X)
+#define FP_CMP_S(r,X,Y,un)      _FP_CMP(S,1,r,X,Y,un)
+#define FP_CMP_EQ_S(r,X,Y)      _FP_CMP_EQ(S,1,r,X,Y)
+#define FP_TO_INT_S(r,X,rsz,rsg)  _FP_TO_INT(S,1,r,X,rsz,rsg)
+#define FP_FROM_INT_S(X,r,rs,rt)  _FP_FROM_INT(S,1,X,r,rs,rt)
diff --git a/arch/ppc/math-emu/soft-fp.h b/arch/ppc/math-emu/soft-fp.h
new file mode 100644
index 000000000000..cca39598f873
--- /dev/null
+++ b/arch/ppc/math-emu/soft-fp.h
@@ -0,0 +1,104 @@
+#ifndef SOFT_FP_H
+#define SOFT_FP_H
+#include "sfp-machine.h"
+/* Number of extra low-order working bits carried in a fraction, and
+ * masks for the bits the rounding macros inspect: _FP_WORK_LSB is the
+ * lowest bit above the working bits; ROUND, GUARD and STICKY are the
+ * three working bits below it, from high to low.
+ */
+#define _FP_WORKBITS            3
+#define _FP_WORK_LSB            ((_FP_W_TYPE)1 << 3)
+#define _FP_WORK_ROUND          ((_FP_W_TYPE)1 << 2)
+#define _FP_WORK_GUARD          ((_FP_W_TYPE)1 << 1)
+#define _FP_WORK_STICKY         ((_FP_W_TYPE)1 << 0)
+/* Default rounding-mode codes, used unless sfp-machine.h supplied its
+ * own.  Note the nesting: the FP_ROUNDMODE fallback is only considered
+ * when FP_RND_NEAREST was not already defined.
+ */
+#ifndef FP_RND_NEAREST
+# define FP_RND_NEAREST         0
+# define FP_RND_ZERO            1
+# define FP_RND_PINF            2
+# define FP_RND_MINF            3
+#ifndef FP_ROUNDMODE
+# define FP_ROUNDMODE           FP_RND_NEAREST
+#endif
+#endif
+/* Round X to nearest, ties to even, on its three working bits.
+ * _FP_WORK_ROUND is added unless the working bits are an exact tie with
+ * an even LSB; the working bits themselves are left for the caller to
+ * discard.  Evaluates to EFLAG_INEXACT when any working bit was set,
+ * otherwise 0.
+ */
+#define _FP_ROUND_NEAREST(wc, X)                        \
+({  int __low = _FP_FRAC_LOW_##wc(X) & 15;              \
+    int __inexact = (__low & 7) != 0;                   \
+    if (__inexact &&                                    \
+        ((__low & 7) != _FP_WORK_ROUND ||               \
+         (__low & _FP_WORK_LSB)))                       \
+      _FP_FRAC_ADDI_##wc(X, _FP_WORK_ROUND);            \
+    __inexact ? EFLAG_INEXACT : 0;                      \
+})
+/* Round X toward zero: the fraction is left untouched (the working bits
+ * are simply dropped later).  Evaluates to EFLAG_INEXACT when any
+ * working bit is set, otherwise 0.
+ */
+#define _FP_ROUND_ZERO(wc, X)                           \
+({  (_FP_FRAC_LOW_##wc(X) & 7) ? EFLAG_INEXACT : 0; })
+/* Round X toward +infinity.  When any working bit is set the result is
+ * inexact: a positive value is bumped up by one LSB, a negative value is
+ * left to be truncated.  Evaluates to EFLAG_INEXACT in both of those
+ * cases (truncating a negative value is still an inexact result) and to
+ * 0 only when all working bits are clear.
+ */
+#define _FP_ROUND_PINF(wc, X)                           \
+({  int __ret = 0;                                      \
+    if (_FP_FRAC_LOW_##wc(X) & 7) {                     \
+      __ret = EFLAG_INEXACT;                            \
+      if (!X##_s)                                       \
+        _FP_FRAC_ADDI_##wc(X, _FP_WORK_LSB);            \
+    }                                                   \
+    __ret;                                              \
+})
+/* Round X toward -infinity.  When any working bit is set the result is
+ * inexact: a negative value has its magnitude bumped up by one LSB, a
+ * positive value is left to be truncated.  Evaluates to EFLAG_INEXACT in
+ * both of those cases (truncating a positive value is still an inexact
+ * result) and to 0 only when all working bits are clear.
+ */
+#define _FP_ROUND_MINF(wc, X)                           \
+({  int __ret = 0;                                      \
+    if (_FP_FRAC_LOW_##wc(X) & 7) {                     \
+      __ret = EFLAG_INEXACT;                            \
+      if (X##_s)                                        \
+        _FP_FRAC_ADDI_##wc(X, _FP_WORK_LSB);            \
+    }                                                   \
+    __ret;                                              \
+})
+/* Round X according to FP_ROUNDMODE, which is read once.  Dispatches to
+ * the matching _FP_ROUND_* macro and evaluates to its exception flags;
+ * an unrecognised mode leaves X untouched and yields 0.
+ */
+#define _FP_ROUND(wc, X)                        \
+({      int __flags;                            \
+        const int __mode = FP_ROUNDMODE;        \
+        if (__mode == FP_RND_NEAREST)           \
+          __flags = _FP_ROUND_NEAREST(wc,X);    \
+        else if (__mode == FP_RND_ZERO)         \
+          __flags = _FP_ROUND_ZERO(wc,X);       \
+        else if (__mode == FP_RND_PINF)         \
+          __flags = _FP_ROUND_PINF(wc,X);       \
+        else if (__mode == FP_RND_MINF)         \
+          __flags = _FP_ROUND_MINF(wc,X);       \
+        else                                    \
+          __flags = 0;                          \
+        __flags;                                \
+})
+/* Operand classes stored in X##_c.  Each fits in two bits, so
+ * _FP_CLS_COMBINE packs a pair of classes into one small integer.
+ */
+#define FP_CLS_NORMAL           0
+#define FP_CLS_ZERO             1
+#define FP_CLS_INF              2
+#define FP_CLS_NAN              3
+#define _FP_CLS_COMBINE(x,y)    (((x) << 2) | (y))
+/* Per-word-count fraction primitives, then the format-independent
+ * operations built on them.
+ */
+#include "op-1.h"
+#include "op-2.h"
+#include "op-4.h"
+#include "op-common.h"
+/* Sigh.  Silly things longlong.h needs.  */
+/* UWtype / W_TYPE_SIZE alias the soft-fp working word; they are consumed
+   by the __ll_* and udiv_qrnnd macros in sfp-machine.h.  The remaining
+   typedefs select integer widths through GCC machine modes (SI, DI, HI);
+   UHWtype is half the working word.  */
+#define UWtype          _FP_W_TYPE
+#define W_TYPE_SIZE     _FP_W_TYPE_SIZE
+typedef int SItype __attribute__((mode(SI)));
+typedef int DItype __attribute__((mode(DI)));
+typedef unsigned int USItype __attribute__((mode(SI)));
+typedef unsigned int UDItype __attribute__((mode(DI)));
+#if _FP_W_TYPE_SIZE == 32
+typedef unsigned int UHWtype __attribute__((mode(HI)));
+#elif _FP_W_TYPE_SIZE == 64
+typedef USItype UHWtype;
+#endif
+#endif
diff --git a/arch/ppc/math-emu/stfd.c b/arch/ppc/math-emu/stfd.c
new file mode 100644
index 000000000000..3f8c2558a9e8
--- /dev/null
+++ b/arch/ppc/math-emu/stfd.c
@@ -0,0 +1,20 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+/* Emulate stfd: copy the 8-byte double at frS to the user address ea.
+ * Returns 0 on success or -EFAULT when the user copy fails.
+ */
+int
+stfd(void *frS, void *ea)
+{
+#if 0
+#ifdef DEBUG
+        printk("%s: S %p, ea %p: ", __FUNCTION__, frS, ea);
+        dump_double(frS);
+        printk("\n");
+#endif
+#endif
+        return copy_to_user(ea, frS, sizeof(double)) ? -EFAULT : 0;
+}
diff --git a/arch/ppc/math-emu/stfiwx.c b/arch/ppc/math-emu/stfiwx.c
new file mode 100644
index 000000000000..95caaeec6a08
--- /dev/null
+++ b/arch/ppc/math-emu/stfiwx.c
@@ -0,0 +1,16 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+/* Emulate stfiwx: copy the second 32-bit word of the register image at
+ * frS to the user address ea.  Returns 0 on success or -EFAULT when the
+ * user copy fails.
+ */
+int
+stfiwx(u32 *frS, void *ea)
+{
+        u32 *word = &frS[1];
+#ifdef DEBUG
+        printk("%s: %p %p\n", __FUNCTION__, frS, ea);
+#endif
+        return copy_to_user(ea, word, sizeof(*word)) ? -EFAULT : 0;
+}
diff --git a/arch/ppc/math-emu/stfs.c b/arch/ppc/math-emu/stfs.c
new file mode 100644
index 000000000000..e87ca23c6dc3
--- /dev/null
+++ b/arch/ppc/math-emu/stfs.c
@@ -0,0 +1,41 @@
+#include <linux/types.h>
+#include <linux/errno.h>
+#include <asm/uaccess.h>
+#include "soft-fp.h"
+#include "double.h"
+#include "single.h"
+/* Emulate stfs: convert the double at frS to single precision and copy
+ * the 4-byte result to the user address ea.
+ *
+ * Returns -EFAULT when the user copy fails; otherwise the exception
+ * flags from packing the single.  Nothing is stored when one of those
+ * flags satisfies __FPU_TRAP_P().
+ */
+int
+stfs(void *frS, void *ea)
+{
+        FP_DECL_D(A);
+        FP_DECL_S(R);
+        float f;
+        int err;
+#ifdef DEBUG
+        printk("%s: S %p, ea %p\n", __FUNCTION__, frS, ea);
+#endif
+        __FP_UNPACK_D(A, frS);
+#ifdef DEBUG
+        printk("A: %ld %lu %lu %ld (%ld)\n", A_s, A_f1, A_f0, A_e, A_c);
+#endif
+        /* Narrow the two-word double A into the one-word single R. */
+        FP_CONV(S, D, 1, 2, R, A);
+#ifdef DEBUG
+        printk("R: %ld %lu %ld (%ld)\n", R_s, R_f, R_e, R_c);
+#endif
+        err = _FP_PACK_CANONICAL(S, 1, R);
+        if (!err || !__FPU_TRAP_P(err)) {
+                /* Build the float in a kernel local, then copy it out. */
+                __FP_PACK_RAW_1(S, &f, R);
+                if (copy_to_user(ea, &f, sizeof(float)))
+                        return -EFAULT;
+        }
+        return err;
+}
diff --git a/arch/ppc/math-emu/types.c b/arch/ppc/math-emu/types.c
new file mode 100644
index 000000000000..e1ed15d829db
--- /dev/null
+++ b/arch/ppc/math-emu/types.c
@@ -0,0 +1,51 @@
+#include "soft-fp.h"
+#include "double.h"
+#include "single.h"
+/* Unpack the double at val: load its raw fields, canonicalise them, and
+ * return sign, high and low fraction words, exponent and class through
+ * the five output pointers.
+ */
+void
+fp_unpack_d(long *_s, unsigned long *_f1, unsigned long *_f0,
+            long *_e, long *_c, void *val)
+{
+        FP_DECL_D(X);
+        __FP_UNPACK_RAW_2(D, X, val);
+        _FP_UNPACK_CANONICAL(D, 2, X);
+        *_s = X_s;
+        *_f1 = X_f1;
+        *_f0 = X_f0;
+        *_e = X_e;
+        *_c = X_c;
+}
+/* Pack an unpacked double into val.  The parameters are named X_* so
+ * that the X-based soft-fp macros operate on them directly.  The raw
+ * store is skipped when a raised exception satisfies __FPU_TRAP_P().
+ * Returns the exception flags from the canonical pack.
+ */
+int
+fp_pack_d(void *val, long X_s, unsigned long X_f1,
+          unsigned long X_f0, long X_e, long X_c)
+{
+        int exc;
+        exc = _FP_PACK_CANONICAL(D, 2, X);
+        if (!exc || !__FPU_TRAP_P(exc))
+                __FP_PACK_RAW_2(D, val, X);
+        return exc;
+}
+/* Pack an unpacked double into val after rounding it to single
+ * precision: X is narrowed to the single __X and packed canonically,
+ * then unpacked again, widened back into X and packed and stored as a
+ * double.  The parameters are named X_* so that the X-based soft-fp
+ * macros operate on them directly.
+ *
+ * Returns the exception flags of the single pack, ORed with those of the
+ * double pack when that step is reached.  Each stage is skipped when the
+ * flags so far satisfy __FPU_TRAP_P().
+ */
+int
+fp_pack_ds(void *val, long X_s, unsigned long X_f1,
+           unsigned long X_f0, long X_e, long X_c)
+{
+        FP_DECL_S(__X);
+        int exc;
+        FP_CONV(S, D, 1, 2, __X, X);
+        exc = _FP_PACK_CANONICAL(S, 1, __X);
+        if (!exc || !__FPU_TRAP_P(exc)) {
+                _FP_UNPACK_CANONICAL(S, 1, __X);
+                FP_CONV(D, S, 2, 1, X, __X);
+                exc |= _FP_PACK_CANONICAL(D, 2, X);
+                if (!exc || !__FPU_TRAP_P(exc))
+                        __FP_PACK_RAW_2(D, val, X);
+        }
+        return exc;
+}
diff --git a/arch/ppc/math-emu/udivmodti4.c b/arch/ppc/math-emu/udivmodti4.c
new file mode 100644
index 000000000000..7e112dc1e2f2
--- /dev/null
+++ b/arch/ppc/math-emu/udivmodti4.c
@@ -0,0 +1,191 @@
+/* This has so very few changes over libgcc2's __udivmoddi4 it isn't funny.  */
+#include "soft-fp.h"
+#undef count_leading_zeros
+#define count_leading_zeros  __FP_CLZ   /* lets the libgcc-style count_leading_zeros() calls below use __FP_CLZ */
+void
+_fp_udivmodti4(_FP_W_TYPE q[2], _FP_W_TYPE r[2],
+               _FP_W_TYPE n1, _FP_W_TYPE n0,
+               _FP_W_TYPE d1, _FP_W_TYPE d0)
+{ /* Unsigned division of the two-word value n1:n0 by the two-word value
+     d1:d0, where the "1" word is the more significant one.  Both results
+     are returned through the array arguments: q[0] and r[0] receive the
+     low words of the quotient and the remainder, q[1] and r[1] the high
+     words.  A zero divisor (d1 == 0 and d0 == 0) deliberately executes
+     1 / d0.
+     udiv_qrnnd, umul_ppmm and sub_ddmmss are not defined in this file;
+     their operand order is assumed to follow libgcc's longlong.h
+     (outputs first, then inputs) -- confirm there.  */
+  _FP_W_TYPE q0, q1, r0, r1;
+  _FP_I_TYPE b, bm;     /* bm: leading zero count of the divisor's top word; b: _FP_W_TYPE_SIZE - bm */
+  if (d1 == 0)
+    {
+#if !UDIV_NEEDS_NORMALIZATION
+      if (d0 > n1)
+        {
+          /* 0q = nn / 0D */
+          udiv_qrnnd (q0, n0, n1, n0, d0);
+          q1 = 0;
+          /* Remainder in n0.  */
+        }
+      else
+        {
+          /* qq = NN / 0d */
+          if (d0 == 0)
+            d0 = 1 / d0;        /* Divide intentionally by zero.  */
+          udiv_qrnnd (q1, n1, 0, n1, d0);
+          udiv_qrnnd (q0, n0, n1, n0, d0);
+          /* Remainder in n0.  */
+        }
+      r0 = n0;
+      r1 = 0;
+#else /* UDIV_NEEDS_NORMALIZATION */
+      if (d0 > n1)
+        {
+          /* 0q = nn / 0D */
+          count_leading_zeros (bm, d0);
+          if (bm != 0)  /* with bm == 0 the n0 shift below would be by the full word width */
+            {
+              /* Normalize, i.e. make the most significant bit of the
+                 denominator set.  */
+              d0 = d0 << bm;
+              n1 = (n1 << bm) | (n0 >> (_FP_W_TYPE_SIZE - bm));
+              n0 = n0 << bm;
+            }
+          udiv_qrnnd (q0, n0, n1, n0, d0);
+          q1 = 0;
+          /* Remainder in n0 >> bm.  */
+        }
+      else
+        {
+          /* qq = NN / 0d */
+          if (d0 == 0)
+            d0 = 1 / d0;        /* Divide intentionally by zero.  */
+          count_leading_zeros (bm, d0);
+          if (bm == 0)
+            {
+              /* From (n1 >= d0) /\ (the most significant bit of d0 is set),
+                 conclude (the most significant bit of n1 is set) /\ (the
+                 leading quotient digit q1 = 1).
+                 This special case is necessary, not an optimization.
+                 (Shift counts of _FP_W_TYPE_SIZE are undefined, and the
+                 general path below would shift by b = _FP_W_TYPE_SIZE.)  */
+              n1 -= d0;
+              q1 = 1;
+            }
+          else
+            {
+              _FP_W_TYPE n2;    /* receives the bits shifted out of the top of n1 */
+              /* Normalize.  */
+              b = _FP_W_TYPE_SIZE - bm;
+              d0 = d0 << bm;
+              n2 = n1 >> b;
+              n1 = (n1 << bm) | (n0 >> b);
+              n0 = n0 << bm;
+              udiv_qrnnd (q1, n1, n2, n1, d0);
+            }
+          /* n1 != d0...  */
+          udiv_qrnnd (q0, n0, n1, n0, d0);
+          /* Remainder in n0 >> bm.  */
+        }
+      r0 = n0 >> bm;
+      r1 = 0;
+#endif /* UDIV_NEEDS_NORMALIZATION */
+    }
+  else
+    {
+      if (d1 > n1)
+        {
+          /* 00 = nn / DD */
+          q0 = 0;
+          q1 = 0;
+          /* Remainder in n1n0.  */
+          r0 = n0;
+          r1 = n1;
+        }
+      else
+        {
+          /* 0q = NN / dd */
+          count_leading_zeros (bm, d1);
+          if (bm == 0)
+            {
+              /* From (n1 >= d1) /\ (the most significant bit of d1 is set),
+                 conclude (the most significant bit of n1 is set) /\ (the
+                 quotient digit q0 = 0 or 1).
+                 This special case is necessary, not an optimization.  */
+              /* The condition on the next line takes advantage of that
+                 n1 >= d1 (true due to program flow).  */
+              if (n1 > d1 || n0 >= d0)
+                {
+                  q0 = 1;
+                  sub_ddmmss (n1, n0, n1, n0, d1, d0);
+                }
+              else
+                q0 = 0;
+              q1 = 0;
+              r0 = n0;
+              r1 = n1;
+            }
+          else
+            {
+              _FP_W_TYPE m1, m0, n2;    /* m1:m0 receives umul_ppmm's result for q0 and d0; n2 the bits shifted out of n1 */
+              /* Normalize.  */
+              b = _FP_W_TYPE_SIZE - bm;
+              d1 = (d1 << bm) | (d0 >> b);
+              d0 = d0 << bm;
+              n2 = n1 >> b;
+              n1 = (n1 << bm) | (n0 >> b);
+              n0 = n0 << bm;
+              udiv_qrnnd (q0, n1, n2, n1, d1);
+              umul_ppmm (m1, m0, q0, d0);
+              if (m1 > n1 || (m1 == n1 && m0 > n0))     /* m1:m0 is larger than n1:n0, so step q0 down by one */
+                {
+                  q0--;
+                  sub_ddmmss (m1, m0, m1, m0, d1, d0);
+                }
+              q1 = 0;
+              /* Remainder in (n1n0 - m1m0) >> bm.  */
+              sub_ddmmss (n1, n0, n1, n0, m1, m0);
+              r0 = (n1 << b) | (n0 >> bm);
+              r1 = n1 >> bm;
+            }
+        }
+    }
+  q[0] = q0; q[1] = q1;
+  r[0] = r0, r[1] = r1;
+}