18 files changed, 3825 insertions, 0 deletions
diff --git a/lib/mpi/Makefile b/lib/mpi/Makefile
new file mode 100644
index 000000000000..45ca90a8639c
--- /dev/null
+++ b/lib/mpi/Makefile
@@ -0,0 +1,21 @@
+#
+# MPI multiprecision maths library (from gpg)
+#
+obj-$(CONFIG_MPILIB) = mpi.o
+mpi-y = \
+        generic_mpih-lshift.o           \
+        generic_mpih-mul1.o             \
+        generic_mpih-mul2.o             \
+        generic_mpih-mul3.o             \
+        generic_mpih-rshift.o           \
+        generic_mpih-sub1.o             \
+        generic_mpih-add1.o             \
+        mpicoder.o                      \
+        mpi-bit.o                       \
+        mpih-cmp.o                      \
+        mpih-div.o                      \
+        mpih-mul.o                      \
+        mpi-pow.o                       \
+        mpiutil.o
diff --git a/lib/mpi/generic_mpih-add1.c b/lib/mpi/generic_mpih-add1.c
new file mode 100644
index 000000000000..c94c7dd344b3
--- /dev/null
+++ b/lib/mpi/generic_mpih-add1.c
@@ -0,0 +1,61 @@
+/* mpihelp-add_1.c  -  MPI helper functions
+ * Copyright (C) 1994, 1996, 1997, 1998,
+ *               2000 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *       Actually it's the same code with only minor changes in the
+ *       way the data is stored; this is to support the abstraction
+ *       of an optional secure memory allocation which may be used
+ *       to avoid revealing of sensitive data due to paging etc.
+ *       The GNU MP Library itself is published under the LGPL;
+ *       however I decided to publish this code under the plain GPL.
+ */
+#include "mpi-internal.h"
+#include "longlong.h"
+mpi_limb_t
+mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+              mpi_ptr_t s2_ptr, mpi_size_t size)
+{
+        mpi_limb_t x, y, cy;
+        mpi_size_t j;
+        /* The loop counter and index J goes from -SIZE to -1.  This way
+           the loop becomes faster.  */
+        j = -size;
+        /* Offset the base pointers to compensate for the negative indices. */
+        s1_ptr -= j;
+        s2_ptr -= j;
+        res_ptr -= j;
+        cy = 0;
+        do {
+                y = s2_ptr[j];
+                x = s1_ptr[j];
+                y += cy;        /* add previous carry to one addend */
+                cy = y < cy;    /* get out carry from that addition */
+                y += x;         /* add other addend */
+                cy += y < x;    /* get out carry from that add, combine */
+                res_ptr[j] = y;
+        } while (++j);
+        return cy;
+}
diff --git a/lib/mpi/generic_mpih-lshift.c b/lib/mpi/generic_mpih-lshift.c
new file mode 100644
index 000000000000..86318927231a
--- /dev/null
+++ b/lib/mpi/generic_mpih-lshift.c
@@ -0,0 +1,63 @@
+/* mpihelp-lshift.c  -  MPI helper functions
+ * Copyright (C) 1994, 1996, 1998, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *       Actually it's the same code with only minor changes in the
+ *       way the data is stored; this is to support the abstraction
+ *       of an optional secure memory allocation which may be used
+ *       to avoid revealing of sensitive data due to paging etc.
+ *       The GNU MP Library itself is published under the LGPL;
+ *       however I decided to publish this code under the plain GPL.
+ */
+#include "mpi-internal.h"
+/* Shift U (pointed to by UP and USIZE digits long) CNT bits to the left
+ * and store the USIZE least significant digits of the result at WP.
+ * Return the bits shifted out from the most significant digit.
+ *
+ * Argument constraints:
+ * 1. 0 < CNT < BITS_PER_MP_LIMB
+ * 2. If the result is to be written over the input, WP must be >= UP.
+ */
+mpi_limb_t
+mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned int cnt)
+{
+        mpi_limb_t high_limb, low_limb;
+        unsigned sh_1, sh_2;
+        mpi_size_t i;
+        mpi_limb_t retval;
+        sh_1 = cnt;
+        wp += 1;
+        sh_2 = BITS_PER_MPI_LIMB - sh_1;
+        i = usize - 1;
+        low_limb = up[i];
+        retval = low_limb >> sh_2;
+        high_limb = low_limb;
+        while (--i >= 0) {
+                low_limb = up[i];
+                wp[i] = (high_limb << sh_1) | (low_limb >> sh_2);
+                high_limb = low_limb;
+        }
+        wp[i] = high_limb << sh_1;
+        return retval;
+}
diff --git a/lib/mpi/generic_mpih-mul1.c b/lib/mpi/generic_mpih-mul1.c
new file mode 100644
index 000000000000..1668dfd9092c
--- /dev/null
+++ b/lib/mpi/generic_mpih-mul1.c
@@ -0,0 +1,57 @@
+/* mpihelp-mul_1.c  -  MPI helper functions
+ * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *       Actually it's the same code with only minor changes in the
+ *       way the data is stored; this is to support the abstraction
+ *       of an optional secure memory allocation which may be used
+ *       to avoid revealing of sensitive data due to paging etc.
+ *       The GNU MP Library itself is published under the LGPL;
+ *       however I decided to publish this code under the plain GPL.
+ */
+#include "mpi-internal.h"
+#include "longlong.h"
+mpi_limb_t
+mpihelp_mul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+              mpi_limb_t s2_limb)
+{
+        mpi_limb_t cy_limb;
+        mpi_size_t j;
+        mpi_limb_t prod_high, prod_low;
+        /* The loop counter and index J goes from -S1_SIZE to -1.  This way
+         * the loop becomes faster.  */
+        j = -s1_size;
+        /* Offset the base pointers to compensate for the negative indices.  */
+        s1_ptr -= j;
+        res_ptr -= j;
+        cy_limb = 0;
+        do {
+                umul_ppmm(prod_high, prod_low, s1_ptr[j], s2_limb);
+                prod_low += cy_limb;
+                cy_limb = (prod_low < cy_limb ? 1 : 0) + prod_high;
+                res_ptr[j] = prod_low;
+        } while (++j);
+        return cy_limb;
+}
diff --git a/lib/mpi/generic_mpih-mul2.c b/lib/mpi/generic_mpih-mul2.c
new file mode 100644
index 000000000000..8a7b29ee1740
--- /dev/null
+++ b/lib/mpi/generic_mpih-mul2.c
@@ -0,0 +1,60 @@
+/* mpihelp-mul_2.c  -  MPI helper functions
+ * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *       Actually it's the same code with only minor changes in the
+ *       way the data is stored; this is to support the abstraction
+ *       of an optional secure memory allocation which may be used
+ *       to avoid revealing of sensitive data due to paging etc.
+ *       The GNU MP Library itself is published under the LGPL;
+ *       however I decided to publish this code under the plain GPL.
+ */
+#include "mpi-internal.h"
+#include "longlong.h"
+mpi_limb_t
+mpihelp_addmul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+                 mpi_size_t s1_size, mpi_limb_t s2_limb)
+{
+        mpi_limb_t cy_limb;
+        mpi_size_t j;
+        mpi_limb_t prod_high, prod_low;
+        mpi_limb_t x;
+        /* The loop counter and index J goes from -SIZE to -1.  This way
+         * the loop becomes faster.  */
+        j = -s1_size;
+        res_ptr -= j;
+        s1_ptr -= j;
+        cy_limb = 0;
+        do {
+                umul_ppmm(prod_high, prod_low, s1_ptr[j], s2_limb);
+                prod_low += cy_limb;
+                cy_limb = (prod_low < cy_limb ? 1 : 0) + prod_high;
+                x = res_ptr[j];
+                prod_low = x + prod_low;
+                cy_limb += prod_low < x ? 1 : 0;
+                res_ptr[j] = prod_low;
+        } while (++j);
+        return cy_limb;
+}
diff --git a/lib/mpi/generic_mpih-mul3.c b/lib/mpi/generic_mpih-mul3.c
new file mode 100644
index 000000000000..f96df327be63
--- /dev/null
+++ b/lib/mpi/generic_mpih-mul3.c
@@ -0,0 +1,61 @@
+/* mpihelp-mul_3.c  -  MPI helper functions
+ * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *       Actually it's the same code with only minor changes in the
+ *       way the data is stored; this is to support the abstraction
+ *       of an optional secure memory allocation which may be used
+ *       to avoid revealing of sensitive data due to paging etc.
+ *       The GNU MP Library itself is published under the LGPL;
+ *       however I decided to publish this code under the plain GPL.
+ */
+#include "mpi-internal.h"
+#include "longlong.h"
+mpi_limb_t
+mpihelp_submul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+                 mpi_size_t s1_size, mpi_limb_t s2_limb)
+{
+        mpi_limb_t cy_limb;
+        mpi_size_t j;
+        mpi_limb_t prod_high, prod_low;
+        mpi_limb_t x;
+        /* The loop counter and index J goes from -SIZE to -1.  This way
+         * the loop becomes faster.  */
+        j = -s1_size;
+        res_ptr -= j;
+        s1_ptr -= j;
+        cy_limb = 0;
+        do {
+                umul_ppmm(prod_high, prod_low, s1_ptr[j], s2_limb);
+                prod_low += cy_limb;
+                cy_limb = (prod_low < cy_limb ? 1 : 0) + prod_high;
+                x = res_ptr[j];
+                prod_low = x - prod_low;
+                cy_limb += prod_low > x ? 1 : 0;
+                res_ptr[j] = prod_low;
+        } while (++j);
+        return cy_limb;
+}
diff --git a/lib/mpi/generic_mpih-rshift.c b/lib/mpi/generic_mpih-rshift.c
new file mode 100644
index 000000000000..ffa328818ca6
--- /dev/null
+++ b/lib/mpi/generic_mpih-rshift.c
@@ -0,0 +1,63 @@
+/* mpih-rshift.c  -  MPI helper functions
+ * Copyright (C) 1994, 1996, 1998, 1999,
+ *               2000, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *       Actually it's the same code with only minor changes in the
+ *       way the data is stored; this is to support the abstraction
+ *       of an optional secure memory allocation which may be used
+ *       to avoid revealing of sensitive data due to paging etc.
+ *       The GNU MP Library itself is published under the LGPL;
+ *       however I decided to publish this code under the plain GPL.
+ */
+#include "mpi-internal.h"
+/* Shift U (pointed to by UP and USIZE limbs long) CNT bits to the right
+ * and store the USIZE least significant limbs of the result at WP.
+ * The bits shifted out to the right are returned.
+ *
+ * Argument constraints:
+ * 1. 0 < CNT < BITS_PER_MP_LIMB
+ * 2. If the result is to be written over the input, WP must be <= UP.
+ */
+mpi_limb_t
+mpihelp_rshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned cnt)
+{
+        mpi_limb_t high_limb, low_limb;
+        unsigned sh_1, sh_2;
+        mpi_size_t i;
+        mpi_limb_t retval;
+        sh_1 = cnt;
+        wp -= 1;
+        sh_2 = BITS_PER_MPI_LIMB - sh_1;
+        high_limb = up[0];
+        retval = high_limb << sh_2;
+        low_limb = high_limb;
+        for (i = 1; i < usize; i++) {
+                high_limb = up[i];
+                wp[i] = (low_limb >> sh_1) | (high_limb << sh_2);
+                low_limb = high_limb;
+        }
+        wp[i] = low_limb >> sh_1;
+        return retval;
+}
diff --git a/lib/mpi/generic_mpih-sub1.c b/lib/mpi/generic_mpih-sub1.c
new file mode 100644
index 000000000000..5d98ab7d6853
--- /dev/null
+++ b/lib/mpi/generic_mpih-sub1.c
@@ -0,0 +1,60 @@
+/* mpihelp-add_2.c  -  MPI helper functions
+ * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *       Actually it's the same code with only minor changes in the
+ *       way the data is stored; this is to support the abstraction
+ *       of an optional secure memory allocation which may be used
+ *       to avoid revealing of sensitive data due to paging etc.
+ *       The GNU MP Library itself is published under the LGPL;
+ *       however I decided to publish this code under the plain GPL.
+ */
+#include "mpi-internal.h"
+#include "longlong.h"
+mpi_limb_t
+mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+              mpi_ptr_t s2_ptr, mpi_size_t size)
+{
+        mpi_limb_t x, y, cy;
+        mpi_size_t j;
+        /* The loop counter and index J goes from -SIZE to -1.  This way
+           the loop becomes faster.  */
+        j = -size;
+        /* Offset the base pointers to compensate for the negative indices.  */
+        s1_ptr -= j;
+        s2_ptr -= j;
+        res_ptr -= j;
+        cy = 0;
+        do {
+                y = s2_ptr[j];
+                x = s1_ptr[j];
+                y += cy;        /* add previous carry to subtrahend */
+                cy = y < cy;    /* get out carry from that addition */
+                y = x - y;      /* main subtract */
+                cy += y > x;    /* get out carry from the subtract, combine */
+                res_ptr[j] = y;
+        } while (++j);
+        return cy;
+}
diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h
new file mode 100644
index 000000000000..29f98624ef93
--- /dev/null
+++ b/lib/mpi/longlong.h
@@ -0,0 +1,1500 @@
+/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
+ * Note: I added some stuff for use with gnupg
+ *
+ * Copyright (C) 1991, 1992, 1993, 1994, 1996, 1998,
+ *      2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Library General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this file; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA. */
+/* You have to define the following before including this file:
+ *
+ * UWtype -- An unsigned type, default type for operations (typically a "word")
+ * UHWtype -- An unsigned type, at least half the size of UWtype.
+ * UDWtype -- An unsigned type, at least twice as large a UWtype
+ * W_TYPE_SIZE -- size in bits of UWtype
+ *
+ * SItype, USItype -- Signed and unsigned 32 bit types.
+ * DItype, UDItype -- Signed and unsigned 64 bit types.
+ *
+ * On a 32 bit machine UWtype should typically be USItype;
+ * on a 64 bit machine, UWtype should typically be UDItype.
+*/
+#define __BITS4 (W_TYPE_SIZE / 4)
+#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
+#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
+#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
+/* This is used to make sure no undesirable sharing between different libraries
+        that use this file takes place.  */
+#ifndef __MPN
+#define __MPN(x) __##x
+#endif
+/* Define auxiliary asm macros.
+ *
+ * 1) umul_ppmm(high_prod, low_prod, multipler, multiplicand) multiplies two
+ * UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype
+ * word product in HIGH_PROD and LOW_PROD.
+ *
+ * 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
+ * UDWtype product.  This is just a variant of umul_ppmm.
+ * 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+ * denominator) divides a UDWtype, composed by the UWtype integers
+ * HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
+ * in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
+ * than DENOMINATOR for correct operation.  If, in addition, the most
+ * significant bit of DENOMINATOR must be 1, then the pre-processor symbol
+ * UDIV_NEEDS_NORMALIZATION is defined to 1.
+ * 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+ * denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
+ * is rounded towards 0.
+ *
+ * 5) count_leading_zeros(count, x) counts the number of zero-bits from the
+ * msb to the first non-zero bit in the UWtype X.  This is the number of
+ * steps X needs to be shifted left to set the msb.  Undefined for X == 0,
+ * unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
+ *
+ * 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
+ * from the least significant end.
+ *
+ * 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
+ * high_addend_2, low_addend_2) adds two UWtype integers, composed by
+ * HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
+ * respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
+ * (i.e. carry out) is not stored anywhere, and is lost.
+ *
+ * 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
+ * high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
+ * composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and
+ * LOW_SUBTRAHEND_2 respectively.  The result is placed in HIGH_DIFFERENCE
+ * and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
+ * and is lost.
+ *
+ * If any of these macros are left undefined for a particular CPU,
+ * C macros are used.  */
+/* The CPUs come in alphabetical order below.
+ *
+ * Please add support for more CPUs here, or improve the current support
+ * for the CPUs below!  */
+#if defined(__GNUC__) && !defined(NO_ASM)
+/* We sometimes need to clobber "cc" with gcc2, but that would not be
+        understood by gcc1.     Use cpp to avoid major code duplication.  */
+#if __GNUC__ < 2
+#define __CLOBBER_CC
+#define __AND_CLOBBER_CC
+#else /* __GNUC__ >= 2 */
+#define __CLOBBER_CC : "cc"
+#define __AND_CLOBBER_CC , "cc"
+#endif /* __GNUC__ < 2 */
+/***************************************
+        **************  A29K  *****************
+        ***************************************/
+#if (defined(__a29k__) || defined(_AM29K)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+        __asm__ ("add %1,%4,%5\n" \
+                "addc %0,%2,%3" \
+        : "=r" ((USItype)(sh)), \
+                "=&r" ((USItype)(sl)) \
+        : "%r" ((USItype)(ah)), \
+                "rI" ((USItype)(bh)), \
+                "%r" ((USItype)(al)), \
+                "rI" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+        __asm__ ("sub %1,%4,%5\n" \
+                "subc %0,%2,%3" \
+        : "=r" ((USItype)(sh)), \
+                "=&r" ((USItype)(sl)) \
+        : "r" ((USItype)(ah)), \
+                "rI" ((USItype)(bh)), \
+                "r" ((USItype)(al)), \
+                "rI" ((USItype)(bl)))
+#define umul_ppmm(xh, xl, m0, m1) \
+do { \
+                USItype __m0 = (m0), __m1 = (m1); \
+                __asm__ ("multiplu %0,%1,%2" \
+                : "=r" ((USItype)(xl)) \
+                : "r" (__m0), \
+                        "r" (__m1)); \
+                __asm__ ("multmu %0,%1,%2" \
+                : "=r" ((USItype)(xh)) \
+                : "r" (__m0), \
+                        "r" (__m1)); \
+} while (0)
+#define udiv_qrnnd(q, r, n1, n0, d) \
+        __asm__ ("dividu %0,%3,%4" \
+        : "=r" ((USItype)(q)), \
+                "=q" ((USItype)(r)) \
+        : "1" ((USItype)(n1)), \
+                "r" ((USItype)(n0)), \
+                "r" ((USItype)(d)))
+#define count_leading_zeros(count, x) \
+        __asm__ ("clz %0,%1" \
+        : "=r" ((USItype)(count)) \
+        : "r" ((USItype)(x)))
+#define COUNT_LEADING_ZEROS_0 32
+#endif /* __a29k__ */
+#if defined(__alpha) && W_TYPE_SIZE == 64
+#define umul_ppmm(ph, pl, m0, m1) \
+do { \
+                UDItype __m0 = (m0), __m1 = (m1); \
+                __asm__ ("umulh %r1,%2,%0" \
+                : "=r" ((UDItype) ph) \
+                : "%rJ" (__m0), \
+                        "rI" (__m1)); \
+                (pl) = __m0 * __m1; \
+        } while (0)
+#define UMUL_TIME 46
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+do { UDItype __r; \
+        (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
+        (r) = __r; \
+} while (0)
+extern UDItype __udiv_qrnnd();
+#define UDIV_TIME 220
+#endif /* LONGLONG_STANDALONE */
+#endif /* __alpha */
+/***************************************
+        **************  ARM  ******************
+        ***************************************/
+#if defined(__arm__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+        __asm__ ("adds %1, %4, %5\n" \
+                "adc  %0, %2, %3" \
+        : "=r" ((USItype)(sh)), \
+                "=&r" ((USItype)(sl)) \
+        : "%r" ((USItype)(ah)), \
+                "rI" ((USItype)(bh)), \
+                "%r" ((USItype)(al)), \
+                "rI" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+        __asm__ ("subs %1, %4, %5\n" \
+                "sbc  %0, %2, %3" \
+        : "=r" ((USItype)(sh)), \
+                "=&r" ((USItype)(sl)) \
+        : "r" ((USItype)(ah)), \
+                "rI" ((USItype)(bh)), \
+                "r" ((USItype)(al)), \
+                "rI" ((USItype)(bl)))
+#if defined __ARM_ARCH_2__ || defined __ARM_ARCH_3__
+#define umul_ppmm(xh, xl, a, b) \
+        __asm__ ("%@ Inlined umul_ppmm\n" \
+                "mov    %|r0, %2, lsr #16               @ AAAA\n" \
+                "mov    %|r2, %3, lsr #16               @ BBBB\n" \
+                "bic    %|r1, %2, %|r0, lsl #16         @ aaaa\n" \
+                "bic    %0, %3, %|r2, lsl #16           @ bbbb\n" \
+                "mul    %1, %|r1, %|r2                  @ aaaa * BBBB\n" \
+                "mul    %|r2, %|r0, %|r2                @ AAAA * BBBB\n" \
+                "mul    %|r1, %0, %|r1                  @ aaaa * bbbb\n" \
+                "mul    %0, %|r0, %0                    @ AAAA * bbbb\n" \
+                "adds   %|r0, %1, %0                    @ central sum\n" \
+                "addcs  %|r2, %|r2, #65536\n" \
+                "adds   %1, %|r1, %|r0, lsl #16\n" \
+                "adc    %0, %|r2, %|r0, lsr #16" \
+        : "=&r" ((USItype)(xh)), \
+                "=r" ((USItype)(xl)) \
+        : "r" ((USItype)(a)), \
+                "r" ((USItype)(b)) \
+        : "r0", "r1", "r2")
+#else
+#define umul_ppmm(xh, xl, a, b) \
+        __asm__ ("%@ Inlined umul_ppmm\n" \
+                "umull %r1, %r0, %r2, %r3" \
+        : "=&r" ((USItype)(xh)), \
+                        "=r" ((USItype)(xl)) \
+        : "r" ((USItype)(a)), \
+                        "r" ((USItype)(b)) \
+        : "r0", "r1")
+#endif
+#define UMUL_TIME 20
+#define UDIV_TIME 100
+#endif /* __arm__ */
+/***************************************
+        **************  CLIPPER  **************
+        ***************************************/
+#if defined(__clipper__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+        ({union {UDItype __ll; \
+                struct {USItype __l, __h; } __i; \
+        } __xx; \
+        __asm__ ("mulwux %2,%0" \
+        : "=r" (__xx.__ll) \
+        : "%0" ((USItype)(u)), \
+                "r" ((USItype)(v))); \
+        (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
+#define smul_ppmm(w1, w0, u, v) \
+        ({union {DItype __ll; \
+                struct {SItype __l, __h; } __i; \
+        } __xx; \
+        __asm__ ("mulwx %2,%0" \
+        : "=r" (__xx.__ll) \
+        : "%0" ((SItype)(u)), \
+                "r" ((SItype)(v))); \
+        (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
+#define __umulsidi3(u, v) \
+        ({UDItype __w; \
+        __asm__ ("mulwux %2,%0" \
+        : "=r" (__w) \
+        : "%0" ((USItype)(u)), \
+                "r" ((USItype)(v))); \
+        __w; })
+#endif /* __clipper__ */
+/***************************************
+        **************  GMICRO  ***************
+        ***************************************/
+#if defined(__gmicro__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+        __asm__ ("add.w %5,%1\n" \
+                "addx %3,%0" \
+        : "=g" ((USItype)(sh)), \
+                "=&g" ((USItype)(sl)) \
+        : "%0" ((USItype)(ah)), \
+                "g" ((USItype)(bh)), \
+                "%1" ((USItype)(al)), \
+                "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+        __asm__ ("sub.w %5,%1\n" \
+                "subx %3,%0" \
+        : "=g" ((USItype)(sh)), \
+                "=&g" ((USItype)(sl)) \
+        : "0" ((USItype)(ah)), \
+                "g" ((USItype)(bh)), \
+                "1" ((USItype)(al)), \
+                "g" ((USItype)(bl)))
+#define umul_ppmm(ph, pl, m0, m1) \
+        __asm__ ("mulx %3,%0,%1" \
+        : "=g" ((USItype)(ph)), \
+                "=r" ((USItype)(pl)) \
+        : "%0" ((USItype)(m0)), \
+                "g" ((USItype)(m1)))
+#define udiv_qrnnd(q, r, nh, nl, d) \
+        __asm__ ("divx %4,%0,%1" \
+        : "=g" ((USItype)(q)), \
+                "=r" ((USItype)(r)) \
+        : "1" ((USItype)(nh)), \
+                "0" ((USItype)(nl)), \
+                "g" ((USItype)(d)))
+#define count_leading_zeros(count, x) \
+        __asm__ ("bsch/1 %1,%0" \
+        : "=g" (count) \
+        : "g" ((USItype)(x)), \
+             "0" ((USItype)0))
+#endif
+/***************************************
+        **************  HPPA  *****************
+        ***************************************/
+#if defined(__hppa) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+        __asm__ ("add %4,%5,%1\n" \
+                   "addc %2,%3,%0" \
+        : "=r" ((USItype)(sh)), \
+             "=&r" ((USItype)(sl)) \
+        : "%rM" ((USItype)(ah)), \
+             "rM" ((USItype)(bh)), \
+             "%rM" ((USItype)(al)), \
+             "rM" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+        __asm__ ("sub %4,%5,%1\n" \
+           "subb %2,%3,%0" \
+        : "=r" ((USItype)(sh)), \
+             "=&r" ((USItype)(sl)) \
+        : "rM" ((USItype)(ah)), \
+             "rM" ((USItype)(bh)), \
+             "rM" ((USItype)(al)), \
+             "rM" ((USItype)(bl)))
+#if defined(_PA_RISC1_1)
+#define umul_ppmm(wh, wl, u, v) \
+do { \
+        union {UDItype __ll; \
+        struct {USItype __h, __l; } __i; \
+        } __xx; \
+        __asm__ ("xmpyu %1,%2,%0" \
+        : "=*f" (__xx.__ll) \
+        : "*f" ((USItype)(u)), \
+               "*f" ((USItype)(v))); \
+        (wh) = __xx.__i.__h; \
+        (wl) = __xx.__i.__l; \
+} while (0)
+#define UMUL_TIME 8
+#define UDIV_TIME 60
+#else
+#define UMUL_TIME 40
+#define UDIV_TIME 80
+#endif
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+do { USItype __r; \
+        (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
+        (r) = __r; \
+} while (0)
+extern USItype __udiv_qrnnd();
+#endif /* LONGLONG_STANDALONE */
+#define count_leading_zeros(count, x) \
+do { \
+        USItype __tmp; \
+        __asm__ ( \
+        "ldi             1,%0\n" \
+        "extru,=        %1,15,16,%%r0  ; Bits 31..16 zero?\n" \
+        "extru,tr       %1,15,16,%1    ; No.  Shift down, skip add.\n" \
+        "ldo            16(%0),%0      ; Yes.   Perform add.\n" \
+        "extru,=        %1,23,8,%%r0   ; Bits 15..8 zero?\n" \
+        "extru,tr       %1,23,8,%1     ; No.  Shift down, skip add.\n" \
+        "ldo            8(%0),%0       ; Yes.   Perform add.\n" \
+        "extru,=        %1,27,4,%%r0   ; Bits 7..4 zero?\n" \
+        "extru,tr       %1,27,4,%1     ; No.  Shift down, skip add.\n" \
+        "ldo            4(%0),%0       ; Yes.   Perform add.\n" \
+        "extru,=        %1,29,2,%%r0   ; Bits 3..2 zero?\n" \
+        "extru,tr       %1,29,2,%1     ; No.  Shift down, skip add.\n" \
+        "ldo            2(%0),%0       ; Yes.   Perform add.\n" \
+        "extru          %1,30,1,%1     ; Extract bit 1.\n" \
+        "sub            %0,%1,%0       ; Subtract it.              " \
+        : "=r" (count), "=r" (__tmp) : "1" (x)); \
+} while (0)
+#endif /* hppa */
+/***************************************
+        **************  I370  *****************
+        ***************************************/
+#if (defined(__i370__) || defined(__mvs__)) && W_TYPE_SIZE == 32
+#define umul_ppmm(xh, xl, m0, m1) \
+do { \
+        union {UDItype __ll; \
+           struct {USItype __h, __l; } __i; \
+        } __xx; \
+        USItype __m0 = (m0), __m1 = (m1); \
+        __asm__ ("mr %0,%3" \
+        : "=r" (__xx.__i.__h), \
+               "=r" (__xx.__i.__l) \
+        : "%1" (__m0), \
+               "r" (__m1)); \
+        (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
+        (xh) += ((((SItype) __m0 >> 31) & __m1) \
+             + (((SItype) __m1 >> 31) & __m0)); \
+} while (0)
+#define smul_ppmm(xh, xl, m0, m1) \
+do { \
+        union {DItype __ll; \
+           struct {USItype __h, __l; } __i; \
+        } __xx; \
+        __asm__ ("mr %0,%3" \
+        : "=r" (__xx.__i.__h), \
+               "=r" (__xx.__i.__l) \
+        : "%1" (m0), \
+               "r" (m1)); \
+        (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
+} while (0)
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+do { \
+        union {DItype __ll; \
+           struct {USItype __h, __l; } __i; \
+        } __xx; \
+        __xx.__i.__h = n1; __xx.__i.__l = n0; \
+        __asm__ ("dr %0,%2" \
+        : "=r" (__xx.__ll) \
+        : "0" (__xx.__ll), "r" (d)); \
+        (q) = __xx.__i.__l; (r) = __xx.__i.__h; \
+} while (0)
+#endif
+/***************************************
+        **************  I386  *****************
+        ***************************************/
+#undef __i386__
+#if (defined(__i386__) || defined(__i486__)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+        __asm__ ("addl %5,%1\n" \
+           "adcl %3,%0" \
+        : "=r" ((USItype)(sh)), \
+             "=&r" ((USItype)(sl)) \
+        : "%0" ((USItype)(ah)), \
+             "g" ((USItype)(bh)), \
+             "%1" ((USItype)(al)), \
+             "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+        __asm__ ("subl %5,%1\n" \
+           "sbbl %3,%0" \
+        : "=r" ((USItype)(sh)), \
+             "=&r" ((USItype)(sl)) \
+        : "0" ((USItype)(ah)), \
+             "g" ((USItype)(bh)), \
+             "1" ((USItype)(al)), \
+             "g" ((USItype)(bl)))
+#define umul_ppmm(w1, w0, u, v) \
+        __asm__ ("mull %3" \
+        : "=a" ((USItype)(w0)), \
+             "=d" ((USItype)(w1)) \
+        : "%0" ((USItype)(u)), \
+             "rm" ((USItype)(v)))
+#define udiv_qrnnd(q, r, n1, n0, d) \
+        __asm__ ("divl %4" \
+        : "=a" ((USItype)(q)), \
+             "=d" ((USItype)(r)) \
+        : "0" ((USItype)(n0)), \
+             "1" ((USItype)(n1)), \
+             "rm" ((USItype)(d)))
+#define count_leading_zeros(count, x) \
+do { \
+        USItype __cbtmp; \
+        __asm__ ("bsrl %1,%0" \
+        : "=r" (__cbtmp) : "rm" ((USItype)(x))); \
+        (count) = __cbtmp ^ 31; \
+} while (0)
+#define count_trailing_zeros(count, x) \
+        __asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x)))
+#ifndef UMUL_TIME
+#define UMUL_TIME 40
+#endif
+#ifndef UDIV_TIME
+#define UDIV_TIME 40
+#endif
+#endif /* 80x86 */
+/***************************************
+        **************  I860  *****************
+        ***************************************/
+#if defined(__i860__) && W_TYPE_SIZE == 32
+#define rshift_rhlc(r, h, l, c) \
+        __asm__ ("shr %3,r0,r0\n" \
+        "shrd %1,%2,%0" \
+           "=r" (r) : "r" (h), "r" (l), "rn" (c))
+#endif /* i860 */
+/***************************************
+        **************  I960  *****************
+        ***************************************/
+#if defined(__i960__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+        __asm__ ("cmpo 1,0\n" \
+        "addc %5,%4,%1\n" \
+        "addc %3,%2,%0" \
+        : "=r" ((USItype)(sh)), \
+             "=&r" ((USItype)(sl)) \
+        : "%dI" ((USItype)(ah)), \
+             "dI" ((USItype)(bh)), \
+             "%dI" ((USItype)(al)), \
+             "dI" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+        __asm__ ("cmpo 0,0\n" \
+        "subc %5,%4,%1\n" \
+        "subc %3,%2,%0" \
+        : "=r" ((USItype)(sh)), \
+             "=&r" ((USItype)(sl)) \
+        : "dI" ((USItype)(ah)), \
+             "dI" ((USItype)(bh)), \
+             "dI" ((USItype)(al)), \
+             "dI" ((USItype)(bl)))
+#define umul_ppmm(w1, w0, u, v) \
+        ({union {UDItype __ll; \
+           struct {USItype __l, __h; } __i; \
+        } __xx; \
+        __asm__ ("emul        %2,%1,%0" \
+        : "=d" (__xx.__ll) \
+        : "%dI" ((USItype)(u)), \
+             "dI" ((USItype)(v))); \
+        (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
+#define __umulsidi3(u, v) \
+        ({UDItype __w; \
+        __asm__ ("emul      %2,%1,%0" \
+        : "=d" (__w) \
+        : "%dI" ((USItype)(u)), \
+               "dI" ((USItype)(v))); \
+        __w; })
+#define udiv_qrnnd(q, r, nh, nl, d) \
+do { \
+        union {UDItype __ll; \
+           struct {USItype __l, __h; } __i; \
+        } __nn; \
+        __nn.__i.__h = (nh); __nn.__i.__l = (nl); \
+        __asm__ ("ediv %d,%n,%0" \
+        : "=d" (__rq.__ll) \
+        : "dI" (__nn.__ll), \
+             "dI" ((USItype)(d))); \
+        (r) = __rq.__i.__l; (q) = __rq.__i.__h; \
+} while (0)
+#define count_leading_zeros(count, x) \
+do { \
+        USItype __cbtmp; \
+        __asm__ ("scanbit %1,%0" \
+        : "=r" (__cbtmp) \
+        : "r" ((USItype)(x))); \
+        (count) = __cbtmp ^ 31; \
+} while (0)
+#define COUNT_LEADING_ZEROS_0 (-32)     /* sic */
+#if defined(__i960mx)           /* what is the proper symbol to test??? */
+#define rshift_rhlc(r, h, l, c) \
+do { \
+        union {UDItype __ll; \
+           struct {USItype __l, __h; } __i; \
+        } __nn; \
+        __nn.__i.__h = (h); __nn.__i.__l = (l); \
+        __asm__ ("shre %2,%1,%0" \
+        : "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \
+}
+#endif /* i960mx */
+#endif /* i960 */
+/***************************************
+        **************  68000   ****************
+        ***************************************/
+#if (defined(__mc68000__) || defined(__mc68020__) || defined(__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+        __asm__ ("add%.l %5,%1\n" \
+           "addx%.l %3,%0" \
+        : "=d" ((USItype)(sh)), \
+             "=&d" ((USItype)(sl)) \
+        : "%0" ((USItype)(ah)), \
+             "d" ((USItype)(bh)), \
+             "%1" ((USItype)(al)), \
+             "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+        __asm__ ("sub%.l %5,%1\n" \
+           "subx%.l %3,%0" \
+        : "=d" ((USItype)(sh)), \
+             "=&d" ((USItype)(sl)) \
+        : "0" ((USItype)(ah)), \
+             "d" ((USItype)(bh)), \
+             "1" ((USItype)(al)), \
+             "g" ((USItype)(bl)))
+#if (defined(__mc68020__) || defined(__NeXT__) || defined(mc68020))
+#define umul_ppmm(w1, w0, u, v) \
+        __asm__ ("mulu%.l %3,%1:%0" \
+        : "=d" ((USItype)(w0)), \
+             "=d" ((USItype)(w1)) \
+        : "%0" ((USItype)(u)), \
+             "dmi" ((USItype)(v)))
+#define UMUL_TIME 45
+#define udiv_qrnnd(q, r, n1, n0, d) \
+        __asm__ ("divu%.l %4,%1:%0" \
+        : "=d" ((USItype)(q)), \
+             "=d" ((USItype)(r)) \
+        : "0" ((USItype)(n0)), \
+             "1" ((USItype)(n1)), \
+             "dmi" ((USItype)(d)))
+#define UDIV_TIME 90
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+        __asm__ ("divs%.l %4,%1:%0" \
+        : "=d" ((USItype)(q)), \
+             "=d" ((USItype)(r)) \
+        : "0" ((USItype)(n0)), \
+             "1" ((USItype)(n1)), \
+             "dmi" ((USItype)(d)))
+#define count_leading_zeros(count, x) \
+        __asm__ ("bfffo %1{%b2:%b2},%0" \
+        : "=d" ((USItype)(count)) \
+        : "od" ((USItype)(x)), "n" (0))
+#define COUNT_LEADING_ZEROS_0 32
+#else /* not mc68020 */
+#define umul_ppmm(xh, xl, a, b) \
+do { USItype __umul_tmp1, __umul_tmp2; \
+        __asm__ ("| Inlined umul_ppmm\n" \
+        "move%.l %5,%3\n" \
+        "move%.l %2,%0\n" \
+        "move%.w %3,%1\n" \
+        "swap   %3\n" \
+        "swap   %0\n" \
+        "mulu   %2,%1\n" \
+        "mulu   %3,%0\n" \
+        "mulu   %2,%3\n" \
+        "swap   %2\n" \
+        "mulu   %5,%2\n" \
+        "add%.l %3,%2\n" \
+        "jcc    1f\n" \
+        "add%.l %#0x10000,%0\n" \
+        "1:     move%.l %2,%3\n" \
+        "clr%.w %2\n" \
+        "swap   %2\n" \
+        "swap   %3\n" \
+        "clr%.w %3\n" \
+        "add%.l %3,%1\n" \
+        "addx%.l %2,%0\n" \
+        "| End inlined umul_ppmm" \
+        : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \
+                "=d" (__umul_tmp1), "=&d" (__umul_tmp2) \
+        : "%2" ((USItype)(a)), "d" ((USItype)(b))); \
+} while (0)
+#define UMUL_TIME 100
+#define UDIV_TIME 400
+#endif /* not mc68020 */
+#endif /* mc68000 */
+/***************************************
+        **************  88000   ****************
+        ***************************************/
+#if defined(__m88000__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+        __asm__ ("addu.co %1,%r4,%r5\n" \
+           "addu.ci %0,%r2,%r3" \
+        : "=r" ((USItype)(sh)), \
+             "=&r" ((USItype)(sl)) \
+        : "%rJ" ((USItype)(ah)), \
+             "rJ" ((USItype)(bh)), \
+             "%rJ" ((USItype)(al)), \
+             "rJ" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+        __asm__ ("subu.co %1,%r4,%r5\n" \
+           "subu.ci %0,%r2,%r3" \
+        : "=r" ((USItype)(sh)), \
+             "=&r" ((USItype)(sl)) \
+        : "rJ" ((USItype)(ah)), \
+             "rJ" ((USItype)(bh)), \
+             "rJ" ((USItype)(al)), \
+             "rJ" ((USItype)(bl)))
+#define count_leading_zeros(count, x) \
+do { \
+        USItype __cbtmp; \
+        __asm__ ("ff1 %0,%1" \
+        : "=r" (__cbtmp) \
+        : "r" ((USItype)(x))); \
+        (count) = __cbtmp ^ 31; \
+} while (0)
+#define COUNT_LEADING_ZEROS_0 63        /* sic */
+#if defined(__m88110__)
+#define umul_ppmm(wh, wl, u, v) \
+do { \
+        union {UDItype __ll; \
+           struct {USItype __h, __l; } __i; \
+        } __x; \
+        __asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \
+        (wh) = __x.__i.__h; \
+        (wl) = __x.__i.__l; \
+} while (0)
+#define udiv_qrnnd(q, r, n1, n0, d) \
+        ({union {UDItype __ll; \
+           struct {USItype __h, __l; } __i; \
+        } __x, __q; \
+        __x.__i.__h = (n1); __x.__i.__l = (n0); \
+        __asm__ ("divu.d %0,%1,%2" \
+        : "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \
+        (r) = (n0) - __q.__l * (d); (q) = __q.__l; })
+#define UMUL_TIME 5
+#define UDIV_TIME 25
+#else
+#define UMUL_TIME 17
+#define UDIV_TIME 150
+#endif /* __m88110__ */
+#endif /* __m88000__ */
+/***************************************
+        **************  MIPS  *****************
+        ***************************************/
+#if defined(__mips__) && W_TYPE_SIZE == 32
+#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
+#define umul_ppmm(w1, w0, u, v) \
+        __asm__ ("multu %2,%3" \
+        : "=l" ((USItype)(w0)), \
+             "=h" ((USItype)(w1)) \
+        : "d" ((USItype)(u)), \
+             "d" ((USItype)(v)))
+#else
+#define umul_ppmm(w1, w0, u, v) \
+        __asm__ ("multu %2,%3\n" \
+           "mflo %0\n" \
+           "mfhi %1" \
+        : "=d" ((USItype)(w0)), \
+             "=d" ((USItype)(w1)) \
+        : "d" ((USItype)(u)), \
+             "d" ((USItype)(v)))
+#endif
+#define UMUL_TIME 10
+#define UDIV_TIME 100
+#endif /* __mips__ */
+/***************************************
+        **************  MIPS/64  **************
+        ***************************************/
+#if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64
+#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
+#define umul_ppmm(w1, w0, u, v) \
+        __asm__ ("dmultu %2,%3" \
+        : "=l" ((UDItype)(w0)), \
+             "=h" ((UDItype)(w1)) \
+        : "d" ((UDItype)(u)), \
+             "d" ((UDItype)(v)))
+#else
+#define umul_ppmm(w1, w0, u, v) \
+        __asm__ ("dmultu %2,%3\n" \
+           "mflo %0\n" \
+           "mfhi %1" \
+        : "=d" ((UDItype)(w0)), \
+             "=d" ((UDItype)(w1)) \
+        : "d" ((UDItype)(u)), \
+             "d" ((UDItype)(v)))
+#endif
+#define UMUL_TIME 20
+#define UDIV_TIME 140
+#endif /* __mips__ */
+/***************************************
+        **************  32000   ****************
+        ***************************************/
+#if defined(__ns32000__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+        ({union {UDItype __ll; \
+           struct {USItype __l, __h; } __i; \
+        } __xx; \
+        __asm__ ("meid %2,%0" \
+        : "=g" (__xx.__ll) \
+        : "%0" ((USItype)(u)), \
+             "g" ((USItype)(v))); \
+        (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
+#define __umulsidi3(u, v) \
+        ({UDItype __w; \
+        __asm__ ("meid %2,%0" \
+        : "=g" (__w) \
+        : "%0" ((USItype)(u)), \
+               "g" ((USItype)(v))); \
+        __w; })
+#define udiv_qrnnd(q, r, n1, n0, d) \
+        ({union {UDItype __ll; \
+           struct {USItype __l, __h; } __i; \
+        } __xx; \
+        __xx.__i.__h = (n1); __xx.__i.__l = (n0); \
+        __asm__ ("deid %2,%0" \
+        : "=g" (__xx.__ll) \
+        : "0" (__xx.__ll), \
+             "g" ((USItype)(d))); \
+        (r) = __xx.__i.__l; (q) = __xx.__i.__h; })
+#define count_trailing_zeros(count, x) \
+do { \
+        __asm__("ffsd      %2,%0" \
+        : "=r"((USItype) (count)) \
+        : "0"((USItype) 0), "r"((USItype) (x))); \
+        } while (0)
+#endif /* __ns32000__ */
+/***************************************
+        **************  PPC  ******************
+        ***************************************/
+#if (defined(_ARCH_PPC) || defined(_IBMR2)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+do { \
+        if (__builtin_constant_p(bh) && (bh) == 0) \
+                __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
+                : "=r" ((USItype)(sh)), \
+                "=&r" ((USItype)(sl)) \
+                : "%r" ((USItype)(ah)), \
+                "%r" ((USItype)(al)), \
+                "rI" ((USItype)(bl))); \
+        else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \
+                __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
+                : "=r" ((USItype)(sh)), \
+                "=&r" ((USItype)(sl)) \
+                : "%r" ((USItype)(ah)), \
+                "%r" ((USItype)(al)), \
+                "rI" ((USItype)(bl))); \
+        else \
+                __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
+                : "=r" ((USItype)(sh)), \
+                "=&r" ((USItype)(sl)) \
+                : "%r" ((USItype)(ah)), \
+                "r" ((USItype)(bh)), \
+                "%r" ((USItype)(al)), \
+                "rI" ((USItype)(bl))); \
+} while (0)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+do { \
+        if (__builtin_constant_p(ah) && (ah) == 0) \
+                __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
+                : "=r" ((USItype)(sh)), \
+                "=&r" ((USItype)(sl)) \
+                : "r" ((USItype)(bh)), \
+                "rI" ((USItype)(al)), \
+                "r" ((USItype)(bl))); \
+        else if (__builtin_constant_p(ah) && (ah) == ~(USItype) 0) \
+                __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
+                : "=r" ((USItype)(sh)), \
+                "=&r" ((USItype)(sl)) \
+                : "r" ((USItype)(bh)), \
+                "rI" ((USItype)(al)), \
+                "r" ((USItype)(bl))); \
+        else if (__builtin_constant_p(bh) && (bh) == 0) \
+                __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
+                : "=r" ((USItype)(sh)), \
+                "=&r" ((USItype)(sl)) \
+                : "r" ((USItype)(ah)), \
+                "rI" ((USItype)(al)), \
+                "r" ((USItype)(bl))); \
+        else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \
+                __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
+                : "=r" ((USItype)(sh)), \
+                "=&r" ((USItype)(sl)) \
+                : "r" ((USItype)(ah)), \
+                "rI" ((USItype)(al)), \
+                "r" ((USItype)(bl))); \
+        else \
+                __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
+                : "=r" ((USItype)(sh)), \
+                "=&r" ((USItype)(sl)) \
+                : "r" ((USItype)(ah)), \
+                "r" ((USItype)(bh)), \
+                "rI" ((USItype)(al)), \
+                "r" ((USItype)(bl))); \
+} while (0)
+#define count_leading_zeros(count, x) \
+        __asm__ ("{cntlz|cntlzw} %0,%1" \
+        : "=r" ((USItype)(count)) \
+        : "r" ((USItype)(x)))
+#define COUNT_LEADING_ZEROS_0 32
+#if defined(_ARCH_PPC)
+#define umul_ppmm(ph, pl, m0, m1) \
+do { \
+        USItype __m0 = (m0), __m1 = (m1); \
+        __asm__ ("mulhwu %0,%1,%2" \
+        : "=r" ((USItype) ph) \
+        : "%r" (__m0), \
+        "r" (__m1)); \
+        (pl) = __m0 * __m1; \
+} while (0)
+#define UMUL_TIME 15
+#define smul_ppmm(ph, pl, m0, m1) \
+do { \
+        SItype __m0 = (m0), __m1 = (m1); \
+        __asm__ ("mulhw %0,%1,%2" \
+        : "=r" ((SItype) ph) \
+        : "%r" (__m0), \
+        "r" (__m1)); \
+        (pl) = __m0 * __m1; \
+} while (0)
+#define SMUL_TIME 14
+#define UDIV_TIME 120
+#else
+#define umul_ppmm(xh, xl, m0, m1) \
+do { \
+        USItype __m0 = (m0), __m1 = (m1); \
+        __asm__ ("mul %0,%2,%3" \
+        : "=r" ((USItype)(xh)), \
+        "=q" ((USItype)(xl)) \
+        : "r" (__m0), \
+        "r" (__m1)); \
+        (xh) += ((((SItype) __m0 >> 31) & __m1) \
+        + (((SItype) __m1 >> 31) & __m0)); \
+} while (0)
+#define UMUL_TIME 8
+#define smul_ppmm(xh, xl, m0, m1) \
+        __asm__ ("mul %0,%2,%3" \
+        : "=r" ((SItype)(xh)), \
+        "=q" ((SItype)(xl)) \
+        : "r" (m0), \
+        "r" (m1))
+#define SMUL_TIME 4
+#define sdiv_qrnnd(q, r, nh, nl, d) \
+        __asm__ ("div %0,%2,%4" \
+        : "=r" ((SItype)(q)), "=q" ((SItype)(r)) \
+        : "r" ((SItype)(nh)), "1" ((SItype)(nl)), "r" ((SItype)(d)))
+#define UDIV_TIME 100
+#endif
+#endif /* Power architecture variants.  */
+/***************************************
+        **************  PYR  ******************
+        ***************************************/
+#if defined(__pyr__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+        __asm__ ("addw        %5,%1\n" \
+        "addwc  %3,%0" \
+        : "=r" ((USItype)(sh)), \
+        "=&r" ((USItype)(sl)) \
+        : "%0" ((USItype)(ah)), \
+        "g" ((USItype)(bh)), \
+        "%1" ((USItype)(al)), \
+        "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+        __asm__ ("subw        %5,%1\n" \
+        "subwb  %3,%0" \
+        : "=r" ((USItype)(sh)), \
+        "=&r" ((USItype)(sl)) \
+        : "0" ((USItype)(ah)), \
+        "g" ((USItype)(bh)), \
+        "1" ((USItype)(al)), \
+        "g" ((USItype)(bl)))
+        /* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP.  */
+#define umul_ppmm(w1, w0, u, v) \
+        ({union {UDItype __ll; \
+        struct {USItype __h, __l; } __i; \
+        } __xx; \
+        __asm__ ("movw %1,%R0\n" \
+        "uemul %2,%0" \
+        : "=&r" (__xx.__ll) \
+        : "g" ((USItype) (u)), \
+        "g" ((USItype)(v))); \
+        (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
+#endif /* __pyr__ */
+/***************************************
+        **************  RT/ROMP  **************
+        ***************************************/
+#if defined(__ibm032__) /* RT/ROMP */   && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+        __asm__ ("a %1,%5\n" \
+        "ae %0,%3" \
+        : "=r" ((USItype)(sh)), \
+        "=&r" ((USItype)(sl)) \
+        : "%0" ((USItype)(ah)), \
+        "r" ((USItype)(bh)), \
+        "%1" ((USItype)(al)), \
+        "r" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+        __asm__ ("s %1,%5\n" \
+        "se %0,%3" \
+        : "=r" ((USItype)(sh)), \
+        "=&r" ((USItype)(sl)) \
+        : "0" ((USItype)(ah)), \
+        "r" ((USItype)(bh)), \
+        "1" ((USItype)(al)), \
+        "r" ((USItype)(bl)))
+#define umul_ppmm(ph, pl, m0, m1) \
+do { \
+        USItype __m0 = (m0), __m1 = (m1); \
+        __asm__ ( \
+        "s       r2,r2\n" \
+        "mts    r10,%2\n" \
+        "m      r2,%3\n" \
+        "m      r2,%3\n" \
+        "m      r2,%3\n" \
+        "m      r2,%3\n" \
+        "m      r2,%3\n" \
+        "m      r2,%3\n" \
+        "m      r2,%3\n" \
+        "m      r2,%3\n" \
+        "m      r2,%3\n" \
+        "m      r2,%3\n" \
+        "m      r2,%3\n" \
+        "m      r2,%3\n" \
+        "m      r2,%3\n" \
+        "m      r2,%3\n" \
+        "m      r2,%3\n" \
+        "m      r2,%3\n" \
+        "cas    %0,r2,r0\n" \
+        "mfs    r10,%1" \
+        : "=r" ((USItype)(ph)), \
+        "=r" ((USItype)(pl)) \
+        : "%r" (__m0), \
+        "r" (__m1) \
+        : "r2"); \
+        (ph) += ((((SItype) __m0 >> 31) & __m1) \
+        + (((SItype) __m1 >> 31) & __m0)); \
+} while (0)
+#define UMUL_TIME 20
+#define UDIV_TIME 200
+#define count_leading_zeros(count, x) \
+do { \
+        if ((x) >= 0x10000) \
+                __asm__ ("clz     %0,%1" \
+                : "=r" ((USItype)(count)) \
+                : "r" ((USItype)(x) >> 16)); \
+        else { \
+                __asm__ ("clz   %0,%1" \
+                : "=r" ((USItype)(count)) \
+                : "r" ((USItype)(x))); \
+                (count) += 16; \
+        } \
+} while (0)
+#endif /* RT/ROMP */
+/***************************************
+        **************  SH2  ******************
+        ***************************************/
+#if (defined(__sh2__) || defined(__sh3__) || defined(__SH4__)) \
+        && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+        __asm__ ( \
+        "dmulu.l %2,%3\n" \
+        "sts    macl,%1\n" \
+        "sts    mach,%0" \
+        : "=r" ((USItype)(w1)), \
+        "=r" ((USItype)(w0)) \
+        : "r" ((USItype)(u)), \
+        "r" ((USItype)(v)) \
+        : "macl", "mach")
+#define UMUL_TIME 5
+#endif
+/***************************************
+        **************  SPARC   ****************
+        ***************************************/
+#if defined(__sparc__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+        __asm__ ("addcc %r4,%5,%1\n" \
+        "addx %r2,%3,%0" \
+        : "=r" ((USItype)(sh)), \
+        "=&r" ((USItype)(sl)) \
+        : "%rJ" ((USItype)(ah)), \
+        "rI" ((USItype)(bh)), \
+        "%rJ" ((USItype)(al)), \
+        "rI" ((USItype)(bl)) \
+        __CLOBBER_CC)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+        __asm__ ("subcc %r4,%5,%1\n" \
+        "subx %r2,%3,%0" \
+        : "=r" ((USItype)(sh)), \
+        "=&r" ((USItype)(sl)) \
+        : "rJ" ((USItype)(ah)), \
+        "rI" ((USItype)(bh)), \
+        "rJ" ((USItype)(al)), \
+        "rI" ((USItype)(bl)) \
+        __CLOBBER_CC)
+#if defined(__sparc_v8__)
+/* Don't match immediate range because, 1) it is not often useful,
+        2) the 'I' flag thinks of the range as a 13 bit signed interval,
+        while we want to match a 13 bit interval, sign extended to 32 bits,
+        but INTERPRETED AS UNSIGNED.  */
+#define umul_ppmm(w1, w0, u, v) \
+        __asm__ ("umul %2,%3,%1;rd %%y,%0" \
+        : "=r" ((USItype)(w1)), \
+        "=r" ((USItype)(w0)) \
+        : "r" ((USItype)(u)), \
+        "r" ((USItype)(v)))
+#define UMUL_TIME 5
+#ifndef SUPERSPARC              /* SuperSPARC's udiv only handles 53 bit dividends */
+#define udiv_qrnnd(q, r, n1, n0, d) \
+do { \
+        USItype __q; \
+        __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
+        : "=r" ((USItype)(__q)) \
+        : "r" ((USItype)(n1)), \
+        "r" ((USItype)(n0)), \
+        "r" ((USItype)(d))); \
+        (r) = (n0) - __q * (d); \
+        (q) = __q; \
+} while (0)
+#define UDIV_TIME 25
+#endif /* SUPERSPARC */
+#else /* ! __sparc_v8__ */
+#if defined(__sparclite__)
+/* This has hardware multiply but not divide.  It also has two additional
+        instructions scan (ffs from high bit) and divscc.  */
+#define umul_ppmm(w1, w0, u, v) \
+        __asm__ ("umul %2,%3,%1;rd %%y,%0" \
+        : "=r" ((USItype)(w1)), \
+        "=r" ((USItype)(w0)) \
+        : "r" ((USItype)(u)), \
+        "r" ((USItype)(v)))
+#define UMUL_TIME 5
+#define udiv_qrnnd(q, r, n1, n0, d) \
+        __asm__ ("! Inlined udiv_qrnnd\n" \
+        "wr     %%g0,%2,%%y     ! Not a delayed write for sparclite\n" \
+        "tst    %%g0\n" \
+        "divscc %3,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%0\n" \
+        "rd     %%y,%1\n" \
+        "bl,a 1f\n" \
+        "add    %1,%4,%1\n" \
+        "1:     ! End of inline udiv_qrnnd" \
+        : "=r" ((USItype)(q)), \
+        "=r" ((USItype)(r)) \
+        : "r" ((USItype)(n1)), \
+        "r" ((USItype)(n0)), \
+        "rI" ((USItype)(d)) \
+        : "%g1" __AND_CLOBBER_CC)
+#define UDIV_TIME 37
+#define count_leading_zeros(count, x) \
+        __asm__ ("scan %1,0,%0" \
+        : "=r" ((USItype)(x)) \
+        : "r" ((USItype)(count)))
+/* Early sparclites return 63 for an argument of 0, but they warn that future
+        implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
+        undefined.  */
+#endif /* __sparclite__ */
+#endif /* __sparc_v8__ */
+        /* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd.  */
+#ifndef umul_ppmm
+#define umul_ppmm(w1, w0, u, v) \
+        __asm__ ("! Inlined umul_ppmm\n" \
+        "wr     %%g0,%2,%%y     ! SPARC has 0-3 delay insn after a wr\n" \
+        "sra    %3,31,%%g2      ! Don't move this insn\n" \
+        "and    %2,%%g2,%%g2    ! Don't move this insn\n" \
+        "andcc  %%g0,0,%%g1     ! Don't move this insn\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,0,%%g1\n" \
+        "add    %%g1,%%g2,%0\n" \
+        "rd     %%y,%1" \
+        : "=r" ((USItype)(w1)), \
+        "=r" ((USItype)(w0)) \
+        : "%rI" ((USItype)(u)), \
+        "r" ((USItype)(v)) \
+        : "%g1", "%g2" __AND_CLOBBER_CC)
+#define UMUL_TIME 39            /* 39 instructions */
+/* It's quite necessary to add this much assembler for the sparc.
+   The default udiv_qrnnd (in C) is more than 10 times slower!  */
+#define udiv_qrnnd(q, r, n1, n0, d) \
+  __asm__ ("! Inlined udiv_qrnnd\n\t"                                   \
+           "mov 32,%%g1\n\t"                                            \
+           "subcc       %1,%2,%%g0\n\t"                                 \
+           "1:  bcs     5f\n\t"                                         \
+           "addxcc %0,%0,%0     ! shift n1n0 and a q-bit in lsb\n\t"    \
+           "sub %1,%2,%1        ! this kills msb of n\n\t"              \
+           "addx        %1,%1,%1        ! so this can't give carry\n\t" \
+           "subcc       %%g1,1,%%g1\n\t"                                \
+           "2:  bne     1b\n\t"                                         \
+           "subcc       %1,%2,%%g0\n\t"                                 \
+           "bcs 3f\n\t"                                                 \
+           "addxcc %0,%0,%0     ! shift n1n0 and a q-bit in lsb\n\t"    \
+           "b           3f\n\t"                                         \
+           "sub %1,%2,%1        ! this kills msb of n\n\t"              \
+           "4:  sub     %1,%2,%1\n\t"                                   \
+           "5:  addxcc  %1,%1,%1\n\t"                                   \
+           "bcc 2b\n\t"                                                 \
+           "subcc       %%g1,1,%%g1\n\t"                                \
+           "! Got carry from n.  Subtract next step to cancel this carry.\n\t" \
+           "bne 4b\n\t"                                                 \
+           "addcc       %0,%0,%0        ! shift n1n0 and a 0-bit in lsb\n\t" \
+           "sub %1,%2,%1\n\t"                                           \
+           "3:  xnor    %0,0,%0\n\t"                                    \
+           "! End of inline udiv_qrnnd\n"                               \
+           : "=&r" ((USItype)(q)),                                      \
+             "=&r" ((USItype)(r))                                       \
+           : "r" ((USItype)(d)),                                        \
+             "1" ((USItype)(n1)),                                       \
+             "0" ((USItype)(n0)) : "%g1", "cc")
+#define UDIV_TIME (3+7*32)      /* 7 instructions/iteration. 32 iterations.  */
+#endif
+#endif /* __sparc__ */
+/***************************************
+        **************  VAX  ******************
+        ***************************************/
+#if defined(__vax__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+        __asm__ ("addl2 %5,%1\n" \
+        "adwc %3,%0" \
+        : "=g" ((USItype)(sh)), \
+        "=&g" ((USItype)(sl)) \
+        : "%0" ((USItype)(ah)), \
+        "g" ((USItype)(bh)), \
+        "%1" ((USItype)(al)), \
+        "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+        __asm__ ("subl2 %5,%1\n" \
+        "sbwc %3,%0" \
+        : "=g" ((USItype)(sh)), \
+        "=&g" ((USItype)(sl)) \
+        : "0" ((USItype)(ah)), \
+        "g" ((USItype)(bh)), \
+        "1" ((USItype)(al)), \
+        "g" ((USItype)(bl)))
+#define umul_ppmm(xh, xl, m0, m1) \
+do { \
+        union {UDItype __ll; \
+        struct {USItype __l, __h; } __i; \
+        } __xx; \
+        USItype __m0 = (m0), __m1 = (m1); \
+        __asm__ ("emul %1,%2,$0,%0" \
+        : "=g" (__xx.__ll) \
+        : "g" (__m0), \
+        "g" (__m1)); \
+        (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
+        (xh) += ((((SItype) __m0 >> 31) & __m1) \
+        + (((SItype) __m1 >> 31) & __m0)); \
+} while (0)
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+do { \
+        union {DItype __ll; \
+        struct {SItype __l, __h; } __i; \
+        } __xx; \
+        __xx.__i.__h = n1; __xx.__i.__l = n0; \
+        __asm__ ("ediv %3,%2,%0,%1" \
+        : "=g" (q), "=g" (r) \
+        : "g" (__xx.__ll), "g" (d)); \
+} while (0)
+#endif /* __vax__ */
+/***************************************
+        **************  Z8000   ****************
+        ***************************************/
+#if defined(__z8000__) && W_TYPE_SIZE == 16
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+        __asm__ ("add %H1,%H5\n\tadc  %H0,%H3" \
+        : "=r" ((unsigned int)(sh)), \
+        "=&r" ((unsigned int)(sl)) \
+        : "%0" ((unsigned int)(ah)), \
+        "r" ((unsigned int)(bh)), \
+        "%1" ((unsigned int)(al)), \
+        "rQR" ((unsigned int)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+        __asm__ ("sub %H1,%H5\n\tsbc  %H0,%H3" \
+        : "=r" ((unsigned int)(sh)), \
+        "=&r" ((unsigned int)(sl)) \
+        : "0" ((unsigned int)(ah)), \
+        "r" ((unsigned int)(bh)), \
+        "1" ((unsigned int)(al)), \
+        "rQR" ((unsigned int)(bl)))
+#define umul_ppmm(xh, xl, m0, m1) \
+do { \
+        union {long int __ll; \
+        struct {unsigned int __h, __l; } __i; \
+        } __xx; \
+        unsigned int __m0 = (m0), __m1 = (m1); \
+        __asm__ ("mult      %S0,%H3" \
+        : "=r" (__xx.__i.__h), \
+        "=r" (__xx.__i.__l) \
+        : "%1" (__m0), \
+        "rQR" (__m1)); \
+        (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
+        (xh) += ((((signed int) __m0 >> 15) & __m1) \
+        + (((signed int) __m1 >> 15) & __m0)); \
+} while (0)
+#endif /* __z8000__ */
+#endif /* __GNUC__ */
+/***************************************
+        ***********  Generic Versions   ********
+        ***************************************/
+#if !defined(umul_ppmm) && defined(__umulsidi3)
+#define umul_ppmm(ph, pl, m0, m1) \
+{ \
+        UDWtype __ll = __umulsidi3(m0, m1); \
+        ph = (UWtype) (__ll >> W_TYPE_SIZE); \
+        pl = (UWtype) __ll; \
+}
+#endif
+#if !defined(__umulsidi3)
+#define __umulsidi3(u, v) \
+        ({UWtype __hi, __lo; \
+        umul_ppmm(__hi, __lo, u, v); \
+        ((UDWtype) __hi << W_TYPE_SIZE) | __lo; })
+#endif
+        /* If this machine has no inline assembler, use C macros.  */
+#if !defined(add_ssaaaa)
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+do { \
+        UWtype __x; \
+        __x = (al) + (bl); \
+        (sh) = (ah) + (bh) + (__x < (al)); \
+        (sl) = __x; \
+} while (0)
+#endif
+#if !defined(sub_ddmmss)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+do { \
+        UWtype __x; \
+        __x = (al) - (bl); \
+        (sh) = (ah) - (bh) - (__x > (al)); \
+        (sl) = __x; \
+} while (0)
+#endif
+#if !defined(umul_ppmm)
+#define umul_ppmm(w1, w0, u, v) \
+do { \
+        UWtype __x0, __x1, __x2, __x3; \
+        UHWtype __ul, __vl, __uh, __vh; \
+        UWtype __u = (u), __v = (v); \
+        \
+        __ul = __ll_lowpart(__u); \
+        __uh = __ll_highpart(__u); \
+        __vl = __ll_lowpart(__v); \
+        __vh = __ll_highpart(__v); \
+        \
+        __x0 = (UWtype) __ul * __vl; \
+        __x1 = (UWtype) __ul * __vh; \
+        __x2 = (UWtype) __uh * __vl; \
+        __x3 = (UWtype) __uh * __vh; \
+        \
+        __x1 += __ll_highpart(__x0);/* this can't give carry */ \
+        __x1 += __x2;           /* but this indeed can */ \
+        if (__x1 < __x2)                /* did we get it? */ \
+        __x3 += __ll_B;         /* yes, add it in the proper pos. */ \
+        \
+        (w1) = __x3 + __ll_highpart(__x1); \
+        (w0) = (__ll_lowpart(__x1) << W_TYPE_SIZE/2) + __ll_lowpart(__x0); \
+} while (0)
+#endif
+#if !defined(umul_ppmm)
+#define smul_ppmm(w1, w0, u, v) \
+do { \
+        UWtype __w1; \
+        UWtype __m0 = (u), __m1 = (v); \
+        umul_ppmm(__w1, w0, __m0, __m1); \
+        (w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1) \
+        - (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0); \
+} while (0)
+#endif
+        /* Define this unconditionally, so it can be used for debugging.  */
+#define __udiv_qrnnd_c(q, r, n1, n0, d) \
+do { \
+        UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \
+        __d1 = __ll_highpart(d); \
+        __d0 = __ll_lowpart(d); \
+        \
+        __r1 = (n1) % __d1; \
+        __q1 = (n1) / __d1; \
+        __m = (UWtype) __q1 * __d0; \
+        __r1 = __r1 * __ll_B | __ll_highpart(n0); \
+        if (__r1 < __m) { \
+                __q1--, __r1 += (d); \
+                if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */ \
+                if (__r1 < __m) \
+                        __q1--, __r1 += (d); \
+        } \
+        __r1 -= __m; \
+        \
+        __r0 = __r1 % __d1; \
+        __q0 = __r1 / __d1; \
+        __m = (UWtype) __q0 * __d0; \
+        __r0 = __r0 * __ll_B | __ll_lowpart(n0); \
+        if (__r0 < __m) { \
+                __q0--, __r0 += (d); \
+                if (__r0 >= (d)) \
+                        if (__r0 < __m) \
+                                __q0--, __r0 += (d); \
+        } \
+        __r0 -= __m; \
+        \
+        (q) = (UWtype) __q1 * __ll_B | __q0; \
+        (r) = __r0; \
+} while (0)
+/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
+        __udiv_w_sdiv (defined in libgcc or elsewhere).  */
+#if !defined(udiv_qrnnd) && defined(sdiv_qrnnd)
+#define udiv_qrnnd(q, r, nh, nl, d) \
+do { \
+        UWtype __r; \
+        (q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \
+        (r) = __r; \
+} while (0)
+#endif
+        /* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.  */
+#if !defined(udiv_qrnnd)
+#define UDIV_NEEDS_NORMALIZATION 1
+#define udiv_qrnnd __udiv_qrnnd_c
+#endif
+#undef count_leading_zeros
+#if !defined(count_leading_zeros)
+        extern
+#ifdef __STDC__
+                        const
+#endif
+                        unsigned char __clz_tab[];
+#define count_leading_zeros(count, x) \
+do { \
+        UWtype __xr = (x); \
+        UWtype __a; \
+        \
+        if (W_TYPE_SIZE <= 32) { \
+                __a = __xr < ((UWtype) 1 << 2*__BITS4) \
+                ? (__xr < ((UWtype) 1 << __BITS4) ? 0 : __BITS4) \
+                : (__xr < ((UWtype) 1 << 3*__BITS4) ?  2*__BITS4 : 3*__BITS4); \
+        } \
+        else { \
+                for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \
+                        if (((__xr >> __a) & 0xff) != 0) \
+                                break; \
+        } \
+        \
+        (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \
+} while (0)
+        /* This version gives a well-defined value for zero. */
+#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
+#endif
+#if !defined(count_trailing_zeros)
+/* Define count_trailing_zeros using count_leading_zeros.  The latter might be
+        defined in asm, but if it is not, the C version above is good enough.  */
+#define count_trailing_zeros(count, x) \
+do { \
+        UWtype __ctz_x = (x); \
+        UWtype __ctz_c; \
+        count_leading_zeros(__ctz_c, __ctz_x & -__ctz_x); \
+        (count) = W_TYPE_SIZE - 1 - __ctz_c; \
+} while (0)
+#endif
+#ifndef UDIV_NEEDS_NORMALIZATION
+#define UDIV_NEEDS_NORMALIZATION 0
+#endif
diff --git a/lib/mpi/mpi-bit.c b/lib/mpi/mpi-bit.c
new file mode 100644
index 000000000000..568724804f29
--- /dev/null
+++ b/lib/mpi/mpi-bit.c
@@ -0,0 +1,56 @@
+/* mpi-bit.c  -  MPI bit level fucntions
+ * Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+#include "mpi-internal.h"
+#include "longlong.h"
+#define A_LIMB_1 ((mpi_limb_t) 1)
+/****************
+ * Sometimes we have MSL (most significant limbs) which are 0;
+ * this is for some reasons not good, so this function removes them.
+ */
+void mpi_normalize(MPI a)
+{
+        for (; a->nlimbs && !a->d[a->nlimbs - 1]; a->nlimbs--)
+                ;
+}
+/****************
+ * Return the number of bits in A.
+ */
+unsigned mpi_get_nbits(MPI a)
+{
+        unsigned n;
+        mpi_normalize(a);
+        if (a->nlimbs) {
+                mpi_limb_t alimb = a->d[a->nlimbs - 1];
+                if (alimb)
+                        count_leading_zeros(n, alimb);
+                else
+                        n = BITS_PER_MPI_LIMB;
+                n = BITS_PER_MPI_LIMB - n + (a->nlimbs - 1) * BITS_PER_MPI_LIMB;
+        } else
+                n = 0;
+        return n;
+}
+EXPORT_SYMBOL_GPL(mpi_get_nbits);
diff --git a/lib/mpi/mpi-inline.h b/lib/mpi/mpi-inline.h
new file mode 100644
index 000000000000..e2b39852b30a
--- /dev/null
+++ b/lib/mpi/mpi-inline.h
@@ -0,0 +1,122 @@
+/* mpi-inline.h  -  Internal to the Multi Precision Integers
+ *      Copyright (C) 1994, 1996, 1998, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *       Actually it's the same code with only minor changes in the
+ *       way the data is stored; this is to support the abstraction
+ *       of an optional secure memory allocation which may be used
+ *       to avoid revealing of sensitive data due to paging etc.
+ *       The GNU MP Library itself is published under the LGPL;
+ *       however I decided to publish this code under the plain GPL.
+ */
+#ifndef G10_MPI_INLINE_H
+#define G10_MPI_INLINE_H
+#ifndef G10_MPI_INLINE_DECL
+#define G10_MPI_INLINE_DECL  extern inline
+#endif
+G10_MPI_INLINE_DECL mpi_limb_t
+mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+              mpi_size_t s1_size, mpi_limb_t s2_limb)
+{
+        mpi_limb_t x;
+        x = *s1_ptr++;
+        s2_limb += x;
+        *res_ptr++ = s2_limb;
+        if (s2_limb < x) {      /* sum is less than the left operand: handle carry */
+                while (--s1_size) {
+                        x = *s1_ptr++ + 1;      /* add carry */
+                        *res_ptr++ = x; /* and store */
+                        if (x)  /* not 0 (no overflow): we can stop */
+                                goto leave;
+                }
+                return 1;       /* return carry (size of s1 to small) */
+        }
+leave:
+        if (res_ptr != s1_ptr) {        /* not the same variable */
+                mpi_size_t i;   /* copy the rest */
+                for (i = 0; i < s1_size - 1; i++)
+                        res_ptr[i] = s1_ptr[i];
+        }
+        return 0;               /* no carry */
+}
+G10_MPI_INLINE_DECL mpi_limb_t
+mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+            mpi_ptr_t s2_ptr, mpi_size_t s2_size)
+{
+        mpi_limb_t cy = 0;
+        if (s2_size)
+                cy = mpihelp_add_n(res_ptr, s1_ptr, s2_ptr, s2_size);
+        if (s1_size - s2_size)
+                cy = mpihelp_add_1(res_ptr + s2_size, s1_ptr + s2_size,
+                                   s1_size - s2_size, cy);
+        return cy;
+}
+G10_MPI_INLINE_DECL mpi_limb_t
+mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+              mpi_size_t s1_size, mpi_limb_t s2_limb)
+{
+        mpi_limb_t x;
+        x = *s1_ptr++;
+        s2_limb = x - s2_limb;
+        *res_ptr++ = s2_limb;
+        if (s2_limb > x) {
+                while (--s1_size) {
+                        x = *s1_ptr++;
+                        *res_ptr++ = x - 1;
+                        if (x)
+                                goto leave;
+                }
+                return 1;
+        }
+leave:
+        if (res_ptr != s1_ptr) {
+                mpi_size_t i;
+                for (i = 0; i < s1_size - 1; i++)
+                        res_ptr[i] = s1_ptr[i];
+        }
+        return 0;
+}
+G10_MPI_INLINE_DECL mpi_limb_t
+mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+            mpi_ptr_t s2_ptr, mpi_size_t s2_size)
+{
+        mpi_limb_t cy = 0;
+        if (s2_size)
+                cy = mpihelp_sub_n(res_ptr, s1_ptr, s2_ptr, s2_size);
+        if (s1_size - s2_size)
+                cy = mpihelp_sub_1(res_ptr + s2_size, s1_ptr + s2_size,
+                                   s1_size - s2_size, cy);
+        return cy;
+}
+#endif /*G10_MPI_INLINE_H */
diff --git a/lib/mpi/mpi-internal.h b/lib/mpi/mpi-internal.h
new file mode 100644
index 000000000000..77adcf6bc257
--- /dev/null
+++ b/lib/mpi/mpi-internal.h
@@ -0,0 +1,261 @@
+/* mpi-internal.h  -  Internal to the Multi Precision Integers
+ *      Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+ *      Copyright (C) 1998, 2000 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *       Actually it's the same code with only minor changes in the
+ *       way the data is stored; this is to support the abstraction
+ *       of an optional secure memory allocation which may be used
+ *       to avoid revealing of sensitive data due to paging etc.
+ *       The GNU MP Library itself is published under the LGPL;
+ *       however I decided to publish this code under the plain GPL.
+ */
+#ifndef G10_MPI_INTERNAL_H
+#define G10_MPI_INTERNAL_H
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/mpi.h>
+#include <linux/errno.h>
+#define log_debug printk
+#define log_bug printk
+#define assert(x) \
+        do { \
+                if (!x) \
+                        log_bug("failed assertion\n"); \
+        } while (0);
+/* If KARATSUBA_THRESHOLD is not already defined, define it to a
+ * value which is good on most machines.  */
+/* tested 4, 16, 32 and 64, where 16 gave the best performance when
+ * checking a 768 and a 1024 bit ElGamal signature.
+ * (wk 22.12.97) */
+#ifndef KARATSUBA_THRESHOLD
+#define KARATSUBA_THRESHOLD 16
+#endif
+/* The code can't handle KARATSUBA_THRESHOLD smaller than 2.  */
+#if KARATSUBA_THRESHOLD < 2
+#undef KARATSUBA_THRESHOLD
+#define KARATSUBA_THRESHOLD 2
+#endif
+typedef mpi_limb_t *mpi_ptr_t;  /* pointer to a limb */
+typedef int mpi_size_t;         /* (must be a signed type) */
+#define ABS(x) (x >= 0 ? x : -x)
+#define MIN(l, o) ((l) < (o) ? (l) : (o))
+#define MAX(h, i) ((h) > (i) ? (h) : (i))
+static inline int RESIZE_IF_NEEDED(MPI a, unsigned b)
+{
+        if (a->alloced < b)
+                return mpi_resize(a, b);
+        return 0;
+}
+/* Copy N limbs from S to D.  */
+#define MPN_COPY(d, s, n) \
+        do {                                    \
+                mpi_size_t _i;                  \
+                for (_i = 0; _i < (n); _i++)    \
+                        (d)[_i] = (s)[_i];      \
+        } while (0)
+#define MPN_COPY_INCR(d, s, n) \
+        do {                                    \
+                mpi_size_t _i;                  \
+                for (_i = 0; _i < (n); _i++)    \
+                        (d)[_i] = (d)[_i];      \
+        } while (0)
+#define MPN_COPY_DECR(d, s, n) \
+        do {                                    \
+                mpi_size_t _i;                  \
+                for (_i = (n)-1; _i >= 0; _i--) \
+                        (d)[_i] = (s)[_i];      \
+        } while (0)
+/* Zero N limbs at D */
+#define MPN_ZERO(d, n) \
+        do {                                    \
+                int  _i;                        \
+                for (_i = 0; _i < (n); _i++)    \
+                        (d)[_i] = 0;            \
+        } while (0)
+#define MPN_NORMALIZE(d, n)  \
+        do {                                    \
+                while ((n) > 0) {               \
+                        if ((d)[(n)-1])         \
+                                break;          \
+                        (n)--;                  \
+                }                               \
+        } while (0)
+#define MPN_NORMALIZE_NOT_ZERO(d, n) \
+        do {                            \
+                for (;;) {              \
+                        if ((d)[(n)-1]) \
+                                break;  \
+                        (n)--;          \
+                }                       \
+        } while (0)
+#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
+        do {                                                    \
+                if ((size) < KARATSUBA_THRESHOLD)               \
+                        mul_n_basecase(prodp, up, vp, size);    \
+                else                                            \
+                        mul_n(prodp, up, vp, size, tspace);     \
+        } while (0);
+/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
+ * limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
+ * If this would yield overflow, DI should be the largest possible number
+ * (i.e., only ones).  For correct operation, the most significant bit of D
+ * has to be set.  Put the quotient in Q and the remainder in R.
+ */
+#define UDIV_QRNND_PREINV(q, r, nh, nl, d, di) \
+        do {                                                            \
+                mpi_limb_t _q, _ql, _r;                                 \
+                mpi_limb_t _xh, _xl;                                    \
+                umul_ppmm(_q, _ql, (nh), (di));                         \
+                _q += (nh);     /* DI is 2**BITS_PER_MPI_LIMB too small */ \
+                umul_ppmm(_xh, _xl, _q, (d));                           \
+                sub_ddmmss(_xh, _r, (nh), (nl), _xh, _xl);              \
+                if (_xh) {                                              \
+                        sub_ddmmss(_xh, _r, _xh, _r, 0, (d));           \
+                        _q++;                                           \
+                        if (_xh) {                                      \
+                                sub_ddmmss(_xh, _r, _xh, _r, 0, (d));   \
+                                _q++;                                   \
+                        }                                               \
+                }                                                       \
+                if (_r >= (d)) {                                        \
+                        _r -= (d);                                      \
+                        _q++;                                           \
+                }                                                       \
+                (r) = _r;                                               \
+                (q) = _q;                                               \
+        } while (0)
+/*-- mpiutil.c --*/
+mpi_ptr_t mpi_alloc_limb_space(unsigned nlimbs);
+void mpi_free_limb_space(mpi_ptr_t a);
+void mpi_assign_limb_space(MPI a, mpi_ptr_t ap, unsigned nlimbs);
+/*-- mpi-bit.c --*/
+void mpi_rshift_limbs(MPI a, unsigned int count);
+int mpi_lshift_limbs(MPI a, unsigned int count);
+/*-- mpihelp-add.c --*/
+mpi_limb_t mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+                         mpi_size_t s1_size, mpi_limb_t s2_limb);
+mpi_limb_t mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+                         mpi_ptr_t s2_ptr, mpi_size_t size);
+mpi_limb_t mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+                       mpi_ptr_t s2_ptr, mpi_size_t s2_size);
+/*-- mpihelp-sub.c --*/
+mpi_limb_t mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+                         mpi_size_t s1_size, mpi_limb_t s2_limb);
+mpi_limb_t mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+                         mpi_ptr_t s2_ptr, mpi_size_t size);
+mpi_limb_t mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+                       mpi_ptr_t s2_ptr, mpi_size_t s2_size);
+/*-- mpihelp-cmp.c --*/
+int mpihelp_cmp(mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size);
+/*-- mpihelp-mul.c --*/
+struct karatsuba_ctx {
+        struct karatsuba_ctx *next;
+        mpi_ptr_t tspace;
+        mpi_size_t tspace_size;
+        mpi_ptr_t tp;
+        mpi_size_t tp_size;
+};
+void mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx);
+mpi_limb_t mpihelp_addmul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+                            mpi_size_t s1_size, mpi_limb_t s2_limb);
+mpi_limb_t mpihelp_submul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+                            mpi_size_t s1_size, mpi_limb_t s2_limb);
+int mpihelp_mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size);
+int mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
+                mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result);
+void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size);
+void mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size,
+                mpi_ptr_t tspace);
+int mpihelp_mul_karatsuba_case(mpi_ptr_t prodp,
+                               mpi_ptr_t up, mpi_size_t usize,
+                               mpi_ptr_t vp, mpi_size_t vsize,
+                               struct karatsuba_ctx *ctx);
+/*-- mpihelp-mul_1.c (or xxx/cpu/ *.S) --*/
+mpi_limb_t mpihelp_mul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+                         mpi_size_t s1_size, mpi_limb_t s2_limb);
+/*-- mpihelp-div.c --*/
+mpi_limb_t mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
+                         mpi_limb_t divisor_limb);
+mpi_limb_t mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs,
+                          mpi_ptr_t np, mpi_size_t nsize,
+                          mpi_ptr_t dp, mpi_size_t dsize);
+mpi_limb_t mpihelp_divmod_1(mpi_ptr_t quot_ptr,
+                            mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
+                            mpi_limb_t divisor_limb);
+/*-- mpihelp-shift.c --*/
+mpi_limb_t mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
+                          unsigned cnt);
+mpi_limb_t mpihelp_rshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
+                          unsigned cnt);
+/* Define stuff for longlong.h.  */
+#define W_TYPE_SIZE BITS_PER_MPI_LIMB
+typedef mpi_limb_t UWtype;
+typedef unsigned int UHWtype;
+#if defined(__GNUC__)
+typedef unsigned int UQItype __attribute__ ((mode(QI)));
+typedef int SItype __attribute__ ((mode(SI)));
+typedef unsigned int USItype __attribute__ ((mode(SI)));
+typedef int DItype __attribute__ ((mode(DI)));
+typedef unsigned int UDItype __attribute__ ((mode(DI)));
+#else
+typedef unsigned char UQItype;
+typedef long SItype;
+typedef unsigned long USItype;
+#endif
+#ifdef __GNUC__
+#include "mpi-inline.h"
+#endif
+#endif /*G10_MPI_INTERNAL_H */
diff --git a/lib/mpi/mpi-pow.c b/lib/mpi/mpi-pow.c
new file mode 100644
index 000000000000..67f3e79af914
--- /dev/null
+++ b/lib/mpi/mpi-pow.c
@@ -0,0 +1,323 @@
+/* mpi-pow.c  -  MPI functions
+ *      Copyright (C) 1994, 1996, 1998, 2000 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *       Actually it's the same code with only minor changes in the
+ *       way the data is stored; this is to support the abstraction
+ *       of an optional secure memory allocation which may be used
+ *       to avoid revealing of sensitive data due to paging etc.
+ *       The GNU MP Library itself is published under the LGPL;
+ *       however I decided to publish this code under the plain GPL.
+ */
+#include <linux/string.h>
+#include "mpi-internal.h"
+#include "longlong.h"
+/****************
+ * RES = BASE ^ EXP mod MOD
+ */
+int mpi_powm(MPI res, MPI base, MPI exp, MPI mod)
+{
+        mpi_ptr_t mp_marker = NULL, bp_marker = NULL, ep_marker = NULL;
+        mpi_ptr_t xp_marker = NULL;
+        mpi_ptr_t tspace = NULL;
+        mpi_ptr_t rp, ep, mp, bp;
+        mpi_size_t esize, msize, bsize, rsize;
+        int esign, msign, bsign, rsign;
+        mpi_size_t size;
+        int mod_shift_cnt;
+        int negative_result;
+        int assign_rp = 0;
+        mpi_size_t tsize = 0;   /* to avoid compiler warning */
+        /* fixme: we should check that the warning is void */
+        int rc = -ENOMEM;
+        esize = exp->nlimbs;
+        msize = mod->nlimbs;
+        size = 2 * msize;
+        esign = exp->sign;
+        msign = mod->sign;
+        rp = res->d;
+        ep = exp->d;
+        if (!msize)
+                return -EINVAL;
+        if (!esize) {
+                /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0
+                 * depending on if MOD equals 1.  */
+                rp[0] = 1;
+                res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1;
+                res->sign = 0;
+                goto leave;
+        }
+        /* Normalize MOD (i.e. make its most significant bit set) as required by
+         * mpn_divrem.  This will make the intermediate values in the calculation
+         * slightly larger, but the correct result is obtained after a final
+         * reduction using the original MOD value.  */
+        mp = mp_marker = mpi_alloc_limb_space(msize);
+        if (!mp)
+                goto enomem;
+        count_leading_zeros(mod_shift_cnt, mod->d[msize - 1]);
+        if (mod_shift_cnt)
+                mpihelp_lshift(mp, mod->d, msize, mod_shift_cnt);
+        else
+                MPN_COPY(mp, mod->d, msize);
+        bsize = base->nlimbs;
+        bsign = base->sign;
+        if (bsize > msize) {    /* The base is larger than the module. Reduce it. */
+                /* Allocate (BSIZE + 1) with space for remainder and quotient.
+                 * (The quotient is (bsize - msize + 1) limbs.)  */
+                bp = bp_marker = mpi_alloc_limb_space(bsize + 1);
+                if (!bp)
+                        goto enomem;
+                MPN_COPY(bp, base->d, bsize);
+                /* We don't care about the quotient, store it above the remainder,
+                 * at BP + MSIZE.  */
+                mpihelp_divrem(bp + msize, 0, bp, bsize, mp, msize);
+                bsize = msize;
+                /* Canonicalize the base, since we are going to multiply with it
+                 * quite a few times.  */
+                MPN_NORMALIZE(bp, bsize);
+        } else
+                bp = base->d;
+        if (!bsize) {
+                res->nlimbs = 0;
+                res->sign = 0;
+                goto leave;
+        }
+        if (res->alloced < size) {
+                /* We have to allocate more space for RES.  If any of the input
+                 * parameters are identical to RES, defer deallocation of the old
+                 * space.  */
+                if (rp == ep || rp == mp || rp == bp) {
+                        rp = mpi_alloc_limb_space(size);
+                        if (!rp)
+                                goto enomem;
+                        assign_rp = 1;
+                } else {
+                        if (mpi_resize(res, size) < 0)
+                                goto enomem;
+                        rp = res->d;
+                }
+        } else {                /* Make BASE, EXP and MOD not overlap with RES.  */
+                if (rp == bp) {
+                        /* RES and BASE are identical.  Allocate temp. space for BASE.  */
+                        BUG_ON(bp_marker);
+                        bp = bp_marker = mpi_alloc_limb_space(bsize);
+                        if (!bp)
+                                goto enomem;
+                        MPN_COPY(bp, rp, bsize);
+                }
+                if (rp == ep) {
+                        /* RES and EXP are identical.  Allocate temp. space for EXP.  */
+                        ep = ep_marker = mpi_alloc_limb_space(esize);
+                        if (!ep)
+                                goto enomem;
+                        MPN_COPY(ep, rp, esize);
+                }
+                if (rp == mp) {
+                        /* RES and MOD are identical.  Allocate temporary space for MOD. */
+                        BUG_ON(mp_marker);
+                        mp = mp_marker = mpi_alloc_limb_space(msize);
+                        if (!mp)
+                                goto enomem;
+                        MPN_COPY(mp, rp, msize);
+                }
+        }
+        MPN_COPY(rp, bp, bsize);
+        rsize = bsize;
+        rsign = bsign;
+        {
+                mpi_size_t i;
+                mpi_ptr_t xp;
+                int c;
+                mpi_limb_t e;
+                mpi_limb_t carry_limb;
+                struct karatsuba_ctx karactx;
+                xp = xp_marker = mpi_alloc_limb_space(2 * (msize + 1));
+                if (!xp)
+                        goto enomem;
+                memset(&karactx, 0, sizeof karactx);
+                negative_result = (ep[0] & 1) && base->sign;
+                i = esize - 1;
+                e = ep[i];
+                count_leading_zeros(c, e);
+                e = (e << c) << 1;      /* shift the exp bits to the left, lose msb */
+                c = BITS_PER_MPI_LIMB - 1 - c;
+                /* Main loop.
+                 *
+                 * Make the result be pointed to alternately by XP and RP.  This
+                 * helps us avoid block copying, which would otherwise be necessary
+                 * with the overlap restrictions of mpihelp_divmod. With 50% probability
+                 * the result after this loop will be in the area originally pointed
+                 * by RP (==RES->d), and with 50% probability in the area originally
+                 * pointed to by XP.
+                 */
+                for (;;) {
+                        while (c) {
+                                mpi_ptr_t tp;
+                                mpi_size_t xsize;
+                                /*if (mpihelp_mul_n(xp, rp, rp, rsize) < 0) goto enomem */
+                                if (rsize < KARATSUBA_THRESHOLD)
+                                        mpih_sqr_n_basecase(xp, rp, rsize);
+                                else {
+                                        if (!tspace) {
+                                                tsize = 2 * rsize;
+                                                tspace =
+                                                    mpi_alloc_limb_space(tsize);
+                                                if (!tspace)
+                                                        goto enomem;
+                                        } else if (tsize < (2 * rsize)) {
+                                                mpi_free_limb_space(tspace);
+                                                tsize = 2 * rsize;
+                                                tspace =
+                                                    mpi_alloc_limb_space(tsize);
+                                                if (!tspace)
+                                                        goto enomem;
+                                        }
+                                        mpih_sqr_n(xp, rp, rsize, tspace);
+                                }
+                                xsize = 2 * rsize;
+                                if (xsize > msize) {
+                                        mpihelp_divrem(xp + msize, 0, xp, xsize,
+                                                       mp, msize);
+                                        xsize = msize;
+                                }
+                                tp = rp;
+                                rp = xp;
+                                xp = tp;
+                                rsize = xsize;
+                                if ((mpi_limb_signed_t) e < 0) {
+                                        /*mpihelp_mul( xp, rp, rsize, bp, bsize ); */
+                                        if (bsize < KARATSUBA_THRESHOLD) {
+                                                mpi_limb_t tmp;
+                                                if (mpihelp_mul
+                                                    (xp, rp, rsize, bp, bsize,
+                                                     &tmp) < 0)
+                                                        goto enomem;
+                                        } else {
+                                                if (mpihelp_mul_karatsuba_case
+                                                    (xp, rp, rsize, bp, bsize,
+                                                     &karactx) < 0)
+                                                        goto enomem;
+                                        }
+                                        xsize = rsize + bsize;
+                                        if (xsize > msize) {
+                                                mpihelp_divrem(xp + msize, 0,
+                                                               xp, xsize, mp,
+                                                               msize);
+                                                xsize = msize;
+                                        }
+                                        tp = rp;
+                                        rp = xp;
+                                        xp = tp;
+                                        rsize = xsize;
+                                }
+                                e <<= 1;
+                                c--;
+                        }
+                        i--;
+                        if (i < 0)
+                                break;
+                        e = ep[i];
+                        c = BITS_PER_MPI_LIMB;
+                }
+                /* We shifted MOD, the modulo reduction argument, left MOD_SHIFT_CNT
+                 * steps.  Adjust the result by reducing it with the original MOD.
+                 *
+                 * Also make sure the result is put in RES->d (where it already
+                 * might be, see above).
+                 */
+                if (mod_shift_cnt) {
+                        carry_limb =
+                            mpihelp_lshift(res->d, rp, rsize, mod_shift_cnt);
+                        rp = res->d;
+                        if (carry_limb) {
+                                rp[rsize] = carry_limb;
+                                rsize++;
+                        }
+                } else {
+                        MPN_COPY(res->d, rp, rsize);
+                        rp = res->d;
+                }
+                if (rsize >= msize) {
+                        mpihelp_divrem(rp + msize, 0, rp, rsize, mp, msize);
+                        rsize = msize;
+                }
+                /* Remove any leading zero words from the result.  */
+                if (mod_shift_cnt)
+                        mpihelp_rshift(rp, rp, rsize, mod_shift_cnt);
+                MPN_NORMALIZE(rp, rsize);
+                mpihelp_release_karatsuba_ctx(&karactx);
+        }
+        if (negative_result && rsize) {
+                if (mod_shift_cnt)
+                        mpihelp_rshift(mp, mp, msize, mod_shift_cnt);
+                mpihelp_sub(rp, mp, msize, rp, rsize);
+                rsize = msize;
+                rsign = msign;
+                MPN_NORMALIZE(rp, rsize);
+        }
+        res->nlimbs = rsize;
+        res->sign = rsign;
+leave:
+        rc = 0;
+enomem:
+        if (assign_rp)
+                mpi_assign_limb_space(res, rp, size);
+        if (mp_marker)
+                mpi_free_limb_space(mp_marker);
+        if (bp_marker)
+                mpi_free_limb_space(bp_marker);
+        if (ep_marker)
+                mpi_free_limb_space(ep_marker);
+        if (xp_marker)
+                mpi_free_limb_space(xp_marker);
+        if (tspace)
+                mpi_free_limb_space(tspace);
+        return rc;
+}
+EXPORT_SYMBOL_GPL(mpi_powm);
diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
new file mode 100644
index 000000000000..f0fa65995800
--- /dev/null
+++ b/lib/mpi/mpicoder.c
@@ -0,0 +1,205 @@
+/* mpicoder.c  -  Coder for the external representation of MPIs
+ * Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+#include "mpi-internal.h"
+#define MAX_EXTERN_MPI_BITS 16384
+MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread)
+{
+        const uint8_t *buffer = xbuffer;
+        int i, j;
+        unsigned nbits, nbytes, nlimbs, nread = 0;
+        mpi_limb_t a;
+        MPI val = NULL;
+        if (*ret_nread < 2)
+                goto leave;
+        nbits = buffer[0] << 8 | buffer[1];
+        if (nbits > MAX_EXTERN_MPI_BITS) {
+                pr_info("MPI: mpi too large (%u bits)\n", nbits);
+                goto leave;
+        }
+        buffer += 2;
+        nread = 2;
+        nbytes = (nbits + 7) / 8;
+        nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB;
+        val = mpi_alloc(nlimbs);
+        if (!val)
+                return NULL;
+        i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB;
+        i %= BYTES_PER_MPI_LIMB;
+        val->nbits = nbits;
+        j = val->nlimbs = nlimbs;
+        val->sign = 0;
+        for (; j > 0; j--) {
+                a = 0;
+                for (; i < BYTES_PER_MPI_LIMB; i++) {
+                        if (++nread > *ret_nread) {
+                                printk
+                                    ("MPI: mpi larger than buffer nread=%d ret_nread=%d\n",
+                                     nread, *ret_nread);
+                                goto leave;
+                        }
+                        a <<= 8;
+                        a |= *buffer++;
+                }
+                i = 0;
+                val->d[j - 1] = a;
+        }
+leave:
+        *ret_nread = nread;
+        return val;
+}
+EXPORT_SYMBOL_GPL(mpi_read_from_buffer);
+/****************
+ * Return an allocated buffer with the MPI (msb first).
+ * NBYTES receives the length of this buffer. Caller must free the
+ * return string (This function does return a 0 byte buffer with NBYTES
+ * set to zero if the value of A is zero. If sign is not NULL, it will
+ * be set to the sign of the A.
+ */
+void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign)
+{
+        uint8_t *p, *buffer;
+        mpi_limb_t alimb;
+        int i;
+        unsigned int n;
+        if (sign)
+                *sign = a->sign;
+        *nbytes = n = a->nlimbs * BYTES_PER_MPI_LIMB;
+        if (!n)
+                n++;            /* avoid zero length allocation */
+        p = buffer = kmalloc(n, GFP_KERNEL);
+        if (!p)
+                return NULL;
+        for (i = a->nlimbs - 1; i >= 0; i--) {
+                alimb = a->d[i];
+#if BYTES_PER_MPI_LIMB == 4
+                *p++ = alimb >> 24;
+                *p++ = alimb >> 16;
+                *p++ = alimb >> 8;
+                *p++ = alimb;
+#elif BYTES_PER_MPI_LIMB == 8
+                *p++ = alimb >> 56;
+                *p++ = alimb >> 48;
+                *p++ = alimb >> 40;
+                *p++ = alimb >> 32;
+                *p++ = alimb >> 24;
+                *p++ = alimb >> 16;
+                *p++ = alimb >> 8;
+                *p++ = alimb;
+#else
+#error please implement for this limb size.
+#endif
+        }
+        /* this is sub-optimal but we need to do the shift operation
+         * because the caller has to free the returned buffer */
+        for (p = buffer; !*p && *nbytes; p++, --*nbytes)
+                ;
+        if (p != buffer)
+                memmove(buffer, p, *nbytes);
+        return buffer;
+}
+EXPORT_SYMBOL_GPL(mpi_get_buffer);
+/****************
+ * Use BUFFER to update MPI.
+ */
+int mpi_set_buffer(MPI a, const void *xbuffer, unsigned nbytes, int sign)
+{
+        const uint8_t *buffer = xbuffer, *p;
+        mpi_limb_t alimb;
+        int nlimbs;
+        int i;
+        nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB;
+        if (RESIZE_IF_NEEDED(a, nlimbs) < 0)
+                return -ENOMEM;
+        a->sign = sign;
+        for (i = 0, p = buffer + nbytes - 1; p >= buffer + BYTES_PER_MPI_LIMB;) {
+#if BYTES_PER_MPI_LIMB == 4
+                alimb = (mpi_limb_t) *p--;
+                alimb |= (mpi_limb_t) *p-- << 8;
+                alimb |= (mpi_limb_t) *p-- << 16;
+                alimb |= (mpi_limb_t) *p-- << 24;
+#elif BYTES_PER_MPI_LIMB == 8
+                alimb = (mpi_limb_t) *p--;
+                alimb |= (mpi_limb_t) *p-- << 8;
+                alimb |= (mpi_limb_t) *p-- << 16;
+                alimb |= (mpi_limb_t) *p-- << 24;
+                alimb |= (mpi_limb_t) *p-- << 32;
+                alimb |= (mpi_limb_t) *p-- << 40;
+                alimb |= (mpi_limb_t) *p-- << 48;
+                alimb |= (mpi_limb_t) *p-- << 56;
+#else
+#error please implement for this limb size.
+#endif
+                a->d[i++] = alimb;
+        }
+        if (p >= buffer) {
+#if BYTES_PER_MPI_LIMB == 4
+                alimb = *p--;
+                if (p >= buffer)
+                        alimb |= (mpi_limb_t) *p-- << 8;
+                if (p >= buffer)
+                        alimb |= (mpi_limb_t) *p-- << 16;
+                if (p >= buffer)
+                        alimb |= (mpi_limb_t) *p-- << 24;
+#elif BYTES_PER_MPI_LIMB == 8
+                alimb = (mpi_limb_t) *p--;
+                if (p >= buffer)
+                        alimb |= (mpi_limb_t) *p-- << 8;
+                if (p >= buffer)
+                        alimb |= (mpi_limb_t) *p-- << 16;
+                if (p >= buffer)
+                        alimb |= (mpi_limb_t) *p-- << 24;
+                if (p >= buffer)
+                        alimb |= (mpi_limb_t) *p-- << 32;
+                if (p >= buffer)
+                        alimb |= (mpi_limb_t) *p-- << 40;
+                if (p >= buffer)
+                        alimb |= (mpi_limb_t) *p-- << 48;
+                if (p >= buffer)
+                        alimb |= (mpi_limb_t) *p-- << 56;
+#else
+#error please implement for this limb size.
+#endif
+                a->d[i++] = alimb;
+        }
+        a->nlimbs = i;
+        if (i != nlimbs) {
+                pr_emerg("MPI: mpi_set_buffer: Assertion failed (%d != %d)", i,
+                       nlimbs);
+                BUG();
+        }
+        return 0;
+}
+EXPORT_SYMBOL_GPL(mpi_set_buffer);
diff --git a/lib/mpi/mpih-cmp.c b/lib/mpi/mpih-cmp.c
new file mode 100644
index 000000000000..b2fd39677f1b
--- /dev/null
+++ b/lib/mpi/mpih-cmp.c
@@ -0,0 +1,56 @@
+/* mpihelp-sub.c  -  MPI helper functions
+ *      Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+ *      Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *       Actually it's the same code with only minor changes in the
+ *       way the data is stored; this is to support the abstraction
+ *       of an optional secure memory allocation which may be used
+ *       to avoid revealing of sensitive data due to paging etc.
+ *       The GNU MP Library itself is published under the LGPL;
+ *       however I decided to publish this code under the plain GPL.
+ */
+#include "mpi-internal.h"
+/****************
+ * Compare OP1_PTR/OP1_SIZE with OP2_PTR/OP2_SIZE.
+ * There are no restrictions on the relative sizes of
+ * the two arguments.
+ * Return 1 if OP1 > OP2, 0 if they are equal, and -1 if OP1 < OP2.
+ */
+int mpihelp_cmp(mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size)
+{
+        mpi_size_t i;
+        mpi_limb_t op1_word, op2_word;
+        for (i = size - 1; i >= 0; i--) {
+                op1_word = op1_ptr[i];
+                op2_word = op2_ptr[i];
+                if (op1_word != op2_word)
+                        goto diff;
+        }
+        return 0;
+diff:
+        /* This can *not* be simplified to
+         *   op2_word - op2_word
+         * since that expression might give signed overflow.  */
+        return (op1_word > op2_word) ? 1 : -1;
+}
diff --git a/lib/mpi/mpih-div.c b/lib/mpi/mpih-div.c
new file mode 100644
index 000000000000..c57d1d46295e
--- /dev/null
+++ b/lib/mpi/mpih-div.c
@@ -0,0 +1,236 @@
+/* mpihelp-div.c  -  MPI helper functions
+ *      Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+ *      Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *       Actually it's the same code with only minor changes in the
+ *       way the data is stored; this is to support the abstraction
+ *       of an optional secure memory allocation which may be used
+ *       to avoid revealing of sensitive data due to paging etc.
+ *       The GNU MP Library itself is published under the LGPL;
+ *       however I decided to publish this code under the plain GPL.
+ */
+#include "mpi-internal.h"
+#include "longlong.h"
+#ifndef UMUL_TIME
+#define UMUL_TIME 1
+#endif
+#ifndef UDIV_TIME
+#define UDIV_TIME UMUL_TIME
+#endif
+/* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
+ * the NSIZE-DSIZE least significant quotient limbs at QP
+ * and the DSIZE long remainder at NP.  If QEXTRA_LIMBS is
+ * non-zero, generate that many fraction bits and append them after the
+ * other quotient limbs.
+ * Return the most significant limb of the quotient, this is always 0 or 1.
+ *
+ * Preconditions:
+ * 0. NSIZE >= DSIZE.
+ * 1. The most significant bit of the divisor must be set.
+ * 2. QP must either not overlap with the input operands at all, or
+ *    QP + DSIZE >= NP must hold true.  (This means that it's
+ *    possible to put the quotient in the high part of NUM, right after the
+ *    remainder in NUM.
+ * 3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero.
+ */
+mpi_limb_t
+mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs,
+               mpi_ptr_t np, mpi_size_t nsize, mpi_ptr_t dp, mpi_size_t dsize)
+{
+        mpi_limb_t most_significant_q_limb = 0;
+        switch (dsize) {
+        case 0:
+                /* We are asked to divide by zero, so go ahead and do it!  (To make
+                   the compiler not remove this statement, return the value.)  */
+                /*
+                 * existing clients of this function have been modified
+                 * not to call it with dsize == 0, so this should not happen
+                 */
+                return 1 / dsize;
+        case 1:
+                {
+                        mpi_size_t i;
+                        mpi_limb_t n1;
+                        mpi_limb_t d;
+                        d = dp[0];
+                        n1 = np[nsize - 1];
+                        if (n1 >= d) {
+                                n1 -= d;
+                                most_significant_q_limb = 1;
+                        }
+                        qp += qextra_limbs;
+                        for (i = nsize - 2; i >= 0; i--)
+                                udiv_qrnnd(qp[i], n1, n1, np[i], d);
+                        qp -= qextra_limbs;
+                        for (i = qextra_limbs - 1; i >= 0; i--)
+                                udiv_qrnnd(qp[i], n1, n1, 0, d);
+                        np[0] = n1;
+                }
+                break;
+        case 2:
+                {
+                        mpi_size_t i;
+                        mpi_limb_t n1, n0, n2;
+                        mpi_limb_t d1, d0;
+                        np += nsize - 2;
+                        d1 = dp[1];
+                        d0 = dp[0];
+                        n1 = np[1];
+                        n0 = np[0];
+                        if (n1 >= d1 && (n1 > d1 || n0 >= d0)) {
+                                sub_ddmmss(n1, n0, n1, n0, d1, d0);
+                                most_significant_q_limb = 1;
+                        }
+                        for (i = qextra_limbs + nsize - 2 - 1; i >= 0; i--) {
+                                mpi_limb_t q;
+                                mpi_limb_t r;
+                                if (i >= qextra_limbs)
+                                        np--;
+                                else
+                                        np[0] = 0;
+                                if (n1 == d1) {
+                                        /* Q should be either 111..111 or 111..110.  Need special
+                                         * treatment of this rare case as normal division would
+                                         * give overflow.  */
+                                        q = ~(mpi_limb_t) 0;
+                                        r = n0 + d1;
+                                        if (r < d1) {   /* Carry in the addition? */
+                                                add_ssaaaa(n1, n0, r - d0,
+                                                           np[0], 0, d0);
+                                                qp[i] = q;
+                                                continue;
+                                        }
+                                        n1 = d0 - (d0 != 0 ? 1 : 0);
+                                        n0 = -d0;
+                                } else {
+                                        udiv_qrnnd(q, r, n1, n0, d1);
+                                        umul_ppmm(n1, n0, d0, q);
+                                }
+                                n2 = np[0];
+q_test:
+                                if (n1 > r || (n1 == r && n0 > n2)) {
+                                        /* The estimated Q was too large.  */
+                                        q--;
+                                        sub_ddmmss(n1, n0, n1, n0, 0, d0);
+                                        r += d1;
+                                        if (r >= d1)    /* If not carry, test Q again.  */
+                                                goto q_test;
+                                }
+                                qp[i] = q;
+                                sub_ddmmss(n1, n0, r, n2, n1, n0);
+                        }
+                        np[1] = n1;
+                        np[0] = n0;
+                }
+                break;
+        default:
+                {
+                        mpi_size_t i;
+                        mpi_limb_t dX, d1, n0;
+                        np += nsize - dsize;
+                        dX = dp[dsize - 1];
+                        d1 = dp[dsize - 2];
+                        n0 = np[dsize - 1];
+                        if (n0 >= dX) {
+                                if (n0 > dX
+                                    || mpihelp_cmp(np, dp, dsize - 1) >= 0) {
+                                        mpihelp_sub_n(np, np, dp, dsize);
+                                        n0 = np[dsize - 1];
+                                        most_significant_q_limb = 1;
+                                }
+                        }
+                        for (i = qextra_limbs + nsize - dsize - 1; i >= 0; i--) {
+                                mpi_limb_t q;
+                                mpi_limb_t n1, n2;
+                                mpi_limb_t cy_limb;
+                                if (i >= qextra_limbs) {
+                                        np--;
+                                        n2 = np[dsize];
+                                } else {
+                                        n2 = np[dsize - 1];
+                                        MPN_COPY_DECR(np + 1, np, dsize - 1);
+                                        np[0] = 0;
+                                }
+                                if (n0 == dX) {
+                                        /* This might over-estimate q, but it's probably not worth
+                                         * the extra code here to find out.  */
+                                        q = ~(mpi_limb_t) 0;
+                                } else {
+                                        mpi_limb_t r;
+                                        udiv_qrnnd(q, r, n0, np[dsize - 1], dX);
+                                        umul_ppmm(n1, n0, d1, q);
+                                        while (n1 > r
+                                               || (n1 == r
+                                                   && n0 > np[dsize - 2])) {
+                                                q--;
+                                                r += dX;
+                                                if (r < dX)     /* I.e. "carry in previous addition?" */
+                                                        break;
+                                                n1 -= n0 < d1;
+                                                n0 -= d1;
+                                        }
+                                }
+                                /* Possible optimization: We already have (q * n0) and (1 * n1)
+                                 * after the calculation of q.  Taking advantage of that, we
+                                 * could make this loop make two iterations less.  */
+                                cy_limb = mpihelp_submul_1(np, dp, dsize, q);
+                                if (n2 != cy_limb) {
+                                        mpihelp_add_n(np, np, dp, dsize);
+                                        q--;
+                                }
+                                qp[i] = q;
+                                n0 = np[dsize - 1];
+                        }
+                }
+        }
+        return most_significant_q_limb;
+}
diff --git a/lib/mpi/mpih-mul.c b/lib/mpi/mpih-mul.c
new file mode 100644
index 000000000000..7c841719fdfb
--- /dev/null
+++ b/lib/mpi/mpih-mul.c
@@ -0,0 +1,497 @@
+/* mpihelp-mul.c  -  MPI helper functions
+ * Copyright (C) 1994, 1996, 1998, 1999,
+ *               2000 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *       Actually it's the same code with only minor changes in the
+ *       way the data is stored; this is to support the abstraction
+ *       of an optional secure memory allocation which may be used
+ *       to avoid revealing of sensitive data due to paging etc.
+ *       The GNU MP Library itself is published under the LGPL;
+ *       however I decided to publish this code under the plain GPL.
+ */
+#include <linux/string.h>
+#include "mpi-internal.h"
+#include "longlong.h"
+#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace)          \
+        do {                                                    \
+                if ((size) < KARATSUBA_THRESHOLD)               \
+                        mul_n_basecase(prodp, up, vp, size);    \
+                else                                            \
+                        mul_n(prodp, up, vp, size, tspace);     \
+        } while (0);
+#define MPN_SQR_N_RECURSE(prodp, up, size, tspace)              \
+        do {                                                    \
+                if ((size) < KARATSUBA_THRESHOLD)               \
+                        mpih_sqr_n_basecase(prodp, up, size);   \
+                else                                            \
+                        mpih_sqr_n(prodp, up, size, tspace);    \
+        } while (0);
+/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
+ * both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are
+ * always stored.  Return the most significant limb.
+ *
+ * Argument constraints:
+ * 1. PRODP != UP and PRODP != VP, i.e. the destination
+ *    must be distinct from the multiplier and the multiplicand.
+ *
+ *
+ * Handle simple cases with traditional multiplication.
+ *
+ * This is the most critical code of multiplication.  All multiplies rely
+ * on this, both small and huge.  Small ones arrive here immediately.  Huge
+ * ones arrive here as this is the base case for Karatsuba's recursive
+ * algorithm below.
+ */
+static mpi_limb_t
+mul_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
+{
+        mpi_size_t i;
+        mpi_limb_t cy;
+        mpi_limb_t v_limb;
+        /* Multiply by the first limb in V separately, as the result can be
+         * stored (not added) to PROD.  We also avoid a loop for zeroing.  */
+        v_limb = vp[0];
+        if (v_limb <= 1) {
+                if (v_limb == 1)
+                        MPN_COPY(prodp, up, size);
+                else
+                        MPN_ZERO(prodp, size);
+                cy = 0;
+        } else
+                cy = mpihelp_mul_1(prodp, up, size, v_limb);
+        prodp[size] = cy;
+        prodp++;
+        /* For each iteration in the outer loop, multiply one limb from
+         * U with one limb from V, and add it to PROD.  */
+        for (i = 1; i < size; i++) {
+                v_limb = vp[i];
+                if (v_limb <= 1) {
+                        cy = 0;
+                        if (v_limb == 1)
+                                cy = mpihelp_add_n(prodp, prodp, up, size);
+                } else
+                        cy = mpihelp_addmul_1(prodp, up, size, v_limb);
+                prodp[size] = cy;
+                prodp++;
+        }
+        return cy;
+}
+static void
+mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp,
+                mpi_size_t size, mpi_ptr_t tspace)
+{
+        if (size & 1) {
+                /* The size is odd, and the code below doesn't handle that.
+                 * Multiply the least significant (size - 1) limbs with a recursive
+                 * call, and handle the most significant limb of S1 and S2
+                 * separately.
+                 * A slightly faster way to do this would be to make the Karatsuba
+                 * code below behave as if the size were even, and let it check for
+                 * odd size in the end.  I.e., in essence move this code to the end.
+                 * Doing so would save us a recursive call, and potentially make the
+                 * stack grow a lot less.
+                 */
+                mpi_size_t esize = size - 1;    /* even size */
+                mpi_limb_t cy_limb;
+                MPN_MUL_N_RECURSE(prodp, up, vp, esize, tspace);
+                cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, vp[esize]);
+                prodp[esize + esize] = cy_limb;
+                cy_limb = mpihelp_addmul_1(prodp + esize, vp, size, up[esize]);
+                prodp[esize + size] = cy_limb;
+        } else {
+                /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
+                 *
+                 * Split U in two pieces, U1 and U0, such that
+                 * U = U0 + U1*(B**n),
+                 * and V in V1 and V0, such that
+                 * V = V0 + V1*(B**n).
+                 *
+                 * UV is then computed recursively using the identity
+                 *
+                 *        2n   n          n                     n
+                 * UV = (B  + B )U V  +  B (U -U )(V -V )  +  (B + 1)U V
+                 *                1 1        1  0   0  1              0 0
+                 *
+                 * Where B = 2**BITS_PER_MP_LIMB.
+                 */
+                mpi_size_t hsize = size >> 1;
+                mpi_limb_t cy;
+                int negflg;
+                /* Product H.      ________________  ________________
+                 *                |_____U1 x V1____||____U0 x V0_____|
+                 * Put result in upper part of PROD and pass low part of TSPACE
+                 * as new TSPACE.
+                 */
+                MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize,
+                                  tspace);
+                /* Product M.      ________________
+                 *                |_(U1-U0)(V0-V1)_|
+                 */
+                if (mpihelp_cmp(up + hsize, up, hsize) >= 0) {
+                        mpihelp_sub_n(prodp, up + hsize, up, hsize);
+                        negflg = 0;
+                } else {
+                        mpihelp_sub_n(prodp, up, up + hsize, hsize);
+                        negflg = 1;
+                }
+                if (mpihelp_cmp(vp + hsize, vp, hsize) >= 0) {
+                        mpihelp_sub_n(prodp + hsize, vp + hsize, vp, hsize);
+                        negflg ^= 1;
+                } else {
+                        mpihelp_sub_n(prodp + hsize, vp, vp + hsize, hsize);
+                        /* No change of NEGFLG.  */
+                }
+                /* Read temporary operands from low part of PROD.
+                 * Put result in low part of TSPACE using upper part of TSPACE
+                 * as new TSPACE.
+                 */
+                MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize,
+                                  tspace + size);
+                /* Add/copy product H. */
+                MPN_COPY(prodp + hsize, prodp + size, hsize);
+                cy = mpihelp_add_n(prodp + size, prodp + size,
+                                   prodp + size + hsize, hsize);
+                /* Add product M (if NEGFLG M is a negative number) */
+                if (negflg)
+                        cy -=
+                            mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace,
+                                          size);
+                else
+                        cy +=
+                            mpihelp_add_n(prodp + hsize, prodp + hsize, tspace,
+                                          size);
+                /* Product L.      ________________  ________________
+                 *                |________________||____U0 x V0_____|
+                 * Read temporary operands from low part of PROD.
+                 * Put result in low part of TSPACE using upper part of TSPACE
+                 * as new TSPACE.
+                 */
+                MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size);
+                /* Add/copy Product L (twice) */
+                cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
+                if (cy)
+                        mpihelp_add_1(prodp + hsize + size,
+                                      prodp + hsize + size, hsize, cy);
+                MPN_COPY(prodp, tspace, hsize);
+                cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize,
+                                   hsize);
+                if (cy)
+                        mpihelp_add_1(prodp + size, prodp + size, size, 1);
+        }
+}
+void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size)
+{
+        mpi_size_t i;
+        mpi_limb_t cy_limb;
+        mpi_limb_t v_limb;
+        /* Multiply by the first limb in V separately, as the result can be
+         * stored (not added) to PROD.  We also avoid a loop for zeroing.  */
+        v_limb = up[0];
+        if (v_limb <= 1) {
+                if (v_limb == 1)
+                        MPN_COPY(prodp, up, size);
+                else
+                        MPN_ZERO(prodp, size);
+                cy_limb = 0;
+        } else
+                cy_limb = mpihelp_mul_1(prodp, up, size, v_limb);
+        prodp[size] = cy_limb;
+        prodp++;
+        /* For each iteration in the outer loop, multiply one limb from
+         * U with one limb from V, and add it to PROD.  */
+        for (i = 1; i < size; i++) {
+                v_limb = up[i];
+                if (v_limb <= 1) {
+                        cy_limb = 0;
+                        if (v_limb == 1)
+                                cy_limb = mpihelp_add_n(prodp, prodp, up, size);
+                } else
+                        cy_limb = mpihelp_addmul_1(prodp, up, size, v_limb);
+                prodp[size] = cy_limb;
+                prodp++;
+        }
+}
+void
+mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace)
+{
+        if (size & 1) {
+                /* The size is odd, and the code below doesn't handle that.
+                 * Multiply the least significant (size - 1) limbs with a recursive
+                 * call, and handle the most significant limb of S1 and S2
+                 * separately.
+                 * A slightly faster way to do this would be to make the Karatsuba
+                 * code below behave as if the size were even, and let it check for
+                 * odd size in the end.  I.e., in essence move this code to the end.
+                 * Doing so would save us a recursive call, and potentially make the
+                 * stack grow a lot less.
+                 */
+                mpi_size_t esize = size - 1;    /* even size */
+                mpi_limb_t cy_limb;
+                MPN_SQR_N_RECURSE(prodp, up, esize, tspace);
+                cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, up[esize]);
+                prodp[esize + esize] = cy_limb;
+                cy_limb = mpihelp_addmul_1(prodp + esize, up, size, up[esize]);
+                prodp[esize + size] = cy_limb;
+        } else {
+                mpi_size_t hsize = size >> 1;
+                mpi_limb_t cy;
+                /* Product H.      ________________  ________________
+                 *                |_____U1 x U1____||____U0 x U0_____|
+                 * Put result in upper part of PROD and pass low part of TSPACE
+                 * as new TSPACE.
+                 */
+                MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace);
+                /* Product M.      ________________
+                 *                |_(U1-U0)(U0-U1)_|
+                 */
+                if (mpihelp_cmp(up + hsize, up, hsize) >= 0)
+                        mpihelp_sub_n(prodp, up + hsize, up, hsize);
+                else
+                        mpihelp_sub_n(prodp, up, up + hsize, hsize);
+                /* Read temporary operands from low part of PROD.
+                 * Put result in low part of TSPACE using upper part of TSPACE
+                 * as new TSPACE.  */
+                MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size);
+                /* Add/copy product H  */
+                MPN_COPY(prodp + hsize, prodp + size, hsize);
+                cy = mpihelp_add_n(prodp + size, prodp + size,
+                                   prodp + size + hsize, hsize);
+                /* Add product M (if NEGFLG M is a negative number).  */
+                cy -= mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, size);
+                /* Product L.      ________________  ________________
+                 *                |________________||____U0 x U0_____|
+                 * Read temporary operands from low part of PROD.
+                 * Put result in low part of TSPACE using upper part of TSPACE
+                 * as new TSPACE.  */
+                MPN_SQR_N_RECURSE(tspace, up, hsize, tspace + size);
+                /* Add/copy Product L (twice).  */
+                cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
+                if (cy)
+                        mpihelp_add_1(prodp + hsize + size,
+                                      prodp + hsize + size, hsize, cy);
+                MPN_COPY(prodp, tspace, hsize);
+                cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize,
+                                   hsize);
+                if (cy)
+                        mpihelp_add_1(prodp + size, prodp + size, size, 1);
+        }
+}
+int
+mpihelp_mul_karatsuba_case(mpi_ptr_t prodp,
+                           mpi_ptr_t up, mpi_size_t usize,
+                           mpi_ptr_t vp, mpi_size_t vsize,
+                           struct karatsuba_ctx *ctx)
+{
+        mpi_limb_t cy;
+        if (!ctx->tspace || ctx->tspace_size < vsize) {
+                if (ctx->tspace)
+                        mpi_free_limb_space(ctx->tspace);
+                ctx->tspace = mpi_alloc_limb_space(2 * vsize);
+                if (!ctx->tspace)
+                        return -ENOMEM;
+                ctx->tspace_size = vsize;
+        }
+        MPN_MUL_N_RECURSE(prodp, up, vp, vsize, ctx->tspace);
+        prodp += vsize;
+        up += vsize;
+        usize -= vsize;
+        if (usize >= vsize) {
+                if (!ctx->tp || ctx->tp_size < vsize) {
+                        if (ctx->tp)
+                                mpi_free_limb_space(ctx->tp);
+                        ctx->tp = mpi_alloc_limb_space(2 * vsize);
+                        if (!ctx->tp) {
+                                if (ctx->tspace)
+                                        mpi_free_limb_space(ctx->tspace);
+                                ctx->tspace = NULL;
+                                return -ENOMEM;
+                        }
+                        ctx->tp_size = vsize;
+                }
+                do {
+                        MPN_MUL_N_RECURSE(ctx->tp, up, vp, vsize, ctx->tspace);
+                        cy = mpihelp_add_n(prodp, prodp, ctx->tp, vsize);
+                        mpihelp_add_1(prodp + vsize, ctx->tp + vsize, vsize,
+                                      cy);
+                        prodp += vsize;
+                        up += vsize;
+                        usize -= vsize;
+                } while (usize >= vsize);
+        }
+        if (usize) {
+                if (usize < KARATSUBA_THRESHOLD) {
+                        mpi_limb_t tmp;
+                        if (mpihelp_mul(ctx->tspace, vp, vsize, up, usize, &tmp)
+                            < 0)
+                                return -ENOMEM;
+                } else {
+                        if (!ctx->next) {
+                                ctx->next = kzalloc(sizeof *ctx, GFP_KERNEL);
+                                if (!ctx->next)
+                                        return -ENOMEM;
+                        }
+                        if (mpihelp_mul_karatsuba_case(ctx->tspace,
+                                                       vp, vsize,
+                                                       up, usize,
+                                                       ctx->next) < 0)
+                                return -ENOMEM;
+                }
+                cy = mpihelp_add_n(prodp, prodp, ctx->tspace, vsize);
+                mpihelp_add_1(prodp + vsize, ctx->tspace + vsize, usize, cy);
+        }
+        return 0;
+}
+void mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx)
+{
+        struct karatsuba_ctx *ctx2;
+        if (ctx->tp)
+                mpi_free_limb_space(ctx->tp);
+        if (ctx->tspace)
+                mpi_free_limb_space(ctx->tspace);
+        for (ctx = ctx->next; ctx; ctx = ctx2) {
+                ctx2 = ctx->next;
+                if (ctx->tp)
+                        mpi_free_limb_space(ctx->tp);
+                if (ctx->tspace)
+                        mpi_free_limb_space(ctx->tspace);
+                kfree(ctx);
+        }
+}
+/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs)
+ * and v (pointed to by VP, with VSIZE limbs), and store the result at
+ * PRODP.  USIZE + VSIZE limbs are always stored, but if the input
+ * operands are normalized.  Return the most significant limb of the
+ * result.
+ *
+ * NOTE: The space pointed to by PRODP is overwritten before finished
+ * with U and V, so overlap is an error.
+ *
+ * Argument constraints:
+ * 1. USIZE >= VSIZE.
+ * 2. PRODP != UP and PRODP != VP, i.e. the destination
+ *    must be distinct from the multiplier and the multiplicand.
+ */
+int
+mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
+            mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result)
+{
+        mpi_ptr_t prod_endp = prodp + usize + vsize - 1;
+        mpi_limb_t cy;
+        struct karatsuba_ctx ctx;
+        if (vsize < KARATSUBA_THRESHOLD) {
+                mpi_size_t i;
+                mpi_limb_t v_limb;
+                if (!vsize) {
+                        *_result = 0;
+                        return 0;
+                }
+                /* Multiply by the first limb in V separately, as the result can be
+                 * stored (not added) to PROD.  We also avoid a loop for zeroing.  */
+                v_limb = vp[0];
+                if (v_limb <= 1) {
+                        if (v_limb == 1)
+                                MPN_COPY(prodp, up, usize);
+                        else
+                                MPN_ZERO(prodp, usize);
+                        cy = 0;
+                } else
+                        cy = mpihelp_mul_1(prodp, up, usize, v_limb);
+                prodp[usize] = cy;
+                prodp++;
+                /* For each iteration in the outer loop, multiply one limb from
+                 * U with one limb from V, and add it to PROD.  */
+                for (i = 1; i < vsize; i++) {
+                        v_limb = vp[i];
+                        if (v_limb <= 1) {
+                                cy = 0;
+                                if (v_limb == 1)
+                                        cy = mpihelp_add_n(prodp, prodp, up,
+                                                           usize);
+                        } else
+                                cy = mpihelp_addmul_1(prodp, up, usize, v_limb);
+                        prodp[usize] = cy;
+                        prodp++;
+                }
+                *_result = cy;
+                return 0;
+        }
+        memset(&ctx, 0, sizeof ctx);
+        if (mpihelp_mul_karatsuba_case(prodp, up, usize, vp, vsize, &ctx) < 0)
+                return -ENOMEM;
+        mpihelp_release_karatsuba_ctx(&ctx);
+        *_result = *prod_endp;
+        return 0;
+}
diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c
new file mode 100644
index 000000000000..657979f71bef
--- /dev/null
+++ b/lib/mpi/mpiutil.c
@@ -0,0 +1,123 @@
+/* mpiutil.ac  -  Utility functions for MPI
+ * Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+#include "mpi-internal.h"
+/****************
+ * Note:  It was a bad idea to use the number of limbs to allocate
+ *        because on a alpha the limbs are large but we normally need
+ *        integers of n bits - So we should chnage this to bits (or bytes).
+ *
+ *        But mpi_alloc is used in a lot of places :-)
+ */
+MPI mpi_alloc(unsigned nlimbs)
+{
+        MPI a;
+        a = kmalloc(sizeof *a, GFP_KERNEL);
+        if (!a)
+                return a;
+        if (nlimbs) {
+                a->d = mpi_alloc_limb_space(nlimbs);
+                if (!a->d) {
+                        kfree(a);
+                        return NULL;
+                }
+        } else {
+                a->d = NULL;
+        }
+        a->alloced = nlimbs;
+        a->nlimbs = 0;
+        a->sign = 0;
+        a->flags = 0;
+        a->nbits = 0;
+        return a;
+}
+EXPORT_SYMBOL_GPL(mpi_alloc);
+mpi_ptr_t mpi_alloc_limb_space(unsigned nlimbs)
+{
+        size_t len = nlimbs * sizeof(mpi_limb_t);
+        if (!len)
+                return NULL;
+        return kmalloc(len, GFP_KERNEL);
+}
+void mpi_free_limb_space(mpi_ptr_t a)
+{
+        if (!a)
+                return;
+        kfree(a);
+}
+void mpi_assign_limb_space(MPI a, mpi_ptr_t ap, unsigned nlimbs)
+{
+        mpi_free_limb_space(a->d);
+        a->d = ap;
+        a->alloced = nlimbs;
+}
+/****************
+ * Resize the array of A to NLIMBS. the additional space is cleared
+ * (set to 0) [done by m_realloc()]
+ */
+int mpi_resize(MPI a, unsigned nlimbs)
+{
+        void *p;
+        if (nlimbs <= a->alloced)
+                return 0;       /* no need to do it */
+        if (a->d) {
+                p = kmalloc(nlimbs * sizeof(mpi_limb_t), GFP_KERNEL);
+                if (!p)
+                        return -ENOMEM;
+                memcpy(p, a->d, a->alloced * sizeof(mpi_limb_t));
+                kfree(a->d);
+                a->d = p;
+        } else {
+                a->d = kzalloc(nlimbs * sizeof(mpi_limb_t), GFP_KERNEL);
+                if (!a->d)
+                        return -ENOMEM;
+        }
+        a->alloced = nlimbs;
+        return 0;
+}
+void mpi_free(MPI a)
+{
+        if (!a)
+                return;
+        if (a->flags & 4)
+                kfree(a->d);
+        else
+                mpi_free_limb_space(a->d);
+        if (a->flags & ~7)
+                pr_info("invalid flag value in mpi\n");
+        kfree(a);
+}
+EXPORT_SYMBOL_GPL(mpi_free);