28 files changed, 5954 insertions, 0 deletions
diff --git a/lib/mpi/Makefile b/lib/mpi/Makefile
new file mode 100644
index 000000000000..567d52e74d77
--- /dev/null
+++ b/lib/mpi/Makefile
@@ -0,0 +1,32 @@
+#
+# MPI multiprecision maths library (from gpg)
+#
+obj-$(CONFIG_MPILIB) = mpi.o
+mpi-y = \
+        generic_mpih-lshift.o           \
+        generic_mpih-mul1.o             \
+        generic_mpih-mul2.o             \
+        generic_mpih-mul3.o             \
+        generic_mpih-rshift.o           \
+        generic_mpih-sub1.o             \
+        generic_mpih-add1.o             \
+        mpicoder.o                      \
+        mpi-bit.o                       \
+        mpih-cmp.o                      \
+        mpih-div.o                      \
+        mpih-mul.o                      \
+        mpi-pow.o                       \
+        mpiutil.o
+mpi-$(CONFIG_MPILIB_EXTRA) += \
+        mpi-add.o                       \
+        mpi-div.o                       \
+        mpi-cmp.o                       \
+        mpi-gcd.o                       \
+        mpi-inline.o                    \
+        mpi-inv.o                       \
+        mpi-mpow.o                      \
+        mpi-mul.o                       \
+        mpi-scan.o
diff --git a/lib/mpi/generic_mpi-asm-defs.h b/lib/mpi/generic_mpi-asm-defs.h
new file mode 100644
index 000000000000..047d1f5a7249
--- /dev/null
+++ b/lib/mpi/generic_mpi-asm-defs.h
@@ -0,0 +1,4 @@
+/* This file defines some basic constants for the MPI machinery.  We
+ * need to define the types on a per-CPU basis, so it is done with
+ * this file here.  */
+#define BYTES_PER_MPI_LIMB  (SIZEOF_UNSIGNED_LONG)
diff --git a/lib/mpi/generic_mpih-add1.c b/lib/mpi/generic_mpih-add1.c
new file mode 100644
index 000000000000..c94c7dd344b3
--- /dev/null
+++ b/lib/mpi/generic_mpih-add1.c
@@ -0,0 +1,61 @@
+/* mpihelp-add_1.c  -  MPI helper functions
+ * Copyright (C) 1994, 1996, 1997, 1998,
+ *               2000 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *       Actually it's the same code with only minor changes in the
+ *       way the data is stored; this is to support the abstraction
+ *       of an optional secure memory allocation which may be used
+ *       to avoid revealing of sensitive data due to paging etc.
+ *       The GNU MP Library itself is published under the LGPL;
+ *       however I decided to publish this code under the plain GPL.
+ */
+#include "mpi-internal.h"
+#include "longlong.h"
+mpi_limb_t
+mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+              mpi_ptr_t s2_ptr, mpi_size_t size)
+{
+        mpi_limb_t x, y, cy;
+        mpi_size_t j;
+        /* The loop counter and index J goes from -SIZE to -1.  This way
+           the loop becomes faster.  */
+        j = -size;
+        /* Offset the base pointers to compensate for the negative indices. */
+        s1_ptr -= j;
+        s2_ptr -= j;
+        res_ptr -= j;
+        cy = 0;
+        do {
+                y = s2_ptr[j];
+                x = s1_ptr[j];
+                y += cy;        /* add previous carry to one addend */
+                cy = y < cy;    /* get out carry from that addition */
+                y += x;         /* add other addend */
+                cy += y < x;    /* get out carry from that add, combine */
+                res_ptr[j] = y;
+        } while (++j);
+        return cy;
+}
diff --git a/lib/mpi/generic_mpih-lshift.c b/lib/mpi/generic_mpih-lshift.c
new file mode 100644
index 000000000000..86318927231a
--- /dev/null
+++ b/lib/mpi/generic_mpih-lshift.c
@@ -0,0 +1,63 @@
+/* mpihelp-lshift.c  -  MPI helper functions
+ * Copyright (C) 1994, 1996, 1998, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *       Actually it's the same code with only minor changes in the
+ *       way the data is stored; this is to support the abstraction
+ *       of an optional secure memory allocation which may be used
+ *       to avoid revealing of sensitive data due to paging etc.
+ *       The GNU MP Library itself is published under the LGPL;
+ *       however I decided to publish this code under the plain GPL.
+ */
+#include "mpi-internal.h"
+/* Shift U (pointed to by UP and USIZE digits long) CNT bits to the left
+ * and store the USIZE least significant digits of the result at WP.
+ * Return the bits shifted out from the most significant digit.
+ *
+ * Argument constraints:
+ * 1. 0 < CNT < BITS_PER_MP_LIMB
+ * 2. If the result is to be written over the input, WP must be >= UP.
+ */
+mpi_limb_t
+mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned int cnt)
+{
+        mpi_limb_t high_limb, low_limb;
+        unsigned sh_1, sh_2;
+        mpi_size_t i;
+        mpi_limb_t retval;
+        sh_1 = cnt;
+        wp += 1;
+        sh_2 = BITS_PER_MPI_LIMB - sh_1;
+        i = usize - 1;
+        low_limb = up[i];
+        retval = low_limb >> sh_2;
+        high_limb = low_limb;
+        while (--i >= 0) {
+                low_limb = up[i];
+                wp[i] = (high_limb << sh_1) | (low_limb >> sh_2);
+                high_limb = low_limb;
+        }
+        wp[i] = high_limb << sh_1;
+        return retval;
+}
diff --git a/lib/mpi/generic_mpih-mul1.c b/lib/mpi/generic_mpih-mul1.c
new file mode 100644
index 000000000000..1668dfd9092c
--- /dev/null
+++ b/lib/mpi/generic_mpih-mul1.c
@@ -0,0 +1,57 @@
+/* mpihelp-mul_1.c  -  MPI helper functions
+ * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *       Actually it's the same code with only minor changes in the
+ *       way the data is stored; this is to support the abstraction
+ *       of an optional secure memory allocation which may be used
+ *       to avoid revealing of sensitive data due to paging etc.
+ *       The GNU MP Library itself is published under the LGPL;
+ *       however I decided to publish this code under the plain GPL.
+ */
+#include "mpi-internal.h"
+#include "longlong.h"
+mpi_limb_t
+mpihelp_mul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+              mpi_limb_t s2_limb)
+{
+        mpi_limb_t cy_limb;
+        mpi_size_t j;
+        mpi_limb_t prod_high, prod_low;
+        /* The loop counter and index J goes from -S1_SIZE to -1.  This way
+         * the loop becomes faster.  */
+        j = -s1_size;
+        /* Offset the base pointers to compensate for the negative indices.  */
+        s1_ptr -= j;
+        res_ptr -= j;
+        cy_limb = 0;
+        do {
+                umul_ppmm(prod_high, prod_low, s1_ptr[j], s2_limb);
+                prod_low += cy_limb;
+                cy_limb = (prod_low < cy_limb ? 1 : 0) + prod_high;
+                res_ptr[j] = prod_low;
+        } while (++j);
+        return cy_limb;
+}
diff --git a/lib/mpi/generic_mpih-mul2.c b/lib/mpi/generic_mpih-mul2.c
new file mode 100644
index 000000000000..8a7b29ee1740
--- /dev/null
+++ b/lib/mpi/generic_mpih-mul2.c
@@ -0,0 +1,60 @@
+/* mpihelp-mul_2.c  -  MPI helper functions
+ * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *       Actually it's the same code with only minor changes in the
+ *       way the data is stored; this is to support the abstraction
+ *       of an optional secure memory allocation which may be used
+ *       to avoid revealing of sensitive data due to paging etc.
+ *       The GNU MP Library itself is published under the LGPL;
+ *       however I decided to publish this code under the plain GPL.
+ */
+#include "mpi-internal.h"
+#include "longlong.h"
+mpi_limb_t
+mpihelp_addmul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+                 mpi_size_t s1_size, mpi_limb_t s2_limb)
+{
+        mpi_limb_t cy_limb;
+        mpi_size_t j;
+        mpi_limb_t prod_high, prod_low;
+        mpi_limb_t x;
+        /* The loop counter and index J goes from -SIZE to -1.  This way
+         * the loop becomes faster.  */
+        j = -s1_size;
+        res_ptr -= j;
+        s1_ptr -= j;
+        cy_limb = 0;
+        do {
+                umul_ppmm(prod_high, prod_low, s1_ptr[j], s2_limb);
+                prod_low += cy_limb;
+                cy_limb = (prod_low < cy_limb ? 1 : 0) + prod_high;
+                x = res_ptr[j];
+                prod_low = x + prod_low;
+                cy_limb += prod_low < x ? 1 : 0;
+                res_ptr[j] = prod_low;
+        } while (++j);
+        return cy_limb;
+}
diff --git a/lib/mpi/generic_mpih-mul3.c b/lib/mpi/generic_mpih-mul3.c
new file mode 100644
index 000000000000..f96df327be63
--- /dev/null
+++ b/lib/mpi/generic_mpih-mul3.c
@@ -0,0 +1,61 @@
+/* mpihelp-mul_3.c  -  MPI helper functions
+ * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *       Actually it's the same code with only minor changes in the
+ *       way the data is stored; this is to support the abstraction
+ *       of an optional secure memory allocation which may be used
+ *       to avoid revealing of sensitive data due to paging etc.
+ *       The GNU MP Library itself is published under the LGPL;
+ *       however I decided to publish this code under the plain GPL.
+ */
+#include "mpi-internal.h"
+#include "longlong.h"
+mpi_limb_t
+mpihelp_submul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+                 mpi_size_t s1_size, mpi_limb_t s2_limb)
+{
+        mpi_limb_t cy_limb;
+        mpi_size_t j;
+        mpi_limb_t prod_high, prod_low;
+        mpi_limb_t x;
+        /* The loop counter and index J goes from -SIZE to -1.  This way
+         * the loop becomes faster.  */
+        j = -s1_size;
+        res_ptr -= j;
+        s1_ptr -= j;
+        cy_limb = 0;
+        do {
+                umul_ppmm(prod_high, prod_low, s1_ptr[j], s2_limb);
+                prod_low += cy_limb;
+                cy_limb = (prod_low < cy_limb ? 1 : 0) + prod_high;
+                x = res_ptr[j];
+                prod_low = x - prod_low;
+                cy_limb += prod_low > x ? 1 : 0;
+                res_ptr[j] = prod_low;
+        } while (++j);
+        return cy_limb;
+}
diff --git a/lib/mpi/generic_mpih-rshift.c b/lib/mpi/generic_mpih-rshift.c
new file mode 100644
index 000000000000..ffa328818ca6
--- /dev/null
+++ b/lib/mpi/generic_mpih-rshift.c
@@ -0,0 +1,63 @@
+/* mpih-rshift.c  -  MPI helper functions
+ * Copyright (C) 1994, 1996, 1998, 1999,
+ *               2000, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GNUPG
+ *
+ * GNUPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GNUPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *       Actually it's the same code with only minor changes in the
+ *       way the data is stored; this is to support the abstraction
+ *       of an optional secure memory allocation which may be used
+ *       to avoid revealing of sensitive data due to paging etc.
+ *       The GNU MP Library itself is published under the LGPL;
+ *       however I decided to publish this code under the plain GPL.
+ */
+#include "mpi-internal.h"
+/* Shift U (pointed to by UP and USIZE limbs long) CNT bits to the right
+ * and store the USIZE least significant limbs of the result at WP.
+ * The bits shifted out to the right are returned.
+ *
+ * Argument constraints:
+ * 1. 0 < CNT < BITS_PER_MP_LIMB
+ * 2. If the result is to be written over the input, WP must be <= UP.
+ */
+mpi_limb_t
+mpihelp_rshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize, unsigned cnt)
+{
+        mpi_limb_t high_limb, low_limb;
+        unsigned sh_1, sh_2;
+        mpi_size_t i;
+        mpi_limb_t retval;
+        sh_1 = cnt;
+        wp -= 1;
+        sh_2 = BITS_PER_MPI_LIMB - sh_1;
+        high_limb = up[0];
+        retval = high_limb << sh_2;
+        low_limb = high_limb;
+        for (i = 1; i < usize; i++) {
+                high_limb = up[i];
+                wp[i] = (low_limb >> sh_1) | (high_limb << sh_2);
+                low_limb = high_limb;
+        }
+        wp[i] = low_limb >> sh_1;
+        return retval;
+}
diff --git a/lib/mpi/generic_mpih-sub1.c b/lib/mpi/generic_mpih-sub1.c
new file mode 100644
index 000000000000..5d98ab7d6853
--- /dev/null
+++ b/lib/mpi/generic_mpih-sub1.c
@@ -0,0 +1,60 @@
+/* mpihelp-add_2.c  -  MPI helper functions
+ * Copyright (C) 1994, 1996, 1997, 1998, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *       Actually it's the same code with only minor changes in the
+ *       way the data is stored; this is to support the abstraction
+ *       of an optional secure memory allocation which may be used
+ *       to avoid revealing of sensitive data due to paging etc.
+ *       The GNU MP Library itself is published under the LGPL;
+ *       however I decided to publish this code under the plain GPL.
+ */
+#include "mpi-internal.h"
+#include "longlong.h"
+mpi_limb_t
+mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+              mpi_ptr_t s2_ptr, mpi_size_t size)
+{
+        mpi_limb_t x, y, cy;
+        mpi_size_t j;
+        /* The loop counter and index J goes from -SIZE to -1.  This way
+           the loop becomes faster.  */
+        j = -size;
+        /* Offset the base pointers to compensate for the negative indices.  */
+        s1_ptr -= j;
+        s2_ptr -= j;
+        res_ptr -= j;
+        cy = 0;
+        do {
+                y = s2_ptr[j];
+                x = s1_ptr[j];
+                y += cy;        /* add previous carry to subtrahend */
+                cy = y < cy;    /* get out carry from that addition */
+                y = x - y;      /* main subtract */
+                cy += y > x;    /* get out carry from the subtract, combine */
+                res_ptr[j] = y;
+        } while (++j);
+        return cy;
+}
diff --git a/lib/mpi/longlong.h b/lib/mpi/longlong.h
new file mode 100644
index 000000000000..b87487b40a8b
--- /dev/null
+++ b/lib/mpi/longlong.h
@@ -0,0 +1,1478 @@
+/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.
+ * Note: I added some stuff for use with gnupg
+ *
+ * Copyright (C) 1991, 1992, 1993, 1994, 1996, 1998,
+ *      2000, 2001, 2002, 2003 Free Software Foundation, Inc.
+ *
+ * This file is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Library General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at your
+ * option) any later version.
+ *
+ * This file is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Library General Public
+ * License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this file; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
+ * MA 02111-1307, USA. */
+/* You have to define the following before including this file:
+ *
+ * UWtype -- An unsigned type, default type for operations (typically a "word")
+ * UHWtype -- An unsigned type, at least half the size of UWtype.
+ * UDWtype -- An unsigned type, at least twice as large a UWtype
+ * W_TYPE_SIZE -- size in bits of UWtype
+ *
+ * SItype, USItype -- Signed and unsigned 32 bit types.
+ * DItype, UDItype -- Signed and unsigned 64 bit types.
+ *
+ * On a 32 bit machine UWtype should typically be USItype;
+ * on a 64 bit machine, UWtype should typically be UDItype.
+*/
+#define __BITS4 (W_TYPE_SIZE / 4)
+#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2))
+#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1))
+#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2))
+/* This is used to make sure no undesirable sharing between different libraries
+        that use this file takes place.  */
+#ifndef __MPN
+#define __MPN(x) __##x
+#endif
+/* Define auxiliary asm macros.
+ *
+ * 1) umul_ppmm(high_prod, low_prod, multipler, multiplicand) multiplies two
+ * UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype
+ * word product in HIGH_PROD and LOW_PROD.
+ *
+ * 2) __umulsidi3(a,b) multiplies two UWtype integers A and B, and returns a
+ * UDWtype product.  This is just a variant of umul_ppmm.
+ * 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+ * denominator) divides a UDWtype, composed by the UWtype integers
+ * HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient
+ * in QUOTIENT and the remainder in REMAINDER.  HIGH_NUMERATOR must be less
+ * than DENOMINATOR for correct operation.  If, in addition, the most
+ * significant bit of DENOMINATOR must be 1, then the pre-processor symbol
+ * UDIV_NEEDS_NORMALIZATION is defined to 1.
+ * 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator,
+ * denominator).  Like udiv_qrnnd but the numbers are signed.  The quotient
+ * is rounded towards 0.
+ *
+ * 5) count_leading_zeros(count, x) counts the number of zero-bits from the
+ * msb to the first non-zero bit in the UWtype X.  This is the number of
+ * steps X needs to be shifted left to set the msb.  Undefined for X == 0,
+ * unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value.
+ *
+ * 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts
+ * from the least significant end.
+ *
+ * 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1,
+ * high_addend_2, low_addend_2) adds two UWtype integers, composed by
+ * HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2
+ * respectively.  The result is placed in HIGH_SUM and LOW_SUM.  Overflow
+ * (i.e. carry out) is not stored anywhere, and is lost.
+ *
+ * 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend,
+ * high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers,
+ * composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and
+ * LOW_SUBTRAHEND_2 respectively.  The result is placed in HIGH_DIFFERENCE
+ * and LOW_DIFFERENCE.  Overflow (i.e. carry out) is not stored anywhere,
+ * and is lost.
+ *
+ * If any of these macros are left undefined for a particular CPU,
+ * C macros are used.  */
+/* The CPUs come in alphabetical order below.
+ *
+ * Please add support for more CPUs here, or improve the current support
+ * for the CPUs below!  */
+#if defined(__GNUC__) && !defined(NO_ASM)
+/* We sometimes need to clobber "cc" with gcc2, but that would not be
+        understood by gcc1.     Use cpp to avoid major code duplication.  */
+#if __GNUC__ < 2
+#define __CLOBBER_CC
+#define __AND_CLOBBER_CC
+#else /* __GNUC__ >= 2 */
+#define __CLOBBER_CC : "cc"
+#define __AND_CLOBBER_CC , "cc"
+#endif /* __GNUC__ < 2 */
+/***************************************
+        **************  A29K  *****************
+        ***************************************/
+#if (defined(__a29k__) || defined(_AM29K)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+        __asm__ ("add %1,%4,%5\n" \
+                "addc %0,%2,%3" \
+        : "=r" ((USItype)(sh)), \
+                "=&r" ((USItype)(sl)) \
+        : "%r" ((USItype)(ah)), \
+                "rI" ((USItype)(bh)), \
+                "%r" ((USItype)(al)), \
+                "rI" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+        __asm__ ("sub %1,%4,%5\n" \
+                "subc %0,%2,%3" \
+        : "=r" ((USItype)(sh)), \
+                "=&r" ((USItype)(sl)) \
+        : "r" ((USItype)(ah)), \
+                "rI" ((USItype)(bh)), \
+                "r" ((USItype)(al)), \
+                "rI" ((USItype)(bl)))
+#define umul_ppmm(xh, xl, m0, m1) \
+do { \
+                USItype __m0 = (m0), __m1 = (m1); \
+                __asm__ ("multiplu %0,%1,%2" \
+                : "=r" ((USItype)(xl)) \
+                : "r" (__m0), \
+                        "r" (__m1)); \
+                __asm__ ("multmu %0,%1,%2" \
+                : "=r" ((USItype)(xh)) \
+                : "r" (__m0), \
+                        "r" (__m1)); \
+} while (0)
+#define udiv_qrnnd(q, r, n1, n0, d) \
+        __asm__ ("dividu %0,%3,%4" \
+        : "=r" ((USItype)(q)), \
+                "=q" ((USItype)(r)) \
+        : "1" ((USItype)(n1)), \
+                "r" ((USItype)(n0)), \
+                "r" ((USItype)(d)))
+#define count_leading_zeros(count, x) \
+        __asm__ ("clz %0,%1" \
+        : "=r" ((USItype)(count)) \
+        : "r" ((USItype)(x)))
+#define COUNT_LEADING_ZEROS_0 32
+#endif /* __a29k__ */
+#if defined(__alpha) && W_TYPE_SIZE == 64
+#define umul_ppmm(ph, pl, m0, m1) \
+do { \
+                UDItype __m0 = (m0), __m1 = (m1); \
+                __asm__ ("umulh %r1,%2,%0" \
+                : "=r" ((UDItype) ph) \
+                : "%rJ" (__m0), \
+                        "rI" (__m1)); \
+                (pl) = __m0 * __m1; \
+        } while (0)
+#define UMUL_TIME 46
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+do { UDItype __r; \
+        (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
+        (r) = __r; \
+} while (0)
+extern UDItype __udiv_qrnnd();
+#define UDIV_TIME 220
+#endif /* LONGLONG_STANDALONE */
+#endif /* __alpha */
+/***************************************
+        **************  ARM  ******************
+        ***************************************/
+#if defined(__arm__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+        __asm__ ("adds %1, %4, %5\n" \
+                "adc  %0, %2, %3" \
+        : "=r" ((USItype)(sh)), \
+                "=&r" ((USItype)(sl)) \
+        : "%r" ((USItype)(ah)), \
+                "rI" ((USItype)(bh)), \
+                "%r" ((USItype)(al)), \
+                "rI" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+        __asm__ ("subs %1, %4, %5\n" \
+                "sbc  %0, %2, %3" \
+        : "=r" ((USItype)(sh)), \
+                "=&r" ((USItype)(sl)) \
+        : "r" ((USItype)(ah)), \
+                "rI" ((USItype)(bh)), \
+                "r" ((USItype)(al)), \
+                "rI" ((USItype)(bl)))
+#if defined __ARM_ARCH_2__ || defined __ARM_ARCH_3__
+#define umul_ppmm(xh, xl, a, b) \
+        __asm__ ("%@ Inlined umul_ppmm\n" \
+                "mov    %|r0, %2, lsr #16               @ AAAA\n" \
+                "mov    %|r2, %3, lsr #16               @ BBBB\n" \
+                "bic    %|r1, %2, %|r0, lsl #16         @ aaaa\n" \
+                "bic    %0, %3, %|r2, lsl #16           @ bbbb\n" \
+                "mul    %1, %|r1, %|r2                  @ aaaa * BBBB\n" \
+                "mul    %|r2, %|r0, %|r2                @ AAAA * BBBB\n" \
+                "mul    %|r1, %0, %|r1                  @ aaaa * bbbb\n" \
+                "mul    %0, %|r0, %0                    @ AAAA * bbbb\n" \
+                "adds   %|r0, %1, %0                    @ central sum\n" \
+                "addcs  %|r2, %|r2, #65536\n" \
+                "adds   %1, %|r1, %|r0, lsl #16\n" \
+                "adc    %0, %|r2, %|r0, lsr #16" \
+        : "=&r" ((USItype)(xh)), \
+                "=r" ((USItype)(xl)) \
+        : "r" ((USItype)(a)), \
+                "r" ((USItype)(b)) \
+        : "r0", "r1", "r2")
+#else
+#define umul_ppmm(xh, xl, a, b) \
+        __asm__ ("%@ Inlined umul_ppmm\n" \
+                "umull %r1, %r0, %r2, %r3" \
+        : "=&r" ((USItype)(xh)), \
+                        "=r" ((USItype)(xl)) \
+        : "r" ((USItype)(a)), \
+                        "r" ((USItype)(b)) \
+        : "r0", "r1")
+#endif
+#define UMUL_TIME 20
+#define UDIV_TIME 100
+#endif /* __arm__ */
+/***************************************
+        **************  CLIPPER  **************
+        ***************************************/
+#if defined(__clipper__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+        ({union {UDItype __ll; \
+                struct {USItype __l, __h; } __i; \
+        } __xx; \
+        __asm__ ("mulwux %2,%0" \
+        : "=r" (__xx.__ll) \
+        : "%0" ((USItype)(u)), \
+                "r" ((USItype)(v))); \
+        (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
+#define smul_ppmm(w1, w0, u, v) \
+        ({union {DItype __ll; \
+                struct {SItype __l, __h; } __i; \
+        } __xx; \
+        __asm__ ("mulwx %2,%0" \
+        : "=r" (__xx.__ll) \
+        : "%0" ((SItype)(u)), \
+                "r" ((SItype)(v))); \
+        (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
+#define __umulsidi3(u, v) \
+        ({UDItype __w; \
+        __asm__ ("mulwux %2,%0" \
+        : "=r" (__w) \
+        : "%0" ((USItype)(u)), \
+                "r" ((USItype)(v))); \
+        __w; })
+#endif /* __clipper__ */
+/***************************************
+        **************  GMICRO  ***************
+        ***************************************/
+#if defined(__gmicro__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+        __asm__ ("add.w %5,%1\n" \
+                "addx %3,%0" \
+        : "=g" ((USItype)(sh)), \
+                "=&g" ((USItype)(sl)) \
+        : "%0" ((USItype)(ah)), \
+                "g" ((USItype)(bh)), \
+                "%1" ((USItype)(al)), \
+                "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+        __asm__ ("sub.w %5,%1\n" \
+                "subx %3,%0" \
+        : "=g" ((USItype)(sh)), \
+                "=&g" ((USItype)(sl)) \
+        : "0" ((USItype)(ah)), \
+                "g" ((USItype)(bh)), \
+                "1" ((USItype)(al)), \
+                "g" ((USItype)(bl)))
+#define umul_ppmm(ph, pl, m0, m1) \
+        __asm__ ("mulx %3,%0,%1" \
+        : "=g" ((USItype)(ph)), \
+                "=r" ((USItype)(pl)) \
+        : "%0" ((USItype)(m0)), \
+                "g" ((USItype)(m1)))
+#define udiv_qrnnd(q, r, nh, nl, d) \
+        __asm__ ("divx %4,%0,%1" \
+        : "=g" ((USItype)(q)), \
+                "=r" ((USItype)(r)) \
+        : "1" ((USItype)(nh)), \
+                "0" ((USItype)(nl)), \
+                "g" ((USItype)(d)))
+#define count_leading_zeros(count, x) \
+        __asm__ ("bsch/1 %1,%0" \
+        : "=g" (count) \
+        : "g" ((USItype)(x)), \
+             "0" ((USItype)0))
+#endif
+/***************************************
+        **************  HPPA  *****************
+        ***************************************/
+#if defined(__hppa) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+        __asm__ ("add %4,%5,%1\n" \
+                   "addc %2,%3,%0" \
+        : "=r" ((USItype)(sh)), \
+             "=&r" ((USItype)(sl)) \
+        : "%rM" ((USItype)(ah)), \
+             "rM" ((USItype)(bh)), \
+             "%rM" ((USItype)(al)), \
+             "rM" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+        __asm__ ("sub %4,%5,%1\n" \
+           "subb %2,%3,%0" \
+        : "=r" ((USItype)(sh)), \
+             "=&r" ((USItype)(sl)) \
+        : "rM" ((USItype)(ah)), \
+             "rM" ((USItype)(bh)), \
+             "rM" ((USItype)(al)), \
+             "rM" ((USItype)(bl)))
+#if defined(_PA_RISC1_1)
+#define umul_ppmm(wh, wl, u, v) \
+do { \
+        union {UDItype __ll; \
+        struct {USItype __h, __l; } __i; \
+        } __xx; \
+        __asm__ ("xmpyu %1,%2,%0" \
+        : "=*f" (__xx.__ll) \
+        : "*f" ((USItype)(u)), \
+               "*f" ((USItype)(v))); \
+        (wh) = __xx.__i.__h; \
+        (wl) = __xx.__i.__l; \
+} while (0)
+#define UMUL_TIME 8
+#define UDIV_TIME 60
+#else
+#define UMUL_TIME 40
+#define UDIV_TIME 80
+#endif
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+do { USItype __r; \
+        (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
+        (r) = __r; \
+} while (0)
+extern USItype __udiv_qrnnd();
+#endif /* LONGLONG_STANDALONE */
+#define count_leading_zeros(count, x) \
+do { \
+        USItype __tmp; \
+        __asm__ ( \
+        "ldi             1,%0\n" \
+        "extru,=        %1,15,16,%%r0  ; Bits 31..16 zero?\n" \
+        "extru,tr       %1,15,16,%1    ; No.  Shift down, skip add.\n" \
+        "ldo            16(%0),%0      ; Yes.   Perform add.\n" \
+        "extru,=        %1,23,8,%%r0   ; Bits 15..8 zero?\n" \
+        "extru,tr       %1,23,8,%1     ; No.  Shift down, skip add.\n" \
+        "ldo            8(%0),%0       ; Yes.   Perform add.\n" \
+        "extru,=        %1,27,4,%%r0   ; Bits 7..4 zero?\n" \
+        "extru,tr       %1,27,4,%1     ; No.  Shift down, skip add.\n" \
+        "ldo            4(%0),%0       ; Yes.   Perform add.\n" \
+        "extru,=        %1,29,2,%%r0   ; Bits 3..2 zero?\n" \
+        "extru,tr       %1,29,2,%1     ; No.  Shift down, skip add.\n" \
+        "ldo            2(%0),%0       ; Yes.   Perform add.\n" \
+        "extru          %1,30,1,%1     ; Extract bit 1.\n" \
+        "sub            %0,%1,%0       ; Subtract it.              " \
+        : "=r" (count), "=r" (__tmp) : "1" (x)); \
+} while (0)
+#endif /* hppa */
+/***************************************
+        **************  I370  *****************
+        ***************************************/
+#if (defined(__i370__) || defined(__mvs__)) && W_TYPE_SIZE == 32
+#define umul_ppmm(xh, xl, m0, m1) \
+do { \
+        union {UDItype __ll; \
+           struct {USItype __h, __l; } __i; \
+        } __xx; \
+        USItype __m0 = (m0), __m1 = (m1); \
+        __asm__ ("mr %0,%3" \
+        : "=r" (__xx.__i.__h), \
+               "=r" (__xx.__i.__l) \
+        : "%1" (__m0), \
+               "r" (__m1)); \
+        (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
+        (xh) += ((((SItype) __m0 >> 31) & __m1) \
+             + (((SItype) __m1 >> 31) & __m0)); \
+} while (0)
+#define smul_ppmm(xh, xl, m0, m1) \
+do { \
+        union {DItype __ll; \
+           struct {USItype __h, __l; } __i; \
+        } __xx; \
+        __asm__ ("mr %0,%3" \
+        : "=r" (__xx.__i.__h), \
+               "=r" (__xx.__i.__l) \
+        : "%1" (m0), \
+               "r" (m1)); \
+        (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
+} while (0)
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+do { \
+        union {DItype __ll; \
+           struct {USItype __h, __l; } __i; \
+        } __xx; \
+        __xx.__i.__h = n1; __xx.__i.__l = n0; \
+        __asm__ ("dr %0,%2" \
+        : "=r" (__xx.__ll) \
+        : "0" (__xx.__ll), "r" (d)); \
+        (q) = __xx.__i.__l; (r) = __xx.__i.__h; \
+} while (0)
+#endif
+/***************************************
+        **************  I386  *****************
+        ***************************************/
+#undef __i386__
+#if (defined(__i386__) || defined(__i486__)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+        __asm__ ("addl %5,%1\n" \
+           "adcl %3,%0" \
+        : "=r" ((USItype)(sh)), \
+             "=&r" ((USItype)(sl)) \
+        : "%0" ((USItype)(ah)), \
+             "g" ((USItype)(bh)), \
+             "%1" ((USItype)(al)), \
+             "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+        __asm__ ("subl %5,%1\n" \
+           "sbbl %3,%0" \
+        : "=r" ((USItype)(sh)), \
+             "=&r" ((USItype)(sl)) \
+        : "0" ((USItype)(ah)), \
+             "g" ((USItype)(bh)), \
+             "1" ((USItype)(al)), \
+             "g" ((USItype)(bl)))
+#define umul_ppmm(w1, w0, u, v) \
+        __asm__ ("mull %3" \
+        : "=a" ((USItype)(w0)), \
+             "=d" ((USItype)(w1)) \
+        : "%0" ((USItype)(u)), \
+             "rm" ((USItype)(v)))
+#define udiv_qrnnd(q, r, n1, n0, d) \
+        __asm__ ("divl %4" \
+        : "=a" ((USItype)(q)), \
+             "=d" ((USItype)(r)) \
+        : "0" ((USItype)(n0)), \
+             "1" ((USItype)(n1)), \
+             "rm" ((USItype)(d)))
+#define count_leading_zeros(count, x) \
+do { \
+        USItype __cbtmp; \
+        __asm__ ("bsrl %1,%0" \
+        : "=r" (__cbtmp) : "rm" ((USItype)(x))); \
+        (count) = __cbtmp ^ 31; \
+} while (0)
+#define count_trailing_zeros(count, x) \
+        __asm__ ("bsfl %1,%0" : "=r" (count) : "rm" ((USItype)(x)))
+#ifndef UMUL_TIME
+#define UMUL_TIME 40
+#endif
+#ifndef UDIV_TIME
+#define UDIV_TIME 40
+#endif
+#endif /* 80x86 */
+/***************************************
+        **************  I860  *****************
+        ***************************************/
+#if defined(__i860__) && W_TYPE_SIZE == 32
+#define rshift_rhlc(r, h, l, c) \
+        __asm__ ("shr %3,r0,r0\n" \
+        "shrd %1,%2,%0" \
+           "=r" (r) : "r" (h), "r" (l), "rn" (c))
+#endif /* i860 */
+/***************************************
+        **************  I960  *****************
+        ***************************************/
+#if defined(__i960__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+        __asm__ ("cmpo 1,0\n" \
+        "addc %5,%4,%1\n" \
+        "addc %3,%2,%0" \
+        : "=r" ((USItype)(sh)), \
+             "=&r" ((USItype)(sl)) \
+        : "%dI" ((USItype)(ah)), \
+             "dI" ((USItype)(bh)), \
+             "%dI" ((USItype)(al)), \
+             "dI" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+        __asm__ ("cmpo 0,0\n" \
+        "subc %5,%4,%1\n" \
+        "subc %3,%2,%0" \
+        : "=r" ((USItype)(sh)), \
+             "=&r" ((USItype)(sl)) \
+        : "dI" ((USItype)(ah)), \
+             "dI" ((USItype)(bh)), \
+             "dI" ((USItype)(al)), \
+             "dI" ((USItype)(bl)))
+#define umul_ppmm(w1, w0, u, v) \
+        ({union {UDItype __ll; \
+           struct {USItype __l, __h; } __i; \
+        } __xx; \
+        __asm__ ("emul        %2,%1,%0" \
+        : "=d" (__xx.__ll) \
+        : "%dI" ((USItype)(u)), \
+             "dI" ((USItype)(v))); \
+        (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
+#define __umulsidi3(u, v) \
+        ({UDItype __w; \
+        __asm__ ("emul      %2,%1,%0" \
+        : "=d" (__w) \
+        : "%dI" ((USItype)(u)), \
+               "dI" ((USItype)(v))); \
+        __w; })
+#define udiv_qrnnd(q, r, nh, nl, d) \
+do { \
+        union {UDItype __ll; \
+           struct {USItype __l, __h; } __i; \
+        } __nn; \
+        __nn.__i.__h = (nh); __nn.__i.__l = (nl); \
+        __asm__ ("ediv %d,%n,%0" \
+        : "=d" (__rq.__ll) \
+        : "dI" (__nn.__ll), \
+             "dI" ((USItype)(d))); \
+        (r) = __rq.__i.__l; (q) = __rq.__i.__h; \
+} while (0)
+#define count_leading_zeros(count, x) \
+do { \
+        USItype __cbtmp; \
+        __asm__ ("scanbit %1,%0" \
+        : "=r" (__cbtmp) \
+        : "r" ((USItype)(x))); \
+        (count) = __cbtmp ^ 31; \
+} while (0)
+#define COUNT_LEADING_ZEROS_0 (-32)     /* sic */
+#if defined(__i960mx)           /* what is the proper symbol to test??? */
+#define rshift_rhlc(r, h, l, c) \
+do { \
+        union {UDItype __ll; \
+           struct {USItype __l, __h; } __i; \
+        } __nn; \
+        __nn.__i.__h = (h); __nn.__i.__l = (l); \
+        __asm__ ("shre %2,%1,%0" \
+        : "=d" (r) : "dI" (__nn.__ll), "dI" (c)); \
+}
+#endif /* i960mx */
+#endif /* i960 */
+/***************************************
+        **************  68000   ****************
+        ***************************************/
+#if (defined(__mc68000__) || defined(__mc68020__) || defined(__NeXT__) || defined(mc68020)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+        __asm__ ("add%.l %5,%1\n" \
+           "addx%.l %3,%0" \
+        : "=d" ((USItype)(sh)), \
+             "=&d" ((USItype)(sl)) \
+        : "%0" ((USItype)(ah)), \
+             "d" ((USItype)(bh)), \
+             "%1" ((USItype)(al)), \
+             "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+        __asm__ ("sub%.l %5,%1\n" \
+           "subx%.l %3,%0" \
+        : "=d" ((USItype)(sh)), \
+             "=&d" ((USItype)(sl)) \
+        : "0" ((USItype)(ah)), \
+             "d" ((USItype)(bh)), \
+             "1" ((USItype)(al)), \
+             "g" ((USItype)(bl)))
+#if (defined(__mc68020__) || defined(__NeXT__) || defined(mc68020))
+#define umul_ppmm(w1, w0, u, v) \
+        __asm__ ("mulu%.l %3,%1:%0" \
+        : "=d" ((USItype)(w0)), \
+             "=d" ((USItype)(w1)) \
+        : "%0" ((USItype)(u)), \
+             "dmi" ((USItype)(v)))
+#define UMUL_TIME 45
+#define udiv_qrnnd(q, r, n1, n0, d) \
+        __asm__ ("divu%.l %4,%1:%0" \
+        : "=d" ((USItype)(q)), \
+             "=d" ((USItype)(r)) \
+        : "0" ((USItype)(n0)), \
+             "1" ((USItype)(n1)), \
+             "dmi" ((USItype)(d)))
+#define UDIV_TIME 90
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+        __asm__ ("divs%.l %4,%1:%0" \
+        : "=d" ((USItype)(q)), \
+             "=d" ((USItype)(r)) \
+        : "0" ((USItype)(n0)), \
+             "1" ((USItype)(n1)), \
+             "dmi" ((USItype)(d)))
+#define count_leading_zeros(count, x) \
+        __asm__ ("bfffo %1{%b2:%b2},%0" \
+        : "=d" ((USItype)(count)) \
+        : "od" ((USItype)(x)), "n" (0))
+#define COUNT_LEADING_ZEROS_0 32
+#else /* not mc68020 */
+#define umul_ppmm(xh, xl, a, b) \
+do { USItype __umul_tmp1, __umul_tmp2; \
+        __asm__ ("| Inlined umul_ppmm\n" \
+        "move%.l %5,%3\n" \
+        "move%.l %2,%0\n" \
+        "move%.w %3,%1\n" \
+        "swap   %3\n" \
+        "swap   %0\n" \
+        "mulu   %2,%1\n" \
+        "mulu   %3,%0\n" \
+        "mulu   %2,%3\n" \
+        "swap   %2\n" \
+        "mulu   %5,%2\n" \
+        "add%.l %3,%2\n" \
+        "jcc    1f\n" \
+        "add%.l %#0x10000,%0\n" \
+        "1:     move%.l %2,%3\n" \
+        "clr%.w %2\n" \
+        "swap   %2\n" \
+        "swap   %3\n" \
+        "clr%.w %3\n" \
+        "add%.l %3,%1\n" \
+        "addx%.l %2,%0\n" \
+        "| End inlined umul_ppmm" \
+        : "=&d" ((USItype)(xh)), "=&d" ((USItype)(xl)), \
+                "=d" (__umul_tmp1), "=&d" (__umul_tmp2) \
+        : "%2" ((USItype)(a)), "d" ((USItype)(b))); \
+} while (0)
+#define UMUL_TIME 100
+#define UDIV_TIME 400
+#endif /* not mc68020 */
+#endif /* mc68000 */
+/***************************************
+        **************  88000   ****************
+        ***************************************/
+#if defined(__m88000__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+        __asm__ ("addu.co %1,%r4,%r5\n" \
+           "addu.ci %0,%r2,%r3" \
+        : "=r" ((USItype)(sh)), \
+             "=&r" ((USItype)(sl)) \
+        : "%rJ" ((USItype)(ah)), \
+             "rJ" ((USItype)(bh)), \
+             "%rJ" ((USItype)(al)), \
+             "rJ" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+        __asm__ ("subu.co %1,%r4,%r5\n" \
+           "subu.ci %0,%r2,%r3" \
+        : "=r" ((USItype)(sh)), \
+             "=&r" ((USItype)(sl)) \
+        : "rJ" ((USItype)(ah)), \
+             "rJ" ((USItype)(bh)), \
+             "rJ" ((USItype)(al)), \
+             "rJ" ((USItype)(bl)))
+#define count_leading_zeros(count, x) \
+do { \
+        USItype __cbtmp; \
+        __asm__ ("ff1 %0,%1" \
+        : "=r" (__cbtmp) \
+        : "r" ((USItype)(x))); \
+        (count) = __cbtmp ^ 31; \
+} while (0)
+#define COUNT_LEADING_ZEROS_0 63        /* sic */
+#if defined(__m88110__)
+#define umul_ppmm(wh, wl, u, v) \
+do { \
+        union {UDItype __ll; \
+           struct {USItype __h, __l; } __i; \
+        } __x; \
+        __asm__ ("mulu.d %0,%1,%2" : "=r" (__x.__ll) : "r" (u), "r" (v)); \
+        (wh) = __x.__i.__h; \
+        (wl) = __x.__i.__l; \
+} while (0)
+#define udiv_qrnnd(q, r, n1, n0, d) \
+        ({union {UDItype __ll; \
+           struct {USItype __h, __l; } __i; \
+        } __x, __q; \
+        __x.__i.__h = (n1); __x.__i.__l = (n0); \
+        __asm__ ("divu.d %0,%1,%2" \
+        : "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \
+        (r) = (n0) - __q.__l * (d); (q) = __q.__l; })
+#define UMUL_TIME 5
+#define UDIV_TIME 25
+#else
+#define UMUL_TIME 17
+#define UDIV_TIME 150
+#endif /* __m88110__ */
+#endif /* __m88000__ */
+/***************************************
+        **************  MIPS  *****************
+        ***************************************/
+#if defined(__mips__) && W_TYPE_SIZE == 32
+#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
+#define umul_ppmm(w1, w0, u, v) \
+        __asm__ ("multu %2,%3" \
+        : "=l" ((USItype)(w0)), \
+             "=h" ((USItype)(w1)) \
+        : "d" ((USItype)(u)), \
+             "d" ((USItype)(v)))
+#else
+#define umul_ppmm(w1, w0, u, v) \
+        __asm__ ("multu %2,%3\n" \
+           "mflo %0\n" \
+           "mfhi %1" \
+        : "=d" ((USItype)(w0)), \
+             "=d" ((USItype)(w1)) \
+        : "d" ((USItype)(u)), \
+             "d" ((USItype)(v)))
+#endif
+#define UMUL_TIME 10
+#define UDIV_TIME 100
+#endif /* __mips__ */
+/***************************************
+        **************  MIPS/64  **************
+        ***************************************/
+#if (defined(__mips) && __mips >= 3) && W_TYPE_SIZE == 64
+#if __GNUC__ > 2 || __GNUC_MINOR__ >= 7
+#define umul_ppmm(w1, w0, u, v) \
+        __asm__ ("dmultu %2,%3" \
+        : "=l" ((UDItype)(w0)), \
+             "=h" ((UDItype)(w1)) \
+        : "d" ((UDItype)(u)), \
+             "d" ((UDItype)(v)))
+#else
+#define umul_ppmm(w1, w0, u, v) \
+        __asm__ ("dmultu %2,%3\n" \
+           "mflo %0\n" \
+           "mfhi %1" \
+        : "=d" ((UDItype)(w0)), \
+             "=d" ((UDItype)(w1)) \
+        : "d" ((UDItype)(u)), \
+             "d" ((UDItype)(v)))
+#endif
+#define UMUL_TIME 20
+#define UDIV_TIME 140
+#endif /* __mips__ */
+/***************************************
+        **************  32000   ****************
+        ***************************************/
+#if defined(__ns32000__) && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+        ({union {UDItype __ll; \
+           struct {USItype __l, __h; } __i; \
+        } __xx; \
+        __asm__ ("meid %2,%0" \
+        : "=g" (__xx.__ll) \
+        : "%0" ((USItype)(u)), \
+             "g" ((USItype)(v))); \
+        (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
+#define __umulsidi3(u, v) \
+        ({UDItype __w; \
+        __asm__ ("meid %2,%0" \
+        : "=g" (__w) \
+        : "%0" ((USItype)(u)), \
+               "g" ((USItype)(v))); \
+        __w; })
+#define udiv_qrnnd(q, r, n1, n0, d) \
+        ({union {UDItype __ll; \
+           struct {USItype __l, __h; } __i; \
+        } __xx; \
+        __xx.__i.__h = (n1); __xx.__i.__l = (n0); \
+        __asm__ ("deid %2,%0" \
+        : "=g" (__xx.__ll) \
+        : "0" (__xx.__ll), \
+             "g" ((USItype)(d))); \
+        (r) = __xx.__i.__l; (q) = __xx.__i.__h; })
+#define count_trailing_zeros(count, x) \
+do { \
+        __asm__("ffsd      %2,%0" \
+        : "=r"((USItype) (count)) \
+        : "0"((USItype) 0), "r"((USItype) (x))); \
+        } while (0)
+#endif /* __ns32000__ */
+/***************************************
+        **************  PPC  ******************
+        ***************************************/
+#if (defined(_ARCH_PPC) || defined(_IBMR2)) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+do { \
+        if (__builtin_constant_p(bh) && (bh) == 0) \
+                __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{aze|addze} %0,%2" \
+                : "=r" ((USItype)(sh)), \
+                "=&r" ((USItype)(sl)) \
+                : "%r" ((USItype)(ah)), \
+                "%r" ((USItype)(al)), \
+                "rI" ((USItype)(bl))); \
+        else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \
+                __asm__ ("{a%I4|add%I4c} %1,%3,%4\n\t{ame|addme} %0,%2" \
+                : "=r" ((USItype)(sh)), \
+                "=&r" ((USItype)(sl)) \
+                : "%r" ((USItype)(ah)), \
+                "%r" ((USItype)(al)), \
+                "rI" ((USItype)(bl))); \
+        else \
+                __asm__ ("{a%I5|add%I5c} %1,%4,%5\n\t{ae|adde} %0,%2,%3" \
+                : "=r" ((USItype)(sh)), \
+                "=&r" ((USItype)(sl)) \
+                : "%r" ((USItype)(ah)), \
+                "r" ((USItype)(bh)), \
+                "%r" ((USItype)(al)), \
+                "rI" ((USItype)(bl))); \
+} while (0)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+do { \
+        if (__builtin_constant_p(ah) && (ah) == 0) \
+                __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfze|subfze} %0,%2" \
+                : "=r" ((USItype)(sh)), \
+                "=&r" ((USItype)(sl)) \
+                : "r" ((USItype)(bh)), \
+                "rI" ((USItype)(al)), \
+                "r" ((USItype)(bl))); \
+        else if (__builtin_constant_p(ah) && (ah) == ~(USItype) 0) \
+                __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{sfme|subfme} %0,%2" \
+                : "=r" ((USItype)(sh)), \
+                "=&r" ((USItype)(sl)) \
+                : "r" ((USItype)(bh)), \
+                "rI" ((USItype)(al)), \
+                "r" ((USItype)(bl))); \
+        else if (__builtin_constant_p(bh) && (bh) == 0) \
+                __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{ame|addme} %0,%2" \
+                : "=r" ((USItype)(sh)), \
+                "=&r" ((USItype)(sl)) \
+                : "r" ((USItype)(ah)), \
+                "rI" ((USItype)(al)), \
+                "r" ((USItype)(bl))); \
+        else if (__builtin_constant_p(bh) && (bh) == ~(USItype) 0) \
+                __asm__ ("{sf%I3|subf%I3c} %1,%4,%3\n\t{aze|addze} %0,%2" \
+                : "=r" ((USItype)(sh)), \
+                "=&r" ((USItype)(sl)) \
+                : "r" ((USItype)(ah)), \
+                "rI" ((USItype)(al)), \
+                "r" ((USItype)(bl))); \
+        else \
+                __asm__ ("{sf%I4|subf%I4c} %1,%5,%4\n\t{sfe|subfe} %0,%3,%2" \
+                : "=r" ((USItype)(sh)), \
+                "=&r" ((USItype)(sl)) \
+                : "r" ((USItype)(ah)), \
+                "r" ((USItype)(bh)), \
+                "rI" ((USItype)(al)), \
+                "r" ((USItype)(bl))); \
+} while (0)
+#define count_leading_zeros(count, x) \
+        __asm__ ("{cntlz|cntlzw} %0,%1" \
+        : "=r" ((USItype)(count)) \
+        : "r" ((USItype)(x)))
+#define COUNT_LEADING_ZEROS_0 32
+#if defined(_ARCH_PPC)
+#define umul_ppmm(ph, pl, m0, m1) \
+do { \
+        USItype __m0 = (m0), __m1 = (m1); \
+        __asm__ ("mulhwu %0,%1,%2" \
+        : "=r" ((USItype) ph) \
+        : "%r" (__m0), \
+        "r" (__m1)); \
+        (pl) = __m0 * __m1; \
+} while (0)
+#define UMUL_TIME 15
+#define smul_ppmm(ph, pl, m0, m1) \
+do { \
+        SItype __m0 = (m0), __m1 = (m1); \
+        __asm__ ("mulhw %0,%1,%2" \
+        : "=r" ((SItype) ph) \
+        : "%r" (__m0), \
+        "r" (__m1)); \
+        (pl) = __m0 * __m1; \
+} while (0)
+#define SMUL_TIME 14
+#define UDIV_TIME 120
+#else
+#define umul_ppmm(xh, xl, m0, m1) \
+do { \
+        USItype __m0 = (m0), __m1 = (m1); \
+        __asm__ ("mul %0,%2,%3" \
+        : "=r" ((USItype)(xh)), \
+        "=q" ((USItype)(xl)) \
+        : "r" (__m0), \
+        "r" (__m1)); \
+        (xh) += ((((SItype) __m0 >> 31) & __m1) \
+        + (((SItype) __m1 >> 31) & __m0)); \
+} while (0)
+#define UMUL_TIME 8
+#define smul_ppmm(xh, xl, m0, m1) \
+        __asm__ ("mul %0,%2,%3" \
+        : "=r" ((SItype)(xh)), \
+        "=q" ((SItype)(xl)) \
+        : "r" (m0), \
+        "r" (m1))
+#define SMUL_TIME 4
+#define sdiv_qrnnd(q, r, nh, nl, d) \
+        __asm__ ("div %0,%2,%4" \
+        : "=r" ((SItype)(q)), "=q" ((SItype)(r)) \
+        : "r" ((SItype)(nh)), "1" ((SItype)(nl)), "r" ((SItype)(d)))
+#define UDIV_TIME 100
+#endif
+#endif /* Power architecture variants.  */
+/***************************************
+        **************  PYR  ******************
+        ***************************************/
+#if defined(__pyr__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+        __asm__ ("addw        %5,%1\n" \
+        "addwc  %3,%0" \
+        : "=r" ((USItype)(sh)), \
+        "=&r" ((USItype)(sl)) \
+        : "%0" ((USItype)(ah)), \
+        "g" ((USItype)(bh)), \
+        "%1" ((USItype)(al)), \
+        "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+        __asm__ ("subw        %5,%1\n" \
+        "subwb  %3,%0" \
+        : "=r" ((USItype)(sh)), \
+        "=&r" ((USItype)(sl)) \
+        : "0" ((USItype)(ah)), \
+        "g" ((USItype)(bh)), \
+        "1" ((USItype)(al)), \
+        "g" ((USItype)(bl)))
+        /* This insn works on Pyramids with AP, XP, or MI CPUs, but not with SP.  */
+#define umul_ppmm(w1, w0, u, v) \
+        ({union {UDItype __ll; \
+        struct {USItype __h, __l; } __i; \
+        } __xx; \
+        __asm__ ("movw %1,%R0\n" \
+        "uemul %2,%0" \
+        : "=&r" (__xx.__ll) \
+        : "g" ((USItype) (u)), \
+        "g" ((USItype)(v))); \
+        (w1) = __xx.__i.__h; (w0) = __xx.__i.__l; })
+#endif /* __pyr__ */
+/***************************************
+        **************  RT/ROMP  **************
+        ***************************************/
+#if defined(__ibm032__) /* RT/ROMP */   && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+        __asm__ ("a %1,%5\n" \
+        "ae %0,%3" \
+        : "=r" ((USItype)(sh)), \
+        "=&r" ((USItype)(sl)) \
+        : "%0" ((USItype)(ah)), \
+        "r" ((USItype)(bh)), \
+        "%1" ((USItype)(al)), \
+        "r" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+        __asm__ ("s %1,%5\n" \
+        "se %0,%3" \
+        : "=r" ((USItype)(sh)), \
+        "=&r" ((USItype)(sl)) \
+        : "0" ((USItype)(ah)), \
+        "r" ((USItype)(bh)), \
+        "1" ((USItype)(al)), \
+        "r" ((USItype)(bl)))
+#define umul_ppmm(ph, pl, m0, m1) \
+do { \
+        USItype __m0 = (m0), __m1 = (m1); \
+        __asm__ ( \
+        "s       r2,r2\n" \
+        "mts    r10,%2\n" \
+        "m      r2,%3\n" \
+        "m      r2,%3\n" \
+        "m      r2,%3\n" \
+        "m      r2,%3\n" \
+        "m      r2,%3\n" \
+        "m      r2,%3\n" \
+        "m      r2,%3\n" \
+        "m      r2,%3\n" \
+        "m      r2,%3\n" \
+        "m      r2,%3\n" \
+        "m      r2,%3\n" \
+        "m      r2,%3\n" \
+        "m      r2,%3\n" \
+        "m      r2,%3\n" \
+        "m      r2,%3\n" \
+        "m      r2,%3\n" \
+        "cas    %0,r2,r0\n" \
+        "mfs    r10,%1" \
+        : "=r" ((USItype)(ph)), \
+        "=r" ((USItype)(pl)) \
+        : "%r" (__m0), \
+        "r" (__m1) \
+        : "r2"); \
+        (ph) += ((((SItype) __m0 >> 31) & __m1) \
+        + (((SItype) __m1 >> 31) & __m0)); \
+} while (0)
+#define UMUL_TIME 20
+#define UDIV_TIME 200
+#define count_leading_zeros(count, x) \
+do { \
+        if ((x) >= 0x10000) \
+                __asm__ ("clz     %0,%1" \
+                : "=r" ((USItype)(count)) \
+                : "r" ((USItype)(x) >> 16)); \
+        else { \
+                __asm__ ("clz   %0,%1" \
+                : "=r" ((USItype)(count)) \
+                : "r" ((USItype)(x))); \
+                (count) += 16; \
+        } \
+} while (0)
+#endif /* RT/ROMP */
+/***************************************
+        **************  SH2  ******************
+        ***************************************/
+#if (defined(__sh2__) || defined(__sh3__) || defined(__SH4__)) \
+        && W_TYPE_SIZE == 32
+#define umul_ppmm(w1, w0, u, v) \
+        __asm__ ( \
+        "dmulu.l %2,%3\n" \
+        "sts    macl,%1\n" \
+        "sts    mach,%0" \
+        : "=r" ((USItype)(w1)), \
+        "=r" ((USItype)(w0)) \
+        : "r" ((USItype)(u)), \
+        "r" ((USItype)(v)) \
+        : "macl", "mach")
+#define UMUL_TIME 5
+#endif
+/***************************************
+        **************  SPARC   ****************
+        ***************************************/
+#if defined(__sparc__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+        __asm__ ("addcc %r4,%5,%1\n" \
+        "addx %r2,%3,%0" \
+        : "=r" ((USItype)(sh)), \
+        "=&r" ((USItype)(sl)) \
+        : "%rJ" ((USItype)(ah)), \
+        "rI" ((USItype)(bh)), \
+        "%rJ" ((USItype)(al)), \
+        "rI" ((USItype)(bl)) \
+        __CLOBBER_CC)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+        __asm__ ("subcc %r4,%5,%1\n" \
+        "subx %r2,%3,%0" \
+        : "=r" ((USItype)(sh)), \
+        "=&r" ((USItype)(sl)) \
+        : "rJ" ((USItype)(ah)), \
+        "rI" ((USItype)(bh)), \
+        "rJ" ((USItype)(al)), \
+        "rI" ((USItype)(bl)) \
+        __CLOBBER_CC)
+#if defined(__sparc_v8__)
+/* Don't match immediate range because, 1) it is not often useful,
+        2) the 'I' flag thinks of the range as a 13 bit signed interval,
+        while we want to match a 13 bit interval, sign extended to 32 bits,
+        but INTERPRETED AS UNSIGNED.  */
+#define umul_ppmm(w1, w0, u, v) \
+        __asm__ ("umul %2,%3,%1;rd %%y,%0" \
+        : "=r" ((USItype)(w1)), \
+        "=r" ((USItype)(w0)) \
+        : "r" ((USItype)(u)), \
+        "r" ((USItype)(v)))
+#define UMUL_TIME 5
+#ifndef SUPERSPARC              /* SuperSPARC's udiv only handles 53 bit dividends */
+#define udiv_qrnnd(q, r, n1, n0, d) \
+do { \
+        USItype __q; \
+        __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
+        : "=r" ((USItype)(__q)) \
+        : "r" ((USItype)(n1)), \
+        "r" ((USItype)(n0)), \
+        "r" ((USItype)(d))); \
+        (r) = (n0) - __q * (d); \
+        (q) = __q; \
+} while (0)
+#define UDIV_TIME 25
+#endif /* SUPERSPARC */
+#else /* ! __sparc_v8__ */
+#if defined(__sparclite__)
+/* This has hardware multiply but not divide.  It also has two additional
+        instructions scan (ffs from high bit) and divscc.  */
+#define umul_ppmm(w1, w0, u, v) \
+        __asm__ ("umul %2,%3,%1;rd %%y,%0" \
+        : "=r" ((USItype)(w1)), \
+        "=r" ((USItype)(w0)) \
+        : "r" ((USItype)(u)), \
+        "r" ((USItype)(v)))
+#define UMUL_TIME 5
+#define udiv_qrnnd(q, r, n1, n0, d) \
+        __asm__ ("! Inlined udiv_qrnnd\n" \
+        "wr     %%g0,%2,%%y     ! Not a delayed write for sparclite\n" \
+        "tst    %%g0\n" \
+        "divscc %3,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%%g1\n" \
+        "divscc %%g1,%4,%0\n" \
+        "rd     %%y,%1\n" \
+        "bl,a 1f\n" \
+        "add    %1,%4,%1\n" \
+        "1:     ! End of inline udiv_qrnnd" \
+        : "=r" ((USItype)(q)), \
+        "=r" ((USItype)(r)) \
+        : "r" ((USItype)(n1)), \
+        "r" ((USItype)(n0)), \
+        "rI" ((USItype)(d)) \
+        : "%g1" __AND_CLOBBER_CC)
+#define UDIV_TIME 37
+#define count_leading_zeros(count, x) \
+        __asm__ ("scan %1,0,%0" \
+        : "=r" ((USItype)(x)) \
+        : "r" ((USItype)(count)))
+/* Early sparclites return 63 for an argument of 0, but they warn that future
+        implementations might change this.  Therefore, leave COUNT_LEADING_ZEROS_0
+        undefined.  */
+#endif /* __sparclite__ */
+#endif /* __sparc_v8__ */
+        /* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd.  */
+#ifndef umul_ppmm
+#define umul_ppmm(w1, w0, u, v) \
+        __asm__ ("! Inlined umul_ppmm\n" \
+        "wr     %%g0,%2,%%y     ! SPARC has 0-3 delay insn after a wr\n" \
+        "sra    %3,31,%%g2      ! Don't move this insn\n" \
+        "and    %2,%%g2,%%g2    ! Don't move this insn\n" \
+        "andcc  %%g0,0,%%g1     ! Don't move this insn\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,%3,%%g1\n" \
+        "mulscc %%g1,0,%%g1\n" \
+        "add    %%g1,%%g2,%0\n" \
+        "rd     %%y,%1" \
+        : "=r" ((USItype)(w1)), \
+        "=r" ((USItype)(w0)) \
+        : "%rI" ((USItype)(u)), \
+        "r" ((USItype)(v)) \
+        : "%g1", "%g2" __AND_CLOBBER_CC)
+#define UMUL_TIME 39            /* 39 instructions */
+#endif
+#ifndef udiv_qrnnd
+#ifndef LONGLONG_STANDALONE
+#define udiv_qrnnd(q, r, n1, n0, d) \
+do { USItype __r; \
+        (q) = __udiv_qrnnd(&__r, (n1), (n0), (d)); \
+        (r) = __r; \
+} while (0)
+        extern USItype __udiv_qrnnd();
+#define UDIV_TIME 140
+#endif /* LONGLONG_STANDALONE */
+#endif /* udiv_qrnnd */
+#endif /* __sparc__ */
+/***************************************
+        **************  VAX  ******************
+        ***************************************/
+#if defined(__vax__) && W_TYPE_SIZE == 32
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+        __asm__ ("addl2 %5,%1\n" \
+        "adwc %3,%0" \
+        : "=g" ((USItype)(sh)), \
+        "=&g" ((USItype)(sl)) \
+        : "%0" ((USItype)(ah)), \
+        "g" ((USItype)(bh)), \
+        "%1" ((USItype)(al)), \
+        "g" ((USItype)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+        __asm__ ("subl2 %5,%1\n" \
+        "sbwc %3,%0" \
+        : "=g" ((USItype)(sh)), \
+        "=&g" ((USItype)(sl)) \
+        : "0" ((USItype)(ah)), \
+        "g" ((USItype)(bh)), \
+        "1" ((USItype)(al)), \
+        "g" ((USItype)(bl)))
+#define umul_ppmm(xh, xl, m0, m1) \
+do { \
+        union {UDItype __ll; \
+        struct {USItype __l, __h; } __i; \
+        } __xx; \
+        USItype __m0 = (m0), __m1 = (m1); \
+        __asm__ ("emul %1,%2,$0,%0" \
+        : "=g" (__xx.__ll) \
+        : "g" (__m0), \
+        "g" (__m1)); \
+        (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
+        (xh) += ((((SItype) __m0 >> 31) & __m1) \
+        + (((SItype) __m1 >> 31) & __m0)); \
+} while (0)
+#define sdiv_qrnnd(q, r, n1, n0, d) \
+do { \
+        union {DItype __ll; \
+        struct {SItype __l, __h; } __i; \
+        } __xx; \
+        __xx.__i.__h = n1; __xx.__i.__l = n0; \
+        __asm__ ("ediv %3,%2,%0,%1" \
+        : "=g" (q), "=g" (r) \
+        : "g" (__xx.__ll), "g" (d)); \
+} while (0)
+#endif /* __vax__ */
+/***************************************
+        **************  Z8000   ****************
+        ***************************************/
+#if defined(__z8000__) && W_TYPE_SIZE == 16
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+        __asm__ ("add %H1,%H5\n\tadc  %H0,%H3" \
+        : "=r" ((unsigned int)(sh)), \
+        "=&r" ((unsigned int)(sl)) \
+        : "%0" ((unsigned int)(ah)), \
+        "r" ((unsigned int)(bh)), \
+        "%1" ((unsigned int)(al)), \
+        "rQR" ((unsigned int)(bl)))
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+        __asm__ ("sub %H1,%H5\n\tsbc  %H0,%H3" \
+        : "=r" ((unsigned int)(sh)), \
+        "=&r" ((unsigned int)(sl)) \
+        : "0" ((unsigned int)(ah)), \
+        "r" ((unsigned int)(bh)), \
+        "1" ((unsigned int)(al)), \
+        "rQR" ((unsigned int)(bl)))
+#define umul_ppmm(xh, xl, m0, m1) \
+do { \
+        union {long int __ll; \
+        struct {unsigned int __h, __l; } __i; \
+        } __xx; \
+        unsigned int __m0 = (m0), __m1 = (m1); \
+        __asm__ ("mult      %S0,%H3" \
+        : "=r" (__xx.__i.__h), \
+        "=r" (__xx.__i.__l) \
+        : "%1" (__m0), \
+        "rQR" (__m1)); \
+        (xh) = __xx.__i.__h; (xl) = __xx.__i.__l; \
+        (xh) += ((((signed int) __m0 >> 15) & __m1) \
+        + (((signed int) __m1 >> 15) & __m0)); \
+} while (0)
+#endif /* __z8000__ */
+#endif /* __GNUC__ */
+/***************************************
+        ***********  Generic Versions   ********
+        ***************************************/
+#if !defined(umul_ppmm) && defined(__umulsidi3)
+#define umul_ppmm(ph, pl, m0, m1) \
+{ \
+        UDWtype __ll = __umulsidi3(m0, m1); \
+        ph = (UWtype) (__ll >> W_TYPE_SIZE); \
+        pl = (UWtype) __ll; \
+}
+#endif
+#if !defined(__umulsidi3)
+#define __umulsidi3(u, v) \
+        ({UWtype __hi, __lo; \
+        umul_ppmm(__hi, __lo, u, v); \
+        ((UDWtype) __hi << W_TYPE_SIZE) | __lo; })
+#endif
+        /* If this machine has no inline assembler, use C macros.  */
+#if !defined(add_ssaaaa)
+#define add_ssaaaa(sh, sl, ah, al, bh, bl) \
+do { \
+        UWtype __x; \
+        __x = (al) + (bl); \
+        (sh) = (ah) + (bh) + (__x < (al)); \
+        (sl) = __x; \
+} while (0)
+#endif
+#if !defined(sub_ddmmss)
+#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
+do { \
+        UWtype __x; \
+        __x = (al) - (bl); \
+        (sh) = (ah) - (bh) - (__x > (al)); \
+        (sl) = __x; \
+} while (0)
+#endif
+#if !defined(umul_ppmm)
+#define umul_ppmm(w1, w0, u, v) \
+do { \
+        UWtype __x0, __x1, __x2, __x3; \
+        UHWtype __ul, __vl, __uh, __vh; \
+        UWtype __u = (u), __v = (v); \
+        \
+        __ul = __ll_lowpart(__u); \
+        __uh = __ll_highpart(__u); \
+        __vl = __ll_lowpart(__v); \
+        __vh = __ll_highpart(__v); \
+        \
+        __x0 = (UWtype) __ul * __vl; \
+        __x1 = (UWtype) __ul * __vh; \
+        __x2 = (UWtype) __uh * __vl; \
+        __x3 = (UWtype) __uh * __vh; \
+        \
+        __x1 += __ll_highpart(__x0);/* this can't give carry */ \
+        __x1 += __x2;           /* but this indeed can */ \
+        if (__x1 < __x2)                /* did we get it? */ \
+        __x3 += __ll_B;         /* yes, add it in the proper pos. */ \
+        \
+        (w1) = __x3 + __ll_highpart(__x1); \
+        (w0) = (__ll_lowpart(__x1) << W_TYPE_SIZE/2) + __ll_lowpart(__x0); \
+} while (0)
+#endif
+#if !defined(umul_ppmm)
+#define smul_ppmm(w1, w0, u, v) \
+do { \
+        UWtype __w1; \
+        UWtype __m0 = (u), __m1 = (v); \
+        umul_ppmm(__w1, w0, __m0, __m1); \
+        (w1) = __w1 - (-(__m0 >> (W_TYPE_SIZE - 1)) & __m1) \
+        - (-(__m1 >> (W_TYPE_SIZE - 1)) & __m0); \
+} while (0)
+#endif
+        /* Define this unconditionally, so it can be used for debugging.  */
+#define __udiv_qrnnd_c(q, r, n1, n0, d) \
+do { \
+        UWtype __d1, __d0, __q1, __q0, __r1, __r0, __m; \
+        __d1 = __ll_highpart(d); \
+        __d0 = __ll_lowpart(d); \
+        \
+        __r1 = (n1) % __d1; \
+        __q1 = (n1) / __d1; \
+        __m = (UWtype) __q1 * __d0; \
+        __r1 = __r1 * __ll_B | __ll_highpart(n0); \
+        if (__r1 < __m) { \
+                __q1--, __r1 += (d); \
+                if (__r1 >= (d)) /* i.e. we didn't get carry when adding to __r1 */ \
+                if (__r1 < __m) \
+                        __q1--, __r1 += (d); \
+        } \
+        __r1 -= __m; \
+        \
+        __r0 = __r1 % __d1; \
+        __q0 = __r1 / __d1; \
+        __m = (UWtype) __q0 * __d0; \
+        __r0 = __r0 * __ll_B | __ll_lowpart(n0); \
+        if (__r0 < __m) { \
+                __q0--, __r0 += (d); \
+                if (__r0 >= (d)) \
+                        if (__r0 < __m) \
+                                __q0--, __r0 += (d); \
+        } \
+        __r0 -= __m; \
+        \
+        (q) = (UWtype) __q1 * __ll_B | __q0; \
+        (r) = __r0; \
+} while (0)
+/* If the processor has no udiv_qrnnd but sdiv_qrnnd, go through
+        __udiv_w_sdiv (defined in libgcc or elsewhere).  */
+#if !defined(udiv_qrnnd) && defined(sdiv_qrnnd)
+#define udiv_qrnnd(q, r, nh, nl, d) \
+do { \
+        UWtype __r; \
+        (q) = __MPN(udiv_w_sdiv) (&__r, nh, nl, d); \
+        (r) = __r; \
+} while (0)
+#endif
+        /* If udiv_qrnnd was not defined for this processor, use __udiv_qrnnd_c.  */
+#if !defined(udiv_qrnnd)
+#define UDIV_NEEDS_NORMALIZATION 1
+#define udiv_qrnnd __udiv_qrnnd_c
+#endif
+#undef count_leading_zeros
+#if !defined(count_leading_zeros)
+        extern
+#ifdef __STDC__
+                        const
+#endif
+                        unsigned char __clz_tab[];
+#define count_leading_zeros(count, x) \
+do { \
+        UWtype __xr = (x); \
+        UWtype __a; \
+        \
+        if (W_TYPE_SIZE <= 32) { \
+                __a = __xr < ((UWtype) 1 << 2*__BITS4) \
+                ? (__xr < ((UWtype) 1 << __BITS4) ? 0 : __BITS4) \
+                : (__xr < ((UWtype) 1 << 3*__BITS4) ?  2*__BITS4 : 3*__BITS4); \
+        } \
+        else { \
+                for (__a = W_TYPE_SIZE - 8; __a > 0; __a -= 8) \
+                        if (((__xr >> __a) & 0xff) != 0) \
+                                break; \
+        } \
+        \
+        (count) = W_TYPE_SIZE - (__clz_tab[__xr >> __a] + __a); \
+} while (0)
+        /* This version gives a well-defined value for zero. */
+#define COUNT_LEADING_ZEROS_0 W_TYPE_SIZE
+#endif
+#if !defined(count_trailing_zeros)
+/* Define count_trailing_zeros using count_leading_zeros.  The latter might be
+        defined in asm, but if it is not, the C version above is good enough.  */
+#define count_trailing_zeros(count, x) \
+do { \
+        UWtype __ctz_x = (x); \
+        UWtype __ctz_c; \
+        count_leading_zeros(__ctz_c, __ctz_x & -__ctz_x); \
+        (count) = W_TYPE_SIZE - 1 - __ctz_c; \
+} while (0)
+#endif
+#ifndef UDIV_NEEDS_NORMALIZATION
+#define UDIV_NEEDS_NORMALIZATION 0
+#endif
diff --git a/lib/mpi/mpi-add.c b/lib/mpi/mpi-add.c
new file mode 100644
index 000000000000..f56b9ba295e6
--- /dev/null
+++ b/lib/mpi/mpi-add.c
@@ -0,0 +1,234 @@
+/* mpi-add.c  -  MPI functions
+ *      Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+ *      Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *       Actually it's the same code with only minor changes in the
+ *       way the data is stored; this is to support the abstraction
+ *       of an optional secure memory allocation which may be used
+ *       to avoid revealing of sensitive data due to paging etc.
+ *       The GNU MP Library itself is published under the LGPL;
+ *       however I decided to publish this code under the plain GPL.
+ */
+#include "mpi-internal.h"
+/****************
+ * Add the unsigned integer V to the mpi-integer U and store the
+ * result in W. U and V may be the same.
+ */
+int mpi_add_ui(MPI w, const MPI u, unsigned long v)
+{
+        mpi_ptr_t wp, up;
+        mpi_size_t usize, wsize;
+        int usign, wsign;
+        usize = u->nlimbs;
+        usign = u->sign;
+        wsign = 0;
+        /* If not space for W (and possible carry), increase space.  */
+        wsize = usize + 1;
+        if (w->alloced < wsize)
+                if (mpi_resize(w, wsize) < 0)
+                        return -ENOMEM;
+        /* These must be after realloc (U may be the same as W).  */
+        up = u->d;
+        wp = w->d;
+        if (!usize) {           /* simple */
+                wp[0] = v;
+                wsize = v ? 1 : 0;
+        } else if (!usign) {    /* mpi is not negative */
+                mpi_limb_t cy;
+                cy = mpihelp_add_1(wp, up, usize, v);
+                wp[usize] = cy;
+                wsize = usize + cy;
+        } else {                /* The signs are different.  Need exact comparison to determine
+                                 * which operand to subtract from which.  */
+                if (usize == 1 && up[0] < v) {
+                        wp[0] = v - up[0];
+                        wsize = 1;
+                } else {
+                        mpihelp_sub_1(wp, up, usize, v);
+                        /* Size can decrease with at most one limb. */
+                        wsize = usize - (wp[usize - 1] == 0);
+                        wsign = 1;
+                }
+        }
+        w->nlimbs = wsize;
+        w->sign = wsign;
+        return 0;
+}
+int mpi_add(MPI w, MPI u, MPI v)
+{
+        mpi_ptr_t wp, up, vp;
+        mpi_size_t usize, vsize, wsize;
+        int usign, vsign, wsign;
+        if (u->nlimbs < v->nlimbs) {    /* Swap U and V. */
+                usize = v->nlimbs;
+                usign = v->sign;
+                vsize = u->nlimbs;
+                vsign = u->sign;
+                wsize = usize + 1;
+                if (RESIZE_IF_NEEDED(w, wsize) < 0)
+                        return -ENOMEM;
+                /* These must be after realloc (u or v may be the same as w).  */
+                up = v->d;
+                vp = u->d;
+        } else {
+                usize = u->nlimbs;
+                usign = u->sign;
+                vsize = v->nlimbs;
+                vsign = v->sign;
+                wsize = usize + 1;
+                if (RESIZE_IF_NEEDED(w, wsize) < 0)
+                        return -ENOMEM;
+                /* These must be after realloc (u or v may be the same as w).  */
+                up = u->d;
+                vp = v->d;
+        }
+        wp = w->d;
+        wsign = 0;
+        if (!vsize) {           /* simple */
+                MPN_COPY(wp, up, usize);
+                wsize = usize;
+                wsign = usign;
+        } else if (usign != vsign) {    /* different sign */
+                /* This test is right since USIZE >= VSIZE */
+                if (usize != vsize) {
+                        mpihelp_sub(wp, up, usize, vp, vsize);
+                        wsize = usize;
+                        MPN_NORMALIZE(wp, wsize);
+                        wsign = usign;
+                } else if (mpihelp_cmp(up, vp, usize) < 0) {
+                        mpihelp_sub_n(wp, vp, up, usize);
+                        wsize = usize;
+                        MPN_NORMALIZE(wp, wsize);
+                        if (!usign)
+                                wsign = 1;
+                } else {
+                        mpihelp_sub_n(wp, up, vp, usize);
+                        wsize = usize;
+                        MPN_NORMALIZE(wp, wsize);
+                        if (usign)
+                                wsign = 1;
+                }
+        } else {                /* U and V have same sign. Add them. */
+                mpi_limb_t cy = mpihelp_add(wp, up, usize, vp, vsize);
+                wp[usize] = cy;
+                wsize = usize + cy;
+                if (usign)
+                        wsign = 1;
+        }
+        w->nlimbs = wsize;
+        w->sign = wsign;
+        return 0;
+}
+/****************
+ * Subtract the unsigned integer V from the mpi-integer U and store the
+ * result in W.
+ */
+int mpi_sub_ui(MPI w, MPI u, unsigned long v)
+{
+        mpi_ptr_t wp, up;
+        mpi_size_t usize, wsize;
+        int usign, wsign;
+        usize = u->nlimbs;
+        usign = u->sign;
+        wsign = 0;
+        /* If not space for W (and possible carry), increase space.  */
+        wsize = usize + 1;
+        if (w->alloced < wsize)
+                if (mpi_resize(w, wsize) < 0)
+                        return -ENOMEM;
+        /* These must be after realloc (U may be the same as W).  */
+        up = u->d;
+        wp = w->d;
+        if (!usize) {           /* simple */
+                wp[0] = v;
+                wsize = v ? 1 : 0;
+                wsign = 1;
+        } else if (usign) {     /* mpi and v are negative */
+                mpi_limb_t cy;
+                cy = mpihelp_add_1(wp, up, usize, v);
+                wp[usize] = cy;
+                wsize = usize + cy;
+        } else {                /* The signs are different.  Need exact comparison to determine
+                                 * which operand to subtract from which.  */
+                if (usize == 1 && up[0] < v) {
+                        wp[0] = v - up[0];
+                        wsize = 1;
+                        wsign = 1;
+                } else {
+                        mpihelp_sub_1(wp, up, usize, v);
+                        /* Size can decrease with at most one limb. */
+                        wsize = usize - (wp[usize - 1] == 0);
+                }
+        }
+        w->nlimbs = wsize;
+        w->sign = wsign;
+        return 0;
+}
+int mpi_sub(MPI w, MPI u, MPI v)
+{
+        int rc;
+        if (w == v) {
+                MPI vv;
+                if (mpi_copy(&vv, v) < 0)
+                        return -ENOMEM;
+                vv->sign = !vv->sign;
+                rc = mpi_add(w, u, vv);
+                mpi_free(vv);
+        } else {
+                /* fixme: this is not thread-save (we temp. modify v) */
+                v->sign = !v->sign;
+                rc = mpi_add(w, u, v);
+                v->sign = !v->sign;
+        }
+        return rc;
+}
+int mpi_addm(MPI w, MPI u, MPI v, MPI m)
+{
+        if (mpi_add(w, u, v) < 0 || mpi_fdiv_r(w, w, m) < 0)
+                return -ENOMEM;
+        return 0;
+}
+int mpi_subm(MPI w, MPI u, MPI v, MPI m)
+{
+        if (mpi_sub(w, u, v) < 0 || mpi_fdiv_r(w, w, m) < 0)
+                return -ENOMEM;
+        return 0;
+}
diff --git a/lib/mpi/mpi-bit.c b/lib/mpi/mpi-bit.c
new file mode 100644
index 000000000000..854c9c6da025
--- /dev/null
+++ b/lib/mpi/mpi-bit.c
@@ -0,0 +1,236 @@
+/* mpi-bit.c  -  MPI bit level fucntions
+ * Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+#include "mpi-internal.h"
+#include "longlong.h"
+const unsigned char __clz_tab[] = {
+        0, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5,
+            5, 5, 5, 5, 5, 5, 5, 5,
+        6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
+            6, 6, 6, 6, 6, 6, 6, 6,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+            7, 7, 7, 7, 7, 7, 7, 7,
+        7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
+            7, 7, 7, 7, 7, 7, 7, 7,
+        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+            8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+            8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+            8, 8, 8, 8, 8, 8, 8, 8,
+        8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+            8, 8, 8, 8, 8, 8, 8, 8,
+};
+#define A_LIMB_1 ((mpi_limb_t) 1)
+/****************
+ * Sometimes we have MSL (most significant limbs) which are 0;
+ * this is for some reasons not good, so this function removes them.
+ */
+void mpi_normalize(MPI a)
+{
+        for (; a->nlimbs && !a->d[a->nlimbs - 1]; a->nlimbs--)
+                ;
+}
+/****************
+ * Return the number of bits in A.
+ */
+unsigned mpi_get_nbits(MPI a)
+{
+        unsigned n;
+        mpi_normalize(a);
+        if (a->nlimbs) {
+                mpi_limb_t alimb = a->d[a->nlimbs - 1];
+                if (alimb)
+                        count_leading_zeros(n, alimb);
+                else
+                        n = BITS_PER_MPI_LIMB;
+                n = BITS_PER_MPI_LIMB - n + (a->nlimbs - 1) * BITS_PER_MPI_LIMB;
+        } else
+                n = 0;
+        return n;
+}
+EXPORT_SYMBOL_GPL(mpi_get_nbits);
+/****************
+ * Test whether bit N is set.
+ */
+int mpi_test_bit(MPI a, unsigned n)
+{
+        unsigned limbno, bitno;
+        mpi_limb_t limb;
+        limbno = n / BITS_PER_MPI_LIMB;
+        bitno = n % BITS_PER_MPI_LIMB;
+        if (limbno >= a->nlimbs)
+                return 0;       /* too far left: this is a 0 */
+        limb = a->d[limbno];
+        return (limb & (A_LIMB_1 << bitno)) ? 1 : 0;
+}
+/****************
+ * Set bit N of A.
+ */
+int mpi_set_bit(MPI a, unsigned n)
+{
+        unsigned limbno, bitno;
+        limbno = n / BITS_PER_MPI_LIMB;
+        bitno = n % BITS_PER_MPI_LIMB;
+        if (limbno >= a->nlimbs) {      /* resize */
+                if (a->alloced >= limbno)
+                        if (mpi_resize(a, limbno + 1) < 0)
+                                return -ENOMEM;
+                a->nlimbs = limbno + 1;
+        }
+        a->d[limbno] |= (A_LIMB_1 << bitno);
+        return 0;
+}
+/****************
+ * Set bit N of A. and clear all bits above
+ */
+int mpi_set_highbit(MPI a, unsigned n)
+{
+        unsigned limbno, bitno;
+        limbno = n / BITS_PER_MPI_LIMB;
+        bitno = n % BITS_PER_MPI_LIMB;
+        if (limbno >= a->nlimbs) {      /* resize */
+                if (a->alloced >= limbno)
+                        if (mpi_resize(a, limbno + 1) < 0)
+                                return -ENOMEM;
+                a->nlimbs = limbno + 1;
+        }
+        a->d[limbno] |= (A_LIMB_1 << bitno);
+        for (bitno++; bitno < BITS_PER_MPI_LIMB; bitno++)
+                a->d[limbno] &= ~(A_LIMB_1 << bitno);
+        a->nlimbs = limbno + 1;
+        return 0;
+}
+/****************
+ * clear bit N of A and all bits above
+ */
+void mpi_clear_highbit(MPI a, unsigned n)
+{
+        unsigned limbno, bitno;
+        limbno = n / BITS_PER_MPI_LIMB;
+        bitno = n % BITS_PER_MPI_LIMB;
+        if (limbno >= a->nlimbs)
+                return;         /* not allocated, so need to clear bits :-) */
+        for (; bitno < BITS_PER_MPI_LIMB; bitno++)
+                a->d[limbno] &= ~(A_LIMB_1 << bitno);
+        a->nlimbs = limbno + 1;
+}
+/****************
+ * Clear bit N of A.
+ */
+void mpi_clear_bit(MPI a, unsigned n)
+{
+        unsigned limbno, bitno;
+        limbno = n / BITS_PER_MPI_LIMB;
+        bitno = n % BITS_PER_MPI_LIMB;
+        if (limbno >= a->nlimbs)
+                return;         /* don't need to clear this bit, it's to far to left */
+        a->d[limbno] &= ~(A_LIMB_1 << bitno);
+}
+/****************
+ * Shift A by N bits to the right
+ * FIXME: should use alloc_limb if X and A are same.
+ */
+int mpi_rshift(MPI x, MPI a, unsigned n)
+{
+        mpi_ptr_t xp;
+        mpi_size_t xsize;
+        xsize = a->nlimbs;
+        x->sign = a->sign;
+        if (RESIZE_IF_NEEDED(x, (size_t) xsize) < 0)
+                return -ENOMEM;
+        xp = x->d;
+        if (xsize) {
+                mpihelp_rshift(xp, a->d, xsize, n);
+                MPN_NORMALIZE(xp, xsize);
+        }
+        x->nlimbs = xsize;
+        return 0;
+}
+/****************
+ * Shift A by COUNT limbs to the left
+ * This is used only within the MPI library
+ */
+int mpi_lshift_limbs(MPI a, unsigned int count)
+{
+        mpi_ptr_t ap = a->d;
+        int n = a->nlimbs;
+        int i;
+        if (!count || !n)
+                return 0;
+        if (RESIZE_IF_NEEDED(a, n + count) < 0)
+                return -ENOMEM;
+        for (i = n - 1; i >= 0; i--)
+                ap[i + count] = ap[i];
+        for (i = 0; i < count; i++)
+                ap[i] = 0;
+        a->nlimbs += count;
+        return 0;
+}
+/****************
+ * Shift A by COUNT limbs to the right
+ * This is used only within the MPI library
+ */
+void mpi_rshift_limbs(MPI a, unsigned int count)
+{
+        mpi_ptr_t ap = a->d;
+        mpi_size_t n = a->nlimbs;
+        unsigned int i;
+        if (count >= n) {
+                a->nlimbs = 0;
+                return;
+        }
+        for (i = 0; i < n - count; i++)
+                ap[i] = ap[i + count];
+        ap[i] = 0;
+        a->nlimbs -= count;
+}
diff --git a/lib/mpi/mpi-cmp.c b/lib/mpi/mpi-cmp.c
new file mode 100644
index 000000000000..914bc42a8a80
--- /dev/null
+++ b/lib/mpi/mpi-cmp.c
@@ -0,0 +1,68 @@
+/* mpi-cmp.c  -  MPI functions
+ * Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+#include "mpi-internal.h"
+int mpi_cmp_ui(MPI u, unsigned long v)
+{
+        mpi_limb_t limb = v;
+        mpi_normalize(u);
+        if (!u->nlimbs && !limb)
+                return 0;
+        if (u->sign)
+                return -1;
+        if (u->nlimbs > 1)
+                return 1;
+        if (u->d[0] == limb)
+                return 0;
+        else if (u->d[0] > limb)
+                return 1;
+        else
+                return -1;
+}
+int mpi_cmp(MPI u, MPI v)
+{
+        mpi_size_t usize, vsize;
+        int cmp;
+        mpi_normalize(u);
+        mpi_normalize(v);
+        usize = u->nlimbs;
+        vsize = v->nlimbs;
+        if (!u->sign && v->sign)
+                return 1;
+        if (u->sign && !v->sign)
+                return -1;
+        if (usize != vsize && !u->sign && !v->sign)
+                return usize - vsize;
+        if (usize != vsize && u->sign && v->sign)
+                return vsize + usize;
+        if (!usize)
+                return 0;
+        cmp = mpihelp_cmp(u->d, v->d, usize);
+        if (!cmp)
+                return 0;
+        if ((cmp < 0 ? 1 : 0) == (u->sign ? 1 : 0))
+                return 1;
+        return -1;
+}
diff --git a/lib/mpi/mpi-div.c b/lib/mpi/mpi-div.c
new file mode 100644
index 000000000000..c3087d1390ce
--- /dev/null
+++ b/lib/mpi/mpi-div.c
@@ -0,0 +1,333 @@
+/* mpi-div.c  -  MPI functions
+ *      Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+ *      Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *       Actually it's the same code with only minor changes in the
+ *       way the data is stored; this is to support the abstraction
+ *       of an optional secure memory allocation which may be used
+ *       to avoid revealing of sensitive data due to paging etc.
+ *       The GNU MP Library itself is published under the LGPL;
+ *       however I decided to publish this code under the plain GPL.
+ */
+#include <linux/string.h>
+#include "mpi-internal.h"
+#include "longlong.h"
+int mpi_fdiv_r(MPI rem, MPI dividend, MPI divisor)
+{
+        int rc = -ENOMEM;
+        int divisor_sign = divisor->sign;
+        MPI temp_divisor = NULL;
+        /* We need the original value of the divisor after the remainder has been
+         * preliminary calculated.      We have to copy it to temporary space if it's
+         * the same variable as REM.  */
+        if (rem == divisor) {
+                if (mpi_copy(&temp_divisor, divisor) < 0)
+                        goto nomem;
+                divisor = temp_divisor;
+        }
+        if (mpi_tdiv_qr(NULL, rem, dividend, divisor) < 0)
+                goto nomem;
+        if (((divisor_sign ? 1 : 0) ^ (dividend->sign ? 1 : 0)) && rem->nlimbs)
+                if (mpi_add(rem, rem, divisor) < 0)
+                        goto nomem;
+        rc = 0;
+nomem:
+        if (temp_divisor)
+                mpi_free(temp_divisor);
+        return rc;
+}
+/****************
+ * Division rounding the quotient towards -infinity.
+ * The remainder gets the same sign as the denominator.
+ * rem is optional
+ */
+ulong mpi_fdiv_r_ui(MPI rem, MPI dividend, ulong divisor)
+{
+        mpi_limb_t rlimb;
+        rlimb = mpihelp_mod_1(dividend->d, dividend->nlimbs, divisor);
+        if (rlimb && dividend->sign)
+                rlimb = divisor - rlimb;
+        if (rem) {
+                rem->d[0] = rlimb;
+                rem->nlimbs = rlimb ? 1 : 0;
+        }
+        return rlimb;
+}
+int mpi_fdiv_q(MPI quot, MPI dividend, MPI divisor)
+{
+        MPI tmp = mpi_alloc(mpi_get_nlimbs(quot));
+        if (!tmp)
+                return -ENOMEM;
+        mpi_fdiv_qr(quot, tmp, dividend, divisor);
+        mpi_free(tmp);
+        return 0;
+}
+int mpi_fdiv_qr(MPI quot, MPI rem, MPI dividend, MPI divisor)
+{
+        int divisor_sign = divisor->sign;
+        MPI temp_divisor = NULL;
+        if (quot == divisor || rem == divisor) {
+                if (mpi_copy(&temp_divisor, divisor) < 0)
+                        return -ENOMEM;
+                divisor = temp_divisor;
+        }
+        if (mpi_tdiv_qr(quot, rem, dividend, divisor) < 0)
+                goto nomem;
+        if ((divisor_sign ^ dividend->sign) && rem->nlimbs) {
+                if (mpi_sub_ui(quot, quot, 1) < 0)
+                        goto nomem;
+                if (mpi_add(rem, rem, divisor) < 0)
+                        goto nomem;
+        }
+        if (temp_divisor)
+                mpi_free(temp_divisor);
+        return 0;
+nomem:
+        mpi_free(temp_divisor);
+        return -ENOMEM;
+}
+/* If den == quot, den needs temporary storage.
+ * If den == rem, den needs temporary storage.
+ * If num == quot, num needs temporary storage.
+ * If den has temporary storage, it can be normalized while being copied,
+ *   i.e no extra storage should be allocated.
+ */
+int mpi_tdiv_r(MPI rem, MPI num, MPI den)
+{
+        return mpi_tdiv_qr(NULL, rem, num, den);
+}
+int mpi_tdiv_qr(MPI quot, MPI rem, MPI num, MPI den)
+{
+        int rc = -ENOMEM;
+        mpi_ptr_t np, dp;
+        mpi_ptr_t qp, rp;
+        mpi_size_t nsize = num->nlimbs;
+        mpi_size_t dsize = den->nlimbs;
+        mpi_size_t qsize, rsize;
+        mpi_size_t sign_remainder = num->sign;
+        mpi_size_t sign_quotient = num->sign ^ den->sign;
+        unsigned normalization_steps;
+        mpi_limb_t q_limb;
+        mpi_ptr_t marker[5];
+        int markidx = 0;
+        memset(marker, 0, sizeof(marker));
+        /* Ensure space is enough for quotient and remainder.
+         * We need space for an extra limb in the remainder, because it's
+         * up-shifted (normalized) below.  */
+        rsize = nsize + 1;
+        if (mpi_resize(rem, rsize) < 0)
+                goto nomem;
+        qsize = rsize - dsize;  /* qsize cannot be bigger than this.  */
+        if (qsize <= 0) {
+                if (num != rem) {
+                        rem->nlimbs = num->nlimbs;
+                        rem->sign = num->sign;
+                        MPN_COPY(rem->d, num->d, nsize);
+                }
+                if (quot) {
+                        /* This needs to follow the assignment to rem, in case the
+                         * numerator and quotient are the same.  */
+                        quot->nlimbs = 0;
+                        quot->sign = 0;
+                }
+                return 0;
+        }
+        if (quot)
+                if (mpi_resize(quot, qsize) < 0)
+                        goto nomem;
+        /* Read pointers here, when reallocation is finished.  */
+        np = num->d;
+        dp = den->d;
+        rp = rem->d;
+        /* Optimize division by a single-limb divisor.  */
+        if (dsize == 1) {
+                mpi_limb_t rlimb;
+                if (quot) {
+                        qp = quot->d;
+                        rlimb = mpihelp_divmod_1(qp, np, nsize, dp[0]);
+                        qsize -= qp[qsize - 1] == 0;
+                        quot->nlimbs = qsize;
+                        quot->sign = sign_quotient;
+                } else
+                        rlimb = mpihelp_mod_1(np, nsize, dp[0]);
+                rp[0] = rlimb;
+                rsize = rlimb != 0 ? 1 : 0;
+                rem->nlimbs = rsize;
+                rem->sign = sign_remainder;
+                return 0;
+        }
+        if (quot) {
+                qp = quot->d;
+                /* Make sure QP and NP point to different objects.  Otherwise the
+                 * numerator would be gradually overwritten by the quotient limbs.  */
+                if (qp == np) { /* Copy NP object to temporary space.  */
+                        np = marker[markidx++] = mpi_alloc_limb_space(nsize);
+                        MPN_COPY(np, qp, nsize);
+                }
+        } else                  /* Put quotient at top of remainder. */
+                qp = rp + dsize;
+        count_leading_zeros(normalization_steps, dp[dsize - 1]);
+        /* Normalize the denominator, i.e. make its most significant bit set by
+         * shifting it NORMALIZATION_STEPS bits to the left.  Also shift the
+         * numerator the same number of steps (to keep the quotient the same!).
+         */
+        if (normalization_steps) {
+                mpi_ptr_t tp;
+                mpi_limb_t nlimb;
+                /* Shift up the denominator setting the most significant bit of
+                 * the most significant word.  Use temporary storage not to clobber
+                 * the original contents of the denominator.  */
+                tp = marker[markidx++] = mpi_alloc_limb_space(dsize);
+                if (!tp)
+                        goto nomem;
+                mpihelp_lshift(tp, dp, dsize, normalization_steps);
+                dp = tp;
+                /* Shift up the numerator, possibly introducing a new most
+                 * significant word.  Move the shifted numerator in the remainder
+                 * meanwhile.  */
+                nlimb = mpihelp_lshift(rp, np, nsize, normalization_steps);
+                if (nlimb) {
+                        rp[nsize] = nlimb;
+                        rsize = nsize + 1;
+                } else
+                        rsize = nsize;
+        } else {
+                /* The denominator is already normalized, as required.  Copy it to
+                 * temporary space if it overlaps with the quotient or remainder.  */
+                if (dp == rp || (quot && (dp == qp))) {
+                        mpi_ptr_t tp;
+                        tp = marker[markidx++] = mpi_alloc_limb_space(dsize);
+                        if (!tp)
+                                goto nomem;
+                        MPN_COPY(tp, dp, dsize);
+                        dp = tp;
+                }
+                /* Move the numerator to the remainder.  */
+                if (rp != np)
+                        MPN_COPY(rp, np, nsize);
+                rsize = nsize;
+        }
+        q_limb = mpihelp_divrem(qp, 0, rp, rsize, dp, dsize);
+        if (quot) {
+                qsize = rsize - dsize;
+                if (q_limb) {
+                        qp[qsize] = q_limb;
+                        qsize += 1;
+                }
+                quot->nlimbs = qsize;
+                quot->sign = sign_quotient;
+        }
+        rsize = dsize;
+        MPN_NORMALIZE(rp, rsize);
+        if (normalization_steps && rsize) {
+                mpihelp_rshift(rp, rp, rsize, normalization_steps);
+                rsize -= rp[rsize - 1] == 0 ? 1 : 0;
+        }
+        rem->nlimbs = rsize;
+        rem->sign = sign_remainder;
+        rc = 0;
+nomem:
+        while (markidx)
+                mpi_free_limb_space(marker[--markidx]);
+        return rc;
+}
+int mpi_tdiv_q_2exp(MPI w, MPI u, unsigned count)
+{
+        mpi_size_t usize, wsize;
+        mpi_size_t limb_cnt;
+        usize = u->nlimbs;
+        limb_cnt = count / BITS_PER_MPI_LIMB;
+        wsize = usize - limb_cnt;
+        if (limb_cnt >= usize)
+                w->nlimbs = 0;
+        else {
+                mpi_ptr_t wp;
+                mpi_ptr_t up;
+                if (RESIZE_IF_NEEDED(w, wsize) < 0)
+                        return -ENOMEM;
+                wp = w->d;
+                up = u->d;
+                count %= BITS_PER_MPI_LIMB;
+                if (count) {
+                        mpihelp_rshift(wp, up + limb_cnt, wsize, count);
+                        wsize -= !wp[wsize - 1];
+                } else {
+                        MPN_COPY_INCR(wp, up + limb_cnt, wsize);
+                }
+                w->nlimbs = wsize;
+        }
+        return 0;
+}
+/****************
+ * Check whether dividend is divisible by divisor
+ * (note: divisor must fit into a limb)
+ */
+int mpi_divisible_ui(MPI dividend, ulong divisor)
+{
+        return !mpihelp_mod_1(dividend->d, dividend->nlimbs, divisor);
+}
diff --git a/lib/mpi/mpi-gcd.c b/lib/mpi/mpi-gcd.c
new file mode 100644
index 000000000000..13c48aef9c4e
--- /dev/null
+++ b/lib/mpi/mpi-gcd.c
@@ -0,0 +1,59 @@
+/* mpi-gcd.c  -  MPI functions
+ * Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+#include "mpi-internal.h"
+/****************
+ * Find the greatest common divisor G of A and B.
+ * Return: true if this 1, false in all other cases
+ */
+int mpi_gcd(MPI g, const MPI xa, const MPI xb)
+{
+        MPI a = NULL, b = NULL;
+        if (mpi_copy(&a, xa) < 0)
+                goto nomem;
+        if (mpi_copy(&b, xb) < 0)
+                goto nomem;
+        /* TAOCP Vol II, 4.5.2, Algorithm A */
+        a->sign = 0;
+        b->sign = 0;
+        while (mpi_cmp_ui(b, 0)) {
+                if (mpi_fdiv_r(g, a, b) < 0)    /* g used as temorary variable */
+                        goto nomem;
+                if (mpi_set(a, b) < 0)
+                        goto nomem;
+                if (mpi_set(b, g) < 0)
+                        goto nomem;
+        }
+        if (mpi_set(g, a) < 0)
+                goto nomem;
+        mpi_free(a);
+        mpi_free(b);
+        return !mpi_cmp_ui(g, 1);
+nomem:
+        mpi_free(a);
+        mpi_free(b);
+        return -ENOMEM;
+}
diff --git a/lib/mpi/mpi-inline.c b/lib/mpi/mpi-inline.c
new file mode 100644
index 000000000000..654f68aeed8b
--- /dev/null
+++ b/lib/mpi/mpi-inline.c
@@ -0,0 +1,31 @@
+/* mpi-inline.c
+ * Copyright (C) 1999, 2000, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+/* put the inline functions as real functions into the lib */
+#define G10_MPI_INLINE_DECL
+#include "mpi-internal.h"
+/* always include the header becuase it is only
+ * included by mpi-internal if __GCC__ is defined but we
+ * need it here in all cases and the above definition of
+ * of the macro allows us to do so
+ */
+#include "mpi-inline.h"
diff --git a/lib/mpi/mpi-inline.h b/lib/mpi/mpi-inline.h
new file mode 100644
index 000000000000..e2b39852b30a
--- /dev/null
+++ b/lib/mpi/mpi-inline.h
@@ -0,0 +1,122 @@
+/* mpi-inline.h  -  Internal to the Multi Precision Integers
+ *      Copyright (C) 1994, 1996, 1998, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *       Actually it's the same code with only minor changes in the
+ *       way the data is stored; this is to support the abstraction
+ *       of an optional secure memory allocation which may be used
+ *       to avoid revealing of sensitive data due to paging etc.
+ *       The GNU MP Library itself is published under the LGPL;
+ *       however I decided to publish this code under the plain GPL.
+ */
+#ifndef G10_MPI_INLINE_H
+#define G10_MPI_INLINE_H
+#ifndef G10_MPI_INLINE_DECL
+#define G10_MPI_INLINE_DECL  extern inline
+#endif
+G10_MPI_INLINE_DECL mpi_limb_t
+mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+              mpi_size_t s1_size, mpi_limb_t s2_limb)
+{
+        mpi_limb_t x;
+        x = *s1_ptr++;
+        s2_limb += x;
+        *res_ptr++ = s2_limb;
+        if (s2_limb < x) {      /* sum is less than the left operand: handle carry */
+                while (--s1_size) {
+                        x = *s1_ptr++ + 1;      /* add carry */
+                        *res_ptr++ = x; /* and store */
+                        if (x)  /* not 0 (no overflow): we can stop */
+                                goto leave;
+                }
+                return 1;       /* return carry (size of s1 to small) */
+        }
+leave:
+        if (res_ptr != s1_ptr) {        /* not the same variable */
+                mpi_size_t i;   /* copy the rest */
+                for (i = 0; i < s1_size - 1; i++)
+                        res_ptr[i] = s1_ptr[i];
+        }
+        return 0;               /* no carry */
+}
+G10_MPI_INLINE_DECL mpi_limb_t
+mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+            mpi_ptr_t s2_ptr, mpi_size_t s2_size)
+{
+        mpi_limb_t cy = 0;
+        if (s2_size)
+                cy = mpihelp_add_n(res_ptr, s1_ptr, s2_ptr, s2_size);
+        if (s1_size - s2_size)
+                cy = mpihelp_add_1(res_ptr + s2_size, s1_ptr + s2_size,
+                                   s1_size - s2_size, cy);
+        return cy;
+}
+G10_MPI_INLINE_DECL mpi_limb_t
+mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+              mpi_size_t s1_size, mpi_limb_t s2_limb)
+{
+        mpi_limb_t x;
+        x = *s1_ptr++;
+        s2_limb = x - s2_limb;
+        *res_ptr++ = s2_limb;
+        if (s2_limb > x) {
+                while (--s1_size) {
+                        x = *s1_ptr++;
+                        *res_ptr++ = x - 1;
+                        if (x)
+                                goto leave;
+                }
+                return 1;
+        }
+leave:
+        if (res_ptr != s1_ptr) {
+                mpi_size_t i;
+                for (i = 0; i < s1_size - 1; i++)
+                        res_ptr[i] = s1_ptr[i];
+        }
+        return 0;
+}
+G10_MPI_INLINE_DECL mpi_limb_t
+mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+            mpi_ptr_t s2_ptr, mpi_size_t s2_size)
+{
+        mpi_limb_t cy = 0;
+        if (s2_size)
+                cy = mpihelp_sub_n(res_ptr, s1_ptr, s2_ptr, s2_size);
+        if (s1_size - s2_size)
+                cy = mpihelp_sub_1(res_ptr + s2_size, s1_ptr + s2_size,
+                                   s1_size - s2_size, cy);
+        return cy;
+}
+#endif /*G10_MPI_INLINE_H */
diff --git a/lib/mpi/mpi-internal.h b/lib/mpi/mpi-internal.h
new file mode 100644
index 000000000000..77adcf6bc257
--- /dev/null
+++ b/lib/mpi/mpi-internal.h
@@ -0,0 +1,261 @@
+/* mpi-internal.h  -  Internal to the Multi Precision Integers
+ *      Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+ *      Copyright (C) 1998, 2000 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *       Actually it's the same code with only minor changes in the
+ *       way the data is stored; this is to support the abstraction
+ *       of an optional secure memory allocation which may be used
+ *       to avoid revealing of sensitive data due to paging etc.
+ *       The GNU MP Library itself is published under the LGPL;
+ *       however I decided to publish this code under the plain GPL.
+ */
+#ifndef G10_MPI_INTERNAL_H
+#define G10_MPI_INTERNAL_H
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/string.h>
+#include <linux/mpi.h>
+#include <linux/errno.h>
+#define log_debug printk
+#define log_bug printk
+#define assert(x) \
+        do { \
+                if (!x) \
+                        log_bug("failed assertion\n"); \
+        } while (0);
+/* If KARATSUBA_THRESHOLD is not already defined, define it to a
+ * value which is good on most machines.  */
+/* tested 4, 16, 32 and 64, where 16 gave the best performance when
+ * checking a 768 and a 1024 bit ElGamal signature.
+ * (wk 22.12.97) */
+#ifndef KARATSUBA_THRESHOLD
+#define KARATSUBA_THRESHOLD 16
+#endif
+/* The code can't handle KARATSUBA_THRESHOLD smaller than 2.  */
+#if KARATSUBA_THRESHOLD < 2
+#undef KARATSUBA_THRESHOLD
+#define KARATSUBA_THRESHOLD 2
+#endif
+typedef mpi_limb_t *mpi_ptr_t;  /* pointer to a limb */
+typedef int mpi_size_t;         /* (must be a signed type) */
+#define ABS(x) (x >= 0 ? x : -x)
+#define MIN(l, o) ((l) < (o) ? (l) : (o))
+#define MAX(h, i) ((h) > (i) ? (h) : (i))
+static inline int RESIZE_IF_NEEDED(MPI a, unsigned b)
+{
+        if (a->alloced < b)
+                return mpi_resize(a, b);
+        return 0;
+}
+/* Copy N limbs from S to D.  */
+#define MPN_COPY(d, s, n) \
+        do {                                    \
+                mpi_size_t _i;                  \
+                for (_i = 0; _i < (n); _i++)    \
+                        (d)[_i] = (s)[_i];      \
+        } while (0)
+#define MPN_COPY_INCR(d, s, n) \
+        do {                                    \
+                mpi_size_t _i;                  \
+                for (_i = 0; _i < (n); _i++)    \
+                        (d)[_i] = (d)[_i];      \
+        } while (0)
+#define MPN_COPY_DECR(d, s, n) \
+        do {                                    \
+                mpi_size_t _i;                  \
+                for (_i = (n)-1; _i >= 0; _i--) \
+                        (d)[_i] = (s)[_i];      \
+        } while (0)
+/* Zero N limbs at D */
+#define MPN_ZERO(d, n) \
+        do {                                    \
+                int  _i;                        \
+                for (_i = 0; _i < (n); _i++)    \
+                        (d)[_i] = 0;            \
+        } while (0)
+#define MPN_NORMALIZE(d, n)  \
+        do {                                    \
+                while ((n) > 0) {               \
+                        if ((d)[(n)-1])         \
+                                break;          \
+                        (n)--;                  \
+                }                               \
+        } while (0)
+#define MPN_NORMALIZE_NOT_ZERO(d, n) \
+        do {                            \
+                for (;;) {              \
+                        if ((d)[(n)-1]) \
+                                break;  \
+                        (n)--;          \
+                }                       \
+        } while (0)
+#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \
+        do {                                                    \
+                if ((size) < KARATSUBA_THRESHOLD)               \
+                        mul_n_basecase(prodp, up, vp, size);    \
+                else                                            \
+                        mul_n(prodp, up, vp, size, tspace);     \
+        } while (0);
+/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
+ * limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
+ * If this would yield overflow, DI should be the largest possible number
+ * (i.e., only ones).  For correct operation, the most significant bit of D
+ * has to be set.  Put the quotient in Q and the remainder in R.
+ */
+#define UDIV_QRNND_PREINV(q, r, nh, nl, d, di) \
+        do {                                                            \
+                mpi_limb_t _q, _ql, _r;                                 \
+                mpi_limb_t _xh, _xl;                                    \
+                umul_ppmm(_q, _ql, (nh), (di));                         \
+                _q += (nh);     /* DI is 2**BITS_PER_MPI_LIMB too small */ \
+                umul_ppmm(_xh, _xl, _q, (d));                           \
+                sub_ddmmss(_xh, _r, (nh), (nl), _xh, _xl);              \
+                if (_xh) {                                              \
+                        sub_ddmmss(_xh, _r, _xh, _r, 0, (d));           \
+                        _q++;                                           \
+                        if (_xh) {                                      \
+                                sub_ddmmss(_xh, _r, _xh, _r, 0, (d));   \
+                                _q++;                                   \
+                        }                                               \
+                }                                                       \
+                if (_r >= (d)) {                                        \
+                        _r -= (d);                                      \
+                        _q++;                                           \
+                }                                                       \
+                (r) = _r;                                               \
+                (q) = _q;                                               \
+        } while (0)
+/*-- mpiutil.c --*/
+mpi_ptr_t mpi_alloc_limb_space(unsigned nlimbs);
+void mpi_free_limb_space(mpi_ptr_t a);
+void mpi_assign_limb_space(MPI a, mpi_ptr_t ap, unsigned nlimbs);
+/*-- mpi-bit.c --*/
+void mpi_rshift_limbs(MPI a, unsigned int count);
+int mpi_lshift_limbs(MPI a, unsigned int count);
+/*-- mpihelp-add.c --*/
+mpi_limb_t mpihelp_add_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+                         mpi_size_t s1_size, mpi_limb_t s2_limb);
+mpi_limb_t mpihelp_add_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+                         mpi_ptr_t s2_ptr, mpi_size_t size);
+mpi_limb_t mpihelp_add(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+                       mpi_ptr_t s2_ptr, mpi_size_t s2_size);
+/*-- mpihelp-sub.c --*/
+mpi_limb_t mpihelp_sub_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+                         mpi_size_t s1_size, mpi_limb_t s2_limb);
+mpi_limb_t mpihelp_sub_n(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+                         mpi_ptr_t s2_ptr, mpi_size_t size);
+mpi_limb_t mpihelp_sub(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr, mpi_size_t s1_size,
+                       mpi_ptr_t s2_ptr, mpi_size_t s2_size);
+/*-- mpihelp-cmp.c --*/
+int mpihelp_cmp(mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size);
+/*-- mpihelp-mul.c --*/
+struct karatsuba_ctx {
+        struct karatsuba_ctx *next;
+        mpi_ptr_t tspace;
+        mpi_size_t tspace_size;
+        mpi_ptr_t tp;
+        mpi_size_t tp_size;
+};
+void mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx);
+mpi_limb_t mpihelp_addmul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+                            mpi_size_t s1_size, mpi_limb_t s2_limb);
+mpi_limb_t mpihelp_submul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+                            mpi_size_t s1_size, mpi_limb_t s2_limb);
+int mpihelp_mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size);
+int mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
+                mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result);
+void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size);
+void mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size,
+                mpi_ptr_t tspace);
+int mpihelp_mul_karatsuba_case(mpi_ptr_t prodp,
+                               mpi_ptr_t up, mpi_size_t usize,
+                               mpi_ptr_t vp, mpi_size_t vsize,
+                               struct karatsuba_ctx *ctx);
+/*-- mpihelp-mul_1.c (or xxx/cpu/ *.S) --*/
+mpi_limb_t mpihelp_mul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
+                         mpi_size_t s1_size, mpi_limb_t s2_limb);
+/*-- mpihelp-div.c --*/
+mpi_limb_t mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
+                         mpi_limb_t divisor_limb);
+mpi_limb_t mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs,
+                          mpi_ptr_t np, mpi_size_t nsize,
+                          mpi_ptr_t dp, mpi_size_t dsize);
+mpi_limb_t mpihelp_divmod_1(mpi_ptr_t quot_ptr,
+                            mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
+                            mpi_limb_t divisor_limb);
+/*-- mpihelp-shift.c --*/
+mpi_limb_t mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
+                          unsigned cnt);
+mpi_limb_t mpihelp_rshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
+                          unsigned cnt);
+/* Define stuff for longlong.h.  */
+#define W_TYPE_SIZE BITS_PER_MPI_LIMB
+typedef mpi_limb_t UWtype;
+typedef unsigned int UHWtype;
+#if defined(__GNUC__)
+typedef unsigned int UQItype __attribute__ ((mode(QI)));
+typedef int SItype __attribute__ ((mode(SI)));
+typedef unsigned int USItype __attribute__ ((mode(SI)));
+typedef int DItype __attribute__ ((mode(DI)));
+typedef unsigned int UDItype __attribute__ ((mode(DI)));
+#else
+typedef unsigned char UQItype;
+typedef long SItype;
+typedef unsigned long USItype;
+#endif
+#ifdef __GNUC__
+#include "mpi-inline.h"
+#endif
+#endif /*G10_MPI_INTERNAL_H */
diff --git a/lib/mpi/mpi-inv.c b/lib/mpi/mpi-inv.c
new file mode 100644
index 000000000000..0951f9847745
--- /dev/null
+++ b/lib/mpi/mpi-inv.c
@@ -0,0 +1,187 @@
+/* mpi-inv.c  -  MPI functions
+ * Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+#include "mpi-internal.h"
+/****************
+ * Calculate the multiplicative inverse X of A mod N
+ * That is: Find the solution x for
+ *              1 = (a*x) mod n
+ */
+int mpi_invm(MPI x, const MPI a, const MPI n)
+{
+        /* Extended Euclid's algorithm (See TAOPC Vol II, 4.5.2, Alg X)
+         * modified according to Michael Penk's solution for Exercice 35
+         * with further enhancement */
+        MPI u = NULL, v = NULL;
+        MPI u1 = NULL, u2 = NULL, u3 = NULL;
+        MPI v1 = NULL, v2 = NULL, v3 = NULL;
+        MPI t1 = NULL, t2 = NULL, t3 = NULL;
+        unsigned k;
+        int sign;
+        int odd = 0;
+        int rc = -ENOMEM;
+        if (mpi_copy(&u, a) < 0)
+                goto cleanup;
+        if (mpi_copy(&v, n) < 0)
+                goto cleanup;
+        for (k = 0; !mpi_test_bit(u, 0) && !mpi_test_bit(v, 0); k++) {
+                if (mpi_rshift(u, u, 1) < 0)
+                        goto cleanup;
+                if (mpi_rshift(v, v, 1) < 0)
+                        goto cleanup;
+        }
+        odd = mpi_test_bit(v, 0);
+        u1 = mpi_alloc_set_ui(1);
+        if (!u1)
+                goto cleanup;
+        if (!odd) {
+                u2 = mpi_alloc_set_ui(0);
+                if (!u2)
+                        goto cleanup;
+        }
+        if (mpi_copy(&u3, u) < 0)
+                goto cleanup;
+        if (mpi_copy(&v1, v) < 0)
+                goto cleanup;
+        if (!odd) {
+                v2 = mpi_alloc(mpi_get_nlimbs(u));
+                if (!v2)
+                        goto cleanup;
+                if (mpi_sub(v2, u1, u) < 0)
+                        goto cleanup;   /* U is used as const 1 */
+        }
+        if (mpi_copy(&v3, v) < 0)
+                goto cleanup;
+        if (mpi_test_bit(u, 0)) {       /* u is odd */
+                t1 = mpi_alloc_set_ui(0);
+                if (!t1)
+                        goto cleanup;
+                if (!odd) {
+                        t2 = mpi_alloc_set_ui(1);
+                        if (!t2)
+                                goto cleanup;
+                        t2->sign = 1;
+                }
+                if (mpi_copy(&t3, v) < 0)
+                        goto cleanup;
+                t3->sign = !t3->sign;
+                goto Y4;
+        } else {
+                t1 = mpi_alloc_set_ui(1);
+                if (!t1)
+                        goto cleanup;
+                if (!odd) {
+                        t2 = mpi_alloc_set_ui(0);
+                        if (!t2)
+                                goto cleanup;
+                }
+                if (mpi_copy(&t3, u) < 0)
+                        goto cleanup;
+        }
+        do {
+                do {
+                        if (!odd) {
+                                if (mpi_test_bit(t1, 0) || mpi_test_bit(t2, 0)) {       /* one is odd */
+                                        if (mpi_add(t1, t1, v) < 0)
+                                                goto cleanup;
+                                        if (mpi_sub(t2, t2, u) < 0)
+                                                goto cleanup;
+                                }
+                                if (mpi_rshift(t1, t1, 1) < 0)
+                                        goto cleanup;
+                                if (mpi_rshift(t2, t2, 1) < 0)
+                                        goto cleanup;
+                                if (mpi_rshift(t3, t3, 1) < 0)
+                                        goto cleanup;
+                        } else {
+                                if (mpi_test_bit(t1, 0))
+                                        if (mpi_add(t1, t1, v) < 0)
+                                                goto cleanup;
+                                if (mpi_rshift(t1, t1, 1) < 0)
+                                        goto cleanup;
+                                if (mpi_rshift(t3, t3, 1) < 0)
+                                        goto cleanup;
+                        }
+Y4:
+                        ;
+                } while (!mpi_test_bit(t3, 0)); /* while t3 is even */
+                if (!t3->sign) {
+                        if (mpi_set(u1, t1) < 0)
+                                goto cleanup;
+                        if (!odd)
+                                if (mpi_set(u2, t2) < 0)
+                                        goto cleanup;
+                        if (mpi_set(u3, t3) < 0)
+                                goto cleanup;
+                } else {
+                        if (mpi_sub(v1, v, t1) < 0)
+                                goto cleanup;
+                        sign = u->sign;
+                        u->sign = !u->sign;
+                        if (!odd)
+                                if (mpi_sub(v2, u, t2) < 0)
+                                        goto cleanup;
+                        u->sign = sign;
+                        sign = t3->sign;
+                        t3->sign = !t3->sign;
+                        if (mpi_set(v3, t3) < 0)
+                                goto cleanup;
+                        t3->sign = sign;
+                }
+                if (mpi_sub(t1, u1, v1) < 0)
+                        goto cleanup;
+                if (!odd)
+                        if (mpi_sub(t2, u2, v2) < 0)
+                                goto cleanup;
+                if (mpi_sub(t3, u3, v3) < 0)
+                        goto cleanup;
+                if (t1->sign) {
+                        if (mpi_add(t1, t1, v) < 0)
+                                goto cleanup;
+                        if (!odd)
+                                if (mpi_sub(t2, t2, u) < 0)
+                                        goto cleanup;
+                }
+        } while (mpi_cmp_ui(t3, 0));    /* while t3 != 0 */
+        /* mpi_lshift( u3, k ); */
+        rc = mpi_set(x, u1);
+cleanup:
+        mpi_free(u1);
+        mpi_free(v1);
+        mpi_free(t1);
+        if (!odd) {
+                mpi_free(u2);
+                mpi_free(v2);
+                mpi_free(t2);
+        }
+        mpi_free(u3);
+        mpi_free(v3);
+        mpi_free(t3);
+        mpi_free(u);
+        mpi_free(v);
+        return rc;
+}
diff --git a/lib/mpi/mpi-mpow.c b/lib/mpi/mpi-mpow.c
new file mode 100644
index 000000000000..7328d0d6c748
--- /dev/null
+++ b/lib/mpi/mpi-mpow.c
@@ -0,0 +1,134 @@
+/* mpi-mpow.c  -  MPI functions
+ * Copyright (C) 1998, 1999, 2000 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+#include "mpi-internal.h"
+#include "longlong.h"
+static int build_index(const MPI *exparray, int k, int i, int t)
+{
+        int j, bitno;
+        int index = 0;
+        bitno = t - i;
+        for (j = k - 1; j >= 0; j--) {
+                index <<= 1;
+                if (mpi_test_bit(exparray[j], bitno))
+                        index |= 1;
+        }
+        return index;
+}
+/****************
+ * RES = (BASE[0] ^ EXP[0]) *  (BASE[1] ^ EXP[1]) * ... * mod M
+ */
+int mpi_mulpowm(MPI res, MPI *basearray, MPI *exparray, MPI m)
+{
+        int rc = -ENOMEM;
+        int k;                  /* number of elements */
+        int t;                  /* bit size of largest exponent */
+        int i, j, idx;
+        MPI *G = NULL;          /* table with precomputed values of size 2^k */
+        MPI tmp = NULL;
+        for (k = 0; basearray[k]; k++)
+                ;
+        if (!k) {
+                pr_emerg("mpi_mulpowm: assert(k) failed\n");
+                BUG();
+        }
+        for (t = 0, i = 0; (tmp = exparray[i]); i++) {
+                j = mpi_get_nbits(tmp);
+                if (j > t)
+                        t = j;
+        }
+        if (i != k) {
+                pr_emerg("mpi_mulpowm: assert(i==k) failed\n");
+                BUG();
+        }
+        if (!t) {
+                pr_emerg("mpi_mulpowm: assert(t) failed\n");
+                BUG();
+        }
+        if (k >= 10) {
+                pr_emerg("mpi_mulpowm: assert(k<10) failed\n");
+                BUG();
+        }
+        G = kzalloc((1 << k) * sizeof *G, GFP_KERNEL);
+        if (!G)
+                goto err_out;
+        /* and calculate */
+        tmp = mpi_alloc(mpi_get_nlimbs(m) + 1);
+        if (!tmp)
+                goto nomem;
+        if (mpi_set_ui(res, 1) < 0)
+                goto nomem;
+        for (i = 1; i <= t; i++) {
+                if (mpi_mulm(tmp, res, res, m) < 0)
+                        goto nomem;
+                idx = build_index(exparray, k, i, t);
+                if (!(idx >= 0 && idx < (1 << k))) {
+                        pr_emerg("mpi_mulpowm: assert(idx >= 0 && idx < (1<<k)) failed\n");
+                        BUG();
+                }
+                if (!G[idx]) {
+                        if (!idx) {
+                                G[0] = mpi_alloc_set_ui(1);
+                                if (!G[0])
+                                        goto nomem;
+                        } else {
+                                for (j = 0; j < k; j++) {
+                                        if ((idx & (1 << j))) {
+                                                if (!G[idx]) {
+                                                        if (mpi_copy
+                                                            (&G[idx],
+                                                             basearray[j]) < 0)
+                                                                goto nomem;
+                                                } else {
+                                                        if (mpi_mulm
+                                                            (G[idx], G[idx],
+                                                             basearray[j],
+                                                             m) < 0)
+                                                                goto nomem;
+                                                }
+                                        }
+                                }
+                                if (!G[idx]) {
+                                        G[idx] = mpi_alloc(0);
+                                        if (!G[idx])
+                                                goto nomem;
+                                }
+                        }
+                }
+                if (mpi_mulm(res, tmp, G[idx], m) < 0)
+                        goto nomem;
+        }
+        rc = 0;
+nomem:
+        /* cleanup */
+        mpi_free(tmp);
+        for (i = 0; i < (1 << k); i++)
+                mpi_free(G[i]);
+        kfree(G);
+err_out:
+        return rc;
+}
diff --git a/lib/mpi/mpi-mul.c b/lib/mpi/mpi-mul.c
new file mode 100644
index 000000000000..1f3219e27292
--- /dev/null
+++ b/lib/mpi/mpi-mul.c
@@ -0,0 +1,194 @@
+/* mpi-mul.c  -  MPI functions
+ *      Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+ *      Copyright (C) 1998, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *       Actually it's the same code with only minor changes in the
+ *       way the data is stored; this is to support the abstraction
+ *       of an optional secure memory allocation which may be used
+ *       to avoid revealing of sensitive data due to paging etc.
+ *       The GNU MP Library itself is published under the LGPL;
+ *       however I decided to publish this code under the plain GPL.
+ */
+#include "mpi-internal.h"
+int mpi_mul_ui(MPI prod, MPI mult, unsigned long small_mult)
+{
+        mpi_size_t size, prod_size;
+        mpi_ptr_t prod_ptr;
+        mpi_limb_t cy;
+        int sign;
+        size = mult->nlimbs;
+        sign = mult->sign;
+        if (!size || !small_mult) {
+                prod->nlimbs = 0;
+                prod->sign = 0;
+                return 0;
+        }
+        prod_size = size + 1;
+        if (prod->alloced < prod_size)
+                if (mpi_resize(prod, prod_size) < 0)
+                        return -ENOMEM;
+        prod_ptr = prod->d;
+        cy = mpihelp_mul_1(prod_ptr, mult->d, size, (mpi_limb_t) small_mult);
+        if (cy)
+                prod_ptr[size++] = cy;
+        prod->nlimbs = size;
+        prod->sign = sign;
+        return 0;
+}
+int mpi_mul_2exp(MPI w, MPI u, unsigned long cnt)
+{
+        mpi_size_t usize, wsize, limb_cnt;
+        mpi_ptr_t wp;
+        mpi_limb_t wlimb;
+        int usign, wsign;
+        usize = u->nlimbs;
+        usign = u->sign;
+        if (!usize) {
+                w->nlimbs = 0;
+                w->sign = 0;
+                return 0;
+        }
+        limb_cnt = cnt / BITS_PER_MPI_LIMB;
+        wsize = usize + limb_cnt + 1;
+        if (w->alloced < wsize)
+                if (mpi_resize(w, wsize) < 0)
+                        return -ENOMEM;
+        wp = w->d;
+        wsize = usize + limb_cnt;
+        wsign = usign;
+        cnt %= BITS_PER_MPI_LIMB;
+        if (cnt) {
+                wlimb = mpihelp_lshift(wp + limb_cnt, u->d, usize, cnt);
+                if (wlimb) {
+                        wp[wsize] = wlimb;
+                        wsize++;
+                }
+        } else {
+                MPN_COPY_DECR(wp + limb_cnt, u->d, usize);
+        }
+        /* Zero all whole limbs at low end.  Do it here and not before calling
+         * mpn_lshift, not to lose for U == W.  */
+        MPN_ZERO(wp, limb_cnt);
+        w->nlimbs = wsize;
+        w->sign = wsign;
+        return 0;
+}
+int mpi_mul(MPI w, MPI u, MPI v)
+{
+        int rc = -ENOMEM;
+        mpi_size_t usize, vsize, wsize;
+        mpi_ptr_t up, vp, wp;
+        mpi_limb_t cy;
+        int usign, vsign, sign_product;
+        int assign_wp = 0;
+        mpi_ptr_t tmp_limb = NULL;
+        if (u->nlimbs < v->nlimbs) {    /* Swap U and V. */
+                usize = v->nlimbs;
+                usign = v->sign;
+                up = v->d;
+                vsize = u->nlimbs;
+                vsign = u->sign;
+                vp = u->d;
+        } else {
+                usize = u->nlimbs;
+                usign = u->sign;
+                up = u->d;
+                vsize = v->nlimbs;
+                vsign = v->sign;
+                vp = v->d;
+        }
+        sign_product = usign ^ vsign;
+        wp = w->d;
+        /* Ensure W has space enough to store the result.  */
+        wsize = usize + vsize;
+        if (w->alloced < (size_t) wsize) {
+                if (wp == up || wp == vp) {
+                        wp = mpi_alloc_limb_space(wsize);
+                        if (!wp)
+                                goto nomem;
+                        assign_wp = 1;
+                } else {
+                        if (mpi_resize(w, wsize) < 0)
+                                goto nomem;
+                        wp = w->d;
+                }
+        } else {                /* Make U and V not overlap with W.      */
+                if (wp == up) {
+                        /* W and U are identical.  Allocate temporary space for U.      */
+                        up = tmp_limb = mpi_alloc_limb_space(usize);
+                        if (!up)
+                                goto nomem;
+                        /* Is V identical too?  Keep it identical with U.  */
+                        if (wp == vp)
+                                vp = up;
+                        /* Copy to the temporary space.  */
+                        MPN_COPY(up, wp, usize);
+                } else if (wp == vp) {
+                        /* W and V are identical.  Allocate temporary space for V.      */
+                        vp = tmp_limb = mpi_alloc_limb_space(vsize);
+                        if (!vp)
+                                goto nomem;
+                        /* Copy to the temporary space.  */
+                        MPN_COPY(vp, wp, vsize);
+                }
+        }
+        if (!vsize)
+                wsize = 0;
+        else {
+                if (mpihelp_mul(wp, up, usize, vp, vsize, &cy) < 0)
+                        goto nomem;
+                wsize -= cy ? 0 : 1;
+        }
+        if (assign_wp)
+                mpi_assign_limb_space(w, wp, wsize);
+        w->nlimbs = wsize;
+        w->sign = sign_product;
+        rc = 0;
+nomem:
+        if (tmp_limb)
+                mpi_free_limb_space(tmp_limb);
+        return rc;
+}
+int mpi_mulm(MPI w, MPI u, MPI v, MPI m)
+{
+        if (mpi_mul(w, u, v) < 0)
+                return -ENOMEM;
+        return mpi_fdiv_r(w, w, m);
+}
diff --git a/lib/mpi/mpi-pow.c b/lib/mpi/mpi-pow.c
new file mode 100644
index 000000000000..b04a3cf80080
--- /dev/null
+++ b/lib/mpi/mpi-pow.c
@@ -0,0 +1,323 @@
+/* mpi-pow.c  -  MPI functions
+ *      Copyright (C) 1994, 1996, 1998, 2000 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *       Actually it's the same code with only minor changes in the
+ *       way the data is stored; this is to support the abstraction
+ *       of an optional secure memory allocation which may be used
+ *       to avoid revealing of sensitive data due to paging etc.
+ *       The GNU MP Library itself is published under the LGPL;
+ *       however I decided to publish this code under the plain GPL.
+ */
+#include <linux/string.h>
+#include "mpi-internal.h"
+#include "longlong.h"
+/****************
+ * RES = BASE ^ EXP mod MOD
+ */
+int mpi_powm(MPI res, MPI base, MPI exp, MPI mod)
+{
+        mpi_ptr_t mp_marker = NULL, bp_marker = NULL, ep_marker = NULL;
+        mpi_ptr_t xp_marker = NULL;
+        mpi_ptr_t tspace = NULL;
+        mpi_ptr_t rp, ep, mp, bp;
+        mpi_size_t esize, msize, bsize, rsize;
+        int esign, msign, bsign, rsign;
+        mpi_size_t size;
+        int mod_shift_cnt;
+        int negative_result;
+        int assign_rp = 0;
+        mpi_size_t tsize = 0;   /* to avoid compiler warning */
+        /* fixme: we should check that the warning is void */
+        int rc = -ENOMEM;
+        esize = exp->nlimbs;
+        msize = mod->nlimbs;
+        size = 2 * msize;
+        esign = exp->sign;
+        msign = mod->sign;
+        rp = res->d;
+        ep = exp->d;
+        if (!msize)
+                msize = 1 / msize;      /* provoke a signal */
+        if (!esize) {
+                /* Exponent is zero, result is 1 mod MOD, i.e., 1 or 0
+                 * depending on if MOD equals 1.  */
+                rp[0] = 1;
+                res->nlimbs = (msize == 1 && mod->d[0] == 1) ? 0 : 1;
+                res->sign = 0;
+                goto leave;
+        }
+        /* Normalize MOD (i.e. make its most significant bit set) as required by
+         * mpn_divrem.  This will make the intermediate values in the calculation
+         * slightly larger, but the correct result is obtained after a final
+         * reduction using the original MOD value.  */
+        mp = mp_marker = mpi_alloc_limb_space(msize);
+        if (!mp)
+                goto enomem;
+        count_leading_zeros(mod_shift_cnt, mod->d[msize - 1]);
+        if (mod_shift_cnt)
+                mpihelp_lshift(mp, mod->d, msize, mod_shift_cnt);
+        else
+                MPN_COPY(mp, mod->d, msize);
+        bsize = base->nlimbs;
+        bsign = base->sign;
+        if (bsize > msize) {    /* The base is larger than the module. Reduce it. */
+                /* Allocate (BSIZE + 1) with space for remainder and quotient.
+                 * (The quotient is (bsize - msize + 1) limbs.)  */
+                bp = bp_marker = mpi_alloc_limb_space(bsize + 1);
+                if (!bp)
+                        goto enomem;
+                MPN_COPY(bp, base->d, bsize);
+                /* We don't care about the quotient, store it above the remainder,
+                 * at BP + MSIZE.  */
+                mpihelp_divrem(bp + msize, 0, bp, bsize, mp, msize);
+                bsize = msize;
+                /* Canonicalize the base, since we are going to multiply with it
+                 * quite a few times.  */
+                MPN_NORMALIZE(bp, bsize);
+        } else
+                bp = base->d;
+        if (!bsize) {
+                res->nlimbs = 0;
+                res->sign = 0;
+                goto leave;
+        }
+        if (res->alloced < size) {
+                /* We have to allocate more space for RES.  If any of the input
+                 * parameters are identical to RES, defer deallocation of the old
+                 * space.  */
+                if (rp == ep || rp == mp || rp == bp) {
+                        rp = mpi_alloc_limb_space(size);
+                        if (!rp)
+                                goto enomem;
+                        assign_rp = 1;
+                } else {
+                        if (mpi_resize(res, size) < 0)
+                                goto enomem;
+                        rp = res->d;
+                }
+        } else {                /* Make BASE, EXP and MOD not overlap with RES.  */
+                if (rp == bp) {
+                        /* RES and BASE are identical.  Allocate temp. space for BASE.  */
+                        BUG_ON(bp_marker);
+                        bp = bp_marker = mpi_alloc_limb_space(bsize);
+                        if (!bp)
+                                goto enomem;
+                        MPN_COPY(bp, rp, bsize);
+                }
+                if (rp == ep) {
+                        /* RES and EXP are identical.  Allocate temp. space for EXP.  */
+                        ep = ep_marker = mpi_alloc_limb_space(esize);
+                        if (!ep)
+                                goto enomem;
+                        MPN_COPY(ep, rp, esize);
+                }
+                if (rp == mp) {
+                        /* RES and MOD are identical.  Allocate temporary space for MOD. */
+                        BUG_ON(mp_marker);
+                        mp = mp_marker = mpi_alloc_limb_space(msize);
+                        if (!mp)
+                                goto enomem;
+                        MPN_COPY(mp, rp, msize);
+                }
+        }
+        MPN_COPY(rp, bp, bsize);
+        rsize = bsize;
+        rsign = bsign;
+        {
+                mpi_size_t i;
+                mpi_ptr_t xp;
+                int c;
+                mpi_limb_t e;
+                mpi_limb_t carry_limb;
+                struct karatsuba_ctx karactx;
+                xp = xp_marker = mpi_alloc_limb_space(2 * (msize + 1));
+                if (!xp)
+                        goto enomem;
+                memset(&karactx, 0, sizeof karactx);
+                negative_result = (ep[0] & 1) && base->sign;
+                i = esize - 1;
+                e = ep[i];
+                count_leading_zeros(c, e);
+                e = (e << c) << 1;      /* shift the exp bits to the left, lose msb */
+                c = BITS_PER_MPI_LIMB - 1 - c;
+                /* Main loop.
+                 *
+                 * Make the result be pointed to alternately by XP and RP.  This
+                 * helps us avoid block copying, which would otherwise be necessary
+                 * with the overlap restrictions of mpihelp_divmod. With 50% probability
+                 * the result after this loop will be in the area originally pointed
+                 * by RP (==RES->d), and with 50% probability in the area originally
+                 * pointed to by XP.
+                 */
+                for (;;) {
+                        while (c) {
+                                mpi_ptr_t tp;
+                                mpi_size_t xsize;
+                                /*if (mpihelp_mul_n(xp, rp, rp, rsize) < 0) goto enomem */
+                                if (rsize < KARATSUBA_THRESHOLD)
+                                        mpih_sqr_n_basecase(xp, rp, rsize);
+                                else {
+                                        if (!tspace) {
+                                                tsize = 2 * rsize;
+                                                tspace =
+                                                    mpi_alloc_limb_space(tsize);
+                                                if (!tspace)
+                                                        goto enomem;
+                                        } else if (tsize < (2 * rsize)) {
+                                                mpi_free_limb_space(tspace);
+                                                tsize = 2 * rsize;
+                                                tspace =
+                                                    mpi_alloc_limb_space(tsize);
+                                                if (!tspace)
+                                                        goto enomem;
+                                        }
+                                        mpih_sqr_n(xp, rp, rsize, tspace);
+                                }
+                                xsize = 2 * rsize;
+                                if (xsize > msize) {
+                                        mpihelp_divrem(xp + msize, 0, xp, xsize,
+                                                       mp, msize);
+                                        xsize = msize;
+                                }
+                                tp = rp;
+                                rp = xp;
+                                xp = tp;
+                                rsize = xsize;
+                                if ((mpi_limb_signed_t) e < 0) {
+                                        /*mpihelp_mul( xp, rp, rsize, bp, bsize ); */
+                                        if (bsize < KARATSUBA_THRESHOLD) {
+                                                mpi_limb_t tmp;
+                                                if (mpihelp_mul
+                                                    (xp, rp, rsize, bp, bsize,
+                                                     &tmp) < 0)
+                                                        goto enomem;
+                                        } else {
+                                                if (mpihelp_mul_karatsuba_case
+                                                    (xp, rp, rsize, bp, bsize,
+                                                     &karactx) < 0)
+                                                        goto enomem;
+                                        }
+                                        xsize = rsize + bsize;
+                                        if (xsize > msize) {
+                                                mpihelp_divrem(xp + msize, 0,
+                                                               xp, xsize, mp,
+                                                               msize);
+                                                xsize = msize;
+                                        }
+                                        tp = rp;
+                                        rp = xp;
+                                        xp = tp;
+                                        rsize = xsize;
+                                }
+                                e <<= 1;
+                                c--;
+                        }
+                        i--;
+                        if (i < 0)
+                                break;
+                        e = ep[i];
+                        c = BITS_PER_MPI_LIMB;
+                }
+                /* We shifted MOD, the modulo reduction argument, left MOD_SHIFT_CNT
+                 * steps.  Adjust the result by reducing it with the original MOD.
+                 *
+                 * Also make sure the result is put in RES->d (where it already
+                 * might be, see above).
+                 */
+                if (mod_shift_cnt) {
+                        carry_limb =
+                            mpihelp_lshift(res->d, rp, rsize, mod_shift_cnt);
+                        rp = res->d;
+                        if (carry_limb) {
+                                rp[rsize] = carry_limb;
+                                rsize++;
+                        }
+                } else {
+                        MPN_COPY(res->d, rp, rsize);
+                        rp = res->d;
+                }
+                if (rsize >= msize) {
+                        mpihelp_divrem(rp + msize, 0, rp, rsize, mp, msize);
+                        rsize = msize;
+                }
+                /* Remove any leading zero words from the result.  */
+                if (mod_shift_cnt)
+                        mpihelp_rshift(rp, rp, rsize, mod_shift_cnt);
+                MPN_NORMALIZE(rp, rsize);
+                mpihelp_release_karatsuba_ctx(&karactx);
+        }
+        if (negative_result && rsize) {
+                if (mod_shift_cnt)
+                        mpihelp_rshift(mp, mp, msize, mod_shift_cnt);
+                mpihelp_sub(rp, mp, msize, rp, rsize);
+                rsize = msize;
+                rsign = msign;
+                MPN_NORMALIZE(rp, rsize);
+        }
+        res->nlimbs = rsize;
+        res->sign = rsign;
+leave:
+        rc = 0;
+enomem:
+        if (assign_rp)
+                mpi_assign_limb_space(res, rp, size);
+        if (mp_marker)
+                mpi_free_limb_space(mp_marker);
+        if (bp_marker)
+                mpi_free_limb_space(bp_marker);
+        if (ep_marker)
+                mpi_free_limb_space(ep_marker);
+        if (xp_marker)
+                mpi_free_limb_space(xp_marker);
+        if (tspace)
+                mpi_free_limb_space(tspace);
+        return rc;
+}
+EXPORT_SYMBOL_GPL(mpi_powm);
diff --git a/lib/mpi/mpi-scan.c b/lib/mpi/mpi-scan.c
new file mode 100644
index 000000000000..b2da5ad96199
--- /dev/null
+++ b/lib/mpi/mpi-scan.c
@@ -0,0 +1,136 @@
+/* mpi-scan.c  -  MPI functions
+ * Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+#include "mpi-internal.h"
+#include "longlong.h"
+/****************
+ * Scan through an mpi and return byte for byte. a -1 is returned to indicate
+ * the end of the mpi. Scanning is done from the lsb to the msb, returned
+ * values are in the range of 0 .. 255.
+ *
+ * FIXME: This code is VERY ugly!
+ */
+int mpi_getbyte(const MPI a, unsigned idx)
+{
+        int i, j;
+        unsigned n;
+        mpi_ptr_t ap;
+        mpi_limb_t limb;
+        ap = a->d;
+        for (n = 0, i = 0; i < a->nlimbs; i++) {
+                limb = ap[i];
+                for (j = 0; j < BYTES_PER_MPI_LIMB; j++, n++)
+                        if (n == idx)
+                                return (limb >> j * 8) & 0xff;
+        }
+        return -1;
+}
+/****************
+ * Put a value at position IDX into A. idx counts from lsb to msb
+ */
+void mpi_putbyte(MPI a, unsigned idx, int xc)
+{
+        int i, j;
+        unsigned n;
+        mpi_ptr_t ap;
+        mpi_limb_t limb, c;
+        c = xc & 0xff;
+        ap = a->d;
+        for (n = 0, i = 0; i < a->alloced; i++) {
+                limb = ap[i];
+                for (j = 0; j < BYTES_PER_MPI_LIMB; j++, n++)
+                        if (n == idx) {
+#if BYTES_PER_MPI_LIMB == 4
+                                if (j == 0)
+                                        limb = (limb & 0xffffff00) | c;
+                                else if (j == 1)
+                                        limb = (limb & 0xffff00ff) | (c << 8);
+                                else if (j == 2)
+                                        limb = (limb & 0xff00ffff) | (c << 16);
+                                else
+                                        limb = (limb & 0x00ffffff) | (c << 24);
+#elif BYTES_PER_MPI_LIMB == 8
+                                if (j == 0)
+                                        limb = (limb & 0xffffffffffffff00) | c;
+                                else if (j == 1)
+                                        limb =
+                                            (limb & 0xffffffffffff00ff) | (c <<
+                                                                           8);
+                                else if (j == 2)
+                                        limb =
+                                            (limb & 0xffffffffff00ffff) | (c <<
+                                                                           16);
+                                else if (j == 3)
+                                        limb =
+                                            (limb & 0xffffffff00ffffff) | (c <<
+                                                                           24);
+                                else if (j == 4)
+                                        limb =
+                                            (limb & 0xffffff00ffffffff) | (c <<
+                                                                           32);
+                                else if (j == 5)
+                                        limb =
+                                            (limb & 0xffff00ffffffffff) | (c <<
+                                                                           40);
+                                else if (j == 6)
+                                        limb =
+                                            (limb & 0xff00ffffffffffff) | (c <<
+                                                                           48);
+                                else
+                                        limb =
+                                            (limb & 0x00ffffffffffffff) | (c <<
+                                                                           56);
+#else
+#error please enhance this function, its ugly - i know.
+#endif
+                                if (a->nlimbs <= i)
+                                        a->nlimbs = i + 1;
+                                ap[i] = limb;
+                                return;
+                        }
+        }
+        log_bug("index out of range\n");
+}
+/****************
+ * Count the number of zerobits at the low end of A
+ */
+unsigned mpi_trailing_zeros(const MPI a)
+{
+        unsigned n, count = 0;
+        for (n = 0; n < a->nlimbs; n++) {
+                if (a->d[n]) {
+                        unsigned nn;
+                        mpi_limb_t alimb = a->d[n];
+                        count_trailing_zeros(nn, alimb);
+                        count += nn;
+                        break;
+                }
+                count += BITS_PER_MPI_LIMB;
+        }
+        return count;
+}
diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
new file mode 100644
index 000000000000..fe84bb978e3b
--- /dev/null
+++ b/lib/mpi/mpicoder.c
@@ -0,0 +1,365 @@
+/* mpicoder.c  -  Coder for the external representation of MPIs
+ * Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+#include "mpi-internal.h"
+#define DIM(v) (sizeof(v)/sizeof((v)[0]))
+#define MAX_EXTERN_MPI_BITS 16384
+static uint8_t asn[15] =        /* Object ID is 1.3.14.3.2.26 */
+{ 0x30, 0x21, 0x30, 0x09, 0x06, 0x05, 0x2b, 0x0e, 0x03,
+        0x02, 0x1a, 0x05, 0x00, 0x04, 0x14
+};
+MPI do_encode_md(const void *sha_buffer, unsigned nbits)
+{
+        int nframe = (nbits + 7) / 8;
+        uint8_t *frame, *fr_pt;
+        int i = 0, n;
+        size_t asnlen = DIM(asn);
+        MPI a = MPI_NULL;
+        if (SHA1_DIGEST_LENGTH + asnlen + 4 > nframe)
+                pr_info("MPI: can't encode a %d bit MD into a %d bits frame\n",
+                       (int)(SHA1_DIGEST_LENGTH * 8), (int)nbits);
+        /* We encode the MD in this way:
+         *
+         *       0  A PAD(n bytes)   0  ASN(asnlen bytes)  MD(len bytes)
+         *
+         * PAD consists of FF bytes.
+         */
+        frame = kmalloc(nframe, GFP_KERNEL);
+        if (!frame)
+                return MPI_NULL;
+        n = 0;
+        frame[n++] = 0;
+        frame[n++] = 1;         /* block type */
+        i = nframe - SHA1_DIGEST_LENGTH - asnlen - 3;
+        if (i <= 1) {
+                pr_info("MPI: message digest encoding failed\n");
+                kfree(frame);
+                return a;
+        }
+        memset(frame + n, 0xff, i);
+        n += i;
+        frame[n++] = 0;
+        memcpy(frame + n, &asn, asnlen);
+        n += asnlen;
+        memcpy(frame + n, sha_buffer, SHA1_DIGEST_LENGTH);
+        n += SHA1_DIGEST_LENGTH;
+        i = nframe;
+        fr_pt = frame;
+        if (n != nframe) {
+                printk
+                    ("MPI: message digest encoding failed, frame length is wrong\n");
+                kfree(frame);
+                return a;
+        }
+        a = mpi_alloc((nframe + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB);
+        mpi_set_buffer(a, frame, nframe, 0);
+        kfree(frame);
+        return a;
+}
+MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread)
+{
+        const uint8_t *buffer = xbuffer;
+        int i, j;
+        unsigned nbits, nbytes, nlimbs, nread = 0;
+        mpi_limb_t a;
+        MPI val = MPI_NULL;
+        if (*ret_nread < 2)
+                goto leave;
+        nbits = buffer[0] << 8 | buffer[1];
+        if (nbits > MAX_EXTERN_MPI_BITS) {
+                pr_info("MPI: mpi too large (%u bits)\n", nbits);
+                goto leave;
+        }
+        buffer += 2;
+        nread = 2;
+        nbytes = (nbits + 7) / 8;
+        nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB;
+        val = mpi_alloc(nlimbs);
+        if (!val)
+                return MPI_NULL;
+        i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB;
+        i %= BYTES_PER_MPI_LIMB;
+        val->nbits = nbits;
+        j = val->nlimbs = nlimbs;
+        val->sign = 0;
+        for (; j > 0; j--) {
+                a = 0;
+                for (; i < BYTES_PER_MPI_LIMB; i++) {
+                        if (++nread > *ret_nread) {
+                                printk
+                                    ("MPI: mpi larger than buffer nread=%d ret_nread=%d\n",
+                                     nread, *ret_nread);
+                                goto leave;
+                        }
+                        a <<= 8;
+                        a |= *buffer++;
+                }
+                i = 0;
+                val->d[j - 1] = a;
+        }
+leave:
+        *ret_nread = nread;
+        return val;
+}
+EXPORT_SYMBOL_GPL(mpi_read_from_buffer);
+/****************
+ * Make an mpi from a character string.
+ */
+int mpi_fromstr(MPI val, const char *str)
+{
+        int hexmode = 0, sign = 0, prepend_zero = 0, i, j, c, c1, c2;
+        unsigned nbits, nbytes, nlimbs;
+        mpi_limb_t a;
+        if (*str == '-') {
+                sign = 1;
+                str++;
+        }
+        if (*str == '0' && str[1] == 'x')
+                hexmode = 1;
+        else
+                return -EINVAL; /* other bases are not yet supported */
+        str += 2;
+        nbits = strlen(str) * 4;
+        if (nbits % 8)
+                prepend_zero = 1;
+        nbytes = (nbits + 7) / 8;
+        nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB;
+        if (val->alloced < nlimbs)
+                if (!mpi_resize(val, nlimbs))
+                        return -ENOMEM;
+        i = BYTES_PER_MPI_LIMB - nbytes % BYTES_PER_MPI_LIMB;
+        i %= BYTES_PER_MPI_LIMB;
+        j = val->nlimbs = nlimbs;
+        val->sign = sign;
+        for (; j > 0; j--) {
+                a = 0;
+                for (; i < BYTES_PER_MPI_LIMB; i++) {
+                        if (prepend_zero) {
+                                c1 = '0';
+                                prepend_zero = 0;
+                        } else
+                                c1 = *str++;
+                        assert(c1);
+                        c2 = *str++;
+                        assert(c2);
+                        if (c1 >= '0' && c1 <= '9')
+                                c = c1 - '0';
+                        else if (c1 >= 'a' && c1 <= 'f')
+                                c = c1 - 'a' + 10;
+                        else if (c1 >= 'A' && c1 <= 'F')
+                                c = c1 - 'A' + 10;
+                        else {
+                                mpi_clear(val);
+                                return 1;
+                        }
+                        c <<= 4;
+                        if (c2 >= '0' && c2 <= '9')
+                                c |= c2 - '0';
+                        else if (c2 >= 'a' && c2 <= 'f')
+                                c |= c2 - 'a' + 10;
+                        else if (c2 >= 'A' && c2 <= 'F')
+                                c |= c2 - 'A' + 10;
+                        else {
+                                mpi_clear(val);
+                                return 1;
+                        }
+                        a <<= 8;
+                        a |= c;
+                }
+                i = 0;
+                val->d[j - 1] = a;
+        }
+        return 0;
+}
+EXPORT_SYMBOL_GPL(mpi_fromstr);
+/****************
+ * Special function to get the low 8 bytes from an mpi.
+ * This can be used as a keyid; KEYID is an 2 element array.
+ * Return the low 4 bytes.
+ */
+u32 mpi_get_keyid(const MPI a, u32 *keyid)
+{
+#if BYTES_PER_MPI_LIMB == 4
+        if (keyid) {
+                keyid[0] = a->nlimbs >= 2 ? a->d[1] : 0;
+                keyid[1] = a->nlimbs >= 1 ? a->d[0] : 0;
+        }
+        return a->nlimbs >= 1 ? a->d[0] : 0;
+#elif BYTES_PER_MPI_LIMB == 8
+        if (keyid) {
+                keyid[0] = a->nlimbs ? (u32) (a->d[0] >> 32) : 0;
+                keyid[1] = a->nlimbs ? (u32) (a->d[0] & 0xffffffff) : 0;
+        }
+        return a->nlimbs ? (u32) (a->d[0] & 0xffffffff) : 0;
+#else
+#error Make this function work with other LIMB sizes
+#endif
+}
+/****************
+ * Return an allocated buffer with the MPI (msb first).
+ * NBYTES receives the length of this buffer. Caller must free the
+ * return string (This function does return a 0 byte buffer with NBYTES
+ * set to zero if the value of A is zero. If sign is not NULL, it will
+ * be set to the sign of the A.
+ */
+void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign)
+{
+        uint8_t *p, *buffer;
+        mpi_limb_t alimb;
+        int i;
+        unsigned int n;
+        if (sign)
+                *sign = a->sign;
+        *nbytes = n = a->nlimbs * BYTES_PER_MPI_LIMB;
+        if (!n)
+                n++;            /* avoid zero length allocation */
+        p = buffer = kmalloc(n, GFP_KERNEL);
+        for (i = a->nlimbs - 1; i >= 0; i--) {
+                alimb = a->d[i];
+#if BYTES_PER_MPI_LIMB == 4
+                *p++ = alimb >> 24;
+                *p++ = alimb >> 16;
+                *p++ = alimb >> 8;
+                *p++ = alimb;
+#elif BYTES_PER_MPI_LIMB == 8
+                *p++ = alimb >> 56;
+                *p++ = alimb >> 48;
+                *p++ = alimb >> 40;
+                *p++ = alimb >> 32;
+                *p++ = alimb >> 24;
+                *p++ = alimb >> 16;
+                *p++ = alimb >> 8;
+                *p++ = alimb;
+#else
+#error please implement for this limb size.
+#endif
+        }
+        /* this is sub-optimal but we need to do the shift operation
+         * because the caller has to free the returned buffer */
+        for (p = buffer; !*p && *nbytes; p++, --*nbytes)
+                ;
+        if (p != buffer)
+                memmove(buffer, p, *nbytes);
+        return buffer;
+}
+EXPORT_SYMBOL_GPL(mpi_get_buffer);
+/****************
+ * Use BUFFER to update MPI.
+ */
+int mpi_set_buffer(MPI a, const void *xbuffer, unsigned nbytes, int sign)
+{
+        const uint8_t *buffer = xbuffer, *p;
+        mpi_limb_t alimb;
+        int nlimbs;
+        int i;
+        nlimbs = (nbytes + BYTES_PER_MPI_LIMB - 1) / BYTES_PER_MPI_LIMB;
+        if (RESIZE_IF_NEEDED(a, nlimbs) < 0)
+                return -ENOMEM;
+        a->sign = sign;
+        for (i = 0, p = buffer + nbytes - 1; p >= buffer + BYTES_PER_MPI_LIMB;) {
+#if BYTES_PER_MPI_LIMB == 4
+                alimb = (mpi_limb_t) *p--;
+                alimb |= (mpi_limb_t) *p-- << 8;
+                alimb |= (mpi_limb_t) *p-- << 16;
+                alimb |= (mpi_limb_t) *p-- << 24;
+#elif BYTES_PER_MPI_LIMB == 8
+                alimb = (mpi_limb_t) *p--;
+                alimb |= (mpi_limb_t) *p-- << 8;
+                alimb |= (mpi_limb_t) *p-- << 16;
+                alimb |= (mpi_limb_t) *p-- << 24;
+                alimb |= (mpi_limb_t) *p-- << 32;
+                alimb |= (mpi_limb_t) *p-- << 40;
+                alimb |= (mpi_limb_t) *p-- << 48;
+                alimb |= (mpi_limb_t) *p-- << 56;
+#else
+#error please implement for this limb size.
+#endif
+                a->d[i++] = alimb;
+        }
+        if (p >= buffer) {
+#if BYTES_PER_MPI_LIMB == 4
+                alimb = *p--;
+                if (p >= buffer)
+                        alimb |= (mpi_limb_t) *p-- << 8;
+                if (p >= buffer)
+                        alimb |= (mpi_limb_t) *p-- << 16;
+                if (p >= buffer)
+                        alimb |= (mpi_limb_t) *p-- << 24;
+#elif BYTES_PER_MPI_LIMB == 8
+                alimb = (mpi_limb_t) *p--;
+                if (p >= buffer)
+                        alimb |= (mpi_limb_t) *p-- << 8;
+                if (p >= buffer)
+                        alimb |= (mpi_limb_t) *p-- << 16;
+                if (p >= buffer)
+                        alimb |= (mpi_limb_t) *p-- << 24;
+                if (p >= buffer)
+                        alimb |= (mpi_limb_t) *p-- << 32;
+                if (p >= buffer)
+                        alimb |= (mpi_limb_t) *p-- << 40;
+                if (p >= buffer)
+                        alimb |= (mpi_limb_t) *p-- << 48;
+                if (p >= buffer)
+                        alimb |= (mpi_limb_t) *p-- << 56;
+#else
+#error please implement for this limb size.
+#endif
+                a->d[i++] = alimb;
+        }
+        a->nlimbs = i;
+        if (i != nlimbs) {
+                pr_emerg("MPI: mpi_set_buffer: Assertion failed (%d != %d)", i,
+                       nlimbs);
+                BUG();
+        }
+        return 0;
+}
+EXPORT_SYMBOL_GPL(mpi_set_buffer);
diff --git a/lib/mpi/mpih-cmp.c b/lib/mpi/mpih-cmp.c
new file mode 100644
index 000000000000..b2fd39677f1b
--- /dev/null
+++ b/lib/mpi/mpih-cmp.c
@@ -0,0 +1,56 @@
+/* mpihelp-sub.c  -  MPI helper functions
+ *      Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+ *      Copyright (C) 1998, 1999, 2000, 2001 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *       Actually it's the same code with only minor changes in the
+ *       way the data is stored; this is to support the abstraction
+ *       of an optional secure memory allocation which may be used
+ *       to avoid revealing of sensitive data due to paging etc.
+ *       The GNU MP Library itself is published under the LGPL;
+ *       however I decided to publish this code under the plain GPL.
+ */
+#include "mpi-internal.h"
+/****************
+ * Compare OP1_PTR/OP1_SIZE with OP2_PTR/OP2_SIZE.
+ * There are no restrictions on the relative sizes of
+ * the two arguments.
+ * Return 1 if OP1 > OP2, 0 if they are equal, and -1 if OP1 < OP2.
+ */
+int mpihelp_cmp(mpi_ptr_t op1_ptr, mpi_ptr_t op2_ptr, mpi_size_t size)
+{
+        mpi_size_t i;
+        mpi_limb_t op1_word, op2_word;
+        for (i = size - 1; i >= 0; i--) {
+                op1_word = op1_ptr[i];
+                op2_word = op2_ptr[i];
+                if (op1_word != op2_word)
+                        goto diff;
+        }
+        return 0;
+diff:
+        /* This can *not* be simplified to
+         *   op2_word - op2_word
+         * since that expression might give signed overflow.  */
+        return (op1_word > op2_word) ? 1 : -1;
+}
diff --git a/lib/mpi/mpih-div.c b/lib/mpi/mpih-div.c
new file mode 100644
index 000000000000..87ede162dfab
--- /dev/null
+++ b/lib/mpi/mpih-div.c
@@ -0,0 +1,541 @@
+/* mpihelp-div.c  -  MPI helper functions
+ *      Copyright (C) 1994, 1996 Free Software Foundation, Inc.
+ *      Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *       Actually it's the same code with only minor changes in the
+ *       way the data is stored; this is to support the abstraction
+ *       of an optional secure memory allocation which may be used
+ *       to avoid revealing of sensitive data due to paging etc.
+ *       The GNU MP Library itself is published under the LGPL;
+ *       however I decided to publish this code under the plain GPL.
+ */
+#include "mpi-internal.h"
+#include "longlong.h"
+#ifndef UMUL_TIME
+#define UMUL_TIME 1
+#endif
+#ifndef UDIV_TIME
+#define UDIV_TIME UMUL_TIME
+#endif
+/* FIXME: We should be using invert_limb (or invert_normalized_limb)
+ * here (not udiv_qrnnd).
+ */
+mpi_limb_t
+mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
+              mpi_limb_t divisor_limb)
+{
+        mpi_size_t i;
+        mpi_limb_t n1, n0, r;
+        int dummy;
+        /* Botch: Should this be handled at all?  Rely on callers?  */
+        if (!dividend_size)
+                return 0;
+        /* If multiplication is much faster than division, and the
+         * dividend is large, pre-invert the divisor, and use
+         * only multiplications in the inner loop.
+         *
+         * This test should be read:
+         *   Does it ever help to use udiv_qrnnd_preinv?
+         *     && Does what we save compensate for the inversion overhead?
+         */
+        if (UDIV_TIME > (2 * UMUL_TIME + 6)
+            && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) {
+                int normalization_steps;
+                count_leading_zeros(normalization_steps, divisor_limb);
+                if (normalization_steps) {
+                        mpi_limb_t divisor_limb_inverted;
+                        divisor_limb <<= normalization_steps;
+                        /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
+                         * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
+                         * most significant bit (with weight 2**N) implicit.
+                         *
+                         * Special case for DIVISOR_LIMB == 100...000.
+                         */
+                        if (!(divisor_limb << 1))
+                                divisor_limb_inverted = ~(mpi_limb_t) 0;
+                        else
+                                udiv_qrnnd(divisor_limb_inverted, dummy,
+                                           -divisor_limb, 0, divisor_limb);
+                        n1 = dividend_ptr[dividend_size - 1];
+                        r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
+                        /* Possible optimization:
+                         * if (r == 0
+                         * && divisor_limb > ((n1 << normalization_steps)
+                         *                 | (dividend_ptr[dividend_size - 2] >> ...)))
+                         * ...one division less...
+                         */
+                        for (i = dividend_size - 2; i >= 0; i--) {
+                                n0 = dividend_ptr[i];
+                                UDIV_QRNND_PREINV(dummy, r, r,
+                                                  ((n1 << normalization_steps)
+                                                   | (n0 >>
+                                                      (BITS_PER_MPI_LIMB -
+                                                       normalization_steps))),
+                                                  divisor_limb,
+                                                  divisor_limb_inverted);
+                                n1 = n0;
+                        }
+                        UDIV_QRNND_PREINV(dummy, r, r,
+                                          n1 << normalization_steps,
+                                          divisor_limb, divisor_limb_inverted);
+                        return r >> normalization_steps;
+                } else {
+                        mpi_limb_t divisor_limb_inverted;
+                        /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
+                         * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
+                         * most significant bit (with weight 2**N) implicit.
+                         *
+                         * Special case for DIVISOR_LIMB == 100...000.
+                         */
+                        if (!(divisor_limb << 1))
+                                divisor_limb_inverted = ~(mpi_limb_t) 0;
+                        else
+                                udiv_qrnnd(divisor_limb_inverted, dummy,
+                                           -divisor_limb, 0, divisor_limb);
+                        i = dividend_size - 1;
+                        r = dividend_ptr[i];
+                        if (r >= divisor_limb)
+                                r = 0;
+                        else
+                                i--;
+                        for (; i >= 0; i--) {
+                                n0 = dividend_ptr[i];
+                                UDIV_QRNND_PREINV(dummy, r, r,
+                                                  n0, divisor_limb,
+                                                  divisor_limb_inverted);
+                        }
+                        return r;
+                }
+        } else {
+                if (UDIV_NEEDS_NORMALIZATION) {
+                        int normalization_steps;
+                        count_leading_zeros(normalization_steps, divisor_limb);
+                        if (normalization_steps) {
+                                divisor_limb <<= normalization_steps;
+                                n1 = dividend_ptr[dividend_size - 1];
+                                r = n1 >> (BITS_PER_MPI_LIMB -
+                                           normalization_steps);
+                                /* Possible optimization:
+                                 * if (r == 0
+                                 * && divisor_limb > ((n1 << normalization_steps)
+                                 *                 | (dividend_ptr[dividend_size - 2] >> ...)))
+                                 * ...one division less...
+                                 */
+                                for (i = dividend_size - 2; i >= 0; i--) {
+                                        n0 = dividend_ptr[i];
+                                        udiv_qrnnd(dummy, r, r,
+                                                   ((n1 << normalization_steps)
+                                                    | (n0 >>
+                                                       (BITS_PER_MPI_LIMB -
+                                                        normalization_steps))),
+                                                   divisor_limb);
+                                        n1 = n0;
+                                }
+                                udiv_qrnnd(dummy, r, r,
+                                           n1 << normalization_steps,
+                                           divisor_limb);
+                                return r >> normalization_steps;
+                        }
+                }
+                /* No normalization needed, either because udiv_qrnnd doesn't require
+                 * it, or because DIVISOR_LIMB is already normalized.  */
+                i = dividend_size - 1;
+                r = dividend_ptr[i];
+                if (r >= divisor_limb)
+                        r = 0;
+                else
+                        i--;
+                for (; i >= 0; i--) {
+                        n0 = dividend_ptr[i];
+                        udiv_qrnnd(dummy, r, r, n0, divisor_limb);
+                }
+                return r;
+        }
+}
+/* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
+ * the NSIZE-DSIZE least significant quotient limbs at QP
+ * and the DSIZE long remainder at NP.  If QEXTRA_LIMBS is
+ * non-zero, generate that many fraction bits and append them after the
+ * other quotient limbs.
+ * Return the most significant limb of the quotient, this is always 0 or 1.
+ *
+ * Preconditions:
+ * 0. NSIZE >= DSIZE.
+ * 1. The most significant bit of the divisor must be set.
+ * 2. QP must either not overlap with the input operands at all, or
+ *    QP + DSIZE >= NP must hold true.  (This means that it's
+ *    possible to put the quotient in the high part of NUM, right after the
+ *    remainder in NUM.
+ * 3. NSIZE >= DSIZE, even if QEXTRA_LIMBS is non-zero.
+ */
+mpi_limb_t
+mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs,
+               mpi_ptr_t np, mpi_size_t nsize, mpi_ptr_t dp, mpi_size_t dsize)
+{
+        mpi_limb_t most_significant_q_limb = 0;
+        switch (dsize) {
+        case 0:
+                /* We are asked to divide by zero, so go ahead and do it!  (To make
+                   the compiler not remove this statement, return the value.)  */
+                return 1 / dsize;
+        case 1:
+                {
+                        mpi_size_t i;
+                        mpi_limb_t n1;
+                        mpi_limb_t d;
+                        d = dp[0];
+                        n1 = np[nsize - 1];
+                        if (n1 >= d) {
+                                n1 -= d;
+                                most_significant_q_limb = 1;
+                        }
+                        qp += qextra_limbs;
+                        for (i = nsize - 2; i >= 0; i--)
+                                udiv_qrnnd(qp[i], n1, n1, np[i], d);
+                        qp -= qextra_limbs;
+                        for (i = qextra_limbs - 1; i >= 0; i--)
+                                udiv_qrnnd(qp[i], n1, n1, 0, d);
+                        np[0] = n1;
+                }
+                break;
+        case 2:
+                {
+                        mpi_size_t i;
+                        mpi_limb_t n1, n0, n2;
+                        mpi_limb_t d1, d0;
+                        np += nsize - 2;
+                        d1 = dp[1];
+                        d0 = dp[0];
+                        n1 = np[1];
+                        n0 = np[0];
+                        if (n1 >= d1 && (n1 > d1 || n0 >= d0)) {
+                                sub_ddmmss(n1, n0, n1, n0, d1, d0);
+                                most_significant_q_limb = 1;
+                        }
+                        for (i = qextra_limbs + nsize - 2 - 1; i >= 0; i--) {
+                                mpi_limb_t q;
+                                mpi_limb_t r;
+                                if (i >= qextra_limbs)
+                                        np--;
+                                else
+                                        np[0] = 0;
+                                if (n1 == d1) {
+                                        /* Q should be either 111..111 or 111..110.  Need special
+                                         * treatment of this rare case as normal division would
+                                         * give overflow.  */
+                                        q = ~(mpi_limb_t) 0;
+                                        r = n0 + d1;
+                                        if (r < d1) {   /* Carry in the addition? */
+                                                add_ssaaaa(n1, n0, r - d0,
+                                                           np[0], 0, d0);
+                                                qp[i] = q;
+                                                continue;
+                                        }
+                                        n1 = d0 - (d0 != 0 ? 1 : 0);
+                                        n0 = -d0;
+                                } else {
+                                        udiv_qrnnd(q, r, n1, n0, d1);
+                                        umul_ppmm(n1, n0, d0, q);
+                                }
+                                n2 = np[0];
+q_test:
+                                if (n1 > r || (n1 == r && n0 > n2)) {
+                                        /* The estimated Q was too large.  */
+                                        q--;
+                                        sub_ddmmss(n1, n0, n1, n0, 0, d0);
+                                        r += d1;
+                                        if (r >= d1)    /* If not carry, test Q again.  */
+                                                goto q_test;
+                                }
+                                qp[i] = q;
+                                sub_ddmmss(n1, n0, r, n2, n1, n0);
+                        }
+                        np[1] = n1;
+                        np[0] = n0;
+                }
+                break;
+        default:
+                {
+                        mpi_size_t i;
+                        mpi_limb_t dX, d1, n0;
+                        np += nsize - dsize;
+                        dX = dp[dsize - 1];
+                        d1 = dp[dsize - 2];
+                        n0 = np[dsize - 1];
+                        if (n0 >= dX) {
+                                if (n0 > dX
+                                    || mpihelp_cmp(np, dp, dsize - 1) >= 0) {
+                                        mpihelp_sub_n(np, np, dp, dsize);
+                                        n0 = np[dsize - 1];
+                                        most_significant_q_limb = 1;
+                                }
+                        }
+                        for (i = qextra_limbs + nsize - dsize - 1; i >= 0; i--) {
+                                mpi_limb_t q;
+                                mpi_limb_t n1, n2;
+                                mpi_limb_t cy_limb;
+                                if (i >= qextra_limbs) {
+                                        np--;
+                                        n2 = np[dsize];
+                                } else {
+                                        n2 = np[dsize - 1];
+                                        MPN_COPY_DECR(np + 1, np, dsize - 1);
+                                        np[0] = 0;
+                                }
+                                if (n0 == dX) {
+                                        /* This might over-estimate q, but it's probably not worth
+                                         * the extra code here to find out.  */
+                                        q = ~(mpi_limb_t) 0;
+                                } else {
+                                        mpi_limb_t r;
+                                        udiv_qrnnd(q, r, n0, np[dsize - 1], dX);
+                                        umul_ppmm(n1, n0, d1, q);
+                                        while (n1 > r
+                                               || (n1 == r
+                                                   && n0 > np[dsize - 2])) {
+                                                q--;
+                                                r += dX;
+                                                if (r < dX)     /* I.e. "carry in previous addition?" */
+                                                        break;
+                                                n1 -= n0 < d1;
+                                                n0 -= d1;
+                                        }
+                                }
+                                /* Possible optimization: We already have (q * n0) and (1 * n1)
+                                 * after the calculation of q.  Taking advantage of that, we
+                                 * could make this loop make two iterations less.  */
+                                cy_limb = mpihelp_submul_1(np, dp, dsize, q);
+                                if (n2 != cy_limb) {
+                                        mpihelp_add_n(np, np, dp, dsize);
+                                        q--;
+                                }
+                                qp[i] = q;
+                                n0 = np[dsize - 1];
+                        }
+                }
+        }
+        return most_significant_q_limb;
+}
+/****************
+ * Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
+ * Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR.
+ * Return the single-limb remainder.
+ * There are no constraints on the value of the divisor.
+ *
+ * QUOT_PTR and DIVIDEND_PTR might point to the same limb.
+ */
+mpi_limb_t
+mpihelp_divmod_1(mpi_ptr_t quot_ptr,
+                 mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
+                 mpi_limb_t divisor_limb)
+{
+        mpi_size_t i;
+        mpi_limb_t n1, n0, r;
+        int dummy;
+        if (!dividend_size)
+                return 0;
+        /* If multiplication is much faster than division, and the
+         * dividend is large, pre-invert the divisor, and use
+         * only multiplications in the inner loop.
+         *
+         * This test should be read:
+         * Does it ever help to use udiv_qrnnd_preinv?
+         * && Does what we save compensate for the inversion overhead?
+         */
+        if (UDIV_TIME > (2 * UMUL_TIME + 6)
+            && (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) {
+                int normalization_steps;
+                count_leading_zeros(normalization_steps, divisor_limb);
+                if (normalization_steps) {
+                        mpi_limb_t divisor_limb_inverted;
+                        divisor_limb <<= normalization_steps;
+                        /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
+                         * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
+                         * most significant bit (with weight 2**N) implicit.
+                         */
+                        /* Special case for DIVISOR_LIMB == 100...000.  */
+                        if (!(divisor_limb << 1))
+                                divisor_limb_inverted = ~(mpi_limb_t) 0;
+                        else
+                                udiv_qrnnd(divisor_limb_inverted, dummy,
+                                           -divisor_limb, 0, divisor_limb);
+                        n1 = dividend_ptr[dividend_size - 1];
+                        r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
+                        /* Possible optimization:
+                         * if (r == 0
+                         * && divisor_limb > ((n1 << normalization_steps)
+                         *                 | (dividend_ptr[dividend_size - 2] >> ...)))
+                         * ...one division less...
+                         */
+                        for (i = dividend_size - 2; i >= 0; i--) {
+                                n0 = dividend_ptr[i];
+                                UDIV_QRNND_PREINV(quot_ptr[i + 1], r, r,
+                                                  ((n1 << normalization_steps)
+                                                   | (n0 >>
+                                                      (BITS_PER_MPI_LIMB -
+                                                       normalization_steps))),
+                                                  divisor_limb,
+                                                  divisor_limb_inverted);
+                                n1 = n0;
+                        }
+                        UDIV_QRNND_PREINV(quot_ptr[0], r, r,
+                                          n1 << normalization_steps,
+                                          divisor_limb, divisor_limb_inverted);
+                        return r >> normalization_steps;
+                } else {
+                        mpi_limb_t divisor_limb_inverted;
+                        /* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
+                         * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
+                         * most significant bit (with weight 2**N) implicit.
+                         */
+                        /* Special case for DIVISOR_LIMB == 100...000.  */
+                        if (!(divisor_limb << 1))
+                                divisor_limb_inverted = ~(mpi_limb_t) 0;
+                        else
+                                udiv_qrnnd(divisor_limb_inverted, dummy,
+                                           -divisor_limb, 0, divisor_limb);
+                        i = dividend_size - 1;
+                        r = dividend_ptr[i];
+                        if (r >= divisor_limb)
+                                r = 0;
+                        else
+                                quot_ptr[i--] = 0;
+                        for (; i >= 0; i--) {
+                                n0 = dividend_ptr[i];
+                                UDIV_QRNND_PREINV(quot_ptr[i], r, r,
+                                                  n0, divisor_limb,
+                                                  divisor_limb_inverted);
+                        }
+                        return r;
+                }
+        } else {
+                if (UDIV_NEEDS_NORMALIZATION) {
+                        int normalization_steps;
+                        count_leading_zeros(normalization_steps, divisor_limb);
+                        if (normalization_steps) {
+                                divisor_limb <<= normalization_steps;
+                                n1 = dividend_ptr[dividend_size - 1];
+                                r = n1 >> (BITS_PER_MPI_LIMB -
+                                           normalization_steps);
+                                /* Possible optimization:
+                                 * if (r == 0
+                                 * && divisor_limb > ((n1 << normalization_steps)
+                                 *                 | (dividend_ptr[dividend_size - 2] >> ...)))
+                                 * ...one division less...
+                                 */
+                                for (i = dividend_size - 2; i >= 0; i--) {
+                                        n0 = dividend_ptr[i];
+                                        udiv_qrnnd(quot_ptr[i + 1], r, r,
+                                                   ((n1 << normalization_steps)
+                                                    | (n0 >>
+                                                       (BITS_PER_MPI_LIMB -
+                                                        normalization_steps))),
+                                                   divisor_limb);
+                                        n1 = n0;
+                                }
+                                udiv_qrnnd(quot_ptr[0], r, r,
+                                           n1 << normalization_steps,
+                                           divisor_limb);
+                                return r >> normalization_steps;
+                        }
+                }
+                /* No normalization needed, either because udiv_qrnnd doesn't require
+                 * it, or because DIVISOR_LIMB is already normalized.  */
+                i = dividend_size - 1;
+                r = dividend_ptr[i];
+                if (r >= divisor_limb)
+                        r = 0;
+                else
+                        quot_ptr[i--] = 0;
+                for (; i >= 0; i--) {
+                        n0 = dividend_ptr[i];
+                        udiv_qrnnd(quot_ptr[i], r, r, n0, divisor_limb);
+                }
+                return r;
+        }
+}
diff --git a/lib/mpi/mpih-mul.c b/lib/mpi/mpih-mul.c
new file mode 100644
index 000000000000..c69c5eef233b
--- /dev/null
+++ b/lib/mpi/mpih-mul.c
@@ -0,0 +1,527 @@
+/* mpihelp-mul.c  -  MPI helper functions
+ * Copyright (C) 1994, 1996, 1998, 1999,
+ *               2000 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *       Actually it's the same code with only minor changes in the
+ *       way the data is stored; this is to support the abstraction
+ *       of an optional secure memory allocation which may be used
+ *       to avoid revealing of sensitive data due to paging etc.
+ *       The GNU MP Library itself is published under the LGPL;
+ *       however I decided to publish this code under the plain GPL.
+ */
+#include <linux/string.h>
+#include "mpi-internal.h"
+#include "longlong.h"
+#define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace)          \
+        do {                                                    \
+                if ((size) < KARATSUBA_THRESHOLD)               \
+                        mul_n_basecase(prodp, up, vp, size);    \
+                else                                            \
+                        mul_n(prodp, up, vp, size, tspace);     \
+        } while (0);
+#define MPN_SQR_N_RECURSE(prodp, up, size, tspace)              \
+        do {                                                    \
+                if ((size) < KARATSUBA_THRESHOLD)               \
+                        mpih_sqr_n_basecase(prodp, up, size);   \
+                else                                            \
+                        mpih_sqr_n(prodp, up, size, tspace);    \
+        } while (0);
+/* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP),
+ * both with SIZE limbs, and store the result at PRODP.  2 * SIZE limbs are
+ * always stored.  Return the most significant limb.
+ *
+ * Argument constraints:
+ * 1. PRODP != UP and PRODP != VP, i.e. the destination
+ *    must be distinct from the multiplier and the multiplicand.
+ *
+ *
+ * Handle simple cases with traditional multiplication.
+ *
+ * This is the most critical code of multiplication.  All multiplies rely
+ * on this, both small and huge.  Small ones arrive here immediately.  Huge
+ * ones arrive here as this is the base case for Karatsuba's recursive
+ * algorithm below.
+ */
+static mpi_limb_t
+mul_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
+{
+        mpi_size_t i;
+        mpi_limb_t cy;
+        mpi_limb_t v_limb;
+        /* Multiply by the first limb in V separately, as the result can be
+         * stored (not added) to PROD.  We also avoid a loop for zeroing.  */
+        v_limb = vp[0];
+        if (v_limb <= 1) {
+                if (v_limb == 1)
+                        MPN_COPY(prodp, up, size);
+                else
+                        MPN_ZERO(prodp, size);
+                cy = 0;
+        } else
+                cy = mpihelp_mul_1(prodp, up, size, v_limb);
+        prodp[size] = cy;
+        prodp++;
+        /* For each iteration in the outer loop, multiply one limb from
+         * U with one limb from V, and add it to PROD.  */
+        for (i = 1; i < size; i++) {
+                v_limb = vp[i];
+                if (v_limb <= 1) {
+                        cy = 0;
+                        if (v_limb == 1)
+                                cy = mpihelp_add_n(prodp, prodp, up, size);
+                } else
+                        cy = mpihelp_addmul_1(prodp, up, size, v_limb);
+                prodp[size] = cy;
+                prodp++;
+        }
+        return cy;
+}
+static void
+mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp,
+                mpi_size_t size, mpi_ptr_t tspace)
+{
+        if (size & 1) {
+                /* The size is odd, and the code below doesn't handle that.
+                 * Multiply the least significant (size - 1) limbs with a recursive
+                 * call, and handle the most significant limb of S1 and S2
+                 * separately.
+                 * A slightly faster way to do this would be to make the Karatsuba
+                 * code below behave as if the size were even, and let it check for
+                 * odd size in the end.  I.e., in essence move this code to the end.
+                 * Doing so would save us a recursive call, and potentially make the
+                 * stack grow a lot less.
+                 */
+                mpi_size_t esize = size - 1;    /* even size */
+                mpi_limb_t cy_limb;
+                MPN_MUL_N_RECURSE(prodp, up, vp, esize, tspace);
+                cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, vp[esize]);
+                prodp[esize + esize] = cy_limb;
+                cy_limb = mpihelp_addmul_1(prodp + esize, vp, size, up[esize]);
+                prodp[esize + size] = cy_limb;
+        } else {
+                /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm.
+                 *
+                 * Split U in two pieces, U1 and U0, such that
+                 * U = U0 + U1*(B**n),
+                 * and V in V1 and V0, such that
+                 * V = V0 + V1*(B**n).
+                 *
+                 * UV is then computed recursively using the identity
+                 *
+                 *        2n   n          n                     n
+                 * UV = (B  + B )U V  +  B (U -U )(V -V )  +  (B + 1)U V
+                 *                1 1        1  0   0  1              0 0
+                 *
+                 * Where B = 2**BITS_PER_MP_LIMB.
+                 */
+                mpi_size_t hsize = size >> 1;
+                mpi_limb_t cy;
+                int negflg;
+                /* Product H.      ________________  ________________
+                 *                |_____U1 x V1____||____U0 x V0_____|
+                 * Put result in upper part of PROD and pass low part of TSPACE
+                 * as new TSPACE.
+                 */
+                MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize,
+                                  tspace);
+                /* Product M.      ________________
+                 *                |_(U1-U0)(V0-V1)_|
+                 */
+                if (mpihelp_cmp(up + hsize, up, hsize) >= 0) {
+                        mpihelp_sub_n(prodp, up + hsize, up, hsize);
+                        negflg = 0;
+                } else {
+                        mpihelp_sub_n(prodp, up, up + hsize, hsize);
+                        negflg = 1;
+                }
+                if (mpihelp_cmp(vp + hsize, vp, hsize) >= 0) {
+                        mpihelp_sub_n(prodp + hsize, vp + hsize, vp, hsize);
+                        negflg ^= 1;
+                } else {
+                        mpihelp_sub_n(prodp + hsize, vp, vp + hsize, hsize);
+                        /* No change of NEGFLG.  */
+                }
+                /* Read temporary operands from low part of PROD.
+                 * Put result in low part of TSPACE using upper part of TSPACE
+                 * as new TSPACE.
+                 */
+                MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize,
+                                  tspace + size);
+                /* Add/copy product H. */
+                MPN_COPY(prodp + hsize, prodp + size, hsize);
+                cy = mpihelp_add_n(prodp + size, prodp + size,
+                                   prodp + size + hsize, hsize);
+                /* Add product M (if NEGFLG M is a negative number) */
+                if (negflg)
+                        cy -=
+                            mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace,
+                                          size);
+                else
+                        cy +=
+                            mpihelp_add_n(prodp + hsize, prodp + hsize, tspace,
+                                          size);
+                /* Product L.      ________________  ________________
+                 *                |________________||____U0 x V0_____|
+                 * Read temporary operands from low part of PROD.
+                 * Put result in low part of TSPACE using upper part of TSPACE
+                 * as new TSPACE.
+                 */
+                MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size);
+                /* Add/copy Product L (twice) */
+                cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
+                if (cy)
+                        mpihelp_add_1(prodp + hsize + size,
+                                      prodp + hsize + size, hsize, cy);
+                MPN_COPY(prodp, tspace, hsize);
+                cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize,
+                                   hsize);
+                if (cy)
+                        mpihelp_add_1(prodp + size, prodp + size, size, 1);
+        }
+}
+void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size)
+{
+        mpi_size_t i;
+        mpi_limb_t cy_limb;
+        mpi_limb_t v_limb;
+        /* Multiply by the first limb in V separately, as the result can be
+         * stored (not added) to PROD.  We also avoid a loop for zeroing.  */
+        v_limb = up[0];
+        if (v_limb <= 1) {
+                if (v_limb == 1)
+                        MPN_COPY(prodp, up, size);
+                else
+                        MPN_ZERO(prodp, size);
+                cy_limb = 0;
+        } else
+                cy_limb = mpihelp_mul_1(prodp, up, size, v_limb);
+        prodp[size] = cy_limb;
+        prodp++;
+        /* For each iteration in the outer loop, multiply one limb from
+         * U with one limb from V, and add it to PROD.  */
+        for (i = 1; i < size; i++) {
+                v_limb = up[i];
+                if (v_limb <= 1) {
+                        cy_limb = 0;
+                        if (v_limb == 1)
+                                cy_limb = mpihelp_add_n(prodp, prodp, up, size);
+                } else
+                        cy_limb = mpihelp_addmul_1(prodp, up, size, v_limb);
+                prodp[size] = cy_limb;
+                prodp++;
+        }
+}
+void
+mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace)
+{
+        if (size & 1) {
+                /* The size is odd, and the code below doesn't handle that.
+                 * Multiply the least significant (size - 1) limbs with a recursive
+                 * call, and handle the most significant limb of S1 and S2
+                 * separately.
+                 * A slightly faster way to do this would be to make the Karatsuba
+                 * code below behave as if the size were even, and let it check for
+                 * odd size in the end.  I.e., in essence move this code to the end.
+                 * Doing so would save us a recursive call, and potentially make the
+                 * stack grow a lot less.
+                 */
+                mpi_size_t esize = size - 1;    /* even size */
+                mpi_limb_t cy_limb;
+                MPN_SQR_N_RECURSE(prodp, up, esize, tspace);
+                cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, up[esize]);
+                prodp[esize + esize] = cy_limb;
+                cy_limb = mpihelp_addmul_1(prodp + esize, up, size, up[esize]);
+                prodp[esize + size] = cy_limb;
+        } else {
+                mpi_size_t hsize = size >> 1;
+                mpi_limb_t cy;
+                /* Product H.      ________________  ________________
+                 *                |_____U1 x U1____||____U0 x U0_____|
+                 * Put result in upper part of PROD and pass low part of TSPACE
+                 * as new TSPACE.
+                 */
+                MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace);
+                /* Product M.      ________________
+                 *                |_(U1-U0)(U0-U1)_|
+                 */
+                if (mpihelp_cmp(up + hsize, up, hsize) >= 0)
+                        mpihelp_sub_n(prodp, up + hsize, up, hsize);
+                else
+                        mpihelp_sub_n(prodp, up, up + hsize, hsize);
+                /* Read temporary operands from low part of PROD.
+                 * Put result in low part of TSPACE using upper part of TSPACE
+                 * as new TSPACE.  */
+                MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size);
+                /* Add/copy product H  */
+                MPN_COPY(prodp + hsize, prodp + size, hsize);
+                cy = mpihelp_add_n(prodp + size, prodp + size,
+                                   prodp + size + hsize, hsize);
+                /* Add product M (if NEGFLG M is a negative number).  */
+                cy -= mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, size);
+                /* Product L.      ________________  ________________
+                 *                |________________||____U0 x U0_____|
+                 * Read temporary operands from low part of PROD.
+                 * Put result in low part of TSPACE using upper part of TSPACE
+                 * as new TSPACE.  */
+                MPN_SQR_N_RECURSE(tspace, up, hsize, tspace + size);
+                /* Add/copy Product L (twice).  */
+                cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size);
+                if (cy)
+                        mpihelp_add_1(prodp + hsize + size,
+                                      prodp + hsize + size, hsize, cy);
+                MPN_COPY(prodp, tspace, hsize);
+                cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize,
+                                   hsize);
+                if (cy)
+                        mpihelp_add_1(prodp + size, prodp + size, size, 1);
+        }
+}
+/* This should be made into an inline function in gmp.h.  */
+int mpihelp_mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
+{
+        if (up == vp) {
+                if (size < KARATSUBA_THRESHOLD)
+                        mpih_sqr_n_basecase(prodp, up, size);
+                else {
+                        mpi_ptr_t tspace;
+                        tspace = mpi_alloc_limb_space(2 * size);
+                        if (!tspace)
+                                return -ENOMEM;
+                        mpih_sqr_n(prodp, up, size, tspace);
+                        mpi_free_limb_space(tspace);
+                }
+        } else {
+                if (size < KARATSUBA_THRESHOLD)
+                        mul_n_basecase(prodp, up, vp, size);
+                else {
+                        mpi_ptr_t tspace;
+                        tspace = mpi_alloc_limb_space(2 * size);
+                        if (!tspace)
+                                return -ENOMEM;
+                        mul_n(prodp, up, vp, size, tspace);
+                        mpi_free_limb_space(tspace);
+                }
+        }
+        return 0;
+}
+int
+mpihelp_mul_karatsuba_case(mpi_ptr_t prodp,
+                           mpi_ptr_t up, mpi_size_t usize,
+                           mpi_ptr_t vp, mpi_size_t vsize,
+                           struct karatsuba_ctx *ctx)
+{
+        mpi_limb_t cy;
+        if (!ctx->tspace || ctx->tspace_size < vsize) {
+                if (ctx->tspace)
+                        mpi_free_limb_space(ctx->tspace);
+                ctx->tspace = mpi_alloc_limb_space(2 * vsize);
+                if (!ctx->tspace)
+                        return -ENOMEM;
+                ctx->tspace_size = vsize;
+        }
+        MPN_MUL_N_RECURSE(prodp, up, vp, vsize, ctx->tspace);
+        prodp += vsize;
+        up += vsize;
+        usize -= vsize;
+        if (usize >= vsize) {
+                if (!ctx->tp || ctx->tp_size < vsize) {
+                        if (ctx->tp)
+                                mpi_free_limb_space(ctx->tp);
+                        ctx->tp = mpi_alloc_limb_space(2 * vsize);
+                        if (!ctx->tp) {
+                                if (ctx->tspace)
+                                        mpi_free_limb_space(ctx->tspace);
+                                ctx->tspace = NULL;
+                                return -ENOMEM;
+                        }
+                        ctx->tp_size = vsize;
+                }
+                do {
+                        MPN_MUL_N_RECURSE(ctx->tp, up, vp, vsize, ctx->tspace);
+                        cy = mpihelp_add_n(prodp, prodp, ctx->tp, vsize);
+                        mpihelp_add_1(prodp + vsize, ctx->tp + vsize, vsize,
+                                      cy);
+                        prodp += vsize;
+                        up += vsize;
+                        usize -= vsize;
+                } while (usize >= vsize);
+        }
+        if (usize) {
+                if (usize < KARATSUBA_THRESHOLD) {
+                        mpi_limb_t tmp;
+                        if (mpihelp_mul(ctx->tspace, vp, vsize, up, usize, &tmp)
+                            < 0)
+                                return -ENOMEM;
+                } else {
+                        if (!ctx->next) {
+                                ctx->next = kzalloc(sizeof *ctx, GFP_KERNEL);
+                                if (!ctx->next)
+                                        return -ENOMEM;
+                        }
+                        if (mpihelp_mul_karatsuba_case(ctx->tspace,
+                                                       vp, vsize,
+                                                       up, usize,
+                                                       ctx->next) < 0)
+                                return -ENOMEM;
+                }
+                cy = mpihelp_add_n(prodp, prodp, ctx->tspace, vsize);
+                mpihelp_add_1(prodp + vsize, ctx->tspace + vsize, usize, cy);
+        }
+        return 0;
+}
+void mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx)
+{
+        struct karatsuba_ctx *ctx2;
+        if (ctx->tp)
+                mpi_free_limb_space(ctx->tp);
+        if (ctx->tspace)
+                mpi_free_limb_space(ctx->tspace);
+        for (ctx = ctx->next; ctx; ctx = ctx2) {
+                ctx2 = ctx->next;
+                if (ctx->tp)
+                        mpi_free_limb_space(ctx->tp);
+                if (ctx->tspace)
+                        mpi_free_limb_space(ctx->tspace);
+                kfree(ctx);
+        }
+}
+/* Multiply the natural numbers u (pointed to by UP, with USIZE limbs)
+ * and v (pointed to by VP, with VSIZE limbs), and store the result at
+ * PRODP.  USIZE + VSIZE limbs are always stored, but if the input
+ * operands are normalized.  Return the most significant limb of the
+ * result.
+ *
+ * NOTE: The space pointed to by PRODP is overwritten before finished
+ * with U and V, so overlap is an error.
+ *
+ * Argument constraints:
+ * 1. USIZE >= VSIZE.
+ * 2. PRODP != UP and PRODP != VP, i.e. the destination
+ *    must be distinct from the multiplier and the multiplicand.
+ */
+int
+mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
+            mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result)
+{
+        mpi_ptr_t prod_endp = prodp + usize + vsize - 1;
+        mpi_limb_t cy;
+        struct karatsuba_ctx ctx;
+        if (vsize < KARATSUBA_THRESHOLD) {
+                mpi_size_t i;
+                mpi_limb_t v_limb;
+                if (!vsize) {
+                        *_result = 0;
+                        return 0;
+                }
+                /* Multiply by the first limb in V separately, as the result can be
+                 * stored (not added) to PROD.  We also avoid a loop for zeroing.  */
+                v_limb = vp[0];
+                if (v_limb <= 1) {
+                        if (v_limb == 1)
+                                MPN_COPY(prodp, up, usize);
+                        else
+                                MPN_ZERO(prodp, usize);
+                        cy = 0;
+                } else
+                        cy = mpihelp_mul_1(prodp, up, usize, v_limb);
+                prodp[usize] = cy;
+                prodp++;
+                /* For each iteration in the outer loop, multiply one limb from
+                 * U with one limb from V, and add it to PROD.  */
+                for (i = 1; i < vsize; i++) {
+                        v_limb = vp[i];
+                        if (v_limb <= 1) {
+                                cy = 0;
+                                if (v_limb == 1)
+                                        cy = mpihelp_add_n(prodp, prodp, up,
+                                                           usize);
+                        } else
+                                cy = mpihelp_addmul_1(prodp, up, usize, v_limb);
+                        prodp[usize] = cy;
+                        prodp++;
+                }
+                *_result = cy;
+                return 0;
+        }
+        memset(&ctx, 0, sizeof ctx);
+        if (mpihelp_mul_karatsuba_case(prodp, up, usize, vp, vsize, &ctx) < 0)
+                return -ENOMEM;
+        mpihelp_release_karatsuba_ctx(&ctx);
+        *_result = *prod_endp;
+        return 0;
+}
diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c
new file mode 100644
index 000000000000..eefc55d6b7f5
--- /dev/null
+++ b/lib/mpi/mpiutil.c
@@ -0,0 +1,208 @@
+/* mpiutil.ac  -  Utility functions for MPI
+ * Copyright (C) 1998, 1999 Free Software Foundation, Inc.
+ *
+ * This file is part of GnuPG.
+ *
+ * GnuPG is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * GnuPG is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+#include "mpi-internal.h"
+/****************
+ * Note:  It was a bad idea to use the number of limbs to allocate
+ *        because on a alpha the limbs are large but we normally need
+ *        integers of n bits - So we should chnage this to bits (or bytes).
+ *
+ *        But mpi_alloc is used in a lot of places :-)
+ */
+MPI mpi_alloc(unsigned nlimbs)
+{
+        MPI a;
+        a = kmalloc(sizeof *a, GFP_KERNEL);
+        if (!a)
+                return a;
+        if (nlimbs) {
+                a->d = mpi_alloc_limb_space(nlimbs);
+                if (!a->d) {
+                        kfree(a);
+                        return NULL;
+                }
+        } else {
+                a->d = NULL;
+        }
+        a->alloced = nlimbs;
+        a->nlimbs = 0;
+        a->sign = 0;
+        a->flags = 0;
+        a->nbits = 0;
+        return a;
+}
+EXPORT_SYMBOL_GPL(mpi_alloc);
+mpi_ptr_t mpi_alloc_limb_space(unsigned nlimbs)
+{
+        size_t len = nlimbs * sizeof(mpi_limb_t);
+        return kmalloc(len, GFP_KERNEL);
+}
+void mpi_free_limb_space(mpi_ptr_t a)
+{
+        if (!a)
+                return;
+        kfree(a);
+}
+void mpi_assign_limb_space(MPI a, mpi_ptr_t ap, unsigned nlimbs)
+{
+        mpi_free_limb_space(a->d);
+        a->d = ap;
+        a->alloced = nlimbs;
+}
+/****************
+ * Resize the array of A to NLIMBS. the additional space is cleared
+ * (set to 0) [done by m_realloc()]
+ */
+int mpi_resize(MPI a, unsigned nlimbs)
+{
+        void *p;
+        if (nlimbs <= a->alloced)
+                return 0;       /* no need to do it */
+        if (a->d) {
+                p = kmalloc(nlimbs * sizeof(mpi_limb_t), GFP_KERNEL);
+                if (!p)
+                        return -ENOMEM;
+                memcpy(p, a->d, a->alloced * sizeof(mpi_limb_t));
+                kfree(a->d);
+                a->d = p;
+        } else {
+                a->d = kzalloc(nlimbs * sizeof(mpi_limb_t), GFP_KERNEL);
+                if (!a->d)
+                        return -ENOMEM;
+        }
+        a->alloced = nlimbs;
+        return 0;
+}
+void mpi_clear(MPI a)
+{
+        a->nlimbs = 0;
+        a->nbits = 0;
+        a->flags = 0;
+}
+void mpi_free(MPI a)
+{
+        if (!a)
+                return;
+        if (a->flags & 4)
+                kfree(a->d);
+        else
+                mpi_free_limb_space(a->d);
+        if (a->flags & ~7)
+                pr_info("invalid flag value in mpi\n");
+        kfree(a);
+}
+EXPORT_SYMBOL_GPL(mpi_free);
+/****************
+ * Note: This copy function should not interpret the MPI
+ *       but copy it transparently.
+ */
+int mpi_copy(MPI *copied, const MPI a)
+{
+        size_t i;
+        MPI b;
+        *copied = MPI_NULL;
+        if (a) {
+                b = mpi_alloc(a->nlimbs);
+                if (!b)
+                        return -ENOMEM;
+                b->nlimbs = a->nlimbs;
+                b->sign = a->sign;
+                b->flags = a->flags;
+                b->nbits = a->nbits;
+                for (i = 0; i < b->nlimbs; i++)
+                        b->d[i] = a->d[i];
+                *copied = b;
+        }
+        return 0;
+}
+int mpi_set(MPI w, const MPI u)
+{
+        mpi_ptr_t wp, up;
+        mpi_size_t usize = u->nlimbs;
+        int usign = u->sign;
+        if (RESIZE_IF_NEEDED(w, (size_t) usize) < 0)
+                return -ENOMEM;
+        wp = w->d;
+        up = u->d;
+        MPN_COPY(wp, up, usize);
+        w->nlimbs = usize;
+        w->nbits = u->nbits;
+        w->flags = u->flags;
+        w->sign = usign;
+        return 0;
+}
+int mpi_set_ui(MPI w, unsigned long u)
+{
+        if (RESIZE_IF_NEEDED(w, 1) < 0)
+                return -ENOMEM;
+        w->d[0] = u;
+        w->nlimbs = u ? 1 : 0;
+        w->sign = 0;
+        w->nbits = 0;
+        w->flags = 0;
+        return 0;
+}
+MPI mpi_alloc_set_ui(unsigned long u)
+{
+        MPI w = mpi_alloc(1);
+        if (!w)
+                return w;
+        w->d[0] = u;
+        w->nlimbs = u ? 1 : 0;
+        w->sign = 0;
+        return w;
+}
+void mpi_swap(MPI a, MPI b)
+{
+        struct gcry_mpi tmp;
+        tmp = *a;
+        *a = *b;
+        *b = tmp;
+}