diff options
| author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2012-03-19 20:02:01 -0400 |
|---|---|---|
| committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2012-03-19 20:02:01 -0400 |
| commit | 10ce3cc919f50c2043b41ca968b43c26a3672600 (patch) | |
| tree | ea409366a5208aced495bc0516a08b81fd43222e /lib/mpi/mpih-mul.c | |
| parent | 24e3e5ae1e4c2a3a32f5b1f96b4e3fd721806acd (diff) | |
| parent | 5c6a7a62c130afef3d61c1dee153012231ff5cd9 (diff) | |
Merge branch 'next' into for-linus
Diffstat (limited to 'lib/mpi/mpih-mul.c')
| -rw-r--r-- | lib/mpi/mpih-mul.c | 527 |
1 files changed, 527 insertions, 0 deletions
diff --git a/lib/mpi/mpih-mul.c b/lib/mpi/mpih-mul.c new file mode 100644 index 000000000000..c69c5eef233b --- /dev/null +++ b/lib/mpi/mpih-mul.c | |||
| @@ -0,0 +1,527 @@ | |||
| 1 | /* mpihelp-mul.c - MPI helper functions | ||
| 2 | * Copyright (C) 1994, 1996, 1998, 1999, | ||
| 3 | * 2000 Free Software Foundation, Inc. | ||
| 4 | * | ||
| 5 | * This file is part of GnuPG. | ||
| 6 | * | ||
| 7 | * GnuPG is free software; you can redistribute it and/or modify | ||
| 8 | * it under the terms of the GNU General Public License as published by | ||
| 9 | * the Free Software Foundation; either version 2 of the License, or | ||
| 10 | * (at your option) any later version. | ||
| 11 | * | ||
| 12 | * GnuPG is distributed in the hope that it will be useful, | ||
| 13 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 14 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 15 | * GNU General Public License for more details. | ||
| 16 | * | ||
| 17 | * You should have received a copy of the GNU General Public License | ||
| 18 | * along with this program; if not, write to the Free Software | ||
| 19 | * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA | ||
| 20 | * | ||
| 21 | * Note: This code is heavily based on the GNU MP Library. | ||
| 22 | * Actually it's the same code with only minor changes in the | ||
| 23 | * way the data is stored; this is to support the abstraction | ||
| 24 | * of an optional secure memory allocation which may be used | ||
| 25 | * to avoid revealing of sensitive data due to paging etc. | ||
| 26 | * The GNU MP Library itself is published under the LGPL; | ||
| 27 | * however I decided to publish this code under the plain GPL. | ||
| 28 | */ | ||
| 29 | |||
| 30 | #include <linux/string.h> | ||
| 31 | #include "mpi-internal.h" | ||
| 32 | #include "longlong.h" | ||
| 33 | |||
| 34 | #define MPN_MUL_N_RECURSE(prodp, up, vp, size, tspace) \ | ||
| 35 | do { \ | ||
| 36 | if ((size) < KARATSUBA_THRESHOLD) \ | ||
| 37 | mul_n_basecase(prodp, up, vp, size); \ | ||
| 38 | else \ | ||
| 39 | mul_n(prodp, up, vp, size, tspace); \ | ||
| 40 | } while (0); | ||
| 41 | |||
| 42 | #define MPN_SQR_N_RECURSE(prodp, up, size, tspace) \ | ||
| 43 | do { \ | ||
| 44 | if ((size) < KARATSUBA_THRESHOLD) \ | ||
| 45 | mpih_sqr_n_basecase(prodp, up, size); \ | ||
| 46 | else \ | ||
| 47 | mpih_sqr_n(prodp, up, size, tspace); \ | ||
| 48 | } while (0); | ||
| 49 | |||
| 50 | /* Multiply the natural numbers u (pointed to by UP) and v (pointed to by VP), | ||
| 51 | * both with SIZE limbs, and store the result at PRODP. 2 * SIZE limbs are | ||
| 52 | * always stored. Return the most significant limb. | ||
| 53 | * | ||
| 54 | * Argument constraints: | ||
| 55 | * 1. PRODP != UP and PRODP != VP, i.e. the destination | ||
| 56 | * must be distinct from the multiplier and the multiplicand. | ||
| 57 | * | ||
| 58 | * | ||
| 59 | * Handle simple cases with traditional multiplication. | ||
| 60 | * | ||
| 61 | * This is the most critical code of multiplication. All multiplies rely | ||
| 62 | * on this, both small and huge. Small ones arrive here immediately. Huge | ||
| 63 | * ones arrive here as this is the base case for Karatsuba's recursive | ||
| 64 | * algorithm below. | ||
| 65 | */ | ||
| 66 | |||
| 67 | static mpi_limb_t | ||
| 68 | mul_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size) | ||
| 69 | { | ||
| 70 | mpi_size_t i; | ||
| 71 | mpi_limb_t cy; | ||
| 72 | mpi_limb_t v_limb; | ||
| 73 | |||
| 74 | /* Multiply by the first limb in V separately, as the result can be | ||
| 75 | * stored (not added) to PROD. We also avoid a loop for zeroing. */ | ||
| 76 | v_limb = vp[0]; | ||
| 77 | if (v_limb <= 1) { | ||
| 78 | if (v_limb == 1) | ||
| 79 | MPN_COPY(prodp, up, size); | ||
| 80 | else | ||
| 81 | MPN_ZERO(prodp, size); | ||
| 82 | cy = 0; | ||
| 83 | } else | ||
| 84 | cy = mpihelp_mul_1(prodp, up, size, v_limb); | ||
| 85 | |||
| 86 | prodp[size] = cy; | ||
| 87 | prodp++; | ||
| 88 | |||
| 89 | /* For each iteration in the outer loop, multiply one limb from | ||
| 90 | * U with one limb from V, and add it to PROD. */ | ||
| 91 | for (i = 1; i < size; i++) { | ||
| 92 | v_limb = vp[i]; | ||
| 93 | if (v_limb <= 1) { | ||
| 94 | cy = 0; | ||
| 95 | if (v_limb == 1) | ||
| 96 | cy = mpihelp_add_n(prodp, prodp, up, size); | ||
| 97 | } else | ||
| 98 | cy = mpihelp_addmul_1(prodp, up, size, v_limb); | ||
| 99 | |||
| 100 | prodp[size] = cy; | ||
| 101 | prodp++; | ||
| 102 | } | ||
| 103 | |||
| 104 | return cy; | ||
| 105 | } | ||
| 106 | |||
| 107 | static void | ||
| 108 | mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, | ||
| 109 | mpi_size_t size, mpi_ptr_t tspace) | ||
| 110 | { | ||
| 111 | if (size & 1) { | ||
| 112 | /* The size is odd, and the code below doesn't handle that. | ||
| 113 | * Multiply the least significant (size - 1) limbs with a recursive | ||
| 114 | * call, and handle the most significant limb of S1 and S2 | ||
| 115 | * separately. | ||
| 116 | * A slightly faster way to do this would be to make the Karatsuba | ||
| 117 | * code below behave as if the size were even, and let it check for | ||
| 118 | * odd size in the end. I.e., in essence move this code to the end. | ||
| 119 | * Doing so would save us a recursive call, and potentially make the | ||
| 120 | * stack grow a lot less. | ||
| 121 | */ | ||
| 122 | mpi_size_t esize = size - 1; /* even size */ | ||
| 123 | mpi_limb_t cy_limb; | ||
| 124 | |||
| 125 | MPN_MUL_N_RECURSE(prodp, up, vp, esize, tspace); | ||
| 126 | cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, vp[esize]); | ||
| 127 | prodp[esize + esize] = cy_limb; | ||
| 128 | cy_limb = mpihelp_addmul_1(prodp + esize, vp, size, up[esize]); | ||
| 129 | prodp[esize + size] = cy_limb; | ||
| 130 | } else { | ||
| 131 | /* Anatolij Alekseevich Karatsuba's divide-and-conquer algorithm. | ||
| 132 | * | ||
| 133 | * Split U in two pieces, U1 and U0, such that | ||
| 134 | * U = U0 + U1*(B**n), | ||
| 135 | * and V in V1 and V0, such that | ||
| 136 | * V = V0 + V1*(B**n). | ||
| 137 | * | ||
| 138 | * UV is then computed recursively using the identity | ||
| 139 | * | ||
| 140 | * 2n n n n | ||
| 141 | * UV = (B + B )U V + B (U -U )(V -V ) + (B + 1)U V | ||
| 142 | * 1 1 1 0 0 1 0 0 | ||
| 143 | * | ||
| 144 | * Where B = 2**BITS_PER_MP_LIMB. | ||
| 145 | */ | ||
| 146 | mpi_size_t hsize = size >> 1; | ||
| 147 | mpi_limb_t cy; | ||
| 148 | int negflg; | ||
| 149 | |||
| 150 | /* Product H. ________________ ________________ | ||
| 151 | * |_____U1 x V1____||____U0 x V0_____| | ||
| 152 | * Put result in upper part of PROD and pass low part of TSPACE | ||
| 153 | * as new TSPACE. | ||
| 154 | */ | ||
| 155 | MPN_MUL_N_RECURSE(prodp + size, up + hsize, vp + hsize, hsize, | ||
| 156 | tspace); | ||
| 157 | |||
| 158 | /* Product M. ________________ | ||
| 159 | * |_(U1-U0)(V0-V1)_| | ||
| 160 | */ | ||
| 161 | if (mpihelp_cmp(up + hsize, up, hsize) >= 0) { | ||
| 162 | mpihelp_sub_n(prodp, up + hsize, up, hsize); | ||
| 163 | negflg = 0; | ||
| 164 | } else { | ||
| 165 | mpihelp_sub_n(prodp, up, up + hsize, hsize); | ||
| 166 | negflg = 1; | ||
| 167 | } | ||
| 168 | if (mpihelp_cmp(vp + hsize, vp, hsize) >= 0) { | ||
| 169 | mpihelp_sub_n(prodp + hsize, vp + hsize, vp, hsize); | ||
| 170 | negflg ^= 1; | ||
| 171 | } else { | ||
| 172 | mpihelp_sub_n(prodp + hsize, vp, vp + hsize, hsize); | ||
| 173 | /* No change of NEGFLG. */ | ||
| 174 | } | ||
| 175 | /* Read temporary operands from low part of PROD. | ||
| 176 | * Put result in low part of TSPACE using upper part of TSPACE | ||
| 177 | * as new TSPACE. | ||
| 178 | */ | ||
| 179 | MPN_MUL_N_RECURSE(tspace, prodp, prodp + hsize, hsize, | ||
| 180 | tspace + size); | ||
| 181 | |||
| 182 | /* Add/copy product H. */ | ||
| 183 | MPN_COPY(prodp + hsize, prodp + size, hsize); | ||
| 184 | cy = mpihelp_add_n(prodp + size, prodp + size, | ||
| 185 | prodp + size + hsize, hsize); | ||
| 186 | |||
| 187 | /* Add product M (if NEGFLG M is a negative number) */ | ||
| 188 | if (negflg) | ||
| 189 | cy -= | ||
| 190 | mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, | ||
| 191 | size); | ||
| 192 | else | ||
| 193 | cy += | ||
| 194 | mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, | ||
| 195 | size); | ||
| 196 | |||
| 197 | /* Product L. ________________ ________________ | ||
| 198 | * |________________||____U0 x V0_____| | ||
| 199 | * Read temporary operands from low part of PROD. | ||
| 200 | * Put result in low part of TSPACE using upper part of TSPACE | ||
| 201 | * as new TSPACE. | ||
| 202 | */ | ||
| 203 | MPN_MUL_N_RECURSE(tspace, up, vp, hsize, tspace + size); | ||
| 204 | |||
| 205 | /* Add/copy Product L (twice) */ | ||
| 206 | |||
| 207 | cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size); | ||
| 208 | if (cy) | ||
| 209 | mpihelp_add_1(prodp + hsize + size, | ||
| 210 | prodp + hsize + size, hsize, cy); | ||
| 211 | |||
| 212 | MPN_COPY(prodp, tspace, hsize); | ||
| 213 | cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize, | ||
| 214 | hsize); | ||
| 215 | if (cy) | ||
| 216 | mpihelp_add_1(prodp + size, prodp + size, size, 1); | ||
| 217 | } | ||
| 218 | } | ||
| 219 | |||
| 220 | void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size) | ||
| 221 | { | ||
| 222 | mpi_size_t i; | ||
| 223 | mpi_limb_t cy_limb; | ||
| 224 | mpi_limb_t v_limb; | ||
| 225 | |||
| 226 | /* Multiply by the first limb in V separately, as the result can be | ||
| 227 | * stored (not added) to PROD. We also avoid a loop for zeroing. */ | ||
| 228 | v_limb = up[0]; | ||
| 229 | if (v_limb <= 1) { | ||
| 230 | if (v_limb == 1) | ||
| 231 | MPN_COPY(prodp, up, size); | ||
| 232 | else | ||
| 233 | MPN_ZERO(prodp, size); | ||
| 234 | cy_limb = 0; | ||
| 235 | } else | ||
| 236 | cy_limb = mpihelp_mul_1(prodp, up, size, v_limb); | ||
| 237 | |||
| 238 | prodp[size] = cy_limb; | ||
| 239 | prodp++; | ||
| 240 | |||
| 241 | /* For each iteration in the outer loop, multiply one limb from | ||
| 242 | * U with one limb from V, and add it to PROD. */ | ||
| 243 | for (i = 1; i < size; i++) { | ||
| 244 | v_limb = up[i]; | ||
| 245 | if (v_limb <= 1) { | ||
| 246 | cy_limb = 0; | ||
| 247 | if (v_limb == 1) | ||
| 248 | cy_limb = mpihelp_add_n(prodp, prodp, up, size); | ||
| 249 | } else | ||
| 250 | cy_limb = mpihelp_addmul_1(prodp, up, size, v_limb); | ||
| 251 | |||
| 252 | prodp[size] = cy_limb; | ||
| 253 | prodp++; | ||
| 254 | } | ||
| 255 | } | ||
| 256 | |||
| 257 | void | ||
| 258 | mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace) | ||
| 259 | { | ||
| 260 | if (size & 1) { | ||
| 261 | /* The size is odd, and the code below doesn't handle that. | ||
| 262 | * Multiply the least significant (size - 1) limbs with a recursive | ||
| 263 | * call, and handle the most significant limb of S1 and S2 | ||
| 264 | * separately. | ||
| 265 | * A slightly faster way to do this would be to make the Karatsuba | ||
| 266 | * code below behave as if the size were even, and let it check for | ||
| 267 | * odd size in the end. I.e., in essence move this code to the end. | ||
| 268 | * Doing so would save us a recursive call, and potentially make the | ||
| 269 | * stack grow a lot less. | ||
| 270 | */ | ||
| 271 | mpi_size_t esize = size - 1; /* even size */ | ||
| 272 | mpi_limb_t cy_limb; | ||
| 273 | |||
| 274 | MPN_SQR_N_RECURSE(prodp, up, esize, tspace); | ||
| 275 | cy_limb = mpihelp_addmul_1(prodp + esize, up, esize, up[esize]); | ||
| 276 | prodp[esize + esize] = cy_limb; | ||
| 277 | cy_limb = mpihelp_addmul_1(prodp + esize, up, size, up[esize]); | ||
| 278 | |||
| 279 | prodp[esize + size] = cy_limb; | ||
| 280 | } else { | ||
| 281 | mpi_size_t hsize = size >> 1; | ||
| 282 | mpi_limb_t cy; | ||
| 283 | |||
| 284 | /* Product H. ________________ ________________ | ||
| 285 | * |_____U1 x U1____||____U0 x U0_____| | ||
| 286 | * Put result in upper part of PROD and pass low part of TSPACE | ||
| 287 | * as new TSPACE. | ||
| 288 | */ | ||
| 289 | MPN_SQR_N_RECURSE(prodp + size, up + hsize, hsize, tspace); | ||
| 290 | |||
| 291 | /* Product M. ________________ | ||
| 292 | * |_(U1-U0)(U0-U1)_| | ||
| 293 | */ | ||
| 294 | if (mpihelp_cmp(up + hsize, up, hsize) >= 0) | ||
| 295 | mpihelp_sub_n(prodp, up + hsize, up, hsize); | ||
| 296 | else | ||
| 297 | mpihelp_sub_n(prodp, up, up + hsize, hsize); | ||
| 298 | |||
| 299 | /* Read temporary operands from low part of PROD. | ||
| 300 | * Put result in low part of TSPACE using upper part of TSPACE | ||
| 301 | * as new TSPACE. */ | ||
| 302 | MPN_SQR_N_RECURSE(tspace, prodp, hsize, tspace + size); | ||
| 303 | |||
| 304 | /* Add/copy product H */ | ||
| 305 | MPN_COPY(prodp + hsize, prodp + size, hsize); | ||
| 306 | cy = mpihelp_add_n(prodp + size, prodp + size, | ||
| 307 | prodp + size + hsize, hsize); | ||
| 308 | |||
| 309 | /* Add product M (if NEGFLG M is a negative number). */ | ||
| 310 | cy -= mpihelp_sub_n(prodp + hsize, prodp + hsize, tspace, size); | ||
| 311 | |||
| 312 | /* Product L. ________________ ________________ | ||
| 313 | * |________________||____U0 x U0_____| | ||
| 314 | * Read temporary operands from low part of PROD. | ||
| 315 | * Put result in low part of TSPACE using upper part of TSPACE | ||
| 316 | * as new TSPACE. */ | ||
| 317 | MPN_SQR_N_RECURSE(tspace, up, hsize, tspace + size); | ||
| 318 | |||
| 319 | /* Add/copy Product L (twice). */ | ||
| 320 | cy += mpihelp_add_n(prodp + hsize, prodp + hsize, tspace, size); | ||
| 321 | if (cy) | ||
| 322 | mpihelp_add_1(prodp + hsize + size, | ||
| 323 | prodp + hsize + size, hsize, cy); | ||
| 324 | |||
| 325 | MPN_COPY(prodp, tspace, hsize); | ||
| 326 | cy = mpihelp_add_n(prodp + hsize, prodp + hsize, tspace + hsize, | ||
| 327 | hsize); | ||
| 328 | if (cy) | ||
| 329 | mpihelp_add_1(prodp + size, prodp + size, size, 1); | ||
| 330 | } | ||
| 331 | } | ||
| 332 | |||
| 333 | /* This should be made into an inline function in gmp.h. */ | ||
| 334 | int mpihelp_mul_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size) | ||
| 335 | { | ||
| 336 | if (up == vp) { | ||
| 337 | if (size < KARATSUBA_THRESHOLD) | ||
| 338 | mpih_sqr_n_basecase(prodp, up, size); | ||
| 339 | else { | ||
| 340 | mpi_ptr_t tspace; | ||
| 341 | tspace = mpi_alloc_limb_space(2 * size); | ||
| 342 | if (!tspace) | ||
| 343 | return -ENOMEM; | ||
| 344 | mpih_sqr_n(prodp, up, size, tspace); | ||
| 345 | mpi_free_limb_space(tspace); | ||
| 346 | } | ||
| 347 | } else { | ||
| 348 | if (size < KARATSUBA_THRESHOLD) | ||
| 349 | mul_n_basecase(prodp, up, vp, size); | ||
| 350 | else { | ||
| 351 | mpi_ptr_t tspace; | ||
| 352 | tspace = mpi_alloc_limb_space(2 * size); | ||
| 353 | if (!tspace) | ||
| 354 | return -ENOMEM; | ||
| 355 | mul_n(prodp, up, vp, size, tspace); | ||
| 356 | mpi_free_limb_space(tspace); | ||
| 357 | } | ||
| 358 | } | ||
| 359 | |||
| 360 | return 0; | ||
| 361 | } | ||
| 362 | |||
| 363 | int | ||
| 364 | mpihelp_mul_karatsuba_case(mpi_ptr_t prodp, | ||
| 365 | mpi_ptr_t up, mpi_size_t usize, | ||
| 366 | mpi_ptr_t vp, mpi_size_t vsize, | ||
| 367 | struct karatsuba_ctx *ctx) | ||
| 368 | { | ||
| 369 | mpi_limb_t cy; | ||
| 370 | |||
| 371 | if (!ctx->tspace || ctx->tspace_size < vsize) { | ||
| 372 | if (ctx->tspace) | ||
| 373 | mpi_free_limb_space(ctx->tspace); | ||
| 374 | ctx->tspace = mpi_alloc_limb_space(2 * vsize); | ||
| 375 | if (!ctx->tspace) | ||
| 376 | return -ENOMEM; | ||
| 377 | ctx->tspace_size = vsize; | ||
| 378 | } | ||
| 379 | |||
| 380 | MPN_MUL_N_RECURSE(prodp, up, vp, vsize, ctx->tspace); | ||
| 381 | |||
| 382 | prodp += vsize; | ||
| 383 | up += vsize; | ||
| 384 | usize -= vsize; | ||
| 385 | if (usize >= vsize) { | ||
| 386 | if (!ctx->tp || ctx->tp_size < vsize) { | ||
| 387 | if (ctx->tp) | ||
| 388 | mpi_free_limb_space(ctx->tp); | ||
| 389 | ctx->tp = mpi_alloc_limb_space(2 * vsize); | ||
| 390 | if (!ctx->tp) { | ||
| 391 | if (ctx->tspace) | ||
| 392 | mpi_free_limb_space(ctx->tspace); | ||
| 393 | ctx->tspace = NULL; | ||
| 394 | return -ENOMEM; | ||
| 395 | } | ||
| 396 | ctx->tp_size = vsize; | ||
| 397 | } | ||
| 398 | |||
| 399 | do { | ||
| 400 | MPN_MUL_N_RECURSE(ctx->tp, up, vp, vsize, ctx->tspace); | ||
| 401 | cy = mpihelp_add_n(prodp, prodp, ctx->tp, vsize); | ||
| 402 | mpihelp_add_1(prodp + vsize, ctx->tp + vsize, vsize, | ||
| 403 | cy); | ||
| 404 | prodp += vsize; | ||
| 405 | up += vsize; | ||
| 406 | usize -= vsize; | ||
| 407 | } while (usize >= vsize); | ||
| 408 | } | ||
| 409 | |||
| 410 | if (usize) { | ||
| 411 | if (usize < KARATSUBA_THRESHOLD) { | ||
| 412 | mpi_limb_t tmp; | ||
| 413 | if (mpihelp_mul(ctx->tspace, vp, vsize, up, usize, &tmp) | ||
| 414 | < 0) | ||
| 415 | return -ENOMEM; | ||
| 416 | } else { | ||
| 417 | if (!ctx->next) { | ||
| 418 | ctx->next = kzalloc(sizeof *ctx, GFP_KERNEL); | ||
| 419 | if (!ctx->next) | ||
| 420 | return -ENOMEM; | ||
| 421 | } | ||
| 422 | if (mpihelp_mul_karatsuba_case(ctx->tspace, | ||
| 423 | vp, vsize, | ||
| 424 | up, usize, | ||
| 425 | ctx->next) < 0) | ||
| 426 | return -ENOMEM; | ||
| 427 | } | ||
| 428 | |||
| 429 | cy = mpihelp_add_n(prodp, prodp, ctx->tspace, vsize); | ||
| 430 | mpihelp_add_1(prodp + vsize, ctx->tspace + vsize, usize, cy); | ||
| 431 | } | ||
| 432 | |||
| 433 | return 0; | ||
| 434 | } | ||
| 435 | |||
| 436 | void mpihelp_release_karatsuba_ctx(struct karatsuba_ctx *ctx) | ||
| 437 | { | ||
| 438 | struct karatsuba_ctx *ctx2; | ||
| 439 | |||
| 440 | if (ctx->tp) | ||
| 441 | mpi_free_limb_space(ctx->tp); | ||
| 442 | if (ctx->tspace) | ||
| 443 | mpi_free_limb_space(ctx->tspace); | ||
| 444 | for (ctx = ctx->next; ctx; ctx = ctx2) { | ||
| 445 | ctx2 = ctx->next; | ||
| 446 | if (ctx->tp) | ||
| 447 | mpi_free_limb_space(ctx->tp); | ||
| 448 | if (ctx->tspace) | ||
| 449 | mpi_free_limb_space(ctx->tspace); | ||
| 450 | kfree(ctx); | ||
| 451 | } | ||
| 452 | } | ||
| 453 | |||
| 454 | /* Multiply the natural numbers u (pointed to by UP, with USIZE limbs) | ||
| 455 | * and v (pointed to by VP, with VSIZE limbs), and store the result at | ||
| 456 | * PRODP. USIZE + VSIZE limbs are always stored, but if the input | ||
| 457 | * operands are normalized. Return the most significant limb of the | ||
| 458 | * result. | ||
| 459 | * | ||
| 460 | * NOTE: The space pointed to by PRODP is overwritten before finished | ||
| 461 | * with U and V, so overlap is an error. | ||
| 462 | * | ||
| 463 | * Argument constraints: | ||
| 464 | * 1. USIZE >= VSIZE. | ||
| 465 | * 2. PRODP != UP and PRODP != VP, i.e. the destination | ||
| 466 | * must be distinct from the multiplier and the multiplicand. | ||
| 467 | */ | ||
| 468 | |||
| 469 | int | ||
| 470 | mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize, | ||
| 471 | mpi_ptr_t vp, mpi_size_t vsize, mpi_limb_t *_result) | ||
| 472 | { | ||
| 473 | mpi_ptr_t prod_endp = prodp + usize + vsize - 1; | ||
| 474 | mpi_limb_t cy; | ||
| 475 | struct karatsuba_ctx ctx; | ||
| 476 | |||
| 477 | if (vsize < KARATSUBA_THRESHOLD) { | ||
| 478 | mpi_size_t i; | ||
| 479 | mpi_limb_t v_limb; | ||
| 480 | |||
| 481 | if (!vsize) { | ||
| 482 | *_result = 0; | ||
| 483 | return 0; | ||
| 484 | } | ||
| 485 | |||
| 486 | /* Multiply by the first limb in V separately, as the result can be | ||
| 487 | * stored (not added) to PROD. We also avoid a loop for zeroing. */ | ||
| 488 | v_limb = vp[0]; | ||
| 489 | if (v_limb <= 1) { | ||
| 490 | if (v_limb == 1) | ||
| 491 | MPN_COPY(prodp, up, usize); | ||
| 492 | else | ||
| 493 | MPN_ZERO(prodp, usize); | ||
| 494 | cy = 0; | ||
| 495 | } else | ||
| 496 | cy = mpihelp_mul_1(prodp, up, usize, v_limb); | ||
| 497 | |||
| 498 | prodp[usize] = cy; | ||
| 499 | prodp++; | ||
| 500 | |||
| 501 | /* For each iteration in the outer loop, multiply one limb from | ||
| 502 | * U with one limb from V, and add it to PROD. */ | ||
| 503 | for (i = 1; i < vsize; i++) { | ||
| 504 | v_limb = vp[i]; | ||
| 505 | if (v_limb <= 1) { | ||
| 506 | cy = 0; | ||
| 507 | if (v_limb == 1) | ||
| 508 | cy = mpihelp_add_n(prodp, prodp, up, | ||
| 509 | usize); | ||
| 510 | } else | ||
| 511 | cy = mpihelp_addmul_1(prodp, up, usize, v_limb); | ||
| 512 | |||
| 513 | prodp[usize] = cy; | ||
| 514 | prodp++; | ||
| 515 | } | ||
| 516 | |||
| 517 | *_result = cy; | ||
| 518 | return 0; | ||
| 519 | } | ||
| 520 | |||
| 521 | memset(&ctx, 0, sizeof ctx); | ||
| 522 | if (mpihelp_mul_karatsuba_case(prodp, up, usize, vp, vsize, &ctx) < 0) | ||
| 523 | return -ENOMEM; | ||
| 524 | mpihelp_release_karatsuba_ctx(&ctx); | ||
| 525 | *_result = *prod_endp; | ||
| 526 | return 0; | ||
| 527 | } | ||
