diff options
author | Rik Snel <rsnel@cube.dyndns.org> | 2006-11-29 02:59:44 -0500 |
---|---|---|
committer | David S. Miller <davem@sunset.davemloft.net> | 2006-12-06 21:38:55 -0500 |
commit | c494e0705d670c51ac736c8c4d92750705fe3187 (patch) | |
tree | 9f00826afc317f976c03ef4e77284b13204c0c9d /include/crypto/gf128mul.h | |
parent | aec3694b987900de7ab789ea5749d673e0d634c4 (diff) |
[CRYPTO] lib: table driven multiplications in GF(2^128)
A lot of cypher modes need multiplications in GF(2^128). LRW, ABL, GCM...
I use functions from this library in my LRW implementation and I will
also use them in my ABL (Arbitrary Block Length, an unencumbered (correct
me if I am wrong) wide block cipher mode).
Elements of GF(2^128) must be presented as u128 *, it encourages automatic
and proper alignment.
The library contains support for two different representations of GF(2^128),
see the comment in gf128mul.h. There are different levels of optimization
(memory/speed tradeoff).
The code is based on work by Dr Brian Gladman. Notable changes:
- deletion of two optimization modes
- change from u32 to u64 for faster handling on 64bit machines
- support for 'bbe' representation in addition to the, already implemented,
'lle' representation.
- move 'inline void' functions from header to 'static void' in the
source file
- update to use the linux coding style conventions
The original can be found at:
http://fp.gladman.plus.com/AES/modes.vc8.19-06-06.zip
The copyright (and GPL statement) of the original author is preserved.
Signed-off-by: Rik Snel <rsnel@cube.dyndns.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Diffstat (limited to 'include/crypto/gf128mul.h')
-rw-r--r-- | include/crypto/gf128mul.h | 198 |
1 files changed, 198 insertions, 0 deletions
diff --git a/include/crypto/gf128mul.h b/include/crypto/gf128mul.h new file mode 100644 index 000000000000..4fd315202442 --- /dev/null +++ b/include/crypto/gf128mul.h | |||
@@ -0,0 +1,198 @@ | |||
1 | /* gf128mul.h - GF(2^128) multiplication functions | ||
2 | * | ||
3 | * Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. | ||
4 | * Copyright (c) 2006 Rik Snel <rsnel@cube.dyndns.org> | ||
5 | * | ||
6 | * Based on Dr Brian Gladman's (GPL'd) work published at | ||
7 | * http://fp.gladman.plus.com/cryptography_technology/index.htm | ||
8 | * See the original copyright notice below. | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify it | ||
11 | * under the terms of the GNU General Public License as published by the Free | ||
12 | * Software Foundation; either version 2 of the License, or (at your option) | ||
13 | * any later version. | ||
14 | */ | ||
15 | /* | ||
16 | --------------------------------------------------------------------------- | ||
17 | Copyright (c) 2003, Dr Brian Gladman, Worcester, UK. All rights reserved. | ||
18 | |||
19 | LICENSE TERMS | ||
20 | |||
21 | The free distribution and use of this software in both source and binary | ||
22 | form is allowed (with or without changes) provided that: | ||
23 | |||
24 | 1. distributions of this source code include the above copyright | ||
25 | notice, this list of conditions and the following disclaimer; | ||
26 | |||
27 | 2. distributions in binary form include the above copyright | ||
28 | notice, this list of conditions and the following disclaimer | ||
29 | in the documentation and/or other associated materials; | ||
30 | |||
31 | 3. the copyright holder's name is not used to endorse products | ||
32 | built using this software without specific written permission. | ||
33 | |||
34 | ALTERNATIVELY, provided that this notice is retained in full, this product | ||
35 | may be distributed under the terms of the GNU General Public License (GPL), | ||
36 | in which case the provisions of the GPL apply INSTEAD OF those given above. | ||
37 | |||
38 | DISCLAIMER | ||
39 | |||
40 | This software is provided 'as is' with no explicit or implied warranties | ||
41 | in respect of its properties, including, but not limited to, correctness | ||
42 | and/or fitness for purpose. | ||
43 | --------------------------------------------------------------------------- | ||
44 | Issue Date: 31/01/2006 | ||
45 | |||
46 | An implementation of field multiplication in Galois Field GF(2^128) | |
47 | */ | ||
48 | |||
49 | #ifndef _CRYPTO_GF128MUL_H | ||
50 | #define _CRYPTO_GF128MUL_H | ||
51 | |||
52 | #include <crypto/b128ops.h> | ||
53 | #include <linux/slab.h> | ||
54 | |||
55 | /* Comment by Rik: | ||
56 | * | ||
57 | * For some background on GF(2^128) see for example: | |
58 | * http://csrc.nist.gov/CryptoToolkit/modes/proposedmodes/gcm/gcm-revised-spec.pdf | |
59 | * | ||
60 | * The elements of GF(2^128) := GF(2)[X]/(X^128-X^7-X^2-X^1-1) can | ||
61 | * be mapped to computer memory in a variety of ways. Let's examine | ||
62 | * three common cases. | ||
63 | * | ||
64 | * Take a look at the 16 binary octets below in memory order. The msb's | ||
65 | * are left and the lsb's are right. char b[16] is an array and b[0] is | ||
66 | * the first octet. | ||
67 | * | ||
68 | * 80000000 00000000 00000000 00000000 .... 00000000 00000000 00000000 | ||
69 | * b[0] b[1] b[2] b[3] b[13] b[14] b[15] | ||
70 | * | ||
71 | * Every bit is a coefficient of some power of X. We can store the bits | ||
72 | * in every byte in little-endian order and the bytes themselves also in | ||
73 | * little endian order. I will call this lle (little-little-endian). | ||
74 | * The above buffer represents the polynomial 1, and X^7+X^2+X^1+1 looks | ||
75 | * like 11100001 00000000 .... 00000000 = { 0xE1, 0x00, }. | ||
76 | * This format was originally implemented in gf128mul and is used | ||
77 | * in GCM (Galois/Counter mode) and in ABL (Arbitrary Block Length). | ||
78 | * | ||
79 | * Another convention says: store the bits in bigendian order and the | ||
80 | * bytes also. This is bbe (big-big-endian). Now the buffer above | ||
81 | * represents X^127. X^7+X^2+X^1+1 looks like 00000000 .... 10000111, | ||
82 | * b[15] = 0x87 and the rest is 0. LRW uses this convention and bbe | ||
83 | * is partly implemented. | ||
84 | * | ||
85 | * Both of the above formats are easy to implement on big-endian | ||
86 | * machines. | ||
87 | * | ||
88 | * EME (which is patent encumbered) uses the ble format (bits are stored | ||
89 | * in big endian order and the bytes in little endian). The above buffer | ||
90 | * represents X^7 in this case and the primitive polynomial is b[0] = 0x87. | ||
91 | * | ||
92 | * The common machine word-size is smaller than 128 bits, so to make | ||
93 | * an efficient implementation we must split into machine word sizes. | ||
94 | * The diagrams in this file use 32-bit words for the moment. Machine endianness comes into | |
95 | * play. The lle format in relation to machine endianness is discussed | ||
96 | * below by the original author of gf128mul Dr Brian Gladman. | ||
97 | * | ||
98 | * Let's look at the bbe and ble format on a little endian machine. | ||
99 | * | ||
100 | * bbe on a little endian machine u32 x[4]: | ||
101 | * | ||
102 | * MS x[0] LS MS x[1] LS | ||
103 | * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls | ||
104 | * 103..96 111.104 119.112 127.120 71...64 79...72 87...80 95...88 | ||
105 | * | ||
106 | * MS x[2] LS MS x[3] LS | ||
107 | * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls | ||
108 | * 39...32 47...40 55...48 63...56 07...00 15...08 23...16 31...24 | ||
109 | * | ||
110 | * ble on a little endian machine | ||
111 | * | ||
112 | * MS x[0] LS MS x[1] LS | ||
113 | * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls | ||
114 | * 31...24 23...16 15...08 07...00 63...56 55...48 47...40 39...32 | ||
115 | * | ||
116 | * MS x[2] LS MS x[3] LS | ||
117 | * ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls | ||
118 | * 95...88 87...80 79...72 71...64 127.120 119.112 111.104 103..96 | |
119 | * | ||
120 | * Multiplications in GF(2^128) are mostly bit-shifts, so you see why | ||
121 | * ble (and lbe also) are easier to implement on a little-endian | ||
122 | * machine than on a big-endian machine. The converse holds for bbe | ||
123 | * and lle. | ||
124 | * | ||
125 | * Note: to have good alignment, it seems to me that it is sufficient | ||
126 | * to keep elements of GF(2^128) in type u64[2]. On 32-bit wordsize | ||
127 | * machines this will automatically be aligned to wordsize and on a 64-bit | |
128 | * machine also. | ||
129 | */ | ||
130 | /* Multiply a GF128 field element by x. Field elements are held in arrays | ||
131 | of bytes in which field bits 8n..8n + 7 are held in byte[n], with lower | ||
132 | indexed bits placed in the more numerically significant bit positions | ||
133 | within bytes. | ||
134 | |||
135 | On little endian machines the bit indexes translate into the bit | ||
136 | positions within four 32-bit words in the following way | ||
137 | |||
138 | MS x[0] LS MS x[1] LS | ||
139 | ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls | ||
140 | 24...31 16...23 08...15 00...07 56...63 48...55 40...47 32...39 | ||
141 | |||
142 | MS x[2] LS MS x[3] LS | ||
143 | ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls | ||
144 | 88...95 80...87 72...79 64...71 120.127 112.119 104.111 96..103 | ||
145 | |||
146 | On big endian machines the bit indexes translate into the bit | ||
147 | positions within four 32-bit words in the following way | ||
148 | |||
149 | MS x[0] LS MS x[1] LS | ||
150 | ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls | ||
151 | 00...07 08...15 16...23 24...31 32...39 40...47 48...55 56...63 | ||
152 | |||
153 | MS x[2] LS MS x[3] LS | ||
154 | ms ls ms ls ms ls ms ls ms ls ms ls ms ls ms ls | ||
155 | 64...71 72...79 80...87 88...95 96..103 104.111 112.119 120.127 | ||
156 | */ | ||
157 | |||
158 | /* A slow generic version of gf_mul, implemented for both the lle and | |
159 | * bbe representations. It multiplies a and b and puts the result in a. */ | |
160 | void gf128mul_lle(be128 *a, const be128 *b); | |
161 | |
162 | void gf128mul_bbe(be128 *a, const be128 *b); | |
163 | |||
164 | |||
165 | /* 4k table optimization: a single table of 256 be128 entries */ | |
166 | |
167 | struct gf128mul_4k { | |
168 | be128 t[256]; /* 4 KiB in total, assuming sizeof(be128) == 16 - TODO confirm in crypto/b128ops.h */ | |
169 | }; | |
170 | |||
171 | struct gf128mul_4k *gf128mul_init_4k_lle(const be128 *g); /* NOTE(review): presumably builds the lle table for the constant factor g - confirm in gf128mul.c */ | |
172 | struct gf128mul_4k *gf128mul_init_4k_bbe(const be128 *g); /* bbe counterpart of the above */ | |
173 | void gf128mul_4k_lle(be128 *a, struct gf128mul_4k *t); /* NOTE(review): presumably multiplies *a in place using table t (lle) - confirm */ | |
174 | void gf128mul_4k_bbe(be128 *a, struct gf128mul_4k *t); /* bbe counterpart of the above */ | |
175 | |||
176 | static inline void gf128mul_free_4k(struct gf128mul_4k *t) | |
177 | { | |
178 | kfree(t); /* release the 4k table; struct gf128mul_4k embeds its array, so a single kfree() covers it */ | |
179 | } | |
180 | |||
181 | |||
182 | /* 64k table optimization, implemented for lle and bbe: 16 pointers to 4k tables */ | |
183 | |
184 | struct gf128mul_64k { | |
185 | struct gf128mul_4k *t[16]; /* NOTE(review): each entry presumably points to its own allocation, hence the out-of-line gf128mul_free_64k() - confirm */ | |
186 | }; | |
187 | |||
188 | /* First initialize with the constant factor with which you want to | |
189 | * multiply, then call gf128mul_64k_lle (or gf128mul_64k_bbe) with the | |
190 | * other factor in the first argument and the table in the second; there | |
191 | * is no scratch argument. Afterwards the product is stored in *a. */ | |
192 | struct gf128mul_64k *gf128mul_init_64k_lle(const be128 *g); | |
193 | struct gf128mul_64k *gf128mul_init_64k_bbe(const be128 *g); | |
194 | void gf128mul_free_64k(struct gf128mul_64k *t); | |
195 | void gf128mul_64k_lle(be128 *a, struct gf128mul_64k *t); | |
196 | void gf128mul_64k_bbe(be128 *a, struct gf128mul_64k *t); | |
197 | |||
198 | #endif /* _CRYPTO_GF128MUL_H */ | ||