author     George Spelvin <linux@horizon.com>      2014-06-23 09:11:54 -0400
committer  David S. Miller <davem@davemloft.net>   2014-06-25 19:03:59 -0400
commit     6d514b4e7737ad75a7e7e0a3f7dde45d46341691 (patch)
tree       20edf4e5f96fbf08e9e0136fec63bf13009f026a /lib
parent     5433ba365f6dd9f30899188755eb4b093314732c (diff)
lib: crc32: Greatly shrink CRC combining code
There's no need for a full 32x32 matrix, when rows before the last are just
shifted copies of the rows after them.

There's still room for improvement (especially on x86 processors with CRC32
and PCLMUL instructions), but this is a large step in the right direction
[which is in particular useful for its current user, namely SCTP checksumming
over multiple skb frags[] entries, i.e. in IPVS balancing when other CRC32
offloads are not available].

The internal primitive is now called crc32_generic_shift and takes one less
argument; the XOR with crc2 is done in inline wrappers.

Signed-off-by: George Spelvin <linux@horizon.com>
Signed-off-by: Daniel Borkmann <dborkman@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
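Note: the inline wrappers mentioned above live on the header side of the
change, which falls outside the 'lib' diffstat below and therefore does not
appear in this diff. A sketch of what they presumably look like follows; the
exact declarations are assumed for illustration, not quoted from the header:

/*
 * Sketch of the combine wrappers described in the commit message. The
 * real definitions are in the crc32 header (not shown in this diff),
 * so treat the precise shape here as an assumption.
 */
u32 crc32_le_shift(u32 crc, size_t len);
u32 __crc32c_le_shift(u32 crc, size_t len);

static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2)
{
        /* Shift crc1 past len2 appended zero bytes, then XOR in crc2 */
        return crc32_le_shift(crc1, len2) ^ crc2;
}

static inline u32 __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2)
{
        return __crc32c_le_shift(crc1, len2) ^ crc2;
}

This keeps the old combine calling convention for callers while the library
only needs to export the two shift primitives.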
Diffstat (limited to 'lib')
-rw-r--r--  lib/crc32.c | 147
1 file changed, 70 insertions(+), 77 deletions(-)
diff --git a/lib/crc32.c b/lib/crc32.c
index 21a7b2135af6..9af30ff334c5 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -50,30 +50,6 @@ MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>");
 MODULE_DESCRIPTION("Various CRC32 calculations");
 MODULE_LICENSE("GPL");
 
-#define GF2_DIM 32
-
-static u32 gf2_matrix_times(u32 *mat, u32 vec)
-{
-        u32 sum = 0;
-
-        while (vec) {
-                if (vec & 1)
-                        sum ^= *mat;
-                vec >>= 1;
-                mat++;
-        }
-
-        return sum;
-}
-
-static void gf2_matrix_square(u32 *square, u32 *mat)
-{
-        int i;
-
-        for (i = 0; i < GF2_DIM; i++)
-                square[i] = gf2_matrix_times(mat, mat[i]);
-}
-
 #if CRC_LE_BITS > 8 || CRC_BE_BITS > 8
 
 /* implements slicing-by-4 or slicing-by-8 algorithm */
@@ -155,51 +131,6 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
 }
 #endif
 
-/* For conditions of distribution and use, see copyright notice in zlib.h */
-static u32 crc32_generic_combine(u32 crc1, u32 crc2, size_t len2,
-                                 u32 polynomial)
-{
-        u32 even[GF2_DIM]; /* Even-power-of-two zeros operator */
-        u32 odd[GF2_DIM];  /* Odd-power-of-two zeros operator */
-        u32 row;
-        int i;
-
-        if (len2 <= 0)
-                return crc1;
-
-        /* Put operator for one zero bit in odd */
-        odd[0] = polynomial;
-        row = 1;
-        for (i = 1; i < GF2_DIM; i++) {
-                odd[i] = row;
-                row <<= 1;
-        }
-
-        gf2_matrix_square(even, odd); /* Put operator for two zero bits in even */
-        gf2_matrix_square(odd, even); /* Put operator for four zero bits in odd */
-
-        /* Apply len2 zeros to crc1 (first square will put the operator for one
-         * zero byte, eight zero bits, in even).
-         */
-        do {
-                /* Apply zeros operator for this bit of len2 */
-                gf2_matrix_square(even, odd);
-                if (len2 & 1)
-                        crc1 = gf2_matrix_times(even, crc1);
-                len2 >>= 1;
-                /* If no more bits set, then done */
-                if (len2 == 0)
-                        break;
-                /* Another iteration of the loop with odd and even swapped */
-                gf2_matrix_square(odd, even);
-                if (len2 & 1)
-                        crc1 = gf2_matrix_times(odd, crc1);
-                len2 >>= 1;
-        } while (len2 != 0);
-
-        crc1 ^= crc2;
-        return crc1;
-}
 
 /**
  * crc32_le_generic() - Calculate bitwise little-endian Ethernet AUTODIN II
@@ -271,19 +202,81 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
                          (const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE);
 }
 #endif
-u32 __pure crc32_le_combine(u32 crc1, u32 crc2, size_t len2)
-{
-        return crc32_generic_combine(crc1, crc2, len2, CRCPOLY_LE);
-}
-
-u32 __pure __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2)
-{
-        return crc32_generic_combine(crc1, crc2, len2, CRC32C_POLY_LE);
-}
-EXPORT_SYMBOL(crc32_le);
-EXPORT_SYMBOL(crc32_le_combine);
-EXPORT_SYMBOL(__crc32c_le);
-EXPORT_SYMBOL(__crc32c_le_combine);
+EXPORT_SYMBOL(crc32_le);
+EXPORT_SYMBOL(__crc32c_le);
+
+/*
+ * This multiplies the polynomials x and y modulo the given modulus.
+ * This follows the "little-endian" CRC convention that the lsbit
+ * represents the highest power of x, and the msbit represents x^0.
+ */
+static u32 __attribute_const__ gf2_multiply(u32 x, u32 y, u32 modulus)
+{
+        u32 product = x & 1 ? y : 0;
+        int i;
+
+        for (i = 0; i < 31; i++) {
+                product = (product >> 1) ^ (product & 1 ? modulus : 0);
+                x >>= 1;
+                product ^= x & 1 ? y : 0;
+        }
+
+        return product;
+}
+
+/**
+ * crc32_generic_shift - Append len 0 bytes to crc, in logarithmic time
+ * @crc: The original little-endian CRC (i.e. lsbit is x^31 coefficient)
+ * @len: The number of bytes. @crc is multiplied by x^(8*@len)
+ * @polynomial: The modulus used to reduce the result to 32 bits.
+ *
+ * It's possible to parallelize CRC computations by computing a CRC
+ * over separate ranges of a buffer, then summing them.
+ * This shifts the given CRC by 8*len bits (i.e. produces the same effect
+ * as appending len bytes of zero to the data), in time proportional
+ * to log(len).
+ */
+static u32 __attribute_const__ crc32_generic_shift(u32 crc, size_t len,
+                                                   u32 polynomial)
+{
+        u32 power = polynomial; /* CRC of x^32 */
+        int i;
+
+        /* Shift up to 32 bits in the simple linear way */
+        for (i = 0; i < 8 * (int)(len & 3); i++)
+                crc = (crc >> 1) ^ (crc & 1 ? polynomial : 0);
+
+        len >>= 2;
+        if (!len)
+                return crc;
+
+        for (;;) {
+                /* "power" is x^(2^i), modulo the polynomial */
+                if (len & 1)
+                        crc = gf2_multiply(crc, power, polynomial);
+
+                len >>= 1;
+                if (!len)
+                        break;
+
+                /* Square power, advancing to x^(2^(i+1)) */
+                power = gf2_multiply(power, power, polynomial);
+        }
+
+        return crc;
+}
+
+u32 __attribute_const__ crc32_le_shift(u32 crc, size_t len)
+{
+        return crc32_generic_shift(crc, len, CRCPOLY_LE);
+}
+
+u32 __attribute_const__ __crc32c_le_shift(u32 crc, size_t len)
+{
+        return crc32_generic_shift(crc, len, CRC32C_POLY_LE);
+}
+EXPORT_SYMBOL(crc32_le_shift);
+EXPORT_SYMBOL(__crc32c_le_shift);
 
 /**
  * crc32_be_generic() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32
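As a usage sketch (a hypothetical caller, not part of this patch), the shift
primitive lets the CRC of a buffer split into fragments be reassembled from
per-fragment CRCs, which is what the SCTP/IPVS use case above relies on. The
function name and fragment layout below are invented for illustration:

#include <linux/crc32.h>

/*
 * Hypothetical caller: compute the CRC of a buffer split into two
 * fragments and stitch the per-fragment results together.
 */
static u32 crc32_of_two_fragments(const u8 *frag_a, size_t len_a,
                                  const u8 *frag_b, size_t len_b)
{
        u32 crc_a = crc32_le(~0, frag_a, len_a); /* running CRC after fragment A */
        u32 crc_b = crc32_le(0, frag_b, len_b);  /* fragment B, seeded with 0 */

        /* Same value as crc32_le(~0, <A followed by B>, len_a + len_b) */
        return crc32_le_combine(crc_a, crc_b, len_b);
}

Because the second fragment's CRC is seeded with 0, combining reduces to
shifting crc_a past len_b zero bytes and XORing in crc_b, which is exactly
what the new crc32_generic_shift() computes in logarithmic time.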