Merge branch 'crc32'

Daniel Borkmann says: ==================== crc32 combine improvements So almost a month passed, and I don't want this to get lost somewhere. I have applied the feedback given at that time to this set, rebased plus tested it against latest net-next. I decided to route this via netdev as it improves performance upon library code that provides library bits for SCTP, i.e. for non-linear skb csum handling in IPVS. Thus, resending this for George before it gets lost. ==================== Signed-off-by: David S. Miller <davem@davemloft.net>
author: David S. Miller <davem@davemloft.net> 2014-06-25 19:04:09 -0400
committer: David S. Miller <davem@davemloft.net> 2014-06-25 19:04:09 -0400
commit: cb8eb77663dda1502ec419962980053cd0092d48 (patch)
tree: 734ab1bfada593301e315a18b01ad39e68a9730e
parent: 5433ba365f6dd9f30899188755eb4b093314732c (diff)
parent: d8f1c4778e957273c3b5b6e045d8d3af38484ca8 (diff)
2 files changed, 88 insertions, 85 deletions
diff --git a/include/linux/crc32.h b/include/linux/crc32.h
index 7d275c4fc011..9e8a032c1788 100644
--- a/include/linux/crc32.h
+++ b/include/linux/crc32.h
@@ -8,8 +8,8 @@
 #include <linux/types.h>
 #include <linux/bitrev.h>
-extern u32  crc32_le(u32 crc, unsigned char const *p, size_t len);
+u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len);
-extern u32  crc32_be(u32 crc, unsigned char const *p, size_t len);
+u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len);
 /**
 * crc32_le_combine - Combine two crc32 check values into one. For two
@@ -29,9 +29,14 @@ extern u32  crc32_be(u32 crc, unsigned char const *p, size_t len);
 *         with the same initializer as crc1, and crc2 seed was 0. See
 *         also crc32_combine_test().
 */
-extern u32  crc32_le_combine(u32 crc1, u32 crc2, size_t len2);
+u32 __attribute_const__ crc32_le_shift(u32 crc, size_t len);
-extern u32  __crc32c_le(u32 crc, unsigned char const *p, size_t len);
+static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2)
+{
+        return crc32_le_shift(crc1, len2) ^ crc2;
+}
+u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len);
 /**
 * __crc32c_le_combine - Combine two crc32c check values into one. For two
@@ -51,7 +56,12 @@ extern u32  __crc32c_le(u32 crc, unsigned char const *p, size_t len);
 *         seeded with the same initializer as crc1, and crc2 seed
 *         was 0. See also crc32c_combine_test().
 */
-extern u32  __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2);
+u32 __attribute_const__ __crc32c_le_shift(u32 crc, size_t len);
+static inline u32 __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2)
+{
+        return __crc32c_le_shift(crc1, len2) ^ crc2;
+}
 #define crc32(seed, data, length)  crc32_le(seed, (unsigned char const *)(data), length)
diff --git a/lib/crc32.c b/lib/crc32.c
index 21a7b2135af6..9a907d489d95 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -50,34 +50,10 @@ MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>");
 MODULE_DESCRIPTION("Various CRC32 calculations");
 MODULE_LICENSE("GPL");
-#define GF2_DIM         32
-static u32 gf2_matrix_times(u32 *mat, u32 vec)
-{
-        u32 sum = 0;
-        while (vec) {
-                if (vec & 1)
-                        sum ^= *mat;
-                vec >>= 1;
-                mat++;
-        }
-        return sum;
-}
-static void gf2_matrix_square(u32 *square, u32 *mat)
-{
-        int i;
-        for (i = 0; i < GF2_DIM; i++)
-                square[i] = gf2_matrix_times(mat, mat[i]);
-}
 #if CRC_LE_BITS > 8 || CRC_BE_BITS > 8
 /* implements slicing-by-4 or slicing-by-8 algorithm */
-static inline u32
+static inline u32 __pure
 crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
 {
 # ifdef __LITTLE_ENDIAN
@@ -155,51 +131,6 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
 }
 #endif
-/* For conditions of distribution and use, see copyright notice in zlib.h */
-static u32 crc32_generic_combine(u32 crc1, u32 crc2, size_t len2,
-                                 u32 polynomial)
-{
-        u32 even[GF2_DIM]; /* Even-power-of-two zeros operator */
-        u32 odd[GF2_DIM];  /* Odd-power-of-two zeros operator  */
-        u32 row;
-        int i;
-        if (len2 <= 0)
-                return crc1;
-        /* Put operator for one zero bit in odd */
-        odd[0] = polynomial;
-        row = 1;
-        for (i = 1; i < GF2_DIM; i++) {
-                odd[i] = row;
-                row <<= 1;
-        }
-        gf2_matrix_square(even, odd); /* Put operator for two zero bits in even */
-        gf2_matrix_square(odd, even); /* Put operator for four zero bits in odd */
-        /* Apply len2 zeros to crc1 (first square will put the operator for one
-         * zero byte, eight zero bits, in even).
-         */
-        do {
-                /* Apply zeros operator for this bit of len2 */
-                gf2_matrix_square(even, odd);
-                if (len2 & 1)
-                        crc1 = gf2_matrix_times(even, crc1);
-                len2 >>= 1;
-                /* If no more bits set, then done */
-                if (len2 == 0)
-                        break;
-                /* Another iteration of the loop with odd and even swapped */
-                gf2_matrix_square(odd, even);
-                if (len2 & 1)
-                        crc1 = gf2_matrix_times(odd, crc1);
-                len2 >>= 1;
-        } while (len2 != 0);
-        crc1 ^= crc2;
-        return crc1;
-}
 /**
 * crc32_le_generic() - Calculate bitwise little-endian Ethernet AUTODIN II
@@ -271,19 +202,81 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
                        (const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE);
 }
 #endif
-u32 __pure crc32_le_combine(u32 crc1, u32 crc2, size_t len2)
+EXPORT_SYMBOL(crc32_le);
+EXPORT_SYMBOL(__crc32c_le);
+/*
+ * This multiplies the polynomials x and y modulo the given modulus.
+ * This follows the "little-endian" CRC convention that the lsbit
+ * represents the highest power of x, and the msbit represents x^0.
+ */
+static u32 __attribute_const__ gf2_multiply(u32 x, u32 y, u32 modulus)
 {
-        return crc32_generic_combine(crc1, crc2, len2, CRCPOLY_LE);
+        u32 product = x & 1 ? y : 0;
+        int i;
+        for (i = 0; i < 31; i++) {
+                product = (product >> 1) ^ (product & 1 ? modulus : 0);
+                x >>= 1;
+                product ^= x & 1 ? y : 0;
+        }
+        return product;
 }
-u32 __pure __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2)
+/**
+ * crc32_generic_shift - Append len 0 bytes to crc, in logarithmic time
+ * @crc: The original little-endian CRC (i.e. lsbit is x^31 coefficient)
+ * @len: The number of bytes. @crc is multiplied by x^(8*@len)
+ * @polynomial: The modulus used to reduce the result to 32 bits.
+ *
+ * It's possible to parallelize CRC computations by computing a CRC
+ * over separate ranges of a buffer, then summing them.
+ * This shifts the given CRC by 8*len bits (i.e. produces the same effect
+ * as appending len bytes of zero to the data), in time proportional
+ * to log(len).
+ */
+static u32 __attribute_const__ crc32_generic_shift(u32 crc, size_t len,
+                                                   u32 polynomial)
 {
-        return crc32_generic_combine(crc1, crc2, len2, CRC32C_POLY_LE);
+        u32 power = polynomial; /* CRC of x^32 */
+        int i;
+        /* Shift up to 32 bits in the simple linear way */
+        for (i = 0; i < 8 * (int)(len & 3); i++)
+                crc = (crc >> 1) ^ (crc & 1 ? polynomial : 0);
+        len >>= 2;
+        if (!len)
+                return crc;
+        for (;;) {
+                /* "power" is x^(2^i), modulo the polynomial */
+                if (len & 1)
+                        crc = gf2_multiply(crc, power, polynomial);
+                len >>= 1;
+                if (!len)
+                        break;
+                /* Square power, advancing to x^(2^(i+1)) */
+                power = gf2_multiply(power, power, polynomial);
+        }
+        return crc;
 }
-EXPORT_SYMBOL(crc32_le);
-EXPORT_SYMBOL(crc32_le_combine);
+u32 __attribute_const__ crc32_le_shift(u32 crc, size_t len)
-EXPORT_SYMBOL(__crc32c_le);
+{
-EXPORT_SYMBOL(__crc32c_le_combine);
+        return crc32_generic_shift(crc, len, CRCPOLY_LE);
+}
+u32 __attribute_const__ __crc32c_le_shift(u32 crc, size_t len)
+{
+        return crc32_generic_shift(crc, len, CRC32C_POLY_LE);
+}
+EXPORT_SYMBOL(crc32_le_shift);
+EXPORT_SYMBOL(__crc32c_le_shift);
 /**
 * crc32_be_generic() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32
@@ -351,7 +344,7 @@ EXPORT_SYMBOL(crc32_be);
 #ifdef CONFIG_CRC32_SELFTEST
 /* 4096 random bytes */
-static u8 __attribute__((__aligned__(8))) test_buf[] =
+static u8 const __aligned(8) test_buf[] __initconst =
 {
        0x5b, 0x85, 0x21, 0xcb, 0x09, 0x68, 0x7d, 0x30,
        0xc7, 0x69, 0xd7, 0x30, 0x92, 0xde, 0x59, 0xe4,
@@ -875,7 +868,7 @@ static struct crc_test {
        u32 crc_le;     /* expected crc32_le result */
        u32 crc_be;     /* expected crc32_be result */
        u32 crc32c_le;  /* expected crc32c_le result */
-} test[] =
+} const test[] __initconst =
 {
        {0x674bf11d, 0x00000038, 0x00000542, 0x0af6d466, 0xd8b6e4c1, 0xf6e93d6c},
        {0x35c672c6, 0x0000003a, 0x000001aa, 0xc6d3dfba, 0x28aaf3ad, 0x0fe92aca},
author	David S. Miller <davem@davemloft.net>	2014-06-25 19:04:09 -0400
committer	David S. Miller <davem@davemloft.net>	2014-06-25 19:04:09 -0400
commit	cb8eb77663dda1502ec419962980053cd0092d48 (patch)
tree	734ab1bfada593301e315a18b01ad39e68a9730e
parent	5433ba365f6dd9f30899188755eb4b093314732c (diff)
parent	d8f1c4778e957273c3b5b6e045d8d3af38484ca8 (diff)

diff --git a/include/linux/crc32.h b/include/linux/crc32.h index 7d275c4fc011..9e8a032c1788 100644 --- a/include/linux/crc32.h +++ b/include/linux/crc32.h
@@ -8,8 +8,8 @@
8	#include <linux/types.h>	8	#include <linux/types.h>
9	#include <linux/bitrev.h>	9	#include <linux/bitrev.h>
10		10
11	extern u32 crc32_le(u32 crc, unsigned char const *p, size_t len);	11	u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len);
12	extern u32 crc32_be(u32 crc, unsigned char const *p, size_t len);	12	u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len);
13		13
14	/**	14	/**
15	* crc32_le_combine - Combine two crc32 check values into one. For two	15	* crc32_le_combine - Combine two crc32 check values into one. For two
@@ -29,9 +29,14 @@ extern u32 crc32_be(u32 crc, unsigned char const *p, size_t len);
29	* with the same initializer as crc1, and crc2 seed was 0. See	29	* with the same initializer as crc1, and crc2 seed was 0. See
30	* also crc32_combine_test().	30	* also crc32_combine_test().
31	*/	31	*/
32	extern u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2);	32	u32 __attribute_const__ crc32_le_shift(u32 crc, size_t len);
33		33
34	extern u32 __crc32c_le(u32 crc, unsigned char const *p, size_t len);	34	static inline u32 crc32_le_combine(u32 crc1, u32 crc2, size_t len2)
		35	{
		36	return crc32_le_shift(crc1, len2) ^ crc2;
		37	}
		38
		39	u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len);
35		40
36	/**	41	/**
37	* __crc32c_le_combine - Combine two crc32c check values into one. For two	42	* __crc32c_le_combine - Combine two crc32c check values into one. For two
@@ -51,7 +56,12 @@ extern u32 __crc32c_le(u32 crc, unsigned char const *p, size_t len);
51	* seeded with the same initializer as crc1, and crc2 seed	56	* seeded with the same initializer as crc1, and crc2 seed
52	* was 0. See also crc32c_combine_test().	57	* was 0. See also crc32c_combine_test().
53	*/	58	*/
54	extern u32 __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2);	59	u32 __attribute_const__ __crc32c_le_shift(u32 crc, size_t len);
		60
		61	static inline u32 __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2)
		62	{
		63	return __crc32c_le_shift(crc1, len2) ^ crc2;
		64	}
55		65
56	#define crc32(seed, data, length) crc32_le(seed, (unsigned char const *)(data), length)	66	#define crc32(seed, data, length) crc32_le(seed, (unsigned char const *)(data), length)
57		67


diff --git a/lib/crc32.c b/lib/crc32.c index 21a7b2135af6..9a907d489d95 100644 --- a/lib/crc32.c +++ b/lib/crc32.c
@@ -50,34 +50,10 @@ MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>");
50	MODULE_DESCRIPTION("Various CRC32 calculations");	50	MODULE_DESCRIPTION("Various CRC32 calculations");
51	MODULE_LICENSE("GPL");	51	MODULE_LICENSE("GPL");
52		52
53	#define GF2_DIM 32
54
55	static u32 gf2_matrix_times(u32 *mat, u32 vec)
56	{
57	u32 sum = 0;
58
59	while (vec) {
60	if (vec & 1)
61	sum ^= *mat;
62	vec >>= 1;
63	mat++;
64	}
65
66	return sum;
67	}
68
69	static void gf2_matrix_square(u32 square, u32 mat)
70	{
71	int i;
72
73	for (i = 0; i < GF2_DIM; i++)
74	square[i] = gf2_matrix_times(mat, mat[i]);
75	}
76
77	#if CRC_LE_BITS > 8 \|\| CRC_BE_BITS > 8	53	#if CRC_LE_BITS > 8 \|\| CRC_BE_BITS > 8
78		54
79	/* implements slicing-by-4 or slicing-by-8 algorithm */	55	/* implements slicing-by-4 or slicing-by-8 algorithm */
80	static inline u32	56	static inline u32 __pure
81	crc32_body(u32 crc, unsigned char const buf, size_t len, const u32 (tab)[256])	57	crc32_body(u32 crc, unsigned char const buf, size_t len, const u32 (tab)[256])
82	{	58	{
83	# ifdef __LITTLE_ENDIAN	59	# ifdef __LITTLE_ENDIAN
@@ -155,51 +131,6 @@ crc32_body(u32 crc, unsigned char const buf, size_t len, const u32 (tab)[256])
155	}	131	}
156	#endif	132	#endif
157		133
158	/* For conditions of distribution and use, see copyright notice in zlib.h */
159	static u32 crc32_generic_combine(u32 crc1, u32 crc2, size_t len2,
160	u32 polynomial)
161	{
162	u32 even[GF2_DIM]; /* Even-power-of-two zeros operator */
163	u32 odd[GF2_DIM]; /* Odd-power-of-two zeros operator */
164	u32 row;
165	int i;
166
167	if (len2 <= 0)
168	return crc1;
169
170	/* Put operator for one zero bit in odd */
171	odd[0] = polynomial;
172	row = 1;
173	for (i = 1; i < GF2_DIM; i++) {
174	odd[i] = row;
175	row <<= 1;
176	}
177
178	gf2_matrix_square(even, odd); /* Put operator for two zero bits in even */
179	gf2_matrix_square(odd, even); /* Put operator for four zero bits in odd */
180
181	/* Apply len2 zeros to crc1 (first square will put the operator for one
182	* zero byte, eight zero bits, in even).
183	*/
184	do {
185	/* Apply zeros operator for this bit of len2 */
186	gf2_matrix_square(even, odd);
187	if (len2 & 1)
188	crc1 = gf2_matrix_times(even, crc1);
189	len2 >>= 1;
190	/* If no more bits set, then done */
191	if (len2 == 0)
192	break;
193	/* Another iteration of the loop with odd and even swapped */
194	gf2_matrix_square(odd, even);
195	if (len2 & 1)
196	crc1 = gf2_matrix_times(odd, crc1);
197	len2 >>= 1;
198	} while (len2 != 0);
199
200	crc1 ^= crc2;
201	return crc1;
202	}
203		134
204	/**	135	/**
205	* crc32_le_generic() - Calculate bitwise little-endian Ethernet AUTODIN II	136	* crc32_le_generic() - Calculate bitwise little-endian Ethernet AUTODIN II
@@ -271,19 +202,81 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
271	(const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE);	202	(const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE);
272	}	203	}
273	#endif	204	#endif
274	u32 __pure crc32_le_combine(u32 crc1, u32 crc2, size_t len2)	205	EXPORT_SYMBOL(crc32_le);
		206	EXPORT_SYMBOL(__crc32c_le);
		207
		208	/*
		209	* This multiplies the polynomials x and y modulo the given modulus.
		210	* This follows the "little-endian" CRC convention that the lsbit
		211	* represents the highest power of x, and the msbit represents x^0.
		212	*/
		213	static u32 __attribute_const__ gf2_multiply(u32 x, u32 y, u32 modulus)
275	{	214	{
276	return crc32_generic_combine(crc1, crc2, len2, CRCPOLY_LE);	215	u32 product = x & 1 ? y : 0;
		216	int i;
		217
		218	for (i = 0; i < 31; i++) {
		219	product = (product >> 1) ^ (product & 1 ? modulus : 0);
		220	x >>= 1;
		221	product ^= x & 1 ? y : 0;
		222	}
		223
		224	return product;
277	}	225	}
278		226
279	u32 __pure __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2)	227	/**
		228	* crc32_generic_shift - Append len 0 bytes to crc, in logarithmic time
		229	* @crc: The original little-endian CRC (i.e. lsbit is x^31 coefficient)
		230	* @len: The number of bytes. @crc is multiplied by x^(8*@len)
		231	* @polynomial: The modulus used to reduce the result to 32 bits.
		232	*
		233	* It's possible to parallelize CRC computations by computing a CRC
		234	* over separate ranges of a buffer, then summing them.
		235	* This shifts the given CRC by 8*len bits (i.e. produces the same effect
		236	* as appending len bytes of zero to the data), in time proportional
		237	* to log(len).
		238	*/
		239	static u32 __attribute_const__ crc32_generic_shift(u32 crc, size_t len,
		240	u32 polynomial)
280	{	241	{
281	return crc32_generic_combine(crc1, crc2, len2, CRC32C_POLY_LE);	242	u32 power = polynomial; /* CRC of x^32 */
		243	int i;
		244
		245	/* Shift up to 32 bits in the simple linear way */
		246	for (i = 0; i < 8 * (int)(len & 3); i++)
		247	crc = (crc >> 1) ^ (crc & 1 ? polynomial : 0);
		248
		249	len >>= 2;
		250	if (!len)
		251	return crc;
		252
		253	for (;;) {
		254	/* "power" is x^(2^i), modulo the polynomial */
		255	if (len & 1)
		256	crc = gf2_multiply(crc, power, polynomial);
		257
		258	len >>= 1;
		259	if (!len)
		260	break;
		261
		262	/* Square power, advancing to x^(2^(i+1)) */
		263	power = gf2_multiply(power, power, polynomial);
		264	}
		265
		266	return crc;
282	}	267	}
283	EXPORT_SYMBOL(crc32_le);	268
284	EXPORT_SYMBOL(crc32_le_combine);	269	u32 __attribute_const__ crc32_le_shift(u32 crc, size_t len)
285	EXPORT_SYMBOL(__crc32c_le);	270	{
286	EXPORT_SYMBOL(__crc32c_le_combine);	271	return crc32_generic_shift(crc, len, CRCPOLY_LE);
		272	}
		273
		274	u32 __attribute_const__ __crc32c_le_shift(u32 crc, size_t len)
		275	{
		276	return crc32_generic_shift(crc, len, CRC32C_POLY_LE);
		277	}
		278	EXPORT_SYMBOL(crc32_le_shift);
		279	EXPORT_SYMBOL(__crc32c_le_shift);
287		280
288	/**	281	/**
289	* crc32_be_generic() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32	282	* crc32_be_generic() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32
@@ -351,7 +344,7 @@ EXPORT_SYMBOL(crc32_be);
351	#ifdef CONFIG_CRC32_SELFTEST	344	#ifdef CONFIG_CRC32_SELFTEST
352		345
353	/* 4096 random bytes */	346	/* 4096 random bytes */
354	static u8 __attribute__((__aligned__(8))) test_buf[] =	347	static u8 const __aligned(8) test_buf[] __initconst =
355	{	348	{
356	0x5b, 0x85, 0x21, 0xcb, 0x09, 0x68, 0x7d, 0x30,	349	0x5b, 0x85, 0x21, 0xcb, 0x09, 0x68, 0x7d, 0x30,
357	0xc7, 0x69, 0xd7, 0x30, 0x92, 0xde, 0x59, 0xe4,	350	0xc7, 0x69, 0xd7, 0x30, 0x92, 0xde, 0x59, 0xe4,
@@ -875,7 +868,7 @@ static struct crc_test {
875	u32 crc_le; /* expected crc32_le result */	868	u32 crc_le; /* expected crc32_le result */
876	u32 crc_be; /* expected crc32_be result */	869	u32 crc_be; /* expected crc32_be result */
877	u32 crc32c_le; /* expected crc32c_le result */	870	u32 crc32c_le; /* expected crc32c_le result */
878	} test[] =	871	} const test[] __initconst =
879	{	872	{
880	{0x674bf11d, 0x00000038, 0x00000542, 0x0af6d466, 0xd8b6e4c1, 0xf6e93d6c},	873	{0x674bf11d, 0x00000038, 0x00000542, 0x0af6d466, 0xd8b6e4c1, 0xf6e93d6c},
881	{0x35c672c6, 0x0000003a, 0x000001aa, 0xc6d3dfba, 0x28aaf3ad, 0x0fe92aca},	874	{0x35c672c6, 0x0000003a, 0x000001aa, 0xc6d3dfba, 0x28aaf3ad, 0x0fe92aca},