diff options
author | Bob Pearson <rpearson@systemfabricworks.com> | 2012-03-23 18:02:24 -0400 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-23 19:58:37 -0400 |
commit | 324eb0f17d9dcead3c60c133aa244f6b3631fec9 (patch) | |
tree | 657c564269a0fc1921f4252dbf0184725f5c85fa | |
parent | 9a1dbf6a29694c9d81b498019f103aee0f8b5b6c (diff) |
crc32: add slice-by-8 algorithm to existing code
Add the slicing-by-8 algorithm alongside the existing slicing-by-4
algorithm. This consists of:
- extending the largest BITS size from 32 to 64
- extending the tables from tab[4][256] to up to tab[8][256]
- adding code for the inner loop.
[djwong@us.ibm.com: Minor changelog tweaks]
Signed-off-by: Bob Pearson <rpearson@systemfabricworks.com>
Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- | lib/crc32.c | 38 | ||||
-rw-r--r-- | lib/crc32defs.h | 29 | ||||
-rw-r--r-- | lib/gen_crc32table.c | 43 |
3 files changed, 75 insertions, 35 deletions
diff --git a/lib/crc32.c b/lib/crc32.c index 5971f2ad46d5..826e16352e0e 100644 --- a/lib/crc32.c +++ b/lib/crc32.c | |||
@@ -47,25 +47,28 @@ MODULE_LICENSE("GPL"); | |||
47 | 47 | ||
48 | #if CRC_LE_BITS > 8 || CRC_BE_BITS > 8 | 48 | #if CRC_LE_BITS > 8 || CRC_BE_BITS > 8 |
49 | 49 | ||
50 | /* implements slicing-by-4 or slicing-by-8 algorithm */ | ||
50 | static inline u32 | 51 | static inline u32 |
51 | crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) | 52 | crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) |
52 | { | 53 | { |
53 | # ifdef __LITTLE_ENDIAN | 54 | # ifdef __LITTLE_ENDIAN |
54 | # define DO_CRC(x) crc = t0[(crc ^ (x)) & 255] ^ (crc >> 8) | 55 | # define DO_CRC(x) crc = t0[(crc ^ (x)) & 255] ^ (crc >> 8) |
55 | # define DO_CRC4 crc = t3[(crc) & 255] ^ \ | 56 | # define DO_CRC4 (t3[(q) & 255] ^ t2[(q >> 8) & 255] ^ \ |
56 | t2[(crc >> 8) & 255] ^ \ | 57 | t1[(q >> 16) & 255] ^ t0[(q >> 24) & 255]) |
57 | t1[(crc >> 16) & 255] ^ \ | 58 | # define DO_CRC8 (t7[(q) & 255] ^ t6[(q >> 8) & 255] ^ \ |
58 | t0[(crc >> 24) & 255] | 59 | t5[(q >> 16) & 255] ^ t4[(q >> 24) & 255]) |
59 | # else | 60 | # else |
60 | # define DO_CRC(x) crc = t0[((crc >> 24) ^ (x)) & 255] ^ (crc << 8) | 61 | # define DO_CRC(x) crc = t0[((crc >> 24) ^ (x)) & 255] ^ (crc << 8) |
61 | # define DO_CRC4 crc = t0[(crc) & 255] ^ \ | 62 | # define DO_CRC4 (t0[(q) & 255] ^ t1[(q >> 8) & 255] ^ \ |
62 | t1[(crc >> 8) & 255] ^ \ | 63 | t2[(q >> 16) & 255] ^ t3[(q >> 24) & 255]) |
63 | t2[(crc >> 16) & 255] ^ \ | 64 | # define DO_CRC8 (t4[(q) & 255] ^ t5[(q >> 8) & 255] ^ \ |
64 | t3[(crc >> 24) & 255] | 65 | t6[(q >> 16) & 255] ^ t7[(q >> 24) & 255]) |
65 | # endif | 66 | # endif |
66 | const u32 *b; | 67 | const u32 *b; |
67 | size_t rem_len; | 68 | size_t rem_len; |
68 | const u32 *t0=tab[0], *t1=tab[1], *t2=tab[2], *t3=tab[3]; | 69 | const u32 *t0=tab[0], *t1=tab[1], *t2=tab[2], *t3=tab[3]; |
70 | const u32 *t4 = tab[4], *t5 = tab[5], *t6 = tab[6], *t7 = tab[7]; | ||
71 | u32 q; | ||
69 | 72 | ||
70 | /* Align it */ | 73 | /* Align it */ |
71 | if (unlikely((long)buf & 3 && len)) { | 74 | if (unlikely((long)buf & 3 && len)) { |
@@ -73,13 +76,25 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) | |||
73 | DO_CRC(*buf++); | 76 | DO_CRC(*buf++); |
74 | } while ((--len) && ((long)buf)&3); | 77 | } while ((--len) && ((long)buf)&3); |
75 | } | 78 | } |
79 | |||
80 | # if CRC_LE_BITS == 32 | ||
76 | rem_len = len & 3; | 81 | rem_len = len & 3; |
77 | /* load data 32 bits wide, xor data 32 bits wide. */ | ||
78 | len = len >> 2; | 82 | len = len >> 2; |
83 | # else | ||
84 | rem_len = len & 7; | ||
85 | len = len >> 3; | ||
86 | # endif | ||
87 | |||
79 | b = (const u32 *)buf; | 88 | b = (const u32 *)buf; |
80 | for (--b; len; --len) { | 89 | for (--b; len; --len) { |
81 | crc ^= *++b; /* use pre increment for speed */ | 90 | q = crc ^ *++b; /* use pre increment for speed */ |
82 | DO_CRC4; | 91 | # if CRC_LE_BITS == 32 |
92 | crc = DO_CRC4; | ||
93 | # else | ||
94 | crc = DO_CRC8; | ||
95 | q = *++b; | ||
96 | crc ^= DO_CRC4; | ||
97 | # endif | ||
83 | } | 98 | } |
84 | len = rem_len; | 99 | len = rem_len; |
85 | /* And the last few bytes */ | 100 | /* And the last few bytes */ |
@@ -92,6 +107,7 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) | |||
92 | return crc; | 107 | return crc; |
93 | #undef DO_CRC | 108 | #undef DO_CRC |
94 | #undef DO_CRC4 | 109 | #undef DO_CRC4 |
110 | #undef DO_CRC8 | ||
95 | } | 111 | } |
96 | #endif | 112 | #endif |
97 | 113 | ||
diff --git a/lib/crc32defs.h b/lib/crc32defs.h index daa3a5e85f60..818159288489 100644 --- a/lib/crc32defs.h +++ b/lib/crc32defs.h | |||
@@ -6,29 +6,42 @@ | |||
6 | #define CRCPOLY_LE 0xedb88320 | 6 | #define CRCPOLY_LE 0xedb88320 |
7 | #define CRCPOLY_BE 0x04c11db7 | 7 | #define CRCPOLY_BE 0x04c11db7 |
8 | 8 | ||
9 | /* How many bits at a time to use. Valid values are 1, 2, 4, 8, and 32. */ | 9 | /* |
10 | /* For less performance-sensitive, use 4 or 8 */ | 10 | * How many bits at a time to use. Valid values are 1, 2, 4, 8, 32 and 64. |
11 | * For less performance-sensitive uses, choose 4 or 8 to save table size. | ||
12 | * For larger systems, the default matches the CPU architecture (64 or 32 bits). | ||
13 | * This works well on X86_64, SPARC64 systems. This may require some | ||
14 | * elaboration after experiments with other architectures. | ||
15 | */ | ||
11 | #ifndef CRC_LE_BITS | 16 | #ifndef CRC_LE_BITS |
12 | # define CRC_LE_BITS 32 | 17 | # ifdef CONFIG_64BIT |
18 | # define CRC_LE_BITS 64 | ||
19 | # else | ||
20 | # define CRC_LE_BITS 32 | ||
21 | # endif | ||
13 | #endif | 22 | #endif |
14 | #ifndef CRC_BE_BITS | 23 | #ifndef CRC_BE_BITS |
15 | # define CRC_BE_BITS 32 | 24 | # ifdef CONFIG_64BIT |
25 | # define CRC_BE_BITS 64 | ||
26 | # else | ||
27 | # define CRC_BE_BITS 32 | ||
28 | # endif | ||
16 | #endif | 29 | #endif |
17 | 30 | ||
18 | /* | 31 | /* |
19 | * Little-endian CRC computation. Used with serial bit streams sent | 32 | * Little-endian CRC computation. Used with serial bit streams sent |
20 | * lsbit-first. Be sure to use cpu_to_le32() to append the computed CRC. | 33 | * lsbit-first. Be sure to use cpu_to_le32() to append the computed CRC. |
21 | */ | 34 | */ |
22 | #if CRC_LE_BITS > 32 || CRC_LE_BITS < 1 || CRC_LE_BITS == 16 || \ | 35 | #if CRC_LE_BITS > 64 || CRC_LE_BITS < 1 || CRC_LE_BITS == 16 || \ |
23 | CRC_LE_BITS & CRC_LE_BITS-1 | 36 | CRC_LE_BITS & CRC_LE_BITS-1 |
24 | # error "CRC_LE_BITS must be one of {1, 2, 4, 8, 32}" | 37 | # error "CRC_LE_BITS must be one of {1, 2, 4, 8, 32, 64}" |
25 | #endif | 38 | #endif |
26 | 39 | ||
27 | /* | 40 | /* |
28 | * Big-endian CRC computation. Used with serial bit streams sent | 41 | * Big-endian CRC computation. Used with serial bit streams sent |
29 | * msbit-first. Be sure to use cpu_to_be32() to append the computed CRC. | 42 | * msbit-first. Be sure to use cpu_to_be32() to append the computed CRC. |
30 | */ | 43 | */ |
31 | #if CRC_BE_BITS > 32 || CRC_BE_BITS < 1 || CRC_BE_BITS == 16 || \ | 44 | #if CRC_BE_BITS > 64 || CRC_BE_BITS < 1 || CRC_BE_BITS == 16 || \ |
32 | CRC_BE_BITS & CRC_BE_BITS-1 | 45 | CRC_BE_BITS & CRC_BE_BITS-1 |
33 | # error "CRC_BE_BITS must be one of {1, 2, 4, 8, 32}" | 46 | # error "CRC_BE_BITS must be one of {1, 2, 4, 8, 32, 64}" |
34 | #endif | 47 | #endif |
diff --git a/lib/gen_crc32table.c b/lib/gen_crc32table.c index 99ac744848fb..0d9edd17ee13 100644 --- a/lib/gen_crc32table.c +++ b/lib/gen_crc32table.c | |||
@@ -1,23 +1,28 @@ | |||
1 | #include <stdio.h> | 1 | #include <stdio.h> |
2 | #include "../include/generated/autoconf.h" | ||
2 | #include "crc32defs.h" | 3 | #include "crc32defs.h" |
3 | #include <inttypes.h> | 4 | #include <inttypes.h> |
4 | 5 | ||
5 | #define ENTRIES_PER_LINE 4 | 6 | #define ENTRIES_PER_LINE 4 |
6 | 7 | ||
7 | #if CRC_LE_BITS <= 8 | 8 | #if CRC_LE_BITS > 8 |
8 | #define LE_TABLE_SIZE (1 << CRC_LE_BITS) | 9 | # define LE_TABLE_ROWS (CRC_LE_BITS/8) |
10 | # define LE_TABLE_SIZE 256 | ||
9 | #else | 11 | #else |
10 | #define LE_TABLE_SIZE 256 | 12 | # define LE_TABLE_ROWS 1 |
13 | # define LE_TABLE_SIZE (1 << CRC_LE_BITS) | ||
11 | #endif | 14 | #endif |
12 | 15 | ||
13 | #if CRC_BE_BITS <= 8 | 16 | #if CRC_BE_BITS > 8 |
14 | #define BE_TABLE_SIZE (1 << CRC_BE_BITS) | 17 | # define BE_TABLE_ROWS (CRC_BE_BITS/8) |
18 | # define BE_TABLE_SIZE 256 | ||
15 | #else | 19 | #else |
16 | #define BE_TABLE_SIZE 256 | 20 | # define BE_TABLE_ROWS 1 |
21 | # define BE_TABLE_SIZE (1 << CRC_BE_BITS) | ||
17 | #endif | 22 | #endif |
18 | 23 | ||
19 | static uint32_t crc32table_le[4][256]; | 24 | static uint32_t crc32table_le[LE_TABLE_ROWS][256]; |
20 | static uint32_t crc32table_be[4][256]; | 25 | static uint32_t crc32table_be[BE_TABLE_ROWS][256]; |
21 | 26 | ||
22 | /** | 27 | /** |
23 | * crc32init_le() - allocate and initialize LE table data | 28 | * crc32init_le() - allocate and initialize LE table data |
@@ -40,7 +45,7 @@ static void crc32init_le(void) | |||
40 | } | 45 | } |
41 | for (i = 0; i < LE_TABLE_SIZE; i++) { | 46 | for (i = 0; i < LE_TABLE_SIZE; i++) { |
42 | crc = crc32table_le[0][i]; | 47 | crc = crc32table_le[0][i]; |
43 | for (j = 1; j < 4; j++) { | 48 | for (j = 1; j < LE_TABLE_ROWS; j++) { |
44 | crc = crc32table_le[0][crc & 0xff] ^ (crc >> 8); | 49 | crc = crc32table_le[0][crc & 0xff] ^ (crc >> 8); |
45 | crc32table_le[j][i] = crc; | 50 | crc32table_le[j][i] = crc; |
46 | } | 51 | } |
@@ -64,18 +69,18 @@ static void crc32init_be(void) | |||
64 | } | 69 | } |
65 | for (i = 0; i < BE_TABLE_SIZE; i++) { | 70 | for (i = 0; i < BE_TABLE_SIZE; i++) { |
66 | crc = crc32table_be[0][i]; | 71 | crc = crc32table_be[0][i]; |
67 | for (j = 1; j < 4; j++) { | 72 | for (j = 1; j < BE_TABLE_ROWS; j++) { |
68 | crc = crc32table_be[0][(crc >> 24) & 0xff] ^ (crc << 8); | 73 | crc = crc32table_be[0][(crc >> 24) & 0xff] ^ (crc << 8); |
69 | crc32table_be[j][i] = crc; | 74 | crc32table_be[j][i] = crc; |
70 | } | 75 | } |
71 | } | 76 | } |
72 | } | 77 | } |
73 | 78 | ||
74 | static void output_table(uint32_t (*table)[256], int len, char *trans) | 79 | static void output_table(uint32_t (*table)[256], int rows, int len, char *trans) |
75 | { | 80 | { |
76 | int i, j; | 81 | int i, j; |
77 | 82 | ||
78 | for (j = 0 ; j < 4; j++) { | 83 | for (j = 0 ; j < rows; j++) { |
79 | printf("{"); | 84 | printf("{"); |
80 | for (i = 0; i < len - 1; i++) { | 85 | for (i = 0; i < len - 1; i++) { |
81 | if (i % ENTRIES_PER_LINE == 0) | 86 | if (i % ENTRIES_PER_LINE == 0) |
@@ -92,15 +97,21 @@ int main(int argc, char** argv) | |||
92 | 97 | ||
93 | if (CRC_LE_BITS > 1) { | 98 | if (CRC_LE_BITS > 1) { |
94 | crc32init_le(); | 99 | crc32init_le(); |
95 | printf("static const u32 crc32table_le[4][256] = {"); | 100 | printf("static const u32 __cacheline_aligned " |
96 | output_table(crc32table_le, LE_TABLE_SIZE, "tole"); | 101 | "crc32table_le[%d][%d] = {", |
102 | LE_TABLE_ROWS, LE_TABLE_SIZE); | ||
103 | output_table(crc32table_le, LE_TABLE_ROWS, | ||
104 | LE_TABLE_SIZE, "tole"); | ||
97 | printf("};\n"); | 105 | printf("};\n"); |
98 | } | 106 | } |
99 | 107 | ||
100 | if (CRC_BE_BITS > 1) { | 108 | if (CRC_BE_BITS > 1) { |
101 | crc32init_be(); | 109 | crc32init_be(); |
102 | printf("static const u32 crc32table_be[4][256] = {"); | 110 | printf("static const u32 __cacheline_aligned " |
103 | output_table(crc32table_be, BE_TABLE_SIZE, "tobe"); | 111 | "crc32table_be[%d][%d] = {", |
112 | BE_TABLE_ROWS, BE_TABLE_SIZE); | ||
113 | output_table(crc32table_be, LE_TABLE_ROWS, | ||
114 | BE_TABLE_SIZE, "tobe"); | ||
104 | printf("};\n"); | 115 | printf("};\n"); |
105 | } | 116 | } |
106 | 117 | ||