diff options
| author | Bob Pearson <rpearson@systemfabricworks.com> | 2012-03-23 18:02:24 -0400 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-23 19:58:37 -0400 |
| commit | 324eb0f17d9dcead3c60c133aa244f6b3631fec9 (patch) | |
| tree | 657c564269a0fc1921f4252dbf0184725f5c85fa /lib | |
| parent | 9a1dbf6a29694c9d81b498019f103aee0f8b5b6c (diff) | |
crc32: add slice-by-8 algorithm to existing code
Add a slicing-by-8 algorithm alongside the existing slicing-by-4 algorithm. This
consists of:
- extending the largest BITS size from 32 to 64
- extending the tables from tab[4][256] to up to tab[8][256]
- adding code for the inner loop
[djwong@us.ibm.com: Minor changelog tweaks]
Signed-off-by: Bob Pearson <rpearson@systemfabricworks.com>
Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'lib')
| -rw-r--r-- | lib/crc32.c | 38 | ||||
| -rw-r--r-- | lib/crc32defs.h | 29 | ||||
| -rw-r--r-- | lib/gen_crc32table.c | 43 |
3 files changed, 75 insertions, 35 deletions
diff --git a/lib/crc32.c b/lib/crc32.c index 5971f2ad46d5..826e16352e0e 100644 --- a/lib/crc32.c +++ b/lib/crc32.c | |||
| @@ -47,25 +47,28 @@ MODULE_LICENSE("GPL"); | |||
| 47 | 47 | ||
| 48 | #if CRC_LE_BITS > 8 || CRC_BE_BITS > 8 | 48 | #if CRC_LE_BITS > 8 || CRC_BE_BITS > 8 |
| 49 | 49 | ||
| 50 | /* implements slicing-by-4 or slicing-by-8 algorithm */ | ||
| 50 | static inline u32 | 51 | static inline u32 |
| 51 | crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) | 52 | crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) |
| 52 | { | 53 | { |
| 53 | # ifdef __LITTLE_ENDIAN | 54 | # ifdef __LITTLE_ENDIAN |
| 54 | # define DO_CRC(x) crc = t0[(crc ^ (x)) & 255] ^ (crc >> 8) | 55 | # define DO_CRC(x) crc = t0[(crc ^ (x)) & 255] ^ (crc >> 8) |
| 55 | # define DO_CRC4 crc = t3[(crc) & 255] ^ \ | 56 | # define DO_CRC4 (t3[(q) & 255] ^ t2[(q >> 8) & 255] ^ \ |
| 56 | t2[(crc >> 8) & 255] ^ \ | 57 | t1[(q >> 16) & 255] ^ t0[(q >> 24) & 255]) |
| 57 | t1[(crc >> 16) & 255] ^ \ | 58 | # define DO_CRC8 (t7[(q) & 255] ^ t6[(q >> 8) & 255] ^ \ |
| 58 | t0[(crc >> 24) & 255] | 59 | t5[(q >> 16) & 255] ^ t4[(q >> 24) & 255]) |
| 59 | # else | 60 | # else |
| 60 | # define DO_CRC(x) crc = t0[((crc >> 24) ^ (x)) & 255] ^ (crc << 8) | 61 | # define DO_CRC(x) crc = t0[((crc >> 24) ^ (x)) & 255] ^ (crc << 8) |
| 61 | # define DO_CRC4 crc = t0[(crc) & 255] ^ \ | 62 | # define DO_CRC4 (t0[(q) & 255] ^ t1[(q >> 8) & 255] ^ \ |
| 62 | t1[(crc >> 8) & 255] ^ \ | 63 | t2[(q >> 16) & 255] ^ t3[(q >> 24) & 255]) |
| 63 | t2[(crc >> 16) & 255] ^ \ | 64 | # define DO_CRC8 (t4[(q) & 255] ^ t5[(q >> 8) & 255] ^ \ |
| 64 | t3[(crc >> 24) & 255] | 65 | t6[(q >> 16) & 255] ^ t7[(q >> 24) & 255]) |
| 65 | # endif | 66 | # endif |
| 66 | const u32 *b; | 67 | const u32 *b; |
| 67 | size_t rem_len; | 68 | size_t rem_len; |
| 68 | const u32 *t0=tab[0], *t1=tab[1], *t2=tab[2], *t3=tab[3]; | 69 | const u32 *t0=tab[0], *t1=tab[1], *t2=tab[2], *t3=tab[3]; |
| 70 | const u32 *t4 = tab[4], *t5 = tab[5], *t6 = tab[6], *t7 = tab[7]; | ||
| 71 | u32 q; | ||
| 69 | 72 | ||
| 70 | /* Align it */ | 73 | /* Align it */ |
| 71 | if (unlikely((long)buf & 3 && len)) { | 74 | if (unlikely((long)buf & 3 && len)) { |
| @@ -73,13 +76,25 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) | |||
| 73 | DO_CRC(*buf++); | 76 | DO_CRC(*buf++); |
| 74 | } while ((--len) && ((long)buf)&3); | 77 | } while ((--len) && ((long)buf)&3); |
| 75 | } | 78 | } |
| 79 | |||
| 80 | # if CRC_LE_BITS == 32 | ||
| 76 | rem_len = len & 3; | 81 | rem_len = len & 3; |
| 77 | /* load data 32 bits wide, xor data 32 bits wide. */ | ||
| 78 | len = len >> 2; | 82 | len = len >> 2; |
| 83 | # else | ||
| 84 | rem_len = len & 7; | ||
| 85 | len = len >> 3; | ||
| 86 | # endif | ||
| 87 | |||
| 79 | b = (const u32 *)buf; | 88 | b = (const u32 *)buf; |
| 80 | for (--b; len; --len) { | 89 | for (--b; len; --len) { |
| 81 | crc ^= *++b; /* use pre increment for speed */ | 90 | q = crc ^ *++b; /* use pre increment for speed */ |
| 82 | DO_CRC4; | 91 | # if CRC_LE_BITS == 32 |
| 92 | crc = DO_CRC4; | ||
| 93 | # else | ||
| 94 | crc = DO_CRC8; | ||
| 95 | q = *++b; | ||
| 96 | crc ^= DO_CRC4; | ||
| 97 | # endif | ||
| 83 | } | 98 | } |
| 84 | len = rem_len; | 99 | len = rem_len; |
| 85 | /* And the last few bytes */ | 100 | /* And the last few bytes */ |
| @@ -92,6 +107,7 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256]) | |||
| 92 | return crc; | 107 | return crc; |
| 93 | #undef DO_CRC | 108 | #undef DO_CRC |
| 94 | #undef DO_CRC4 | 109 | #undef DO_CRC4 |
| 110 | #undef DO_CRC8 | ||
| 95 | } | 111 | } |
| 96 | #endif | 112 | #endif |
| 97 | 113 | ||
diff --git a/lib/crc32defs.h b/lib/crc32defs.h index daa3a5e85f60..818159288489 100644 --- a/lib/crc32defs.h +++ b/lib/crc32defs.h | |||
| @@ -6,29 +6,42 @@ | |||
| 6 | #define CRCPOLY_LE 0xedb88320 | 6 | #define CRCPOLY_LE 0xedb88320 |
| 7 | #define CRCPOLY_BE 0x04c11db7 | 7 | #define CRCPOLY_BE 0x04c11db7 |
| 8 | 8 | ||
| 9 | /* How many bits at a time to use. Valid values are 1, 2, 4, 8, and 32. */ | 9 | /* |
| 10 | /* For less performance-sensitive, use 4 or 8 */ | 10 | * How many bits at a time to use. Valid values are 1, 2, 4, 8, 32 and 64. |
| 11 | * For less performance-sensitive, use 4 or 8 to save table size. | ||
| 12 | * For larger systems choose same as CPU architecture as default. | ||
| 13 | * This works well on X86_64, SPARC64 systems. This may require some | ||
| 14 | * elaboration after experiments with other architectures. | ||
| 15 | */ | ||
| 11 | #ifndef CRC_LE_BITS | 16 | #ifndef CRC_LE_BITS |
| 12 | # define CRC_LE_BITS 32 | 17 | # ifdef CONFIG_64BIT |
| 18 | # define CRC_LE_BITS 64 | ||
| 19 | # else | ||
| 20 | # define CRC_LE_BITS 32 | ||
| 21 | # endif | ||
| 13 | #endif | 22 | #endif |
| 14 | #ifndef CRC_BE_BITS | 23 | #ifndef CRC_BE_BITS |
| 15 | # define CRC_BE_BITS 32 | 24 | # ifdef CONFIG_64BIT |
| 25 | # define CRC_BE_BITS 64 | ||
| 26 | # else | ||
| 27 | # define CRC_BE_BITS 32 | ||
| 28 | # endif | ||
| 16 | #endif | 29 | #endif |
| 17 | 30 | ||
| 18 | /* | 31 | /* |
| 19 | * Little-endian CRC computation. Used with serial bit streams sent | 32 | * Little-endian CRC computation. Used with serial bit streams sent |
| 20 | * lsbit-first. Be sure to use cpu_to_le32() to append the computed CRC. | 33 | * lsbit-first. Be sure to use cpu_to_le32() to append the computed CRC. |
| 21 | */ | 34 | */ |
| 22 | #if CRC_LE_BITS > 32 || CRC_LE_BITS < 1 || CRC_LE_BITS == 16 || \ | 35 | #if CRC_LE_BITS > 64 || CRC_LE_BITS < 1 || CRC_LE_BITS == 16 || \ |
| 23 | CRC_LE_BITS & CRC_LE_BITS-1 | 36 | CRC_LE_BITS & CRC_LE_BITS-1 |
| 24 | # error "CRC_LE_BITS must be one of {1, 2, 4, 8, 32}" | 37 | # error "CRC_LE_BITS must be one of {1, 2, 4, 8, 32, 64}" |
| 25 | #endif | 38 | #endif |
| 26 | 39 | ||
| 27 | /* | 40 | /* |
| 28 | * Big-endian CRC computation. Used with serial bit streams sent | 41 | * Big-endian CRC computation. Used with serial bit streams sent |
| 29 | * msbit-first. Be sure to use cpu_to_be32() to append the computed CRC. | 42 | * msbit-first. Be sure to use cpu_to_be32() to append the computed CRC. |
| 30 | */ | 43 | */ |
| 31 | #if CRC_BE_BITS > 32 || CRC_BE_BITS < 1 || CRC_BE_BITS == 16 || \ | 44 | #if CRC_BE_BITS > 64 || CRC_BE_BITS < 1 || CRC_BE_BITS == 16 || \ |
| 32 | CRC_BE_BITS & CRC_BE_BITS-1 | 45 | CRC_BE_BITS & CRC_BE_BITS-1 |
| 33 | # error "CRC_BE_BITS must be one of {1, 2, 4, 8, 32}" | 46 | # error "CRC_BE_BITS must be one of {1, 2, 4, 8, 32, 64}" |
| 34 | #endif | 47 | #endif |
diff --git a/lib/gen_crc32table.c b/lib/gen_crc32table.c index 99ac744848fb..0d9edd17ee13 100644 --- a/lib/gen_crc32table.c +++ b/lib/gen_crc32table.c | |||
| @@ -1,23 +1,28 @@ | |||
| 1 | #include <stdio.h> | 1 | #include <stdio.h> |
| 2 | #include "../include/generated/autoconf.h" | ||
| 2 | #include "crc32defs.h" | 3 | #include "crc32defs.h" |
| 3 | #include <inttypes.h> | 4 | #include <inttypes.h> |
| 4 | 5 | ||
| 5 | #define ENTRIES_PER_LINE 4 | 6 | #define ENTRIES_PER_LINE 4 |
| 6 | 7 | ||
| 7 | #if CRC_LE_BITS <= 8 | 8 | #if CRC_LE_BITS > 8 |
| 8 | #define LE_TABLE_SIZE (1 << CRC_LE_BITS) | 9 | # define LE_TABLE_ROWS (CRC_LE_BITS/8) |
| 10 | # define LE_TABLE_SIZE 256 | ||
| 9 | #else | 11 | #else |
| 10 | #define LE_TABLE_SIZE 256 | 12 | # define LE_TABLE_ROWS 1 |
| 13 | # define LE_TABLE_SIZE (1 << CRC_LE_BITS) | ||
| 11 | #endif | 14 | #endif |
| 12 | 15 | ||
| 13 | #if CRC_BE_BITS <= 8 | 16 | #if CRC_BE_BITS > 8 |
| 14 | #define BE_TABLE_SIZE (1 << CRC_BE_BITS) | 17 | # define BE_TABLE_ROWS (CRC_BE_BITS/8) |
| 18 | # define BE_TABLE_SIZE 256 | ||
| 15 | #else | 19 | #else |
| 16 | #define BE_TABLE_SIZE 256 | 20 | # define BE_TABLE_ROWS 1 |
| 21 | # define BE_TABLE_SIZE (1 << CRC_BE_BITS) | ||
| 17 | #endif | 22 | #endif |
| 18 | 23 | ||
| 19 | static uint32_t crc32table_le[4][256]; | 24 | static uint32_t crc32table_le[LE_TABLE_ROWS][256]; |
| 20 | static uint32_t crc32table_be[4][256]; | 25 | static uint32_t crc32table_be[BE_TABLE_ROWS][256]; |
| 21 | 26 | ||
| 22 | /** | 27 | /** |
| 23 | * crc32init_le() - allocate and initialize LE table data | 28 | * crc32init_le() - allocate and initialize LE table data |
| @@ -40,7 +45,7 @@ static void crc32init_le(void) | |||
| 40 | } | 45 | } |
| 41 | for (i = 0; i < LE_TABLE_SIZE; i++) { | 46 | for (i = 0; i < LE_TABLE_SIZE; i++) { |
| 42 | crc = crc32table_le[0][i]; | 47 | crc = crc32table_le[0][i]; |
| 43 | for (j = 1; j < 4; j++) { | 48 | for (j = 1; j < LE_TABLE_ROWS; j++) { |
| 44 | crc = crc32table_le[0][crc & 0xff] ^ (crc >> 8); | 49 | crc = crc32table_le[0][crc & 0xff] ^ (crc >> 8); |
| 45 | crc32table_le[j][i] = crc; | 50 | crc32table_le[j][i] = crc; |
| 46 | } | 51 | } |
| @@ -64,18 +69,18 @@ static void crc32init_be(void) | |||
| 64 | } | 69 | } |
| 65 | for (i = 0; i < BE_TABLE_SIZE; i++) { | 70 | for (i = 0; i < BE_TABLE_SIZE; i++) { |
| 66 | crc = crc32table_be[0][i]; | 71 | crc = crc32table_be[0][i]; |
| 67 | for (j = 1; j < 4; j++) { | 72 | for (j = 1; j < BE_TABLE_ROWS; j++) { |
| 68 | crc = crc32table_be[0][(crc >> 24) & 0xff] ^ (crc << 8); | 73 | crc = crc32table_be[0][(crc >> 24) & 0xff] ^ (crc << 8); |
| 69 | crc32table_be[j][i] = crc; | 74 | crc32table_be[j][i] = crc; |
| 70 | } | 75 | } |
| 71 | } | 76 | } |
| 72 | } | 77 | } |
| 73 | 78 | ||
| 74 | static void output_table(uint32_t (*table)[256], int len, char *trans) | 79 | static void output_table(uint32_t (*table)[256], int rows, int len, char *trans) |
| 75 | { | 80 | { |
| 76 | int i, j; | 81 | int i, j; |
| 77 | 82 | ||
| 78 | for (j = 0 ; j < 4; j++) { | 83 | for (j = 0 ; j < rows; j++) { |
| 79 | printf("{"); | 84 | printf("{"); |
| 80 | for (i = 0; i < len - 1; i++) { | 85 | for (i = 0; i < len - 1; i++) { |
| 81 | if (i % ENTRIES_PER_LINE == 0) | 86 | if (i % ENTRIES_PER_LINE == 0) |
| @@ -92,15 +97,21 @@ int main(int argc, char** argv) | |||
| 92 | 97 | ||
| 93 | if (CRC_LE_BITS > 1) { | 98 | if (CRC_LE_BITS > 1) { |
| 94 | crc32init_le(); | 99 | crc32init_le(); |
| 95 | printf("static const u32 crc32table_le[4][256] = {"); | 100 | printf("static const u32 __cacheline_aligned " |
| 96 | output_table(crc32table_le, LE_TABLE_SIZE, "tole"); | 101 | "crc32table_le[%d][%d] = {", |
| 102 | LE_TABLE_ROWS, LE_TABLE_SIZE); | ||
| 103 | output_table(crc32table_le, LE_TABLE_ROWS, | ||
| 104 | LE_TABLE_SIZE, "tole"); | ||
| 97 | printf("};\n"); | 105 | printf("};\n"); |
| 98 | } | 106 | } |
| 99 | 107 | ||
| 100 | if (CRC_BE_BITS > 1) { | 108 | if (CRC_BE_BITS > 1) { |
| 101 | crc32init_be(); | 109 | crc32init_be(); |
| 102 | printf("static const u32 crc32table_be[4][256] = {"); | 110 | printf("static const u32 __cacheline_aligned " |
| 103 | output_table(crc32table_be, BE_TABLE_SIZE, "tobe"); | 111 | "crc32table_be[%d][%d] = {", |
| 112 | BE_TABLE_ROWS, BE_TABLE_SIZE); | ||
| 113 | output_table(crc32table_be, LE_TABLE_ROWS, | ||
| 114 | BE_TABLE_SIZE, "tobe"); | ||
| 104 | printf("};\n"); | 115 | printf("};\n"); |
| 105 | } | 116 | } |
| 106 | 117 | ||
