aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Bob Pearson <rpearson@systemfabricworks.com> 2012-03-23 18:02:24 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-03-23 19:58:37 -0400
commit: 0292c497b6b942557d085b37f888ef5865f67d37 (patch)
tree: fe88508060c0ecdf8fb3d5e98ca2c55057cc5068
parent: 324eb0f17d9dcead3c60c133aa244f6b3631fec9 (diff)
crc32: optimize loop counter for x86
Add two changes that improve performance on x86 systems:

1. Replace the main loop with an incrementing counter. This change improves the performance of the selftest by about 5-6% on Nehalem CPUs. The apparent reason is that the compiler can use the loop index to perform an indexed memory access. This change is reported to make performance worse on PowerPC CPUs.

2. Replace the rem_len loop with an incrementing counter. This change improves the performance of the selftest, which has more than the usual number of occurrences of trailing bytes, by about 1-2% on x86 CPUs. In actual workloads the length is most often a multiple of 4 bytes, so this code is executed rarely, if at all. Again, this change is reported to make performance worse on PowerPC.

[djwong@us.ibm.com: Minor changelog tweaks]
Signed-off-by: Bob Pearson <rpearson@systemfabricworks.com>
Signed-off-by: Darrick J. Wong <djwong@us.ibm.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r-- lib/crc32.c 13
1 files changed, 13 insertions, 0 deletions
diff --git a/lib/crc32.c b/lib/crc32.c
index 826e16352e0e..4eac9c770cd4 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -66,6 +66,9 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
66# endif 66# endif
67 const u32 *b; 67 const u32 *b;
68 size_t rem_len; 68 size_t rem_len;
69# ifdef CONFIG_X86
70 size_t i;
71# endif
69 const u32 *t0=tab[0], *t1=tab[1], *t2=tab[2], *t3=tab[3]; 72 const u32 *t0=tab[0], *t1=tab[1], *t2=tab[2], *t3=tab[3];
70 const u32 *t4 = tab[4], *t5 = tab[5], *t6 = tab[6], *t7 = tab[7]; 73 const u32 *t4 = tab[4], *t5 = tab[5], *t6 = tab[6], *t7 = tab[7];
71 u32 q; 74 u32 q;
@@ -86,7 +89,12 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
86# endif 89# endif
87 90
88 b = (const u32 *)buf; 91 b = (const u32 *)buf;
92# ifdef CONFIG_X86
93 --b;
94 for (i = 0; i < len; i++) {
95# else
89 for (--b; len; --len) { 96 for (--b; len; --len) {
97# endif
90 q = crc ^ *++b; /* use pre increment for speed */ 98 q = crc ^ *++b; /* use pre increment for speed */
91# if CRC_LE_BITS == 32 99# if CRC_LE_BITS == 32
92 crc = DO_CRC4; 100 crc = DO_CRC4;
@@ -100,9 +108,14 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
100 /* And the last few bytes */ 108 /* And the last few bytes */
101 if (len) { 109 if (len) {
102 u8 *p = (u8 *)(b + 1) - 1; 110 u8 *p = (u8 *)(b + 1) - 1;
111# ifdef CONFIG_X86
112 for (i = 0; i < len; i++)
113 DO_CRC(*++p); /* use pre increment for speed */
114# else
103 do { 115 do {
104 DO_CRC(*++p); /* use pre increment for speed */ 116 DO_CRC(*++p); /* use pre increment for speed */
105 } while (--len); 117 } while (--len);
118# endif
106 } 119 }
107 return crc; 120 return crc;
108#undef DO_CRC 121#undef DO_CRC