aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
author: Akinobu Mita <akinobu.mita@gmail.com> 2012-03-23 18:02:14 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2012-03-23 19:58:35 -0400
commit: f43804bf5f9ae1e60077c5f22aee5fdfe4f09837 (patch)
tree: 61e2dcaef0ab774d5cd68a89859f569d54fa7f78 /lib
parent: a403d930c58eb8448f81fa90c125ac36dc8ef89d (diff)
string: memchr_inv() speed improvements
- Generate a 64-bit pattern more efficiently

  memchr_inv needs to generate a 64-bit pattern filled with a target character. The operation can be done in a more efficient way.

- Don't call the slow check_bytes() if the memory area is 64-bit aligned

  memchr_inv compares contiguous 64-bit words with the 64-bit pattern as much as possible. The area outside that region is checked by check_bytes(), which scans it byte by byte. Unfortunately, the first 64-bit word is unexpectedly scanned by check_bytes() even if the memory area is aligned to a 64-bit boundary.

Both changes were originally suggested by Eric Dumazet.

Signed-off-by: Akinobu Mita <akinobu.mita@gmail.com>
Suggested-by: Eric Dumazet <eric.dumazet@gmail.com>
Cc: Brian Norris <computersforpeace@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'lib')
-rw-r--r-- lib/string.c 20
1 files changed, 16 insertions, 4 deletions
diff --git a/lib/string.c b/lib/string.c
index dc4a86341f91..3a03782720c8 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -785,12 +785,24 @@ void *memchr_inv(const void *start, int c, size_t bytes)
785 if (bytes <= 16) 785 if (bytes <= 16)
786 return check_bytes8(start, value, bytes); 786 return check_bytes8(start, value, bytes);
787 787
788 value64 = value | value << 8 | value << 16 | value << 24; 788 value64 = value;
789 value64 = (value64 & 0xffffffff) | value64 << 32; 789#if defined(ARCH_HAS_FAST_MULTIPLIER) && BITS_PER_LONG == 64
790 prefix = 8 - ((unsigned long)start) % 8; 790 value64 *= 0x0101010101010101;
791#elif defined(ARCH_HAS_FAST_MULTIPLIER)
792 value64 *= 0x01010101;
793 value64 |= value64 << 32;
794#else
795 value64 |= value64 << 8;
796 value64 |= value64 << 16;
797 value64 |= value64 << 32;
798#endif
791 799
800 prefix = (unsigned long)start % 8;
792 if (prefix) { 801 if (prefix) {
793 u8 *r = check_bytes8(start, value, prefix); 802 u8 *r;
803
804 prefix = 8 - prefix;
805 r = check_bytes8(start, value, prefix);
794 if (r) 806 if (r)
795 return r; 807 return r;
796 start += prefix; 808 start += prefix;