aboutsummaryrefslogtreecommitdiffstats
path: root/arch
diff options
context:
space:
mode:
Diffstat (limited to 'arch')
-rw-r--r-- arch/powerpc/include/asm/word-at-a-time.h 57
1 file changed, 32 insertions, 25 deletions
diff --git a/arch/powerpc/include/asm/word-at-a-time.h b/arch/powerpc/include/asm/word-at-a-time.h
index 213a5f2b0717..9a5c928bb3c6 100644
--- a/arch/powerpc/include/asm/word-at-a-time.h
+++ b/arch/powerpc/include/asm/word-at-a-time.h
@@ -42,13 +42,6 @@ static inline bool has_zero(unsigned long val, unsigned long *data, const struct
42 42
43#else 43#else
44 44
45/*
46 * This is largely generic for little-endian machines, but the
47 * optimal byte mask counting is probably going to be something
48 * that is architecture-specific. If you have a reliably fast
49 * bit count instruction, that might be better than the multiply
50 * and shift, for example.
51 */
52struct word_at_a_time { 45struct word_at_a_time {
53 const unsigned long one_bits, high_bits; 46 const unsigned long one_bits, high_bits;
54}; 47};
@@ -57,19 +50,32 @@ struct word_at_a_time {
57 50
58#ifdef CONFIG_64BIT 51#ifdef CONFIG_64BIT
59 52
60/* 53/* Alan Modra's little-endian strlen tail for 64-bit */
61 * Jan Achrenius on G+: microoptimized version of 54#define create_zero_mask(mask) (mask)
62 * the simpler "(mask & ONEBYTES) * ONEBYTES >> 56" 55
63 * that works for the bytemasks without having to 56static inline unsigned long find_zero(unsigned long mask)
64 * mask them first.
65 */
66static inline long count_masked_bytes(unsigned long mask)
67{ 57{
68 return mask*0x0001020304050608ul >> 56; 58 unsigned long leading_zero_bits;
59 long trailing_zero_bit_mask;
60
61 asm ("addi %1,%2,-1\n\t"
62 "andc %1,%1,%2\n\t"
63 "popcntd %0,%1"
64 : "=r" (leading_zero_bits), "=&r" (trailing_zero_bit_mask)
65 : "r" (mask));
66 return leading_zero_bits >> 3;
69} 67}
70 68
71#else /* 32-bit case */ 69#else /* 32-bit case */
72 70
71/*
72 * This is largely generic for little-endian machines, but the
73 * optimal byte mask counting is probably going to be something
74 * that is architecture-specific. If you have a reliably fast
75 * bit count instruction, that might be better than the multiply
76 * and shift, for example.
77 */
78
73/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */ 79/* Carl Chatfield / Jan Achrenius G+ version for 32-bit */
74static inline long count_masked_bytes(long mask) 80static inline long count_masked_bytes(long mask)
75{ 81{
@@ -79,6 +85,17 @@ static inline long count_masked_bytes(long mask)
79 return a & mask; 85 return a & mask;
80} 86}
81 87
88static inline unsigned long create_zero_mask(unsigned long bits)
89{
90 bits = (bits - 1) & ~bits;
91 return bits >> 7;
92}
93
94static inline unsigned long find_zero(unsigned long mask)
95{
96 return count_masked_bytes(mask);
97}
98
82#endif 99#endif
83 100
84/* Return nonzero if it has a zero */ 101/* Return nonzero if it has a zero */
@@ -94,19 +111,9 @@ static inline unsigned long prep_zero_mask(unsigned long a, unsigned long bits,
94 return bits; 111 return bits;
95} 112}
96 113
97static inline unsigned long create_zero_mask(unsigned long bits)
98{
99 bits = (bits - 1) & ~bits;
100 return bits >> 7;
101}
102
103/* The mask we created is directly usable as a bytemask */ 114/* The mask we created is directly usable as a bytemask */
104#define zero_bytemask(mask) (mask) 115#define zero_bytemask(mask) (mask)
105 116
106static inline unsigned long find_zero(unsigned long mask)
107{
108 return count_masked_bytes(mask);
109}
110#endif 117#endif
111 118
112#endif /* _ASM_WORD_AT_A_TIME_H */ 119#endif /* _ASM_WORD_AT_A_TIME_H */