diff options
author | David Daney <ddaney@caviumnetworks.com> | 2010-02-04 14:31:49 -0500 |
---|---|---|
committer | Ralf Baechle <ralf@linux-mips.org> | 2010-02-27 06:53:42 -0500 |
commit | 500c2e1fdbcc2b273bd4c695a9b8ac8196f61614 (patch) | |
tree | f24c80f609a739beed194fd5c66abf9bc48ce0d6 | |
parent | e275ed5ee94b358964a0dae1c8b49f0bff260b60 (diff) |
MIPS: Optimize spinlocks.
The current locking mechanism uses an ll/sc sequence to release a
spinlock. This is slower than a wmb() followed by a store to unlock.
The branching forward to .subsection 2 on sc failure slows down the
contended case. So we get rid of that part too.
Since we are now working on naturally aligned u16 values, we can get
rid of a masking operation as the LHU already does the right thing.
The order of the two ANDI instructions is reversed for better scheduling on multi-issue CPUs.
On a 12-CPU 750 MHz Octeon cn5750, this patch improves IPv4 UDP packet
forwarding rates from 3.58*10^6 PPS to 3.99*10^6 PPS, or about 11%.
Signed-off-by: David Daney <ddaney@caviumnetworks.com>
To: linux-mips@linux-mips.org
Patchwork: http://patchwork.linux-mips.org/patch/937/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
-rw-r--r-- | arch/mips/include/asm/barrier.h | 6 | ||||
-rw-r--r-- | arch/mips/include/asm/spinlock.h | 118 | ||||
-rw-r--r-- | arch/mips/include/asm/spinlock_types.h | 24 |
3 files changed, 67 insertions, 81 deletions
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h index a2670a239e0c..c0884f02d3a6 100644 --- a/arch/mips/include/asm/barrier.h +++ b/arch/mips/include/asm/barrier.h | |||
@@ -168,8 +168,14 @@ | |||
168 | 168 | ||
169 | #ifdef CONFIG_CPU_CAVIUM_OCTEON | 169 | #ifdef CONFIG_CPU_CAVIUM_OCTEON |
170 | #define smp_mb__before_llsc() smp_wmb() | 170 | #define smp_mb__before_llsc() smp_wmb() |
171 | /* Cause previous writes to become visible on all CPUs as soon as possible */ | ||
172 | #define nudge_writes() __asm__ __volatile__(".set push\n\t" \ | ||
173 | ".set arch=octeon\n\t" \ | ||
174 | "syncw\n\t" \ | ||
175 | ".set pop" : : : "memory") | ||
171 | #else | 176 | #else |
172 | #define smp_mb__before_llsc() smp_llsc_mb() | 177 | #define smp_mb__before_llsc() smp_llsc_mb() |
178 | #define nudge_writes() mb() | ||
173 | #endif | 179 | #endif |
174 | 180 | ||
175 | #endif /* __ASM_BARRIER_H */ | 181 | #endif /* __ASM_BARRIER_H */ |
diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h index 5f16696eaa00..396e402fbe2c 100644 --- a/arch/mips/include/asm/spinlock.h +++ b/arch/mips/include/asm/spinlock.h | |||
@@ -36,9 +36,9 @@ | |||
36 | 36 | ||
37 | static inline int arch_spin_is_locked(arch_spinlock_t *lock) | 37 | static inline int arch_spin_is_locked(arch_spinlock_t *lock) |
38 | { | 38 | { |
39 | unsigned int counters = ACCESS_ONCE(lock->lock); | 39 | u32 counters = ACCESS_ONCE(lock->lock); |
40 | 40 | ||
41 | return ((counters >> 14) ^ counters) & 0x1fff; | 41 | return ((counters >> 16) ^ counters) & 0xffff; |
42 | } | 42 | } |
43 | 43 | ||
44 | #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) | 44 | #define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) |
@@ -47,9 +47,9 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lock) | |||
47 | 47 | ||
48 | static inline int arch_spin_is_contended(arch_spinlock_t *lock) | 48 | static inline int arch_spin_is_contended(arch_spinlock_t *lock) |
49 | { | 49 | { |
50 | unsigned int counters = ACCESS_ONCE(lock->lock); | 50 | u32 counters = ACCESS_ONCE(lock->lock); |
51 | 51 | ||
52 | return (((counters >> 14) - counters) & 0x1fff) > 1; | 52 | return (((counters >> 16) - counters) & 0xffff) > 1; |
53 | } | 53 | } |
54 | #define arch_spin_is_contended arch_spin_is_contended | 54 | #define arch_spin_is_contended arch_spin_is_contended |
55 | 55 | ||
@@ -57,6 +57,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) | |||
57 | { | 57 | { |
58 | int my_ticket; | 58 | int my_ticket; |
59 | int tmp; | 59 | int tmp; |
60 | int inc = 0x10000; | ||
60 | 61 | ||
61 | if (R10000_LLSC_WAR) { | 62 | if (R10000_LLSC_WAR) { |
62 | __asm__ __volatile__ ( | 63 | __asm__ __volatile__ ( |
@@ -64,25 +65,24 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) | |||
64 | " .set noreorder \n" | 65 | " .set noreorder \n" |
65 | " \n" | 66 | " \n" |
66 | "1: ll %[ticket], %[ticket_ptr] \n" | 67 | "1: ll %[ticket], %[ticket_ptr] \n" |
67 | " addiu %[my_ticket], %[ticket], 0x4000 \n" | 68 | " addu %[my_ticket], %[ticket], %[inc] \n" |
68 | " sc %[my_ticket], %[ticket_ptr] \n" | 69 | " sc %[my_ticket], %[ticket_ptr] \n" |
69 | " beqzl %[my_ticket], 1b \n" | 70 | " beqzl %[my_ticket], 1b \n" |
70 | " nop \n" | 71 | " nop \n" |
71 | " srl %[my_ticket], %[ticket], 14 \n" | 72 | " srl %[my_ticket], %[ticket], 16 \n" |
72 | " andi %[my_ticket], %[my_ticket], 0x1fff \n" | 73 | " andi %[ticket], %[ticket], 0xffff \n" |
73 | " andi %[ticket], %[ticket], 0x1fff \n" | 74 | " andi %[my_ticket], %[my_ticket], 0xffff \n" |
74 | " bne %[ticket], %[my_ticket], 4f \n" | 75 | " bne %[ticket], %[my_ticket], 4f \n" |
75 | " subu %[ticket], %[my_ticket], %[ticket] \n" | 76 | " subu %[ticket], %[my_ticket], %[ticket] \n" |
76 | "2: \n" | 77 | "2: \n" |
77 | " .subsection 2 \n" | 78 | " .subsection 2 \n" |
78 | "4: andi %[ticket], %[ticket], 0x1fff \n" | 79 | "4: andi %[ticket], %[ticket], 0xffff \n" |
79 | " sll %[ticket], 5 \n" | 80 | " sll %[ticket], 5 \n" |
80 | " \n" | 81 | " \n" |
81 | "6: bnez %[ticket], 6b \n" | 82 | "6: bnez %[ticket], 6b \n" |
82 | " subu %[ticket], 1 \n" | 83 | " subu %[ticket], 1 \n" |
83 | " \n" | 84 | " \n" |
84 | " lw %[ticket], %[ticket_ptr] \n" | 85 | " lhu %[ticket], %[serving_now_ptr] \n" |
85 | " andi %[ticket], %[ticket], 0x1fff \n" | ||
86 | " beq %[ticket], %[my_ticket], 2b \n" | 86 | " beq %[ticket], %[my_ticket], 2b \n" |
87 | " subu %[ticket], %[my_ticket], %[ticket] \n" | 87 | " subu %[ticket], %[my_ticket], %[ticket] \n" |
88 | " b 4b \n" | 88 | " b 4b \n" |
@@ -90,36 +90,33 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) | |||
90 | " .previous \n" | 90 | " .previous \n" |
91 | " .set pop \n" | 91 | " .set pop \n" |
92 | : [ticket_ptr] "+m" (lock->lock), | 92 | : [ticket_ptr] "+m" (lock->lock), |
93 | [serving_now_ptr] "+m" (lock->h.serving_now), | ||
93 | [ticket] "=&r" (tmp), | 94 | [ticket] "=&r" (tmp), |
94 | [my_ticket] "=&r" (my_ticket)); | 95 | [my_ticket] "=&r" (my_ticket) |
96 | : [inc] "r" (inc)); | ||
95 | } else { | 97 | } else { |
96 | __asm__ __volatile__ ( | 98 | __asm__ __volatile__ ( |
97 | " .set push # arch_spin_lock \n" | 99 | " .set push # arch_spin_lock \n" |
98 | " .set noreorder \n" | 100 | " .set noreorder \n" |
99 | " \n" | 101 | " \n" |
100 | " ll %[ticket], %[ticket_ptr] \n" | 102 | "1: ll %[ticket], %[ticket_ptr] \n" |
101 | "1: addiu %[my_ticket], %[ticket], 0x4000 \n" | 103 | " addu %[my_ticket], %[ticket], %[inc] \n" |
102 | " sc %[my_ticket], %[ticket_ptr] \n" | 104 | " sc %[my_ticket], %[ticket_ptr] \n" |
103 | " beqz %[my_ticket], 3f \n" | 105 | " beqz %[my_ticket], 1b \n" |
104 | " nop \n" | 106 | " srl %[my_ticket], %[ticket], 16 \n" |
105 | " srl %[my_ticket], %[ticket], 14 \n" | 107 | " andi %[ticket], %[ticket], 0xffff \n" |
106 | " andi %[my_ticket], %[my_ticket], 0x1fff \n" | 108 | " andi %[my_ticket], %[my_ticket], 0xffff \n" |
107 | " andi %[ticket], %[ticket], 0x1fff \n" | ||
108 | " bne %[ticket], %[my_ticket], 4f \n" | 109 | " bne %[ticket], %[my_ticket], 4f \n" |
109 | " subu %[ticket], %[my_ticket], %[ticket] \n" | 110 | " subu %[ticket], %[my_ticket], %[ticket] \n" |
110 | "2: \n" | 111 | "2: \n" |
111 | " .subsection 2 \n" | 112 | " .subsection 2 \n" |
112 | "3: b 1b \n" | ||
113 | " ll %[ticket], %[ticket_ptr] \n" | ||
114 | " \n" | ||
115 | "4: andi %[ticket], %[ticket], 0x1fff \n" | 113 | "4: andi %[ticket], %[ticket], 0x1fff \n" |
116 | " sll %[ticket], 5 \n" | 114 | " sll %[ticket], 5 \n" |
117 | " \n" | 115 | " \n" |
118 | "6: bnez %[ticket], 6b \n" | 116 | "6: bnez %[ticket], 6b \n" |
119 | " subu %[ticket], 1 \n" | 117 | " subu %[ticket], 1 \n" |
120 | " \n" | 118 | " \n" |
121 | " lw %[ticket], %[ticket_ptr] \n" | 119 | " lhu %[ticket], %[serving_now_ptr] \n" |
122 | " andi %[ticket], %[ticket], 0x1fff \n" | ||
123 | " beq %[ticket], %[my_ticket], 2b \n" | 120 | " beq %[ticket], %[my_ticket], 2b \n" |
124 | " subu %[ticket], %[my_ticket], %[ticket] \n" | 121 | " subu %[ticket], %[my_ticket], %[ticket] \n" |
125 | " b 4b \n" | 122 | " b 4b \n" |
@@ -127,8 +124,10 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) | |||
127 | " .previous \n" | 124 | " .previous \n" |
128 | " .set pop \n" | 125 | " .set pop \n" |
129 | : [ticket_ptr] "+m" (lock->lock), | 126 | : [ticket_ptr] "+m" (lock->lock), |
127 | [serving_now_ptr] "+m" (lock->h.serving_now), | ||
130 | [ticket] "=&r" (tmp), | 128 | [ticket] "=&r" (tmp), |
131 | [my_ticket] "=&r" (my_ticket)); | 129 | [my_ticket] "=&r" (my_ticket) |
130 | : [inc] "r" (inc)); | ||
132 | } | 131 | } |
133 | 132 | ||
134 | smp_llsc_mb(); | 133 | smp_llsc_mb(); |
@@ -136,47 +135,16 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) | |||
136 | 135 | ||
137 | static inline void arch_spin_unlock(arch_spinlock_t *lock) | 136 | static inline void arch_spin_unlock(arch_spinlock_t *lock) |
138 | { | 137 | { |
139 | int tmp; | 138 | unsigned int serving_now = lock->h.serving_now + 1; |
140 | 139 | wmb(); | |
141 | smp_mb__before_llsc(); | 140 | lock->h.serving_now = (u16)serving_now; |
142 | 141 | nudge_writes(); | |
143 | if (R10000_LLSC_WAR) { | ||
144 | __asm__ __volatile__ ( | ||
145 | " # arch_spin_unlock \n" | ||
146 | "1: ll %[ticket], %[ticket_ptr] \n" | ||
147 | " addiu %[ticket], %[ticket], 1 \n" | ||
148 | " ori %[ticket], %[ticket], 0x2000 \n" | ||
149 | " xori %[ticket], %[ticket], 0x2000 \n" | ||
150 | " sc %[ticket], %[ticket_ptr] \n" | ||
151 | " beqzl %[ticket], 1b \n" | ||
152 | : [ticket_ptr] "+m" (lock->lock), | ||
153 | [ticket] "=&r" (tmp)); | ||
154 | } else { | ||
155 | __asm__ __volatile__ ( | ||
156 | " .set push # arch_spin_unlock \n" | ||
157 | " .set noreorder \n" | ||
158 | " \n" | ||
159 | " ll %[ticket], %[ticket_ptr] \n" | ||
160 | "1: addiu %[ticket], %[ticket], 1 \n" | ||
161 | " ori %[ticket], %[ticket], 0x2000 \n" | ||
162 | " xori %[ticket], %[ticket], 0x2000 \n" | ||
163 | " sc %[ticket], %[ticket_ptr] \n" | ||
164 | " beqz %[ticket], 2f \n" | ||
165 | " nop \n" | ||
166 | " \n" | ||
167 | " .subsection 2 \n" | ||
168 | "2: b 1b \n" | ||
169 | " ll %[ticket], %[ticket_ptr] \n" | ||
170 | " .previous \n" | ||
171 | " .set pop \n" | ||
172 | : [ticket_ptr] "+m" (lock->lock), | ||
173 | [ticket] "=&r" (tmp)); | ||
174 | } | ||
175 | } | 142 | } |
176 | 143 | ||
177 | static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock) | 144 | static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock) |
178 | { | 145 | { |
179 | int tmp, tmp2, tmp3; | 146 | int tmp, tmp2, tmp3; |
147 | int inc = 0x10000; | ||
180 | 148 | ||
181 | if (R10000_LLSC_WAR) { | 149 | if (R10000_LLSC_WAR) { |
182 | __asm__ __volatile__ ( | 150 | __asm__ __volatile__ ( |
@@ -184,11 +152,11 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock) | |||
184 | " .set noreorder \n" | 152 | " .set noreorder \n" |
185 | " \n" | 153 | " \n" |
186 | "1: ll %[ticket], %[ticket_ptr] \n" | 154 | "1: ll %[ticket], %[ticket_ptr] \n" |
187 | " srl %[my_ticket], %[ticket], 14 \n" | 155 | " srl %[my_ticket], %[ticket], 16 \n" |
188 | " andi %[my_ticket], %[my_ticket], 0x1fff \n" | 156 | " andi %[my_ticket], %[my_ticket], 0xffff \n" |
189 | " andi %[now_serving], %[ticket], 0x1fff \n" | 157 | " andi %[now_serving], %[ticket], 0xffff \n" |
190 | " bne %[my_ticket], %[now_serving], 3f \n" | 158 | " bne %[my_ticket], %[now_serving], 3f \n" |
191 | " addiu %[ticket], %[ticket], 0x4000 \n" | 159 | " addu %[ticket], %[ticket], %[inc] \n" |
192 | " sc %[ticket], %[ticket_ptr] \n" | 160 | " sc %[ticket], %[ticket_ptr] \n" |
193 | " beqzl %[ticket], 1b \n" | 161 | " beqzl %[ticket], 1b \n" |
194 | " li %[ticket], 1 \n" | 162 | " li %[ticket], 1 \n" |
@@ -201,33 +169,33 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock) | |||
201 | : [ticket_ptr] "+m" (lock->lock), | 169 | : [ticket_ptr] "+m" (lock->lock), |
202 | [ticket] "=&r" (tmp), | 170 | [ticket] "=&r" (tmp), |
203 | [my_ticket] "=&r" (tmp2), | 171 | [my_ticket] "=&r" (tmp2), |
204 | [now_serving] "=&r" (tmp3)); | 172 | [now_serving] "=&r" (tmp3) |
173 | : [inc] "r" (inc)); | ||
205 | } else { | 174 | } else { |
206 | __asm__ __volatile__ ( | 175 | __asm__ __volatile__ ( |
207 | " .set push # arch_spin_trylock \n" | 176 | " .set push # arch_spin_trylock \n" |
208 | " .set noreorder \n" | 177 | " .set noreorder \n" |
209 | " \n" | 178 | " \n" |
210 | " ll %[ticket], %[ticket_ptr] \n" | 179 | "1: ll %[ticket], %[ticket_ptr] \n" |
211 | "1: srl %[my_ticket], %[ticket], 14 \n" | 180 | " srl %[my_ticket], %[ticket], 16 \n" |
212 | " andi %[my_ticket], %[my_ticket], 0x1fff \n" | 181 | " andi %[my_ticket], %[my_ticket], 0xffff \n" |
213 | " andi %[now_serving], %[ticket], 0x1fff \n" | 182 | " andi %[now_serving], %[ticket], 0xffff \n" |
214 | " bne %[my_ticket], %[now_serving], 3f \n" | 183 | " bne %[my_ticket], %[now_serving], 3f \n" |
215 | " addiu %[ticket], %[ticket], 0x4000 \n" | 184 | " addu %[ticket], %[ticket], %[inc] \n" |
216 | " sc %[ticket], %[ticket_ptr] \n" | 185 | " sc %[ticket], %[ticket_ptr] \n" |
217 | " beqz %[ticket], 4f \n" | 186 | " beqz %[ticket], 1b \n" |
218 | " li %[ticket], 1 \n" | 187 | " li %[ticket], 1 \n" |
219 | "2: \n" | 188 | "2: \n" |
220 | " .subsection 2 \n" | 189 | " .subsection 2 \n" |
221 | "3: b 2b \n" | 190 | "3: b 2b \n" |
222 | " li %[ticket], 0 \n" | 191 | " li %[ticket], 0 \n" |
223 | "4: b 1b \n" | ||
224 | " ll %[ticket], %[ticket_ptr] \n" | ||
225 | " .previous \n" | 192 | " .previous \n" |
226 | " .set pop \n" | 193 | " .set pop \n" |
227 | : [ticket_ptr] "+m" (lock->lock), | 194 | : [ticket_ptr] "+m" (lock->lock), |
228 | [ticket] "=&r" (tmp), | 195 | [ticket] "=&r" (tmp), |
229 | [my_ticket] "=&r" (tmp2), | 196 | [my_ticket] "=&r" (tmp2), |
230 | [now_serving] "=&r" (tmp3)); | 197 | [now_serving] "=&r" (tmp3) |
198 | : [inc] "r" (inc)); | ||
231 | } | 199 | } |
232 | 200 | ||
233 | smp_llsc_mb(); | 201 | smp_llsc_mb(); |
diff --git a/arch/mips/include/asm/spinlock_types.h b/arch/mips/include/asm/spinlock_types.h index ee197c2f9c98..c52f36013a9d 100644 --- a/arch/mips/include/asm/spinlock_types.h +++ b/arch/mips/include/asm/spinlock_types.h | |||
@@ -5,16 +5,28 @@ | |||
5 | # error "please don't include this file directly" | 5 | # error "please don't include this file directly" |
6 | #endif | 6 | #endif |
7 | 7 | ||
8 | typedef struct { | 8 | #include <linux/types.h> |
9 | |||
10 | #include <asm/byteorder.h> | ||
11 | |||
12 | typedef union { | ||
9 | /* | 13 | /* |
10 | * bits 0..13: serving_now | 14 | * bits 0..15 : serving_now |
11 | * bits 14 : junk data | 15 | * bits 16..31 : ticket |
12 | * bits 15..28: ticket | ||
13 | */ | 16 | */ |
14 | unsigned int lock; | 17 | u32 lock; |
18 | struct { | ||
19 | #ifdef __BIG_ENDIAN | ||
20 | u16 ticket; | ||
21 | u16 serving_now; | ||
22 | #else | ||
23 | u16 serving_now; | ||
24 | u16 ticket; | ||
25 | #endif | ||
26 | } h; | ||
15 | } arch_spinlock_t; | 27 | } arch_spinlock_t; |
16 | 28 | ||
17 | #define __ARCH_SPIN_LOCK_UNLOCKED { 0 } | 29 | #define __ARCH_SPIN_LOCK_UNLOCKED { .lock = 0 } |
18 | 30 | ||
19 | typedef struct { | 31 | typedef struct { |
20 | volatile unsigned int lock; | 32 | volatile unsigned int lock; |