aboutsummaryrefslogtreecommitdiffstats
path: root/arch/mips/include
diff options
context:
space:
mode:
author: David Daney <ddaney@caviumnetworks.com> 2010-02-04 14:31:49 -0500
committer: Ralf Baechle <ralf@linux-mips.org> 2010-02-27 06:53:42 -0500
commit: 500c2e1fdbcc2b273bd4c695a9b8ac8196f61614 (patch)
tree: f24c80f609a739beed194fd5c66abf9bc48ce0d6 /arch/mips/include
parent: e275ed5ee94b358964a0dae1c8b49f0bff260b60 (diff)
MIPS: Optimize spinlocks.
The current locking mechanism uses an ll/sc sequence to release a spinlock. This is slower than a wmb() followed by a store to unlock.

The branching forward to .subsection 2 on sc failure slows down the contended case. So we get rid of that part too.

Since we are now working on naturally aligned u16 values, we can get rid of a masking operation as the LHU already does the right thing. The ANDI instructions are reversed for better scheduling on multi-issue CPUs.

On a 12-CPU 750MHz Octeon cn5750 this patch improves IPv4 UDP packet forwarding rates from 3.58*10^6 PPS to 3.99*10^6 PPS, or about 11%.

Signed-off-by: David Daney <ddaney@caviumnetworks.com>
To: linux-mips@linux-mips.org
Patchwork: http://patchwork.linux-mips.org/patch/937/
Signed-off-by: Ralf Baechle <ralf@linux-mips.org>
Diffstat (limited to 'arch/mips/include')
-rw-r--r--  arch/mips/include/asm/barrier.h          6
-rw-r--r--  arch/mips/include/asm/spinlock.h       118
-rw-r--r--  arch/mips/include/asm/spinlock_types.h  24
3 files changed, 67 insertions, 81 deletions
diff --git a/arch/mips/include/asm/barrier.h b/arch/mips/include/asm/barrier.h
index a2670a239e0c..c0884f02d3a6 100644
--- a/arch/mips/include/asm/barrier.h
+++ b/arch/mips/include/asm/barrier.h
@@ -168,8 +168,14 @@
168 168
169#ifdef CONFIG_CPU_CAVIUM_OCTEON 169#ifdef CONFIG_CPU_CAVIUM_OCTEON
170#define smp_mb__before_llsc() smp_wmb() 170#define smp_mb__before_llsc() smp_wmb()
171/* Cause previous writes to become visible on all CPUs as soon as possible */
172#define nudge_writes() __asm__ __volatile__(".set push\n\t" \
173 ".set arch=octeon\n\t" \
174 "syncw\n\t" \
175 ".set pop" : : : "memory")
171#else 176#else
172#define smp_mb__before_llsc() smp_llsc_mb() 177#define smp_mb__before_llsc() smp_llsc_mb()
178#define nudge_writes() mb()
173#endif 179#endif
174 180
175#endif /* __ASM_BARRIER_H */ 181#endif /* __ASM_BARRIER_H */
diff --git a/arch/mips/include/asm/spinlock.h b/arch/mips/include/asm/spinlock.h
index 5f16696eaa00..396e402fbe2c 100644
--- a/arch/mips/include/asm/spinlock.h
+++ b/arch/mips/include/asm/spinlock.h
@@ -36,9 +36,9 @@
36 36
37static inline int arch_spin_is_locked(arch_spinlock_t *lock) 37static inline int arch_spin_is_locked(arch_spinlock_t *lock)
38{ 38{
39 unsigned int counters = ACCESS_ONCE(lock->lock); 39 u32 counters = ACCESS_ONCE(lock->lock);
40 40
41 return ((counters >> 14) ^ counters) & 0x1fff; 41 return ((counters >> 16) ^ counters) & 0xffff;
42} 42}
43 43
44#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) 44#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock)
@@ -47,9 +47,9 @@ static inline int arch_spin_is_locked(arch_spinlock_t *lock)
47 47
48static inline int arch_spin_is_contended(arch_spinlock_t *lock) 48static inline int arch_spin_is_contended(arch_spinlock_t *lock)
49{ 49{
50 unsigned int counters = ACCESS_ONCE(lock->lock); 50 u32 counters = ACCESS_ONCE(lock->lock);
51 51
52 return (((counters >> 14) - counters) & 0x1fff) > 1; 52 return (((counters >> 16) - counters) & 0xffff) > 1;
53} 53}
54#define arch_spin_is_contended arch_spin_is_contended 54#define arch_spin_is_contended arch_spin_is_contended
55 55
@@ -57,6 +57,7 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
57{ 57{
58 int my_ticket; 58 int my_ticket;
59 int tmp; 59 int tmp;
60 int inc = 0x10000;
60 61
61 if (R10000_LLSC_WAR) { 62 if (R10000_LLSC_WAR) {
62 __asm__ __volatile__ ( 63 __asm__ __volatile__ (
@@ -64,25 +65,24 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
64 " .set noreorder \n" 65 " .set noreorder \n"
65 " \n" 66 " \n"
66 "1: ll %[ticket], %[ticket_ptr] \n" 67 "1: ll %[ticket], %[ticket_ptr] \n"
67 " addiu %[my_ticket], %[ticket], 0x4000 \n" 68 " addu %[my_ticket], %[ticket], %[inc] \n"
68 " sc %[my_ticket], %[ticket_ptr] \n" 69 " sc %[my_ticket], %[ticket_ptr] \n"
69 " beqzl %[my_ticket], 1b \n" 70 " beqzl %[my_ticket], 1b \n"
70 " nop \n" 71 " nop \n"
71 " srl %[my_ticket], %[ticket], 14 \n" 72 " srl %[my_ticket], %[ticket], 16 \n"
72 " andi %[my_ticket], %[my_ticket], 0x1fff \n" 73 " andi %[ticket], %[ticket], 0xffff \n"
73 " andi %[ticket], %[ticket], 0x1fff \n" 74 " andi %[my_ticket], %[my_ticket], 0xffff \n"
74 " bne %[ticket], %[my_ticket], 4f \n" 75 " bne %[ticket], %[my_ticket], 4f \n"
75 " subu %[ticket], %[my_ticket], %[ticket] \n" 76 " subu %[ticket], %[my_ticket], %[ticket] \n"
76 "2: \n" 77 "2: \n"
77 " .subsection 2 \n" 78 " .subsection 2 \n"
78 "4: andi %[ticket], %[ticket], 0x1fff \n" 79 "4: andi %[ticket], %[ticket], 0xffff \n"
79 " sll %[ticket], 5 \n" 80 " sll %[ticket], 5 \n"
80 " \n" 81 " \n"
81 "6: bnez %[ticket], 6b \n" 82 "6: bnez %[ticket], 6b \n"
82 " subu %[ticket], 1 \n" 83 " subu %[ticket], 1 \n"
83 " \n" 84 " \n"
84 " lw %[ticket], %[ticket_ptr] \n" 85 " lhu %[ticket], %[serving_now_ptr] \n"
85 " andi %[ticket], %[ticket], 0x1fff \n"
86 " beq %[ticket], %[my_ticket], 2b \n" 86 " beq %[ticket], %[my_ticket], 2b \n"
87 " subu %[ticket], %[my_ticket], %[ticket] \n" 87 " subu %[ticket], %[my_ticket], %[ticket] \n"
88 " b 4b \n" 88 " b 4b \n"
@@ -90,36 +90,33 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
90 " .previous \n" 90 " .previous \n"
91 " .set pop \n" 91 " .set pop \n"
92 : [ticket_ptr] "+m" (lock->lock), 92 : [ticket_ptr] "+m" (lock->lock),
93 [serving_now_ptr] "+m" (lock->h.serving_now),
93 [ticket] "=&r" (tmp), 94 [ticket] "=&r" (tmp),
94 [my_ticket] "=&r" (my_ticket)); 95 [my_ticket] "=&r" (my_ticket)
96 : [inc] "r" (inc));
95 } else { 97 } else {
96 __asm__ __volatile__ ( 98 __asm__ __volatile__ (
97 " .set push # arch_spin_lock \n" 99 " .set push # arch_spin_lock \n"
98 " .set noreorder \n" 100 " .set noreorder \n"
99 " \n" 101 " \n"
100 " ll %[ticket], %[ticket_ptr] \n" 102 "1: ll %[ticket], %[ticket_ptr] \n"
101 "1: addiu %[my_ticket], %[ticket], 0x4000 \n" 103 " addu %[my_ticket], %[ticket], %[inc] \n"
102 " sc %[my_ticket], %[ticket_ptr] \n" 104 " sc %[my_ticket], %[ticket_ptr] \n"
103 " beqz %[my_ticket], 3f \n" 105 " beqz %[my_ticket], 1b \n"
104 " nop \n" 106 " srl %[my_ticket], %[ticket], 16 \n"
105 " srl %[my_ticket], %[ticket], 14 \n" 107 " andi %[ticket], %[ticket], 0xffff \n"
106 " andi %[my_ticket], %[my_ticket], 0x1fff \n" 108 " andi %[my_ticket], %[my_ticket], 0xffff \n"
107 " andi %[ticket], %[ticket], 0x1fff \n"
108 " bne %[ticket], %[my_ticket], 4f \n" 109 " bne %[ticket], %[my_ticket], 4f \n"
109 " subu %[ticket], %[my_ticket], %[ticket] \n" 110 " subu %[ticket], %[my_ticket], %[ticket] \n"
110 "2: \n" 111 "2: \n"
111 " .subsection 2 \n" 112 " .subsection 2 \n"
112 "3: b 1b \n"
113 " ll %[ticket], %[ticket_ptr] \n"
114 " \n"
115 "4: andi %[ticket], %[ticket], 0x1fff \n" 113 "4: andi %[ticket], %[ticket], 0x1fff \n"
116 " sll %[ticket], 5 \n" 114 " sll %[ticket], 5 \n"
117 " \n" 115 " \n"
118 "6: bnez %[ticket], 6b \n" 116 "6: bnez %[ticket], 6b \n"
119 " subu %[ticket], 1 \n" 117 " subu %[ticket], 1 \n"
120 " \n" 118 " \n"
121 " lw %[ticket], %[ticket_ptr] \n" 119 " lhu %[ticket], %[serving_now_ptr] \n"
122 " andi %[ticket], %[ticket], 0x1fff \n"
123 " beq %[ticket], %[my_ticket], 2b \n" 120 " beq %[ticket], %[my_ticket], 2b \n"
124 " subu %[ticket], %[my_ticket], %[ticket] \n" 121 " subu %[ticket], %[my_ticket], %[ticket] \n"
125 " b 4b \n" 122 " b 4b \n"
@@ -127,8 +124,10 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
127 " .previous \n" 124 " .previous \n"
128 " .set pop \n" 125 " .set pop \n"
129 : [ticket_ptr] "+m" (lock->lock), 126 : [ticket_ptr] "+m" (lock->lock),
127 [serving_now_ptr] "+m" (lock->h.serving_now),
130 [ticket] "=&r" (tmp), 128 [ticket] "=&r" (tmp),
131 [my_ticket] "=&r" (my_ticket)); 129 [my_ticket] "=&r" (my_ticket)
130 : [inc] "r" (inc));
132 } 131 }
133 132
134 smp_llsc_mb(); 133 smp_llsc_mb();
@@ -136,47 +135,16 @@ static inline void arch_spin_lock(arch_spinlock_t *lock)
136 135
137static inline void arch_spin_unlock(arch_spinlock_t *lock) 136static inline void arch_spin_unlock(arch_spinlock_t *lock)
138{ 137{
139 int tmp; 138 unsigned int serving_now = lock->h.serving_now + 1;
140 139 wmb();
141 smp_mb__before_llsc(); 140 lock->h.serving_now = (u16)serving_now;
142 141 nudge_writes();
143 if (R10000_LLSC_WAR) {
144 __asm__ __volatile__ (
145 " # arch_spin_unlock \n"
146 "1: ll %[ticket], %[ticket_ptr] \n"
147 " addiu %[ticket], %[ticket], 1 \n"
148 " ori %[ticket], %[ticket], 0x2000 \n"
149 " xori %[ticket], %[ticket], 0x2000 \n"
150 " sc %[ticket], %[ticket_ptr] \n"
151 " beqzl %[ticket], 1b \n"
152 : [ticket_ptr] "+m" (lock->lock),
153 [ticket] "=&r" (tmp));
154 } else {
155 __asm__ __volatile__ (
156 " .set push # arch_spin_unlock \n"
157 " .set noreorder \n"
158 " \n"
159 " ll %[ticket], %[ticket_ptr] \n"
160 "1: addiu %[ticket], %[ticket], 1 \n"
161 " ori %[ticket], %[ticket], 0x2000 \n"
162 " xori %[ticket], %[ticket], 0x2000 \n"
163 " sc %[ticket], %[ticket_ptr] \n"
164 " beqz %[ticket], 2f \n"
165 " nop \n"
166 " \n"
167 " .subsection 2 \n"
168 "2: b 1b \n"
169 " ll %[ticket], %[ticket_ptr] \n"
170 " .previous \n"
171 " .set pop \n"
172 : [ticket_ptr] "+m" (lock->lock),
173 [ticket] "=&r" (tmp));
174 }
175} 142}
176 143
177static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock) 144static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
178{ 145{
179 int tmp, tmp2, tmp3; 146 int tmp, tmp2, tmp3;
147 int inc = 0x10000;
180 148
181 if (R10000_LLSC_WAR) { 149 if (R10000_LLSC_WAR) {
182 __asm__ __volatile__ ( 150 __asm__ __volatile__ (
@@ -184,11 +152,11 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
184 " .set noreorder \n" 152 " .set noreorder \n"
185 " \n" 153 " \n"
186 "1: ll %[ticket], %[ticket_ptr] \n" 154 "1: ll %[ticket], %[ticket_ptr] \n"
187 " srl %[my_ticket], %[ticket], 14 \n" 155 " srl %[my_ticket], %[ticket], 16 \n"
188 " andi %[my_ticket], %[my_ticket], 0x1fff \n" 156 " andi %[my_ticket], %[my_ticket], 0xffff \n"
189 " andi %[now_serving], %[ticket], 0x1fff \n" 157 " andi %[now_serving], %[ticket], 0xffff \n"
190 " bne %[my_ticket], %[now_serving], 3f \n" 158 " bne %[my_ticket], %[now_serving], 3f \n"
191 " addiu %[ticket], %[ticket], 0x4000 \n" 159 " addu %[ticket], %[ticket], %[inc] \n"
192 " sc %[ticket], %[ticket_ptr] \n" 160 " sc %[ticket], %[ticket_ptr] \n"
193 " beqzl %[ticket], 1b \n" 161 " beqzl %[ticket], 1b \n"
194 " li %[ticket], 1 \n" 162 " li %[ticket], 1 \n"
@@ -201,33 +169,33 @@ static inline unsigned int arch_spin_trylock(arch_spinlock_t *lock)
201 : [ticket_ptr] "+m" (lock->lock), 169 : [ticket_ptr] "+m" (lock->lock),
202 [ticket] "=&r" (tmp), 170 [ticket] "=&r" (tmp),
203 [my_ticket] "=&r" (tmp2), 171 [my_ticket] "=&r" (tmp2),
204 [now_serving] "=&r" (tmp3)); 172 [now_serving] "=&r" (tmp3)
173 : [inc] "r" (inc));
205 } else { 174 } else {
206 __asm__ __volatile__ ( 175 __asm__ __volatile__ (
207 " .set push # arch_spin_trylock \n" 176 " .set push # arch_spin_trylock \n"
208 " .set noreorder \n" 177 " .set noreorder \n"
209 " \n" 178 " \n"
210 " ll %[ticket], %[ticket_ptr] \n" 179 "1: ll %[ticket], %[ticket_ptr] \n"
211 "1: srl %[my_ticket], %[ticket], 14 \n" 180 " srl %[my_ticket], %[ticket], 16 \n"
212 " andi %[my_ticket], %[my_ticket], 0x1fff \n" 181 " andi %[my_ticket], %[my_ticket], 0xffff \n"
213 " andi %[now_serving], %[ticket], 0x1fff \n" 182 " andi %[now_serving], %[ticket], 0xffff \n"
214 " bne %[my_ticket], %[now_serving], 3f \n" 183 " bne %[my_ticket], %[now_serving], 3f \n"
215 " addiu %[ticket], %[ticket], 0x4000 \n" 184 " addu %[ticket], %[ticket], %[inc] \n"
216 " sc %[ticket], %[ticket_ptr] \n" 185 " sc %[ticket], %[ticket_ptr] \n"
217 " beqz %[ticket], 4f \n" 186 " beqz %[ticket], 1b \n"
218 " li %[ticket], 1 \n" 187 " li %[ticket], 1 \n"
219 "2: \n" 188 "2: \n"
220 " .subsection 2 \n" 189 " .subsection 2 \n"
221 "3: b 2b \n" 190 "3: b 2b \n"
222 " li %[ticket], 0 \n" 191 " li %[ticket], 0 \n"
223 "4: b 1b \n"
224 " ll %[ticket], %[ticket_ptr] \n"
225 " .previous \n" 192 " .previous \n"
226 " .set pop \n" 193 " .set pop \n"
227 : [ticket_ptr] "+m" (lock->lock), 194 : [ticket_ptr] "+m" (lock->lock),
228 [ticket] "=&r" (tmp), 195 [ticket] "=&r" (tmp),
229 [my_ticket] "=&r" (tmp2), 196 [my_ticket] "=&r" (tmp2),
230 [now_serving] "=&r" (tmp3)); 197 [now_serving] "=&r" (tmp3)
198 : [inc] "r" (inc));
231 } 199 }
232 200
233 smp_llsc_mb(); 201 smp_llsc_mb();
diff --git a/arch/mips/include/asm/spinlock_types.h b/arch/mips/include/asm/spinlock_types.h
index ee197c2f9c98..c52f36013a9d 100644
--- a/arch/mips/include/asm/spinlock_types.h
+++ b/arch/mips/include/asm/spinlock_types.h
@@ -5,16 +5,28 @@
5# error "please don't include this file directly" 5# error "please don't include this file directly"
6#endif 6#endif
7 7
8typedef struct { 8#include <linux/types.h>
9
10#include <asm/byteorder.h>
11
12typedef union {
9 /* 13 /*
10 * bits 0..13: serving_now 14 * bits 0..15 : serving_now
11 * bits 14 : junk data 15 * bits 16..31 : ticket
12 * bits 15..28: ticket
13 */ 16 */
14 unsigned int lock; 17 u32 lock;
18 struct {
19#ifdef __BIG_ENDIAN
20 u16 ticket;
21 u16 serving_now;
22#else
23 u16 serving_now;
24 u16 ticket;
25#endif
26 } h;
15} arch_spinlock_t; 27} arch_spinlock_t;
16 28
17#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } 29#define __ARCH_SPIN_LOCK_UNLOCKED { .lock = 0 }
18 30
19typedef struct { 31typedef struct {
20 volatile unsigned int lock; 32 volatile unsigned int lock;