Diffstat (limited to 'arch/x86')
-rw-r--r--   arch/x86/xen/spinlock.c | 348
1 file changed, 79 insertions, 269 deletions
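For orientation before the patch body: a minimal sketch (not part of the commit) of how the two paravirt hooks this patch installs, pv_lock_ops.lock_spinning and pv_lock_ops.unlock_kick, are meant to be driven by a generic ticket-lock slow path. Only the hook signatures and the Xen behaviour noted in the comments come from the diff below; the toy_* names, the pv_ops variable, and the SPIN_THRESHOLD value are illustrative assumptions, not kernel code.

/* Illustrative sketch only -- not part of the patch. */

typedef unsigned short __ticket_t;

struct toy_spinlock {
        volatile __ticket_t head;       /* ticket currently being served */
        volatile __ticket_t tail;       /* next ticket to hand out */
};

/* Same shape as the pv_lock_ops hooks the patch fills in. */
struct toy_pv_lock_ops {
        void (*lock_spinning)(struct toy_spinlock *lock, __ticket_t want);
        void (*unlock_kick)(struct toy_spinlock *lock, __ticket_t next);
};

/* A backend (e.g. Xen in xen_init_spinlocks()) would fill these in. */
static struct toy_pv_lock_ops pv_ops;

#define SPIN_THRESHOLD (1 << 15)        /* assumed spin budget before blocking */

static void toy_spin_lock(struct toy_spinlock *lock)
{
        __ticket_t me = __sync_fetch_and_add(&lock->tail, 1);  /* take a ticket */

        for (;;) {
                unsigned int count = SPIN_THRESHOLD;

                /* Fast path: busy-wait for our ticket to come up. */
                while (count--) {
                        if (lock->head == me)
                                return;
                        __builtin_ia32_pause();         /* cpu_relax() */
                }

                /*
                 * Slow path: publish which ticket we are waiting for and
                 * block.  In the patch, xen_lock_spinning() records
                 * (lock, want) per cpu, sets waiting_cpus and polls an
                 * event-channel irq until kicked (or a spurious wakeup).
                 */
                if (pv_ops.lock_spinning)
                        pv_ops.lock_spinning(lock, me);
        }
}

static void toy_spin_unlock(struct toy_spinlock *lock)
{
        __ticket_t next = ++lock->head;         /* serve the next ticket */

        /*
         * Kick whoever is waiting for 'next'.  In the patch,
         * xen_unlock_kick() scans waiting_cpus for a matching
         * (lock, want) pair and sends XEN_SPIN_UNLOCK_VECTOR to
         * exactly one cpu.
         */
        if (pv_ops.unlock_kick)
                pv_ops.unlock_kick(lock, next);
}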
diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c
index d50962936af4..a458729be25f 100644
--- a/arch/x86/xen/spinlock.c
+++ b/arch/x86/xen/spinlock.c
@@ -17,45 +17,44 @@
 #include "xen-ops.h"
 #include "debugfs.h"
 
-#ifdef CONFIG_XEN_DEBUG_FS
-static struct xen_spinlock_stats
-{
-        u64 taken;
-        u32 taken_slow;
-        u32 taken_slow_nested;
-        u32 taken_slow_pickup;
-        u32 taken_slow_spurious;
-        u32 taken_slow_irqenable;
+enum xen_contention_stat {
+        TAKEN_SLOW,
+        TAKEN_SLOW_PICKUP,
+        TAKEN_SLOW_SPURIOUS,
+        RELEASED_SLOW,
+        RELEASED_SLOW_KICKED,
+        NR_CONTENTION_STATS
+};
 
-        u64 released;
-        u32 released_slow;
-        u32 released_slow_kicked;
 
+#ifdef CONFIG_XEN_DEBUG_FS
 #define HISTO_BUCKETS 30
-        u32 histo_spin_total[HISTO_BUCKETS+1];
-        u32 histo_spin_spinning[HISTO_BUCKETS+1];
+static struct xen_spinlock_stats
+{
+        u32 contention_stats[NR_CONTENTION_STATS];
         u32 histo_spin_blocked[HISTO_BUCKETS+1];
-
-        u64 time_total;
-        u64 time_spinning;
         u64 time_blocked;
 } spinlock_stats;
 
 static u8 zero_stats;
 
-static unsigned lock_timeout = 1 << 10;
-#define TIMEOUT lock_timeout
-
 static inline void check_zero(void)
 {
-        if (unlikely(zero_stats)) {
-                memset(&spinlock_stats, 0, sizeof(spinlock_stats));
-                zero_stats = 0;
+        u8 ret;
+        u8 old = ACCESS_ONCE(zero_stats);
+        if (unlikely(old)) {
+                ret = cmpxchg(&zero_stats, old, 0);
+                /* This ensures only one fellow resets the stat */
+                if (ret == old)
+                        memset(&spinlock_stats, 0, sizeof(spinlock_stats));
         }
 }
 
-#define ADD_STATS(elem, val)    \
-        do { check_zero(); spinlock_stats.elem += (val); } while(0)
+static inline void add_stats(enum xen_contention_stat var, u32 val)
+{
+        check_zero();
+        spinlock_stats.contention_stats[var] += val;
+}
 
 static inline u64 spin_time_start(void)
 {
@@ -74,22 +73,6 @@ static void __spin_time_accum(u64 delta, u32 *array)
                 array[HISTO_BUCKETS]++;
 }
 
-static inline void spin_time_accum_spinning(u64 start)
-{
-        u32 delta = xen_clocksource_read() - start;
-
-        __spin_time_accum(delta, spinlock_stats.histo_spin_spinning);
-        spinlock_stats.time_spinning += delta;
-}
-
-static inline void spin_time_accum_total(u64 start)
-{
-        u32 delta = xen_clocksource_read() - start;
-
-        __spin_time_accum(delta, spinlock_stats.histo_spin_total);
-        spinlock_stats.time_total += delta;
-}
-
 static inline void spin_time_accum_blocked(u64 start)
 {
         u32 delta = xen_clocksource_read() - start;
@@ -99,19 +82,15 @@ static inline void spin_time_accum_blocked(u64 start)
 }
 #else  /* !CONFIG_XEN_DEBUG_FS */
 #define TIMEOUT (1 << 10)
-#define ADD_STATS(elem, val) do { (void)(val); } while(0)
+static inline void add_stats(enum xen_contention_stat var, u32 val)
+{
+}
 
 static inline u64 spin_time_start(void)
 {
         return 0;
 }
 
-static inline void spin_time_accum_total(u64 start)
-{
-}
-static inline void spin_time_accum_spinning(u64 start)
-{
-}
 static inline void spin_time_accum_blocked(u64 start)
 {
 }
@@ -134,230 +113,84 @@ typedef u16 xen_spinners_t;
         asm(LOCK_PREFIX " decw %0" : "+m" ((xl)->spinners) : : "memory");
 #endif
 
-struct xen_spinlock {
-        unsigned char lock;             /* 0 -> free; 1 -> locked */
-        xen_spinners_t spinners;        /* count of waiting cpus */
+struct xen_lock_waiting {
+        struct arch_spinlock *lock;
+        __ticket_t want;
 };
 
 static DEFINE_PER_CPU(int, lock_kicker_irq) = -1;
-
-#if 0
-static int xen_spin_is_locked(struct arch_spinlock *lock)
-{
-        struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-
-        return xl->lock != 0;
-}
-
-static int xen_spin_is_contended(struct arch_spinlock *lock)
-{
-        struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-
-        /* Not strictly true; this is only the count of contended
-           lock-takers entering the slow path. */
-        return xl->spinners != 0;
-}
-
-static int xen_spin_trylock(struct arch_spinlock *lock)
-{
-        struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-        u8 old = 1;
-
-        asm("xchgb %b0,%1"
-            : "+q" (old), "+m" (xl->lock) : : "memory");
-
-        return old == 0;
-}
-
 static DEFINE_PER_CPU(char *, irq_name);
-static DEFINE_PER_CPU(struct xen_spinlock *, lock_spinners);
+static DEFINE_PER_CPU(struct xen_lock_waiting, lock_waiting);
+static cpumask_t waiting_cpus;
 
-/*
- * Mark a cpu as interested in a lock.  Returns the CPU's previous
- * lock of interest, in case we got preempted by an interrupt.
- */
-static inline struct xen_spinlock *spinning_lock(struct xen_spinlock *xl)
+static void xen_lock_spinning(struct arch_spinlock *lock, __ticket_t want)
 {
-        struct xen_spinlock *prev;
-
-        prev = __this_cpu_read(lock_spinners);
-        __this_cpu_write(lock_spinners, xl);
-
-        wmb();                  /* set lock of interest before count */
-
-        inc_spinners(xl);
-
-        return prev;
-}
-
-/*
- * Mark a cpu as no longer interested in a lock.  Restores previous
- * lock of interest (NULL for none).
- */
-static inline void unspinning_lock(struct xen_spinlock *xl, struct xen_spinlock *prev)
-{
-        dec_spinners(xl);
-        wmb();                  /* decrement count before restoring lock */
-        __this_cpu_write(lock_spinners, prev);
-}
-
-static noinline int xen_spin_lock_slow(struct arch_spinlock *lock, bool irq_enable)
-{
-        struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-        struct xen_spinlock *prev;
         int irq = __this_cpu_read(lock_kicker_irq);
-        int ret;
+        struct xen_lock_waiting *w = &__get_cpu_var(lock_waiting);
+        int cpu = smp_processor_id();
         u64 start;
+        unsigned long flags;
 
         /* If kicker interrupts not initialized yet, just spin */
         if (irq == -1)
-                return 0;
+                return;
 
         start = spin_time_start();
 
-        /* announce we're spinning */
-        prev = spinning_lock(xl);
-
-        ADD_STATS(taken_slow, 1);
-        ADD_STATS(taken_slow_nested, prev != NULL);
-
-        do {
-                unsigned long flags;
-
-                /* clear pending */
-                xen_clear_irq_pending(irq);
-
-                /* check again make sure it didn't become free while
-                   we weren't looking  */
-                ret = xen_spin_trylock(lock);
-                if (ret) {
-                        ADD_STATS(taken_slow_pickup, 1);
-
-                        /*
-                         * If we interrupted another spinlock while it
-                         * was blocking, make sure it doesn't block
-                         * without rechecking the lock.
-                         */
-                        if (prev != NULL)
-                                xen_set_irq_pending(irq);
-                        goto out;
-                }
+        /*
+         * Make sure an interrupt handler can't upset things in a
+         * partially setup state.
+         */
+        local_irq_save(flags);
 
-                flags = arch_local_save_flags();
-                if (irq_enable) {
-                        ADD_STATS(taken_slow_irqenable, 1);
-                        raw_local_irq_enable();
-                }
+        w->want = want;
+        smp_wmb();
+        w->lock = lock;
 
-                /*
-                 * Block until irq becomes pending.  If we're
-                 * interrupted at this point (after the trylock but
-                 * before entering the block), then the nested lock
-                 * handler guarantees that the irq will be left
-                 * pending if there's any chance the lock became free;
-                 * xen_poll_irq() returns immediately if the irq is
-                 * pending.
-                 */
-                xen_poll_irq(irq);
+        /* This uses set_bit, which atomic and therefore a barrier */
+        cpumask_set_cpu(cpu, &waiting_cpus);
+        add_stats(TAKEN_SLOW, 1);
 
-                raw_local_irq_restore(flags);
+        /* clear pending */
+        xen_clear_irq_pending(irq);
 
-                ADD_STATS(taken_slow_spurious, !xen_test_irq_pending(irq));
-        } while (!xen_test_irq_pending(irq)); /* check for spurious wakeups */
+        /* Only check lock once pending cleared */
+        barrier();
 
+        /* check again make sure it didn't become free while
+           we weren't looking  */
+        if (ACCESS_ONCE(lock->tickets.head) == want) {
+                add_stats(TAKEN_SLOW_PICKUP, 1);
+                goto out;
+        }
+        /* Block until irq becomes pending (or perhaps a spurious wakeup) */
+        xen_poll_irq(irq);
+        add_stats(TAKEN_SLOW_SPURIOUS, !xen_test_irq_pending(irq));
         kstat_incr_irqs_this_cpu(irq, irq_to_desc(irq));
-
 out:
-        unspinning_lock(xl, prev);
+        cpumask_clear_cpu(cpu, &waiting_cpus);
+        w->lock = NULL;
+        local_irq_restore(flags);
         spin_time_accum_blocked(start);
-
-        return ret;
 }
 
-static inline void __xen_spin_lock(struct arch_spinlock *lock, bool irq_enable)
-{
-        struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-        unsigned timeout;
-        u8 oldval;
-        u64 start_spin;
-
-        ADD_STATS(taken, 1);
-
-        start_spin = spin_time_start();
-
-        do {
-                u64 start_spin_fast = spin_time_start();
-
-                timeout = TIMEOUT;
-
-                asm("1: xchgb %1,%0\n"
-                    "   testb %1,%1\n"
-                    "   jz 3f\n"
-                    "2: rep;nop\n"
-                    "   cmpb $0,%0\n"
-                    "   je 1b\n"
-                    "   dec %2\n"
-                    "   jnz 2b\n"
-                    "3:\n"
-                    : "+m" (xl->lock), "=q" (oldval), "+r" (timeout)
-                    : "1" (1)
-                    : "memory");
-
-                spin_time_accum_spinning(start_spin_fast);
-
-        } while (unlikely(oldval != 0 &&
-                          (TIMEOUT == ~0 || !xen_spin_lock_slow(lock, irq_enable))));
-
-        spin_time_accum_total(start_spin);
-}
-
-static void xen_spin_lock(struct arch_spinlock *lock)
-{
-        __xen_spin_lock(lock, false);
-}
-
-static void xen_spin_lock_flags(struct arch_spinlock *lock, unsigned long flags)
-{
-        __xen_spin_lock(lock, !raw_irqs_disabled_flags(flags));
-}
-
-static noinline void xen_spin_unlock_slow(struct xen_spinlock *xl)
+static void xen_unlock_kick(struct arch_spinlock *lock, __ticket_t next)
 {
         int cpu;
 
-        ADD_STATS(released_slow, 1);
+        add_stats(RELEASED_SLOW, 1);
+
+        for_each_cpu(cpu, &waiting_cpus) {
+                const struct xen_lock_waiting *w = &per_cpu(lock_waiting, cpu);
 
-        for_each_online_cpu(cpu) {
-                /* XXX should mix up next cpu selection */
-                if (per_cpu(lock_spinners, cpu) == xl) {
-                        ADD_STATS(released_slow_kicked, 1);
+                if (w->lock == lock && w->want == next) {
+                        add_stats(RELEASED_SLOW_KICKED, 1);
                         xen_send_IPI_one(cpu, XEN_SPIN_UNLOCK_VECTOR);
+                        break;
                 }
         }
 }
 
-static void xen_spin_unlock(struct arch_spinlock *lock)
-{
-        struct xen_spinlock *xl = (struct xen_spinlock *)lock;
-
-        ADD_STATS(released, 1);
-
-        smp_wmb();              /* make sure no writes get moved after unlock */
-        xl->lock = 0;           /* release lock */
-
-        /*
-         * Make sure unlock happens before checking for waiting
-         * spinners.  We need a strong barrier to enforce the
-         * write-read ordering to different memory locations, as the
-         * CPU makes no implied guarantees about their ordering.
-         */
-        mb();
-
-        if (unlikely(xl->spinners))
-                xen_spin_unlock_slow(xl);
-}
-#endif
-
 static irqreturn_t dummy_handler(int irq, void *dev_id)
 {
         BUG();
@@ -420,15 +253,8 @@ void __init xen_init_spinlocks(void)
         if (xen_hvm_domain())
                 return;
 
-        BUILD_BUG_ON(sizeof(struct xen_spinlock) > sizeof(arch_spinlock_t));
-#if 0
-        pv_lock_ops.spin_is_locked = xen_spin_is_locked;
-        pv_lock_ops.spin_is_contended = xen_spin_is_contended;
-        pv_lock_ops.spin_lock = xen_spin_lock;
-        pv_lock_ops.spin_lock_flags = xen_spin_lock_flags;
-        pv_lock_ops.spin_trylock = xen_spin_trylock;
-        pv_lock_ops.spin_unlock = xen_spin_unlock;
-#endif
+        pv_lock_ops.lock_spinning = xen_lock_spinning;
+        pv_lock_ops.unlock_kick = xen_unlock_kick;
 }
 
 #ifdef CONFIG_XEN_DEBUG_FS
@@ -446,37 +272,21 @@ static int __init xen_spinlock_debugfs(void)
 
         debugfs_create_u8("zero_stats", 0644, d_spin_debug, &zero_stats);
 
-        debugfs_create_u32("timeout", 0644, d_spin_debug, &lock_timeout);
-
-        debugfs_create_u64("taken", 0444, d_spin_debug, &spinlock_stats.taken);
         debugfs_create_u32("taken_slow", 0444, d_spin_debug,
-                           &spinlock_stats.taken_slow);
-        debugfs_create_u32("taken_slow_nested", 0444, d_spin_debug,
-                           &spinlock_stats.taken_slow_nested);
+                           &spinlock_stats.contention_stats[TAKEN_SLOW]);
         debugfs_create_u32("taken_slow_pickup", 0444, d_spin_debug,
-                           &spinlock_stats.taken_slow_pickup);
+                           &spinlock_stats.contention_stats[TAKEN_SLOW_PICKUP]);
         debugfs_create_u32("taken_slow_spurious", 0444, d_spin_debug,
-                           &spinlock_stats.taken_slow_spurious);
-        debugfs_create_u32("taken_slow_irqenable", 0444, d_spin_debug,
-                           &spinlock_stats.taken_slow_irqenable);
+                           &spinlock_stats.contention_stats[TAKEN_SLOW_SPURIOUS]);
 
-        debugfs_create_u64("released", 0444, d_spin_debug, &spinlock_stats.released);
         debugfs_create_u32("released_slow", 0444, d_spin_debug,
-                           &spinlock_stats.released_slow);
+                           &spinlock_stats.contention_stats[RELEASED_SLOW]);
         debugfs_create_u32("released_slow_kicked", 0444, d_spin_debug,
-                           &spinlock_stats.released_slow_kicked);
+                           &spinlock_stats.contention_stats[RELEASED_SLOW_KICKED]);
 
-        debugfs_create_u64("time_spinning", 0444, d_spin_debug,
-                           &spinlock_stats.time_spinning);
         debugfs_create_u64("time_blocked", 0444, d_spin_debug,
                            &spinlock_stats.time_blocked);
-        debugfs_create_u64("time_total", 0444, d_spin_debug,
-                           &spinlock_stats.time_total);
 
-        debugfs_create_u32_array("histo_total", 0444, d_spin_debug,
-                                 spinlock_stats.histo_spin_total, HISTO_BUCKETS + 1);
-        debugfs_create_u32_array("histo_spinning", 0444, d_spin_debug,
-                                 spinlock_stats.histo_spin_spinning, HISTO_BUCKETS + 1);
         debugfs_create_u32_array("histo_blocked", 0444, d_spin_debug,
                                  spinlock_stats.histo_spin_blocked, HISTO_BUCKETS + 1);
 