diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cpu.c | 28 | ||||
-rw-r--r-- | kernel/futex.c | 4 | ||||
-rw-r--r-- | kernel/jump_label.c | 107 | ||||
-rw-r--r-- | kernel/locking/lockdep.c | 116 | ||||
-rw-r--r-- | kernel/locking/lockdep_internals.h | 27 | ||||
-rw-r--r-- | kernel/locking/lockdep_proc.c | 2 | ||||
-rw-r--r-- | kernel/locking/qspinlock.c | 143 | ||||
-rw-r--r-- | kernel/locking/qspinlock_paravirt.h | 4 | ||||
-rw-r--r-- | kernel/locking/qspinlock_stat.h | 6 | ||||
-rw-r--r-- | kernel/locking/rtmutex.c | 4 | ||||
-rw-r--r-- | kernel/locking/rwsem-xadd.c | 15 | ||||
-rw-r--r-- | kernel/locking/rwsem.c | 7 | ||||
-rw-r--r-- | kernel/locking/rwsem.h | 95 | ||||
-rw-r--r-- | kernel/module.c | 9 |
14 files changed, 342 insertions, 225 deletions
diff --git a/kernel/cpu.c b/kernel/cpu.c index 0097acec1c71..be4859f07153 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c | |||
@@ -315,6 +315,16 @@ void lockdep_assert_cpus_held(void) | |||
315 | percpu_rwsem_assert_held(&cpu_hotplug_lock); | 315 | percpu_rwsem_assert_held(&cpu_hotplug_lock); |
316 | } | 316 | } |
317 | 317 | ||
318 | static void lockdep_acquire_cpus_lock(void) | ||
319 | { | ||
320 | rwsem_acquire(&cpu_hotplug_lock.rw_sem.dep_map, 0, 0, _THIS_IP_); | ||
321 | } | ||
322 | |||
323 | static void lockdep_release_cpus_lock(void) | ||
324 | { | ||
325 | rwsem_release(&cpu_hotplug_lock.rw_sem.dep_map, 1, _THIS_IP_); | ||
326 | } | ||
327 | |||
318 | /* | 328 | /* |
319 | * Wait for currently running CPU hotplug operations to complete (if any) and | 329 | * Wait for currently running CPU hotplug operations to complete (if any) and |
320 | * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects | 330 | * disable future CPU hotplug (from sysfs). The 'cpu_add_remove_lock' protects |
@@ -344,6 +354,17 @@ void cpu_hotplug_enable(void) | |||
344 | cpu_maps_update_done(); | 354 | cpu_maps_update_done(); |
345 | } | 355 | } |
346 | EXPORT_SYMBOL_GPL(cpu_hotplug_enable); | 356 | EXPORT_SYMBOL_GPL(cpu_hotplug_enable); |
357 | |||
358 | #else | ||
359 | |||
360 | static void lockdep_acquire_cpus_lock(void) | ||
361 | { | ||
362 | } | ||
363 | |||
364 | static void lockdep_release_cpus_lock(void) | ||
365 | { | ||
366 | } | ||
367 | |||
347 | #endif /* CONFIG_HOTPLUG_CPU */ | 368 | #endif /* CONFIG_HOTPLUG_CPU */ |
348 | 369 | ||
349 | #ifdef CONFIG_HOTPLUG_SMT | 370 | #ifdef CONFIG_HOTPLUG_SMT |
@@ -616,6 +637,12 @@ static void cpuhp_thread_fun(unsigned int cpu) | |||
616 | */ | 637 | */ |
617 | smp_mb(); | 638 | smp_mb(); |
618 | 639 | ||
640 | /* | ||
641 | * The BP holds the hotplug lock, but we're now running on the AP, | ||
642 | * ensure that anybody asserting the lock is held, will actually find | ||
643 | * it so. | ||
644 | */ | ||
645 | lockdep_acquire_cpus_lock(); | ||
619 | cpuhp_lock_acquire(bringup); | 646 | cpuhp_lock_acquire(bringup); |
620 | 647 | ||
621 | if (st->single) { | 648 | if (st->single) { |
@@ -661,6 +688,7 @@ static void cpuhp_thread_fun(unsigned int cpu) | |||
661 | } | 688 | } |
662 | 689 | ||
663 | cpuhp_lock_release(bringup); | 690 | cpuhp_lock_release(bringup); |
691 | lockdep_release_cpus_lock(); | ||
664 | 692 | ||
665 | if (!st->should_run) | 693 | if (!st->should_run) |
666 | complete_ap_thread(st, bringup); | 694 | complete_ap_thread(st, bringup); |
diff --git a/kernel/futex.c b/kernel/futex.c index 11fc3bb456d6..3e2de8fc1891 100644 --- a/kernel/futex.c +++ b/kernel/futex.c | |||
@@ -1365,9 +1365,9 @@ static void __unqueue_futex(struct futex_q *q) | |||
1365 | { | 1365 | { |
1366 | struct futex_hash_bucket *hb; | 1366 | struct futex_hash_bucket *hb; |
1367 | 1367 | ||
1368 | if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr)) | 1368 | if (WARN_ON_SMP(!q->lock_ptr) || WARN_ON(plist_node_empty(&q->list))) |
1369 | || WARN_ON(plist_node_empty(&q->list))) | ||
1370 | return; | 1369 | return; |
1370 | lockdep_assert_held(q->lock_ptr); | ||
1371 | 1371 | ||
1372 | hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock); | 1372 | hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock); |
1373 | plist_del(&q->list, &hb->chain); | 1373 | plist_del(&q->list, &hb->chain); |
diff --git a/kernel/jump_label.c b/kernel/jump_label.c index 2e62503bea0d..b28028b08d44 100644 --- a/kernel/jump_label.c +++ b/kernel/jump_label.c | |||
@@ -38,23 +38,43 @@ static int jump_label_cmp(const void *a, const void *b) | |||
38 | const struct jump_entry *jea = a; | 38 | const struct jump_entry *jea = a; |
39 | const struct jump_entry *jeb = b; | 39 | const struct jump_entry *jeb = b; |
40 | 40 | ||
41 | if (jea->key < jeb->key) | 41 | if (jump_entry_key(jea) < jump_entry_key(jeb)) |
42 | return -1; | 42 | return -1; |
43 | 43 | ||
44 | if (jea->key > jeb->key) | 44 | if (jump_entry_key(jea) > jump_entry_key(jeb)) |
45 | return 1; | 45 | return 1; |
46 | 46 | ||
47 | return 0; | 47 | return 0; |
48 | } | 48 | } |
49 | 49 | ||
50 | static void jump_label_swap(void *a, void *b, int size) | ||
51 | { | ||
52 | long delta = (unsigned long)a - (unsigned long)b; | ||
53 | struct jump_entry *jea = a; | ||
54 | struct jump_entry *jeb = b; | ||
55 | struct jump_entry tmp = *jea; | ||
56 | |||
57 | jea->code = jeb->code - delta; | ||
58 | jea->target = jeb->target - delta; | ||
59 | jea->key = jeb->key - delta; | ||
60 | |||
61 | jeb->code = tmp.code + delta; | ||
62 | jeb->target = tmp.target + delta; | ||
63 | jeb->key = tmp.key + delta; | ||
64 | } | ||
65 | |||
50 | static void | 66 | static void |
51 | jump_label_sort_entries(struct jump_entry *start, struct jump_entry *stop) | 67 | jump_label_sort_entries(struct jump_entry *start, struct jump_entry *stop) |
52 | { | 68 | { |
53 | unsigned long size; | 69 | unsigned long size; |
70 | void *swapfn = NULL; | ||
71 | |||
72 | if (IS_ENABLED(CONFIG_HAVE_ARCH_JUMP_LABEL_RELATIVE)) | ||
73 | swapfn = jump_label_swap; | ||
54 | 74 | ||
55 | size = (((unsigned long)stop - (unsigned long)start) | 75 | size = (((unsigned long)stop - (unsigned long)start) |
56 | / sizeof(struct jump_entry)); | 76 | / sizeof(struct jump_entry)); |
57 | sort(start, size, sizeof(struct jump_entry), jump_label_cmp, NULL); | 77 | sort(start, size, sizeof(struct jump_entry), jump_label_cmp, swapfn); |
58 | } | 78 | } |
59 | 79 | ||
60 | static void jump_label_update(struct static_key *key); | 80 | static void jump_label_update(struct static_key *key); |
@@ -85,6 +105,7 @@ void static_key_slow_inc_cpuslocked(struct static_key *key) | |||
85 | int v, v1; | 105 | int v, v1; |
86 | 106 | ||
87 | STATIC_KEY_CHECK_USE(key); | 107 | STATIC_KEY_CHECK_USE(key); |
108 | lockdep_assert_cpus_held(); | ||
88 | 109 | ||
89 | /* | 110 | /* |
90 | * Careful if we get concurrent static_key_slow_inc() calls; | 111 | * Careful if we get concurrent static_key_slow_inc() calls; |
@@ -130,6 +151,7 @@ EXPORT_SYMBOL_GPL(static_key_slow_inc); | |||
130 | void static_key_enable_cpuslocked(struct static_key *key) | 151 | void static_key_enable_cpuslocked(struct static_key *key) |
131 | { | 152 | { |
132 | STATIC_KEY_CHECK_USE(key); | 153 | STATIC_KEY_CHECK_USE(key); |
154 | lockdep_assert_cpus_held(); | ||
133 | 155 | ||
134 | if (atomic_read(&key->enabled) > 0) { | 156 | if (atomic_read(&key->enabled) > 0) { |
135 | WARN_ON_ONCE(atomic_read(&key->enabled) != 1); | 157 | WARN_ON_ONCE(atomic_read(&key->enabled) != 1); |
@@ -160,6 +182,7 @@ EXPORT_SYMBOL_GPL(static_key_enable); | |||
160 | void static_key_disable_cpuslocked(struct static_key *key) | 182 | void static_key_disable_cpuslocked(struct static_key *key) |
161 | { | 183 | { |
162 | STATIC_KEY_CHECK_USE(key); | 184 | STATIC_KEY_CHECK_USE(key); |
185 | lockdep_assert_cpus_held(); | ||
163 | 186 | ||
164 | if (atomic_read(&key->enabled) != 1) { | 187 | if (atomic_read(&key->enabled) != 1) { |
165 | WARN_ON_ONCE(atomic_read(&key->enabled) != 0); | 188 | WARN_ON_ONCE(atomic_read(&key->enabled) != 0); |
@@ -185,6 +208,8 @@ static void __static_key_slow_dec_cpuslocked(struct static_key *key, | |||
185 | unsigned long rate_limit, | 208 | unsigned long rate_limit, |
186 | struct delayed_work *work) | 209 | struct delayed_work *work) |
187 | { | 210 | { |
211 | lockdep_assert_cpus_held(); | ||
212 | |||
188 | /* | 213 | /* |
189 | * The negative count check is valid even when a negative | 214 | * The negative count check is valid even when a negative |
190 | * key->enabled is in use by static_key_slow_inc(); a | 215 | * key->enabled is in use by static_key_slow_inc(); a |
@@ -261,8 +286,8 @@ EXPORT_SYMBOL_GPL(jump_label_rate_limit); | |||
261 | 286 | ||
262 | static int addr_conflict(struct jump_entry *entry, void *start, void *end) | 287 | static int addr_conflict(struct jump_entry *entry, void *start, void *end) |
263 | { | 288 | { |
264 | if (entry->code <= (unsigned long)end && | 289 | if (jump_entry_code(entry) <= (unsigned long)end && |
265 | entry->code + JUMP_LABEL_NOP_SIZE > (unsigned long)start) | 290 | jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE > (unsigned long)start) |
266 | return 1; | 291 | return 1; |
267 | 292 | ||
268 | return 0; | 293 | return 0; |
@@ -321,16 +346,6 @@ static inline void static_key_set_linked(struct static_key *key) | |||
321 | key->type |= JUMP_TYPE_LINKED; | 346 | key->type |= JUMP_TYPE_LINKED; |
322 | } | 347 | } |
323 | 348 | ||
324 | static inline struct static_key *jump_entry_key(struct jump_entry *entry) | ||
325 | { | ||
326 | return (struct static_key *)((unsigned long)entry->key & ~1UL); | ||
327 | } | ||
328 | |||
329 | static bool jump_entry_branch(struct jump_entry *entry) | ||
330 | { | ||
331 | return (unsigned long)entry->key & 1UL; | ||
332 | } | ||
333 | |||
334 | /*** | 349 | /*** |
335 | * A 'struct static_key' uses a union such that it either points directly | 350 | * A 'struct static_key' uses a union such that it either points directly |
336 | * to a table of 'struct jump_entry' or to a linked list of modules which in | 351 | * to a table of 'struct jump_entry' or to a linked list of modules which in |
@@ -355,7 +370,7 @@ static enum jump_label_type jump_label_type(struct jump_entry *entry) | |||
355 | { | 370 | { |
356 | struct static_key *key = jump_entry_key(entry); | 371 | struct static_key *key = jump_entry_key(entry); |
357 | bool enabled = static_key_enabled(key); | 372 | bool enabled = static_key_enabled(key); |
358 | bool branch = jump_entry_branch(entry); | 373 | bool branch = jump_entry_is_branch(entry); |
359 | 374 | ||
360 | /* See the comment in linux/jump_label.h */ | 375 | /* See the comment in linux/jump_label.h */ |
361 | return enabled ^ branch; | 376 | return enabled ^ branch; |
@@ -363,19 +378,20 @@ static enum jump_label_type jump_label_type(struct jump_entry *entry) | |||
363 | 378 | ||
364 | static void __jump_label_update(struct static_key *key, | 379 | static void __jump_label_update(struct static_key *key, |
365 | struct jump_entry *entry, | 380 | struct jump_entry *entry, |
366 | struct jump_entry *stop) | 381 | struct jump_entry *stop, |
382 | bool init) | ||
367 | { | 383 | { |
368 | for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) { | 384 | for (; (entry < stop) && (jump_entry_key(entry) == key); entry++) { |
369 | /* | 385 | /* |
370 | * An entry->code of 0 indicates an entry which has been | 386 | * An entry->code of 0 indicates an entry which has been |
371 | * disabled because it was in an init text area. | 387 | * disabled because it was in an init text area. |
372 | */ | 388 | */ |
373 | if (entry->code) { | 389 | if (init || !jump_entry_is_init(entry)) { |
374 | if (kernel_text_address(entry->code)) | 390 | if (kernel_text_address(jump_entry_code(entry))) |
375 | arch_jump_label_transform(entry, jump_label_type(entry)); | 391 | arch_jump_label_transform(entry, jump_label_type(entry)); |
376 | else | 392 | else |
377 | WARN_ONCE(1, "can't patch jump_label at %pS", | 393 | WARN_ONCE(1, "can't patch jump_label at %pS", |
378 | (void *)(unsigned long)entry->code); | 394 | (void *)jump_entry_code(entry)); |
379 | } | 395 | } |
380 | } | 396 | } |
381 | } | 397 | } |
@@ -410,6 +426,9 @@ void __init jump_label_init(void) | |||
410 | if (jump_label_type(iter) == JUMP_LABEL_NOP) | 426 | if (jump_label_type(iter) == JUMP_LABEL_NOP) |
411 | arch_jump_label_transform_static(iter, JUMP_LABEL_NOP); | 427 | arch_jump_label_transform_static(iter, JUMP_LABEL_NOP); |
412 | 428 | ||
429 | if (init_section_contains((void *)jump_entry_code(iter), 1)) | ||
430 | jump_entry_set_init(iter); | ||
431 | |||
413 | iterk = jump_entry_key(iter); | 432 | iterk = jump_entry_key(iter); |
414 | if (iterk == key) | 433 | if (iterk == key) |
415 | continue; | 434 | continue; |
@@ -422,26 +441,13 @@ void __init jump_label_init(void) | |||
422 | cpus_read_unlock(); | 441 | cpus_read_unlock(); |
423 | } | 442 | } |
424 | 443 | ||
425 | /* Disable any jump label entries in __init/__exit code */ | ||
426 | void __init jump_label_invalidate_initmem(void) | ||
427 | { | ||
428 | struct jump_entry *iter_start = __start___jump_table; | ||
429 | struct jump_entry *iter_stop = __stop___jump_table; | ||
430 | struct jump_entry *iter; | ||
431 | |||
432 | for (iter = iter_start; iter < iter_stop; iter++) { | ||
433 | if (init_section_contains((void *)(unsigned long)iter->code, 1)) | ||
434 | iter->code = 0; | ||
435 | } | ||
436 | } | ||
437 | |||
438 | #ifdef CONFIG_MODULES | 444 | #ifdef CONFIG_MODULES |
439 | 445 | ||
440 | static enum jump_label_type jump_label_init_type(struct jump_entry *entry) | 446 | static enum jump_label_type jump_label_init_type(struct jump_entry *entry) |
441 | { | 447 | { |
442 | struct static_key *key = jump_entry_key(entry); | 448 | struct static_key *key = jump_entry_key(entry); |
443 | bool type = static_key_type(key); | 449 | bool type = static_key_type(key); |
444 | bool branch = jump_entry_branch(entry); | 450 | bool branch = jump_entry_is_branch(entry); |
445 | 451 | ||
446 | /* See the comment in linux/jump_label.h */ | 452 | /* See the comment in linux/jump_label.h */ |
447 | return type ^ branch; | 453 | return type ^ branch; |
@@ -455,7 +461,7 @@ struct static_key_mod { | |||
455 | 461 | ||
456 | static inline struct static_key_mod *static_key_mod(struct static_key *key) | 462 | static inline struct static_key_mod *static_key_mod(struct static_key *key) |
457 | { | 463 | { |
458 | WARN_ON_ONCE(!(key->type & JUMP_TYPE_LINKED)); | 464 | WARN_ON_ONCE(!static_key_linked(key)); |
459 | return (struct static_key_mod *)(key->type & ~JUMP_TYPE_MASK); | 465 | return (struct static_key_mod *)(key->type & ~JUMP_TYPE_MASK); |
460 | } | 466 | } |
461 | 467 | ||
@@ -514,7 +520,8 @@ static void __jump_label_mod_update(struct static_key *key) | |||
514 | stop = __stop___jump_table; | 520 | stop = __stop___jump_table; |
515 | else | 521 | else |
516 | stop = m->jump_entries + m->num_jump_entries; | 522 | stop = m->jump_entries + m->num_jump_entries; |
517 | __jump_label_update(key, mod->entries, stop); | 523 | __jump_label_update(key, mod->entries, stop, |
524 | m && m->state == MODULE_STATE_COMING); | ||
518 | } | 525 | } |
519 | } | 526 | } |
520 | 527 | ||
@@ -560,12 +567,15 @@ static int jump_label_add_module(struct module *mod) | |||
560 | for (iter = iter_start; iter < iter_stop; iter++) { | 567 | for (iter = iter_start; iter < iter_stop; iter++) { |
561 | struct static_key *iterk; | 568 | struct static_key *iterk; |
562 | 569 | ||
570 | if (within_module_init(jump_entry_code(iter), mod)) | ||
571 | jump_entry_set_init(iter); | ||
572 | |||
563 | iterk = jump_entry_key(iter); | 573 | iterk = jump_entry_key(iter); |
564 | if (iterk == key) | 574 | if (iterk == key) |
565 | continue; | 575 | continue; |
566 | 576 | ||
567 | key = iterk; | 577 | key = iterk; |
568 | if (within_module(iter->key, mod)) { | 578 | if (within_module((unsigned long)key, mod)) { |
569 | static_key_set_entries(key, iter); | 579 | static_key_set_entries(key, iter); |
570 | continue; | 580 | continue; |
571 | } | 581 | } |
@@ -595,7 +605,7 @@ static int jump_label_add_module(struct module *mod) | |||
595 | 605 | ||
596 | /* Only update if we've changed from our initial state */ | 606 | /* Only update if we've changed from our initial state */ |
597 | if (jump_label_type(iter) != jump_label_init_type(iter)) | 607 | if (jump_label_type(iter) != jump_label_init_type(iter)) |
598 | __jump_label_update(key, iter, iter_stop); | 608 | __jump_label_update(key, iter, iter_stop, true); |
599 | } | 609 | } |
600 | 610 | ||
601 | return 0; | 611 | return 0; |
@@ -615,7 +625,7 @@ static void jump_label_del_module(struct module *mod) | |||
615 | 625 | ||
616 | key = jump_entry_key(iter); | 626 | key = jump_entry_key(iter); |
617 | 627 | ||
618 | if (within_module(iter->key, mod)) | 628 | if (within_module((unsigned long)key, mod)) |
619 | continue; | 629 | continue; |
620 | 630 | ||
621 | /* No memory during module load */ | 631 | /* No memory during module load */ |
@@ -651,19 +661,6 @@ static void jump_label_del_module(struct module *mod) | |||
651 | } | 661 | } |
652 | } | 662 | } |
653 | 663 | ||
654 | /* Disable any jump label entries in module init code */ | ||
655 | static void jump_label_invalidate_module_init(struct module *mod) | ||
656 | { | ||
657 | struct jump_entry *iter_start = mod->jump_entries; | ||
658 | struct jump_entry *iter_stop = iter_start + mod->num_jump_entries; | ||
659 | struct jump_entry *iter; | ||
660 | |||
661 | for (iter = iter_start; iter < iter_stop; iter++) { | ||
662 | if (within_module_init(iter->code, mod)) | ||
663 | iter->code = 0; | ||
664 | } | ||
665 | } | ||
666 | |||
667 | static int | 664 | static int |
668 | jump_label_module_notify(struct notifier_block *self, unsigned long val, | 665 | jump_label_module_notify(struct notifier_block *self, unsigned long val, |
669 | void *data) | 666 | void *data) |
@@ -685,9 +682,6 @@ jump_label_module_notify(struct notifier_block *self, unsigned long val, | |||
685 | case MODULE_STATE_GOING: | 682 | case MODULE_STATE_GOING: |
686 | jump_label_del_module(mod); | 683 | jump_label_del_module(mod); |
687 | break; | 684 | break; |
688 | case MODULE_STATE_LIVE: | ||
689 | jump_label_invalidate_module_init(mod); | ||
690 | break; | ||
691 | } | 685 | } |
692 | 686 | ||
693 | jump_label_unlock(); | 687 | jump_label_unlock(); |
@@ -757,7 +751,8 @@ static void jump_label_update(struct static_key *key) | |||
757 | entry = static_key_entries(key); | 751 | entry = static_key_entries(key); |
758 | /* if there are no users, entry can be NULL */ | 752 | /* if there are no users, entry can be NULL */ |
759 | if (entry) | 753 | if (entry) |
760 | __jump_label_update(key, entry, stop); | 754 | __jump_label_update(key, entry, stop, |
755 | system_state < SYSTEM_RUNNING); | ||
761 | } | 756 | } |
762 | 757 | ||
763 | #ifdef CONFIG_STATIC_KEYS_SELFTEST | 758 | #ifdef CONFIG_STATIC_KEYS_SELFTEST |
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index dd13f865ad40..1efada2dd9dd 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c | |||
@@ -138,7 +138,7 @@ static struct lock_list list_entries[MAX_LOCKDEP_ENTRIES]; | |||
138 | * get freed - this significantly simplifies the debugging code. | 138 | * get freed - this significantly simplifies the debugging code. |
139 | */ | 139 | */ |
140 | unsigned long nr_lock_classes; | 140 | unsigned long nr_lock_classes; |
141 | static struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; | 141 | struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; |
142 | 142 | ||
143 | static inline struct lock_class *hlock_class(struct held_lock *hlock) | 143 | static inline struct lock_class *hlock_class(struct held_lock *hlock) |
144 | { | 144 | { |
@@ -1391,7 +1391,9 @@ static void print_lock_class_header(struct lock_class *class, int depth) | |||
1391 | 1391 | ||
1392 | printk("%*s->", depth, ""); | 1392 | printk("%*s->", depth, ""); |
1393 | print_lock_name(class); | 1393 | print_lock_name(class); |
1394 | printk(KERN_CONT " ops: %lu", class->ops); | 1394 | #ifdef CONFIG_DEBUG_LOCKDEP |
1395 | printk(KERN_CONT " ops: %lu", debug_class_ops_read(class)); | ||
1396 | #endif | ||
1395 | printk(KERN_CONT " {\n"); | 1397 | printk(KERN_CONT " {\n"); |
1396 | 1398 | ||
1397 | for (bit = 0; bit < LOCK_USAGE_STATES; bit++) { | 1399 | for (bit = 0; bit < LOCK_USAGE_STATES; bit++) { |
@@ -2148,76 +2150,6 @@ static int check_no_collision(struct task_struct *curr, | |||
2148 | } | 2150 | } |
2149 | 2151 | ||
2150 | /* | 2152 | /* |
2151 | * This is for building a chain between just two different classes, | ||
2152 | * instead of adding a new hlock upon current, which is done by | ||
2153 | * add_chain_cache(). | ||
2154 | * | ||
2155 | * This can be called in any context with two classes, while | ||
2156 | * add_chain_cache() must be done within the lock owner's context | ||
2157 | * since it uses hlock which might be racy in another context. | ||
2158 | */ | ||
2159 | static inline int add_chain_cache_classes(unsigned int prev, | ||
2160 | unsigned int next, | ||
2161 | unsigned int irq_context, | ||
2162 | u64 chain_key) | ||
2163 | { | ||
2164 | struct hlist_head *hash_head = chainhashentry(chain_key); | ||
2165 | struct lock_chain *chain; | ||
2166 | |||
2167 | /* | ||
2168 | * Allocate a new chain entry from the static array, and add | ||
2169 | * it to the hash: | ||
2170 | */ | ||
2171 | |||
2172 | /* | ||
2173 | * We might need to take the graph lock, ensure we've got IRQs | ||
2174 | * disabled to make this an IRQ-safe lock.. for recursion reasons | ||
2175 | * lockdep won't complain about its own locking errors. | ||
2176 | */ | ||
2177 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) | ||
2178 | return 0; | ||
2179 | |||
2180 | if (unlikely(nr_lock_chains >= MAX_LOCKDEP_CHAINS)) { | ||
2181 | if (!debug_locks_off_graph_unlock()) | ||
2182 | return 0; | ||
2183 | |||
2184 | print_lockdep_off("BUG: MAX_LOCKDEP_CHAINS too low!"); | ||
2185 | dump_stack(); | ||
2186 | return 0; | ||
2187 | } | ||
2188 | |||
2189 | chain = lock_chains + nr_lock_chains++; | ||
2190 | chain->chain_key = chain_key; | ||
2191 | chain->irq_context = irq_context; | ||
2192 | chain->depth = 2; | ||
2193 | if (likely(nr_chain_hlocks + chain->depth <= MAX_LOCKDEP_CHAIN_HLOCKS)) { | ||
2194 | chain->base = nr_chain_hlocks; | ||
2195 | nr_chain_hlocks += chain->depth; | ||
2196 | chain_hlocks[chain->base] = prev - 1; | ||
2197 | chain_hlocks[chain->base + 1] = next -1; | ||
2198 | } | ||
2199 | #ifdef CONFIG_DEBUG_LOCKDEP | ||
2200 | /* | ||
2201 | * Important for check_no_collision(). | ||
2202 | */ | ||
2203 | else { | ||
2204 | if (!debug_locks_off_graph_unlock()) | ||
2205 | return 0; | ||
2206 | |||
2207 | print_lockdep_off("BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!"); | ||
2208 | dump_stack(); | ||
2209 | return 0; | ||
2210 | } | ||
2211 | #endif | ||
2212 | |||
2213 | hlist_add_head_rcu(&chain->entry, hash_head); | ||
2214 | debug_atomic_inc(chain_lookup_misses); | ||
2215 | inc_chains(); | ||
2216 | |||
2217 | return 1; | ||
2218 | } | ||
2219 | |||
2220 | /* | ||
2221 | * Adds a dependency chain into chain hashtable. And must be called with | 2153 | * Adds a dependency chain into chain hashtable. And must be called with |
2222 | * graph_lock held. | 2154 | * graph_lock held. |
2223 | * | 2155 | * |
@@ -3262,6 +3194,10 @@ static int __lock_is_held(const struct lockdep_map *lock, int read); | |||
3262 | /* | 3194 | /* |
3263 | * This gets called for every mutex_lock*()/spin_lock*() operation. | 3195 | * This gets called for every mutex_lock*()/spin_lock*() operation. |
3264 | * We maintain the dependency maps and validate the locking attempt: | 3196 | * We maintain the dependency maps and validate the locking attempt: |
3197 | * | ||
3198 | * The callers must make sure that IRQs are disabled before calling it, | ||
3199 | * otherwise we could get an interrupt which would want to take locks, | ||
3200 | * which would end up in lockdep again. | ||
3265 | */ | 3201 | */ |
3266 | static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | 3202 | static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, |
3267 | int trylock, int read, int check, int hardirqs_off, | 3203 | int trylock, int read, int check, int hardirqs_off, |
@@ -3279,14 +3215,6 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
3279 | if (unlikely(!debug_locks)) | 3215 | if (unlikely(!debug_locks)) |
3280 | return 0; | 3216 | return 0; |
3281 | 3217 | ||
3282 | /* | ||
3283 | * Lockdep should run with IRQs disabled, otherwise we could | ||
3284 | * get an interrupt which would want to take locks, which would | ||
3285 | * end up in lockdep and have you got a head-ache already? | ||
3286 | */ | ||
3287 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) | ||
3288 | return 0; | ||
3289 | |||
3290 | if (!prove_locking || lock->key == &__lockdep_no_validate__) | 3218 | if (!prove_locking || lock->key == &__lockdep_no_validate__) |
3291 | check = 0; | 3219 | check = 0; |
3292 | 3220 | ||
@@ -3300,7 +3228,9 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
3300 | if (!class) | 3228 | if (!class) |
3301 | return 0; | 3229 | return 0; |
3302 | } | 3230 | } |
3303 | atomic_inc((atomic_t *)&class->ops); | 3231 | |
3232 | debug_class_ops_inc(class); | ||
3233 | |||
3304 | if (very_verbose(class)) { | 3234 | if (very_verbose(class)) { |
3305 | printk("\nacquire class [%px] %s", class->key, class->name); | 3235 | printk("\nacquire class [%px] %s", class->key, class->name); |
3306 | if (class->name_version > 1) | 3236 | if (class->name_version > 1) |
@@ -3543,6 +3473,9 @@ static int reacquire_held_locks(struct task_struct *curr, unsigned int depth, | |||
3543 | { | 3473 | { |
3544 | struct held_lock *hlock; | 3474 | struct held_lock *hlock; |
3545 | 3475 | ||
3476 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) | ||
3477 | return 0; | ||
3478 | |||
3546 | for (hlock = curr->held_locks + idx; idx < depth; idx++, hlock++) { | 3479 | for (hlock = curr->held_locks + idx; idx < depth; idx++, hlock++) { |
3547 | if (!__lock_acquire(hlock->instance, | 3480 | if (!__lock_acquire(hlock->instance, |
3548 | hlock_class(hlock)->subclass, | 3481 | hlock_class(hlock)->subclass, |
@@ -3696,6 +3629,13 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip) | |||
3696 | curr->lockdep_depth = i; | 3629 | curr->lockdep_depth = i; |
3697 | curr->curr_chain_key = hlock->prev_chain_key; | 3630 | curr->curr_chain_key = hlock->prev_chain_key; |
3698 | 3631 | ||
3632 | /* | ||
3633 | * The most likely case is when the unlock is on the innermost | ||
3634 | * lock. In this case, we are done! | ||
3635 | */ | ||
3636 | if (i == depth-1) | ||
3637 | return 1; | ||
3638 | |||
3699 | if (reacquire_held_locks(curr, depth, i + 1)) | 3639 | if (reacquire_held_locks(curr, depth, i + 1)) |
3700 | return 0; | 3640 | return 0; |
3701 | 3641 | ||
@@ -3703,10 +3643,14 @@ __lock_release(struct lockdep_map *lock, int nested, unsigned long ip) | |||
3703 | * We had N bottles of beer on the wall, we drank one, but now | 3643 | * We had N bottles of beer on the wall, we drank one, but now |
3704 | * there's not N-1 bottles of beer left on the wall... | 3644 | * there's not N-1 bottles of beer left on the wall... |
3705 | */ | 3645 | */ |
3706 | if (DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth - 1)) | 3646 | DEBUG_LOCKS_WARN_ON(curr->lockdep_depth != depth-1); |
3707 | return 0; | ||
3708 | 3647 | ||
3709 | return 1; | 3648 | /* |
3649 | * Since reacquire_held_locks() would have called check_chain_key() | ||
3650 | * indirectly via __lock_acquire(), we don't need to do it again | ||
3651 | * on return. | ||
3652 | */ | ||
3653 | return 0; | ||
3710 | } | 3654 | } |
3711 | 3655 | ||
3712 | static int __lock_is_held(const struct lockdep_map *lock, int read) | 3656 | static int __lock_is_held(const struct lockdep_map *lock, int read) |
@@ -4122,7 +4066,7 @@ void lock_contended(struct lockdep_map *lock, unsigned long ip) | |||
4122 | { | 4066 | { |
4123 | unsigned long flags; | 4067 | unsigned long flags; |
4124 | 4068 | ||
4125 | if (unlikely(!lock_stat)) | 4069 | if (unlikely(!lock_stat || !debug_locks)) |
4126 | return; | 4070 | return; |
4127 | 4071 | ||
4128 | if (unlikely(current->lockdep_recursion)) | 4072 | if (unlikely(current->lockdep_recursion)) |
@@ -4142,7 +4086,7 @@ void lock_acquired(struct lockdep_map *lock, unsigned long ip) | |||
4142 | { | 4086 | { |
4143 | unsigned long flags; | 4087 | unsigned long flags; |
4144 | 4088 | ||
4145 | if (unlikely(!lock_stat)) | 4089 | if (unlikely(!lock_stat || !debug_locks)) |
4146 | return; | 4090 | return; |
4147 | 4091 | ||
4148 | if (unlikely(current->lockdep_recursion)) | 4092 | if (unlikely(current->lockdep_recursion)) |
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h index d459d624ba2a..88c847a41c8a 100644 --- a/kernel/locking/lockdep_internals.h +++ b/kernel/locking/lockdep_internals.h | |||
@@ -152,9 +152,15 @@ struct lockdep_stats { | |||
152 | int nr_find_usage_forwards_recursions; | 152 | int nr_find_usage_forwards_recursions; |
153 | int nr_find_usage_backwards_checks; | 153 | int nr_find_usage_backwards_checks; |
154 | int nr_find_usage_backwards_recursions; | 154 | int nr_find_usage_backwards_recursions; |
155 | |||
156 | /* | ||
157 | * Per lock class locking operation stat counts | ||
158 | */ | ||
159 | unsigned long lock_class_ops[MAX_LOCKDEP_KEYS]; | ||
155 | }; | 160 | }; |
156 | 161 | ||
157 | DECLARE_PER_CPU(struct lockdep_stats, lockdep_stats); | 162 | DECLARE_PER_CPU(struct lockdep_stats, lockdep_stats); |
163 | extern struct lock_class lock_classes[MAX_LOCKDEP_KEYS]; | ||
158 | 164 | ||
159 | #define __debug_atomic_inc(ptr) \ | 165 | #define __debug_atomic_inc(ptr) \ |
160 | this_cpu_inc(lockdep_stats.ptr); | 166 | this_cpu_inc(lockdep_stats.ptr); |
@@ -179,9 +185,30 @@ DECLARE_PER_CPU(struct lockdep_stats, lockdep_stats); | |||
179 | } \ | 185 | } \ |
180 | __total; \ | 186 | __total; \ |
181 | }) | 187 | }) |
188 | |||
189 | static inline void debug_class_ops_inc(struct lock_class *class) | ||
190 | { | ||
191 | int idx; | ||
192 | |||
193 | idx = class - lock_classes; | ||
194 | __debug_atomic_inc(lock_class_ops[idx]); | ||
195 | } | ||
196 | |||
197 | static inline unsigned long debug_class_ops_read(struct lock_class *class) | ||
198 | { | ||
199 | int idx, cpu; | ||
200 | unsigned long ops = 0; | ||
201 | |||
202 | idx = class - lock_classes; | ||
203 | for_each_possible_cpu(cpu) | ||
204 | ops += per_cpu(lockdep_stats.lock_class_ops[idx], cpu); | ||
205 | return ops; | ||
206 | } | ||
207 | |||
182 | #else | 208 | #else |
183 | # define __debug_atomic_inc(ptr) do { } while (0) | 209 | # define __debug_atomic_inc(ptr) do { } while (0) |
184 | # define debug_atomic_inc(ptr) do { } while (0) | 210 | # define debug_atomic_inc(ptr) do { } while (0) |
185 | # define debug_atomic_dec(ptr) do { } while (0) | 211 | # define debug_atomic_dec(ptr) do { } while (0) |
186 | # define debug_atomic_read(ptr) 0 | 212 | # define debug_atomic_read(ptr) 0 |
213 | # define debug_class_ops_inc(ptr) do { } while (0) | ||
187 | #endif | 214 | #endif |
diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c index 3dd980dfba2d..3d31f9b0059e 100644 --- a/kernel/locking/lockdep_proc.c +++ b/kernel/locking/lockdep_proc.c | |||
@@ -68,7 +68,7 @@ static int l_show(struct seq_file *m, void *v) | |||
68 | 68 | ||
69 | seq_printf(m, "%p", class->key); | 69 | seq_printf(m, "%p", class->key); |
70 | #ifdef CONFIG_DEBUG_LOCKDEP | 70 | #ifdef CONFIG_DEBUG_LOCKDEP |
71 | seq_printf(m, " OPS:%8ld", class->ops); | 71 | seq_printf(m, " OPS:%8ld", debug_class_ops_read(class)); |
72 | #endif | 72 | #endif |
73 | #ifdef CONFIG_PROVE_LOCKING | 73 | #ifdef CONFIG_PROVE_LOCKING |
74 | seq_printf(m, " FD:%5ld", lockdep_count_forward_deps(class)); | 74 | seq_printf(m, " FD:%5ld", lockdep_count_forward_deps(class)); |
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c index bfaeb05123ff..8a8c3c208c5e 100644 --- a/kernel/locking/qspinlock.c +++ b/kernel/locking/qspinlock.c | |||
@@ -74,12 +74,24 @@ | |||
74 | */ | 74 | */ |
75 | 75 | ||
76 | #include "mcs_spinlock.h" | 76 | #include "mcs_spinlock.h" |
77 | #define MAX_NODES 4 | ||
77 | 78 | ||
79 | /* | ||
80 | * On 64-bit architectures, the mcs_spinlock structure will be 16 bytes in | ||
81 | * size and four of them will fit nicely in one 64-byte cacheline. For | ||
82 | * pvqspinlock, however, we need more space for extra data. To accommodate | ||
83 | * that, we insert two more long words to pad it up to 32 bytes. IOW, only | ||
84 | * two of them can fit in a cacheline in this case. That is OK as it is rare | ||
85 | * to have more than 2 levels of slowpath nesting in actual use. We don't | ||
86 | * want to penalize pvqspinlocks to optimize for a rare case in native | ||
87 | * qspinlocks. | ||
88 | */ | ||
89 | struct qnode { | ||
90 | struct mcs_spinlock mcs; | ||
78 | #ifdef CONFIG_PARAVIRT_SPINLOCKS | 91 | #ifdef CONFIG_PARAVIRT_SPINLOCKS |
79 | #define MAX_NODES 8 | 92 | long reserved[2]; |
80 | #else | ||
81 | #define MAX_NODES 4 | ||
82 | #endif | 93 | #endif |
94 | }; | ||
83 | 95 | ||
84 | /* | 96 | /* |
85 | * The pending bit spinning loop count. | 97 | * The pending bit spinning loop count. |
@@ -101,7 +113,7 @@ | |||
101 | * | 113 | * |
102 | * PV doubles the storage and uses the second cacheline for PV state. | 114 | * PV doubles the storage and uses the second cacheline for PV state. |
103 | */ | 115 | */ |
104 | static DEFINE_PER_CPU_ALIGNED(struct mcs_spinlock, mcs_nodes[MAX_NODES]); | 116 | static DEFINE_PER_CPU_ALIGNED(struct qnode, qnodes[MAX_NODES]); |
105 | 117 | ||
106 | /* | 118 | /* |
107 | * We must be able to distinguish between no-tail and the tail at 0:0, | 119 | * We must be able to distinguish between no-tail and the tail at 0:0, |
@@ -126,7 +138,13 @@ static inline __pure struct mcs_spinlock *decode_tail(u32 tail) | |||
126 | int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1; | 138 | int cpu = (tail >> _Q_TAIL_CPU_OFFSET) - 1; |
127 | int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET; | 139 | int idx = (tail & _Q_TAIL_IDX_MASK) >> _Q_TAIL_IDX_OFFSET; |
128 | 140 | ||
129 | return per_cpu_ptr(&mcs_nodes[idx], cpu); | 141 | return per_cpu_ptr(&qnodes[idx].mcs, cpu); |
142 | } | ||
143 | |||
144 | static inline __pure | ||
145 | struct mcs_spinlock *grab_mcs_node(struct mcs_spinlock *base, int idx) | ||
146 | { | ||
147 | return &((struct qnode *)base + idx)->mcs; | ||
130 | } | 148 | } |
131 | 149 | ||
132 | #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK) | 150 | #define _Q_LOCKED_PENDING_MASK (_Q_LOCKED_MASK | _Q_PENDING_MASK) |
@@ -232,6 +250,20 @@ static __always_inline u32 xchg_tail(struct qspinlock *lock, u32 tail) | |||
232 | #endif /* _Q_PENDING_BITS == 8 */ | 250 | #endif /* _Q_PENDING_BITS == 8 */ |
233 | 251 | ||
234 | /** | 252 | /** |
253 | * queued_fetch_set_pending_acquire - fetch the whole lock value and set pending | ||
254 | * @lock : Pointer to queued spinlock structure | ||
255 | * Return: The previous lock value | ||
256 | * | ||
257 | * *,*,* -> *,1,* | ||
258 | */ | ||
259 | #ifndef queued_fetch_set_pending_acquire | ||
260 | static __always_inline u32 queued_fetch_set_pending_acquire(struct qspinlock *lock) | ||
261 | { | ||
262 | return atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val); | ||
263 | } | ||
264 | #endif | ||
265 | |||
266 | /** | ||
235 | * set_locked - Set the lock bit and own the lock | 267 | * set_locked - Set the lock bit and own the lock |
236 | * @lock: Pointer to queued spinlock structure | 268 | * @lock: Pointer to queued spinlock structure |
237 | * | 269 | * |
@@ -326,43 +358,48 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) | |||
326 | /* | 358 | /* |
327 | * trylock || pending | 359 | * trylock || pending |
328 | * | 360 | * |
329 | * 0,0,0 -> 0,0,1 ; trylock | 361 | * 0,0,* -> 0,1,* -> 0,0,1 pending, trylock |
330 | * 0,0,1 -> 0,1,1 ; pending | ||
331 | */ | 362 | */ |
332 | val = atomic_fetch_or_acquire(_Q_PENDING_VAL, &lock->val); | 363 | val = queued_fetch_set_pending_acquire(lock); |
333 | if (!(val & ~_Q_LOCKED_MASK)) { | ||
334 | /* | ||
335 | * We're pending, wait for the owner to go away. | ||
336 | * | ||
337 | * *,1,1 -> *,1,0 | ||
338 | * | ||
339 | * this wait loop must be a load-acquire such that we match the | ||
340 | * store-release that clears the locked bit and create lock | ||
341 | * sequentiality; this is because not all | ||
342 | * clear_pending_set_locked() implementations imply full | ||
343 | * barriers. | ||
344 | */ | ||
345 | if (val & _Q_LOCKED_MASK) { | ||
346 | atomic_cond_read_acquire(&lock->val, | ||
347 | !(VAL & _Q_LOCKED_MASK)); | ||
348 | } | ||
349 | 364 | ||
350 | /* | 365 | /* |
351 | * take ownership and clear the pending bit. | 366 | * If we observe contention, there is a concurrent locker. |
352 | * | 367 | * |
353 | * *,1,0 -> *,0,1 | 368 | * Undo and queue; our setting of PENDING might have made the |
354 | */ | 369 | * n,0,0 -> 0,0,0 transition fail and it will now be waiting |
355 | clear_pending_set_locked(lock); | 370 | * on @next to become !NULL. |
356 | qstat_inc(qstat_lock_pending, true); | 371 | */ |
357 | return; | 372 | if (unlikely(val & ~_Q_LOCKED_MASK)) { |
373 | |||
374 | /* Undo PENDING if we set it. */ | ||
375 | if (!(val & _Q_PENDING_MASK)) | ||
376 | clear_pending(lock); | ||
377 | |||
378 | goto queue; | ||
358 | } | 379 | } |
359 | 380 | ||
360 | /* | 381 | /* |
361 | * If pending was clear but there are waiters in the queue, then | 382 | * We're pending, wait for the owner to go away. |
362 | * we need to undo our setting of pending before we queue ourselves. | 383 | * |
384 | * 0,1,1 -> 0,1,0 | ||
385 | * | ||
386 | * this wait loop must be a load-acquire such that we match the | ||
387 | * store-release that clears the locked bit and create lock | ||
388 | * sequentiality; this is because not all | ||
389 | * clear_pending_set_locked() implementations imply full | ||
390 | * barriers. | ||
391 | */ | ||
392 | if (val & _Q_LOCKED_MASK) | ||
393 | atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_MASK)); | ||
394 | |||
395 | /* | ||
396 | * take ownership and clear the pending bit. | ||
397 | * | ||
398 | * 0,1,0 -> 0,0,1 | ||
363 | */ | 399 | */ |
364 | if (!(val & _Q_PENDING_MASK)) | 400 | clear_pending_set_locked(lock); |
365 | clear_pending(lock); | 401 | qstat_inc(qstat_lock_pending, true); |
402 | return; | ||
366 | 403 | ||
367 | /* | 404 | /* |
368 | * End of pending bit optimistic spinning and beginning of MCS | 405 | * End of pending bit optimistic spinning and beginning of MCS |
@@ -371,11 +408,16 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) | |||
371 | queue: | 408 | queue: |
372 | qstat_inc(qstat_lock_slowpath, true); | 409 | qstat_inc(qstat_lock_slowpath, true); |
373 | pv_queue: | 410 | pv_queue: |
374 | node = this_cpu_ptr(&mcs_nodes[0]); | 411 | node = this_cpu_ptr(&qnodes[0].mcs); |
375 | idx = node->count++; | 412 | idx = node->count++; |
376 | tail = encode_tail(smp_processor_id(), idx); | 413 | tail = encode_tail(smp_processor_id(), idx); |
377 | 414 | ||
378 | node += idx; | 415 | node = grab_mcs_node(node, idx); |
416 | |||
417 | /* | ||
418 | * Keep counts of non-zero index values: | ||
419 | */ | ||
420 | qstat_inc(qstat_lock_idx1 + idx - 1, idx); | ||
379 | 421 | ||
380 | /* | 422 | /* |
381 | * Ensure that we increment the head node->count before initialising | 423 | * Ensure that we increment the head node->count before initialising |
@@ -476,16 +518,25 @@ locked: | |||
476 | */ | 518 | */ |
477 | 519 | ||
478 | /* | 520 | /* |
479 | * In the PV case we might already have _Q_LOCKED_VAL set. | 521 | * In the PV case we might already have _Q_LOCKED_VAL set, because |
522 | * of lock stealing; therefore we must also allow: | ||
523 | * | ||
524 | * n,0,1 -> 0,0,1 | ||
480 | * | 525 | * |
481 | * The atomic_cond_read_acquire() call above has provided the | 526 | * Note: at this point: (val & _Q_PENDING_MASK) == 0, because of the |
482 | * necessary acquire semantics required for locking. | 527 | * above wait condition, therefore any concurrent setting of |
528 | * PENDING will make the uncontended transition fail. | ||
483 | */ | 529 | */ |
484 | if (((val & _Q_TAIL_MASK) == tail) && | 530 | if ((val & _Q_TAIL_MASK) == tail) { |
485 | atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL)) | 531 | if (atomic_try_cmpxchg_relaxed(&lock->val, &val, _Q_LOCKED_VAL)) |
486 | goto release; /* No contention */ | 532 | goto release; /* No contention */ |
533 | } | ||
487 | 534 | ||
488 | /* Either somebody is queued behind us or _Q_PENDING_VAL is set */ | 535 | /* |
536 | * Either somebody is queued behind us or _Q_PENDING_VAL got set | ||
537 | * which will then detect the remaining tail and queue behind us | ||
538 | * ensuring we'll see a @next. | ||
539 | */ | ||
489 | set_locked(lock); | 540 | set_locked(lock); |
490 | 541 | ||
491 | /* | 542 | /* |
@@ -501,7 +552,7 @@ release: | |||
501 | /* | 552 | /* |
502 | * release the node | 553 | * release the node |
503 | */ | 554 | */ |
504 | __this_cpu_dec(mcs_nodes[0].count); | 555 | __this_cpu_dec(qnodes[0].mcs.count); |
505 | } | 556 | } |
506 | EXPORT_SYMBOL(queued_spin_lock_slowpath); | 557 | EXPORT_SYMBOL(queued_spin_lock_slowpath); |
507 | 558 | ||
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h index 5a0cf5f9008c..0130e488ebfe 100644 --- a/kernel/locking/qspinlock_paravirt.h +++ b/kernel/locking/qspinlock_paravirt.h | |||
@@ -49,8 +49,6 @@ enum vcpu_state { | |||
49 | 49 | ||
50 | struct pv_node { | 50 | struct pv_node { |
51 | struct mcs_spinlock mcs; | 51 | struct mcs_spinlock mcs; |
52 | struct mcs_spinlock __res[3]; | ||
53 | |||
54 | int cpu; | 52 | int cpu; |
55 | u8 state; | 53 | u8 state; |
56 | }; | 54 | }; |
@@ -281,7 +279,7 @@ static void pv_init_node(struct mcs_spinlock *node) | |||
281 | { | 279 | { |
282 | struct pv_node *pn = (struct pv_node *)node; | 280 | struct pv_node *pn = (struct pv_node *)node; |
283 | 281 | ||
284 | BUILD_BUG_ON(sizeof(struct pv_node) > 5*sizeof(struct mcs_spinlock)); | 282 | BUILD_BUG_ON(sizeof(struct pv_node) > sizeof(struct qnode)); |
285 | 283 | ||
286 | pn->cpu = smp_processor_id(); | 284 | pn->cpu = smp_processor_id(); |
287 | pn->state = vcpu_running; | 285 | pn->state = vcpu_running; |
diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h index 6bd78c0740fc..42d3d8dc8f49 100644 --- a/kernel/locking/qspinlock_stat.h +++ b/kernel/locking/qspinlock_stat.h | |||
@@ -55,6 +55,9 @@ enum qlock_stats { | |||
55 | qstat_pv_wait_node, | 55 | qstat_pv_wait_node, |
56 | qstat_lock_pending, | 56 | qstat_lock_pending, |
57 | qstat_lock_slowpath, | 57 | qstat_lock_slowpath, |
58 | qstat_lock_idx1, | ||
59 | qstat_lock_idx2, | ||
60 | qstat_lock_idx3, | ||
58 | qstat_num, /* Total number of statistical counters */ | 61 | qstat_num, /* Total number of statistical counters */ |
59 | qstat_reset_cnts = qstat_num, | 62 | qstat_reset_cnts = qstat_num, |
60 | }; | 63 | }; |
@@ -82,6 +85,9 @@ static const char * const qstat_names[qstat_num + 1] = { | |||
82 | [qstat_pv_wait_node] = "pv_wait_node", | 85 | [qstat_pv_wait_node] = "pv_wait_node", |
83 | [qstat_lock_pending] = "lock_pending", | 86 | [qstat_lock_pending] = "lock_pending", |
84 | [qstat_lock_slowpath] = "lock_slowpath", | 87 | [qstat_lock_slowpath] = "lock_slowpath", |
88 | [qstat_lock_idx1] = "lock_index1", | ||
89 | [qstat_lock_idx2] = "lock_index2", | ||
90 | [qstat_lock_idx3] = "lock_index3", | ||
85 | [qstat_reset_cnts] = "reset_counters", | 91 | [qstat_reset_cnts] = "reset_counters", |
86 | }; | 92 | }; |
87 | 93 | ||
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 2823d4163a37..581edcc63c26 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c | |||
@@ -1485,9 +1485,9 @@ void __sched rt_mutex_lock_nested(struct rt_mutex *lock, unsigned int subclass) | |||
1485 | __rt_mutex_lock(lock, subclass); | 1485 | __rt_mutex_lock(lock, subclass); |
1486 | } | 1486 | } |
1487 | EXPORT_SYMBOL_GPL(rt_mutex_lock_nested); | 1487 | EXPORT_SYMBOL_GPL(rt_mutex_lock_nested); |
1488 | #endif | ||
1489 | 1488 | ||
1490 | #ifndef CONFIG_DEBUG_LOCK_ALLOC | 1489 | #else /* !CONFIG_DEBUG_LOCK_ALLOC */ |
1490 | |||
1491 | /** | 1491 | /** |
1492 | * rt_mutex_lock - lock a rt_mutex | 1492 | * rt_mutex_lock - lock a rt_mutex |
1493 | * | 1493 | * |
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 3064c50e181e..09b180063ee1 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c | |||
@@ -180,7 +180,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, | |||
180 | * but it gives the spinners an early indication that the | 180 | * but it gives the spinners an early indication that the |
181 | * readers now have the lock. | 181 | * readers now have the lock. |
182 | */ | 182 | */ |
183 | rwsem_set_reader_owned(sem); | 183 | __rwsem_set_reader_owned(sem, waiter->task); |
184 | } | 184 | } |
185 | 185 | ||
186 | /* | 186 | /* |
@@ -233,8 +233,19 @@ __rwsem_down_read_failed_common(struct rw_semaphore *sem, int state) | |||
233 | waiter.type = RWSEM_WAITING_FOR_READ; | 233 | waiter.type = RWSEM_WAITING_FOR_READ; |
234 | 234 | ||
235 | raw_spin_lock_irq(&sem->wait_lock); | 235 | raw_spin_lock_irq(&sem->wait_lock); |
236 | if (list_empty(&sem->wait_list)) | 236 | if (list_empty(&sem->wait_list)) { |
237 | /* | ||
238 | * In case the wait queue is empty and the lock isn't owned | ||
239 | * by a writer, this reader can exit the slowpath and return | ||
240 | * immediately as its RWSEM_ACTIVE_READ_BIAS has already | ||
241 | * been set in the count. | ||
242 | */ | ||
243 | if (atomic_long_read(&sem->count) >= 0) { | ||
244 | raw_spin_unlock_irq(&sem->wait_lock); | ||
245 | return sem; | ||
246 | } | ||
237 | adjustment += RWSEM_WAITING_BIAS; | 247 | adjustment += RWSEM_WAITING_BIAS; |
248 | } | ||
238 | list_add_tail(&waiter.list, &sem->wait_list); | 249 | list_add_tail(&waiter.list, &sem->wait_list); |
239 | 250 | ||
240 | /* we're now waiting on the lock, but no longer actively locking */ | 251 | /* we're now waiting on the lock, but no longer actively locking */ |
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 776308d2fa9e..e586f0d03ad3 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c | |||
@@ -117,8 +117,9 @@ EXPORT_SYMBOL(down_write_trylock); | |||
117 | void up_read(struct rw_semaphore *sem) | 117 | void up_read(struct rw_semaphore *sem) |
118 | { | 118 | { |
119 | rwsem_release(&sem->dep_map, 1, _RET_IP_); | 119 | rwsem_release(&sem->dep_map, 1, _RET_IP_); |
120 | DEBUG_RWSEMS_WARN_ON(sem->owner != RWSEM_READER_OWNED); | 120 | DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED)); |
121 | 121 | ||
122 | rwsem_clear_reader_owned(sem); | ||
122 | __up_read(sem); | 123 | __up_read(sem); |
123 | } | 124 | } |
124 | 125 | ||
@@ -181,7 +182,7 @@ void down_read_non_owner(struct rw_semaphore *sem) | |||
181 | might_sleep(); | 182 | might_sleep(); |
182 | 183 | ||
183 | __down_read(sem); | 184 | __down_read(sem); |
184 | rwsem_set_reader_owned(sem); | 185 | __rwsem_set_reader_owned(sem, NULL); |
185 | } | 186 | } |
186 | 187 | ||
187 | EXPORT_SYMBOL(down_read_non_owner); | 188 | EXPORT_SYMBOL(down_read_non_owner); |
@@ -215,7 +216,7 @@ EXPORT_SYMBOL(down_write_killable_nested); | |||
215 | 216 | ||
216 | void up_read_non_owner(struct rw_semaphore *sem) | 217 | void up_read_non_owner(struct rw_semaphore *sem) |
217 | { | 218 | { |
218 | DEBUG_RWSEMS_WARN_ON(sem->owner != RWSEM_READER_OWNED); | 219 | DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED)); |
219 | __up_read(sem); | 220 | __up_read(sem); |
220 | } | 221 | } |
221 | 222 | ||
diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index b9d0e72aa80f..bad2bca0268b 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h | |||
@@ -1,24 +1,30 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | 1 | /* SPDX-License-Identifier: GPL-2.0 */ |
2 | /* | 2 | /* |
3 | * The owner field of the rw_semaphore structure will be set to | 3 | * The least significant 2 bits of the owner value have the following |
4 | * RWSEM_READER_OWNED when a reader grabs the lock. A writer will clear | 4 | * meanings when set. |
5 | * the owner field when it unlocks. A reader, on the other hand, will | 5 | * - RWSEM_READER_OWNED (bit 0): The rwsem is owned by readers |
6 | * not touch the owner field when it unlocks. | 6 | * - RWSEM_ANONYMOUSLY_OWNED (bit 1): The rwsem is anonymously owned, |
7 | * i.e. the owner(s) cannot be readily determined. It can be reader | ||
8 | * owned or the owning writer is indeterminate. | ||
7 | * | 9 | * |
8 | * In essence, the owner field now has the following 4 states: | 10 | * When a writer acquires a rwsem, it puts its task_struct pointer |
9 | * 1) 0 | 11 | * into the owner field. It is cleared after an unlock. |
10 | * - lock is free or the owner hasn't set the field yet | 12 | * |
11 | * 2) RWSEM_READER_OWNED | 13 | * When a reader acquires a rwsem, it will also put its task_struct |
12 | * - lock is currently or previously owned by readers (lock is free | 14 | * pointer into the owner field with both the RWSEM_READER_OWNED and |
13 | * or not set by owner yet) | 15 | * RWSEM_ANONYMOUSLY_OWNED bits set. On unlock, the owner field will |
14 | * 3) RWSEM_ANONYMOUSLY_OWNED bit set with some other bits set as well | 16 | * largely be left untouched. So for a free or reader-owned rwsem, |
15 | * - lock is owned by an anonymous writer, so spinning on the lock | 17 | * the owner value may contain information about the last reader that |
16 | * owner should be disabled. | 18 | * acquires the rwsem. The anonymous bit is set because that particular |
17 | * 4) Other non-zero value | 19 | * reader may or may not still own the lock. |
18 | * - a writer owns the lock and other writers can spin on the lock owner. | 20 | * |
21 | * That information may be helpful in debugging cases where the system | ||
22 | * seems to hang on a reader owned rwsem especially if only one reader | ||
23 | * is involved. Ideally we would like to track all the readers that own | ||
24 | * a rwsem, but the overhead is simply too big. | ||
19 | */ | 25 | */ |
20 | #define RWSEM_ANONYMOUSLY_OWNED (1UL << 0) | 26 | #define RWSEM_READER_OWNED (1UL << 0) |
21 | #define RWSEM_READER_OWNED ((struct task_struct *)RWSEM_ANONYMOUSLY_OWNED) | 27 | #define RWSEM_ANONYMOUSLY_OWNED (1UL << 1) |
22 | 28 | ||
23 | #ifdef CONFIG_DEBUG_RWSEMS | 29 | #ifdef CONFIG_DEBUG_RWSEMS |
24 | # define DEBUG_RWSEMS_WARN_ON(c) DEBUG_LOCKS_WARN_ON(c) | 30 | # define DEBUG_RWSEMS_WARN_ON(c) DEBUG_LOCKS_WARN_ON(c) |
@@ -44,15 +50,26 @@ static inline void rwsem_clear_owner(struct rw_semaphore *sem) | |||
44 | WRITE_ONCE(sem->owner, NULL); | 50 | WRITE_ONCE(sem->owner, NULL); |
45 | } | 51 | } |
46 | 52 | ||
53 | /* | ||
54 | * The task_struct pointer of the last owning reader will be left in | ||
55 | * the owner field. | ||
56 | * | ||
57 | * Note that the owner value just indicates the task has owned the rwsem | ||
58 | * previously; it may not be the real owner or one of the real owners | ||
59 | * anymore when that field is examined, so take it with a grain of salt. | ||
60 | */ | ||
61 | static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem, | ||
62 | struct task_struct *owner) | ||
63 | { | ||
64 | unsigned long val = (unsigned long)owner | RWSEM_READER_OWNED | ||
65 | | RWSEM_ANONYMOUSLY_OWNED; | ||
66 | |||
67 | WRITE_ONCE(sem->owner, (struct task_struct *)val); | ||
68 | } | ||
69 | |||
47 | static inline void rwsem_set_reader_owned(struct rw_semaphore *sem) | 70 | static inline void rwsem_set_reader_owned(struct rw_semaphore *sem) |
48 | { | 71 | { |
49 | /* | 72 | __rwsem_set_reader_owned(sem, current); |
50 | * We check the owner value first to make sure that we will only | ||
51 | * do a write to the rwsem cacheline when it is really necessary | ||
52 | * to minimize cacheline contention. | ||
53 | */ | ||
54 | if (READ_ONCE(sem->owner) != RWSEM_READER_OWNED) | ||
55 | WRITE_ONCE(sem->owner, RWSEM_READER_OWNED); | ||
56 | } | 73 | } |
57 | 74 | ||
58 | /* | 75 | /* |
@@ -72,6 +89,25 @@ static inline bool rwsem_has_anonymous_owner(struct task_struct *owner) | |||
72 | { | 89 | { |
73 | return (unsigned long)owner & RWSEM_ANONYMOUSLY_OWNED; | 90 | return (unsigned long)owner & RWSEM_ANONYMOUSLY_OWNED; |
74 | } | 91 | } |
92 | |||
93 | #ifdef CONFIG_DEBUG_RWSEMS | ||
94 | /* | ||
95 | * With CONFIG_DEBUG_RWSEMS configured, it will make sure that if there | ||
96 | * is a task pointer in owner of a reader-owned rwsem, it will be the | ||
97 | * real owner or one of the real owners. The only exception is when the | ||
98 | * unlock is done by up_read_non_owner(). | ||
99 | */ | ||
100 | #define rwsem_clear_reader_owned rwsem_clear_reader_owned | ||
101 | static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem) | ||
102 | { | ||
103 | unsigned long val = (unsigned long)current | RWSEM_READER_OWNED | ||
104 | | RWSEM_ANONYMOUSLY_OWNED; | ||
105 | if (READ_ONCE(sem->owner) == (struct task_struct *)val) | ||
106 | cmpxchg_relaxed((unsigned long *)&sem->owner, val, | ||
107 | RWSEM_READER_OWNED | RWSEM_ANONYMOUSLY_OWNED); | ||
108 | } | ||
109 | #endif | ||
110 | |||
75 | #else | 111 | #else |
76 | static inline void rwsem_set_owner(struct rw_semaphore *sem) | 112 | static inline void rwsem_set_owner(struct rw_semaphore *sem) |
77 | { | 113 | { |
@@ -81,7 +117,18 @@ static inline void rwsem_clear_owner(struct rw_semaphore *sem) | |||
81 | { | 117 | { |
82 | } | 118 | } |
83 | 119 | ||
120 | static inline void __rwsem_set_reader_owned(struct rw_semaphore *sem, | ||
121 | struct task_struct *owner) | ||
122 | { | ||
123 | } | ||
124 | |||
84 | static inline void rwsem_set_reader_owned(struct rw_semaphore *sem) | 125 | static inline void rwsem_set_reader_owned(struct rw_semaphore *sem) |
85 | { | 126 | { |
86 | } | 127 | } |
87 | #endif | 128 | #endif |
129 | |||
130 | #ifndef rwsem_clear_reader_owned | ||
131 | static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem) | ||
132 | { | ||
133 | } | ||
134 | #endif | ||
diff --git a/kernel/module.c b/kernel/module.c index 6746c85511fe..49a405891587 100644 --- a/kernel/module.c +++ b/kernel/module.c | |||
@@ -3317,6 +3317,15 @@ static struct module *layout_and_allocate(struct load_info *info, int flags) | |||
3317 | ndx = find_sec(info, ".data..ro_after_init"); | 3317 | ndx = find_sec(info, ".data..ro_after_init"); |
3318 | if (ndx) | 3318 | if (ndx) |
3319 | info->sechdrs[ndx].sh_flags |= SHF_RO_AFTER_INIT; | 3319 | info->sechdrs[ndx].sh_flags |= SHF_RO_AFTER_INIT; |
3320 | /* | ||
3321 | * Mark the __jump_table section as ro_after_init as well: these data | ||
3322 | * structures are never modified, with the exception of entries that | ||
3323 | * refer to code in the __init section, which are annotated as such | ||
3324 | * at module load time. | ||
3325 | */ | ||
3326 | ndx = find_sec(info, "__jump_table"); | ||
3327 | if (ndx) | ||
3328 | info->sechdrs[ndx].sh_flags |= SHF_RO_AFTER_INIT; | ||
3320 | 3329 | ||
3321 | /* Determine total sizes, and put offsets in sh_entsize. For now | 3330 | /* Determine total sizes, and put offsets in sh_entsize. For now |
3322 | this is done generically; there doesn't appear to be any | 3331 | this is done generically; there doesn't appear to be any |