author    | Peter Zijlstra <peterz@infradead.org> | 2015-02-26 10:23:11 -0500
committer | Ingo Molnar <mingo@kernel.org> | 2015-03-23 05:49:07 -0400
commit    | 35a9393c95b31870a74f51a3e7455f33f5657b6f (patch)
tree      | 9a0e913b3d2dc02956c03a24b535a3604bb2dfa7 /kernel
parent    | bc465aa9d045feb0e13b4a8f32cc33c1943f62d6 (diff)
lockdep: Fix the module unload key range freeing logic
Module unload calls lockdep_free_key_range(), which removes entries
from the data structures. Most of the lockdep code, on the other hand,
assumes the data structures are append-only; specifically, see the
comments in add_lock_to_list() and look_up_lock_class().
Clearly this has only worked by accident; make it work properly. The
actual scenario that would make it go boom involves the memory freed by
the module unload being re-allocated and re-used for a lock inside an
RCU-sched grace period. This is a very unlikely scenario, but better to
plug the hole.
Use RCU list iteration in all places and amend the comments.
Change lockdep_free_key_range() to issue a sync_sched() between removal
from the lists and returning -- after which the memory is freed.
Further, ensure the callers are placed correctly and document the
requirements.
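The scheme can be sketched roughly as follows. This is a minimal illustration and not the lockdep code itself: struct my_class, my_hash_head, my_graph_lock, my_look_up() and my_zap() are invented names. Readers walk the RCU list with IRQs disabled, which makes them RCU-sched read-side critical sections; the unload path unlinks entries under the lock and then waits out a sync_sched() grace period before the backing memory may be freed or re-used.

```c
/*
 * Minimal sketch of the scheme relied on by the patch -- not the actual
 * lockdep code. struct my_class, my_hash_head, my_graph_lock and both
 * functions are invented names, for illustration only.
 */
#include <linux/list.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/spinlock.h>
#include <linux/irqflags.h>
#include <linux/debug_locks.h>

struct my_class {
        struct list_head        hash_entry;
        const void              *key;
};

static LIST_HEAD(my_hash_head);
static DEFINE_RAW_SPINLOCK(my_graph_lock);

/* Reader: runs with IRQs disabled, i.e. inside an RCU-sched read side. */
static struct my_class *my_look_up(const void *key)
{
        struct my_class *class;

        if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
                return NULL;

        list_for_each_entry_rcu(class, &my_hash_head, hash_entry) {
                if (class->key == key)
                        return class;
        }
        return NULL;
}

/*
 * Unload path: unlink under the lock, then wait for all readers to finish
 * before the backing memory may be freed or re-used.
 */
static void my_zap(struct my_class *class)
{
        unsigned long flags;

        raw_spin_lock_irqsave(&my_graph_lock, flags);
        list_del_rcu(&class->hash_entry);
        raw_spin_unlock_irqrestore(&my_graph_lock, flags);

        /*
         * Readers disable IRQs, so an RCU-sched grace period guarantees
         * none of them still holds a pointer into *class.
         */
        synchronize_sched();

        /* Only now is it safe for the caller to free or re-use the memory. */
}
```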
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Andrey Tsyvarev <tsyvarev@ispras.ru>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Rusty Russell <rusty@rustcorp.com.au>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/locking/lockdep.c | 81
-rw-r--r-- | kernel/module.c | 8
2 files changed, 59 insertions, 30 deletions
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 88d0d4420ad2..ba77ab5f64dd 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -633,7 +633,7 @@ static int count_matching_names(struct lock_class *new_class)
         if (!new_class->name)
                 return 0;
 
-        list_for_each_entry(class, &all_lock_classes, lock_entry) {
+        list_for_each_entry_rcu(class, &all_lock_classes, lock_entry) {
                 if (new_class->key - new_class->subclass == class->key)
                         return class->name_version;
                 if (class->name && !strcmp(class->name, new_class->name))
@@ -700,10 +700,12 @@ look_up_lock_class(struct lockdep_map *lock, unsigned int subclass)
         hash_head = classhashentry(key);
 
         /*
-         * We can walk the hash lockfree, because the hash only
-         * grows, and we are careful when adding entries to the end:
+         * We do an RCU walk of the hash, see lockdep_free_key_range().
          */
-        list_for_each_entry(class, hash_head, hash_entry) {
+        if (DEBUG_LOCKS_WARN_ON(!irqs_disabled()))
+                return NULL;
+
+        list_for_each_entry_rcu(class, hash_head, hash_entry) {
                 if (class->key == key) {
                         /*
                          * Huh! same key, different name? Did someone trample
@@ -728,7 +730,8 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
         struct lockdep_subclass_key *key;
         struct list_head *hash_head;
         struct lock_class *class;
-        unsigned long flags;
+
+        DEBUG_LOCKS_WARN_ON(!irqs_disabled());
 
         class = look_up_lock_class(lock, subclass);
         if (likely(class))
@@ -750,28 +753,26 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
         key = lock->key->subkeys + subclass;
         hash_head = classhashentry(key);
 
-        raw_local_irq_save(flags);
         if (!graph_lock()) {
-                raw_local_irq_restore(flags);
                 return NULL;
         }
         /*
          * We have to do the hash-walk again, to avoid races
          * with another CPU:
          */
-        list_for_each_entry(class, hash_head, hash_entry)
+        list_for_each_entry_rcu(class, hash_head, hash_entry) {
                 if (class->key == key)
                         goto out_unlock_set;
+        }
+
         /*
          * Allocate a new key from the static array, and add it to
          * the hash:
          */
         if (nr_lock_classes >= MAX_LOCKDEP_KEYS) {
                 if (!debug_locks_off_graph_unlock()) {
-                        raw_local_irq_restore(flags);
                         return NULL;
                 }
-                raw_local_irq_restore(flags);
 
                 print_lockdep_off("BUG: MAX_LOCKDEP_KEYS too low!");
                 dump_stack();
@@ -798,7 +799,6 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
 
         if (verbose(class)) {
                 graph_unlock();
-                raw_local_irq_restore(flags);
 
                 printk("\nnew class %p: %s", class->key, class->name);
                 if (class->name_version > 1)
@@ -806,15 +806,12 @@ register_lock_class(struct lockdep_map *lock, unsigned int subclass, int force)
                 printk("\n");
                 dump_stack();
 
-                raw_local_irq_save(flags);
                 if (!graph_lock()) {
-                        raw_local_irq_restore(flags);
                         return NULL;
                 }
         }
 out_unlock_set:
         graph_unlock();
-        raw_local_irq_restore(flags);
 
 out_set_class_cache:
         if (!subclass || force)
@@ -870,11 +867,9 @@ static int add_lock_to_list(struct lock_class *class, struct lock_class *this,
         entry->distance = distance;
         entry->trace = *trace;
         /*
-         * Since we never remove from the dependency list, the list can
-         * be walked lockless by other CPUs, it's only allocation
-         * that must be protected by the spinlock. But this also means
-         * we must make new entries visible only once writes to the
-         * entry become visible - hence the RCU op:
+         * Both allocation and removal are done under the graph lock; but
+         * iteration is under RCU-sched; see look_up_lock_class() and
+         * lockdep_free_key_range().
          */
         list_add_tail_rcu(&entry->entry, head);
 
@@ -1025,7 +1020,9 @@ static int __bfs(struct lock_list *source_entry,
                 else
                         head = &lock->class->locks_before;
 
-                list_for_each_entry(entry, head, entry) {
+                DEBUG_LOCKS_WARN_ON(!irqs_disabled());
+
+                list_for_each_entry_rcu(entry, head, entry) {
                         if (!lock_accessed(entry)) {
                                 unsigned int cq_depth;
                                 mark_lock_accessed(entry, lock);
@@ -2022,7 +2019,7 @@ static inline int lookup_chain_cache(struct task_struct *curr,
          * We can walk it lock-free, because entries only get added
          * to the hash:
          */
-        list_for_each_entry(chain, hash_head, entry) {
+        list_for_each_entry_rcu(chain, hash_head, entry) {
                 if (chain->chain_key == chain_key) {
 cache_hit:
                         debug_atomic_inc(chain_lookup_hits);
@@ -2996,8 +2993,18 @@ void lockdep_init_map(struct lockdep_map *lock, const char *name,
         if (unlikely(!debug_locks))
                 return;
 
-        if (subclass)
+        if (subclass) {
+                unsigned long flags;
+
+                if (DEBUG_LOCKS_WARN_ON(current->lockdep_recursion))
+                        return;
+
+                raw_local_irq_save(flags);
+                current->lockdep_recursion = 1;
                 register_lock_class(lock, subclass, 1);
+                current->lockdep_recursion = 0;
+                raw_local_irq_restore(flags);
+        }
 }
 EXPORT_SYMBOL_GPL(lockdep_init_map);
 
@@ -3887,9 +3894,17 @@ static inline int within(const void *addr, void *start, unsigned long size)
         return addr >= start && addr < start + size;
 }
 
+/*
+ * Used in module.c to remove lock classes from memory that is going to be
+ * freed; and possibly re-used by other modules.
+ *
+ * We will have had one sync_sched() before getting here, so we're guaranteed
+ * nobody will look up these exact classes -- they're properly dead but still
+ * allocated.
+ */
 void lockdep_free_key_range(void *start, unsigned long size)
 {
-        struct lock_class *class, *next;
+        struct lock_class *class;
         struct list_head *head;
         unsigned long flags;
         int i;
@@ -3905,7 +3920,7 @@ void lockdep_free_key_range(void *start, unsigned long size)
                 head = classhash_table + i;
                 if (list_empty(head))
                         continue;
-                list_for_each_entry_safe(class, next, head, hash_entry) {
+                list_for_each_entry_rcu(class, head, hash_entry) {
                         if (within(class->key, start, size))
                                 zap_class(class);
                         else if (within(class->name, start, size))
@@ -3916,11 +3931,25 @@ void lockdep_free_key_range(void *start, unsigned long size)
         if (locked)
                 graph_unlock();
         raw_local_irq_restore(flags);
+
+        /*
+         * Wait for any possible iterators from look_up_lock_class() to pass
+         * before continuing to free the memory they refer to.
+         *
+         * sync_sched() is sufficient because the read-side is IRQ disable.
+         */
+        synchronize_sched();
+
+        /*
+         * XXX at this point we could return the resources to the pool;
+         * instead we leak them. We would need to change to bitmap allocators
+         * instead of the linear allocators we have now.
+         */
 }
 
 void lockdep_reset_lock(struct lockdep_map *lock)
 {
-        struct lock_class *class, *next;
+        struct lock_class *class;
         struct list_head *head;
         unsigned long flags;
         int i, j;
@@ -3948,7 +3977,7 @@ void lockdep_reset_lock(struct lockdep_map *lock)
                 head = classhash_table + i;
                 if (list_empty(head))
                         continue;
-                list_for_each_entry_safe(class, next, head, hash_entry) {
+                list_for_each_entry_rcu(class, head, hash_entry) {
                         int match = 0;
 
                         for (j = 0; j < NR_LOCKDEP_CACHING_CLASSES; j++)
diff --git a/kernel/module.c b/kernel/module.c
index b3d634ed06c9..99fdf94efce8 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -1865,7 +1865,7 @@ static void free_module(struct module *mod)
         kfree(mod->args);
         percpu_modfree(mod);
 
-        /* Free lock-classes: */
+        /* Free lock-classes; relies on the preceding sync_rcu(). */
         lockdep_free_key_range(mod->module_core, mod->core_size);
 
         /* Finally, free the core (containing the module structure) */
@@ -3349,9 +3349,6 @@ static int load_module(struct load_info *info, const char __user *uargs,
         module_bug_cleanup(mod);
         mutex_unlock(&module_mutex);
 
-        /* Free lock-classes: */
-        lockdep_free_key_range(mod->module_core, mod->core_size);
-
         /* we can't deallocate the module until we clear memory protection */
         unset_module_init_ro_nx(mod);
         unset_module_core_ro_nx(mod);
@@ -3375,6 +3372,9 @@ static int load_module(struct load_info *info, const char __user *uargs,
         synchronize_rcu();
         mutex_unlock(&module_mutex);
  free_module:
+        /* Free lock-classes; relies on the preceding sync_rcu() */
+        lockdep_free_key_range(mod->module_core, mod->core_size);
+
         module_deallocate(mod, info);
  free_copy:
         free_copy(info);