author		Mauro Carvalho Chehab <m.chehab@samsung.com>	2014-04-14 11:00:36 -0400
committer	Mauro Carvalho Chehab <m.chehab@samsung.com>	2014-04-14 11:00:36 -0400
commit		277a163c83d7ba93fba1e8980d29a9f8bfcfba6c (patch)
tree		ccfd357d152292958957b6b8a993892e7a8cc95f /kernel/locking
parent		a83b93a7480441a47856dc9104bea970e84cda87 (diff)
parent		c9eaa447e77efe77b7fa4c953bd62de8297fd6c5 (diff)
Merge tag 'v3.15-rc1' into patchwork
Linux 3.15-rc1
* tag 'v3.15-rc1': (12180 commits)
Linux 3.15-rc1
mm: Initialize error in shmem_file_aio_read()
cifs: Use min_t() when comparing "size_t" and "unsigned long"
sym53c8xx_2: Set DID_REQUEUE return code when aborting squeue
powerpc: Don't try to set LPCR unless we're in hypervisor mode
futex: update documentation for ordering guarantees
ceph: fix pr_fmt() redefinition
vti: don't allow to add the same tunnel twice
gre: don't allow to add the same tunnel twice
drivers: net: xen-netfront: fix array initialization bug
missing bits of "splice: fix racy pipe->buffers uses"
cifs: fix the race in cifs_writev()
ceph_sync_{,direct_}write: fix an oops on ceph_osdc_new_request() failure
pktgen: be friendly to LLTX devices
r8152: check RTL8152_UNPLUG
net: sun4i-emac: add promiscuous support
net/apne: replace IS_ERR and PTR_ERR with PTR_ERR_OR_ZERO
blackfin: cleanup board files
bf609: clock: drop unused clock bit set/clear functions
Blackfin: bf537: rename "CONFIG_ADT75"
...
Diffstat (limited to 'kernel/locking')
-rw-r--r--	kernel/locking/Makefile		4
-rw-r--r--	kernel/locking/lockdep.c	23
-rw-r--r--	kernel/locking/locktorture.c	452
-rw-r--r--	kernel/locking/mcs_spinlock.c	178
-rw-r--r--	kernel/locking/mcs_spinlock.h	129
-rw-r--r--	kernel/locking/mutex-debug.c	6
-rw-r--r--	kernel/locking/mutex.c		104
-rw-r--r--	kernel/locking/rtmutex.c	12
-rw-r--r--	kernel/locking/rwsem-xadd.c	4
9 files changed, 831 insertions, 81 deletions
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index baab8e5e7f66..b8bdcd4785b7 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
| @@ -1,5 +1,5 @@ | |||
| 1 | 1 | ||
| 2 | obj-y += mutex.o semaphore.o rwsem.o lglock.o | 2 | obj-y += mutex.o semaphore.o rwsem.o mcs_spinlock.o |
| 3 | 3 | ||
| 4 | ifdef CONFIG_FUNCTION_TRACER | 4 | ifdef CONFIG_FUNCTION_TRACER |
| 5 | CFLAGS_REMOVE_lockdep.o = -pg | 5 | CFLAGS_REMOVE_lockdep.o = -pg |
| @@ -14,6 +14,7 @@ ifeq ($(CONFIG_PROC_FS),y) | |||
| 14 | obj-$(CONFIG_LOCKDEP) += lockdep_proc.o | 14 | obj-$(CONFIG_LOCKDEP) += lockdep_proc.o |
| 15 | endif | 15 | endif |
| 16 | obj-$(CONFIG_SMP) += spinlock.o | 16 | obj-$(CONFIG_SMP) += spinlock.o |
| 17 | obj-$(CONFIG_SMP) += lglock.o | ||
| 17 | obj-$(CONFIG_PROVE_LOCKING) += spinlock.o | 18 | obj-$(CONFIG_PROVE_LOCKING) += spinlock.o |
| 18 | obj-$(CONFIG_RT_MUTEXES) += rtmutex.o | 19 | obj-$(CONFIG_RT_MUTEXES) += rtmutex.o |
| 19 | obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o | 20 | obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o |
| @@ -23,3 +24,4 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o | |||
| 23 | obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o | 24 | obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o |
| 24 | obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o | 25 | obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o |
| 25 | obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o | 26 | obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o |
| 27 | obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o | ||
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index eb8a54783fa0..b0e9467922e1 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
| @@ -1936,12 +1936,12 @@ check_prevs_add(struct task_struct *curr, struct held_lock *next) | |||
| 1936 | 1936 | ||
| 1937 | for (;;) { | 1937 | for (;;) { |
| 1938 | int distance = curr->lockdep_depth - depth + 1; | 1938 | int distance = curr->lockdep_depth - depth + 1; |
| 1939 | hlock = curr->held_locks + depth-1; | 1939 | hlock = curr->held_locks + depth - 1; |
| 1940 | /* | 1940 | /* |
| 1941 | * Only non-recursive-read entries get new dependencies | 1941 | * Only non-recursive-read entries get new dependencies |
| 1942 | * added: | 1942 | * added: |
| 1943 | */ | 1943 | */ |
| 1944 | if (hlock->read != 2) { | 1944 | if (hlock->read != 2 && hlock->check) { |
| 1945 | if (!check_prev_add(curr, hlock, next, | 1945 | if (!check_prev_add(curr, hlock, next, |
| 1946 | distance, trylock_loop)) | 1946 | distance, trylock_loop)) |
| 1947 | return 0; | 1947 | return 0; |
| @@ -2098,7 +2098,7 @@ static int validate_chain(struct task_struct *curr, struct lockdep_map *lock, | |||
| 2098 | * (If lookup_chain_cache() returns with 1 it acquires | 2098 | * (If lookup_chain_cache() returns with 1 it acquires |
| 2099 | * graph_lock for us) | 2099 | * graph_lock for us) |
| 2100 | */ | 2100 | */ |
| 2101 | if (!hlock->trylock && (hlock->check == 2) && | 2101 | if (!hlock->trylock && hlock->check && |
| 2102 | lookup_chain_cache(curr, hlock, chain_key)) { | 2102 | lookup_chain_cache(curr, hlock, chain_key)) { |
| 2103 | /* | 2103 | /* |
| 2104 | * Check whether last held lock: | 2104 | * Check whether last held lock: |
| @@ -2517,7 +2517,7 @@ mark_held_locks(struct task_struct *curr, enum mark_type mark) | |||
| 2517 | 2517 | ||
| 2518 | BUG_ON(usage_bit >= LOCK_USAGE_STATES); | 2518 | BUG_ON(usage_bit >= LOCK_USAGE_STATES); |
| 2519 | 2519 | ||
| 2520 | if (hlock_class(hlock)->key == __lockdep_no_validate__.subkeys) | 2520 | if (!hlock->check) |
| 2521 | continue; | 2521 | continue; |
| 2522 | 2522 | ||
| 2523 | if (!mark_lock(curr, hlock, usage_bit)) | 2523 | if (!mark_lock(curr, hlock, usage_bit)) |
| @@ -2557,7 +2557,7 @@ static void __trace_hardirqs_on_caller(unsigned long ip) | |||
| 2557 | debug_atomic_inc(hardirqs_on_events); | 2557 | debug_atomic_inc(hardirqs_on_events); |
| 2558 | } | 2558 | } |
| 2559 | 2559 | ||
| 2560 | void trace_hardirqs_on_caller(unsigned long ip) | 2560 | __visible void trace_hardirqs_on_caller(unsigned long ip) |
| 2561 | { | 2561 | { |
| 2562 | time_hardirqs_on(CALLER_ADDR0, ip); | 2562 | time_hardirqs_on(CALLER_ADDR0, ip); |
| 2563 | 2563 | ||
| @@ -2610,7 +2610,7 @@ EXPORT_SYMBOL(trace_hardirqs_on); | |||
| 2610 | /* | 2610 | /* |
| 2611 | * Hardirqs were disabled: | 2611 | * Hardirqs were disabled: |
| 2612 | */ | 2612 | */ |
| 2613 | void trace_hardirqs_off_caller(unsigned long ip) | 2613 | __visible void trace_hardirqs_off_caller(unsigned long ip) |
| 2614 | { | 2614 | { |
| 2615 | struct task_struct *curr = current; | 2615 | struct task_struct *curr = current; |
| 2616 | 2616 | ||
| @@ -3055,9 +3055,6 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
| 3055 | int class_idx; | 3055 | int class_idx; |
| 3056 | u64 chain_key; | 3056 | u64 chain_key; |
| 3057 | 3057 | ||
| 3058 | if (!prove_locking) | ||
| 3059 | check = 1; | ||
| 3060 | |||
| 3061 | if (unlikely(!debug_locks)) | 3058 | if (unlikely(!debug_locks)) |
| 3062 | return 0; | 3059 | return 0; |
| 3063 | 3060 | ||
| @@ -3069,8 +3066,8 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
| 3069 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) | 3066 | if (DEBUG_LOCKS_WARN_ON(!irqs_disabled())) |
| 3070 | return 0; | 3067 | return 0; |
| 3071 | 3068 | ||
| 3072 | if (lock->key == &__lockdep_no_validate__) | 3069 | if (!prove_locking || lock->key == &__lockdep_no_validate__) |
| 3073 | check = 1; | 3070 | check = 0; |
| 3074 | 3071 | ||
| 3075 | if (subclass < NR_LOCKDEP_CACHING_CLASSES) | 3072 | if (subclass < NR_LOCKDEP_CACHING_CLASSES) |
| 3076 | class = lock->class_cache[subclass]; | 3073 | class = lock->class_cache[subclass]; |
| @@ -3138,7 +3135,7 @@ static int __lock_acquire(struct lockdep_map *lock, unsigned int subclass, | |||
| 3138 | hlock->holdtime_stamp = lockstat_clock(); | 3135 | hlock->holdtime_stamp = lockstat_clock(); |
| 3139 | #endif | 3136 | #endif |
| 3140 | 3137 | ||
| 3141 | if (check == 2 && !mark_irqflags(curr, hlock)) | 3138 | if (check && !mark_irqflags(curr, hlock)) |
| 3142 | return 0; | 3139 | return 0; |
| 3143 | 3140 | ||
| 3144 | /* mark it as used: */ | 3141 | /* mark it as used: */ |
| @@ -4191,7 +4188,7 @@ void debug_show_held_locks(struct task_struct *task) | |||
| 4191 | } | 4188 | } |
| 4192 | EXPORT_SYMBOL_GPL(debug_show_held_locks); | 4189 | EXPORT_SYMBOL_GPL(debug_show_held_locks); |
| 4193 | 4190 | ||
| 4194 | void lockdep_sys_exit(void) | 4191 | asmlinkage void lockdep_sys_exit(void) |
| 4195 | { | 4192 | { |
| 4196 | struct task_struct *curr = current; | 4193 | struct task_struct *curr = current; |
| 4197 | 4194 | ||
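The lockdep change above collapses the old three-valued check (0/1/2) into a boolean: an acquisition is either fully validated or not at all, and both the !prove_locking case and the __lockdep_no_validate__ class now simply force check = 0. A hedged, driver-style sketch of how a lock ends up in that class; struct my_dev and my_dev_init() are made-up names, not part of this diff (the driver core, for instance, applies the same mechanism to dev->mutex):

#include <linux/mutex.h>
#include <linux/lockdep.h>

struct my_dev {
	struct mutex lock;
};

static void my_dev_init(struct my_dev *d)
{
	mutex_init(&d->lock);
	/*
	 * Opt this lock out of lockdep validation; after the change above
	 * its acquisitions carry hlock->check == 0, skipping both chain
	 * validation and mark_irqflags().
	 */
	lockdep_set_novalidate_class(&d->lock);
}

That is also why the mark_held_locks() test against __lockdep_no_validate__.subkeys could be replaced by the simpler !hlock->check test.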
diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c
new file mode 100644
index 000000000000..f26b1a18e34e
--- /dev/null
+++ b/kernel/locking/locktorture.c
| @@ -0,0 +1,452 @@ | |||
| 1 | /* | ||
| 2 | * Module-based torture test facility for locking | ||
| 3 | * | ||
| 4 | * This program is free software; you can redistribute it and/or modify | ||
| 5 | * it under the terms of the GNU General Public License as published by | ||
| 6 | * the Free Software Foundation; either version 2 of the License, or | ||
| 7 | * (at your option) any later version. | ||
| 8 | * | ||
| 9 | * This program is distributed in the hope that it will be useful, | ||
| 10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
| 11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
| 12 | * GNU General Public License for more details. | ||
| 13 | * | ||
| 14 | * You should have received a copy of the GNU General Public License | ||
| 15 | * along with this program; if not, you can access it online at | ||
| 16 | * http://www.gnu.org/licenses/gpl-2.0.html. | ||
| 17 | * | ||
| 18 | * Copyright (C) IBM Corporation, 2014 | ||
| 19 | * | ||
| 20 | * Author: Paul E. McKenney <paulmck@us.ibm.com> | ||
| 21 | * Based on kernel/rcu/torture.c. | ||
| 22 | */ | ||
| 23 | #include <linux/types.h> | ||
| 24 | #include <linux/kernel.h> | ||
| 25 | #include <linux/init.h> | ||
| 26 | #include <linux/module.h> | ||
| 27 | #include <linux/kthread.h> | ||
| 28 | #include <linux/err.h> | ||
| 29 | #include <linux/spinlock.h> | ||
| 30 | #include <linux/smp.h> | ||
| 31 | #include <linux/interrupt.h> | ||
| 32 | #include <linux/sched.h> | ||
| 33 | #include <linux/atomic.h> | ||
| 34 | #include <linux/bitops.h> | ||
| 35 | #include <linux/completion.h> | ||
| 36 | #include <linux/moduleparam.h> | ||
| 37 | #include <linux/percpu.h> | ||
| 38 | #include <linux/notifier.h> | ||
| 39 | #include <linux/reboot.h> | ||
| 40 | #include <linux/freezer.h> | ||
| 41 | #include <linux/cpu.h> | ||
| 42 | #include <linux/delay.h> | ||
| 43 | #include <linux/stat.h> | ||
| 44 | #include <linux/slab.h> | ||
| 45 | #include <linux/trace_clock.h> | ||
| 46 | #include <asm/byteorder.h> | ||
| 47 | #include <linux/torture.h> | ||
| 48 | |||
| 49 | MODULE_LICENSE("GPL"); | ||
| 50 | MODULE_AUTHOR("Paul E. McKenney <paulmck@us.ibm.com>"); | ||
| 51 | |||
| 52 | torture_param(int, nwriters_stress, -1, | ||
| 53 | "Number of write-locking stress-test threads"); | ||
| 54 | torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)"); | ||
| 55 | torture_param(int, onoff_interval, 0, | ||
| 56 | "Time between CPU hotplugs (s), 0=disable"); | ||
| 57 | torture_param(int, shuffle_interval, 3, | ||
| 58 | "Number of jiffies between shuffles, 0=disable"); | ||
| 59 | torture_param(int, shutdown_secs, 0, "Shutdown time (j), <= zero to disable."); | ||
| 60 | torture_param(int, stat_interval, 60, | ||
| 61 | "Number of seconds between stats printk()s"); | ||
| 62 | torture_param(int, stutter, 5, "Number of jiffies to run/halt test, 0=disable"); | ||
| 63 | torture_param(bool, verbose, true, | ||
| 64 | "Enable verbose debugging printk()s"); | ||
| 65 | |||
| 66 | static char *torture_type = "spin_lock"; | ||
| 67 | module_param(torture_type, charp, 0444); | ||
| 68 | MODULE_PARM_DESC(torture_type, | ||
| 69 | "Type of lock to torture (spin_lock, spin_lock_irq, ...)"); | ||
| 70 | |||
| 71 | static atomic_t n_lock_torture_errors; | ||
| 72 | |||
| 73 | static struct task_struct *stats_task; | ||
| 74 | static struct task_struct **writer_tasks; | ||
| 75 | |||
| 76 | static int nrealwriters_stress; | ||
| 77 | static bool lock_is_write_held; | ||
| 78 | |||
| 79 | struct lock_writer_stress_stats { | ||
| 80 | long n_write_lock_fail; | ||
| 81 | long n_write_lock_acquired; | ||
| 82 | }; | ||
| 83 | static struct lock_writer_stress_stats *lwsa; | ||
| 84 | |||
| 85 | #if defined(MODULE) || defined(CONFIG_LOCK_TORTURE_TEST_RUNNABLE) | ||
| 86 | #define LOCKTORTURE_RUNNABLE_INIT 1 | ||
| 87 | #else | ||
| 88 | #define LOCKTORTURE_RUNNABLE_INIT 0 | ||
| 89 | #endif | ||
| 90 | int locktorture_runnable = LOCKTORTURE_RUNNABLE_INIT; | ||
| 91 | module_param(locktorture_runnable, int, 0444); | ||
| 92 | MODULE_PARM_DESC(locktorture_runnable, "Start locktorture at boot"); | ||
| 93 | |||
| 94 | /* Forward reference. */ | ||
| 95 | static void lock_torture_cleanup(void); | ||
| 96 | |||
| 97 | /* | ||
| 98 | * Operations vector for selecting different types of tests. | ||
| 99 | */ | ||
| 100 | struct lock_torture_ops { | ||
| 101 | void (*init)(void); | ||
| 102 | int (*writelock)(void); | ||
| 103 | void (*write_delay)(struct torture_random_state *trsp); | ||
| 104 | void (*writeunlock)(void); | ||
| 105 | unsigned long flags; | ||
| 106 | const char *name; | ||
| 107 | }; | ||
| 108 | |||
| 109 | static struct lock_torture_ops *cur_ops; | ||
| 110 | |||
| 111 | /* | ||
| 112 | * Definitions for lock torture testing. | ||
| 113 | */ | ||
| 114 | |||
| 115 | static int torture_lock_busted_write_lock(void) | ||
| 116 | { | ||
| 117 | return 0; /* BUGGY, do not use in real life!!! */ | ||
| 118 | } | ||
| 119 | |||
| 120 | static void torture_lock_busted_write_delay(struct torture_random_state *trsp) | ||
| 121 | { | ||
| 122 | const unsigned long longdelay_us = 100; | ||
| 123 | |||
| 124 | /* We want a long delay occasionally to force massive contention. */ | ||
| 125 | if (!(torture_random(trsp) % | ||
| 126 | (nrealwriters_stress * 2000 * longdelay_us))) | ||
| 127 | mdelay(longdelay_us); | ||
| 128 | #ifdef CONFIG_PREEMPT | ||
| 129 | if (!(torture_random(trsp) % (nrealwriters_stress * 20000))) | ||
| 130 | preempt_schedule(); /* Allow test to be preempted. */ | ||
| 131 | #endif | ||
| 132 | } | ||
| 133 | |||
| 134 | static void torture_lock_busted_write_unlock(void) | ||
| 135 | { | ||
| 136 | /* BUGGY, do not use in real life!!! */ | ||
| 137 | } | ||
| 138 | |||
| 139 | static struct lock_torture_ops lock_busted_ops = { | ||
| 140 | .writelock = torture_lock_busted_write_lock, | ||
| 141 | .write_delay = torture_lock_busted_write_delay, | ||
| 142 | .writeunlock = torture_lock_busted_write_unlock, | ||
| 143 | .name = "lock_busted" | ||
| 144 | }; | ||
| 145 | |||
| 146 | static DEFINE_SPINLOCK(torture_spinlock); | ||
| 147 | |||
| 148 | static int torture_spin_lock_write_lock(void) __acquires(torture_spinlock) | ||
| 149 | { | ||
| 150 | spin_lock(&torture_spinlock); | ||
| 151 | return 0; | ||
| 152 | } | ||
| 153 | |||
| 154 | static void torture_spin_lock_write_delay(struct torture_random_state *trsp) | ||
| 155 | { | ||
| 156 | const unsigned long shortdelay_us = 2; | ||
| 157 | const unsigned long longdelay_us = 100; | ||
| 158 | |||
| 159 | /* We want a short delay mostly to emulate likely code, and | ||
| 160 | * we want a long delay occasionally to force massive contention. | ||
| 161 | */ | ||
| 162 | if (!(torture_random(trsp) % | ||
| 163 | (nrealwriters_stress * 2000 * longdelay_us))) | ||
| 164 | mdelay(longdelay_us); | ||
| 165 | if (!(torture_random(trsp) % | ||
| 166 | (nrealwriters_stress * 2 * shortdelay_us))) | ||
| 167 | udelay(shortdelay_us); | ||
| 168 | #ifdef CONFIG_PREEMPT | ||
| 169 | if (!(torture_random(trsp) % (nrealwriters_stress * 20000))) | ||
| 170 | preempt_schedule(); /* Allow test to be preempted. */ | ||
| 171 | #endif | ||
| 172 | } | ||
| 173 | |||
| 174 | static void torture_spin_lock_write_unlock(void) __releases(torture_spinlock) | ||
| 175 | { | ||
| 176 | spin_unlock(&torture_spinlock); | ||
| 177 | } | ||
| 178 | |||
| 179 | static struct lock_torture_ops spin_lock_ops = { | ||
| 180 | .writelock = torture_spin_lock_write_lock, | ||
| 181 | .write_delay = torture_spin_lock_write_delay, | ||
| 182 | .writeunlock = torture_spin_lock_write_unlock, | ||
| 183 | .name = "spin_lock" | ||
| 184 | }; | ||
| 185 | |||
| 186 | static int torture_spin_lock_write_lock_irq(void) | ||
| 187 | __acquires(torture_spinlock_irq) | ||
| 188 | { | ||
| 189 | unsigned long flags; | ||
| 190 | |||
| 191 | spin_lock_irqsave(&torture_spinlock, flags); | ||
| 192 | cur_ops->flags = flags; | ||
| 193 | return 0; | ||
| 194 | } | ||
| 195 | |||
| 196 | static void torture_lock_spin_write_unlock_irq(void) | ||
| 197 | __releases(torture_spinlock) | ||
| 198 | { | ||
| 199 | spin_unlock_irqrestore(&torture_spinlock, cur_ops->flags); | ||
| 200 | } | ||
| 201 | |||
| 202 | static struct lock_torture_ops spin_lock_irq_ops = { | ||
| 203 | .writelock = torture_spin_lock_write_lock_irq, | ||
| 204 | .write_delay = torture_spin_lock_write_delay, | ||
| 205 | .writeunlock = torture_lock_spin_write_unlock_irq, | ||
| 206 | .name = "spin_lock_irq" | ||
| 207 | }; | ||
| 208 | |||
| 209 | /* | ||
| 210 | * Lock torture writer kthread. Repeatedly acquires and releases | ||
| 211 | * the lock, checking for duplicate acquisitions. | ||
| 212 | */ | ||
| 213 | static int lock_torture_writer(void *arg) | ||
| 214 | { | ||
| 215 | struct lock_writer_stress_stats *lwsp = arg; | ||
| 216 | static DEFINE_TORTURE_RANDOM(rand); | ||
| 217 | |||
| 218 | VERBOSE_TOROUT_STRING("lock_torture_writer task started"); | ||
| 219 | set_user_nice(current, 19); | ||
| 220 | |||
| 221 | do { | ||
| 222 | schedule_timeout_uninterruptible(1); | ||
| 223 | cur_ops->writelock(); | ||
| 224 | if (WARN_ON_ONCE(lock_is_write_held)) | ||
| 225 | lwsp->n_write_lock_fail++; | ||
| 226 | lock_is_write_held = 1; | ||
| 227 | lwsp->n_write_lock_acquired++; | ||
| 228 | cur_ops->write_delay(&rand); | ||
| 229 | lock_is_write_held = 0; | ||
| 230 | cur_ops->writeunlock(); | ||
| 231 | stutter_wait("lock_torture_writer"); | ||
| 232 | } while (!torture_must_stop()); | ||
| 233 | torture_kthread_stopping("lock_torture_writer"); | ||
| 234 | return 0; | ||
| 235 | } | ||
| 236 | |||
| 237 | /* | ||
| 238 | * Create a lock-torture-statistics message in the specified buffer. | ||
| 239 | */ | ||
| 240 | static void lock_torture_printk(char *page) | ||
| 241 | { | ||
| 242 | bool fail = 0; | ||
| 243 | int i; | ||
| 244 | long max = 0; | ||
| 245 | long min = lwsa[0].n_write_lock_acquired; | ||
| 246 | long long sum = 0; | ||
| 247 | |||
| 248 | for (i = 0; i < nrealwriters_stress; i++) { | ||
| 249 | if (lwsa[i].n_write_lock_fail) | ||
| 250 | fail = true; | ||
| 251 | sum += lwsa[i].n_write_lock_acquired; | ||
| 252 | if (max < lwsa[i].n_write_lock_fail) | ||
| 253 | max = lwsa[i].n_write_lock_fail; | ||
| 254 | if (min > lwsa[i].n_write_lock_fail) | ||
| 255 | min = lwsa[i].n_write_lock_fail; | ||
| 256 | } | ||
| 257 | page += sprintf(page, "%s%s ", torture_type, TORTURE_FLAG); | ||
| 258 | page += sprintf(page, | ||
| 259 | "Writes: Total: %lld Max/Min: %ld/%ld %s Fail: %d %s\n", | ||
| 260 | sum, max, min, max / 2 > min ? "???" : "", | ||
| 261 | fail, fail ? "!!!" : ""); | ||
| 262 | if (fail) | ||
| 263 | atomic_inc(&n_lock_torture_errors); | ||
| 264 | } | ||
| 265 | |||
| 266 | /* | ||
| 267 | * Print torture statistics. Caller must ensure that there is only one | ||
| 268 | * call to this function at a given time!!! This is normally accomplished | ||
| 269 | * by relying on the module system to only have one copy of the module | ||
| 270 | * loaded, and then by giving the lock_torture_stats kthread full control | ||
| 271 | * (or the init/cleanup functions when lock_torture_stats thread is not | ||
| 272 | * running). | ||
| 273 | */ | ||
| 274 | static void lock_torture_stats_print(void) | ||
| 275 | { | ||
| 276 | int size = nrealwriters_stress * 200 + 8192; | ||
| 277 | char *buf; | ||
| 278 | |||
| 279 | buf = kmalloc(size, GFP_KERNEL); | ||
| 280 | if (!buf) { | ||
| 281 | pr_err("lock_torture_stats_print: Out of memory, need: %d", | ||
| 282 | size); | ||
| 283 | return; | ||
| 284 | } | ||
| 285 | lock_torture_printk(buf); | ||
| 286 | pr_alert("%s", buf); | ||
| 287 | kfree(buf); | ||
| 288 | } | ||
| 289 | |||
| 290 | /* | ||
| 291 | * Periodically prints torture statistics, if periodic statistics printing | ||
| 292 | * was specified via the stat_interval module parameter. | ||
| 293 | * | ||
| 294 | * No need to worry about fullstop here, since this one doesn't reference | ||
| 295 | * volatile state or register callbacks. | ||
| 296 | */ | ||
| 297 | static int lock_torture_stats(void *arg) | ||
| 298 | { | ||
| 299 | VERBOSE_TOROUT_STRING("lock_torture_stats task started"); | ||
| 300 | do { | ||
| 301 | schedule_timeout_interruptible(stat_interval * HZ); | ||
| 302 | lock_torture_stats_print(); | ||
| 303 | torture_shutdown_absorb("lock_torture_stats"); | ||
| 304 | } while (!torture_must_stop()); | ||
| 305 | torture_kthread_stopping("lock_torture_stats"); | ||
| 306 | return 0; | ||
| 307 | } | ||
| 308 | |||
| 309 | static inline void | ||
| 310 | lock_torture_print_module_parms(struct lock_torture_ops *cur_ops, | ||
| 311 | const char *tag) | ||
| 312 | { | ||
| 313 | pr_alert("%s" TORTURE_FLAG | ||
| 314 | "--- %s: nwriters_stress=%d stat_interval=%d verbose=%d shuffle_interval=%d stutter=%d shutdown_secs=%d onoff_interval=%d onoff_holdoff=%d\n", | ||
| 315 | torture_type, tag, nrealwriters_stress, stat_interval, verbose, | ||
| 316 | shuffle_interval, stutter, shutdown_secs, | ||
| 317 | onoff_interval, onoff_holdoff); | ||
| 318 | } | ||
| 319 | |||
| 320 | static void lock_torture_cleanup(void) | ||
| 321 | { | ||
| 322 | int i; | ||
| 323 | |||
| 324 | if (torture_cleanup()) | ||
| 325 | return; | ||
| 326 | |||
| 327 | if (writer_tasks) { | ||
| 328 | for (i = 0; i < nrealwriters_stress; i++) | ||
| 329 | torture_stop_kthread(lock_torture_writer, | ||
| 330 | writer_tasks[i]); | ||
| 331 | kfree(writer_tasks); | ||
| 332 | writer_tasks = NULL; | ||
| 333 | } | ||
| 334 | |||
| 335 | torture_stop_kthread(lock_torture_stats, stats_task); | ||
| 336 | lock_torture_stats_print(); /* -After- the stats thread is stopped! */ | ||
| 337 | |||
| 338 | if (atomic_read(&n_lock_torture_errors)) | ||
| 339 | lock_torture_print_module_parms(cur_ops, | ||
| 340 | "End of test: FAILURE"); | ||
| 341 | else if (torture_onoff_failures()) | ||
| 342 | lock_torture_print_module_parms(cur_ops, | ||
| 343 | "End of test: LOCK_HOTPLUG"); | ||
| 344 | else | ||
| 345 | lock_torture_print_module_parms(cur_ops, | ||
| 346 | "End of test: SUCCESS"); | ||
| 347 | } | ||
| 348 | |||
| 349 | static int __init lock_torture_init(void) | ||
| 350 | { | ||
| 351 | int i; | ||
| 352 | int firsterr = 0; | ||
| 353 | static struct lock_torture_ops *torture_ops[] = { | ||
| 354 | &lock_busted_ops, &spin_lock_ops, &spin_lock_irq_ops, | ||
| 355 | }; | ||
| 356 | |||
| 357 | torture_init_begin(torture_type, verbose, &locktorture_runnable); | ||
| 358 | |||
| 359 | /* Process args and tell the world that the torturer is on the job. */ | ||
| 360 | for (i = 0; i < ARRAY_SIZE(torture_ops); i++) { | ||
| 361 | cur_ops = torture_ops[i]; | ||
| 362 | if (strcmp(torture_type, cur_ops->name) == 0) | ||
| 363 | break; | ||
| 364 | } | ||
| 365 | if (i == ARRAY_SIZE(torture_ops)) { | ||
| 366 | pr_alert("lock-torture: invalid torture type: \"%s\"\n", | ||
| 367 | torture_type); | ||
| 368 | pr_alert("lock-torture types:"); | ||
| 369 | for (i = 0; i < ARRAY_SIZE(torture_ops); i++) | ||
| 370 | pr_alert(" %s", torture_ops[i]->name); | ||
| 371 | pr_alert("\n"); | ||
| 372 | torture_init_end(); | ||
| 373 | return -EINVAL; | ||
| 374 | } | ||
| 375 | if (cur_ops->init) | ||
| 376 | cur_ops->init(); /* no "goto unwind" prior to this point!!! */ | ||
| 377 | |||
| 378 | if (nwriters_stress >= 0) | ||
| 379 | nrealwriters_stress = nwriters_stress; | ||
| 380 | else | ||
| 381 | nrealwriters_stress = 2 * num_online_cpus(); | ||
| 382 | lock_torture_print_module_parms(cur_ops, "Start of test"); | ||
| 383 | |||
| 384 | /* Initialize the statistics so that each run gets its own numbers. */ | ||
| 385 | |||
| 386 | lock_is_write_held = 0; | ||
| 387 | lwsa = kmalloc(sizeof(*lwsa) * nrealwriters_stress, GFP_KERNEL); | ||
| 388 | if (lwsa == NULL) { | ||
| 389 | VERBOSE_TOROUT_STRING("lwsa: Out of memory"); | ||
| 390 | firsterr = -ENOMEM; | ||
| 391 | goto unwind; | ||
| 392 | } | ||
| 393 | for (i = 0; i < nrealwriters_stress; i++) { | ||
| 394 | lwsa[i].n_write_lock_fail = 0; | ||
| 395 | lwsa[i].n_write_lock_acquired = 0; | ||
| 396 | } | ||
| 397 | |||
| 398 | /* Start up the kthreads. */ | ||
| 399 | |||
| 400 | if (onoff_interval > 0) { | ||
| 401 | firsterr = torture_onoff_init(onoff_holdoff * HZ, | ||
| 402 | onoff_interval * HZ); | ||
| 403 | if (firsterr) | ||
| 404 | goto unwind; | ||
| 405 | } | ||
| 406 | if (shuffle_interval > 0) { | ||
| 407 | firsterr = torture_shuffle_init(shuffle_interval); | ||
| 408 | if (firsterr) | ||
| 409 | goto unwind; | ||
| 410 | } | ||
| 411 | if (shutdown_secs > 0) { | ||
| 412 | firsterr = torture_shutdown_init(shutdown_secs, | ||
| 413 | lock_torture_cleanup); | ||
| 414 | if (firsterr) | ||
| 415 | goto unwind; | ||
| 416 | } | ||
| 417 | if (stutter > 0) { | ||
| 418 | firsterr = torture_stutter_init(stutter); | ||
| 419 | if (firsterr) | ||
| 420 | goto unwind; | ||
| 421 | } | ||
| 422 | |||
| 423 | writer_tasks = kzalloc(nrealwriters_stress * sizeof(writer_tasks[0]), | ||
| 424 | GFP_KERNEL); | ||
| 425 | if (writer_tasks == NULL) { | ||
| 426 | VERBOSE_TOROUT_ERRSTRING("writer_tasks: Out of memory"); | ||
| 427 | firsterr = -ENOMEM; | ||
| 428 | goto unwind; | ||
| 429 | } | ||
| 430 | for (i = 0; i < nrealwriters_stress; i++) { | ||
| 431 | firsterr = torture_create_kthread(lock_torture_writer, &lwsa[i], | ||
| 432 | writer_tasks[i]); | ||
| 433 | if (firsterr) | ||
| 434 | goto unwind; | ||
| 435 | } | ||
| 436 | if (stat_interval > 0) { | ||
| 437 | firsterr = torture_create_kthread(lock_torture_stats, NULL, | ||
| 438 | stats_task); | ||
| 439 | if (firsterr) | ||
| 440 | goto unwind; | ||
| 441 | } | ||
| 442 | torture_init_end(); | ||
| 443 | return 0; | ||
| 444 | |||
| 445 | unwind: | ||
| 446 | torture_init_end(); | ||
| 447 | lock_torture_cleanup(); | ||
| 448 | return firsterr; | ||
| 449 | } | ||
| 450 | |||
| 451 | module_init(lock_torture_init); | ||
| 452 | module_exit(lock_torture_cleanup); | ||
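locktorture's ops-vector design keeps the framework generic: adding a lock flavor means writing one more lock_torture_ops and listing it in torture_ops[] in lock_torture_init(), after which it can be selected at load time with the torture_type module parameter (for example torture_type=spin_lock_irq). A hypothetical sketch of a mutex flavor, reusing the spinlock delay callback above; torture_mutex and mutex_lock_ops are made-up names, and #include <linux/mutex.h> would also be needed:

static DEFINE_MUTEX(torture_mutex);

static int torture_mutex_write_lock(void) __acquires(torture_mutex)
{
	mutex_lock(&torture_mutex);
	return 0;
}

static void torture_mutex_write_unlock(void) __releases(torture_mutex)
{
	mutex_unlock(&torture_mutex);
}

static struct lock_torture_ops mutex_lock_ops = {
	.writelock	= torture_mutex_write_lock,
	.write_delay	= torture_spin_lock_write_delay,	/* reuse */
	.writeunlock	= torture_mutex_write_unlock,
	.name		= "mutex_lock"
};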
diff --git a/kernel/locking/mcs_spinlock.c b/kernel/locking/mcs_spinlock.c
new file mode 100644
index 000000000000..838dc9e00669
--- /dev/null
+++ b/kernel/locking/mcs_spinlock.c
| @@ -0,0 +1,178 @@ | |||
| 1 | |||
| 2 | #include <linux/percpu.h> | ||
| 3 | #include <linux/mutex.h> | ||
| 4 | #include <linux/sched.h> | ||
| 5 | #include "mcs_spinlock.h" | ||
| 6 | |||
| 7 | #ifdef CONFIG_SMP | ||
| 8 | |||
| 9 | /* | ||
| 10 | * An MCS-like lock especially tailored for optimistic spinning by sleeping | ||
| 11 | * lock implementations (mutex, rwsem, etc). | ||
| 12 | * | ||
| 13 | * Using a single mcs node per CPU is safe because sleeping locks should not be | ||
| 14 | * called from interrupt context and we have preemption disabled while | ||
| 15 | * spinning. | ||
| 16 | */ | ||
| 17 | static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_queue, osq_node); | ||
| 18 | |||
| 19 | /* | ||
| 20 | * Get a stable @node->next pointer, either for unlock() or unqueue() purposes. | ||
| 21 | * Can return NULL in case we were the last queued and we updated @lock instead. | ||
| 22 | */ | ||
| 23 | static inline struct optimistic_spin_queue * | ||
| 24 | osq_wait_next(struct optimistic_spin_queue **lock, | ||
| 25 | struct optimistic_spin_queue *node, | ||
| 26 | struct optimistic_spin_queue *prev) | ||
| 27 | { | ||
| 28 | struct optimistic_spin_queue *next = NULL; | ||
| 29 | |||
| 30 | for (;;) { | ||
| 31 | if (*lock == node && cmpxchg(lock, node, prev) == node) { | ||
| 32 | /* | ||
| 33 | * We were the last queued, we moved @lock back. @prev | ||
| 34 | * will now observe @lock and will complete its | ||
| 35 | * unlock()/unqueue(). | ||
| 36 | */ | ||
| 37 | break; | ||
| 38 | } | ||
| 39 | |||
| 40 | /* | ||
| 41 | * We must xchg() the @node->next value, because if we were to | ||
| 42 | * leave it in, a concurrent unlock()/unqueue() from | ||
| 43 | * @node->next might complete Step-A and think its @prev is | ||
| 44 | * still valid. | ||
| 45 | * | ||
| 46 | * If the concurrent unlock()/unqueue() wins the race, we'll | ||
| 47 | * wait for either @lock to point to us, through its Step-B, or | ||
| 48 | * wait for a new @node->next from its Step-C. | ||
| 49 | */ | ||
| 50 | if (node->next) { | ||
| 51 | next = xchg(&node->next, NULL); | ||
| 52 | if (next) | ||
| 53 | break; | ||
| 54 | } | ||
| 55 | |||
| 56 | arch_mutex_cpu_relax(); | ||
| 57 | } | ||
| 58 | |||
| 59 | return next; | ||
| 60 | } | ||
| 61 | |||
| 62 | bool osq_lock(struct optimistic_spin_queue **lock) | ||
| 63 | { | ||
| 64 | struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node); | ||
| 65 | struct optimistic_spin_queue *prev, *next; | ||
| 66 | |||
| 67 | node->locked = 0; | ||
| 68 | node->next = NULL; | ||
| 69 | |||
| 70 | node->prev = prev = xchg(lock, node); | ||
| 71 | if (likely(prev == NULL)) | ||
| 72 | return true; | ||
| 73 | |||
| 74 | ACCESS_ONCE(prev->next) = node; | ||
| 75 | |||
| 76 | /* | ||
| 77 | * Normally @prev is untouchable after the above store; because at that | ||
| 78 | * moment unlock can proceed and wipe the node element from stack. | ||
| 79 | * | ||
| 80 | * However, since our nodes are static per-cpu storage, we're | ||
| 81 | * guaranteed their existence -- this allows us to apply | ||
| 82 | * cmpxchg in an attempt to undo our queueing. | ||
| 83 | */ | ||
| 84 | |||
| 85 | while (!smp_load_acquire(&node->locked)) { | ||
| 86 | /* | ||
| 87 | * If we need to reschedule bail... so we can block. | ||
| 88 | */ | ||
| 89 | if (need_resched()) | ||
| 90 | goto unqueue; | ||
| 91 | |||
| 92 | arch_mutex_cpu_relax(); | ||
| 93 | } | ||
| 94 | return true; | ||
| 95 | |||
| 96 | unqueue: | ||
| 97 | /* | ||
| 98 | * Step - A -- stabilize @prev | ||
| 99 | * | ||
| 100 | * Undo our @prev->next assignment; this will make @prev's | ||
| 101 | * unlock()/unqueue() wait for a next pointer since @lock points to us | ||
| 102 | * (or later). | ||
| 103 | */ | ||
| 104 | |||
| 105 | for (;;) { | ||
| 106 | if (prev->next == node && | ||
| 107 | cmpxchg(&prev->next, node, NULL) == node) | ||
| 108 | break; | ||
| 109 | |||
| 110 | /* | ||
| 111 | * We can only fail the cmpxchg() racing against an unlock(), | ||
| 112 | * in which case we should observe @node->locked becoming | ||
| 113 | * true. | ||
| 114 | */ | ||
| 115 | if (smp_load_acquire(&node->locked)) | ||
| 116 | return true; | ||
| 117 | |||
| 118 | arch_mutex_cpu_relax(); | ||
| 119 | |||
| 120 | /* | ||
| 121 | * Or we race against a concurrent unqueue()'s step-B, in which | ||
| 122 | * case its step-C will write us a new @node->prev pointer. | ||
| 123 | */ | ||
| 124 | prev = ACCESS_ONCE(node->prev); | ||
| 125 | } | ||
| 126 | |||
| 127 | /* | ||
| 128 | * Step - B -- stabilize @next | ||
| 129 | * | ||
| 130 | * Similar to unlock(), wait for @node->next or move @lock from @node | ||
| 131 | * back to @prev. | ||
| 132 | */ | ||
| 133 | |||
| 134 | next = osq_wait_next(lock, node, prev); | ||
| 135 | if (!next) | ||
| 136 | return false; | ||
| 137 | |||
| 138 | /* | ||
| 139 | * Step - C -- unlink | ||
| 140 | * | ||
| 141 | * @prev is stable because it's still waiting for a new @prev->next | ||
| 142 | * pointer, @next is stable because our @node->next pointer is NULL and | ||
| 143 | * it will wait in Step-A. | ||
| 144 | */ | ||
| 145 | |||
| 146 | ACCESS_ONCE(next->prev) = prev; | ||
| 147 | ACCESS_ONCE(prev->next) = next; | ||
| 148 | |||
| 149 | return false; | ||
| 150 | } | ||
| 151 | |||
| 152 | void osq_unlock(struct optimistic_spin_queue **lock) | ||
| 153 | { | ||
| 154 | struct optimistic_spin_queue *node = this_cpu_ptr(&osq_node); | ||
| 155 | struct optimistic_spin_queue *next; | ||
| 156 | |||
| 157 | /* | ||
| 158 | * Fast path for the uncontended case. | ||
| 159 | */ | ||
| 160 | if (likely(cmpxchg(lock, node, NULL) == node)) | ||
| 161 | return; | ||
| 162 | |||
| 163 | /* | ||
| 164 | * Second most likely case. | ||
| 165 | */ | ||
| 166 | next = xchg(&node->next, NULL); | ||
| 167 | if (next) { | ||
| 168 | ACCESS_ONCE(next->locked) = 1; | ||
| 169 | return; | ||
| 170 | } | ||
| 171 | |||
| 172 | next = osq_wait_next(lock, node, NULL); | ||
| 173 | if (next) | ||
| 174 | ACCESS_ONCE(next->locked) = 1; | ||
| 175 | } | ||
| 176 | |||
| 177 | #endif | ||
| 178 | |||
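osq_lock()/osq_unlock() give a sleeping-lock slowpath a cancellable spinner queue built from per-CPU nodes: only the queue head spins on the lock itself, and a spinner that needs to reschedule can unqueue (Steps A-C above) instead of blocking everyone behind it. A sketch of the intended caller pattern, mirroring the mutex.c hunk later in this diff; my_sleeping_lock, my_trylock() and my_optimistic_spin() are illustrative names, the usual linux/atomic.h, linux/sched.h and linux/mutex.h includes are assumed, and the caller is assumed to run with preemption disabled (one osq node per CPU):

struct my_sleeping_lock {
	struct optimistic_spin_queue *osq;	/* tail of the spinner queue */
	atomic_t count;				/* 1 == unlocked */
};

static bool my_trylock(struct my_sleeping_lock *lock)
{
	return atomic_cmpxchg(&lock->count, 1, 0) == 1;
}

static bool my_optimistic_spin(struct my_sleeping_lock *lock)
{
	bool taken = false;

	if (!osq_lock(&lock->osq))
		return false;		/* queue contended: take the sleeping slowpath */

	while (!need_resched()) {
		if (my_trylock(lock)) {
			taken = true;
			break;
		}
		arch_mutex_cpu_relax();
	}

	osq_unlock(&lock->osq);
	return taken;
}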
diff --git a/kernel/locking/mcs_spinlock.h b/kernel/locking/mcs_spinlock.h
new file mode 100644
index 000000000000..a2dbac4aca6b
--- /dev/null
+++ b/kernel/locking/mcs_spinlock.h
| @@ -0,0 +1,129 @@ | |||
| 1 | /* | ||
| 2 | * MCS lock defines | ||
| 3 | * | ||
| 4 | * This file contains the main data structure and API definitions of MCS lock. | ||
| 5 | * | ||
| 6 | * The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spin-lock | ||
| 7 | * with the desirable properties of being fair, and with each cpu trying | ||
| 8 | * to acquire the lock spinning on a local variable. | ||
| 9 | * It avoids the expensive cache-line bouncing that common test-and-set spin-lock | ||
| 10 | * implementations incur. | ||
| 11 | */ | ||
| 12 | #ifndef __LINUX_MCS_SPINLOCK_H | ||
| 13 | #define __LINUX_MCS_SPINLOCK_H | ||
| 14 | |||
| 15 | #include <asm/mcs_spinlock.h> | ||
| 16 | |||
| 17 | struct mcs_spinlock { | ||
| 18 | struct mcs_spinlock *next; | ||
| 19 | int locked; /* 1 if lock acquired */ | ||
| 20 | }; | ||
| 21 | |||
| 22 | #ifndef arch_mcs_spin_lock_contended | ||
| 23 | /* | ||
| 24 | * Using smp_load_acquire() provides a memory barrier that ensures | ||
| 25 | * subsequent operations happen after the lock is acquired. | ||
| 26 | */ | ||
| 27 | #define arch_mcs_spin_lock_contended(l) \ | ||
| 28 | do { \ | ||
| 29 | while (!(smp_load_acquire(l))) \ | ||
| 30 | arch_mutex_cpu_relax(); \ | ||
| 31 | } while (0) | ||
| 32 | #endif | ||
| 33 | |||
| 34 | #ifndef arch_mcs_spin_unlock_contended | ||
| 35 | /* | ||
| 36 | * smp_store_release() provides a memory barrier to ensure all | ||
| 37 | * operations in the critical section have been completed before | ||
| 38 | * unlocking. | ||
| 39 | */ | ||
| 40 | #define arch_mcs_spin_unlock_contended(l) \ | ||
| 41 | smp_store_release((l), 1) | ||
| 42 | #endif | ||
| 43 | |||
| 44 | /* | ||
| 45 | * Note: the smp_load_acquire/smp_store_release pair is not | ||
| 46 | * sufficient to form a full memory barrier across | ||
| 47 | * cpus for many architectures (except x86) for mcs_unlock and mcs_lock. | ||
| 48 | * For applications that need a full barrier across multiple cpus | ||
| 49 | * with mcs_unlock and mcs_lock pair, smp_mb__after_unlock_lock() should be | ||
| 50 | * used after mcs_lock. | ||
| 51 | */ | ||
| 52 | |||
| 53 | /* | ||
| 54 | * In order to acquire the lock, the caller should declare a local node and | ||
| 55 | * pass a reference of the node to this function in addition to the lock. | ||
| 56 | * If the lock has already been acquired, then this will proceed to spin | ||
| 57 | * on this node->locked until the previous lock holder sets the node->locked | ||
| 58 | * in mcs_spin_unlock(). | ||
| 59 | * | ||
| 60 | * We don't inline mcs_spin_lock() so that perf can correctly account for the | ||
| 61 | * time spent in this lock function. | ||
| 62 | */ | ||
| 63 | static inline | ||
| 64 | void mcs_spin_lock(struct mcs_spinlock **lock, struct mcs_spinlock *node) | ||
| 65 | { | ||
| 66 | struct mcs_spinlock *prev; | ||
| 67 | |||
| 68 | /* Init node */ | ||
| 69 | node->locked = 0; | ||
| 70 | node->next = NULL; | ||
| 71 | |||
| 72 | prev = xchg(lock, node); | ||
| 73 | if (likely(prev == NULL)) { | ||
| 74 | /* | ||
| 75 | * Lock acquired; no need to set node->locked to 1. A thread | ||
| 76 | * only spins on its own node->locked value for lock acquisition. | ||
| 77 | * However, since this thread can immediately acquire the lock | ||
| 78 | * and does not proceed to spin on its own node->locked, this | ||
| 79 | * value won't be used. If a debug mode is needed to | ||
| 80 | * audit lock status, then set node->locked value here. | ||
| 81 | */ | ||
| 82 | return; | ||
| 83 | } | ||
| 84 | ACCESS_ONCE(prev->next) = node; | ||
| 85 | |||
| 86 | /* Wait until the lock holder passes the lock down. */ | ||
| 87 | arch_mcs_spin_lock_contended(&node->locked); | ||
| 88 | } | ||
| 89 | |||
| 90 | /* | ||
| 91 | * Releases the lock. The caller should pass in the corresponding node that | ||
| 92 | * was used to acquire the lock. | ||
| 93 | */ | ||
| 94 | static inline | ||
| 95 | void mcs_spin_unlock(struct mcs_spinlock **lock, struct mcs_spinlock *node) | ||
| 96 | { | ||
| 97 | struct mcs_spinlock *next = ACCESS_ONCE(node->next); | ||
| 98 | |||
| 99 | if (likely(!next)) { | ||
| 100 | /* | ||
| 101 | * Release the lock by setting it to NULL | ||
| 102 | */ | ||
| 103 | if (likely(cmpxchg(lock, node, NULL) == node)) | ||
| 104 | return; | ||
| 105 | /* Wait until the next pointer is set */ | ||
| 106 | while (!(next = ACCESS_ONCE(node->next))) | ||
| 107 | arch_mutex_cpu_relax(); | ||
| 108 | } | ||
| 109 | |||
| 110 | /* Pass lock to next waiter. */ | ||
| 111 | arch_mcs_spin_unlock_contended(&next->locked); | ||
| 112 | } | ||
| 113 | |||
| 114 | /* | ||
| 115 | * Cancellable version of the MCS lock above. | ||
| 116 | * | ||
| 117 | * Intended for adaptive spinning of sleeping locks: | ||
| 118 | * mutex_lock()/rwsem_down_{read,write}() etc. | ||
| 119 | */ | ||
| 120 | |||
| 121 | struct optimistic_spin_queue { | ||
| 122 | struct optimistic_spin_queue *next, *prev; | ||
| 123 | int locked; /* 1 if lock acquired */ | ||
| 124 | }; | ||
| 125 | |||
| 126 | extern bool osq_lock(struct optimistic_spin_queue **lock); | ||
| 127 | extern void osq_unlock(struct optimistic_spin_queue **lock); | ||
| 128 | |||
| 129 | #endif /* __LINUX_MCS_SPINLOCK_H */ | ||
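The non-cancellable MCS primitives above are self-contained: the lock is just a tail pointer, and each contender spins only on the locked field of its own node, so the handoff is the only cross-CPU cache traffic. A minimal usage sketch, assuming "mcs_spinlock.h" is included; my_mcs_lock and my_critical_section() are illustrative, and interrupt/preemption handling is left to the caller:

static struct mcs_spinlock *my_mcs_lock;	/* NULL == unlocked */

static void my_critical_section(void)
{
	struct mcs_spinlock node;		/* this contender's queue node */

	mcs_spin_lock(&my_mcs_lock, &node);
	/* ... only one CPU at a time executes here ... */
	mcs_spin_unlock(&my_mcs_lock, &node);
}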
diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c
index faf6f5b53e77..e1191c996c59 100644
--- a/kernel/locking/mutex-debug.c
+++ b/kernel/locking/mutex-debug.c
| @@ -83,6 +83,12 @@ void debug_mutex_unlock(struct mutex *lock) | |||
| 83 | 83 | ||
| 84 | DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); | 84 | DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); |
| 85 | mutex_clear_owner(lock); | 85 | mutex_clear_owner(lock); |
| 86 | |||
| 87 | /* | ||
| 88 | * __mutex_slowpath_needs_to_unlock() is explicitly 0 for debug | ||
| 89 | * mutexes so that we can do it here after we've verified state. | ||
| 90 | */ | ||
| 91 | atomic_set(&lock->count, 1); | ||
| 86 | } | 92 | } |
| 87 | 93 | ||
| 88 | void debug_mutex_init(struct mutex *lock, const char *name, | 94 | void debug_mutex_init(struct mutex *lock, const char *name, |
diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c
index 4dd6e4c219de..bc73d33c6760 100644
--- a/kernel/locking/mutex.c
+++ b/kernel/locking/mutex.c
| @@ -25,6 +25,7 @@ | |||
| 25 | #include <linux/spinlock.h> | 25 | #include <linux/spinlock.h> |
| 26 | #include <linux/interrupt.h> | 26 | #include <linux/interrupt.h> |
| 27 | #include <linux/debug_locks.h> | 27 | #include <linux/debug_locks.h> |
| 28 | #include "mcs_spinlock.h" | ||
| 28 | 29 | ||
| 29 | /* | 30 | /* |
| 30 | * In the DEBUG case we are using the "NULL fastpath" for mutexes, | 31 | * In the DEBUG case we are using the "NULL fastpath" for mutexes, |
| @@ -33,6 +34,13 @@ | |||
| 33 | #ifdef CONFIG_DEBUG_MUTEXES | 34 | #ifdef CONFIG_DEBUG_MUTEXES |
| 34 | # include "mutex-debug.h" | 35 | # include "mutex-debug.h" |
| 35 | # include <asm-generic/mutex-null.h> | 36 | # include <asm-generic/mutex-null.h> |
| 37 | /* | ||
| 38 | * Must be 0 for the debug case so we do not do the unlock outside of the | ||
| 39 | * wait_lock region. debug_mutex_unlock() will do the actual unlock in this | ||
| 40 | * case. | ||
| 41 | */ | ||
| 42 | # undef __mutex_slowpath_needs_to_unlock | ||
| 43 | # define __mutex_slowpath_needs_to_unlock() 0 | ||
| 36 | #else | 44 | #else |
| 37 | # include "mutex.h" | 45 | # include "mutex.h" |
| 38 | # include <asm/mutex.h> | 46 | # include <asm/mutex.h> |
| @@ -52,7 +60,7 @@ __mutex_init(struct mutex *lock, const char *name, struct lock_class_key *key) | |||
| 52 | INIT_LIST_HEAD(&lock->wait_list); | 60 | INIT_LIST_HEAD(&lock->wait_list); |
| 53 | mutex_clear_owner(lock); | 61 | mutex_clear_owner(lock); |
| 54 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER | 62 | #ifdef CONFIG_MUTEX_SPIN_ON_OWNER |
| 55 | lock->spin_mlock = NULL; | 63 | lock->osq = NULL; |
| 56 | #endif | 64 | #endif |
| 57 | 65 | ||
| 58 | debug_mutex_init(lock, name, key); | 66 | debug_mutex_init(lock, name, key); |
| @@ -67,8 +75,7 @@ EXPORT_SYMBOL(__mutex_init); | |||
| 67 | * We also put the fastpath first in the kernel image, to make sure the | 75 | * We also put the fastpath first in the kernel image, to make sure the |
| 68 | * branch is predicted by the CPU as default-untaken. | 76 | * branch is predicted by the CPU as default-untaken. |
| 69 | */ | 77 | */ |
| 70 | static __used noinline void __sched | 78 | __visible void __sched __mutex_lock_slowpath(atomic_t *lock_count); |
| 71 | __mutex_lock_slowpath(atomic_t *lock_count); | ||
| 72 | 79 | ||
| 73 | /** | 80 | /** |
| 74 | * mutex_lock - acquire the mutex | 81 | * mutex_lock - acquire the mutex |
| @@ -111,54 +118,7 @@ EXPORT_SYMBOL(mutex_lock); | |||
| 111 | * more or less simultaneously, the spinners need to acquire a MCS lock | 118 | * more or less simultaneously, the spinners need to acquire a MCS lock |
| 112 | * first before spinning on the owner field. | 119 | * first before spinning on the owner field. |
| 113 | * | 120 | * |
| 114 | * We don't inline mspin_lock() so that perf can correctly account for the | ||
| 115 | * time spent in this lock function. | ||
| 116 | */ | 121 | */ |
| 117 | struct mspin_node { | ||
| 118 | struct mspin_node *next ; | ||
| 119 | int locked; /* 1 if lock acquired */ | ||
| 120 | }; | ||
| 121 | #define MLOCK(mutex) ((struct mspin_node **)&((mutex)->spin_mlock)) | ||
| 122 | |||
| 123 | static noinline | ||
| 124 | void mspin_lock(struct mspin_node **lock, struct mspin_node *node) | ||
| 125 | { | ||
| 126 | struct mspin_node *prev; | ||
| 127 | |||
| 128 | /* Init node */ | ||
| 129 | node->locked = 0; | ||
| 130 | node->next = NULL; | ||
| 131 | |||
| 132 | prev = xchg(lock, node); | ||
| 133 | if (likely(prev == NULL)) { | ||
| 134 | /* Lock acquired */ | ||
| 135 | node->locked = 1; | ||
| 136 | return; | ||
| 137 | } | ||
| 138 | ACCESS_ONCE(prev->next) = node; | ||
| 139 | smp_wmb(); | ||
| 140 | /* Wait until the lock holder passes the lock down */ | ||
| 141 | while (!ACCESS_ONCE(node->locked)) | ||
| 142 | arch_mutex_cpu_relax(); | ||
| 143 | } | ||
| 144 | |||
| 145 | static void mspin_unlock(struct mspin_node **lock, struct mspin_node *node) | ||
| 146 | { | ||
| 147 | struct mspin_node *next = ACCESS_ONCE(node->next); | ||
| 148 | |||
| 149 | if (likely(!next)) { | ||
| 150 | /* | ||
| 151 | * Release the lock by setting it to NULL | ||
| 152 | */ | ||
| 153 | if (cmpxchg(lock, node, NULL) == node) | ||
| 154 | return; | ||
| 155 | /* Wait until the next pointer is set */ | ||
| 156 | while (!(next = ACCESS_ONCE(node->next))) | ||
| 157 | arch_mutex_cpu_relax(); | ||
| 158 | } | ||
| 159 | ACCESS_ONCE(next->locked) = 1; | ||
| 160 | smp_wmb(); | ||
| 161 | } | ||
| 162 | 122 | ||
| 163 | /* | 123 | /* |
| 164 | * Mutex spinning code migrated from kernel/sched/core.c | 124 | * Mutex spinning code migrated from kernel/sched/core.c |
| @@ -212,6 +172,9 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock) | |||
| 212 | struct task_struct *owner; | 172 | struct task_struct *owner; |
| 213 | int retval = 1; | 173 | int retval = 1; |
| 214 | 174 | ||
| 175 | if (need_resched()) | ||
| 176 | return 0; | ||
| 177 | |||
| 215 | rcu_read_lock(); | 178 | rcu_read_lock(); |
| 216 | owner = ACCESS_ONCE(lock->owner); | 179 | owner = ACCESS_ONCE(lock->owner); |
| 217 | if (owner) | 180 | if (owner) |
| @@ -225,7 +188,8 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock) | |||
| 225 | } | 188 | } |
| 226 | #endif | 189 | #endif |
| 227 | 190 | ||
| 228 | static __used noinline void __sched __mutex_unlock_slowpath(atomic_t *lock_count); | 191 | __visible __used noinline |
| 192 | void __sched __mutex_unlock_slowpath(atomic_t *lock_count); | ||
| 229 | 193 | ||
| 230 | /** | 194 | /** |
| 231 | * mutex_unlock - release the mutex | 195 | * mutex_unlock - release the mutex |
| @@ -446,9 +410,11 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, | |||
| 446 | if (!mutex_can_spin_on_owner(lock)) | 410 | if (!mutex_can_spin_on_owner(lock)) |
| 447 | goto slowpath; | 411 | goto slowpath; |
| 448 | 412 | ||
| 413 | if (!osq_lock(&lock->osq)) | ||
| 414 | goto slowpath; | ||
| 415 | |||
| 449 | for (;;) { | 416 | for (;;) { |
| 450 | struct task_struct *owner; | 417 | struct task_struct *owner; |
| 451 | struct mspin_node node; | ||
| 452 | 418 | ||
| 453 | if (use_ww_ctx && ww_ctx->acquired > 0) { | 419 | if (use_ww_ctx && ww_ctx->acquired > 0) { |
| 454 | struct ww_mutex *ww; | 420 | struct ww_mutex *ww; |
| @@ -463,19 +429,16 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, | |||
| 463 | * performed the optimistic spinning cannot be done. | 429 | * performed the optimistic spinning cannot be done. |
| 464 | */ | 430 | */ |
| 465 | if (ACCESS_ONCE(ww->ctx)) | 431 | if (ACCESS_ONCE(ww->ctx)) |
| 466 | goto slowpath; | 432 | break; |
| 467 | } | 433 | } |
| 468 | 434 | ||
| 469 | /* | 435 | /* |
| 470 | * If there's an owner, wait for it to either | 436 | * If there's an owner, wait for it to either |
| 471 | * release the lock or go to sleep. | 437 | * release the lock or go to sleep. |
| 472 | */ | 438 | */ |
| 473 | mspin_lock(MLOCK(lock), &node); | ||
| 474 | owner = ACCESS_ONCE(lock->owner); | 439 | owner = ACCESS_ONCE(lock->owner); |
| 475 | if (owner && !mutex_spin_on_owner(lock, owner)) { | 440 | if (owner && !mutex_spin_on_owner(lock, owner)) |
| 476 | mspin_unlock(MLOCK(lock), &node); | 441 | break; |
| 477 | goto slowpath; | ||
| 478 | } | ||
| 479 | 442 | ||
| 480 | if ((atomic_read(&lock->count) == 1) && | 443 | if ((atomic_read(&lock->count) == 1) && |
| 481 | (atomic_cmpxchg(&lock->count, 1, 0) == 1)) { | 444 | (atomic_cmpxchg(&lock->count, 1, 0) == 1)) { |
| @@ -488,11 +451,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, | |||
| 488 | } | 451 | } |
| 489 | 452 | ||
| 490 | mutex_set_owner(lock); | 453 | mutex_set_owner(lock); |
| 491 | mspin_unlock(MLOCK(lock), &node); | 454 | osq_unlock(&lock->osq); |
| 492 | preempt_enable(); | 455 | preempt_enable(); |
| 493 | return 0; | 456 | return 0; |
| 494 | } | 457 | } |
| 495 | mspin_unlock(MLOCK(lock), &node); | ||
| 496 | 458 | ||
| 497 | /* | 459 | /* |
| 498 | * When there's no owner, we might have preempted between the | 460 | * When there's no owner, we might have preempted between the |
| @@ -501,7 +463,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, | |||
| 501 | * the owner complete. | 463 | * the owner complete. |
| 502 | */ | 464 | */ |
| 503 | if (!owner && (need_resched() || rt_task(task))) | 465 | if (!owner && (need_resched() || rt_task(task))) |
| 504 | goto slowpath; | 466 | break; |
| 505 | 467 | ||
| 506 | /* | 468 | /* |
| 507 | * The cpu_relax() call is a compiler barrier which forces | 469 | * The cpu_relax() call is a compiler barrier which forces |
| @@ -511,7 +473,15 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass, | |||
| 511 | */ | 473 | */ |
| 512 | arch_mutex_cpu_relax(); | 474 | arch_mutex_cpu_relax(); |
| 513 | } | 475 | } |
| 476 | osq_unlock(&lock->osq); | ||
| 514 | slowpath: | 477 | slowpath: |
| 478 | /* | ||
| 479 | * If we fell out of the spin path because of need_resched(), | ||
| 480 | * reschedule now, before we try-lock the mutex. This avoids getting | ||
| 481 | * scheduled out right after we obtained the mutex. | ||
| 482 | */ | ||
| 483 | if (need_resched()) | ||
| 484 | schedule_preempt_disabled(); | ||
| 515 | #endif | 485 | #endif |
| 516 | spin_lock_mutex(&lock->wait_lock, flags); | 486 | spin_lock_mutex(&lock->wait_lock, flags); |
| 517 | 487 | ||
| @@ -717,10 +687,6 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested) | |||
| 717 | struct mutex *lock = container_of(lock_count, struct mutex, count); | 687 | struct mutex *lock = container_of(lock_count, struct mutex, count); |
| 718 | unsigned long flags; | 688 | unsigned long flags; |
| 719 | 689 | ||
| 720 | spin_lock_mutex(&lock->wait_lock, flags); | ||
| 721 | mutex_release(&lock->dep_map, nested, _RET_IP_); | ||
| 722 | debug_mutex_unlock(lock); | ||
| 723 | |||
| 724 | /* | 690 | /* |
| 725 | * some architectures leave the lock unlocked in the fastpath failure | 691 | * some architectures leave the lock unlocked in the fastpath failure |
| 726 | * case, others need to leave it locked. In the later case we have to | 692 | * case, others need to leave it locked. In the later case we have to |
| @@ -729,6 +695,10 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested) | |||
| 729 | if (__mutex_slowpath_needs_to_unlock()) | 695 | if (__mutex_slowpath_needs_to_unlock()) |
| 730 | atomic_set(&lock->count, 1); | 696 | atomic_set(&lock->count, 1); |
| 731 | 697 | ||
| 698 | spin_lock_mutex(&lock->wait_lock, flags); | ||
| 699 | mutex_release(&lock->dep_map, nested, _RET_IP_); | ||
| 700 | debug_mutex_unlock(lock); | ||
| 701 | |||
| 732 | if (!list_empty(&lock->wait_list)) { | 702 | if (!list_empty(&lock->wait_list)) { |
| 733 | /* get the first entry from the wait-list: */ | 703 | /* get the first entry from the wait-list: */ |
| 734 | struct mutex_waiter *waiter = | 704 | struct mutex_waiter *waiter = |
| @@ -746,7 +716,7 @@ __mutex_unlock_common_slowpath(atomic_t *lock_count, int nested) | |||
| 746 | /* | 716 | /* |
| 747 | * Release the lock, slowpath: | 717 | * Release the lock, slowpath: |
| 748 | */ | 718 | */ |
| 749 | static __used noinline void | 719 | __visible void |
| 750 | __mutex_unlock_slowpath(atomic_t *lock_count) | 720 | __mutex_unlock_slowpath(atomic_t *lock_count) |
| 751 | { | 721 | { |
| 752 | __mutex_unlock_common_slowpath(lock_count, 1); | 722 | __mutex_unlock_common_slowpath(lock_count, 1); |
| @@ -803,7 +773,7 @@ int __sched mutex_lock_killable(struct mutex *lock) | |||
| 803 | } | 773 | } |
| 804 | EXPORT_SYMBOL(mutex_lock_killable); | 774 | EXPORT_SYMBOL(mutex_lock_killable); |
| 805 | 775 | ||
| 806 | static __used noinline void __sched | 776 | __visible void __sched |
| 807 | __mutex_lock_slowpath(atomic_t *lock_count) | 777 | __mutex_lock_slowpath(atomic_t *lock_count) |
| 808 | { | 778 | { |
| 809 | struct mutex *lock = container_of(lock_count, struct mutex, count); | 779 | struct mutex *lock = container_of(lock_count, struct mutex, count); |
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 2e960a2bab81..aa4dff04b594 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
| @@ -213,6 +213,18 @@ struct task_struct *rt_mutex_get_top_task(struct task_struct *task) | |||
| 213 | } | 213 | } |
| 214 | 214 | ||
| 215 | /* | 215 | /* |
| 216 | * Called by sched_setscheduler() to check whether the priority change | ||
| 217 | * is overruled by a possible priority boosting. | ||
| 218 | */ | ||
| 219 | int rt_mutex_check_prio(struct task_struct *task, int newprio) | ||
| 220 | { | ||
| 221 | if (!task_has_pi_waiters(task)) | ||
| 222 | return 0; | ||
| 223 | |||
| 224 | return task_top_pi_waiter(task)->task->prio <= newprio; | ||
| 225 | } | ||
| 226 | |||
| 227 | /* | ||
| 216 | * Adjust the priority of a task, after its pi_waiters got modified. | 228 | * Adjust the priority of a task, after its pi_waiters got modified. |
| 217 | * | 229 | * |
| 218 | * This can be both boosting and unboosting. task->pi_lock must be held. | 230 | * This can be both boosting and unboosting. task->pi_lock must be held. |
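rt_mutex_check_prio() lets the scheduler ask, before applying a requested priority, whether the task's top PI waiter would still boost it to that level or higher (lower prio numbers mean higher priority, hence the <= comparison). The real call site lives in the scheduler and is outside this kernel/locking diffstat; the sketch below is a hedged illustration with made-up names and a deliberately simplified priority assignment:

/*
 * Assumes the rt_mutex_check_prio() prototype is visible, e.g. alongside
 * the other rt_mutex priority helpers; my_apply_new_prio() is illustrative.
 */
static void my_apply_new_prio(struct task_struct *p, int newprio)
{
	if (rt_mutex_check_prio(p, newprio)) {
		/*
		 * A PI waiter boosts p at least as high as newprio; keep
		 * the boosted priority and let rt_mutex_adjust_prio()
		 * lower it once the boosting waiter goes away.
		 */
		return;
	}
	p->prio = newprio;	/* simplified stand-in for the real setprio path */
}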
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index 19c5fa95e0b4..1d66e08e897d 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
| @@ -143,6 +143,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type) | |||
| 143 | /* | 143 | /* |
| 144 | * wait for the read lock to be granted | 144 | * wait for the read lock to be granted |
| 145 | */ | 145 | */ |
| 146 | __visible | ||
| 146 | struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) | 147 | struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) |
| 147 | { | 148 | { |
| 148 | long count, adjustment = -RWSEM_ACTIVE_READ_BIAS; | 149 | long count, adjustment = -RWSEM_ACTIVE_READ_BIAS; |
| @@ -190,6 +191,7 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem) | |||
| 190 | /* | 191 | /* |
| 191 | * wait until we successfully acquire the write lock | 192 | * wait until we successfully acquire the write lock |
| 192 | */ | 193 | */ |
| 194 | __visible | ||
| 193 | struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) | 195 | struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) |
| 194 | { | 196 | { |
| 195 | long count, adjustment = -RWSEM_ACTIVE_WRITE_BIAS; | 197 | long count, adjustment = -RWSEM_ACTIVE_WRITE_BIAS; |
| @@ -252,6 +254,7 @@ struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem) | |||
| 252 | * handle waking up a waiter on the semaphore | 254 | * handle waking up a waiter on the semaphore |
| 253 | * - up_read/up_write has decremented the active part of count if we come here | 255 | * - up_read/up_write has decremented the active part of count if we come here |
| 254 | */ | 256 | */ |
| 257 | __visible | ||
| 255 | struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) | 258 | struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) |
| 256 | { | 259 | { |
| 257 | unsigned long flags; | 260 | unsigned long flags; |
| @@ -272,6 +275,7 @@ struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem) | |||
| 272 | * - caller incremented waiting part of count and discovered it still negative | 275 | * - caller incremented waiting part of count and discovered it still negative |
| 273 | * - just wake up any readers at the front of the queue | 276 | * - just wake up any readers at the front of the queue |
| 274 | */ | 277 | */ |
| 278 | __visible | ||
| 275 | struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) | 279 | struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem) |
| 276 | { | 280 | { |
| 277 | unsigned long flags; | 281 | unsigned long flags; |
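A recurring change across this merge (lockdep.c, mutex.c, and the rwsem slowpath entry points above) is annotating functions that are reached only from assembly or from arch fastpath macros with __visible. A hedged sketch of the pattern; my_arch_entry_helper() is a made-up example, and the stated rationale (keeping the symbol externally visible so gcc LTO / -fwhole-program builds do not localize or drop it) is an assumption, not text from this diff:

#include <linux/compiler.h>

/* Called from architecture assembly, never from C: */
__visible void my_arch_entry_helper(unsigned long ip)
{
	/* ... */
}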
