-rw-r--r--  Documentation/mutex-design.txt         | 252
-rw-r--r--  arch/x86/Kconfig                       |   1
-rw-r--r--  arch/x86/include/asm/qrwlock.h         |  17
-rw-r--r--  arch/x86/include/asm/spinlock.h        |   4
-rw-r--r--  arch/x86/include/asm/spinlock_types.h  |   4
-rw-r--r--  include/asm-generic/qrwlock.h          | 166
-rw-r--r--  include/asm-generic/qrwlock_types.h    |  21
-rw-r--r--  include/linux/rwsem.h                  |  25
-rw-r--r--  kernel/Kconfig.locks                   |   7
-rw-r--r--  kernel/locking/Makefile                |   1
-rw-r--r--  kernel/locking/qrwlock.c               | 133
-rw-r--r--  kernel/locking/rwsem-xadd.c            | 225
-rw-r--r--  kernel/locking/rwsem.c                 |  31
13 files changed, 737 insertions(+), 150 deletions(-)
diff --git a/Documentation/mutex-design.txt b/Documentation/mutex-design.txt
index 1dfe62c3641d..ee231ed09ec6 100644
--- a/Documentation/mutex-design.txt
+++ b/Documentation/mutex-design.txt
@@ -1,139 +1,157 @@
 Generic Mutex Subsystem

 started by Ingo Molnar <mingo@redhat.com>
+updated by Davidlohr Bueso <davidlohr@hp.com>

-  "Why on earth do we need a new mutex subsystem, and what's wrong
-   with semaphores?"
+What are mutexes?
+-----------------

-firstly, there's nothing wrong with semaphores. But if the simpler
-mutex semantics are sufficient for your code, then there are a couple
-of advantages of mutexes:
+In the Linux kernel, mutexes refer to a particular locking primitive
+that enforces serialization on shared memory systems, and not only to
+the generic term referring to 'mutual exclusion' found in academia
+or similar theoretical text books. Mutexes are sleeping locks which
+behave similarly to binary semaphores, and were introduced in 2006[1]
+as an alternative to these. This new data structure provided a number
+of advantages, including simpler interfaces, and at that time smaller
+code (see Disadvantages).

- - 'struct mutex' is smaller on most architectures: E.g. on x86,
-   'struct semaphore' is 20 bytes, 'struct mutex' is 16 bytes.
-   A smaller structure size means less RAM footprint, and better
-   CPU-cache utilization.
+[1] http://lwn.net/Articles/164802/

- - tighter code. On x86 i get the following .text sizes when
-   switching all mutex-alike semaphores in the kernel to the mutex
-   subsystem:
+Implementation
+--------------

-        text    data     bss     dec     hex filename
-     3280380  868188  396860 4545428  455b94 vmlinux-semaphore
-     3255329  865296  396732 4517357  44eded vmlinux-mutex
+Mutexes are represented by 'struct mutex', defined in include/linux/mutex.h
+and implemented in kernel/locking/mutex.c. These locks use a three
+state atomic counter (->count) to represent the different possible
+transitions that can occur during the lifetime of a lock:

-   that's 25051 bytes of code saved, or a 0.76% win - off the hottest
-   codepaths of the kernel. (The .data savings are 2892 bytes, or 0.33%)
-   Smaller code means better icache footprint, which is one of the
-   major optimization goals in the Linux kernel currently.
+    1: unlocked
+    0: locked, no waiters
+    negative: locked, with potential waiters

- - the mutex subsystem is slightly faster and has better scalability for
-   contended workloads. On an 8-way x86 system, running a mutex-based
-   kernel and testing creat+unlink+close (of separate, per-task files)
-   in /tmp with 16 parallel tasks, the average number of ops/sec is:
+In its most basic form it also includes a wait-queue and a spinlock
+that serializes access to it. CONFIG_SMP systems can also include
+a pointer to the lock task owner (->owner) as well as a spinner MCS
+lock (->osq), both described below in (ii).

-    Semaphores:                        Mutexes:
+When acquiring a mutex, there are three possible paths that can be
+taken, depending on the state of the lock:

-    $ ./test-mutex V 16 10             $ ./test-mutex V 16 10
-    8 CPUs, running 16 tasks.          8 CPUs, running 16 tasks.
-    checking VFS performance.          checking VFS performance.
-    avg loops/sec:      34713          avg loops/sec:      84153
-    CPU utilization:      63%          CPU utilization:      22%
+(i) fastpath: tries to atomically acquire the lock by decrementing the
+    counter. If it was already taken by another task it goes to the next
+    possible path. This logic is architecture specific. On x86-64, the
+    locking fastpath is 2 instructions:

-   i.e. in this workload, the mutex based kernel was 2.4 times faster
-   than the semaphore based kernel, _and_ it also had 2.8 times less CPU
-   utilization. (In terms of 'ops per CPU cycle', the semaphore kernel
-   performed 551 ops/sec per 1% of CPU time used, while the mutex kernel
-   performed 3825 ops/sec per 1% of CPU time used - it was 6.9 times
-   more efficient.)
-
-   the scalability difference is visible even on a 2-way P4 HT box:
-
-    Semaphores:                        Mutexes:
-
-    $ ./test-mutex V 16 10             $ ./test-mutex V 16 10
-    4 CPUs, running 16 tasks.          8 CPUs, running 16 tasks.
-    checking VFS performance.          checking VFS performance.
-    avg loops/sec:      127659         avg loops/sec:      181082
-    CPU utilization:       100%        CPU utilization:       34%
-
-   (the straight performance advantage of mutexes is 41%, the per-cycle
-    efficiency of mutexes is 4.1 times better.)
-
- - there are no fastpath tradeoffs, the mutex fastpath is just as tight
-   as the semaphore fastpath. On x86, the locking fastpath is 2
-   instructions:
-
-    c0377ccb <mutex_lock>:
-    c0377ccb:       f0 ff 08                lock decl (%eax)
-    c0377cce:       78 0e                   js     c0377cde <.text..lock.mutex>
-    c0377cd0:       c3                      ret
+    0000000000000e10 <mutex_lock>:
+    e21:   f0 ff 0b                lock decl (%rbx)
+    e24:   79 08                   jns    e2e <mutex_lock+0x1e>

    the unlocking fastpath is equally tight:

-    c0377cd1 <mutex_unlock>:
-    c0377cd1:       f0 ff 00                lock incl (%eax)
-    c0377cd4:       7e 0f                   jle    c0377ce5 <.text..lock.mutex+0x7>
-    c0377cd6:       c3                      ret
+    0000000000000bc0 <mutex_unlock>:
+    bc8:   f0 ff 07                lock incl (%rdi)
+    bcb:   7f 0a                   jg     bd7 <mutex_unlock+0x17>
+

- - 'struct mutex' semantics are well-defined and are enforced if
-   CONFIG_DEBUG_MUTEXES is turned on. Semaphores on the other hand have
-   virtually no debugging code or instrumentation. The mutex subsystem
-   checks and enforces the following rules:
-
- * - only one task can hold the mutex at a time
- * - only the owner can unlock the mutex
- * - multiple unlocks are not permitted
- * - recursive locking is not permitted
- * - a mutex object must be initialized via the API
- * - a mutex object must not be initialized via memset or copying
- * - task may not exit with mutex held
- * - memory areas where held locks reside must not be freed
- * - held mutexes must not be reinitialized
- * - mutexes may not be used in hardware or software interrupt
- *   contexts such as tasklets and timers
-
-furthermore, there are also convenience features in the debugging
-code:
-
- * - uses symbolic names of mutexes, whenever they are printed in debug output
- * - point-of-acquire tracking, symbolic lookup of function names
- * - list of all locks held in the system, printout of them
- * - owner tracking
- * - detects self-recursing locks and prints out all relevant info
- * - detects multi-task circular deadlocks and prints out all affected
- *   locks and tasks (and only those tasks)
+(ii) midpath: aka optimistic spinning, tries to spin for acquisition
+     while the lock owner is running and there are no other tasks ready
+     to run that have higher priority (need_resched). The rationale is
+     that if the lock owner is running, it is likely to release the lock
+     soon. The mutex spinners are queued up using MCS lock so that only
+     one spinner can compete for the mutex.
+
+     The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spinlock
+     with the desirable properties of being fair and with each cpu trying
+     to acquire the lock spinning on a local variable. It avoids expensive
+     cacheline bouncing that common test-and-set spinlock implementations
+     incur. An MCS-like lock is specially tailored for optimistic spinning
+     for sleeping lock implementation. An important feature of the customized
+     MCS lock is that it has the extra property that spinners are able to exit
+     the MCS spinlock queue when they need to reschedule. This further helps
+     avoid situations where MCS spinners that need to reschedule would continue
+     waiting to spin on mutex owner, only to go directly to slowpath upon
+     obtaining the MCS lock.
+
+
+(iii) slowpath: last resort, if the lock is still unable to be acquired,
+      the task is added to the wait-queue and sleeps until woken up by the
+      unlock path. Under normal circumstances it blocks as TASK_UNINTERRUPTIBLE.
+
+While formally kernel mutexes are sleepable locks, it is path (ii) that
+makes them more practically a hybrid type. By simply not interrupting a
+task and busy-waiting for a few cycles instead of immediately sleeping,
+the performance of this lock has been seen to significantly improve a
+number of workloads. Note that this technique is also used for rw-semaphores.
+
+Semantics
+---------
+
+The mutex subsystem checks and enforces the following rules:
+
+    - Only one task can hold the mutex at a time.
+    - Only the owner can unlock the mutex.
+    - Multiple unlocks are not permitted.
+    - Recursive locking/unlocking is not permitted.
+    - A mutex must only be initialized via the API (see below).
+    - A task may not exit with a mutex held.
+    - Memory areas where held locks reside must not be freed.
+    - Held mutexes must not be reinitialized.
+    - Mutexes may not be used in hardware or software interrupt
+      contexts such as tasklets and timers.
+
+These semantics are fully enforced when CONFIG DEBUG_MUTEXES is enabled.
+In addition, the mutex debugging code also implements a number of other
+features that make lock debugging easier and faster:
+
+    - Uses symbolic names of mutexes, whenever they are printed
+      in debug output.
+    - Point-of-acquire tracking, symbolic lookup of function names,
+      list of all locks held in the system, printout of them.
+    - Owner tracking.
+    - Detects self-recursing locks and prints out all relevant info.
+    - Detects multi-task circular deadlocks and prints out all affected
+      locks and tasks (and only those tasks).
+
+
+Interfaces
+----------
+Statically define the mutex:
+   DEFINE_MUTEX(name);
+
+Dynamically initialize the mutex:
+   mutex_init(mutex);
+
+Acquire the mutex, uninterruptible:
+   void mutex_lock(struct mutex *lock);
+   void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
+   int  mutex_trylock(struct mutex *lock);
+
+Acquire the mutex, interruptible:
+   int mutex_lock_interruptible_nested(struct mutex *lock,
+                                       unsigned int subclass);
+   int mutex_lock_interruptible(struct mutex *lock);
+
+Acquire the mutex, interruptible, if dec to 0:
+   int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
+
+Unlock the mutex:
+   void mutex_unlock(struct mutex *lock);
+
+Test if the mutex is taken:
+   int mutex_is_locked(struct mutex *lock);

 Disadvantages
 -------------

-The stricter mutex API means you cannot use mutexes the same way you
-can use semaphores: e.g. they cannot be used from an interrupt context,
-nor can they be unlocked from a different context that which acquired
-it. [ I'm not aware of any other (e.g. performance) disadvantages from
-using mutexes at the moment, please let me know if you find any. ]
-
-Implementation of mutexes
--------------------------
-
-'struct mutex' is the new mutex type, defined in include/linux/mutex.h and
-implemented in kernel/locking/mutex.c. It is a counter-based mutex with a
-spinlock and a wait-list. The counter has 3 states: 1 for "unlocked", 0 for
-"locked" and negative numbers (usually -1) for "locked, potential waiters
-queued".
-
-the APIs of 'struct mutex' have been streamlined:
-
- DEFINE_MUTEX(name);
+Unlike its original design and purpose, 'struct mutex' is larger than
+most locks in the kernel. E.g: on x86-64 it is 40 bytes, almost twice
+as large as 'struct semaphore' (24 bytes) and 8 bytes shy of the
+'struct rw_semaphore' variant. Larger structure sizes mean more CPU
+cache and memory footprint.

- mutex_init(mutex);
+When to use mutexes
+-------------------

- void mutex_lock(struct mutex *lock);
- int  mutex_lock_interruptible(struct mutex *lock);
- int  mutex_trylock(struct mutex *lock);
- void mutex_unlock(struct mutex *lock);
- int  mutex_is_locked(struct mutex *lock);
- void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
- int  mutex_lock_interruptible_nested(struct mutex *lock,
-                                      unsigned int subclass);
- int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
+Unless the strict semantics of mutexes are unsuitable and/or the critical
+region prevents the lock from being shared, always prefer them to any other
+locking primitive.
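For illustration, a minimal sketch of the interfaces documented above as used
from process context; the cache_* names are hypothetical and not part of this
patch:

#include <linux/mutex.h>
#include <linux/errno.h>

static DEFINE_MUTEX(cache_lock);	/* statically defined mutex */
static int cache_entries;

/* Uninterruptible acquisition: the caller cannot back out. */
void cache_add(void)
{
	mutex_lock(&cache_lock);
	cache_entries++;
	mutex_unlock(&cache_lock);
}

/* Interruptible acquisition: a signal aborts the wait with -EINTR
 * and the mutex is not taken. */
int cache_add_interruptible(void)
{
	int err = mutex_lock_interruptible(&cache_lock);

	if (err)
		return err;
	cache_entries++;
	mutex_unlock(&cache_lock);
	return 0;
}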
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index b660088c220d..fcefdda5136d 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -121,6 +121,7 @@ config X86
 	select MODULES_USE_ELF_RELA if X86_64
 	select CLONE_BACKWARDS if X86_32
 	select ARCH_USE_BUILTIN_BSWAP
+	select ARCH_USE_QUEUE_RWLOCK
 	select OLD_SIGSUSPEND3 if X86_32 || IA32_EMULATION
 	select OLD_SIGACTION if X86_32
 	select COMPAT_OLD_SIGACTION if IA32_EMULATION
diff --git a/arch/x86/include/asm/qrwlock.h b/arch/x86/include/asm/qrwlock.h
new file mode 100644
index 000000000000..70f46f07f94e
--- /dev/null
+++ b/arch/x86/include/asm/qrwlock.h
@@ -0,0 +1,17 @@
1#ifndef _ASM_X86_QRWLOCK_H
2#define _ASM_X86_QRWLOCK_H
3
4#include <asm-generic/qrwlock_types.h>
5
6#if !defined(CONFIG_X86_OOSTORE) && !defined(CONFIG_X86_PPRO_FENCE)
7#define queue_write_unlock queue_write_unlock
8static inline void queue_write_unlock(struct qrwlock *lock)
9{
10 barrier();
11 ACCESS_ONCE(*(u8 *)&lock->cnts) = 0;
12}
13#endif
14
15#include <asm-generic/qrwlock.h>
16
17#endif /* _ASM_X86_QRWLOCK_H */
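For illustration only, a sketch of how the ->cnts word is carved up, which is
what makes the single byte store above safe on x86; the helper names are
hypothetical, the constants match the ones introduced in
asm-generic/qrwlock.h later in this series:

/*
 * Writers own the low byte of ->cnts (_QW_LOCKED == 0xff), readers are
 * counted in the bits above _QR_SHIFT.  Clearing just that byte is
 * therefore equivalent to the generic atomic_sub(_QW_LOCKED, &lock->cnts)
 * on a strongly ordered architecture such as x86.
 */
#define _QW_LOCKED	0xff		/* a writer holds the lock */
#define _QR_SHIFT	8		/* reader count shift */

static inline unsigned int qrw_writer_state(u32 cnts)
{
	return cnts & _QW_LOCKED;	/* 0, _QW_WAITING or _QW_LOCKED */
}

static inline unsigned int qrw_reader_count(u32 cnts)
{
	return cnts >> _QR_SHIFT;	/* number of active readers */
}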
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 0f62f5482d91..54f1c8068c02 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -187,6 +187,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
 		cpu_relax();
 }
 
+#ifndef CONFIG_QUEUE_RWLOCK
 /*
  * Read-write spinlocks, allowing multiple readers
  * but only one writer.
@@ -269,6 +270,9 @@ static inline void arch_write_unlock(arch_rwlock_t *rw)
 	asm volatile(LOCK_PREFIX WRITE_LOCK_ADD(%1) "%0"
 		     : "+m" (rw->write) : "i" (RW_LOCK_BIAS) : "memory");
 }
+#else
+#include <asm/qrwlock.h>
+#endif /* CONFIG_QUEUE_RWLOCK */
 
 #define arch_read_lock_flags(lock, flags) arch_read_lock(lock)
 #define arch_write_lock_flags(lock, flags) arch_write_lock(lock)
diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h
index 4f1bea19945b..73c4c007200f 100644
--- a/arch/x86/include/asm/spinlock_types.h
+++ b/arch/x86/include/asm/spinlock_types.h
@@ -34,6 +34,10 @@ typedef struct arch_spinlock {
 
 #define __ARCH_SPIN_LOCK_UNLOCKED	{ { 0 } }
 
+#ifdef CONFIG_QUEUE_RWLOCK
+#include <asm-generic/qrwlock_types.h>
+#else
 #include <asm/rwlock.h>
+#endif
 
 #endif /* _ASM_X86_SPINLOCK_TYPES_H */
diff --git a/include/asm-generic/qrwlock.h b/include/asm-generic/qrwlock.h
new file mode 100644
index 000000000000..6383d54bf983
--- /dev/null
+++ b/include/asm-generic/qrwlock.h
@@ -0,0 +1,166 @@
1/*
2 * Queue read/write lock
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * (C) Copyright 2013-2014 Hewlett-Packard Development Company, L.P.
15 *
16 * Authors: Waiman Long <waiman.long@hp.com>
17 */
18#ifndef __ASM_GENERIC_QRWLOCK_H
19#define __ASM_GENERIC_QRWLOCK_H
20
21#include <linux/atomic.h>
22#include <asm/barrier.h>
23#include <asm/processor.h>
24
25#include <asm-generic/qrwlock_types.h>
26
27/*
28 * Writer states & reader shift and bias
29 */
30#define _QW_WAITING 1 /* A writer is waiting */
31#define _QW_LOCKED 0xff /* A writer holds the lock */
32#define _QW_WMASK 0xff /* Writer mask */
33#define _QR_SHIFT 8 /* Reader count shift */
34#define _QR_BIAS (1U << _QR_SHIFT)
35
36/*
37 * External function declarations
38 */
39extern void queue_read_lock_slowpath(struct qrwlock *lock);
40extern void queue_write_lock_slowpath(struct qrwlock *lock);
41
42/**
43 * queue_read_can_lock- would read_trylock() succeed?
44 * @lock: Pointer to queue rwlock structure
45 */
46static inline int queue_read_can_lock(struct qrwlock *lock)
47{
48 return !(atomic_read(&lock->cnts) & _QW_WMASK);
49}
50
51/**
52 * queue_write_can_lock- would write_trylock() succeed?
53 * @lock: Pointer to queue rwlock structure
54 */
55static inline int queue_write_can_lock(struct qrwlock *lock)
56{
57 return !atomic_read(&lock->cnts);
58}
59
60/**
61 * queue_read_trylock - try to acquire read lock of a queue rwlock
62 * @lock : Pointer to queue rwlock structure
63 * Return: 1 if lock acquired, 0 if failed
64 */
65static inline int queue_read_trylock(struct qrwlock *lock)
66{
67 u32 cnts;
68
69 cnts = atomic_read(&lock->cnts);
70 if (likely(!(cnts & _QW_WMASK))) {
71 cnts = (u32)atomic_add_return(_QR_BIAS, &lock->cnts);
72 if (likely(!(cnts & _QW_WMASK)))
73 return 1;
74 atomic_sub(_QR_BIAS, &lock->cnts);
75 }
76 return 0;
77}
78
79/**
80 * queue_write_trylock - try to acquire write lock of a queue rwlock
81 * @lock : Pointer to queue rwlock structure
82 * Return: 1 if lock acquired, 0 if failed
83 */
84static inline int queue_write_trylock(struct qrwlock *lock)
85{
86 u32 cnts;
87
88 cnts = atomic_read(&lock->cnts);
89 if (unlikely(cnts))
90 return 0;
91
92 return likely(atomic_cmpxchg(&lock->cnts,
93 cnts, cnts | _QW_LOCKED) == cnts);
94}
95/**
96 * queue_read_lock - acquire read lock of a queue rwlock
97 * @lock: Pointer to queue rwlock structure
98 */
99static inline void queue_read_lock(struct qrwlock *lock)
100{
101 u32 cnts;
102
103 cnts = atomic_add_return(_QR_BIAS, &lock->cnts);
104 if (likely(!(cnts & _QW_WMASK)))
105 return;
106
107 /* The slowpath will decrement the reader count, if necessary. */
108 queue_read_lock_slowpath(lock);
109}
110
111/**
112 * queue_write_lock - acquire write lock of a queue rwlock
113 * @lock : Pointer to queue rwlock structure
114 */
115static inline void queue_write_lock(struct qrwlock *lock)
116{
117 /* Optimize for the unfair lock case where the fair flag is 0. */
118 if (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0)
119 return;
120
121 queue_write_lock_slowpath(lock);
122}
123
124/**
125 * queue_read_unlock - release read lock of a queue rwlock
126 * @lock : Pointer to queue rwlock structure
127 */
128static inline void queue_read_unlock(struct qrwlock *lock)
129{
130 /*
131 * Atomically decrement the reader count
132 */
133 smp_mb__before_atomic();
134 atomic_sub(_QR_BIAS, &lock->cnts);
135}
136
137#ifndef queue_write_unlock
138/**
139 * queue_write_unlock - release write lock of a queue rwlock
140 * @lock : Pointer to queue rwlock structure
141 */
142static inline void queue_write_unlock(struct qrwlock *lock)
143{
144 /*
145 * If the writer field is atomic, it can be cleared directly.
146 * Otherwise, an atomic subtraction will be used to clear it.
147 */
148 smp_mb__before_atomic();
149 atomic_sub(_QW_LOCKED, &lock->cnts);
150}
151#endif
152
153/*
154 * Remapping rwlock architecture specific functions to the corresponding
155 * queue rwlock functions.
156 */
157#define arch_read_can_lock(l) queue_read_can_lock(l)
158#define arch_write_can_lock(l) queue_write_can_lock(l)
159#define arch_read_lock(l) queue_read_lock(l)
160#define arch_write_lock(l) queue_write_lock(l)
161#define arch_read_trylock(l) queue_read_trylock(l)
162#define arch_write_trylock(l) queue_write_trylock(l)
163#define arch_read_unlock(l) queue_read_unlock(l)
164#define arch_write_unlock(l) queue_write_unlock(l)
165
166#endif /* __ASM_GENERIC_QRWLOCK_H */
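Because of the arch_* remapping at the end of this file, existing rwlock_t
users pick up the queued implementation without any source change. A hedged
usage sketch (the demo_* names and the device list are hypothetical):

#include <linux/spinlock.h>
#include <linux/list.h>

static DEFINE_RWLOCK(demo_list_lock);
static LIST_HEAD(demo_list);

struct demo_dev {
	struct list_head node;
	int id;
};

/* Readers may run concurrently and, unlike the old x86 rwlock, cannot
 * starve a writer indefinitely: once a writer is queued, later
 * process-context readers line up behind it. */
static int demo_count(void)
{
	struct demo_dev *d;
	int n = 0;

	read_lock(&demo_list_lock);
	list_for_each_entry(d, &demo_list, node)
		n++;
	read_unlock(&demo_list_lock);
	return n;
}

/* A writer excludes readers and other writers. */
static void demo_add(struct demo_dev *d)
{
	write_lock(&demo_list_lock);
	list_add(&d->node, &demo_list);
	write_unlock(&demo_list_lock);
}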
diff --git a/include/asm-generic/qrwlock_types.h b/include/asm-generic/qrwlock_types.h
new file mode 100644
index 000000000000..4d76f24df518
--- /dev/null
+++ b/include/asm-generic/qrwlock_types.h
@@ -0,0 +1,21 @@
1#ifndef __ASM_GENERIC_QRWLOCK_TYPES_H
2#define __ASM_GENERIC_QRWLOCK_TYPES_H
3
4#include <linux/types.h>
5#include <asm/spinlock_types.h>
6
7/*
8 * The queue read/write lock data structure
9 */
10
11typedef struct qrwlock {
12 atomic_t cnts;
13 arch_spinlock_t lock;
14} arch_rwlock_t;
15
16#define __ARCH_RW_LOCK_UNLOCKED { \
17 .cnts = ATOMIC_INIT(0), \
18 .lock = __ARCH_SPIN_LOCK_UNLOCKED, \
19}
20
21#endif /* __ASM_GENERIC_QRWLOCK_TYPES_H */
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 03f3b05e8ec1..8d79708146aa 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -16,6 +16,7 @@
 
 #include <linux/atomic.h>
 
+struct optimistic_spin_queue;
 struct rw_semaphore;
 
 #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
@@ -23,9 +24,17 @@ struct rw_semaphore;
 #else
 /* All arch specific implementations share the same struct */
 struct rw_semaphore {
 	long count;
 	raw_spinlock_t wait_lock;
 	struct list_head wait_list;
+#ifdef CONFIG_SMP
+	/*
+	 * Write owner. Used as a speculative check to see
+	 * if the owner is running on the cpu.
+	 */
+	struct task_struct *owner;
+	struct optimistic_spin_queue *osq; /* spinner MCS lock */
+#endif
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map dep_map;
 #endif
@@ -55,11 +64,21 @@ static inline int rwsem_is_locked(struct rw_semaphore *sem)
 # define __RWSEM_DEP_MAP_INIT(lockname)
 #endif
 
+#if defined(CONFIG_SMP) && !defined(CONFIG_RWSEM_GENERIC_SPINLOCK)
+#define __RWSEM_INITIALIZER(name)			\
+	{ RWSEM_UNLOCKED_VALUE,				\
+	  __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock),	\
+	  LIST_HEAD_INIT((name).wait_list),		\
+	  NULL, /* owner */				\
+	  NULL /* mcs lock */				\
+	  __RWSEM_DEP_MAP_INIT(name) }
+#else
 #define __RWSEM_INITIALIZER(name)			\
 	{ RWSEM_UNLOCKED_VALUE,				\
 	  __RAW_SPIN_LOCK_UNLOCKED(name.wait_lock),	\
 	  LIST_HEAD_INIT((name).wait_list)		\
 	  __RWSEM_DEP_MAP_INIT(name) }
+#endif
 
 #define DECLARE_RWSEM(name) \
 	struct rw_semaphore name = __RWSEM_INITIALIZER(name)
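For context, a short sketch of the rw_semaphore API whose structure grows the
->owner and ->osq fields above; the config_* names are hypothetical:

#include <linux/rwsem.h>

static DECLARE_RWSEM(config_rwsem);
static int config_value;

/* Any number of readers may hold the sem at once; none of them is
 * recorded in ->owner, which is why only writer ownership can be
 * spun upon. */
int config_read(void)
{
	int v;

	down_read(&config_rwsem);
	v = config_value;
	up_read(&config_rwsem);
	return v;
}

/* A single writer excludes everyone.  down_write() is where the new
 * ->owner/->osq fields let a contending writer spin instead of sleeping. */
void config_write(int v)
{
	down_write(&config_rwsem);
	config_value = v;
	up_write(&config_rwsem);
}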
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index d2b32ac27a39..35536d9c0964 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -223,3 +223,10 @@ endif
 config MUTEX_SPIN_ON_OWNER
 	def_bool y
 	depends on SMP && !DEBUG_MUTEXES
+
+config ARCH_USE_QUEUE_RWLOCK
+	bool
+
+config QUEUE_RWLOCK
+	def_bool y if ARCH_USE_QUEUE_RWLOCK
+	depends on SMP
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index b8bdcd4785b7..8541bfdfd232 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -24,4 +24,5 @@ obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
 obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
 obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
 obj-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o
+obj-$(CONFIG_QUEUE_RWLOCK) += qrwlock.o
 obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
diff --git a/kernel/locking/qrwlock.c b/kernel/locking/qrwlock.c
new file mode 100644
index 000000000000..fb5b8ac411a5
--- /dev/null
+++ b/kernel/locking/qrwlock.c
@@ -0,0 +1,133 @@
1/*
2 * Queue read/write lock
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * (C) Copyright 2013-2014 Hewlett-Packard Development Company, L.P.
15 *
16 * Authors: Waiman Long <waiman.long@hp.com>
17 */
18#include <linux/smp.h>
19#include <linux/bug.h>
20#include <linux/cpumask.h>
21#include <linux/percpu.h>
22#include <linux/hardirq.h>
23#include <linux/mutex.h>
24#include <asm/qrwlock.h>
25
26/**
27 * rspin_until_writer_unlock - inc reader count & spin until writer is gone
28 * @lock : Pointer to queue rwlock structure
29 * @writer: Current queue rwlock writer status byte
30 *
31 * In interrupt context or at the head of the queue, the reader will just
32 * increment the reader count & wait until the writer releases the lock.
33 */
34static __always_inline void
35rspin_until_writer_unlock(struct qrwlock *lock, u32 cnts)
36{
37 while ((cnts & _QW_WMASK) == _QW_LOCKED) {
38 arch_mutex_cpu_relax();
39 cnts = smp_load_acquire((u32 *)&lock->cnts);
40 }
41}
42
43/**
44 * queue_read_lock_slowpath - acquire read lock of a queue rwlock
45 * @lock: Pointer to queue rwlock structure
46 */
47void queue_read_lock_slowpath(struct qrwlock *lock)
48{
49 u32 cnts;
50
51 /*
52 * Readers come here when they cannot get the lock without waiting
53 */
54 if (unlikely(in_interrupt())) {
55 /*
56 * Readers in interrupt context will spin until the lock is
57 * available without waiting in the queue.
58 */
59 cnts = smp_load_acquire((u32 *)&lock->cnts);
60 rspin_until_writer_unlock(lock, cnts);
61 return;
62 }
63 atomic_sub(_QR_BIAS, &lock->cnts);
64
65 /*
66 * Put the reader into the wait queue
67 */
68 arch_spin_lock(&lock->lock);
69
70 /*
71 * At the head of the wait queue now, wait until the writer state
72 * goes to 0 and then try to increment the reader count and get
73 * the lock. It is possible that an incoming writer may steal the
74 * lock in the interim, so it is necessary to check the writer byte
75 * to make sure that the write lock isn't taken.
76 */
77 while (atomic_read(&lock->cnts) & _QW_WMASK)
78 arch_mutex_cpu_relax();
79
80 cnts = atomic_add_return(_QR_BIAS, &lock->cnts) - _QR_BIAS;
81 rspin_until_writer_unlock(lock, cnts);
82
83 /*
84 * Signal the next one in queue to become queue head
85 */
86 arch_spin_unlock(&lock->lock);
87}
88EXPORT_SYMBOL(queue_read_lock_slowpath);
89
90/**
91 * queue_write_lock_slowpath - acquire write lock of a queue rwlock
92 * @lock : Pointer to queue rwlock structure
93 */
94void queue_write_lock_slowpath(struct qrwlock *lock)
95{
96 u32 cnts;
97
98 /* Put the writer into the wait queue */
99 arch_spin_lock(&lock->lock);
100
101 /* Try to acquire the lock directly if no reader is present */
102 if (!atomic_read(&lock->cnts) &&
103 (atomic_cmpxchg(&lock->cnts, 0, _QW_LOCKED) == 0))
104 goto unlock;
105
106 /*
107 * Set the waiting flag to notify readers that a writer is pending,
108 * or wait for a previous writer to go away.
109 */
110 for (;;) {
111 cnts = atomic_read(&lock->cnts);
112 if (!(cnts & _QW_WMASK) &&
113 (atomic_cmpxchg(&lock->cnts, cnts,
114 cnts | _QW_WAITING) == cnts))
115 break;
116
117 arch_mutex_cpu_relax();
118 }
119
120 /* When no more readers, set the locked flag */
121 for (;;) {
122 cnts = atomic_read(&lock->cnts);
123 if ((cnts == _QW_WAITING) &&
124 (atomic_cmpxchg(&lock->cnts, _QW_WAITING,
125 _QW_LOCKED) == _QW_WAITING))
126 break;
127
128 arch_mutex_cpu_relax();
129 }
130unlock:
131 arch_spin_unlock(&lock->lock);
132}
133EXPORT_SYMBOL(queue_write_lock_slowpath);
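To illustrate why the read slowpath above lets in_interrupt() readers bypass
the queue, consider a hypothetical driver where the same lock is read both
from process context and from a hardirq handler:

#include <linux/interrupt.h>
#include <linux/spinlock.h>

static DEFINE_RWLOCK(stats_lock);
static unsigned long stats_packets;
static unsigned long stats_snapshot;

/* Hardirq reader: it may interrupt a CPU that already holds stats_lock
 * for reading.  If it had to queue behind a waiting writer it would
 * deadlock against the interrupted reader, so it only spins on the
 * writer byte. */
static irqreturn_t demo_irq(int irq, void *dev)
{
	read_lock(&stats_lock);
	stats_snapshot = stats_packets;
	read_unlock(&stats_lock);
	return IRQ_HANDLED;
}

/* The writer must disable interrupts, otherwise demo_irq() could
 * interrupt it on this CPU and spin forever on the lock it holds. */
static void demo_account(unsigned long n)
{
	unsigned long flags;

	write_lock_irqsave(&stats_lock, flags);
	stats_packets += n;
	write_unlock_irqrestore(&stats_lock, flags);
}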
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index b4219ff87b8c..dacc32142fcc 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -5,11 +5,17 @@
  *
  * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
  * and Michel Lespinasse <walken@google.com>
+ *
+ * Optimistic spinning by Tim Chen <tim.c.chen@intel.com>
+ * and Davidlohr Bueso <davidlohr@hp.com>. Based on mutexes.
  */
 #include <linux/rwsem.h>
 #include <linux/sched.h>
 #include <linux/init.h>
 #include <linux/export.h>
+#include <linux/sched/rt.h>
+
+#include "mcs_spinlock.h"
 
 /*
  * Guide to the rw_semaphore's count field for common values.
@@ -76,6 +82,10 @@ void __init_rwsem(struct rw_semaphore *sem, const char *name,
 	sem->count = RWSEM_UNLOCKED_VALUE;
 	raw_spin_lock_init(&sem->wait_lock);
 	INIT_LIST_HEAD(&sem->wait_list);
+#ifdef CONFIG_SMP
+	sem->owner = NULL;
+	sem->osq = NULL;
+#endif
 }
 
 EXPORT_SYMBOL(__init_rwsem);
@@ -190,7 +200,7 @@ __rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
 }
 
 /*
- * wait for the read lock to be granted
+ * Wait for the read lock to be granted
  */
 __visible
 struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
@@ -237,64 +247,221 @@ struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
237 return sem; 247 return sem;
238} 248}
239 249
250static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
251{
252 if (!(count & RWSEM_ACTIVE_MASK)) {
253 /* try acquiring the write lock */
254 if (sem->count == RWSEM_WAITING_BIAS &&
255 cmpxchg(&sem->count, RWSEM_WAITING_BIAS,
256 RWSEM_ACTIVE_WRITE_BIAS) == RWSEM_WAITING_BIAS) {
257 if (!list_is_singular(&sem->wait_list))
258 rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
259 return true;
260 }
261 }
262 return false;
263}
264
265#ifdef CONFIG_SMP
-/*
- * wait until we successfully acquire the write lock
+/*
+ * Try to acquire write lock before the writer has been put on wait queue.
+ */
269static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
270{
271 long old, count = ACCESS_ONCE(sem->count);
272
273 while (true) {
274 if (!(count == 0 || count == RWSEM_WAITING_BIAS))
275 return false;
276
277 old = cmpxchg(&sem->count, count, count + RWSEM_ACTIVE_WRITE_BIAS);
278 if (old == count)
279 return true;
280
281 count = old;
282 }
283}
284
285static inline bool rwsem_can_spin_on_owner(struct rw_semaphore *sem)
286{
287 struct task_struct *owner;
288 bool on_cpu = true;
289
290 if (need_resched())
291 return 0;
292
293 rcu_read_lock();
294 owner = ACCESS_ONCE(sem->owner);
295 if (owner)
296 on_cpu = owner->on_cpu;
297 rcu_read_unlock();
298
299 /*
300 * If sem->owner is not set, the rwsem owner may have
301 * just acquired it and not set the owner yet or the rwsem
302 * has been released.
303 */
304 return on_cpu;
305}
306
307static inline bool owner_running(struct rw_semaphore *sem,
308 struct task_struct *owner)
309{
310 if (sem->owner != owner)
311 return false;
312
313 /*
314 * Ensure we emit the owner->on_cpu, dereference _after_ checking
315 * sem->owner still matches owner, if that fails, owner might
316 * point to free()d memory, if it still matches, the rcu_read_lock()
317 * ensures the memory stays valid.
318 */
319 barrier();
320
321 return owner->on_cpu;
322}
323
324static noinline
325bool rwsem_spin_on_owner(struct rw_semaphore *sem, struct task_struct *owner)
326{
327 rcu_read_lock();
328 while (owner_running(sem, owner)) {
329 if (need_resched())
330 break;
331
332 arch_mutex_cpu_relax();
333 }
334 rcu_read_unlock();
335
336 /*
337 * We break out the loop above on need_resched() or when the
338 * owner changed, which is a sign for heavy contention. Return
339 * success only when sem->owner is NULL.
340 */
341 return sem->owner == NULL;
342}
343
344static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
345{
346 struct task_struct *owner;
347 bool taken = false;
348
349 preempt_disable();
350
351 /* sem->wait_lock should not be held when doing optimistic spinning */
352 if (!rwsem_can_spin_on_owner(sem))
353 goto done;
354
355 if (!osq_lock(&sem->osq))
356 goto done;
357
358 while (true) {
359 owner = ACCESS_ONCE(sem->owner);
360 if (owner && !rwsem_spin_on_owner(sem, owner))
361 break;
362
363 /* wait_lock will be acquired if write_lock is obtained */
364 if (rwsem_try_write_lock_unqueued(sem)) {
365 taken = true;
366 break;
367 }
368
369 /*
370 * When there's no owner, we might have preempted between the
371 * owner acquiring the lock and setting the owner field. If
372 * we're an RT task that will live-lock because we won't let
373 * the owner complete.
374 */
375 if (!owner && (need_resched() || rt_task(current)))
376 break;
377
378 /*
379 * The cpu_relax() call is a compiler barrier which forces
380 * everything in this loop to be re-loaded. We don't need
381 * memory barriers as we'll eventually observe the right
382 * values at the cost of a few extra spins.
383 */
384 arch_mutex_cpu_relax();
385 }
386 osq_unlock(&sem->osq);
387done:
388 preempt_enable();
389 return taken;
390}
391
392#else
393static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
394{
395 return false;
396}
397#endif
398
+/*
+ * Wait until we successfully acquire the write lock
  */
 __visible
 struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
 {
-	long count, adjustment = -RWSEM_ACTIVE_WRITE_BIAS;
+	long count;
+	bool waiting = true; /* any queued threads before us */
 	struct rwsem_waiter waiter;
-	struct task_struct *tsk = current;
 
-	/* set up my own style of waitqueue */
-	waiter.task = tsk;
+	/* undo write bias from down_write operation, stop active locking */
+	count = rwsem_atomic_update(-RWSEM_ACTIVE_WRITE_BIAS, sem);
+
+	/* do optimistic spinning and steal lock if possible */
+	if (rwsem_optimistic_spin(sem))
+		return sem;
+
+	/*
+	 * Optimistic spinning failed, proceed to the slowpath
+	 * and block until we can acquire the sem.
+	 */
+	waiter.task = current;
 	waiter.type = RWSEM_WAITING_FOR_WRITE;
 
 	raw_spin_lock_irq(&sem->wait_lock);
+
+	/* account for this before adding a new element to the list */
 	if (list_empty(&sem->wait_list))
-		adjustment += RWSEM_WAITING_BIAS;
+		waiting = false;
+
 	list_add_tail(&waiter.list, &sem->wait_list);
 
 	/* we're now waiting on the lock, but no longer actively locking */
-	count = rwsem_atomic_update(adjustment, sem);
+	if (waiting) {
+		count = ACCESS_ONCE(sem->count);
 
-	/* If there were already threads queued before us and there are no
-	 * active writers, the lock must be read owned; so we try to wake
-	 * any read locks that were queued ahead of us. */
-	if (count > RWSEM_WAITING_BIAS &&
-	    adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
-		sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);
+		/*
+		 * If there were already threads queued before us and there are
+		 * no active writers, the lock must be read owned; so we try to
+		 * wake any read locks that were queued ahead of us.
+		 */
+		if (count > RWSEM_WAITING_BIAS)
+			sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);
+
+	} else
+		count = rwsem_atomic_update(RWSEM_WAITING_BIAS, sem);
 
 	/* wait until we successfully acquire the lock */
-	set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+	set_current_state(TASK_UNINTERRUPTIBLE);
 	while (true) {
-		if (!(count & RWSEM_ACTIVE_MASK)) {
-			/* Try acquiring the write lock. */
-			count = RWSEM_ACTIVE_WRITE_BIAS;
-			if (!list_is_singular(&sem->wait_list))
-				count += RWSEM_WAITING_BIAS;
-
-			if (sem->count == RWSEM_WAITING_BIAS &&
-			    cmpxchg(&sem->count, RWSEM_WAITING_BIAS, count) ==
-							RWSEM_WAITING_BIAS)
-				break;
-		}
-
+		if (rwsem_try_write_lock(count, sem))
+			break;
 		raw_spin_unlock_irq(&sem->wait_lock);
 
 		/* Block until there are no active lockers. */
 		do {
 			schedule();
-			set_task_state(tsk, TASK_UNINTERRUPTIBLE);
+			set_current_state(TASK_UNINTERRUPTIBLE);
 		} while ((count = sem->count) & RWSEM_ACTIVE_MASK);
 
 		raw_spin_lock_irq(&sem->wait_lock);
 	}
+	__set_current_state(TASK_RUNNING);
 
 	list_del(&waiter.list);
 	raw_spin_unlock_irq(&sem->wait_lock);
-	tsk->state = TASK_RUNNING;
 
 	return sem;
 }
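A usage-level sketch of what the optimistic spinning added above is betting
on: write-side critical sections that are short and never sleep. The table_*
names are hypothetical:

#include <linux/rwsem.h>
#include <linux/slab.h>

static DECLARE_RWSEM(table_sem);
static void *table[64];

/* A spinning waiter only wins if the current owner releases the sem
 * quickly, so do the allocation (which may sleep) before taking it and
 * keep the locked region down to the pointer swap. */
int table_install(int slot, size_t size)
{
	void *new = kmalloc(size, GFP_KERNEL);

	if (!new)
		return -ENOMEM;

	down_write(&table_sem);
	kfree(table[slot]);
	table[slot] = new;
	up_write(&table_sem);
	return 0;
}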
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index cfff1435bdfb..42f806de49d4 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -12,6 +12,27 @@
12 12
13#include <linux/atomic.h> 13#include <linux/atomic.h>
14 14
15#if defined(CONFIG_SMP) && defined(CONFIG_RWSEM_XCHGADD_ALGORITHM)
16static inline void rwsem_set_owner(struct rw_semaphore *sem)
17{
18 sem->owner = current;
19}
20
21static inline void rwsem_clear_owner(struct rw_semaphore *sem)
22{
23 sem->owner = NULL;
24}
25
26#else
27static inline void rwsem_set_owner(struct rw_semaphore *sem)
28{
29}
30
31static inline void rwsem_clear_owner(struct rw_semaphore *sem)
32{
33}
34#endif
35
15/* 36/*
16 * lock for reading 37 * lock for reading
17 */ 38 */
@@ -48,6 +69,7 @@ void __sched down_write(struct rw_semaphore *sem)
48 rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); 69 rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
49 70
50 LOCK_CONTENDED(sem, __down_write_trylock, __down_write); 71 LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
72 rwsem_set_owner(sem);
51} 73}
52 74
53EXPORT_SYMBOL(down_write); 75EXPORT_SYMBOL(down_write);
@@ -59,8 +81,11 @@ int down_write_trylock(struct rw_semaphore *sem)
 {
 	int ret = __down_write_trylock(sem);
 
-	if (ret == 1)
+	if (ret == 1) {
 		rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_);
+		rwsem_set_owner(sem);
+	}
+
 	return ret;
 }
 
@@ -85,6 +110,7 @@ void up_write(struct rw_semaphore *sem)
85{ 110{
86 rwsem_release(&sem->dep_map, 1, _RET_IP_); 111 rwsem_release(&sem->dep_map, 1, _RET_IP_);
87 112
113 rwsem_clear_owner(sem);
88 __up_write(sem); 114 __up_write(sem);
89} 115}
90 116
@@ -99,6 +125,7 @@ void downgrade_write(struct rw_semaphore *sem)
99 * lockdep: a downgraded write will live on as a write 125 * lockdep: a downgraded write will live on as a write
100 * dependency. 126 * dependency.
101 */ 127 */
128 rwsem_clear_owner(sem);
102 __downgrade_write(sem); 129 __downgrade_write(sem);
103} 130}
104 131
@@ -122,6 +149,7 @@ void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
122 rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_); 149 rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
123 150
124 LOCK_CONTENDED(sem, __down_write_trylock, __down_write); 151 LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
152 rwsem_set_owner(sem);
125} 153}
126 154
127EXPORT_SYMBOL(_down_write_nest_lock); 155EXPORT_SYMBOL(_down_write_nest_lock);
@@ -141,6 +169,7 @@ void down_write_nested(struct rw_semaphore *sem, int subclass)
141 rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); 169 rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
142 170
143 LOCK_CONTENDED(sem, __down_write_trylock, __down_write); 171 LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
172 rwsem_set_owner(sem);
144} 173}
145 174
146EXPORT_SYMBOL(down_write_nested); 175EXPORT_SYMBOL(down_write_nested);
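Finally, a hedged sketch of downgrade_write(), whose hunk above clears ->owner
before calling __downgrade_write(): after the downgrade the task holds the sem
only as a reader, and readers are never tracked in ->owner nor spun upon. The
journal_* names are hypothetical:

#include <linux/rwsem.h>

static DECLARE_RWSEM(journal_sem);
static int journal_seq;

int journal_commit(void)
{
	int seq;

	down_write(&journal_sem);	/* rwsem_set_owner(): owner = current */
	seq = ++journal_seq;
	downgrade_write(&journal_sem);	/* rwsem_clear_owner(), keep read side */

	/*
	 * Other readers may now run concurrently, but writers are still
	 * excluded, so journal_seq cannot move under us.
	 */
	WARN_ON(journal_seq != seq);

	up_read(&journal_sem);
	return seq;
}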