author    Linus Torvalds <torvalds@linux-foundation.org>    2014-12-09 22:59:22 -0500
committer Linus Torvalds <torvalds@linux-foundation.org>    2014-12-09 22:59:22 -0500
commit    9c37f95936b6c169e89733747504879b06e77c24
tree      9d03d0c8f8b716d7d232975ca6e89f0f33cd1602
parent    a0e4467726cd26bacb16f13d207ffcfa82ffc07d
parent    78bff1c8684fb94f1ae7283688f90188b53fc433
Merge branch 'core-locking-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking tree changes from Ingo Molnar:
"Two changes: a documentation update and a ticket locks live lock fix"
* 'core-locking-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
x86/ticketlock: Fix spin_unlock_wait() livelock
locking/lglocks: Add documentation of current lglocks implementation
-rw-r--r--   Documentation/locking/lglock.txt | 166
-rw-r--r--   arch/x86/include/asm/spinlock.h  |  14
2 files changed, 179 insertions(+), 1 deletion(-)
diff --git a/Documentation/locking/lglock.txt b/Documentation/locking/lglock.txt
new file mode 100644
index 000000000000..a6971e34fabe
--- /dev/null
+++ b/Documentation/locking/lglock.txt
@@ -0,0 +1,166 @@
lglock - local/global locks for mostly local access patterns
------------------------------------------------------------

Origin: Nick Piggin's VFS scalability series introduced during
        2.6.35++ [1] [2]
Location: kernel/locking/lglock.c
          include/linux/lglock.h
Users: currently only the VFS and stop_machine related code

Design Goal:
------------

Improve scalability of globally used large data sets that are
distributed over all CPUs as per_cpu elements.

lglock is used to manage global data structures that are partitioned
across all CPUs as per_cpu elements but can mostly be handled by CPU
local actions: the majority of accesses are CPU-local reads, with
occasional CPU-local writes and very infrequent global write access.


* deal with things locally whenever possible
  - very fast access to the local per_cpu data
  - reasonably fast access to specific per_cpu data on a different
    CPU
* while making global action possible when needed
  - by expensive access to all CPUs' locks - effectively
    resulting in a globally visible critical section.

Design:
-------

Basically it is an array of per_cpu spinlocks: lg_local_lock/unlock
access the local CPU's lock object, while lg_local_lock_cpu/unlock_cpu
access a remote CPU's lock object. lg_local_lock has to disable
preemption as migration protection so that the reference to the local
CPU's lock does not go out of scope. Because lg_local_lock/unlock only
touch CPU-local resources they are fast. Taking the lock of a different
CPU is more expensive but still relatively cheap.
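
As an illustration, this is roughly what the local fast path boils down
to in kernel/locking/lglock.c (a simplified sketch; the lockdep
annotations of the real code are omitted):

    void lg_local_lock(struct lglock *lg)
    {
        arch_spinlock_t *lock;

        preempt_disable();              /* migration protection */
        lock = this_cpu_ptr(lg->lock);  /* this CPU's spinlock */
        arch_spin_lock(lock);
    }

    void lg_local_unlock(struct lglock *lg)
    {
        arch_spinlock_t *lock;

        lock = this_cpu_ptr(lg->lock);
        arch_spin_unlock(lock);
        preempt_enable();
    }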

One can relax the migration constraints by acquiring the current CPU's
lock with lg_local_lock_cpu, remembering the cpu, and releasing that
lock at the end of the critical section even if migrated (see the
sketch below). This should give most of the performance benefits
without inhibiting migration, though it needs careful consideration of
lglock nesting and of deadlocks with lg_global_lock.
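
A hedged sketch of that pattern - my_lglock and my_data are made-up
names used purely for illustration, not taken from the kernel sources:

    int cpu;

    cpu = get_cpu();    /* the current CPU is only a locality hint */
    put_cpu();

    lg_local_lock_cpu(&my_lglock, cpu);
    /* ... access per_cpu_ptr(&my_data, cpu) ... */
    lg_local_unlock_cpu(&my_lglock, cpu);

Because the cpu is passed explicitly, the matching unlock always
releases the lock that was actually taken, independent of where the
task happens to run when it unlocks.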

The lg_global_lock/unlock locks all underlying spinlocks of all
possible CPUs (including those off-line). The preemption disable/enable
are needed in the non-RT kernels to prevent deadlocks like:

                             on cpu 1

                     task A          task B
                 lg_global_lock
                     got cpu 0 lock
                                     <<<< preempt <<<<
                                     lg_local_lock_cpu for cpu 0
                                     spin on cpu 0 lock

On -RT this deadlock scenario is resolved by replacing the
arch_spin_locks in the lglocks with rt_mutexes, which avoid the above
deadlock by boosting the lock-holder.
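
For non-RT kernels the global side is roughly the following (again a
simplified sketch with the lockdep annotations omitted); the
preempt_disable() is what prevents the preemption-based deadlock shown
above:

    void lg_global_lock(struct lglock *lg)
    {
        int i;

        preempt_disable();
        for_each_possible_cpu(i)
            arch_spin_lock(per_cpu_ptr(lg->lock, i));
    }

    void lg_global_unlock(struct lglock *lg)
    {
        int i;

        for_each_possible_cpu(i)
            arch_spin_unlock(per_cpu_ptr(lg->lock, i));
        preempt_enable();
    }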


Implementation:
---------------

The initial lglock implementation from Nick Piggin used some complex
macros to generate the lglock/brlock in lglock.h - they were later
turned into a set of functions by Andi Kleen [7]. The change to
functions was motivated by the presence of multiple lock users and by
functions being easier to maintain than the generating macros. This
change to functions is also the basis for eliminating the restriction
that lglocks cannot be initialized in kernel modules (the remaining
problem is that the locks are not explicitly initialized - see
lockdep-design.txt).

Declaration and initialization:
-------------------------------

  #include <linux/lglock.h>

  DEFINE_LGLOCK(name);
  or:
  DEFINE_STATIC_LGLOCK(name);

  lg_lock_init(&name, "lockdep_name_string");

On UP this is mapped to DEFINE_SPINLOCK(name) in both cases. Note also
that as of 3.18-rc6 all declarations in use are of the _STATIC_ variant
(and it seems that the non-static variant was never used).
lg_lock_init initializes the lockdep map only.
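
A minimal sketch of the pattern, protecting a hypothetical per-CPU list
(my_lglock, my_list and my_subsys_init are invented names; built-in
code rather than a module, given the restriction noted below):

    static DEFINE_PER_CPU(struct list_head, my_list);
    DEFINE_STATIC_LGLOCK(my_lglock);

    static int __init my_subsys_init(void)
    {
        int i;

        lg_lock_init(&my_lglock, "my_lglock");
        for_each_possible_cpu(i)
            INIT_LIST_HEAD(per_cpu_ptr(&my_list, i));
        return 0;
    }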

Usage:
------

In terms of locking semantics an lglock is a spinlock - it could be
called a locality-aware spinlock. lg_local_* behaves like a per_cpu
spinlock and lg_global_* like a global spinlock.
No surprises in the API.

  lg_local_lock(*lglock);
     access to the protected per_cpu object on this CPU
  lg_local_unlock(*lglock);

  lg_local_lock_cpu(*lglock, cpu);
     access to the protected per_cpu object on the remote CPU 'cpu'
  lg_local_unlock_cpu(*lglock, cpu);

  lg_global_lock(*lglock);
     access to all protected per_cpu objects on all CPUs
  lg_global_unlock(*lglock);

There are no _trylock variants of the lglocks.

Note that lg_global_lock/unlock has to iterate over all possible CPUs
rather than only the actually present CPUs, since otherwise a CPU could
go off-line with a held lock [4], and that makes it very expensive. A
discussion of these issues can be found at [5].
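
Putting it together, a hedged usage sketch that continues the invented
my_list/my_lglock example from above (obj is assumed to be an object
with a list_head 'node' member; obj, i and do_something are likewise
hypothetical, not an actual in-tree user):

    /* fast path: add an object to this CPU's list */
    lg_local_lock(&my_lglock);
    list_add(&obj->node, this_cpu_ptr(&my_list));
    lg_local_unlock(&my_lglock);

    /* slow path: walk all per-CPU lists under the global lock */
    lg_global_lock(&my_lglock);
    for_each_possible_cpu(i)
        list_for_each_entry(obj, per_cpu_ptr(&my_list, i), node)
            do_something(obj);
    lg_global_unlock(&my_lglock);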

Constraints:
------------

* currently the declaration of lglocks in kernel modules is not
  possible, though this should be doable with little change.
* lglocks are not recursive.
* suitable for code that can do most operations on the CPU local
  data and will very rarely need the global lock
* lg_global_lock/unlock is *very* expensive and does not scale
* on UP systems all lg_* primitives are simply spinlocks
* in PREEMPT_RT the spinlock becomes an rt-mutex and can sleep but
  does not change the task's state while sleeping [6].
* in PREEMPT_RT the preempt_disable/enable in lg_local_lock/unlock
  is downgraded to a migrate_disable/enable, the other
  preempt_disable/enable are downgraded to barriers [6].
  The deadlock noted for non-RT above is resolved due to rt_mutexes
  boosting the lock-holder in this case, which arch_spin_locks do
  not do.

lglocks were designed for very specific problems in the VFS and are
probably only the right answer in those corner cases. Any new user
considering lglocks should probably look at the seqlock and RCU
alternatives first. There are also efforts to resolve the RCU issues
that currently prevent using RCU in place of the few remaining lglocks.

Note on brlock history:
-----------------------

The 'Big Reader' read-write spinlocks were originally introduced by
Ingo Molnar in 2000 (2.4/2.5 kernel series) and removed in 2003. They
were later reintroduced by the VFS scalability patch set in the 2.6
series as the "big reader lock" brlock [2] variant of lglock. The
brlock was replaced by seqlock or RCU based primitives, as had been
suggested in [3] back in 2003, and was entirely removed in the 3.13
kernel series.

Link: 1 http://lkml.org/lkml/2010/8/2/81
Link: 2 http://lwn.net/Articles/401738/
Link: 3 http://lkml.org/lkml/2003/3/9/205
Link: 4 https://lkml.org/lkml/2011/8/24/185
Link: 5 http://lkml.org/lkml/2011/12/18/189
Link: 6 https://www.kernel.org/pub/linux/kernel/projects/rt/
        patch series - lglocks-rt.patch.patch
Link: 7 http://lkml.org/lkml/2012/3/5/26
diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h
index 9295016485c9..a4efe477ceab 100644
--- a/arch/x86/include/asm/spinlock.h
+++ b/arch/x86/include/asm/spinlock.h
@@ -183,8 +183,20 @@ static __always_inline void arch_spin_lock_flags(arch_spinlock_t *lock,
 
 static inline void arch_spin_unlock_wait(arch_spinlock_t *lock)
 {
-	while (arch_spin_is_locked(lock))
+	__ticket_t head = ACCESS_ONCE(lock->tickets.head);
+
+	for (;;) {
+		struct __raw_tickets tmp = ACCESS_ONCE(lock->tickets);
+		/*
+		 * We need to check "unlocked" in a loop, tmp.head == head
+		 * can be false positive because of overflow.
+		 */
+		if (tmp.head == (tmp.tail & ~TICKET_SLOWPATH_FLAG) ||
+		    tmp.head != head)
+			break;
+
 		cpu_relax();
+	}
 }
 
 /*