62 files changed, 983 insertions, 1925 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 52cd9341e03c..7be412e1a380 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9100,7 +9100,6 @@ F:	arch/*/include/asm/spinlock*.h
 F:	include/linux/rwlock*.h
 F:	include/linux/mutex*.h
 F:	include/linux/rwsem*.h
-F:	arch/*/include/asm/rwsem.h
 F:	include/linux/seqlock.h
 F:	lib/locking*.[ch]
 F:	kernel/locking/
diff --git a/arch/Kconfig b/arch/Kconfig
index a826843470ed..3ab446bd12ef 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -907,6 +907,15 @@ config HAVE_ARCH_PREL32_RELOCATIONS
 config ARCH_USE_MEMREMAP_PROT
 	bool
 
+config LOCK_EVENT_COUNTS
+	bool "Locking event counts collection"
+	depends on DEBUG_FS
+	---help---
+	  Enable light-weight counting of various locking related events
+	  in the system with minimal performance impact. This reduces
+	  the chance of application behavior change because of timing
+	  differences. The counts are reported via debugfs.
+
 source "kernel/gcov/Kconfig"
 
 source "scripts/gcc-plugins/Kconfig"
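Note: the LOCK_EVENT_COUNTS help text above describes lightweight event counting reported through debugfs. As a rough illustration of that idea only (the identifiers, layout, and CPU handling below are invented for this sketch and are not the kernel's implementation), per-event counters can be kept per CPU so the hot path is a single increment and only the read side pays for summing:

    /* Illustrative sketch: per-CPU event counters, summed on read. */
    #include <stdio.h>

    #define NR_CPUS   4
    #define NR_EVENTS 2

    enum lock_event { EV_RWSEM_SLEEP, EV_RWSEM_WAKE };

    static unsigned long counts[NR_CPUS][NR_EVENTS];

    /* Hot path: one array increment on the current CPU, no locking. */
    static inline void lock_event_inc(int cpu, enum lock_event ev)
    {
            counts[cpu][ev]++;
    }

    /* Slow path (conceptually, a debugfs read): sum across CPUs. */
    static unsigned long lock_event_sum(enum lock_event ev)
    {
            unsigned long sum = 0;

            for (int cpu = 0; cpu < NR_CPUS; cpu++)
                    sum += counts[cpu][ev];
            return sum;
    }

    int main(void)
    {
            lock_event_inc(0, EV_RWSEM_SLEEP);
            lock_event_inc(1, EV_RWSEM_SLEEP);
            printf("rwsem_sleep = %lu\n", lock_event_sum(EV_RWSEM_SLEEP));
            return 0;
    }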
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index c7c976eb6407..f7b19b813a70 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -50,13 +50,6 @@ config MMU
 	bool
 	default y
 
-config RWSEM_GENERIC_SPINLOCK
-	bool
-
-config RWSEM_XCHGADD_ALGORITHM
-	bool
-	default y
-
 config ARCH_HAS_ILOG2_U32
 	bool
 	default n
diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h
deleted file mode 100644
index cf8fc8f9a2ed..000000000000
--- a/arch/alpha/include/asm/rwsem.h
+++ /dev/null
@@ -1,211 +0,0 @@
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | #ifndef _ALPHA_RWSEM_H | ||
3 | #define _ALPHA_RWSEM_H | ||
4 | |||
5 | /* | ||
6 | * Written by Ivan Kokshaysky <ink@jurassic.park.msu.ru>, 2001. | ||
7 | * Based on asm-alpha/semaphore.h and asm-i386/rwsem.h | ||
8 | */ | ||
9 | |||
10 | #ifndef _LINUX_RWSEM_H | ||
11 | #error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead" | ||
12 | #endif | ||
13 | |||
14 | #ifdef __KERNEL__ | ||
15 | |||
16 | #include <linux/compiler.h> | ||
17 | |||
18 | #define RWSEM_UNLOCKED_VALUE 0x0000000000000000L | ||
19 | #define RWSEM_ACTIVE_BIAS 0x0000000000000001L | ||
20 | #define RWSEM_ACTIVE_MASK 0x00000000ffffffffL | ||
21 | #define RWSEM_WAITING_BIAS (-0x0000000100000000L) | ||
22 | #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS | ||
23 | #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) | ||
24 | |||
25 | static inline int ___down_read(struct rw_semaphore *sem) | ||
26 | { | ||
27 | long oldcount; | ||
28 | #ifndef CONFIG_SMP | ||
29 | oldcount = sem->count.counter; | ||
30 | sem->count.counter += RWSEM_ACTIVE_READ_BIAS; | ||
31 | #else | ||
32 | long temp; | ||
33 | __asm__ __volatile__( | ||
34 | "1: ldq_l %0,%1\n" | ||
35 | " addq %0,%3,%2\n" | ||
36 | " stq_c %2,%1\n" | ||
37 | " beq %2,2f\n" | ||
38 | " mb\n" | ||
39 | ".subsection 2\n" | ||
40 | "2: br 1b\n" | ||
41 | ".previous" | ||
42 | :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp) | ||
43 | :"Ir" (RWSEM_ACTIVE_READ_BIAS), "m" (sem->count) : "memory"); | ||
44 | #endif | ||
45 | return (oldcount < 0); | ||
46 | } | ||
47 | |||
48 | static inline void __down_read(struct rw_semaphore *sem) | ||
49 | { | ||
50 | if (unlikely(___down_read(sem))) | ||
51 | rwsem_down_read_failed(sem); | ||
52 | } | ||
53 | |||
54 | static inline int __down_read_killable(struct rw_semaphore *sem) | ||
55 | { | ||
56 | if (unlikely(___down_read(sem))) | ||
57 | if (IS_ERR(rwsem_down_read_failed_killable(sem))) | ||
58 | return -EINTR; | ||
59 | |||
60 | return 0; | ||
61 | } | ||
62 | |||
63 | /* | ||
64 | * trylock for reading -- returns 1 if successful, 0 if contention | ||
65 | */ | ||
66 | static inline int __down_read_trylock(struct rw_semaphore *sem) | ||
67 | { | ||
68 | long old, new, res; | ||
69 | |||
70 | res = atomic_long_read(&sem->count); | ||
71 | do { | ||
72 | new = res + RWSEM_ACTIVE_READ_BIAS; | ||
73 | if (new <= 0) | ||
74 | break; | ||
75 | old = res; | ||
76 | res = atomic_long_cmpxchg(&sem->count, old, new); | ||
77 | } while (res != old); | ||
78 | return res >= 0 ? 1 : 0; | ||
79 | } | ||
80 | |||
81 | static inline long ___down_write(struct rw_semaphore *sem) | ||
82 | { | ||
83 | long oldcount; | ||
84 | #ifndef CONFIG_SMP | ||
85 | oldcount = sem->count.counter; | ||
86 | sem->count.counter += RWSEM_ACTIVE_WRITE_BIAS; | ||
87 | #else | ||
88 | long temp; | ||
89 | __asm__ __volatile__( | ||
90 | "1: ldq_l %0,%1\n" | ||
91 | " addq %0,%3,%2\n" | ||
92 | " stq_c %2,%1\n" | ||
93 | " beq %2,2f\n" | ||
94 | " mb\n" | ||
95 | ".subsection 2\n" | ||
96 | "2: br 1b\n" | ||
97 | ".previous" | ||
98 | :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp) | ||
99 | :"Ir" (RWSEM_ACTIVE_WRITE_BIAS), "m" (sem->count) : "memory"); | ||
100 | #endif | ||
101 | return oldcount; | ||
102 | } | ||
103 | |||
104 | static inline void __down_write(struct rw_semaphore *sem) | ||
105 | { | ||
106 | if (unlikely(___down_write(sem))) | ||
107 | rwsem_down_write_failed(sem); | ||
108 | } | ||
109 | |||
110 | static inline int __down_write_killable(struct rw_semaphore *sem) | ||
111 | { | ||
112 | if (unlikely(___down_write(sem))) { | ||
113 | if (IS_ERR(rwsem_down_write_failed_killable(sem))) | ||
114 | return -EINTR; | ||
115 | } | ||
116 | |||
117 | return 0; | ||
118 | } | ||
119 | |||
120 | /* | ||
121 | * trylock for writing -- returns 1 if successful, 0 if contention | ||
122 | */ | ||
123 | static inline int __down_write_trylock(struct rw_semaphore *sem) | ||
124 | { | ||
125 | long ret = atomic_long_cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE, | ||
126 | RWSEM_ACTIVE_WRITE_BIAS); | ||
127 | if (ret == RWSEM_UNLOCKED_VALUE) | ||
128 | return 1; | ||
129 | return 0; | ||
130 | } | ||
131 | |||
132 | static inline void __up_read(struct rw_semaphore *sem) | ||
133 | { | ||
134 | long oldcount; | ||
135 | #ifndef CONFIG_SMP | ||
136 | oldcount = sem->count.counter; | ||
137 | sem->count.counter -= RWSEM_ACTIVE_READ_BIAS; | ||
138 | #else | ||
139 | long temp; | ||
140 | __asm__ __volatile__( | ||
141 | " mb\n" | ||
142 | "1: ldq_l %0,%1\n" | ||
143 | " subq %0,%3,%2\n" | ||
144 | " stq_c %2,%1\n" | ||
145 | " beq %2,2f\n" | ||
146 | ".subsection 2\n" | ||
147 | "2: br 1b\n" | ||
148 | ".previous" | ||
149 | :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp) | ||
150 | :"Ir" (RWSEM_ACTIVE_READ_BIAS), "m" (sem->count) : "memory"); | ||
151 | #endif | ||
152 | if (unlikely(oldcount < 0)) | ||
153 | if ((int)oldcount - RWSEM_ACTIVE_READ_BIAS == 0) | ||
154 | rwsem_wake(sem); | ||
155 | } | ||
156 | |||
157 | static inline void __up_write(struct rw_semaphore *sem) | ||
158 | { | ||
159 | long count; | ||
160 | #ifndef CONFIG_SMP | ||
161 | sem->count.counter -= RWSEM_ACTIVE_WRITE_BIAS; | ||
162 | count = sem->count.counter; | ||
163 | #else | ||
164 | long temp; | ||
165 | __asm__ __volatile__( | ||
166 | " mb\n" | ||
167 | "1: ldq_l %0,%1\n" | ||
168 | " subq %0,%3,%2\n" | ||
169 | " stq_c %2,%1\n" | ||
170 | " beq %2,2f\n" | ||
171 | " subq %0,%3,%0\n" | ||
172 | ".subsection 2\n" | ||
173 | "2: br 1b\n" | ||
174 | ".previous" | ||
175 | :"=&r" (count), "=m" (sem->count), "=&r" (temp) | ||
176 | :"Ir" (RWSEM_ACTIVE_WRITE_BIAS), "m" (sem->count) : "memory"); | ||
177 | #endif | ||
178 | if (unlikely(count)) | ||
179 | if ((int)count == 0) | ||
180 | rwsem_wake(sem); | ||
181 | } | ||
182 | |||
183 | /* | ||
184 | * downgrade write lock to read lock | ||
185 | */ | ||
186 | static inline void __downgrade_write(struct rw_semaphore *sem) | ||
187 | { | ||
188 | long oldcount; | ||
189 | #ifndef CONFIG_SMP | ||
190 | oldcount = sem->count.counter; | ||
191 | sem->count.counter -= RWSEM_WAITING_BIAS; | ||
192 | #else | ||
193 | long temp; | ||
194 | __asm__ __volatile__( | ||
195 | "1: ldq_l %0,%1\n" | ||
196 | " addq %0,%3,%2\n" | ||
197 | " stq_c %2,%1\n" | ||
198 | " beq %2,2f\n" | ||
199 | " mb\n" | ||
200 | ".subsection 2\n" | ||
201 | "2: br 1b\n" | ||
202 | ".previous" | ||
203 | :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp) | ||
204 | :"Ir" (-RWSEM_WAITING_BIAS), "m" (sem->count) : "memory"); | ||
205 | #endif | ||
206 | if (unlikely(oldcount < 0)) | ||
207 | rwsem_downgrade_wake(sem); | ||
208 | } | ||
209 | |||
210 | #endif /* __KERNEL__ */ | ||
211 | #endif /* _ALPHA_RWSEM_H */ | ||
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index c781e45d1d99..23e063df5d2c 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -63,9 +63,6 @@ config SCHED_OMIT_FRAME_POINTER
 config GENERIC_CSUM
 	def_bool y
 
-config RWSEM_GENERIC_SPINLOCK
-	def_bool y
-
 config ARCH_DISCONTIGMEM_ENABLE
 	def_bool n
 
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 9aed25a6019b..dc9855c4a3b4 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -178,10 +178,6 @@ config TRACE_IRQFLAGS_SUPPORT
 	bool
 	default !CPU_V7M
 
-config RWSEM_XCHGADD_ALGORITHM
-	bool
-	default y
-
 config ARCH_HAS_ILOG2_U32
 	bool
 
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index a8a4eb7f6dae..8fb51b7bf1d5 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -12,7 +12,6 @@ generic-y += mm-arch-hooks.h
 generic-y += msi.h
 generic-y += parport.h
 generic-y += preempt.h
-generic-y += rwsem.h
 generic-y += seccomp.h
 generic-y += segment.h
 generic-y += serial.h
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 78d9fafac983..d81adca1b04d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -236,9 +236,6 @@ config LOCKDEP_SUPPORT
 config TRACE_IRQFLAGS_SUPPORT
 	def_bool y
 
-config RWSEM_XCHGADD_ALGORITHM
-	def_bool y
-
 config GENERIC_BUG
 	def_bool y
 	depends on BUG
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 1e17ea5c372b..60a933b07001 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -16,7 +16,6 @@ generic-y += mm-arch-hooks.h
 generic-y += msi.h
 generic-y += qrwlock.h
 generic-y += qspinlock.h
-generic-y += rwsem.h
 generic-y += segment.h
 generic-y += serial.h
 generic-y += set_memory.h
diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig
index 3bb75e674161..eeb0471268a0 100644
--- a/arch/c6x/Kconfig
+++ b/arch/c6x/Kconfig
@@ -28,9 +28,6 @@ config MMU
 config FPU
 	def_bool n
 
-config RWSEM_GENERIC_SPINLOCK
-	def_bool y
-
 config GENERIC_CALIBRATE_DELAY
 	def_bool y
 
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 725a115759c9..6555d1781132 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -92,9 +92,6 @@ config GENERIC_HWEIGHT
 config MMU
 	def_bool y
 
-config RWSEM_GENERIC_SPINLOCK
-	def_bool y
-
 config STACKTRACE_SUPPORT
 	def_bool y
 
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index c071da34e081..61c01db6c292 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -27,9 +27,6 @@ config H8300
 config CPU_BIG_ENDIAN
 	def_bool y
 
-config RWSEM_GENERIC_SPINLOCK
-	def_bool y
-
 config GENERIC_HWEIGHT
 	def_bool y
 
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index ac441680dcc0..3e54a53208d5 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -65,12 +65,6 @@ config GENERIC_CSUM
 config GENERIC_IRQ_PROBE
 	def_bool y
 
-config RWSEM_GENERIC_SPINLOCK
-	def_bool n
-
-config RWSEM_XCHGADD_ALGORITHM
-	def_bool y
-
 config GENERIC_HWEIGHT
 	def_bool y
 
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index d046e8ccdf78..3ff5f297acda 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -27,7 +27,6 @@ generic-y += mm-arch-hooks.h
 generic-y += pci.h
 generic-y += percpu.h
 generic-y += preempt.h
-generic-y += rwsem.h
 generic-y += sections.h
 generic-y += segment.h
 generic-y += serial.h
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 8d7396bd1790..73a26f04644e 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -83,10 +83,6 @@ config STACKTRACE_SUPPORT
 config GENERIC_LOCKBREAK
 	def_bool n
 
-config RWSEM_XCHGADD_ALGORITHM
-	bool
-	default y
-
 config HUGETLB_PAGE_SIZE_VARIABLE
 	bool
 	depends on HUGETLB_PAGE
diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h
deleted file mode 100644
index 917910607e0e..000000000000
--- a/arch/ia64/include/asm/rwsem.h
+++ /dev/null
@@ -1,172 +0,0 @@
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | /* | ||
3 | * R/W semaphores for ia64 | ||
4 | * | ||
5 | * Copyright (C) 2003 Ken Chen <kenneth.w.chen@intel.com> | ||
6 | * Copyright (C) 2003 Asit Mallick <asit.k.mallick@intel.com> | ||
7 | * Copyright (C) 2005 Christoph Lameter <cl@linux.com> | ||
8 | * | ||
9 | * Based on asm-i386/rwsem.h and other architecture implementation. | ||
10 | * | ||
11 | * The MSW of the count is the negated number of active writers and | ||
12 | * waiting lockers, and the LSW is the total number of active locks. | ||
13 | * | ||
14 | * The lock count is initialized to 0 (no active and no waiting lockers). | ||
15 | * | ||
16 | * When a writer subtracts WRITE_BIAS, it'll get 0xffffffff00000001 for | ||
17 | * the case of an uncontended lock. Readers increment by 1 and see a positive | ||
18 | * value when uncontended, negative if there are writers (and maybe) readers | ||
19 | * waiting (in which case it goes to sleep). | ||
20 | */ | ||
21 | |||
22 | #ifndef _ASM_IA64_RWSEM_H | ||
23 | #define _ASM_IA64_RWSEM_H | ||
24 | |||
25 | #ifndef _LINUX_RWSEM_H | ||
26 | #error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead." | ||
27 | #endif | ||
28 | |||
29 | #include <asm/intrinsics.h> | ||
30 | |||
31 | #define RWSEM_UNLOCKED_VALUE __IA64_UL_CONST(0x0000000000000000) | ||
32 | #define RWSEM_ACTIVE_BIAS (1L) | ||
33 | #define RWSEM_ACTIVE_MASK (0xffffffffL) | ||
34 | #define RWSEM_WAITING_BIAS (-0x100000000L) | ||
35 | #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS | ||
36 | #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) | ||
37 | |||
38 | /* | ||
39 | * lock for reading | ||
40 | */ | ||
41 | static inline int | ||
42 | ___down_read (struct rw_semaphore *sem) | ||
43 | { | ||
44 | long result = ia64_fetchadd8_acq((unsigned long *)&sem->count.counter, 1); | ||
45 | |||
46 | return (result < 0); | ||
47 | } | ||
48 | |||
49 | static inline void | ||
50 | __down_read (struct rw_semaphore *sem) | ||
51 | { | ||
52 | if (___down_read(sem)) | ||
53 | rwsem_down_read_failed(sem); | ||
54 | } | ||
55 | |||
56 | static inline int | ||
57 | __down_read_killable (struct rw_semaphore *sem) | ||
58 | { | ||
59 | if (___down_read(sem)) | ||
60 | if (IS_ERR(rwsem_down_read_failed_killable(sem))) | ||
61 | return -EINTR; | ||
62 | |||
63 | return 0; | ||
64 | } | ||
65 | |||
66 | /* | ||
67 | * lock for writing | ||
68 | */ | ||
69 | static inline long | ||
70 | ___down_write (struct rw_semaphore *sem) | ||
71 | { | ||
72 | long old, new; | ||
73 | |||
74 | do { | ||
75 | old = atomic_long_read(&sem->count); | ||
76 | new = old + RWSEM_ACTIVE_WRITE_BIAS; | ||
77 | } while (atomic_long_cmpxchg_acquire(&sem->count, old, new) != old); | ||
78 | |||
79 | return old; | ||
80 | } | ||
81 | |||
82 | static inline void | ||
83 | __down_write (struct rw_semaphore *sem) | ||
84 | { | ||
85 | if (___down_write(sem)) | ||
86 | rwsem_down_write_failed(sem); | ||
87 | } | ||
88 | |||
89 | static inline int | ||
90 | __down_write_killable (struct rw_semaphore *sem) | ||
91 | { | ||
92 | if (___down_write(sem)) { | ||
93 | if (IS_ERR(rwsem_down_write_failed_killable(sem))) | ||
94 | return -EINTR; | ||
95 | } | ||
96 | |||
97 | return 0; | ||
98 | } | ||
99 | |||
100 | /* | ||
101 | * unlock after reading | ||
102 | */ | ||
103 | static inline void | ||
104 | __up_read (struct rw_semaphore *sem) | ||
105 | { | ||
106 | long result = ia64_fetchadd8_rel((unsigned long *)&sem->count.counter, -1); | ||
107 | |||
108 | if (result < 0 && (--result & RWSEM_ACTIVE_MASK) == 0) | ||
109 | rwsem_wake(sem); | ||
110 | } | ||
111 | |||
112 | /* | ||
113 | * unlock after writing | ||
114 | */ | ||
115 | static inline void | ||
116 | __up_write (struct rw_semaphore *sem) | ||
117 | { | ||
118 | long old, new; | ||
119 | |||
120 | do { | ||
121 | old = atomic_long_read(&sem->count); | ||
122 | new = old - RWSEM_ACTIVE_WRITE_BIAS; | ||
123 | } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old); | ||
124 | |||
125 | if (new < 0 && (new & RWSEM_ACTIVE_MASK) == 0) | ||
126 | rwsem_wake(sem); | ||
127 | } | ||
128 | |||
129 | /* | ||
130 | * trylock for reading -- returns 1 if successful, 0 if contention | ||
131 | */ | ||
132 | static inline int | ||
133 | __down_read_trylock (struct rw_semaphore *sem) | ||
134 | { | ||
135 | long tmp; | ||
136 | while ((tmp = atomic_long_read(&sem->count)) >= 0) { | ||
137 | if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, tmp+1)) { | ||
138 | return 1; | ||
139 | } | ||
140 | } | ||
141 | return 0; | ||
142 | } | ||
143 | |||
144 | /* | ||
145 | * trylock for writing -- returns 1 if successful, 0 if contention | ||
146 | */ | ||
147 | static inline int | ||
148 | __down_write_trylock (struct rw_semaphore *sem) | ||
149 | { | ||
150 | long tmp = atomic_long_cmpxchg_acquire(&sem->count, | ||
151 | RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS); | ||
152 | return tmp == RWSEM_UNLOCKED_VALUE; | ||
153 | } | ||
154 | |||
155 | /* | ||
156 | * downgrade write lock to read lock | ||
157 | */ | ||
158 | static inline void | ||
159 | __downgrade_write (struct rw_semaphore *sem) | ||
160 | { | ||
161 | long old, new; | ||
162 | |||
163 | do { | ||
164 | old = atomic_long_read(&sem->count); | ||
165 | new = old - RWSEM_WAITING_BIAS; | ||
166 | } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old); | ||
167 | |||
168 | if (old < 0) | ||
169 | rwsem_downgrade_wake(sem); | ||
170 | } | ||
171 | |||
172 | #endif /* _ASM_IA64_RWSEM_H */ | ||
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 4e37efbc9296..735b9679fe6f 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -33,13 +33,6 @@ config M68K
 config CPU_BIG_ENDIAN
 	def_bool y
 
-config RWSEM_GENERIC_SPINLOCK
-	bool
-	default y
-
-config RWSEM_XCHGADD_ALGORITHM
-	bool
-
 config ARCH_HAS_ILOG2_U32
 	bool
 
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 321e398ab6b5..adb179f519f9 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -59,15 +59,9 @@ config CPU_LITTLE_ENDIAN
 
 endchoice
 
-config RWSEM_GENERIC_SPINLOCK
-	def_bool y
-
 config ZONE_DMA
 	def_bool y
 
-config RWSEM_XCHGADD_ALGORITHM
-	bool
-
 config ARCH_HAS_ILOG2_U32
 	def_bool n
 
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 4a5f5b0ee9a9..b9c48b27162d 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1037,13 +1037,6 @@ source "arch/mips/paravirt/Kconfig"
 
 endmenu
 
-config RWSEM_GENERIC_SPINLOCK
-	bool
-	default y
-
-config RWSEM_XCHGADD_ALGORITHM
-	bool
-
 config GENERIC_HWEIGHT
 	bool
 	default y
diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index addb7f5f5264..55559ca0efe4 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -60,9 +60,6 @@ config GENERIC_LOCKBREAK
 	def_bool y
 	depends on PREEMPT
 
-config RWSEM_GENERIC_SPINLOCK
-	def_bool y
-
 config TRACE_IRQFLAGS_SUPPORT
 	def_bool y
 
diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig
index 3633f8144367..ea37394ff3ea 100644
--- a/arch/nios2/Kconfig
+++ b/arch/nios2/Kconfig
@@ -41,9 +41,6 @@ config NO_IOPORT_MAP
 config FPU
 	def_bool n
 
-config RWSEM_GENERIC_SPINLOCK
-	def_bool y
-
 config TRACE_IRQFLAGS_SUPPORT
 	def_bool n
 
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index c6cf8a49a0ab..7cfb20555b10 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -44,12 +44,6 @@ config CPU_BIG_ENDIAN
 config MMU
 	def_bool y
 
-config RWSEM_GENERIC_SPINLOCK
-	def_bool y
-
-config RWSEM_XCHGADD_ALGORITHM
-	def_bool n
-
 config GENERIC_HWEIGHT
 	def_bool y
 
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index c8e621296092..f1ed8ddfe486 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -75,12 +75,6 @@ config GENERIC_LOCKBREAK
 	default y
 	depends on SMP && PREEMPT
 
-config RWSEM_GENERIC_SPINLOCK
-	def_bool y
-
-config RWSEM_XCHGADD_ALGORITHM
-	bool
-
 config ARCH_HAS_ILOG2_U32
 	bool
 	default n
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 8e1e2abf17eb..fab0bf4259c7 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -103,13 +103,6 @@ config LOCKDEP_SUPPORT
 	bool
 	default y
 
-config RWSEM_GENERIC_SPINLOCK
-	bool
-
-config RWSEM_XCHGADD_ALGORITHM
-	bool
-	default y
-
 config GENERIC_LOCKBREAK
 	bool
 	default y
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index a0c132bedfae..36bda391e549 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -8,6 +8,5 @@ generic-y += irq_regs.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
-generic-y += rwsem.h
 generic-y += vtime.h
 generic-y += msi.h
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index eb56c82d8aa1..0582260fb6c2 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -69,9 +69,6 @@ config STACKTRACE_SUPPORT
 config TRACE_IRQFLAGS_SUPPORT
 	def_bool y
 
-config RWSEM_GENERIC_SPINLOCK
-	def_bool y
-
 config GENERIC_BUG
 	def_bool y
 	depends on BUG
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index cf06e313e103..97b555e772d7 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -14,12 +14,6 @@ config LOCKDEP_SUPPORT
 config STACKTRACE_SUPPORT
 	def_bool y
 
-config RWSEM_GENERIC_SPINLOCK
-	bool
-
-config RWSEM_XCHGADD_ALGORITHM
-	def_bool y
-
 config ARCH_HAS_ILOG2_U32
 	def_bool n
 
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 12d77cb11fe5..d5fadefea33c 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -20,7 +20,6 @@ generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += rwsem.h
 generic-y += trace_clock.h
 generic-y += unaligned.h
 generic-y += word-at-a-time.h
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index b1c91ea9a958..0be08d586d40 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -90,12 +90,6 @@ config ARCH_DEFCONFIG
 	default "arch/sh/configs/shx3_defconfig" if SUPERH32
 	default "arch/sh/configs/cayman_defconfig" if SUPERH64
 
-config RWSEM_GENERIC_SPINLOCK
-	def_bool y
-
-config RWSEM_XCHGADD_ALGORITHM
-	bool
-
 config GENERIC_BUG
 	def_bool y
 	depends on BUG && SUPERH32
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index 7bf2cb680d32..73fff39a0122 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -17,7 +17,6 @@ generic-y += mm-arch-hooks.h
 generic-y += parport.h
 generic-y += percpu.h
 generic-y += preempt.h
-generic-y += rwsem.h
 generic-y += serial.h
 generic-y += sizes.h
 generic-y += trace_clock.h
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index db79290ed6d5..f6421c9ce5d3 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -192,14 +192,6 @@ config NR_CPUS
 
 source "kernel/Kconfig.hz"
 
-config RWSEM_GENERIC_SPINLOCK
-	bool
-	default y if SPARC32
-
-config RWSEM_XCHGADD_ALGORITHM
-	bool
-	default y if SPARC64
-
 config GENERIC_HWEIGHT
 	bool
 	default y
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index a22cfd5c0ee8..2ca3200d3616 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -18,7 +18,6 @@ generic-y += mm-arch-hooks.h
 generic-y += module.h
 generic-y += msi.h
 generic-y += preempt.h
-generic-y += rwsem.h
 generic-y += serial.h
 generic-y += trace_clock.h
 generic-y += word-at-a-time.h
diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig
index d83c8f70900d..2445dfcf6444 100644
--- a/arch/unicore32/Kconfig
+++ b/arch/unicore32/Kconfig
@@ -39,12 +39,6 @@ config STACKTRACE_SUPPORT
 config LOCKDEP_SUPPORT
 	def_bool y
 
-config RWSEM_GENERIC_SPINLOCK
-	def_bool y
-
-config RWSEM_XCHGADD_ALGORITHM
-	bool
-
 config ARCH_HAS_ILOG2_U32
 	bool
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f1162df4a805..90e2640ade75 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -268,9 +268,6 @@ config ARCH_MAY_HAVE_PC_FDC
 	def_bool y
 	depends on ISA_DMA_API
 
-config RWSEM_XCHGADD_ALGORITHM
-	def_bool y
-
 config GENERIC_CALIBRATE_DELAY
 	def_bool y
 
@@ -783,14 +780,6 @@ config PARAVIRT_SPINLOCKS
 
 	  If you are unsure how to answer this question, answer Y.
 
-config QUEUED_LOCK_STAT
-	bool "Paravirt queued spinlock statistics"
-	depends on PARAVIRT_SPINLOCKS && DEBUG_FS
-	---help---
-	  Enable the collection of statistical data on the slowpath
-	  behavior of paravirtualized queued spinlocks and report
-	  them on debugfs.
-
 source "arch/x86/xen/Kconfig"
 
 config KVM_GUEST
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
deleted file mode 100644
index 4c25cf6caefa..000000000000
--- a/arch/x86/include/asm/rwsem.h
+++ /dev/null
@@ -1,237 +0,0 @@
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | /* rwsem.h: R/W semaphores implemented using XADD/CMPXCHG for i486+ | ||
3 | * | ||
4 | * Written by David Howells (dhowells@redhat.com). | ||
5 | * | ||
6 | * Derived from asm-x86/semaphore.h | ||
7 | * | ||
8 | * | ||
9 | * The MSW of the count is the negated number of active writers and waiting | ||
10 | * lockers, and the LSW is the total number of active locks | ||
11 | * | ||
12 | * The lock count is initialized to 0 (no active and no waiting lockers). | ||
13 | * | ||
14 | * When a writer subtracts WRITE_BIAS, it'll get 0xffff0001 for the case of an | ||
15 | * uncontended lock. This can be determined because XADD returns the old value. | ||
16 | * Readers increment by 1 and see a positive value when uncontended, negative | ||
17 | * if there are writers (and maybe) readers waiting (in which case it goes to | ||
18 | * sleep). | ||
19 | * | ||
20 | * The value of WAITING_BIAS supports up to 32766 waiting processes. This can | ||
21 | * be extended to 65534 by manually checking the whole MSW rather than relying | ||
22 | * on the S flag. | ||
23 | * | ||
24 | * The value of ACTIVE_BIAS supports up to 65535 active processes. | ||
25 | * | ||
26 | * This should be totally fair - if anything is waiting, a process that wants a | ||
27 | * lock will go to the back of the queue. When the currently active lock is | ||
28 | * released, if there's a writer at the front of the queue, then that and only | ||
29 | * that will be woken up; if there's a bunch of consecutive readers at the | ||
30 | * front, then they'll all be woken up, but no other readers will be. | ||
31 | */ | ||
32 | |||
33 | #ifndef _ASM_X86_RWSEM_H | ||
34 | #define _ASM_X86_RWSEM_H | ||
35 | |||
36 | #ifndef _LINUX_RWSEM_H | ||
37 | #error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead" | ||
38 | #endif | ||
39 | |||
40 | #ifdef __KERNEL__ | ||
41 | #include <asm/asm.h> | ||
42 | |||
43 | /* | ||
44 | * The bias values and the counter type limits the number of | ||
45 | * potential readers/writers to 32767 for 32 bits and 2147483647 | ||
46 | * for 64 bits. | ||
47 | */ | ||
48 | |||
49 | #ifdef CONFIG_X86_64 | ||
50 | # define RWSEM_ACTIVE_MASK 0xffffffffL | ||
51 | #else | ||
52 | # define RWSEM_ACTIVE_MASK 0x0000ffffL | ||
53 | #endif | ||
54 | |||
55 | #define RWSEM_UNLOCKED_VALUE 0x00000000L | ||
56 | #define RWSEM_ACTIVE_BIAS 0x00000001L | ||
57 | #define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1) | ||
58 | #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS | ||
59 | #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) | ||
60 | |||
61 | /* | ||
62 | * lock for reading | ||
63 | */ | ||
64 | #define ____down_read(sem, slow_path) \ | ||
65 | ({ \ | ||
66 | struct rw_semaphore* ret; \ | ||
67 | asm volatile("# beginning down_read\n\t" \ | ||
68 | LOCK_PREFIX _ASM_INC "(%[sem])\n\t" \ | ||
69 | /* adds 0x00000001 */ \ | ||
70 | " jns 1f\n" \ | ||
71 | " call " slow_path "\n" \ | ||
72 | "1:\n\t" \ | ||
73 | "# ending down_read\n\t" \ | ||
74 | : "+m" (sem->count), "=a" (ret), \ | ||
75 | ASM_CALL_CONSTRAINT \ | ||
76 | : [sem] "a" (sem) \ | ||
77 | : "memory", "cc"); \ | ||
78 | ret; \ | ||
79 | }) | ||
80 | |||
81 | static inline void __down_read(struct rw_semaphore *sem) | ||
82 | { | ||
83 | ____down_read(sem, "call_rwsem_down_read_failed"); | ||
84 | } | ||
85 | |||
86 | static inline int __down_read_killable(struct rw_semaphore *sem) | ||
87 | { | ||
88 | if (IS_ERR(____down_read(sem, "call_rwsem_down_read_failed_killable"))) | ||
89 | return -EINTR; | ||
90 | return 0; | ||
91 | } | ||
92 | |||
93 | /* | ||
94 | * trylock for reading -- returns 1 if successful, 0 if contention | ||
95 | */ | ||
96 | static inline bool __down_read_trylock(struct rw_semaphore *sem) | ||
97 | { | ||
98 | long result, tmp; | ||
99 | asm volatile("# beginning __down_read_trylock\n\t" | ||
100 | " mov %[count],%[result]\n\t" | ||
101 | "1:\n\t" | ||
102 | " mov %[result],%[tmp]\n\t" | ||
103 | " add %[inc],%[tmp]\n\t" | ||
104 | " jle 2f\n\t" | ||
105 | LOCK_PREFIX " cmpxchg %[tmp],%[count]\n\t" | ||
106 | " jnz 1b\n\t" | ||
107 | "2:\n\t" | ||
108 | "# ending __down_read_trylock\n\t" | ||
109 | : [count] "+m" (sem->count), [result] "=&a" (result), | ||
110 | [tmp] "=&r" (tmp) | ||
111 | : [inc] "i" (RWSEM_ACTIVE_READ_BIAS) | ||
112 | : "memory", "cc"); | ||
113 | return result >= 0; | ||
114 | } | ||
115 | |||
116 | /* | ||
117 | * lock for writing | ||
118 | */ | ||
119 | #define ____down_write(sem, slow_path) \ | ||
120 | ({ \ | ||
121 | long tmp; \ | ||
122 | struct rw_semaphore* ret; \ | ||
123 | \ | ||
124 | asm volatile("# beginning down_write\n\t" \ | ||
125 | LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t" \ | ||
126 | /* adds 0xffff0001, returns the old value */ \ | ||
127 | " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" \ | ||
128 | /* was the active mask 0 before? */\ | ||
129 | " jz 1f\n" \ | ||
130 | " call " slow_path "\n" \ | ||
131 | "1:\n" \ | ||
132 | "# ending down_write" \ | ||
133 | : "+m" (sem->count), [tmp] "=d" (tmp), \ | ||
134 | "=a" (ret), ASM_CALL_CONSTRAINT \ | ||
135 | : [sem] "a" (sem), "[tmp]" (RWSEM_ACTIVE_WRITE_BIAS) \ | ||
136 | : "memory", "cc"); \ | ||
137 | ret; \ | ||
138 | }) | ||
139 | |||
140 | static inline void __down_write(struct rw_semaphore *sem) | ||
141 | { | ||
142 | ____down_write(sem, "call_rwsem_down_write_failed"); | ||
143 | } | ||
144 | |||
145 | static inline int __down_write_killable(struct rw_semaphore *sem) | ||
146 | { | ||
147 | if (IS_ERR(____down_write(sem, "call_rwsem_down_write_failed_killable"))) | ||
148 | return -EINTR; | ||
149 | |||
150 | return 0; | ||
151 | } | ||
152 | |||
153 | /* | ||
154 | * trylock for writing -- returns 1 if successful, 0 if contention | ||
155 | */ | ||
156 | static inline bool __down_write_trylock(struct rw_semaphore *sem) | ||
157 | { | ||
158 | bool result; | ||
159 | long tmp0, tmp1; | ||
160 | asm volatile("# beginning __down_write_trylock\n\t" | ||
161 | " mov %[count],%[tmp0]\n\t" | ||
162 | "1:\n\t" | ||
163 | " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" | ||
164 | /* was the active mask 0 before? */ | ||
165 | " jnz 2f\n\t" | ||
166 | " mov %[tmp0],%[tmp1]\n\t" | ||
167 | " add %[inc],%[tmp1]\n\t" | ||
168 | LOCK_PREFIX " cmpxchg %[tmp1],%[count]\n\t" | ||
169 | " jnz 1b\n\t" | ||
170 | "2:\n\t" | ||
171 | CC_SET(e) | ||
172 | "# ending __down_write_trylock\n\t" | ||
173 | : [count] "+m" (sem->count), [tmp0] "=&a" (tmp0), | ||
174 | [tmp1] "=&r" (tmp1), CC_OUT(e) (result) | ||
175 | : [inc] "er" (RWSEM_ACTIVE_WRITE_BIAS) | ||
176 | : "memory"); | ||
177 | return result; | ||
178 | } | ||
179 | |||
180 | /* | ||
181 | * unlock after reading | ||
182 | */ | ||
183 | static inline void __up_read(struct rw_semaphore *sem) | ||
184 | { | ||
185 | long tmp; | ||
186 | asm volatile("# beginning __up_read\n\t" | ||
187 | LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t" | ||
188 | /* subtracts 1, returns the old value */ | ||
189 | " jns 1f\n\t" | ||
190 | " call call_rwsem_wake\n" /* expects old value in %edx */ | ||
191 | "1:\n" | ||
192 | "# ending __up_read\n" | ||
193 | : "+m" (sem->count), [tmp] "=d" (tmp) | ||
194 | : [sem] "a" (sem), "[tmp]" (-RWSEM_ACTIVE_READ_BIAS) | ||
195 | : "memory", "cc"); | ||
196 | } | ||
197 | |||
198 | /* | ||
199 | * unlock after writing | ||
200 | */ | ||
201 | static inline void __up_write(struct rw_semaphore *sem) | ||
202 | { | ||
203 | long tmp; | ||
204 | asm volatile("# beginning __up_write\n\t" | ||
205 | LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t" | ||
206 | /* subtracts 0xffff0001, returns the old value */ | ||
207 | " jns 1f\n\t" | ||
208 | " call call_rwsem_wake\n" /* expects old value in %edx */ | ||
209 | "1:\n\t" | ||
210 | "# ending __up_write\n" | ||
211 | : "+m" (sem->count), [tmp] "=d" (tmp) | ||
212 | : [sem] "a" (sem), "[tmp]" (-RWSEM_ACTIVE_WRITE_BIAS) | ||
213 | : "memory", "cc"); | ||
214 | } | ||
215 | |||
216 | /* | ||
217 | * downgrade write lock to read lock | ||
218 | */ | ||
219 | static inline void __downgrade_write(struct rw_semaphore *sem) | ||
220 | { | ||
221 | asm volatile("# beginning __downgrade_write\n\t" | ||
222 | LOCK_PREFIX _ASM_ADD "%[inc],(%[sem])\n\t" | ||
223 | /* | ||
224 | * transitions 0xZZZZ0001 -> 0xYYYY0001 (i386) | ||
225 | * 0xZZZZZZZZ00000001 -> 0xYYYYYYYY00000001 (x86_64) | ||
226 | */ | ||
227 | " jns 1f\n\t" | ||
228 | " call call_rwsem_downgrade_wake\n" | ||
229 | "1:\n\t" | ||
230 | "# ending __downgrade_write\n" | ||
231 | : "+m" (sem->count) | ||
232 | : [sem] "a" (sem), [inc] "er" (-RWSEM_WAITING_BIAS) | ||
233 | : "memory", "cc"); | ||
234 | } | ||
235 | |||
236 | #endif /* __KERNEL__ */ | ||
237 | #endif /* _ASM_X86_RWSEM_H */ | ||
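For reference, the count layout described in the deleted header comment above (negated writers/waiters in the upper word, active lock count in the lower word) can be illustrated with a stand-alone sketch that only reuses the 64-bit bias constants from that header and prints what the count looks like in a few of the states the comment describes; this is an illustration, not kernel code:

    #include <stdio.h>

    #define RWSEM_ACTIVE_MASK       0xffffffffL
    #define RWSEM_UNLOCKED_VALUE    0x00000000L
    #define RWSEM_ACTIVE_BIAS       0x00000001L
    #define RWSEM_WAITING_BIAS      (-RWSEM_ACTIVE_MASK-1)
    #define RWSEM_ACTIVE_READ_BIAS  RWSEM_ACTIVE_BIAS
    #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)

    int main(void)
    {
            long count = RWSEM_UNLOCKED_VALUE;

            printf("unlocked:           %#lx\n", count);

            count += RWSEM_ACTIVE_READ_BIAS;        /* first reader */
            count += RWSEM_ACTIVE_READ_BIAS;        /* second reader */
            printf("two readers:        %#lx\n", count);    /* positive: uncontended */

            count = RWSEM_UNLOCKED_VALUE + RWSEM_ACTIVE_WRITE_BIAS;
            printf("uncontended writer: %#lx\n", count);    /* upper word negative */
            return 0;
    }

A reader that sees a negative count after its increment knows a writer (or waiter) is present and takes the slow path, which is exactly the check the deleted ___down_read() helpers performed.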
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 3cb3af51ec89..5246db42de45 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -35,7 +35,6 @@ obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
 lib-y := delay.o misc.o cmdline.o cpu.o
 lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
 lib-y += memcpy_$(BITS).o
-lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
 lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
 lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
 lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/x86/lib/rwsem.S b/arch/x86/lib/rwsem.S
deleted file mode 100644
index dc2ab6ea6768..000000000000
--- a/arch/x86/lib/rwsem.S
+++ /dev/null
@@ -1,156 +0,0 @@
1 | /* | ||
2 | * x86 semaphore implementation. | ||
3 | * | ||
4 | * (C) Copyright 1999 Linus Torvalds | ||
5 | * | ||
6 | * Portions Copyright 1999 Red Hat, Inc. | ||
7 | * | ||
8 | * This program is free software; you can redistribute it and/or | ||
9 | * modify it under the terms of the GNU General Public License | ||
10 | * as published by the Free Software Foundation; either version | ||
11 | * 2 of the License, or (at your option) any later version. | ||
12 | * | ||
13 | * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org> | ||
14 | */ | ||
15 | |||
16 | #include <linux/linkage.h> | ||
17 | #include <asm/alternative-asm.h> | ||
18 | #include <asm/frame.h> | ||
19 | |||
20 | #define __ASM_HALF_REG(reg) __ASM_SEL(reg, e##reg) | ||
21 | #define __ASM_HALF_SIZE(inst) __ASM_SEL(inst##w, inst##l) | ||
22 | |||
23 | #ifdef CONFIG_X86_32 | ||
24 | |||
25 | /* | ||
26 | * The semaphore operations have a special calling sequence that | ||
27 | * allow us to do a simpler in-line version of them. These routines | ||
28 | * need to convert that sequence back into the C sequence when | ||
29 | * there is contention on the semaphore. | ||
30 | * | ||
31 | * %eax contains the semaphore pointer on entry. Save the C-clobbered | ||
32 | * registers (%eax, %edx and %ecx) except %eax which is either a return | ||
33 | * value or just gets clobbered. Same is true for %edx so make sure GCC | ||
34 | * reloads it after the slow path, by making it hold a temporary, for | ||
35 | * example see ____down_write(). | ||
36 | */ | ||
37 | |||
38 | #define save_common_regs \ | ||
39 | pushl %ecx | ||
40 | |||
41 | #define restore_common_regs \ | ||
42 | popl %ecx | ||
43 | |||
44 | /* Avoid uglifying the argument copying x86-64 needs to do. */ | ||
45 | .macro movq src, dst | ||
46 | .endm | ||
47 | |||
48 | #else | ||
49 | |||
50 | /* | ||
51 | * x86-64 rwsem wrappers | ||
52 | * | ||
53 | * This interfaces the inline asm code to the slow-path | ||
54 | * C routines. We need to save the call-clobbered regs | ||
55 | * that the asm does not mark as clobbered, and move the | ||
56 | * argument from %rax to %rdi. | ||
57 | * | ||
58 | * NOTE! We don't need to save %rax, because the functions | ||
59 | * will always return the semaphore pointer in %rax (which | ||
60 | * is also the input argument to these helpers) | ||
61 | * | ||
62 | * The following can clobber %rdx because the asm clobbers it: | ||
63 | * call_rwsem_down_write_failed | ||
64 | * call_rwsem_wake | ||
65 | * but %rdi, %rsi, %rcx, %r8-r11 always need saving. | ||
66 | */ | ||
67 | |||
68 | #define save_common_regs \ | ||
69 | pushq %rdi; \ | ||
70 | pushq %rsi; \ | ||
71 | pushq %rcx; \ | ||
72 | pushq %r8; \ | ||
73 | pushq %r9; \ | ||
74 | pushq %r10; \ | ||
75 | pushq %r11 | ||
76 | |||
77 | #define restore_common_regs \ | ||
78 | popq %r11; \ | ||
79 | popq %r10; \ | ||
80 | popq %r9; \ | ||
81 | popq %r8; \ | ||
82 | popq %rcx; \ | ||
83 | popq %rsi; \ | ||
84 | popq %rdi | ||
85 | |||
86 | #endif | ||
87 | |||
88 | /* Fix up special calling conventions */ | ||
89 | ENTRY(call_rwsem_down_read_failed) | ||
90 | FRAME_BEGIN | ||
91 | save_common_regs | ||
92 | __ASM_SIZE(push,) %__ASM_REG(dx) | ||
93 | movq %rax,%rdi | ||
94 | call rwsem_down_read_failed | ||
95 | __ASM_SIZE(pop,) %__ASM_REG(dx) | ||
96 | restore_common_regs | ||
97 | FRAME_END | ||
98 | ret | ||
99 | ENDPROC(call_rwsem_down_read_failed) | ||
100 | |||
101 | ENTRY(call_rwsem_down_read_failed_killable) | ||
102 | FRAME_BEGIN | ||
103 | save_common_regs | ||
104 | __ASM_SIZE(push,) %__ASM_REG(dx) | ||
105 | movq %rax,%rdi | ||
106 | call rwsem_down_read_failed_killable | ||
107 | __ASM_SIZE(pop,) %__ASM_REG(dx) | ||
108 | restore_common_regs | ||
109 | FRAME_END | ||
110 | ret | ||
111 | ENDPROC(call_rwsem_down_read_failed_killable) | ||
112 | |||
113 | ENTRY(call_rwsem_down_write_failed) | ||
114 | FRAME_BEGIN | ||
115 | save_common_regs | ||
116 | movq %rax,%rdi | ||
117 | call rwsem_down_write_failed | ||
118 | restore_common_regs | ||
119 | FRAME_END | ||
120 | ret | ||
121 | ENDPROC(call_rwsem_down_write_failed) | ||
122 | |||
123 | ENTRY(call_rwsem_down_write_failed_killable) | ||
124 | FRAME_BEGIN | ||
125 | save_common_regs | ||
126 | movq %rax,%rdi | ||
127 | call rwsem_down_write_failed_killable | ||
128 | restore_common_regs | ||
129 | FRAME_END | ||
130 | ret | ||
131 | ENDPROC(call_rwsem_down_write_failed_killable) | ||
132 | |||
133 | ENTRY(call_rwsem_wake) | ||
134 | FRAME_BEGIN | ||
135 | /* do nothing if still outstanding active readers */ | ||
136 | __ASM_HALF_SIZE(dec) %__ASM_HALF_REG(dx) | ||
137 | jnz 1f | ||
138 | save_common_regs | ||
139 | movq %rax,%rdi | ||
140 | call rwsem_wake | ||
141 | restore_common_regs | ||
142 | 1: FRAME_END | ||
143 | ret | ||
144 | ENDPROC(call_rwsem_wake) | ||
145 | |||
146 | ENTRY(call_rwsem_downgrade_wake) | ||
147 | FRAME_BEGIN | ||
148 | save_common_regs | ||
149 | __ASM_SIZE(push,) %__ASM_REG(dx) | ||
150 | movq %rax,%rdi | ||
151 | call rwsem_downgrade_wake | ||
152 | __ASM_SIZE(pop,) %__ASM_REG(dx) | ||
153 | restore_common_regs | ||
154 | FRAME_END | ||
155 | ret | ||
156 | ENDPROC(call_rwsem_downgrade_wake) | ||
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index a9e80e44178c..a8985e1f7432 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -32,12 +32,6 @@ config ARCH_DEFCONFIG
 	default "arch/um/configs/i386_defconfig" if X86_32
 	default "arch/um/configs/x86_64_defconfig" if X86_64
 
-config RWSEM_XCHGADD_ALGORITHM
-	def_bool 64BIT
-
-config RWSEM_GENERIC_SPINLOCK
-	def_bool !RWSEM_XCHGADD_ALGORITHM
-
 config 3_LEVEL_PGTABLES
 	bool "Three-level pagetables" if !64BIT
 	default 64BIT
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
index 2d686ae54681..33c51c064c77 100644
--- a/arch/x86/um/Makefile
+++ b/arch/x86/um/Makefile
@@ -21,14 +21,12 @@ obj-y += checksum_32.o syscalls_32.o
 obj-$(CONFIG_ELF_CORE) += elfcore.o
 
 subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o
-subarch-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += ../lib/rwsem.o
 
 else
 
 obj-y += syscalls_64.o vdso/
 
-subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../entry/thunk_64.o \
-	../lib/rwsem.o
+subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../entry/thunk_64.o
 
 endif
 
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 4b9aafe766c5..35c8d91e6106 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -46,9 +46,6 @@ config XTENSA
 	  with reasonable minimum requirements. The Xtensa Linux project has
 	  a home page at <http://www.linux-xtensa.org/>.
 
-config RWSEM_XCHGADD_ALGORITHM
-	def_bool y
-
 config GENERIC_HWEIGHT
 	def_bool y
 
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 3843198e03d4..4148090cafb0 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -25,7 +25,6 @@ generic-y += percpu.h
 generic-y += preempt.h
 generic-y += qrwlock.h
 generic-y += qspinlock.h
-generic-y += rwsem.h
 generic-y += sections.h
 generic-y += socket.h
 generic-y += topology.h
diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h
deleted file mode 100644
index 93e67a055a4d..000000000000
--- a/include/asm-generic/rwsem.h
+++ /dev/null
@@ -1,140 +0,0 @@
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | #ifndef _ASM_GENERIC_RWSEM_H | ||
3 | #define _ASM_GENERIC_RWSEM_H | ||
4 | |||
5 | #ifndef _LINUX_RWSEM_H | ||
6 | #error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead." | ||
7 | #endif | ||
8 | |||
9 | #ifdef __KERNEL__ | ||
10 | |||
11 | /* | ||
12 | * R/W semaphores originally for PPC using the stuff in lib/rwsem.c. | ||
13 | * Adapted largely from include/asm-i386/rwsem.h | ||
14 | * by Paul Mackerras <paulus@samba.org>. | ||
15 | */ | ||
16 | |||
17 | /* | ||
18 | * the semaphore definition | ||
19 | */ | ||
20 | #ifdef CONFIG_64BIT | ||
21 | # define RWSEM_ACTIVE_MASK 0xffffffffL | ||
22 | #else | ||
23 | # define RWSEM_ACTIVE_MASK 0x0000ffffL | ||
24 | #endif | ||
25 | |||
26 | #define RWSEM_UNLOCKED_VALUE 0x00000000L | ||
27 | #define RWSEM_ACTIVE_BIAS 0x00000001L | ||
28 | #define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1) | ||
29 | #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS | ||
30 | #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) | ||
31 | |||
32 | /* | ||
33 | * lock for reading | ||
34 | */ | ||
35 | static inline void __down_read(struct rw_semaphore *sem) | ||
36 | { | ||
37 | if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) | ||
38 | rwsem_down_read_failed(sem); | ||
39 | } | ||
40 | |||
41 | static inline int __down_read_killable(struct rw_semaphore *sem) | ||
42 | { | ||
43 | if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) { | ||
44 | if (IS_ERR(rwsem_down_read_failed_killable(sem))) | ||
45 | return -EINTR; | ||
46 | } | ||
47 | |||
48 | return 0; | ||
49 | } | ||
50 | |||
51 | static inline int __down_read_trylock(struct rw_semaphore *sem) | ||
52 | { | ||
53 | long tmp; | ||
54 | |||
55 | while ((tmp = atomic_long_read(&sem->count)) >= 0) { | ||
56 | if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, | ||
57 | tmp + RWSEM_ACTIVE_READ_BIAS)) { | ||
58 | return 1; | ||
59 | } | ||
60 | } | ||
61 | return 0; | ||
62 | } | ||
63 | |||
64 | /* | ||
65 | * lock for writing | ||
66 | */ | ||
67 | static inline void __down_write(struct rw_semaphore *sem) | ||
68 | { | ||
69 | long tmp; | ||
70 | |||
71 | tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS, | ||
72 | &sem->count); | ||
73 | if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS)) | ||
74 | rwsem_down_write_failed(sem); | ||
75 | } | ||
76 | |||
77 | static inline int __down_write_killable(struct rw_semaphore *sem) | ||
78 | { | ||
79 | long tmp; | ||
80 | |||
81 | tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS, | ||
82 | &sem->count); | ||
83 | if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS)) | ||
84 | if (IS_ERR(rwsem_down_write_failed_killable(sem))) | ||
85 | return -EINTR; | ||
86 | return 0; | ||
87 | } | ||
88 | |||
89 | static inline int __down_write_trylock(struct rw_semaphore *sem) | ||
90 | { | ||
91 | long tmp; | ||
92 | |||
93 | tmp = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE, | ||
94 | RWSEM_ACTIVE_WRITE_BIAS); | ||
95 | return tmp == RWSEM_UNLOCKED_VALUE; | ||
96 | } | ||
97 | |||
98 | /* | ||
99 | * unlock after reading | ||
100 | */ | ||
101 | static inline void __up_read(struct rw_semaphore *sem) | ||
102 | { | ||
103 | long tmp; | ||
104 | |||
105 | tmp = atomic_long_dec_return_release(&sem->count); | ||
106 | if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0)) | ||
107 | rwsem_wake(sem); | ||
108 | } | ||
109 | |||
110 | /* | ||
111 | * unlock after writing | ||
112 | */ | ||
113 | static inline void __up_write(struct rw_semaphore *sem) | ||
114 | { | ||
115 | if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS, | ||
116 | &sem->count) < 0)) | ||
117 | rwsem_wake(sem); | ||
118 | } | ||
119 | |||
120 | /* | ||
121 | * downgrade write lock to read lock | ||
122 | */ | ||
123 | static inline void __downgrade_write(struct rw_semaphore *sem) | ||
124 | { | ||
125 | long tmp; | ||
126 | |||
127 | /* | ||
128 | * When downgrading from exclusive to shared ownership, | ||
129 | * anything inside the write-locked region cannot leak | ||
130 | * into the read side. In contrast, anything in the | ||
131 | * read-locked region is ok to be re-ordered into the | ||
132 | * write side. As such, rely on RELEASE semantics. | ||
133 | */ | ||
134 | tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count); | ||
135 | if (tmp < 0) | ||
136 | rwsem_downgrade_wake(sem); | ||
137 | } | ||
138 | |||
139 | #endif /* __KERNEL__ */ | ||
140 | #endif /* _ASM_GENERIC_RWSEM_H */ | ||
diff --git a/include/linux/jump_label_ratelimit.h b/include/linux/jump_label_ratelimit.h index a49f2b45b3f0..42710d5949ba 100644 --- a/include/linux/jump_label_ratelimit.h +++ b/include/linux/jump_label_ratelimit.h | |||
@@ -12,21 +12,79 @@ struct static_key_deferred { | |||
12 | struct delayed_work work; | 12 | struct delayed_work work; |
13 | }; | 13 | }; |
14 | 14 | ||
15 | extern void static_key_slow_dec_deferred(struct static_key_deferred *key); | 15 | struct static_key_true_deferred { |
16 | extern void static_key_deferred_flush(struct static_key_deferred *key); | 16 | struct static_key_true key; |
17 | unsigned long timeout; | ||
18 | struct delayed_work work; | ||
19 | }; | ||
20 | |||
21 | struct static_key_false_deferred { | ||
22 | struct static_key_false key; | ||
23 | unsigned long timeout; | ||
24 | struct delayed_work work; | ||
25 | }; | ||
26 | |||
27 | #define static_key_slow_dec_deferred(x) \ | ||
28 | __static_key_slow_dec_deferred(&(x)->key, &(x)->work, (x)->timeout) | ||
29 | #define static_branch_slow_dec_deferred(x) \ | ||
30 | __static_key_slow_dec_deferred(&(x)->key.key, &(x)->work, (x)->timeout) | ||
31 | |||
32 | #define static_key_deferred_flush(x) \ | ||
33 | __static_key_deferred_flush((x), &(x)->work) | ||
34 | |||
35 | extern void | ||
36 | __static_key_slow_dec_deferred(struct static_key *key, | ||
37 | struct delayed_work *work, | ||
38 | unsigned long timeout); | ||
39 | extern void __static_key_deferred_flush(void *key, struct delayed_work *work); | ||
17 | extern void | 40 | extern void |
18 | jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl); | 41 | jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl); |
19 | 42 | ||
43 | extern void jump_label_update_timeout(struct work_struct *work); | ||
44 | |||
45 | #define DEFINE_STATIC_KEY_DEFERRED_TRUE(name, rl) \ | ||
46 | struct static_key_true_deferred name = { \ | ||
47 | .key = { STATIC_KEY_INIT_TRUE }, \ | ||
48 | .timeout = (rl), \ | ||
49 | .work = __DELAYED_WORK_INITIALIZER((name).work, \ | ||
50 | jump_label_update_timeout, \ | ||
51 | 0), \ | ||
52 | } | ||
53 | |||
54 | #define DEFINE_STATIC_KEY_DEFERRED_FALSE(name, rl) \ | ||
55 | struct static_key_false_deferred name = { \ | ||
56 | .key = { STATIC_KEY_INIT_FALSE }, \ | ||
57 | .timeout = (rl), \ | ||
58 | .work = __DELAYED_WORK_INITIALIZER((name).work, \ | ||
59 | jump_label_update_timeout, \ | ||
60 | 0), \ | ||
61 | } | ||
62 | |||
63 | #define static_branch_deferred_inc(x) static_branch_inc(&(x)->key) | ||
64 | |||
20 | #else /* !CONFIG_JUMP_LABEL */ | 65 | #else /* !CONFIG_JUMP_LABEL */ |
21 | struct static_key_deferred { | 66 | struct static_key_deferred { |
22 | struct static_key key; | 67 | struct static_key key; |
23 | }; | 68 | }; |
69 | struct static_key_true_deferred { | ||
70 | struct static_key_true key; | ||
71 | }; | ||
72 | struct static_key_false_deferred { | ||
73 | struct static_key_false key; | ||
74 | }; | ||
75 | #define DEFINE_STATIC_KEY_DEFERRED_TRUE(name, rl) \ | ||
76 | struct static_key_true_deferred name = { STATIC_KEY_TRUE_INIT } | ||
77 | #define DEFINE_STATIC_KEY_DEFERRED_FALSE(name, rl) \ | ||
78 | struct static_key_false_deferred name = { STATIC_KEY_FALSE_INIT } | ||
79 | |||
80 | #define static_branch_slow_dec_deferred(x) static_branch_dec(&(x)->key) | ||
81 | |||
24 | static inline void static_key_slow_dec_deferred(struct static_key_deferred *key) | 82 | static inline void static_key_slow_dec_deferred(struct static_key_deferred *key) |
25 | { | 83 | { |
26 | STATIC_KEY_CHECK_USE(key); | 84 | STATIC_KEY_CHECK_USE(key); |
27 | static_key_slow_dec(&key->key); | 85 | static_key_slow_dec(&key->key); |
28 | } | 86 | } |
29 | static inline void static_key_deferred_flush(struct static_key_deferred *key) | 87 | static inline void static_key_deferred_flush(void *key) |
30 | { | 88 | { |
31 | STATIC_KEY_CHECK_USE(key); | 89 | STATIC_KEY_CHECK_USE(key); |
32 | } | 90 | } |
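The new *_deferred types above pair a static key with a timeout and a delayed work item so that the expensive text patching on the final decrement can be rate limited. A sketch of how a caller might use the API follows; the key name, the HZ rate limit and the sample_* helpers are invented for illustration:

/* Hypothetical consumer of the deferred static-branch API added above. */
#include <linux/jump_label.h>
#include <linux/jump_label_ratelimit.h>
#include <linux/jiffies.h>

/* Patch the branch back out at most once per second. */
static DEFINE_STATIC_KEY_DEFERRED_FALSE(sample_key, HZ);

static bool sample_fast_path(void)
{
        /* Compiles to a patched jump/no-op like any other static branch. */
        return static_branch_unlikely(&sample_key.key);
}

static void sample_get(void)
{
        static_branch_deferred_inc(&sample_key);
}

static void sample_put(void)
{
        /* The final decrement, and hence the text patching, is deferred
         * by the key's timeout. */
        static_branch_slow_dec_deferred(&sample_key);
}

static_branch_deferred_inc() bumps the key immediately; only the 1 -> 0 transition is pushed out to the delayed work.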
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 6f165d625320..6e2377e6c1d6 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -476,7 +476,7 @@ struct pin_cookie { }; | |||
476 | 476 | ||
477 | #define NIL_COOKIE (struct pin_cookie){ } | 477 | #define NIL_COOKIE (struct pin_cookie){ } |
478 | 478 | ||
479 | #define lockdep_pin_lock(l) ({ struct pin_cookie cookie; cookie; }) | 479 | #define lockdep_pin_lock(l) ({ struct pin_cookie cookie = { }; cookie; }) |
480 | #define lockdep_repin_lock(l, c) do { (void)(l); (void)(c); } while (0) | 480 | #define lockdep_repin_lock(l, c) do { (void)(l); (void)(c); } while (0) |
481 | #define lockdep_unpin_lock(l, c) do { (void)(l); (void)(c); } while (0) | 481 | #define lockdep_unpin_lock(l, c) do { (void)(l); (void)(c); } while (0) |
482 | 482 | ||
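The lockdep_pin_lock() tweak above matters because, without lockdep, the macro is a GNU statement expression that returns a dummy cookie; declaring the cookie without an initializer hands back indeterminate bytes and can trip (maybe-)uninitialized warnings in callers, while '= { }' zero-fills it. A minimal standalone reproduction, with illustrative names:

/* Minimal illustration (GNU C, as in the kernel) of why the empty
 * initializer matters; the names mirror the lockdep macros but this is
 * a standalone sketch. */
struct pin_cookie { unsigned int val; };

/* Old form: the statement expression returns an uninitialized struct. */
#define lockdep_pin_lock_old(l)  ({ struct pin_cookie cookie; cookie; })

/* New form: the empty initializer zero-fills the cookie. */
#define lockdep_pin_lock_new(l)  ({ struct pin_cookie cookie = { }; cookie; })

int main(void)
{
        struct pin_cookie c = lockdep_pin_lock_new(0);

        return c.val;   /* always 0 with the fixed macro */
}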
diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
deleted file mode 100644
index e47568363e5e..000000000000
--- a/include/linux/rwsem-spinlock.h
+++ /dev/null
@@ -1,47 +0,0 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | /* rwsem-spinlock.h: fallback C implementation | ||
3 | * | ||
4 | * Copyright (c) 2001 David Howells (dhowells@redhat.com). | ||
5 | * - Derived partially from ideas by Andrea Arcangeli <andrea@suse.de> | ||
6 | * - Derived also from comments by Linus | ||
7 | */ | ||
8 | |||
9 | #ifndef _LINUX_RWSEM_SPINLOCK_H | ||
10 | #define _LINUX_RWSEM_SPINLOCK_H | ||
11 | |||
12 | #ifndef _LINUX_RWSEM_H | ||
13 | #error "please don't include linux/rwsem-spinlock.h directly, use linux/rwsem.h instead" | ||
14 | #endif | ||
15 | |||
16 | #ifdef __KERNEL__ | ||
17 | /* | ||
18 | * the rw-semaphore definition | ||
19 | * - if count is 0 then there are no active readers or writers | ||
20 | * - if count is +ve then that is the number of active readers | ||
21 | * - if count is -1 then there is one active writer | ||
22 | * - if wait_list is not empty, then there are processes waiting for the semaphore | ||
23 | */ | ||
24 | struct rw_semaphore { | ||
25 | __s32 count; | ||
26 | raw_spinlock_t wait_lock; | ||
27 | struct list_head wait_list; | ||
28 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
29 | struct lockdep_map dep_map; | ||
30 | #endif | ||
31 | }; | ||
32 | |||
33 | #define RWSEM_UNLOCKED_VALUE 0x00000000 | ||
34 | |||
35 | extern void __down_read(struct rw_semaphore *sem); | ||
36 | extern int __must_check __down_read_killable(struct rw_semaphore *sem); | ||
37 | extern int __down_read_trylock(struct rw_semaphore *sem); | ||
38 | extern void __down_write(struct rw_semaphore *sem); | ||
39 | extern int __must_check __down_write_killable(struct rw_semaphore *sem); | ||
40 | extern int __down_write_trylock(struct rw_semaphore *sem); | ||
41 | extern void __up_read(struct rw_semaphore *sem); | ||
42 | extern void __up_write(struct rw_semaphore *sem); | ||
43 | extern void __downgrade_write(struct rw_semaphore *sem); | ||
44 | extern int rwsem_is_locked(struct rw_semaphore *sem); | ||
45 | |||
46 | #endif /* __KERNEL__ */ | ||
47 | #endif /* _LINUX_RWSEM_SPINLOCK_H */ | ||
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 67dbb57508b1..2ea18a3def04 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -20,25 +20,30 @@ | |||
20 | #include <linux/osq_lock.h> | 20 | #include <linux/osq_lock.h> |
21 | #endif | 21 | #endif |
22 | 22 | ||
23 | struct rw_semaphore; | 23 | /* |
24 | 24 | * For an uncontended rwsem, count and owner are the only fields a task | |
25 | #ifdef CONFIG_RWSEM_GENERIC_SPINLOCK | 25 | * needs to touch when acquiring the rwsem. So they are put next to each |
26 | #include <linux/rwsem-spinlock.h> /* use a generic implementation */ | 26 | * other to increase the chance that they will share the same cacheline. |
27 | #define __RWSEM_INIT_COUNT(name) .count = RWSEM_UNLOCKED_VALUE | 27 | * |
28 | #else | 28 | * In a contended rwsem, the owner is likely the most frequently accessed |
29 | /* All arch specific implementations share the same struct */ | 29 | * field in the structure as the optimistic waiter that holds the osq lock |
30 | * will spin on owner. For an embedded rwsem, other hot fields in the | ||
31 | * containing structure should be moved further away from the rwsem to | ||
32 | * reduce the chance that they will share the same cacheline causing | ||
33 | * cacheline bouncing problem. | ||
34 | */ | ||
30 | struct rw_semaphore { | 35 | struct rw_semaphore { |
31 | atomic_long_t count; | 36 | atomic_long_t count; |
32 | struct list_head wait_list; | ||
33 | raw_spinlock_t wait_lock; | ||
34 | #ifdef CONFIG_RWSEM_SPIN_ON_OWNER | 37 | #ifdef CONFIG_RWSEM_SPIN_ON_OWNER |
35 | struct optimistic_spin_queue osq; /* spinner MCS lock */ | ||
36 | /* | 38 | /* |
37 | * Write owner. Used as a speculative check to see | 39 | * Write owner. Used as a speculative check to see |
38 | * if the owner is running on the cpu. | 40 | * if the owner is running on the cpu. |
39 | */ | 41 | */ |
40 | struct task_struct *owner; | 42 | struct task_struct *owner; |
43 | struct optimistic_spin_queue osq; /* spinner MCS lock */ | ||
41 | #endif | 44 | #endif |
45 | raw_spinlock_t wait_lock; | ||
46 | struct list_head wait_list; | ||
42 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | 47 | #ifdef CONFIG_DEBUG_LOCK_ALLOC |
43 | struct lockdep_map dep_map; | 48 | struct lockdep_map dep_map; |
44 | #endif | 49 | #endif |
@@ -50,24 +55,14 @@ struct rw_semaphore { | |||
50 | */ | 55 | */ |
51 | #define RWSEM_OWNER_UNKNOWN ((struct task_struct *)-2L) | 56 | #define RWSEM_OWNER_UNKNOWN ((struct task_struct *)-2L) |
52 | 57 | ||
53 | extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); | ||
54 | extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem); | ||
55 | extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); | ||
56 | extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem); | ||
57 | extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *); | ||
58 | extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); | ||
59 | |||
60 | /* Include the arch specific part */ | ||
61 | #include <asm/rwsem.h> | ||
62 | |||
63 | /* In all implementations count != 0 means locked */ | 58 | /* In all implementations count != 0 means locked */ |
64 | static inline int rwsem_is_locked(struct rw_semaphore *sem) | 59 | static inline int rwsem_is_locked(struct rw_semaphore *sem) |
65 | { | 60 | { |
66 | return atomic_long_read(&sem->count) != 0; | 61 | return atomic_long_read(&sem->count) != 0; |
67 | } | 62 | } |
68 | 63 | ||
64 | #define RWSEM_UNLOCKED_VALUE 0L | ||
69 | #define __RWSEM_INIT_COUNT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE) | 65 | #define __RWSEM_INIT_COUNT(name) .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE) |
70 | #endif | ||
71 | 66 | ||
72 | /* Common initializer macros and functions */ | 67 | /* Common initializer macros and functions */ |
73 | 68 | ||
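The comment added above is the whole point of the reordering: count and owner should share a cache line for the uncontended case, while the wait machinery is pushed behind them. Field placement like this can be sanity-checked with offsetof() on a reduced mock-up of the layout; the stand-in types below are simplifications, so the exact offsets are illustrative only:

/* Standalone mock-up of the reordered layout; the stand-in types are
 * simplified, so offsets are illustrative only. */
#include <stdio.h>
#include <stddef.h>

struct mock_rwsem {
        long count;                             /* atomic_long_t          */
        void *owner;                            /* struct task_struct *   */
        int osq;                                /* optimistic_spin_queue  */
        int wait_lock;                          /* raw_spinlock_t         */
        struct { void *next, *prev; } wait_list;
};

int main(void)
{
        printf("count     @ %zu\n", offsetof(struct mock_rwsem, count));
        printf("owner     @ %zu\n", offsetof(struct mock_rwsem, owner));
        printf("wait_lock @ %zu\n", offsetof(struct mock_rwsem, wait_lock));
        /* count and owner land in the first 16 bytes, i.e. the same
         * 64-byte cache line; the wait machinery follows them. */
        return 0;
}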
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index fbba478ae522..e335953fa704 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -229,7 +229,7 @@ config MUTEX_SPIN_ON_OWNER | |||
229 | 229 | ||
230 | config RWSEM_SPIN_ON_OWNER | 230 | config RWSEM_SPIN_ON_OWNER |
231 | def_bool y | 231 | def_bool y |
232 | depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW | 232 | depends on SMP && ARCH_SUPPORTS_ATOMIC_RMW |
233 | 233 | ||
234 | config LOCK_SPIN_ON_OWNER | 234 | config LOCK_SPIN_ON_OWNER |
235 | def_bool y | 235 | def_bool y |
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index bad96b476eb6..de6efdecc70d 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -202,11 +202,13 @@ void static_key_disable(struct static_key *key) | |||
202 | } | 202 | } |
203 | EXPORT_SYMBOL_GPL(static_key_disable); | 203 | EXPORT_SYMBOL_GPL(static_key_disable); |
204 | 204 | ||
205 | static void __static_key_slow_dec_cpuslocked(struct static_key *key, | 205 | static bool static_key_slow_try_dec(struct static_key *key) |
206 | unsigned long rate_limit, | ||
207 | struct delayed_work *work) | ||
208 | { | 206 | { |
209 | lockdep_assert_cpus_held(); | 207 | int val; |
208 | |||
209 | val = atomic_fetch_add_unless(&key->enabled, -1, 1); | ||
210 | if (val == 1) | ||
211 | return false; | ||
210 | 212 | ||
211 | /* | 213 | /* |
212 | * The negative count check is valid even when a negative | 214 | * The negative count check is valid even when a negative |
@@ -215,63 +217,70 @@ static void __static_key_slow_dec_cpuslocked(struct static_key *key, | |||
215 | * returns is unbalanced, because all other static_key_slow_inc() | 217 | * returns is unbalanced, because all other static_key_slow_inc() |
216 | * instances block while the update is in progress. | 218 | * instances block while the update is in progress. |
217 | */ | 219 | */ |
218 | if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) { | 220 | WARN(val < 0, "jump label: negative count!\n"); |
219 | WARN(atomic_read(&key->enabled) < 0, | 221 | return true; |
220 | "jump label: negative count!\n"); | 222 | } |
223 | |||
224 | static void __static_key_slow_dec_cpuslocked(struct static_key *key) | ||
225 | { | ||
226 | lockdep_assert_cpus_held(); | ||
227 | |||
228 | if (static_key_slow_try_dec(key)) | ||
221 | return; | 229 | return; |
222 | } | ||
223 | 230 | ||
224 | if (rate_limit) { | 231 | jump_label_lock(); |
225 | atomic_inc(&key->enabled); | 232 | if (atomic_dec_and_test(&key->enabled)) |
226 | schedule_delayed_work(work, rate_limit); | ||
227 | } else { | ||
228 | jump_label_update(key); | 233 | jump_label_update(key); |
229 | } | ||
230 | jump_label_unlock(); | 234 | jump_label_unlock(); |
231 | } | 235 | } |
232 | 236 | ||
233 | static void __static_key_slow_dec(struct static_key *key, | 237 | static void __static_key_slow_dec(struct static_key *key) |
234 | unsigned long rate_limit, | ||
235 | struct delayed_work *work) | ||
236 | { | 238 | { |
237 | cpus_read_lock(); | 239 | cpus_read_lock(); |
238 | __static_key_slow_dec_cpuslocked(key, rate_limit, work); | 240 | __static_key_slow_dec_cpuslocked(key); |
239 | cpus_read_unlock(); | 241 | cpus_read_unlock(); |
240 | } | 242 | } |
241 | 243 | ||
242 | static void jump_label_update_timeout(struct work_struct *work) | 244 | void jump_label_update_timeout(struct work_struct *work) |
243 | { | 245 | { |
244 | struct static_key_deferred *key = | 246 | struct static_key_deferred *key = |
245 | container_of(work, struct static_key_deferred, work.work); | 247 | container_of(work, struct static_key_deferred, work.work); |
246 | __static_key_slow_dec(&key->key, 0, NULL); | 248 | __static_key_slow_dec(&key->key); |
247 | } | 249 | } |
250 | EXPORT_SYMBOL_GPL(jump_label_update_timeout); | ||
248 | 251 | ||
249 | void static_key_slow_dec(struct static_key *key) | 252 | void static_key_slow_dec(struct static_key *key) |
250 | { | 253 | { |
251 | STATIC_KEY_CHECK_USE(key); | 254 | STATIC_KEY_CHECK_USE(key); |
252 | __static_key_slow_dec(key, 0, NULL); | 255 | __static_key_slow_dec(key); |
253 | } | 256 | } |
254 | EXPORT_SYMBOL_GPL(static_key_slow_dec); | 257 | EXPORT_SYMBOL_GPL(static_key_slow_dec); |
255 | 258 | ||
256 | void static_key_slow_dec_cpuslocked(struct static_key *key) | 259 | void static_key_slow_dec_cpuslocked(struct static_key *key) |
257 | { | 260 | { |
258 | STATIC_KEY_CHECK_USE(key); | 261 | STATIC_KEY_CHECK_USE(key); |
259 | __static_key_slow_dec_cpuslocked(key, 0, NULL); | 262 | __static_key_slow_dec_cpuslocked(key); |
260 | } | 263 | } |
261 | 264 | ||
262 | void static_key_slow_dec_deferred(struct static_key_deferred *key) | 265 | void __static_key_slow_dec_deferred(struct static_key *key, |
266 | struct delayed_work *work, | ||
267 | unsigned long timeout) | ||
263 | { | 268 | { |
264 | STATIC_KEY_CHECK_USE(key); | 269 | STATIC_KEY_CHECK_USE(key); |
265 | __static_key_slow_dec(&key->key, key->timeout, &key->work); | 270 | |
271 | if (static_key_slow_try_dec(key)) | ||
272 | return; | ||
273 | |||
274 | schedule_delayed_work(work, timeout); | ||
266 | } | 275 | } |
267 | EXPORT_SYMBOL_GPL(static_key_slow_dec_deferred); | 276 | EXPORT_SYMBOL_GPL(__static_key_slow_dec_deferred); |
268 | 277 | ||
269 | void static_key_deferred_flush(struct static_key_deferred *key) | 278 | void __static_key_deferred_flush(void *key, struct delayed_work *work) |
270 | { | 279 | { |
271 | STATIC_KEY_CHECK_USE(key); | 280 | STATIC_KEY_CHECK_USE(key); |
272 | flush_delayed_work(&key->work); | 281 | flush_delayed_work(work); |
273 | } | 282 | } |
274 | EXPORT_SYMBOL_GPL(static_key_deferred_flush); | 283 | EXPORT_SYMBOL_GPL(__static_key_deferred_flush); |
275 | 284 | ||
276 | void jump_label_rate_limit(struct static_key_deferred *key, | 285 | void jump_label_rate_limit(struct static_key_deferred *key, |
277 | unsigned long rl) | 286 | unsigned long rl) |
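static_key_slow_try_dec() above hinges on atomic_fetch_add_unless(&key->enabled, -1, 1): decrement only while the count is not 1, so the 1 -> 0 transition that requires patching code always falls through to jump_label_lock() or to the deferred work. A userspace model of that primitive, using C11 atomics in place of the kernel API:

/* Userspace model of atomic_fetch_add_unless() as used by
 * static_key_slow_try_dec(); C11 atomics stand in for the kernel API. */
#include <stdatomic.h>
#include <stdio.h>

/* Add 'a' to *v unless *v == u; return the old value either way. */
static int fetch_add_unless(atomic_int *v, int a, int u)
{
        int c = atomic_load(v);

        do {
                if (c == u)
                        break;
        } while (!atomic_compare_exchange_weak(v, &c, c + a));

        return c;
}

int main(void)
{
        atomic_int enabled = 2;
        int old;

        /* 2 -> 1: plain decrement, no code patching required. */
        old = fetch_add_unless(&enabled, -1, 1);
        printf("old=%d now=%d\n", old, atomic_load(&enabled));

        /* enabled == 1: refuse, the caller must take the slow path. */
        old = fetch_add_unless(&enabled, -1, 1);
        printf("old=%d now=%d\n", old, atomic_load(&enabled));
        return 0;
}

The first call drops the count from 2 to 1 without touching any code; the second refuses because the old value is 1, which is exactly the case the kernel routes to the slow path.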
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index 392c7f23af76..6fe2f333aecb 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -3,7 +3,7 @@ | |||
3 | # and is generally not a function of system call inputs. | 3 | # and is generally not a function of system call inputs. |
4 | KCOV_INSTRUMENT := n | 4 | KCOV_INSTRUMENT := n |
5 | 5 | ||
6 | obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o | 6 | obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o rwsem-xadd.o |
7 | 7 | ||
8 | ifdef CONFIG_FUNCTION_TRACER | 8 | ifdef CONFIG_FUNCTION_TRACER |
9 | CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE) | 9 | CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE) |
@@ -25,8 +25,7 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o | |||
25 | obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o | 25 | obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o |
26 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o | 26 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o |
27 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o | 27 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o |
28 | obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o | ||
29 | obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o | ||
30 | obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o | 28 | obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o |
31 | obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o | 29 | obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o |
32 | obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o | 30 | obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o |
31 | obj-$(CONFIG_LOCK_EVENT_COUNTS) += lock_events.o | ||
diff --git a/kernel/locking/lock_events.c b/kernel/locking/lock_events.c
new file mode 100644
index 000000000000..fa2c2f951c6b
--- /dev/null
+++ b/kernel/locking/lock_events.c
@@ -0,0 +1,179 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | /* | ||
3 | * This program is free software; you can redistribute it and/or modify | ||
4 | * it under the terms of the GNU General Public License as published by | ||
5 | * the Free Software Foundation; either version 2 of the License, or | ||
6 | * (at your option) any later version. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | * Authors: Waiman Long <waiman.long@hpe.com> | ||
14 | */ | ||
15 | |||
16 | /* | ||
17 | * Collect locking event counts | ||
18 | */ | ||
19 | #include <linux/debugfs.h> | ||
20 | #include <linux/sched.h> | ||
21 | #include <linux/sched/clock.h> | ||
22 | #include <linux/fs.h> | ||
23 | |||
24 | #include "lock_events.h" | ||
25 | |||
26 | #undef LOCK_EVENT | ||
27 | #define LOCK_EVENT(name) [LOCKEVENT_ ## name] = #name, | ||
28 | |||
29 | #define LOCK_EVENTS_DIR "lock_event_counts" | ||
30 | |||
31 | /* | ||
32 | * When CONFIG_LOCK_EVENT_COUNTS is enabled, event counts of different | ||
33 | * types of locks will be reported under the <debugfs>/lock_event_counts/ | ||
34 | * directory. See lock_events_list.h for the list of available locking | ||
35 | * events. | ||
36 | * | ||
37 | * Writing to the special ".reset_counts" file will reset all the above | ||
38 | * locking event counts. This is a very slow operation and so should not | ||
39 | * be done frequently. | ||
40 | * | ||
41 | * These event counts are implemented as per-cpu variables which are | ||
42 | * summed and computed whenever the corresponding debugfs files are read. This | ||
43 | * minimizes added overhead making the counts usable even in a production | ||
44 | * environment. | ||
45 | */ | ||
46 | static const char * const lockevent_names[lockevent_num + 1] = { | ||
47 | |||
48 | #include "lock_events_list.h" | ||
49 | |||
50 | [LOCKEVENT_reset_cnts] = ".reset_counts", | ||
51 | }; | ||
52 | |||
53 | /* | ||
54 | * Per-cpu counts | ||
55 | */ | ||
56 | DEFINE_PER_CPU(unsigned long, lockevents[lockevent_num]); | ||
57 | |||
58 | /* | ||
59 | * The lockevent_read() function can be overridden. | ||
60 | */ | ||
61 | ssize_t __weak lockevent_read(struct file *file, char __user *user_buf, | ||
62 | size_t count, loff_t *ppos) | ||
63 | { | ||
64 | char buf[64]; | ||
65 | int cpu, id, len; | ||
66 | u64 sum = 0; | ||
67 | |||
68 | /* | ||
69 | * Get the counter ID stored in file->f_inode->i_private | ||
70 | */ | ||
71 | id = (long)file_inode(file)->i_private; | ||
72 | |||
73 | if (id >= lockevent_num) | ||
74 | return -EBADF; | ||
75 | |||
76 | for_each_possible_cpu(cpu) | ||
77 | sum += per_cpu(lockevents[id], cpu); | ||
78 | len = snprintf(buf, sizeof(buf) - 1, "%llu\n", sum); | ||
79 | |||
80 | return simple_read_from_buffer(user_buf, count, ppos, buf, len); | ||
81 | } | ||
82 | |||
83 | /* | ||
84 | * Function to handle write request | ||
85 | * | ||
86 | * When idx = reset_cnts, reset all the counts. | ||
87 | */ | ||
88 | static ssize_t lockevent_write(struct file *file, const char __user *user_buf, | ||
89 | size_t count, loff_t *ppos) | ||
90 | { | ||
91 | int cpu; | ||
92 | |||
93 | /* | ||
94 | * Get the counter ID stored in file->f_inode->i_private | ||
95 | */ | ||
96 | if ((long)file_inode(file)->i_private != LOCKEVENT_reset_cnts) | ||
97 | return count; | ||
98 | |||
99 | for_each_possible_cpu(cpu) { | ||
100 | int i; | ||
101 | unsigned long *ptr = per_cpu_ptr(lockevents, cpu); | ||
102 | |||
103 | for (i = 0 ; i < lockevent_num; i++) | ||
104 | WRITE_ONCE(ptr[i], 0); | ||
105 | } | ||
106 | return count; | ||
107 | } | ||
108 | |||
109 | /* | ||
110 | * Debugfs data structures | ||
111 | */ | ||
112 | static const struct file_operations fops_lockevent = { | ||
113 | .read = lockevent_read, | ||
114 | .write = lockevent_write, | ||
115 | .llseek = default_llseek, | ||
116 | }; | ||
117 | |||
118 | #ifdef CONFIG_PARAVIRT_SPINLOCKS | ||
119 | #include <asm/paravirt.h> | ||
120 | |||
121 | static bool __init skip_lockevent(const char *name) | ||
122 | { | ||
123 | static int pv_on __initdata = -1; | ||
124 | |||
125 | if (pv_on < 0) | ||
126 | pv_on = !pv_is_native_spin_unlock(); | ||
127 | /* | ||
128 | * Skip PV qspinlock events on bare metal. | ||
129 | */ | ||
130 | if (!pv_on && !memcmp(name, "pv_", 3)) | ||
131 | return true; | ||
132 | return false; | ||
133 | } | ||
134 | #else | ||
135 | static inline bool skip_lockevent(const char *name) | ||
136 | { | ||
137 | return false; | ||
138 | } | ||
139 | #endif | ||
140 | |||
141 | /* | ||
142 | * Initialize debugfs for the locking event counts. | ||
143 | */ | ||
144 | static int __init init_lockevent_counts(void) | ||
145 | { | ||
146 | struct dentry *d_counts = debugfs_create_dir(LOCK_EVENTS_DIR, NULL); | ||
147 | int i; | ||
148 | |||
149 | if (!d_counts) | ||
150 | goto out; | ||
151 | |||
152 | /* | ||
153 | * Create the debugfs files | ||
154 | * | ||
155 | * As reading from and writing to the stat files can be slow, only | ||
156 | * root is allowed to do the read/write to limit impact to system | ||
157 | * performance. | ||
158 | */ | ||
159 | for (i = 0; i < lockevent_num; i++) { | ||
160 | if (skip_lockevent(lockevent_names[i])) | ||
161 | continue; | ||
162 | if (!debugfs_create_file(lockevent_names[i], 0400, d_counts, | ||
163 | (void *)(long)i, &fops_lockevent)) | ||
164 | goto fail_undo; | ||
165 | } | ||
166 | |||
167 | if (!debugfs_create_file(lockevent_names[LOCKEVENT_reset_cnts], 0200, | ||
168 | d_counts, (void *)(long)LOCKEVENT_reset_cnts, | ||
169 | &fops_lockevent)) | ||
170 | goto fail_undo; | ||
171 | |||
172 | return 0; | ||
173 | fail_undo: | ||
174 | debugfs_remove_recursive(d_counts); | ||
175 | out: | ||
176 | pr_warn("Could not create '%s' debugfs entries\n", LOCK_EVENTS_DIR); | ||
177 | return -ENOMEM; | ||
178 | } | ||
179 | fs_initcall(init_lockevent_counts); | ||
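Once CONFIG_LOCK_EVENT_COUNTS is enabled, each event becomes a root-readable file under <debugfs>/lock_event_counts/. A minimal userspace reader is sketched below; it assumes debugfs is mounted at the usual /sys/kernel/debug and picks a few rwsem event names from the list added later in this patch:

/* Tiny reader for the new debugfs files; assumes debugfs is mounted at
 * /sys/kernel/debug and must run as root since the files are mode 0400. */
#include <stdio.h>

#define LOCK_EVENTS_DIR "/sys/kernel/debug/lock_event_counts"

static unsigned long long read_event(const char *name)
{
        char path[256];
        unsigned long long val = 0;
        FILE *f;

        snprintf(path, sizeof(path), "%s/%s", LOCK_EVENTS_DIR, name);
        f = fopen(path, "r");
        if (!f)
                return 0;
        if (fscanf(f, "%llu", &val) != 1)
                val = 0;
        fclose(f);
        return val;
}

int main(void)
{
        static const char * const events[] = {
                "rwsem_rlock", "rwsem_wlock", "rwsem_sleep_reader",
        };
        unsigned int i;

        for (i = 0; i < sizeof(events) / sizeof(events[0]); i++)
                printf("%-20s %llu\n", events[i], read_event(events[i]));
        return 0;
}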
diff --git a/kernel/locking/lock_events.h b/kernel/locking/lock_events.h
new file mode 100644
index 000000000000..feb1acc54611
--- /dev/null
+++ b/kernel/locking/lock_events.h
@@ -0,0 +1,59 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | /* | ||
3 | * This program is free software; you can redistribute it and/or modify | ||
4 | * it under the terms of the GNU General Public License as published by | ||
5 | * the Free Software Foundation; either version 2 of the License, or | ||
6 | * (at your option) any later version. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | * Authors: Waiman Long <longman@redhat.com> | ||
14 | */ | ||
15 | |||
16 | #ifndef __LOCKING_LOCK_EVENTS_H | ||
17 | #define __LOCKING_LOCK_EVENTS_H | ||
18 | |||
19 | enum lock_events { | ||
20 | |||
21 | #include "lock_events_list.h" | ||
22 | |||
23 | lockevent_num, /* Total number of lock event counts */ | ||
24 | LOCKEVENT_reset_cnts = lockevent_num, | ||
25 | }; | ||
26 | |||
27 | #ifdef CONFIG_LOCK_EVENT_COUNTS | ||
28 | /* | ||
29 | * Per-cpu counters | ||
30 | */ | ||
31 | DECLARE_PER_CPU(unsigned long, lockevents[lockevent_num]); | ||
32 | |||
33 | /* | ||
34 | * Increment the PV qspinlock statistical counters | ||
35 | */ | ||
36 | static inline void __lockevent_inc(enum lock_events event, bool cond) | ||
37 | { | ||
38 | if (cond) | ||
39 | __this_cpu_inc(lockevents[event]); | ||
40 | } | ||
41 | |||
42 | #define lockevent_inc(ev) __lockevent_inc(LOCKEVENT_ ##ev, true) | ||
43 | #define lockevent_cond_inc(ev, c) __lockevent_inc(LOCKEVENT_ ##ev, c) | ||
44 | |||
45 | static inline void __lockevent_add(enum lock_events event, int inc) | ||
46 | { | ||
47 | __this_cpu_add(lockevents[event], inc); | ||
48 | } | ||
49 | |||
50 | #define lockevent_add(ev, c) __lockevent_add(LOCKEVENT_ ##ev, c) | ||
51 | |||
52 | #else /* CONFIG_LOCK_EVENT_COUNTS */ | ||
53 | |||
54 | #define lockevent_inc(ev) | ||
55 | #define lockevent_add(ev, c) | ||
56 | #define lockevent_cond_inc(ev, c) | ||
57 | |||
58 | #endif /* CONFIG_LOCK_EVENT_COUNTS */ | ||
59 | #endif /* __LOCKING_LOCK_EVENTS_H */ | ||
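The helpers above reduce to a single __this_cpu_inc()/__this_cpu_add() when the option is on and to nothing at all when it is off, which is what keeps them cheap enough for lock fast paths. The same shape can be modelled in plain C with a thread-local array standing in for the per-CPU counters; this is only an analogy, since the kernel versions are per-CPU operations, not TLS:

/* Userspace analogy of the lockevent_*() helpers: a thread-local array
 * stands in for the per-CPU counters, and the disabled case compiles to
 * nothing, like the #else branch above. */
#include <stdio.h>

enum lock_events {
        LOCKEVENT_lock_pending,
        LOCKEVENT_lock_slowpath,
        lockevent_num,
};

#define CONFIG_LOCK_EVENT_COUNTS 1

#if CONFIG_LOCK_EVENT_COUNTS
static _Thread_local unsigned long lockevents[lockevent_num];

#define lockevent_inc(ev)               lockevents[LOCKEVENT_ ## ev]++
#define lockevent_cond_inc(ev, c)       do { if (c) lockevent_inc(ev); } while (0)
#else
#define lockevent_inc(ev)
#define lockevent_cond_inc(ev, c)
#endif

int main(void)
{
        lockevent_inc(lock_pending);
        lockevent_cond_inc(lock_slowpath, 0);   /* condition false: no count */
        printf("pending=%lu slowpath=%lu\n",
               lockevents[LOCKEVENT_lock_pending],
               lockevents[LOCKEVENT_lock_slowpath]);
        return 0;
}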
diff --git a/kernel/locking/lock_events_list.h b/kernel/locking/lock_events_list.h
new file mode 100644
index 000000000000..ad7668cfc9da
--- /dev/null
+++ b/kernel/locking/lock_events_list.h
@@ -0,0 +1,67 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | /* | ||
3 | * This program is free software; you can redistribute it and/or modify | ||
4 | * it under the terms of the GNU General Public License as published by | ||
5 | * the Free Software Foundation; either version 2 of the License, or | ||
6 | * (at your option) any later version. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | * Authors: Waiman Long <longman@redhat.com> | ||
14 | */ | ||
15 | |||
16 | #ifndef LOCK_EVENT | ||
17 | #define LOCK_EVENT(name) LOCKEVENT_ ## name, | ||
18 | #endif | ||
19 | |||
20 | #ifdef CONFIG_QUEUED_SPINLOCKS | ||
21 | #ifdef CONFIG_PARAVIRT_SPINLOCKS | ||
22 | /* | ||
23 | * Locking events for PV qspinlock. | ||
24 | */ | ||
25 | LOCK_EVENT(pv_hash_hops) /* Average # of hops per hashing operation */ | ||
26 | LOCK_EVENT(pv_kick_unlock) /* # of vCPU kicks issued at unlock time */ | ||
27 | LOCK_EVENT(pv_kick_wake) /* # of vCPU kicks for pv_latency_wake */ | ||
28 | LOCK_EVENT(pv_latency_kick) /* Average latency (ns) of vCPU kick */ | ||
29 | LOCK_EVENT(pv_latency_wake) /* Average latency (ns) of kick-to-wakeup */ | ||
30 | LOCK_EVENT(pv_lock_stealing) /* # of lock stealing operations */ | ||
31 | LOCK_EVENT(pv_spurious_wakeup) /* # of spurious wakeups in non-head vCPUs */ | ||
32 | LOCK_EVENT(pv_wait_again) /* # of wait's after queue head vCPU kick */ | ||
33 | LOCK_EVENT(pv_wait_early) /* # of early vCPU wait's */ | ||
34 | LOCK_EVENT(pv_wait_head) /* # of vCPU wait's at the queue head */ | ||
35 | LOCK_EVENT(pv_wait_node) /* # of vCPU wait's at non-head queue node */ | ||
36 | #endif /* CONFIG_PARAVIRT_SPINLOCKS */ | ||
37 | |||
38 | /* | ||
39 | * Locking events for qspinlock | ||
40 | * | ||
41 | * Subtracting lock_use_node[234] from lock_slowpath will give you | ||
42 | * lock_use_node1. | ||
43 | */ | ||
44 | LOCK_EVENT(lock_pending) /* # of locking ops via pending code */ | ||
45 | LOCK_EVENT(lock_slowpath) /* # of locking ops via MCS lock queue */ | ||
46 | LOCK_EVENT(lock_use_node2) /* # of locking ops that use 2nd percpu node */ | ||
47 | LOCK_EVENT(lock_use_node3) /* # of locking ops that use 3rd percpu node */ | ||
48 | LOCK_EVENT(lock_use_node4) /* # of locking ops that use 4th percpu node */ | ||
49 | LOCK_EVENT(lock_no_node) /* # of locking ops w/o using percpu node */ | ||
50 | #endif /* CONFIG_QUEUED_SPINLOCKS */ | ||
51 | |||
52 | /* | ||
53 | * Locking events for rwsem | ||
54 | */ | ||
55 | LOCK_EVENT(rwsem_sleep_reader) /* # of reader sleeps */ | ||
56 | LOCK_EVENT(rwsem_sleep_writer) /* # of writer sleeps */ | ||
57 | LOCK_EVENT(rwsem_wake_reader) /* # of reader wakeups */ | ||
58 | LOCK_EVENT(rwsem_wake_writer) /* # of writer wakeups */ | ||
59 | LOCK_EVENT(rwsem_opt_wlock) /* # of write locks opt-spin acquired */ | ||
60 | LOCK_EVENT(rwsem_opt_fail) /* # of failed opt-spinnings */ | ||
61 | LOCK_EVENT(rwsem_rlock) /* # of read locks acquired */ | ||
62 | LOCK_EVENT(rwsem_rlock_fast) /* # of fast read locks acquired */ | ||
63 | LOCK_EVENT(rwsem_rlock_fail) /* # of failed read lock acquisitions */ | ||
64 | LOCK_EVENT(rwsem_rtrylock) /* # of read trylock calls */ | ||
65 | LOCK_EVENT(rwsem_wlock) /* # of write locks acquired */ | ||
66 | LOCK_EVENT(rwsem_wlock_fail) /* # of failed write lock acquisitions */ | ||
67 | LOCK_EVENT(rwsem_wtrylock) /* # of write trylock calls */ | ||
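lock_events_list.h is an X-macro list: lock_events.h pulls it in with the default LOCK_EVENT() to build the enum, and lock_events.c re-includes it with LOCK_EVENT redefined to emit the matching name strings, so the two tables cannot drift apart. A standalone demo of the technique, with invented event names and the list folded into a single macro instead of a separate header for brevity:

/* Standalone demo of the X-macro pattern used by lock_events_list.h; the
 * event names are invented and the list lives in a macro rather than its
 * own header, but the technique is the same. */
#include <stdio.h>

#define DEMO_EVENT_LIST \
        DEMO_EVENT(alpha) \
        DEMO_EVENT(beta)  \
        DEMO_EVENT(gamma)

/* First expansion: build the enum. */
#define DEMO_EVENT(name) DEMOEVENT_ ## name,
enum demo_events {
        DEMO_EVENT_LIST
        demoevent_num,
};
#undef DEMO_EVENT

/* Second expansion: build the matching name table. */
#define DEMO_EVENT(name) [DEMOEVENT_ ## name] = #name,
static const char * const demo_names[demoevent_num] = {
        DEMO_EVENT_LIST
};
#undef DEMO_EVENT

int main(void)
{
        int i;

        for (i = 0; i < demoevent_num; i++)
                printf("%d -> %s\n", i, demo_names[i]);
        return 0;
}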
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 91c6b89f04df..27b992fe8cec 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -501,11 +501,11 @@ static char get_usage_char(struct lock_class *class, enum lock_usage_bit bit) | |||
501 | { | 501 | { |
502 | char c = '.'; | 502 | char c = '.'; |
503 | 503 | ||
504 | if (class->usage_mask & lock_flag(bit + 2)) | 504 | if (class->usage_mask & lock_flag(bit + LOCK_USAGE_DIR_MASK)) |
505 | c = '+'; | 505 | c = '+'; |
506 | if (class->usage_mask & lock_flag(bit)) { | 506 | if (class->usage_mask & lock_flag(bit)) { |
507 | c = '-'; | 507 | c = '-'; |
508 | if (class->usage_mask & lock_flag(bit + 2)) | 508 | if (class->usage_mask & lock_flag(bit + LOCK_USAGE_DIR_MASK)) |
509 | c = '?'; | 509 | c = '?'; |
510 | } | 510 | } |
511 | 511 | ||
@@ -1666,19 +1666,25 @@ check_redundant(struct lock_list *root, struct lock_class *target, | |||
1666 | } | 1666 | } |
1667 | 1667 | ||
1668 | #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) | 1668 | #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) |
1669 | |||
1670 | static inline int usage_accumulate(struct lock_list *entry, void *mask) | ||
1671 | { | ||
1672 | *(unsigned long *)mask |= entry->class->usage_mask; | ||
1673 | |||
1674 | return 0; | ||
1675 | } | ||
1676 | |||
1669 | /* | 1677 | /* |
1670 | * Forwards and backwards subgraph searching, for the purposes of | 1678 | * Forwards and backwards subgraph searching, for the purposes of |
1671 | * proving that two subgraphs can be connected by a new dependency | 1679 | * proving that two subgraphs can be connected by a new dependency |
1672 | * without creating any illegal irq-safe -> irq-unsafe lock dependency. | 1680 | * without creating any illegal irq-safe -> irq-unsafe lock dependency. |
1673 | */ | 1681 | */ |
1674 | 1682 | ||
1675 | static inline int usage_match(struct lock_list *entry, void *bit) | 1683 | static inline int usage_match(struct lock_list *entry, void *mask) |
1676 | { | 1684 | { |
1677 | return entry->class->usage_mask & (1 << (enum lock_usage_bit)bit); | 1685 | return entry->class->usage_mask & *(unsigned long *)mask; |
1678 | } | 1686 | } |
1679 | 1687 | ||
1680 | |||
1681 | |||
1682 | /* | 1688 | /* |
1683 | * Find a node in the forwards-direction dependency sub-graph starting | 1689 | * Find a node in the forwards-direction dependency sub-graph starting |
1684 | * at @root->class that matches @bit. | 1690 | * at @root->class that matches @bit. |
@@ -1690,14 +1696,14 @@ static inline int usage_match(struct lock_list *entry, void *bit) | |||
1690 | * Return <0 on error. | 1696 | * Return <0 on error. |
1691 | */ | 1697 | */ |
1692 | static int | 1698 | static int |
1693 | find_usage_forwards(struct lock_list *root, enum lock_usage_bit bit, | 1699 | find_usage_forwards(struct lock_list *root, unsigned long usage_mask, |
1694 | struct lock_list **target_entry) | 1700 | struct lock_list **target_entry) |
1695 | { | 1701 | { |
1696 | int result; | 1702 | int result; |
1697 | 1703 | ||
1698 | debug_atomic_inc(nr_find_usage_forwards_checks); | 1704 | debug_atomic_inc(nr_find_usage_forwards_checks); |
1699 | 1705 | ||
1700 | result = __bfs_forwards(root, (void *)bit, usage_match, target_entry); | 1706 | result = __bfs_forwards(root, &usage_mask, usage_match, target_entry); |
1701 | 1707 | ||
1702 | return result; | 1708 | return result; |
1703 | } | 1709 | } |
@@ -1713,14 +1719,14 @@ find_usage_forwards(struct lock_list *root, enum lock_usage_bit bit, | |||
1713 | * Return <0 on error. | 1719 | * Return <0 on error. |
1714 | */ | 1720 | */ |
1715 | static int | 1721 | static int |
1716 | find_usage_backwards(struct lock_list *root, enum lock_usage_bit bit, | 1722 | find_usage_backwards(struct lock_list *root, unsigned long usage_mask, |
1717 | struct lock_list **target_entry) | 1723 | struct lock_list **target_entry) |
1718 | { | 1724 | { |
1719 | int result; | 1725 | int result; |
1720 | 1726 | ||
1721 | debug_atomic_inc(nr_find_usage_backwards_checks); | 1727 | debug_atomic_inc(nr_find_usage_backwards_checks); |
1722 | 1728 | ||
1723 | result = __bfs_backwards(root, (void *)bit, usage_match, target_entry); | 1729 | result = __bfs_backwards(root, &usage_mask, usage_match, target_entry); |
1724 | 1730 | ||
1725 | return result; | 1731 | return result; |
1726 | } | 1732 | } |
@@ -1912,39 +1918,6 @@ print_bad_irq_dependency(struct task_struct *curr, | |||
1912 | return 0; | 1918 | return 0; |
1913 | } | 1919 | } |
1914 | 1920 | ||
1915 | static int | ||
1916 | check_usage(struct task_struct *curr, struct held_lock *prev, | ||
1917 | struct held_lock *next, enum lock_usage_bit bit_backwards, | ||
1918 | enum lock_usage_bit bit_forwards, const char *irqclass) | ||
1919 | { | ||
1920 | int ret; | ||
1921 | struct lock_list this, that; | ||
1922 | struct lock_list *uninitialized_var(target_entry); | ||
1923 | struct lock_list *uninitialized_var(target_entry1); | ||
1924 | |||
1925 | this.parent = NULL; | ||
1926 | |||
1927 | this.class = hlock_class(prev); | ||
1928 | ret = find_usage_backwards(&this, bit_backwards, &target_entry); | ||
1929 | if (ret < 0) | ||
1930 | return print_bfs_bug(ret); | ||
1931 | if (ret == 1) | ||
1932 | return ret; | ||
1933 | |||
1934 | that.parent = NULL; | ||
1935 | that.class = hlock_class(next); | ||
1936 | ret = find_usage_forwards(&that, bit_forwards, &target_entry1); | ||
1937 | if (ret < 0) | ||
1938 | return print_bfs_bug(ret); | ||
1939 | if (ret == 1) | ||
1940 | return ret; | ||
1941 | |||
1942 | return print_bad_irq_dependency(curr, &this, &that, | ||
1943 | target_entry, target_entry1, | ||
1944 | prev, next, | ||
1945 | bit_backwards, bit_forwards, irqclass); | ||
1946 | } | ||
1947 | |||
1948 | static const char *state_names[] = { | 1921 | static const char *state_names[] = { |
1949 | #define LOCKDEP_STATE(__STATE) \ | 1922 | #define LOCKDEP_STATE(__STATE) \ |
1950 | __stringify(__STATE), | 1923 | __stringify(__STATE), |
@@ -1961,9 +1934,19 @@ static const char *state_rnames[] = { | |||
1961 | 1934 | ||
1962 | static inline const char *state_name(enum lock_usage_bit bit) | 1935 | static inline const char *state_name(enum lock_usage_bit bit) |
1963 | { | 1936 | { |
1964 | return (bit & LOCK_USAGE_READ_MASK) ? state_rnames[bit >> 2] : state_names[bit >> 2]; | 1937 | if (bit & LOCK_USAGE_READ_MASK) |
1938 | return state_rnames[bit >> LOCK_USAGE_DIR_MASK]; | ||
1939 | else | ||
1940 | return state_names[bit >> LOCK_USAGE_DIR_MASK]; | ||
1965 | } | 1941 | } |
1966 | 1942 | ||
1943 | /* | ||
1944 | * The bit number is encoded like: | ||
1945 | * | ||
1946 | * bit0: 0 exclusive, 1 read lock | ||
1947 | * bit1: 0 used in irq, 1 irq enabled | ||
1948 | * bit2-n: state | ||
1949 | */ | ||
1967 | static int exclusive_bit(int new_bit) | 1950 | static int exclusive_bit(int new_bit) |
1968 | { | 1951 | { |
1969 | int state = new_bit & LOCK_USAGE_STATE_MASK; | 1952 | int state = new_bit & LOCK_USAGE_STATE_MASK; |
@@ -1975,45 +1958,160 @@ static int exclusive_bit(int new_bit) | |||
1975 | return state | (dir ^ LOCK_USAGE_DIR_MASK); | 1958 | return state | (dir ^ LOCK_USAGE_DIR_MASK); |
1976 | } | 1959 | } |
1977 | 1960 | ||
1961 | /* | ||
1962 | * Observe that when given a bitmask where each bitnr is encoded as above, a | ||
1963 | * right shift of the mask transforms the individual bitnrs as -1 and | ||
1964 | * conversely, a left shift transforms into +1 for the individual bitnrs. | ||
1965 | * | ||
1966 | * So for all bits whose number have LOCK_ENABLED_* set (bitnr1 == 1), we can | ||
1967 | * create the mask with those bit numbers using LOCK_USED_IN_* (bitnr1 == 0) | ||
1968 | * instead by subtracting the bit number by 2, or shifting the mask right by 2. | ||
1969 | * | ||
1970 | * Similarly, bitnr1 == 0 becomes bitnr1 == 1 by adding 2, or shifting left 2. | ||
1971 | * | ||
1972 | * So split the mask (note that LOCKF_ENABLED_IRQ_ALL|LOCKF_USED_IN_IRQ_ALL is | ||
1973 | * all bits set) and recompose with bitnr1 flipped. | ||
1974 | */ | ||
1975 | static unsigned long invert_dir_mask(unsigned long mask) | ||
1976 | { | ||
1977 | unsigned long excl = 0; | ||
1978 | |||
1979 | /* Invert dir */ | ||
1980 | excl |= (mask & LOCKF_ENABLED_IRQ_ALL) >> LOCK_USAGE_DIR_MASK; | ||
1981 | excl |= (mask & LOCKF_USED_IN_IRQ_ALL) << LOCK_USAGE_DIR_MASK; | ||
1982 | |||
1983 | return excl; | ||
1984 | } | ||
1985 | |||
1986 | /* | ||
1987 | * As above, we clear bitnr0 (LOCK_*_READ off) with bitmask ops. First, for all | ||
1988 | * bits with bitnr0 set (LOCK_*_READ), add those with bitnr0 cleared (LOCK_*). | ||
1989 | * And then mask out all bitnr0. | ||
1990 | */ | ||
1991 | static unsigned long exclusive_mask(unsigned long mask) | ||
1992 | { | ||
1993 | unsigned long excl = invert_dir_mask(mask); | ||
1994 | |||
1995 | /* Strip read */ | ||
1996 | excl |= (excl & LOCKF_IRQ_READ) >> LOCK_USAGE_READ_MASK; | ||
1997 | excl &= ~LOCKF_IRQ_READ; | ||
1998 | |||
1999 | return excl; | ||
2000 | } | ||
2001 | |||
2002 | /* | ||
2003 | * Retrieve the _possible_ original mask to which @mask is | ||
2004 | * exclusive. Ie: this is the opposite of exclusive_mask(). | ||
2005 | * Note that 2 possible original bits can match an exclusive | ||
2006 | * bit: one has LOCK_USAGE_READ_MASK set, the other has it | ||
2007 | * cleared. So both are returned for each exclusive bit. | ||
2008 | */ | ||
2009 | static unsigned long original_mask(unsigned long mask) | ||
2010 | { | ||
2011 | unsigned long excl = invert_dir_mask(mask); | ||
2012 | |||
2013 | /* Include read in existing usages */ | ||
2014 | excl |= (excl & LOCKF_IRQ) << LOCK_USAGE_READ_MASK; | ||
2015 | |||
2016 | return excl; | ||
2017 | } | ||
2018 | |||
2019 | /* | ||
2020 | * Find the first pair of bit match between an original | ||
2021 | * usage mask and an exclusive usage mask. | ||
2022 | */ | ||
2023 | static int find_exclusive_match(unsigned long mask, | ||
2024 | unsigned long excl_mask, | ||
2025 | enum lock_usage_bit *bitp, | ||
2026 | enum lock_usage_bit *excl_bitp) | ||
2027 | { | ||
2028 | int bit, excl; | ||
2029 | |||
2030 | for_each_set_bit(bit, &mask, LOCK_USED) { | ||
2031 | excl = exclusive_bit(bit); | ||
2032 | if (excl_mask & lock_flag(excl)) { | ||
2033 | *bitp = bit; | ||
2034 | *excl_bitp = excl; | ||
2035 | return 0; | ||
2036 | } | ||
2037 | } | ||
2038 | return -1; | ||
2039 | } | ||
2040 | |||
2041 | /* | ||
2042 | * Prove that the new dependency does not connect a hardirq-safe(-read) | ||
2043 | * lock with a hardirq-unsafe lock - to achieve this we search | ||
2044 | * the backwards-subgraph starting at <prev>, and the | ||
2045 | * forwards-subgraph starting at <next>: | ||
2046 | */ | ||
1978 | static int check_irq_usage(struct task_struct *curr, struct held_lock *prev, | 2047 | static int check_irq_usage(struct task_struct *curr, struct held_lock *prev, |
1979 | struct held_lock *next, enum lock_usage_bit bit) | 2048 | struct held_lock *next) |
1980 | { | 2049 | { |
2050 | unsigned long usage_mask = 0, forward_mask, backward_mask; | ||
2051 | enum lock_usage_bit forward_bit = 0, backward_bit = 0; | ||
2052 | struct lock_list *uninitialized_var(target_entry1); | ||
2053 | struct lock_list *uninitialized_var(target_entry); | ||
2054 | struct lock_list this, that; | ||
2055 | int ret; | ||
2056 | |||
1981 | /* | 2057 | /* |
1982 | * Prove that the new dependency does not connect a hardirq-safe | 2058 | * Step 1: gather all hard/soft IRQs usages backward in an |
1983 | * lock with a hardirq-unsafe lock - to achieve this we search | 2059 | * accumulated usage mask. |
1984 | * the backwards-subgraph starting at <prev>, and the | ||
1985 | * forwards-subgraph starting at <next>: | ||
1986 | */ | 2060 | */ |
1987 | if (!check_usage(curr, prev, next, bit, | 2061 | this.parent = NULL; |
1988 | exclusive_bit(bit), state_name(bit))) | 2062 | this.class = hlock_class(prev); |
1989 | return 0; | 2063 | |
2064 | ret = __bfs_backwards(&this, &usage_mask, usage_accumulate, NULL); | ||
2065 | if (ret < 0) | ||
2066 | return print_bfs_bug(ret); | ||
1990 | 2067 | ||
1991 | bit++; /* _READ */ | 2068 | usage_mask &= LOCKF_USED_IN_IRQ_ALL; |
2069 | if (!usage_mask) | ||
2070 | return 1; | ||
1992 | 2071 | ||
1993 | /* | 2072 | /* |
1994 | * Prove that the new dependency does not connect a hardirq-safe-read | 2073 | * Step 2: find exclusive uses forward that match the previous |
1995 | * lock with a hardirq-unsafe lock - to achieve this we search | 2074 | * backward accumulated mask. |
1996 | * the backwards-subgraph starting at <prev>, and the | ||
1997 | * forwards-subgraph starting at <next>: | ||
1998 | */ | 2075 | */ |
1999 | if (!check_usage(curr, prev, next, bit, | 2076 | forward_mask = exclusive_mask(usage_mask); |
2000 | exclusive_bit(bit), state_name(bit))) | ||
2001 | return 0; | ||
2002 | 2077 | ||
2003 | return 1; | 2078 | that.parent = NULL; |
2004 | } | 2079 | that.class = hlock_class(next); |
2005 | 2080 | ||
2006 | static int | 2081 | ret = find_usage_forwards(&that, forward_mask, &target_entry1); |
2007 | check_prev_add_irq(struct task_struct *curr, struct held_lock *prev, | 2082 | if (ret < 0) |
2008 | struct held_lock *next) | 2083 | return print_bfs_bug(ret); |
2009 | { | 2084 | if (ret == 1) |
2010 | #define LOCKDEP_STATE(__STATE) \ | 2085 | return ret; |
2011 | if (!check_irq_usage(curr, prev, next, LOCK_USED_IN_##__STATE)) \ | ||
2012 | return 0; | ||
2013 | #include "lockdep_states.h" | ||
2014 | #undef LOCKDEP_STATE | ||
2015 | 2086 | ||
2016 | return 1; | 2087 | /* |
2088 | * Step 3: we found a bad match! Now retrieve a lock from the backward | ||
2089 | * list whose usage mask matches the exclusive usage mask from the | ||
2090 | * lock found on the forward list. | ||
2091 | */ | ||
2092 | backward_mask = original_mask(target_entry1->class->usage_mask); | ||
2093 | |||
2094 | ret = find_usage_backwards(&this, backward_mask, &target_entry); | ||
2095 | if (ret < 0) | ||
2096 | return print_bfs_bug(ret); | ||
2097 | if (DEBUG_LOCKS_WARN_ON(ret == 1)) | ||
2098 | return 1; | ||
2099 | |||
2100 | /* | ||
2101 | * Step 4: narrow down to a pair of incompatible usage bits | ||
2102 | * and report it. | ||
2103 | */ | ||
2104 | ret = find_exclusive_match(target_entry->class->usage_mask, | ||
2105 | target_entry1->class->usage_mask, | ||
2106 | &backward_bit, &forward_bit); | ||
2107 | if (DEBUG_LOCKS_WARN_ON(ret == -1)) | ||
2108 | return 1; | ||
2109 | |||
2110 | return print_bad_irq_dependency(curr, &this, &that, | ||
2111 | target_entry, target_entry1, | ||
2112 | prev, next, | ||
2113 | backward_bit, forward_bit, | ||
2114 | state_name(backward_bit)); | ||
2017 | } | 2115 | } |
2018 | 2116 | ||
2019 | static void inc_chains(void) | 2117 | static void inc_chains(void) |
@@ -2030,9 +2128,8 @@ static void inc_chains(void) | |||
2030 | 2128 | ||
2031 | #else | 2129 | #else |
2032 | 2130 | ||
2033 | static inline int | 2131 | static inline int check_irq_usage(struct task_struct *curr, |
2034 | check_prev_add_irq(struct task_struct *curr, struct held_lock *prev, | 2132 | struct held_lock *prev, struct held_lock *next) |
2035 | struct held_lock *next) | ||
2036 | { | 2133 | { |
2037 | return 1; | 2134 | return 1; |
2038 | } | 2135 | } |
@@ -2211,7 +2308,7 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, | |||
2211 | else if (unlikely(ret < 0)) | 2308 | else if (unlikely(ret < 0)) |
2212 | return print_bfs_bug(ret); | 2309 | return print_bfs_bug(ret); |
2213 | 2310 | ||
2214 | if (!check_prev_add_irq(curr, prev, next)) | 2311 | if (!check_irq_usage(curr, prev, next)) |
2215 | return 0; | 2312 | return 0; |
2216 | 2313 | ||
2217 | /* | 2314 | /* |
@@ -2773,6 +2870,12 @@ static void check_chain_key(struct task_struct *curr) | |||
2773 | #endif | 2870 | #endif |
2774 | } | 2871 | } |
2775 | 2872 | ||
2873 | static int mark_lock(struct task_struct *curr, struct held_lock *this, | ||
2874 | enum lock_usage_bit new_bit); | ||
2875 | |||
2876 | #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) | ||
2877 | |||
2878 | |||
2776 | static void | 2879 | static void |
2777 | print_usage_bug_scenario(struct held_lock *lock) | 2880 | print_usage_bug_scenario(struct held_lock *lock) |
2778 | { | 2881 | { |
@@ -2842,10 +2945,6 @@ valid_state(struct task_struct *curr, struct held_lock *this, | |||
2842 | return 1; | 2945 | return 1; |
2843 | } | 2946 | } |
2844 | 2947 | ||
2845 | static int mark_lock(struct task_struct *curr, struct held_lock *this, | ||
2846 | enum lock_usage_bit new_bit); | ||
2847 | |||
2848 | #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) | ||
2849 | 2948 | ||
2850 | /* | 2949 | /* |
2851 | * print irq inversion bug: | 2950 | * print irq inversion bug: |
@@ -2925,7 +3024,7 @@ check_usage_forwards(struct task_struct *curr, struct held_lock *this, | |||
2925 | 3024 | ||
2926 | root.parent = NULL; | 3025 | root.parent = NULL; |
2927 | root.class = hlock_class(this); | 3026 | root.class = hlock_class(this); |
2928 | ret = find_usage_forwards(&root, bit, &target_entry); | 3027 | ret = find_usage_forwards(&root, lock_flag(bit), &target_entry); |
2929 | if (ret < 0) | 3028 | if (ret < 0) |
2930 | return print_bfs_bug(ret); | 3029 | return print_bfs_bug(ret); |
2931 | if (ret == 1) | 3030 | if (ret == 1) |
@@ -2949,7 +3048,7 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this, | |||
2949 | 3048 | ||
2950 | root.parent = NULL; | 3049 | root.parent = NULL; |
2951 | root.class = hlock_class(this); | 3050 | root.class = hlock_class(this); |
2952 | ret = find_usage_backwards(&root, bit, &target_entry); | 3051 | ret = find_usage_backwards(&root, lock_flag(bit), &target_entry); |
2953 | if (ret < 0) | 3052 | if (ret < 0) |
2954 | return print_bfs_bug(ret); | 3053 | return print_bfs_bug(ret); |
2955 | if (ret == 1) | 3054 | if (ret == 1) |
@@ -3004,7 +3103,7 @@ static int (*state_verbose_f[])(struct lock_class *class) = { | |||
3004 | static inline int state_verbose(enum lock_usage_bit bit, | 3103 | static inline int state_verbose(enum lock_usage_bit bit, |
3005 | struct lock_class *class) | 3104 | struct lock_class *class) |
3006 | { | 3105 | { |
3007 | return state_verbose_f[bit >> 2](class); | 3106 | return state_verbose_f[bit >> LOCK_USAGE_DIR_MASK](class); |
3008 | } | 3107 | } |
3009 | 3108 | ||
3010 | typedef int (*check_usage_f)(struct task_struct *, struct held_lock *, | 3109 | typedef int (*check_usage_f)(struct task_struct *, struct held_lock *, |
@@ -3146,7 +3245,7 @@ void lockdep_hardirqs_on(unsigned long ip) | |||
3146 | /* | 3245 | /* |
3147 | * See the fine text that goes along with this variable definition. | 3246 | * See the fine text that goes along with this variable definition. |
3148 | */ | 3247 | */ |
3149 | if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled))) | 3248 | if (DEBUG_LOCKS_WARN_ON(early_boot_irqs_disabled)) |
3150 | return; | 3249 | return; |
3151 | 3250 | ||
3152 | /* | 3251 | /* |
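The lockdep rework above leans on the usage-bit encoding spelled out in the new comment: bit0 marks a read usage, bit1 the direction (USED_IN vs ENABLED), and the remaining bits the IRQ state. exclusive_bit() flips bit1 and drops bit0, and the new invert_dir_mask()/exclusive_mask()/original_mask() helpers apply the same transform to whole masks at once, which is what lets check_irq_usage() cover every IRQ state with a single pair of BFS passes instead of one check_usage() call per state. A standalone arithmetic check of the per-bit transform (the *_MASK constants mirror the lockdep encoding and are restated here for the demo):

/* Standalone check of the usage-bit transform; the *_MASK values below
 * mirror the lockdep encoding (bit0 = read, bit1 = direction). */
#include <stdio.h>

#define LOCK_USAGE_READ_MASK    1
#define LOCK_USAGE_DIR_MASK     2
#define LOCK_USAGE_STATE_MASK   (~(LOCK_USAGE_READ_MASK | LOCK_USAGE_DIR_MASK))

/* Same body as lockdep's exclusive_bit(): keep the state, flip the
 * direction, drop the read bit. */
static int exclusive_bit(int new_bit)
{
        int state = new_bit & LOCK_USAGE_STATE_MASK;
        int dir = new_bit & LOCK_USAGE_DIR_MASK;

        return state | (dir ^ LOCK_USAGE_DIR_MASK);
}

int main(void)
{
        /* With the state field (bits 2+) equal to zero, i.e. HARDIRQ:
         * 0 = USED_IN_HARDIRQ, 1 = USED_IN_HARDIRQ_READ,
         * 2 = ENABLED_HARDIRQ, 3 = ENABLED_HARDIRQ_READ. */
        int bit;

        for (bit = 0; bit < 4; bit++)
                printf("bit %d (read=%d dir=%d) -> exclusive bit %d\n",
                       bit, bit & LOCK_USAGE_READ_MASK,
                       !!(bit & LOCK_USAGE_DIR_MASK), exclusive_bit(bit));
        return 0;
}

Both USED_IN_HARDIRQ variants map to ENABLED_HARDIRQ and both ENABLED_HARDIRQ variants map back to USED_IN_HARDIRQ, matching the pairing the BFS passes are checking for.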
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h
index d4c197425f68..150ec3f0c5b5 100644
--- a/kernel/locking/lockdep_internals.h
+++ b/kernel/locking/lockdep_internals.h
@@ -42,13 +42,35 @@ enum { | |||
42 | __LOCKF(USED) | 42 | __LOCKF(USED) |
43 | }; | 43 | }; |
44 | 44 | ||
45 | #define LOCKF_ENABLED_IRQ (LOCKF_ENABLED_HARDIRQ | LOCKF_ENABLED_SOFTIRQ) | 45 | #define LOCKDEP_STATE(__STATE) LOCKF_ENABLED_##__STATE | |
46 | #define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ) | 46 | static const unsigned long LOCKF_ENABLED_IRQ = |
47 | #include "lockdep_states.h" | ||
48 | 0; | ||
49 | #undef LOCKDEP_STATE | ||
50 | |||
51 | #define LOCKDEP_STATE(__STATE) LOCKF_USED_IN_##__STATE | | ||
52 | static const unsigned long LOCKF_USED_IN_IRQ = | ||
53 | #include "lockdep_states.h" | ||
54 | 0; | ||
55 | #undef LOCKDEP_STATE | ||
56 | |||
57 | #define LOCKDEP_STATE(__STATE) LOCKF_ENABLED_##__STATE##_READ | | ||
58 | static const unsigned long LOCKF_ENABLED_IRQ_READ = | ||
59 | #include "lockdep_states.h" | ||
60 | 0; | ||
61 | #undef LOCKDEP_STATE | ||
62 | |||
63 | #define LOCKDEP_STATE(__STATE) LOCKF_USED_IN_##__STATE##_READ | | ||
64 | static const unsigned long LOCKF_USED_IN_IRQ_READ = | ||
65 | #include "lockdep_states.h" | ||
66 | 0; | ||
67 | #undef LOCKDEP_STATE | ||
68 | |||
69 | #define LOCKF_ENABLED_IRQ_ALL (LOCKF_ENABLED_IRQ | LOCKF_ENABLED_IRQ_READ) | ||
70 | #define LOCKF_USED_IN_IRQ_ALL (LOCKF_USED_IN_IRQ | LOCKF_USED_IN_IRQ_READ) | ||
47 | 71 | ||
48 | #define LOCKF_ENABLED_IRQ_READ \ | 72 | #define LOCKF_IRQ (LOCKF_ENABLED_IRQ | LOCKF_USED_IN_IRQ) |
49 | (LOCKF_ENABLED_HARDIRQ_READ | LOCKF_ENABLED_SOFTIRQ_READ) | 73 | #define LOCKF_IRQ_READ (LOCKF_ENABLED_IRQ_READ | LOCKF_USED_IN_IRQ_READ) |
50 | #define LOCKF_USED_IN_IRQ_READ \ | ||
51 | (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ) | ||
52 | 74 | ||
53 | /* | 75 | /* |
54 | * CONFIG_LOCKDEP_SMALL is defined for sparc. Sparc requires .text, | 76 | * CONFIG_LOCKDEP_SMALL is defined for sparc. Sparc requires .text, |
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 883cf1b92d90..f17dad99eec8 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -7,6 +7,8 @@ | |||
7 | #include <linux/sched.h> | 7 | #include <linux/sched.h> |
8 | #include <linux/errno.h> | 8 | #include <linux/errno.h> |
9 | 9 | ||
10 | #include "rwsem.h" | ||
11 | |||
10 | int __percpu_init_rwsem(struct percpu_rw_semaphore *sem, | 12 | int __percpu_init_rwsem(struct percpu_rw_semaphore *sem, |
11 | const char *name, struct lock_class_key *rwsem_key) | 13 | const char *name, struct lock_class_key *rwsem_key) |
12 | { | 14 | { |
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 5e9247dc2515..e14b32c69639 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -395,7 +395,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) | |||
395 | * 0,1,0 -> 0,0,1 | 395 | * 0,1,0 -> 0,0,1 |
396 | */ | 396 | */ |
397 | clear_pending_set_locked(lock); | 397 | clear_pending_set_locked(lock); |
398 | qstat_inc(qstat_lock_pending, true); | 398 | lockevent_inc(lock_pending); |
399 | return; | 399 | return; |
400 | 400 | ||
401 | /* | 401 | /* |
@@ -403,7 +403,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) | |||
403 | * queuing. | 403 | * queuing. |
404 | */ | 404 | */ |
405 | queue: | 405 | queue: |
406 | qstat_inc(qstat_lock_slowpath, true); | 406 | lockevent_inc(lock_slowpath); |
407 | pv_queue: | 407 | pv_queue: |
408 | node = this_cpu_ptr(&qnodes[0].mcs); | 408 | node = this_cpu_ptr(&qnodes[0].mcs); |
409 | idx = node->count++; | 409 | idx = node->count++; |
@@ -419,7 +419,7 @@ pv_queue: | |||
419 | * simple enough. | 419 | * simple enough. |
420 | */ | 420 | */ |
421 | if (unlikely(idx >= MAX_NODES)) { | 421 | if (unlikely(idx >= MAX_NODES)) { |
422 | qstat_inc(qstat_lock_no_node, true); | 422 | lockevent_inc(lock_no_node); |
423 | while (!queued_spin_trylock(lock)) | 423 | while (!queued_spin_trylock(lock)) |
424 | cpu_relax(); | 424 | cpu_relax(); |
425 | goto release; | 425 | goto release; |
@@ -430,7 +430,7 @@ pv_queue: | |||
430 | /* | 430 | /* |
431 | * Keep counts of non-zero index values: | 431 | * Keep counts of non-zero index values: |
432 | */ | 432 | */ |
433 | qstat_inc(qstat_lock_use_node2 + idx - 1, idx); | 433 | lockevent_cond_inc(lock_use_node2 + idx - 1, idx); |
434 | 434 | ||
435 | /* | 435 | /* |
436 | * Ensure that we increment the head node->count before initialising | 436 | * Ensure that we increment the head node->count before initialising |
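The lockevent_cond_inc(lock_use_node2 + idx - 1, idx) call above depends on the node2/3/4 events sitting consecutively in the enum, so idx 1..3 select the right counter and idx == 0 simply makes the condition false. A tiny sketch of that indexing trick, with the helper reduced to a plain function:

/* Sketch of the consecutive-enum trick behind
 * lockevent_cond_inc(lock_use_node2 + idx - 1, idx); the helper is
 * reduced to a plain function here. */
#include <stdio.h>

enum lock_events {
        LOCKEVENT_lock_use_node2,
        LOCKEVENT_lock_use_node3,
        LOCKEVENT_lock_use_node4,
        lockevent_num,
};

static unsigned long lockevents[lockevent_num];

static void lockevent_cond_inc(int event, int cond)
{
        if (cond)
                lockevents[event]++;
}

int main(void)
{
        int idx;

        for (idx = 0; idx < 4; idx++) {
                /* idx == 0 counts nothing; idx 1..3 pick node2..node4. */
                lockevent_cond_inc(LOCKEVENT_lock_use_node2 + idx - 1, idx);
        }
        printf("node2=%lu node3=%lu node4=%lu\n",
               lockevents[LOCKEVENT_lock_use_node2],
               lockevents[LOCKEVENT_lock_use_node3],
               lockevents[LOCKEVENT_lock_use_node4]);
        return 0;
}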
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 8f36c27c1794..89bab079e7a4 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -89,7 +89,7 @@ static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock) | |||
89 | 89 | ||
90 | if (!(val & _Q_LOCKED_PENDING_MASK) && | 90 | if (!(val & _Q_LOCKED_PENDING_MASK) && |
91 | (cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) { | 91 | (cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) { |
92 | qstat_inc(qstat_pv_lock_stealing, true); | 92 | lockevent_inc(pv_lock_stealing); |
93 | return true; | 93 | return true; |
94 | } | 94 | } |
95 | if (!(val & _Q_TAIL_MASK) || (val & _Q_PENDING_MASK)) | 95 | if (!(val & _Q_TAIL_MASK) || (val & _Q_PENDING_MASK)) |
@@ -219,7 +219,7 @@ static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node) | |||
219 | hopcnt++; | 219 | hopcnt++; |
220 | if (!cmpxchg(&he->lock, NULL, lock)) { | 220 | if (!cmpxchg(&he->lock, NULL, lock)) { |
221 | WRITE_ONCE(he->node, node); | 221 | WRITE_ONCE(he->node, node); |
222 | qstat_hop(hopcnt); | 222 | lockevent_pv_hop(hopcnt); |
223 | return &he->lock; | 223 | return &he->lock; |
224 | } | 224 | } |
225 | } | 225 | } |
@@ -320,8 +320,8 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev) | |||
320 | smp_store_mb(pn->state, vcpu_halted); | 320 | smp_store_mb(pn->state, vcpu_halted); |
321 | 321 | ||
322 | if (!READ_ONCE(node->locked)) { | 322 | if (!READ_ONCE(node->locked)) { |
323 | qstat_inc(qstat_pv_wait_node, true); | 323 | lockevent_inc(pv_wait_node); |
324 | qstat_inc(qstat_pv_wait_early, wait_early); | 324 | lockevent_cond_inc(pv_wait_early, wait_early); |
325 | pv_wait(&pn->state, vcpu_halted); | 325 | pv_wait(&pn->state, vcpu_halted); |
326 | } | 326 | } |
327 | 327 | ||
@@ -339,7 +339,8 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev) | |||
339 | * So it is better to spin for a while in the hope that the | 339 | * So it is better to spin for a while in the hope that the |
340 | * MCS lock will be released soon. | 340 | * MCS lock will be released soon. |
341 | */ | 341 | */ |
342 | qstat_inc(qstat_pv_spurious_wakeup, !READ_ONCE(node->locked)); | 342 | lockevent_cond_inc(pv_spurious_wakeup, |
343 | !READ_ONCE(node->locked)); | ||
343 | } | 344 | } |
344 | 345 | ||
345 | /* | 346 | /* |
@@ -416,7 +417,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) | |||
416 | /* | 417 | /* |
417 | * Tracking # of slowpath locking operations | 418 | * Tracking # of slowpath locking operations |
418 | */ | 419 | */ |
419 | qstat_inc(qstat_lock_slowpath, true); | 420 | lockevent_inc(lock_slowpath); |
420 | 421 | ||
421 | for (;; waitcnt++) { | 422 | for (;; waitcnt++) { |
422 | /* | 423 | /* |
@@ -464,8 +465,8 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) | |||
464 | } | 465 | } |
465 | } | 466 | } |
466 | WRITE_ONCE(pn->state, vcpu_hashed); | 467 | WRITE_ONCE(pn->state, vcpu_hashed); |
467 | qstat_inc(qstat_pv_wait_head, true); | 468 | lockevent_inc(pv_wait_head); |
468 | qstat_inc(qstat_pv_wait_again, waitcnt); | 469 | lockevent_cond_inc(pv_wait_again, waitcnt); |
469 | pv_wait(&lock->locked, _Q_SLOW_VAL); | 470 | pv_wait(&lock->locked, _Q_SLOW_VAL); |
470 | 471 | ||
471 | /* | 472 | /* |
@@ -528,7 +529,7 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked) | |||
528 | * vCPU is harmless other than the additional latency in completing | 529 | * vCPU is harmless other than the additional latency in completing |
529 | * the unlock. | 530 | * the unlock. |
530 | */ | 531 | */ |
531 | qstat_inc(qstat_pv_kick_unlock, true); | 532 | lockevent_inc(pv_kick_unlock); |
532 | pv_kick(node->cpu); | 533 | pv_kick(node->cpu); |
533 | } | 534 | } |
534 | 535 | ||
diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
index d73f85388d5c..54152670ff24 100644
--- a/kernel/locking/qspinlock_stat.h
+++ b/kernel/locking/qspinlock_stat.h
@@ -9,262 +9,105 @@ | |||
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
10 | * GNU General Public License for more details. | 10 | * GNU General Public License for more details. |
11 | * | 11 | * |
12 | * Authors: Waiman Long <waiman.long@hpe.com> | 12 | * Authors: Waiman Long <longman@redhat.com> |
13 | */ | 13 | */ |
14 | 14 | ||
15 | /* | 15 | #include "lock_events.h" |
16 | * When queued spinlock statistical counters are enabled, the following | ||
17 | * debugfs files will be created for reporting the counter values: | ||
18 | * | ||
19 | * <debugfs>/qlockstat/ | ||
20 | * pv_hash_hops - average # of hops per hashing operation | ||
21 | * pv_kick_unlock - # of vCPU kicks issued at unlock time | ||
22 | * pv_kick_wake - # of vCPU kicks used for computing pv_latency_wake | ||
23 | * pv_latency_kick - average latency (ns) of vCPU kick operation | ||
24 | * pv_latency_wake - average latency (ns) from vCPU kick to wakeup | ||
25 | * pv_lock_stealing - # of lock stealing operations | ||
26 | * pv_spurious_wakeup - # of spurious wakeups in non-head vCPUs | ||
27 | * pv_wait_again - # of wait's after a queue head vCPU kick | ||
28 | * pv_wait_early - # of early vCPU wait's | ||
29 | * pv_wait_head - # of vCPU wait's at the queue head | ||
30 | * pv_wait_node - # of vCPU wait's at a non-head queue node | ||
31 | * lock_pending - # of locking operations via pending code | ||
32 | * lock_slowpath - # of locking operations via MCS lock queue | ||
33 | * lock_use_node2 - # of locking operations that use 2nd per-CPU node | ||
34 | * lock_use_node3 - # of locking operations that use 3rd per-CPU node | ||
35 | * lock_use_node4 - # of locking operations that use 4th per-CPU node | ||
36 | * lock_no_node - # of locking operations without using per-CPU node | ||
37 | * | ||
38 | * Subtracting lock_use_node[234] from lock_slowpath will give you | ||
39 | * lock_use_node1. | ||
40 | * | ||
41 | * Writing to the "reset_counters" file will reset all the above counter | ||
42 | * values. | ||
43 | * | ||
44 | * These statistical counters are implemented as per-cpu variables which are | ||
45 | * summed and computed whenever the corresponding debugfs files are read. This | ||
46 | * minimizes added overhead making the counters usable even in a production | ||
47 | * environment. | ||
48 | * | ||
49 | * There may be a slight difference between pv_kick_wake and pv_kick_unlock. | ||
50 | */ | ||
51 | enum qlock_stats { | ||
52 | qstat_pv_hash_hops, | ||
53 | qstat_pv_kick_unlock, | ||
54 | qstat_pv_kick_wake, | ||
55 | qstat_pv_latency_kick, | ||
56 | qstat_pv_latency_wake, | ||
57 | qstat_pv_lock_stealing, | ||
58 | qstat_pv_spurious_wakeup, | ||
59 | qstat_pv_wait_again, | ||
60 | qstat_pv_wait_early, | ||
61 | qstat_pv_wait_head, | ||
62 | qstat_pv_wait_node, | ||
63 | qstat_lock_pending, | ||
64 | qstat_lock_slowpath, | ||
65 | qstat_lock_use_node2, | ||
66 | qstat_lock_use_node3, | ||
67 | qstat_lock_use_node4, | ||
68 | qstat_lock_no_node, | ||
69 | qstat_num, /* Total number of statistical counters */ | ||
70 | qstat_reset_cnts = qstat_num, | ||
71 | }; | ||
72 | 16 | ||
73 | #ifdef CONFIG_QUEUED_LOCK_STAT | 17 | #ifdef CONFIG_LOCK_EVENT_COUNTS |
18 | #ifdef CONFIG_PARAVIRT_SPINLOCKS | ||
74 | /* | 19 | /* |
75 | * Collect pvqspinlock statistics | 20 | * Collect pvqspinlock locking event counts |
76 | */ | 21 | */ |
77 | #include <linux/debugfs.h> | ||
78 | #include <linux/sched.h> | 22 | #include <linux/sched.h> |
79 | #include <linux/sched/clock.h> | 23 | #include <linux/sched/clock.h> |
80 | #include <linux/fs.h> | 24 | #include <linux/fs.h> |
81 | 25 | ||
82 | static const char * const qstat_names[qstat_num + 1] = { | 26 | #define EVENT_COUNT(ev) lockevents[LOCKEVENT_ ## ev] |
83 | [qstat_pv_hash_hops] = "pv_hash_hops", | ||
84 | [qstat_pv_kick_unlock] = "pv_kick_unlock", | ||
85 | [qstat_pv_kick_wake] = "pv_kick_wake", | ||
86 | [qstat_pv_spurious_wakeup] = "pv_spurious_wakeup", | ||
87 | [qstat_pv_latency_kick] = "pv_latency_kick", | ||
88 | [qstat_pv_latency_wake] = "pv_latency_wake", | ||
89 | [qstat_pv_lock_stealing] = "pv_lock_stealing", | ||
90 | [qstat_pv_wait_again] = "pv_wait_again", | ||
91 | [qstat_pv_wait_early] = "pv_wait_early", | ||
92 | [qstat_pv_wait_head] = "pv_wait_head", | ||
93 | [qstat_pv_wait_node] = "pv_wait_node", | ||
94 | [qstat_lock_pending] = "lock_pending", | ||
95 | [qstat_lock_slowpath] = "lock_slowpath", | ||
96 | [qstat_lock_use_node2] = "lock_use_node2", | ||
97 | [qstat_lock_use_node3] = "lock_use_node3", | ||
98 | [qstat_lock_use_node4] = "lock_use_node4", | ||
99 | [qstat_lock_no_node] = "lock_no_node", | ||
100 | [qstat_reset_cnts] = "reset_counters", | ||
101 | }; | ||
102 | 27 | ||
103 | /* | 28 | /* |
104 | * Per-cpu counters | 29 | * PV specific per-cpu counter |
105 | */ | 30 | */ |
106 | static DEFINE_PER_CPU(unsigned long, qstats[qstat_num]); | ||
107 | static DEFINE_PER_CPU(u64, pv_kick_time); | 31 | static DEFINE_PER_CPU(u64, pv_kick_time); |
108 | 32 | ||
109 | /* | 33 | /* |
110 | * Function to read and return the qlock statistical counter values | 34 | * Function to read and return the PV qspinlock counts. |
111 | * | 35 | * |
112 | * The following counters are handled specially: | 36 | * The following counters are handled specially: |
113 | * 1. qstat_pv_latency_kick | 37 | * 1. pv_latency_kick |
114 | * Average kick latency (ns) = pv_latency_kick/pv_kick_unlock | 38 | * Average kick latency (ns) = pv_latency_kick/pv_kick_unlock |
115 | * 2. qstat_pv_latency_wake | 39 | * 2. pv_latency_wake |
116 | * Average wake latency (ns) = pv_latency_wake/pv_kick_wake | 40 | * Average wake latency (ns) = pv_latency_wake/pv_kick_wake |
117 | * 3. qstat_pv_hash_hops | 41 | * 3. pv_hash_hops |
118 | * Average hops/hash = pv_hash_hops/pv_kick_unlock | 42 | * Average hops/hash = pv_hash_hops/pv_kick_unlock |
119 | */ | 43 | */ |
120 | static ssize_t qstat_read(struct file *file, char __user *user_buf, | 44 | ssize_t lockevent_read(struct file *file, char __user *user_buf, |
121 | size_t count, loff_t *ppos) | 45 | size_t count, loff_t *ppos) |
122 | { | 46 | { |
123 | char buf[64]; | 47 | char buf[64]; |
124 | int cpu, counter, len; | 48 | int cpu, id, len; |
125 | u64 stat = 0, kicks = 0; | 49 | u64 sum = 0, kicks = 0; |
126 | 50 | ||
127 | /* | 51 | /* |
128 | * Get the counter ID stored in file->f_inode->i_private | 52 | * Get the counter ID stored in file->f_inode->i_private |
129 | */ | 53 | */ |
130 | counter = (long)file_inode(file)->i_private; | 54 | id = (long)file_inode(file)->i_private; |
131 | 55 | ||
132 | if (counter >= qstat_num) | 56 | if (id >= lockevent_num) |
133 | return -EBADF; | 57 | return -EBADF; |
134 | 58 | ||
135 | for_each_possible_cpu(cpu) { | 59 | for_each_possible_cpu(cpu) { |
136 | stat += per_cpu(qstats[counter], cpu); | 60 | sum += per_cpu(lockevents[id], cpu); |
137 | /* | 61 | /* |
138 | * Need to sum additional counter for some of them | 62 | * Need to sum additional counters for some of them |
139 | */ | 63 | */ |
140 | switch (counter) { | 64 | switch (id) { |
141 | 65 | ||
142 | case qstat_pv_latency_kick: | 66 | case LOCKEVENT_pv_latency_kick: |
143 | case qstat_pv_hash_hops: | 67 | case LOCKEVENT_pv_hash_hops: |
144 | kicks += per_cpu(qstats[qstat_pv_kick_unlock], cpu); | 68 | kicks += per_cpu(EVENT_COUNT(pv_kick_unlock), cpu); |
145 | break; | 69 | break; |
146 | 70 | ||
147 | case qstat_pv_latency_wake: | 71 | case LOCKEVENT_pv_latency_wake: |
148 | kicks += per_cpu(qstats[qstat_pv_kick_wake], cpu); | 72 | kicks += per_cpu(EVENT_COUNT(pv_kick_wake), cpu); |
149 | break; | 73 | break; |
150 | } | 74 | } |
151 | } | 75 | } |
152 | 76 | ||
153 | if (counter == qstat_pv_hash_hops) { | 77 | if (id == LOCKEVENT_pv_hash_hops) { |
154 | u64 frac = 0; | 78 | u64 frac = 0; |
155 | 79 | ||
156 | if (kicks) { | 80 | if (kicks) { |
157 | frac = 100ULL * do_div(stat, kicks); | 81 | frac = 100ULL * do_div(sum, kicks); |
158 | frac = DIV_ROUND_CLOSEST_ULL(frac, kicks); | 82 | frac = DIV_ROUND_CLOSEST_ULL(frac, kicks); |
159 | } | 83 | } |
160 | 84 | ||
161 | /* | 85 | /* |
162 | * Return a X.XX decimal number | 86 | * Return a X.XX decimal number |
163 | */ | 87 | */ |
164 | len = snprintf(buf, sizeof(buf) - 1, "%llu.%02llu\n", stat, frac); | 88 | len = snprintf(buf, sizeof(buf) - 1, "%llu.%02llu\n", |
89 | sum, frac); | ||
165 | } else { | 90 | } else { |
166 | /* | 91 | /* |
167 | * Round to the nearest ns | 92 | * Round to the nearest ns |
168 | */ | 93 | */ |
169 | if ((counter == qstat_pv_latency_kick) || | 94 | if ((id == LOCKEVENT_pv_latency_kick) || |
170 | (counter == qstat_pv_latency_wake)) { | 95 | (id == LOCKEVENT_pv_latency_wake)) { |
171 | if (kicks) | 96 | if (kicks) |
172 | stat = DIV_ROUND_CLOSEST_ULL(stat, kicks); | 97 | sum = DIV_ROUND_CLOSEST_ULL(sum, kicks); |
173 | } | 98 | } |
174 | len = snprintf(buf, sizeof(buf) - 1, "%llu\n", stat); | 99 | len = snprintf(buf, sizeof(buf) - 1, "%llu\n", sum); |
175 | } | 100 | } |
176 | 101 | ||
177 | return simple_read_from_buffer(user_buf, count, ppos, buf, len); | 102 | return simple_read_from_buffer(user_buf, count, ppos, buf, len); |
178 | } | 103 | } |
179 | 104 | ||
180 | /* | 105 | /* |
181 | * Function to handle write request | ||
182 | * | ||
183 | * When counter = reset_cnts, reset all the counter values. | ||
184 | * Since the counter updates aren't atomic, the resetting is done twice | ||
185 | * to make sure that the counters are very likely to be all cleared. | ||
186 | */ | ||
187 | static ssize_t qstat_write(struct file *file, const char __user *user_buf, | ||
188 | size_t count, loff_t *ppos) | ||
189 | { | ||
190 | int cpu; | ||
191 | |||
192 | /* | ||
193 | * Get the counter ID stored in file->f_inode->i_private | ||
194 | */ | ||
195 | if ((long)file_inode(file)->i_private != qstat_reset_cnts) | ||
196 | return count; | ||
197 | |||
198 | for_each_possible_cpu(cpu) { | ||
199 | int i; | ||
200 | unsigned long *ptr = per_cpu_ptr(qstats, cpu); | ||
201 | |||
202 | for (i = 0 ; i < qstat_num; i++) | ||
203 | WRITE_ONCE(ptr[i], 0); | ||
204 | } | ||
205 | return count; | ||
206 | } | ||
207 | |||
208 | /* | ||
209 | * Debugfs data structures | ||
210 | */ | ||
211 | static const struct file_operations fops_qstat = { | ||
212 | .read = qstat_read, | ||
213 | .write = qstat_write, | ||
214 | .llseek = default_llseek, | ||
215 | }; | ||
216 | |||
217 | /* | ||
218 | * Initialize debugfs for the qspinlock statistical counters | ||
219 | */ | ||
220 | static int __init init_qspinlock_stat(void) | ||
221 | { | ||
222 | struct dentry *d_qstat = debugfs_create_dir("qlockstat", NULL); | ||
223 | int i; | ||
224 | |||
225 | if (!d_qstat) | ||
226 | goto out; | ||
227 | |||
228 | /* | ||
229 | * Create the debugfs files | ||
230 | * | ||
231 | * As reading from and writing to the stat files can be slow, only | ||
232 | * root is allowed to do the read/write to limit impact to system | ||
233 | * performance. | ||
234 | */ | ||
235 | for (i = 0; i < qstat_num; i++) | ||
236 | if (!debugfs_create_file(qstat_names[i], 0400, d_qstat, | ||
237 | (void *)(long)i, &fops_qstat)) | ||
238 | goto fail_undo; | ||
239 | |||
240 | if (!debugfs_create_file(qstat_names[qstat_reset_cnts], 0200, d_qstat, | ||
241 | (void *)(long)qstat_reset_cnts, &fops_qstat)) | ||
242 | goto fail_undo; | ||
243 | |||
244 | return 0; | ||
245 | fail_undo: | ||
246 | debugfs_remove_recursive(d_qstat); | ||
247 | out: | ||
248 | pr_warn("Could not create 'qlockstat' debugfs entries\n"); | ||
249 | return -ENOMEM; | ||
250 | } | ||
251 | fs_initcall(init_qspinlock_stat); | ||
252 | |||
253 | /* | ||
254 | * Increment the PV qspinlock statistical counters | ||
255 | */ | ||
256 | static inline void qstat_inc(enum qlock_stats stat, bool cond) | ||
257 | { | ||
258 | if (cond) | ||
259 | this_cpu_inc(qstats[stat]); | ||
260 | } | ||
261 | |||
262 | /* | ||
263 | * PV hash hop count | 106 | * PV hash hop count |
264 | */ | 107 | */ |
265 | static inline void qstat_hop(int hopcnt) | 108 | static inline void lockevent_pv_hop(int hopcnt) |
266 | { | 109 | { |
267 | this_cpu_add(qstats[qstat_pv_hash_hops], hopcnt); | 110 | this_cpu_add(EVENT_COUNT(pv_hash_hops), hopcnt); |
268 | } | 111 | } |
269 | 112 | ||
270 | /* | 113 | /* |
@@ -276,7 +119,7 @@ static inline void __pv_kick(int cpu) | |||
276 | 119 | ||
277 | per_cpu(pv_kick_time, cpu) = start; | 120 | per_cpu(pv_kick_time, cpu) = start; |
278 | pv_kick(cpu); | 121 | pv_kick(cpu); |
279 | this_cpu_add(qstats[qstat_pv_latency_kick], sched_clock() - start); | 122 | this_cpu_add(EVENT_COUNT(pv_latency_kick), sched_clock() - start); |
280 | } | 123 | } |
281 | 124 | ||
282 | /* | 125 | /* |
@@ -289,18 +132,19 @@ static inline void __pv_wait(u8 *ptr, u8 val) | |||
289 | *pkick_time = 0; | 132 | *pkick_time = 0; |
290 | pv_wait(ptr, val); | 133 | pv_wait(ptr, val); |
291 | if (*pkick_time) { | 134 | if (*pkick_time) { |
292 | this_cpu_add(qstats[qstat_pv_latency_wake], | 135 | this_cpu_add(EVENT_COUNT(pv_latency_wake), |
293 | sched_clock() - *pkick_time); | 136 | sched_clock() - *pkick_time); |
294 | qstat_inc(qstat_pv_kick_wake, true); | 137 | lockevent_inc(pv_kick_wake); |
295 | } | 138 | } |
296 | } | 139 | } |
297 | 140 | ||
298 | #define pv_kick(c) __pv_kick(c) | 141 | #define pv_kick(c) __pv_kick(c) |
299 | #define pv_wait(p, v) __pv_wait(p, v) | 142 | #define pv_wait(p, v) __pv_wait(p, v) |
300 | 143 | ||
301 | #else /* CONFIG_QUEUED_LOCK_STAT */ | 144 | #endif /* CONFIG_PARAVIRT_SPINLOCKS */ |
145 | |||
146 | #else /* CONFIG_LOCK_EVENT_COUNTS */ | ||
302 | 147 | ||
303 | static inline void qstat_inc(enum qlock_stats stat, bool cond) { } | 148 | static inline void lockevent_pv_hop(int hopcnt) { } |
304 | static inline void qstat_hop(int hopcnt) { } | ||
305 | 149 | ||
306 | #endif /* CONFIG_QUEUED_LOCK_STAT */ | 150 | #endif /* CONFIG_LOCK_EVENT_COUNTS */ |
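The retained read handler sums each event over all possible CPUs and post-processes a few of them: the two latency counters are divided by their matching kick counts and rounded to the nearest nanosecond, while pv_hash_hops is printed as a two-decimal average using do_div() and DIV_ROUND_CLOSEST_ULL(). The standalone sketch below reproduces that "X.XX" arithmetic with plain C division; the sample numbers are made up.

/*
 * Userspace reproduction of the "X.XX" average that lockevent_read()
 * prints for pv_hash_hops: do_div(sum, kicks) leaves the quotient in sum
 * and returns the remainder, and DIV_ROUND_CLOSEST_ULL() rounds the
 * scaled remainder to two decimal places.
 */
#include <stdio.h>
#include <inttypes.h>

int main(void)
{
	uint64_t sum = 1234567;		/* pv_hash_hops summed over all CPUs */
	uint64_t kicks = 456789;	/* pv_kick_unlock summed over all CPUs */
	uint64_t frac = 0;

	if (kicks) {
		uint64_t rem = sum % kicks;		/* what do_div() returns  */

		sum /= kicks;				/* quotient stays in sum  */
		frac = (100 * rem + kicks / 2) / kicks;	/* DIV_ROUND_CLOSEST_ULL  */
	}
	printf("%" PRIu64 ".%02" PRIu64 "\n", sum, frac);	/* prints 2.70 here */
	return 0;
}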
diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c deleted file mode 100644 index a7ffb2a96ede..000000000000 --- a/kernel/locking/rwsem-spinlock.c +++ /dev/null | |||
@@ -1,339 +0,0 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /* rwsem-spinlock.c: R/W semaphores: contention handling functions for | ||
3 | * generic spinlock implementation | ||
4 | * | ||
5 | * Copyright (c) 2001 David Howells (dhowells@redhat.com). | ||
6 | * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de> | ||
7 | * - Derived also from comments by Linus | ||
8 | */ | ||
9 | #include <linux/rwsem.h> | ||
10 | #include <linux/sched/signal.h> | ||
11 | #include <linux/sched/debug.h> | ||
12 | #include <linux/export.h> | ||
13 | |||
14 | enum rwsem_waiter_type { | ||
15 | RWSEM_WAITING_FOR_WRITE, | ||
16 | RWSEM_WAITING_FOR_READ | ||
17 | }; | ||
18 | |||
19 | struct rwsem_waiter { | ||
20 | struct list_head list; | ||
21 | struct task_struct *task; | ||
22 | enum rwsem_waiter_type type; | ||
23 | }; | ||
24 | |||
25 | int rwsem_is_locked(struct rw_semaphore *sem) | ||
26 | { | ||
27 | int ret = 1; | ||
28 | unsigned long flags; | ||
29 | |||
30 | if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) { | ||
31 | ret = (sem->count != 0); | ||
32 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
33 | } | ||
34 | return ret; | ||
35 | } | ||
36 | EXPORT_SYMBOL(rwsem_is_locked); | ||
37 | |||
38 | /* | ||
39 | * initialise the semaphore | ||
40 | */ | ||
41 | void __init_rwsem(struct rw_semaphore *sem, const char *name, | ||
42 | struct lock_class_key *key) | ||
43 | { | ||
44 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
45 | /* | ||
46 | * Make sure we are not reinitializing a held semaphore: | ||
47 | */ | ||
48 | debug_check_no_locks_freed((void *)sem, sizeof(*sem)); | ||
49 | lockdep_init_map(&sem->dep_map, name, key, 0); | ||
50 | #endif | ||
51 | sem->count = 0; | ||
52 | raw_spin_lock_init(&sem->wait_lock); | ||
53 | INIT_LIST_HEAD(&sem->wait_list); | ||
54 | } | ||
55 | EXPORT_SYMBOL(__init_rwsem); | ||
56 | |||
57 | /* | ||
58 | * handle the lock release when processes blocked on it that can now run | ||
59 | * - if we come here, then: | ||
60 | * - the 'active count' _reached_ zero | ||
61 | * - the 'waiting count' is non-zero | ||
62 | * - the spinlock must be held by the caller | ||
63 | * - woken process blocks are discarded from the list after having task zeroed | ||
64 | * - writers are only woken if wakewrite is non-zero | ||
65 | */ | ||
66 | static inline struct rw_semaphore * | ||
67 | __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) | ||
68 | { | ||
69 | struct rwsem_waiter *waiter; | ||
70 | struct task_struct *tsk; | ||
71 | int woken; | ||
72 | |||
73 | waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); | ||
74 | |||
75 | if (waiter->type == RWSEM_WAITING_FOR_WRITE) { | ||
76 | if (wakewrite) | ||
77 | /* Wake up a writer. Note that we do not grant it the | ||
78 | * lock - it will have to acquire it when it runs. */ | ||
79 | wake_up_process(waiter->task); | ||
80 | goto out; | ||
81 | } | ||
82 | |||
83 | /* grant an infinite number of read locks to the front of the queue */ | ||
84 | woken = 0; | ||
85 | do { | ||
86 | struct list_head *next = waiter->list.next; | ||
87 | |||
88 | list_del(&waiter->list); | ||
89 | tsk = waiter->task; | ||
90 | /* | ||
91 | * Make sure we do not wakeup the next reader before | ||
92 | * setting the nil condition to grant the next reader; | ||
93 | * otherwise we could miss the wakeup on the other | ||
94 | * side and end up sleeping again. See the pairing | ||
95 | * in rwsem_down_read_failed(). | ||
96 | */ | ||
97 | smp_mb(); | ||
98 | waiter->task = NULL; | ||
99 | wake_up_process(tsk); | ||
100 | put_task_struct(tsk); | ||
101 | woken++; | ||
102 | if (next == &sem->wait_list) | ||
103 | break; | ||
104 | waiter = list_entry(next, struct rwsem_waiter, list); | ||
105 | } while (waiter->type != RWSEM_WAITING_FOR_WRITE); | ||
106 | |||
107 | sem->count += woken; | ||
108 | |||
109 | out: | ||
110 | return sem; | ||
111 | } | ||
112 | |||
113 | /* | ||
114 | * wake a single writer | ||
115 | */ | ||
116 | static inline struct rw_semaphore * | ||
117 | __rwsem_wake_one_writer(struct rw_semaphore *sem) | ||
118 | { | ||
119 | struct rwsem_waiter *waiter; | ||
120 | |||
121 | waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); | ||
122 | wake_up_process(waiter->task); | ||
123 | |||
124 | return sem; | ||
125 | } | ||
126 | |||
127 | /* | ||
128 | * get a read lock on the semaphore | ||
129 | */ | ||
130 | int __sched __down_read_common(struct rw_semaphore *sem, int state) | ||
131 | { | ||
132 | struct rwsem_waiter waiter; | ||
133 | unsigned long flags; | ||
134 | |||
135 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | ||
136 | |||
137 | if (sem->count >= 0 && list_empty(&sem->wait_list)) { | ||
138 | /* granted */ | ||
139 | sem->count++; | ||
140 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
141 | goto out; | ||
142 | } | ||
143 | |||
144 | /* set up my own style of waitqueue */ | ||
145 | waiter.task = current; | ||
146 | waiter.type = RWSEM_WAITING_FOR_READ; | ||
147 | get_task_struct(current); | ||
148 | |||
149 | list_add_tail(&waiter.list, &sem->wait_list); | ||
150 | |||
151 | /* wait to be given the lock */ | ||
152 | for (;;) { | ||
153 | if (!waiter.task) | ||
154 | break; | ||
155 | if (signal_pending_state(state, current)) | ||
156 | goto out_nolock; | ||
157 | set_current_state(state); | ||
158 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
159 | schedule(); | ||
160 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | ||
161 | } | ||
162 | |||
163 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
164 | out: | ||
165 | return 0; | ||
166 | |||
167 | out_nolock: | ||
168 | /* | ||
169 | * We didn't take the lock, so there must be a writer, which is | ||
170 | * either the owner or the first waiter of the sem. If it's a | ||
171 | * waiter, the current owner will wake it. No need to wake anybody. | ||
172 | */ | ||
173 | list_del(&waiter.list); | ||
174 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
175 | return -EINTR; | ||
176 | } | ||
177 | |||
178 | void __sched __down_read(struct rw_semaphore *sem) | ||
179 | { | ||
180 | __down_read_common(sem, TASK_UNINTERRUPTIBLE); | ||
181 | } | ||
182 | |||
183 | int __sched __down_read_killable(struct rw_semaphore *sem) | ||
184 | { | ||
185 | return __down_read_common(sem, TASK_KILLABLE); | ||
186 | } | ||
187 | |||
188 | /* | ||
189 | * trylock for reading -- returns 1 if successful, 0 if contention | ||
190 | */ | ||
191 | int __down_read_trylock(struct rw_semaphore *sem) | ||
192 | { | ||
193 | unsigned long flags; | ||
194 | int ret = 0; | ||
195 | |||
196 | |||
197 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | ||
198 | |||
199 | if (sem->count >= 0 && list_empty(&sem->wait_list)) { | ||
200 | /* granted */ | ||
201 | sem->count++; | ||
202 | ret = 1; | ||
203 | } | ||
204 | |||
205 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
206 | |||
207 | return ret; | ||
208 | } | ||
209 | |||
210 | /* | ||
211 | * get a write lock on the semaphore | ||
212 | */ | ||
213 | int __sched __down_write_common(struct rw_semaphore *sem, int state) | ||
214 | { | ||
215 | struct rwsem_waiter waiter; | ||
216 | unsigned long flags; | ||
217 | int ret = 0; | ||
218 | |||
219 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | ||
220 | |||
221 | /* set up my own style of waitqueue */ | ||
222 | waiter.task = current; | ||
223 | waiter.type = RWSEM_WAITING_FOR_WRITE; | ||
224 | list_add_tail(&waiter.list, &sem->wait_list); | ||
225 | |||
226 | /* wait for someone to release the lock */ | ||
227 | for (;;) { | ||
228 | /* | ||
229 | * This is the key to supporting write lock stealing: it allows | ||
230 | * the task already on a CPU to get the lock soon rather than | ||
231 | * putting itself to sleep and waiting for the system or someone | ||
232 | * else at the head of the wait list to wake it up. | ||
233 | */ | ||
234 | if (sem->count == 0) | ||
235 | break; | ||
236 | if (signal_pending_state(state, current)) | ||
237 | goto out_nolock; | ||
238 | |||
239 | set_current_state(state); | ||
240 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
241 | schedule(); | ||
242 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | ||
243 | } | ||
244 | /* got the lock */ | ||
245 | sem->count = -1; | ||
246 | list_del(&waiter.list); | ||
247 | |||
248 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
249 | |||
250 | return ret; | ||
251 | |||
252 | out_nolock: | ||
253 | list_del(&waiter.list); | ||
254 | if (!list_empty(&sem->wait_list) && sem->count >= 0) | ||
255 | __rwsem_do_wake(sem, 0); | ||
256 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
257 | |||
258 | return -EINTR; | ||
259 | } | ||
260 | |||
261 | void __sched __down_write(struct rw_semaphore *sem) | ||
262 | { | ||
263 | __down_write_common(sem, TASK_UNINTERRUPTIBLE); | ||
264 | } | ||
265 | |||
266 | int __sched __down_write_killable(struct rw_semaphore *sem) | ||
267 | { | ||
268 | return __down_write_common(sem, TASK_KILLABLE); | ||
269 | } | ||
270 | |||
271 | /* | ||
272 | * trylock for writing -- returns 1 if successful, 0 if contention | ||
273 | */ | ||
274 | int __down_write_trylock(struct rw_semaphore *sem) | ||
275 | { | ||
276 | unsigned long flags; | ||
277 | int ret = 0; | ||
278 | |||
279 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | ||
280 | |||
281 | if (sem->count == 0) { | ||
282 | /* got the lock */ | ||
283 | sem->count = -1; | ||
284 | ret = 1; | ||
285 | } | ||
286 | |||
287 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
288 | |||
289 | return ret; | ||
290 | } | ||
291 | |||
292 | /* | ||
293 | * release a read lock on the semaphore | ||
294 | */ | ||
295 | void __up_read(struct rw_semaphore *sem) | ||
296 | { | ||
297 | unsigned long flags; | ||
298 | |||
299 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | ||
300 | |||
301 | if (--sem->count == 0 && !list_empty(&sem->wait_list)) | ||
302 | sem = __rwsem_wake_one_writer(sem); | ||
303 | |||
304 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
305 | } | ||
306 | |||
307 | /* | ||
308 | * release a write lock on the semaphore | ||
309 | */ | ||
310 | void __up_write(struct rw_semaphore *sem) | ||
311 | { | ||
312 | unsigned long flags; | ||
313 | |||
314 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | ||
315 | |||
316 | sem->count = 0; | ||
317 | if (!list_empty(&sem->wait_list)) | ||
318 | sem = __rwsem_do_wake(sem, 1); | ||
319 | |||
320 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
321 | } | ||
322 | |||
323 | /* | ||
324 | * downgrade a write lock into a read lock | ||
325 | * - just wake up any readers at the front of the queue | ||
326 | */ | ||
327 | void __downgrade_write(struct rw_semaphore *sem) | ||
328 | { | ||
329 | unsigned long flags; | ||
330 | |||
331 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | ||
332 | |||
333 | sem->count = 1; | ||
334 | if (!list_empty(&sem->wait_list)) | ||
335 | sem = __rwsem_do_wake(sem, 0); | ||
336 | |||
337 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
338 | } | ||
339 | |||
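The file removed above implemented rwsems for architectures that selected RWSEM_GENERIC_SPINLOCK: a plain integer count protected by sem->wait_lock, where 0 means unlocked, a positive value is the number of active readers, and -1 marks a held write lock. The sketch below models just that count convention, with a pthread mutex standing in for the wait_lock; the wait list and the sleeping slowpaths are deliberately left out.

/*
 * Simplified userspace model of the count convention used by the removed
 * rwsem-spinlock.c: count == 0 unlocked, count > 0 active readers,
 * count == -1 write-locked.  Only the trylock paths are modeled.
 */
#include <pthread.h>
#include <stdio.h>

struct rw_semaphore {
	int count;
	pthread_mutex_t wait_lock;
};

static int down_read_trylock(struct rw_semaphore *sem)
{
	int ret = 0;

	pthread_mutex_lock(&sem->wait_lock);
	if (sem->count >= 0) {		/* no writer: grant another reader */
		sem->count++;
		ret = 1;
	}
	pthread_mutex_unlock(&sem->wait_lock);
	return ret;
}

static int down_write_trylock(struct rw_semaphore *sem)
{
	int ret = 0;

	pthread_mutex_lock(&sem->wait_lock);
	if (sem->count == 0) {		/* completely idle: take it exclusively */
		sem->count = -1;
		ret = 1;
	}
	pthread_mutex_unlock(&sem->wait_lock);
	return ret;
}

int main(void)
{
	struct rw_semaphore sem = { 0, PTHREAD_MUTEX_INITIALIZER };
	int ret;

	ret = down_read_trylock(&sem);
	printf("read  trylock: %d (count=%d)\n", ret, sem.count);
	ret = down_read_trylock(&sem);
	printf("read  trylock: %d (count=%d)\n", ret, sem.count);
	ret = down_write_trylock(&sem);
	printf("write trylock: %d (count=%d)\n", ret, sem.count);
	return 0;
}

Build with cc -pthread; the write trylock fails while two readers hold the count.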
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index fbe96341beee..6b3ee9948bf1 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c | |||
@@ -147,6 +147,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, | |||
147 | * will notice the queued writer. | 147 | * will notice the queued writer. |
148 | */ | 148 | */ |
149 | wake_q_add(wake_q, waiter->task); | 149 | wake_q_add(wake_q, waiter->task); |
150 | lockevent_inc(rwsem_wake_writer); | ||
150 | } | 151 | } |
151 | 152 | ||
152 | return; | 153 | return; |
@@ -176,9 +177,8 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, | |||
176 | goto try_reader_grant; | 177 | goto try_reader_grant; |
177 | } | 178 | } |
178 | /* | 179 | /* |
179 | * It is not really necessary to set it to reader-owned here, | 180 | * Set it to reader-owned to give spinners an early |
180 | * but it gives the spinners an early indication that the | 181 | * indication that readers now have the lock. |
181 | * readers now have the lock. | ||
182 | */ | 182 | */ |
183 | __rwsem_set_reader_owned(sem, waiter->task); | 183 | __rwsem_set_reader_owned(sem, waiter->task); |
184 | } | 184 | } |
@@ -215,6 +215,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, | |||
215 | } | 215 | } |
216 | 216 | ||
217 | adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; | 217 | adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; |
218 | lockevent_cond_inc(rwsem_wake_reader, woken); | ||
218 | if (list_empty(&sem->wait_list)) { | 219 | if (list_empty(&sem->wait_list)) { |
219 | /* hit end of list above */ | 220 | /* hit end of list above */ |
220 | adjustment -= RWSEM_WAITING_BIAS; | 221 | adjustment -= RWSEM_WAITING_BIAS; |
@@ -225,92 +226,6 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, | |||
225 | } | 226 | } |
226 | 227 | ||
227 | /* | 228 | /* |
228 | * Wait for the read lock to be granted | ||
229 | */ | ||
230 | static inline struct rw_semaphore __sched * | ||
231 | __rwsem_down_read_failed_common(struct rw_semaphore *sem, int state) | ||
232 | { | ||
233 | long count, adjustment = -RWSEM_ACTIVE_READ_BIAS; | ||
234 | struct rwsem_waiter waiter; | ||
235 | DEFINE_WAKE_Q(wake_q); | ||
236 | |||
237 | waiter.task = current; | ||
238 | waiter.type = RWSEM_WAITING_FOR_READ; | ||
239 | |||
240 | raw_spin_lock_irq(&sem->wait_lock); | ||
241 | if (list_empty(&sem->wait_list)) { | ||
242 | /* | ||
243 | * In case the wait queue is empty and the lock isn't owned | ||
244 | * by a writer, this reader can exit the slowpath and return | ||
245 | * immediately as its RWSEM_ACTIVE_READ_BIAS has already | ||
246 | * been set in the count. | ||
247 | */ | ||
248 | if (atomic_long_read(&sem->count) >= 0) { | ||
249 | raw_spin_unlock_irq(&sem->wait_lock); | ||
250 | return sem; | ||
251 | } | ||
252 | adjustment += RWSEM_WAITING_BIAS; | ||
253 | } | ||
254 | list_add_tail(&waiter.list, &sem->wait_list); | ||
255 | |||
256 | /* we're now waiting on the lock, but no longer actively locking */ | ||
257 | count = atomic_long_add_return(adjustment, &sem->count); | ||
258 | |||
259 | /* | ||
260 | * If there are no active locks, wake the front queued process(es). | ||
261 | * | ||
262 | * If there are no writers and we are first in the queue, | ||
263 | * wake our own waiter to join the existing active readers ! | ||
264 | */ | ||
265 | if (count == RWSEM_WAITING_BIAS || | ||
266 | (count > RWSEM_WAITING_BIAS && | ||
267 | adjustment != -RWSEM_ACTIVE_READ_BIAS)) | ||
268 | __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q); | ||
269 | |||
270 | raw_spin_unlock_irq(&sem->wait_lock); | ||
271 | wake_up_q(&wake_q); | ||
272 | |||
273 | /* wait to be given the lock */ | ||
274 | while (true) { | ||
275 | set_current_state(state); | ||
276 | if (!waiter.task) | ||
277 | break; | ||
278 | if (signal_pending_state(state, current)) { | ||
279 | raw_spin_lock_irq(&sem->wait_lock); | ||
280 | if (waiter.task) | ||
281 | goto out_nolock; | ||
282 | raw_spin_unlock_irq(&sem->wait_lock); | ||
283 | break; | ||
284 | } | ||
285 | schedule(); | ||
286 | } | ||
287 | |||
288 | __set_current_state(TASK_RUNNING); | ||
289 | return sem; | ||
290 | out_nolock: | ||
291 | list_del(&waiter.list); | ||
292 | if (list_empty(&sem->wait_list)) | ||
293 | atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count); | ||
294 | raw_spin_unlock_irq(&sem->wait_lock); | ||
295 | __set_current_state(TASK_RUNNING); | ||
296 | return ERR_PTR(-EINTR); | ||
297 | } | ||
298 | |||
299 | __visible struct rw_semaphore * __sched | ||
300 | rwsem_down_read_failed(struct rw_semaphore *sem) | ||
301 | { | ||
302 | return __rwsem_down_read_failed_common(sem, TASK_UNINTERRUPTIBLE); | ||
303 | } | ||
304 | EXPORT_SYMBOL(rwsem_down_read_failed); | ||
305 | |||
306 | __visible struct rw_semaphore * __sched | ||
307 | rwsem_down_read_failed_killable(struct rw_semaphore *sem) | ||
308 | { | ||
309 | return __rwsem_down_read_failed_common(sem, TASK_KILLABLE); | ||
310 | } | ||
311 | EXPORT_SYMBOL(rwsem_down_read_failed_killable); | ||
312 | |||
313 | /* | ||
314 | * This function must be called with the sem->wait_lock held to prevent | 229 | * This function must be called with the sem->wait_lock held to prevent |
315 | * race conditions between checking the rwsem wait list and setting the | 230 | * race conditions between checking the rwsem wait list and setting the |
316 | * sem->count accordingly. | 231 | * sem->count accordingly. |
@@ -346,21 +261,17 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem) | |||
346 | */ | 261 | */ |
347 | static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem) | 262 | static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem) |
348 | { | 263 | { |
349 | long old, count = atomic_long_read(&sem->count); | 264 | long count = atomic_long_read(&sem->count); |
350 | |||
351 | while (true) { | ||
352 | if (!(count == 0 || count == RWSEM_WAITING_BIAS)) | ||
353 | return false; | ||
354 | 265 | ||
355 | old = atomic_long_cmpxchg_acquire(&sem->count, count, | 266 | while (!count || count == RWSEM_WAITING_BIAS) { |
356 | count + RWSEM_ACTIVE_WRITE_BIAS); | 267 | if (atomic_long_try_cmpxchg_acquire(&sem->count, &count, |
357 | if (old == count) { | 268 | count + RWSEM_ACTIVE_WRITE_BIAS)) { |
358 | rwsem_set_owner(sem); | 269 | rwsem_set_owner(sem); |
270 | lockevent_inc(rwsem_opt_wlock); | ||
359 | return true; | 271 | return true; |
360 | } | 272 | } |
361 | |||
362 | count = old; | ||
363 | } | 273 | } |
274 | return false; | ||
364 | } | 275 | } |
365 | 276 | ||
366 | static inline bool owner_on_cpu(struct task_struct *owner) | 277 | static inline bool owner_on_cpu(struct task_struct *owner) |
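The rewritten rwsem_try_write_lock_unqueued() above relies on atomic_long_try_cmpxchg_acquire() writing the freshly observed count back into its "expected" argument on failure, so the separate old/count reload disappears and the while condition can re-test the updated value directly. Here is a userspace sketch of the same pattern, using the GCC __atomic builtin and the 64-bit bias values defined in rwsem.h later in this patch.

/*
 * Sketch of the try_cmpxchg() loop adopted above: on failure the builtin
 * stores the freshly observed value into "count", so the loop condition
 * simply re-checks it.
 */
#include <stdio.h>

#define RWSEM_ACTIVE_MASK	0xffffffffL	/* 64-bit layout */
#define RWSEM_ACTIVE_BIAS	0x00000001L
#define RWSEM_WAITING_BIAS	(-RWSEM_ACTIVE_MASK - 1)
#define RWSEM_ACTIVE_WRITE_BIAS	(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)

static long sem_count;		/* stands in for sem->count */

static int try_write_lock_unqueued(void)
{
	long count = __atomic_load_n(&sem_count, __ATOMIC_RELAXED);

	while (!count || count == RWSEM_WAITING_BIAS) {
		/* On failure the builtin updates "count" with the current value. */
		if (__atomic_compare_exchange_n(&sem_count, &count,
						count + RWSEM_ACTIVE_WRITE_BIAS,
						0, __ATOMIC_ACQUIRE,
						__ATOMIC_RELAXED))
			return 1;	/* acquired for write */
	}
	return 0;
}

int main(void)
{
	int ret;

	ret = try_write_lock_unqueued();
	printf("first  attempt: %d, count=%ld\n", ret, sem_count);
	ret = try_write_lock_unqueued();
	printf("second attempt: %d, count=%ld\n", ret, sem_count);
	return 0;
}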
@@ -481,6 +392,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem) | |||
481 | osq_unlock(&sem->osq); | 392 | osq_unlock(&sem->osq); |
482 | done: | 393 | done: |
483 | preempt_enable(); | 394 | preempt_enable(); |
395 | lockevent_cond_inc(rwsem_opt_fail, !taken); | ||
484 | return taken; | 396 | return taken; |
485 | } | 397 | } |
486 | 398 | ||
@@ -505,6 +417,97 @@ static inline bool rwsem_has_spinner(struct rw_semaphore *sem) | |||
505 | #endif | 417 | #endif |
506 | 418 | ||
507 | /* | 419 | /* |
420 | * Wait for the read lock to be granted | ||
421 | */ | ||
422 | static inline struct rw_semaphore __sched * | ||
423 | __rwsem_down_read_failed_common(struct rw_semaphore *sem, int state) | ||
424 | { | ||
425 | long count, adjustment = -RWSEM_ACTIVE_READ_BIAS; | ||
426 | struct rwsem_waiter waiter; | ||
427 | DEFINE_WAKE_Q(wake_q); | ||
428 | |||
429 | waiter.task = current; | ||
430 | waiter.type = RWSEM_WAITING_FOR_READ; | ||
431 | |||
432 | raw_spin_lock_irq(&sem->wait_lock); | ||
433 | if (list_empty(&sem->wait_list)) { | ||
434 | /* | ||
435 | * In case the wait queue is empty and the lock isn't owned | ||
436 | * by a writer, this reader can exit the slowpath and return | ||
437 | * immediately as its RWSEM_ACTIVE_READ_BIAS has already | ||
438 | * been set in the count. | ||
439 | */ | ||
440 | if (atomic_long_read(&sem->count) >= 0) { | ||
441 | raw_spin_unlock_irq(&sem->wait_lock); | ||
442 | rwsem_set_reader_owned(sem); | ||
443 | lockevent_inc(rwsem_rlock_fast); | ||
444 | return sem; | ||
445 | } | ||
446 | adjustment += RWSEM_WAITING_BIAS; | ||
447 | } | ||
448 | list_add_tail(&waiter.list, &sem->wait_list); | ||
449 | |||
450 | /* we're now waiting on the lock, but no longer actively locking */ | ||
451 | count = atomic_long_add_return(adjustment, &sem->count); | ||
452 | |||
453 | /* | ||
454 | * If there are no active locks, wake the front queued process(es). | ||
455 | * | ||
456 | * If there are no writers and we are first in the queue, | ||
457 | * wake our own waiter to join the existing active readers ! | ||
458 | */ | ||
459 | if (count == RWSEM_WAITING_BIAS || | ||
460 | (count > RWSEM_WAITING_BIAS && | ||
461 | adjustment != -RWSEM_ACTIVE_READ_BIAS)) | ||
462 | __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q); | ||
463 | |||
464 | raw_spin_unlock_irq(&sem->wait_lock); | ||
465 | wake_up_q(&wake_q); | ||
466 | |||
467 | /* wait to be given the lock */ | ||
468 | while (true) { | ||
469 | set_current_state(state); | ||
470 | if (!waiter.task) | ||
471 | break; | ||
472 | if (signal_pending_state(state, current)) { | ||
473 | raw_spin_lock_irq(&sem->wait_lock); | ||
474 | if (waiter.task) | ||
475 | goto out_nolock; | ||
476 | raw_spin_unlock_irq(&sem->wait_lock); | ||
477 | break; | ||
478 | } | ||
479 | schedule(); | ||
480 | lockevent_inc(rwsem_sleep_reader); | ||
481 | } | ||
482 | |||
483 | __set_current_state(TASK_RUNNING); | ||
484 | lockevent_inc(rwsem_rlock); | ||
485 | return sem; | ||
486 | out_nolock: | ||
487 | list_del(&waiter.list); | ||
488 | if (list_empty(&sem->wait_list)) | ||
489 | atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count); | ||
490 | raw_spin_unlock_irq(&sem->wait_lock); | ||
491 | __set_current_state(TASK_RUNNING); | ||
492 | lockevent_inc(rwsem_rlock_fail); | ||
493 | return ERR_PTR(-EINTR); | ||
494 | } | ||
495 | |||
496 | __visible struct rw_semaphore * __sched | ||
497 | rwsem_down_read_failed(struct rw_semaphore *sem) | ||
498 | { | ||
499 | return __rwsem_down_read_failed_common(sem, TASK_UNINTERRUPTIBLE); | ||
500 | } | ||
501 | EXPORT_SYMBOL(rwsem_down_read_failed); | ||
502 | |||
503 | __visible struct rw_semaphore * __sched | ||
504 | rwsem_down_read_failed_killable(struct rw_semaphore *sem) | ||
505 | { | ||
506 | return __rwsem_down_read_failed_common(sem, TASK_KILLABLE); | ||
507 | } | ||
508 | EXPORT_SYMBOL(rwsem_down_read_failed_killable); | ||
509 | |||
510 | /* | ||
508 | * Wait until we successfully acquire the write lock | 511 | * Wait until we successfully acquire the write lock |
509 | */ | 512 | */ |
510 | static inline struct rw_semaphore * | 513 | static inline struct rw_semaphore * |
@@ -580,6 +583,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) | |||
580 | goto out_nolock; | 583 | goto out_nolock; |
581 | 584 | ||
582 | schedule(); | 585 | schedule(); |
586 | lockevent_inc(rwsem_sleep_writer); | ||
583 | set_current_state(state); | 587 | set_current_state(state); |
584 | } while ((count = atomic_long_read(&sem->count)) & RWSEM_ACTIVE_MASK); | 588 | } while ((count = atomic_long_read(&sem->count)) & RWSEM_ACTIVE_MASK); |
585 | 589 | ||
@@ -588,6 +592,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) | |||
588 | __set_current_state(TASK_RUNNING); | 592 | __set_current_state(TASK_RUNNING); |
589 | list_del(&waiter.list); | 593 | list_del(&waiter.list); |
590 | raw_spin_unlock_irq(&sem->wait_lock); | 594 | raw_spin_unlock_irq(&sem->wait_lock); |
595 | lockevent_inc(rwsem_wlock); | ||
591 | 596 | ||
592 | return ret; | 597 | return ret; |
593 | 598 | ||
@@ -601,6 +606,7 @@ out_nolock: | |||
601 | __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q); | 606 | __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q); |
602 | raw_spin_unlock_irq(&sem->wait_lock); | 607 | raw_spin_unlock_irq(&sem->wait_lock); |
603 | wake_up_q(&wake_q); | 608 | wake_up_q(&wake_q); |
609 | lockevent_inc(rwsem_wlock_fail); | ||
604 | 610 | ||
605 | return ERR_PTR(-EINTR); | 611 | return ERR_PTR(-EINTR); |
606 | } | 612 | } |
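With CONFIG_LOCK_EVENT_COUNTS enabled, the rwsem slow paths above feed the same framework as the qspinlock code: rwsem_rlock_fast, rwsem_rlock, rwsem_rlock_fail and rwsem_sleep_reader on the read side, rwsem_wlock, rwsem_wlock_fail and rwsem_sleep_writer on the write side, plus rwsem_opt_wlock/rwsem_opt_fail for optimistic spinning. The Kconfig help only says the totals are exported via debugfs, so the directory name in the small reader below is an assumption, not something stated in this patch.

/*
 * Tiny userspace reader for one lock-event counter file.  The debugfs
 * directory name "lock_event_counts" is an assumption; the patch text
 * only says the counts are reported via debugfs.
 */
#include <stdio.h>

int main(int argc, char **argv)
{
	const char *name = argc > 1 ? argv[1] : "rwsem_sleep_reader";
	char path[256];
	char buf[64];
	FILE *f;

	snprintf(path, sizeof(path),
		 "/sys/kernel/debug/lock_event_counts/%s", name);	/* assumed path */
	f = fopen(path, "r");
	if (!f) {
		perror(path);
		return 1;
	}
	if (fgets(buf, sizeof(buf), f))
		printf("%s: %s", name, buf);
	fclose(f);
	return 0;
}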
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index e586f0d03ad3..ccbf18f560ff 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c | |||
@@ -24,7 +24,6 @@ void __sched down_read(struct rw_semaphore *sem) | |||
24 | rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); | 24 | rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); |
25 | 25 | ||
26 | LOCK_CONTENDED(sem, __down_read_trylock, __down_read); | 26 | LOCK_CONTENDED(sem, __down_read_trylock, __down_read); |
27 | rwsem_set_reader_owned(sem); | ||
28 | } | 27 | } |
29 | 28 | ||
30 | EXPORT_SYMBOL(down_read); | 29 | EXPORT_SYMBOL(down_read); |
@@ -39,7 +38,6 @@ int __sched down_read_killable(struct rw_semaphore *sem) | |||
39 | return -EINTR; | 38 | return -EINTR; |
40 | } | 39 | } |
41 | 40 | ||
42 | rwsem_set_reader_owned(sem); | ||
43 | return 0; | 41 | return 0; |
44 | } | 42 | } |
45 | 43 | ||
@@ -52,10 +50,8 @@ int down_read_trylock(struct rw_semaphore *sem) | |||
52 | { | 50 | { |
53 | int ret = __down_read_trylock(sem); | 51 | int ret = __down_read_trylock(sem); |
54 | 52 | ||
55 | if (ret == 1) { | 53 | if (ret == 1) |
56 | rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); | 54 | rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); |
57 | rwsem_set_reader_owned(sem); | ||
58 | } | ||
59 | return ret; | 55 | return ret; |
60 | } | 56 | } |
61 | 57 | ||
@@ -70,7 +66,6 @@ void __sched down_write(struct rw_semaphore *sem) | |||
70 | rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); | 66 | rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); |
71 | 67 | ||
72 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); | 68 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); |
73 | rwsem_set_owner(sem); | ||
74 | } | 69 | } |
75 | 70 | ||
76 | EXPORT_SYMBOL(down_write); | 71 | EXPORT_SYMBOL(down_write); |
@@ -88,7 +83,6 @@ int __sched down_write_killable(struct rw_semaphore *sem) | |||
88 | return -EINTR; | 83 | return -EINTR; |
89 | } | 84 | } |
90 | 85 | ||
91 | rwsem_set_owner(sem); | ||
92 | return 0; | 86 | return 0; |
93 | } | 87 | } |
94 | 88 | ||
@@ -101,10 +95,8 @@ int down_write_trylock(struct rw_semaphore *sem) | |||
101 | { | 95 | { |
102 | int ret = __down_write_trylock(sem); | 96 | int ret = __down_write_trylock(sem); |
103 | 97 | ||
104 | if (ret == 1) { | 98 | if (ret == 1) |
105 | rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_); | 99 | rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_); |
106 | rwsem_set_owner(sem); | ||
107 | } | ||
108 | 100 | ||
109 | return ret; | 101 | return ret; |
110 | } | 102 | } |
@@ -117,9 +109,7 @@ EXPORT_SYMBOL(down_write_trylock); | |||
117 | void up_read(struct rw_semaphore *sem) | 109 | void up_read(struct rw_semaphore *sem) |
118 | { | 110 | { |
119 | rwsem_release(&sem->dep_map, 1, _RET_IP_); | 111 | rwsem_release(&sem->dep_map, 1, _RET_IP_); |
120 | DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED)); | ||
121 | 112 | ||
122 | rwsem_clear_reader_owned(sem); | ||
123 | __up_read(sem); | 113 | __up_read(sem); |
124 | } | 114 | } |
125 | 115 | ||
@@ -131,9 +121,7 @@ EXPORT_SYMBOL(up_read); | |||
131 | void up_write(struct rw_semaphore *sem) | 121 | void up_write(struct rw_semaphore *sem) |
132 | { | 122 | { |
133 | rwsem_release(&sem->dep_map, 1, _RET_IP_); | 123 | rwsem_release(&sem->dep_map, 1, _RET_IP_); |
134 | DEBUG_RWSEMS_WARN_ON(sem->owner != current); | ||
135 | 124 | ||
136 | rwsem_clear_owner(sem); | ||
137 | __up_write(sem); | 125 | __up_write(sem); |
138 | } | 126 | } |
139 | 127 | ||
@@ -145,9 +133,7 @@ EXPORT_SYMBOL(up_write); | |||
145 | void downgrade_write(struct rw_semaphore *sem) | 133 | void downgrade_write(struct rw_semaphore *sem) |
146 | { | 134 | { |
147 | lock_downgrade(&sem->dep_map, _RET_IP_); | 135 | lock_downgrade(&sem->dep_map, _RET_IP_); |
148 | DEBUG_RWSEMS_WARN_ON(sem->owner != current); | ||
149 | 136 | ||
150 | rwsem_set_reader_owned(sem); | ||
151 | __downgrade_write(sem); | 137 | __downgrade_write(sem); |
152 | } | 138 | } |
153 | 139 | ||
@@ -161,7 +147,6 @@ void down_read_nested(struct rw_semaphore *sem, int subclass) | |||
161 | rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); | 147 | rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); |
162 | 148 | ||
163 | LOCK_CONTENDED(sem, __down_read_trylock, __down_read); | 149 | LOCK_CONTENDED(sem, __down_read_trylock, __down_read); |
164 | rwsem_set_reader_owned(sem); | ||
165 | } | 150 | } |
166 | 151 | ||
167 | EXPORT_SYMBOL(down_read_nested); | 152 | EXPORT_SYMBOL(down_read_nested); |
@@ -172,7 +157,6 @@ void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest) | |||
172 | rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_); | 157 | rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_); |
173 | 158 | ||
174 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); | 159 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); |
175 | rwsem_set_owner(sem); | ||
176 | } | 160 | } |
177 | 161 | ||
178 | EXPORT_SYMBOL(_down_write_nest_lock); | 162 | EXPORT_SYMBOL(_down_write_nest_lock); |
@@ -193,7 +177,6 @@ void down_write_nested(struct rw_semaphore *sem, int subclass) | |||
193 | rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); | 177 | rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); |
194 | 178 | ||
195 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); | 179 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); |
196 | rwsem_set_owner(sem); | ||
197 | } | 180 | } |
198 | 181 | ||
199 | EXPORT_SYMBOL(down_write_nested); | 182 | EXPORT_SYMBOL(down_write_nested); |
@@ -208,7 +191,6 @@ int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass) | |||
208 | return -EINTR; | 191 | return -EINTR; |
209 | } | 192 | } |
210 | 193 | ||
211 | rwsem_set_owner(sem); | ||
212 | return 0; | 194 | return 0; |
213 | } | 195 | } |
214 | 196 | ||
@@ -216,7 +198,8 @@ EXPORT_SYMBOL(down_write_killable_nested); | |||
216 | 198 | ||
217 | void up_read_non_owner(struct rw_semaphore *sem) | 199 | void up_read_non_owner(struct rw_semaphore *sem) |
218 | { | 200 | { |
219 | DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED)); | 201 | DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED), |
202 | sem); | ||
220 | __up_read(sem); | 203 | __up_read(sem); |
221 | } | 204 | } |
222 | 205 | ||
diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index bad2bca0268b..64877f5294e3 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h | |||
@@ -23,15 +23,44 @@ | |||
23 | * is involved. Ideally we would like to track all the readers that own | 23 | * is involved. Ideally we would like to track all the readers that own |
24 | * a rwsem, but the overhead is simply too big. | 24 | * a rwsem, but the overhead is simply too big. |
25 | */ | 25 | */ |
26 | #include "lock_events.h" | ||
27 | |||
26 | #define RWSEM_READER_OWNED (1UL << 0) | 28 | #define RWSEM_READER_OWNED (1UL << 0) |
27 | #define RWSEM_ANONYMOUSLY_OWNED (1UL << 1) | 29 | #define RWSEM_ANONYMOUSLY_OWNED (1UL << 1) |
28 | 30 | ||
29 | #ifdef CONFIG_DEBUG_RWSEMS | 31 | #ifdef CONFIG_DEBUG_RWSEMS |
30 | # define DEBUG_RWSEMS_WARN_ON(c) DEBUG_LOCKS_WARN_ON(c) | 32 | # define DEBUG_RWSEMS_WARN_ON(c, sem) do { \ |
33 | if (!debug_locks_silent && \ | ||
34 | WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\ | ||
35 | #c, atomic_long_read(&(sem)->count), \ | ||
36 | (long)((sem)->owner), (long)current, \ | ||
37 | list_empty(&(sem)->wait_list) ? "" : "not ")) \ | ||
38 | debug_locks_off(); \ | ||
39 | } while (0) | ||
40 | #else | ||
41 | # define DEBUG_RWSEMS_WARN_ON(c, sem) | ||
42 | #endif | ||
43 | |||
44 | /* | ||
45 | * R/W semaphores originally for PPC using the stuff in lib/rwsem.c. | ||
46 | * Adapted largely from include/asm-i386/rwsem.h | ||
47 | * by Paul Mackerras <paulus@samba.org>. | ||
48 | */ | ||
49 | |||
50 | /* | ||
51 | * the semaphore definition | ||
52 | */ | ||
53 | #ifdef CONFIG_64BIT | ||
54 | # define RWSEM_ACTIVE_MASK 0xffffffffL | ||
31 | #else | 55 | #else |
32 | # define DEBUG_RWSEMS_WARN_ON(c) | 56 | # define RWSEM_ACTIVE_MASK 0x0000ffffL |
33 | #endif | 57 | #endif |
34 | 58 | ||
59 | #define RWSEM_ACTIVE_BIAS 0x00000001L | ||
60 | #define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1) | ||
61 | #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS | ||
62 | #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) | ||
63 | |||
35 | #ifdef CONFIG_RWSEM_SPIN_ON_OWNER | 64 | #ifdef CONFIG_RWSEM_SPIN_ON_OWNER |
36 | /* | 65 | /* |
37 | * All writes to owner are protected by WRITE_ONCE() to make sure that | 66 | * All writes to owner are protected by WRITE_ONCE() to make sure that |
@@ -132,3 +161,144 @@ static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem) | |||
132 | { | 161 | { |
133 | } | 162 | } |
134 | #endif | 163 | #endif |
164 | |||
165 | extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); | ||
166 | extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem); | ||
167 | extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); | ||
168 | extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem); | ||
169 | extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem); | ||
170 | extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); | ||
171 | |||
172 | /* | ||
173 | * lock for reading | ||
174 | */ | ||
175 | static inline void __down_read(struct rw_semaphore *sem) | ||
176 | { | ||
177 | if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) { | ||
178 | rwsem_down_read_failed(sem); | ||
179 | DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & | ||
180 | RWSEM_READER_OWNED), sem); | ||
181 | } else { | ||
182 | rwsem_set_reader_owned(sem); | ||
183 | } | ||
184 | } | ||
185 | |||
186 | static inline int __down_read_killable(struct rw_semaphore *sem) | ||
187 | { | ||
188 | if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) { | ||
189 | if (IS_ERR(rwsem_down_read_failed_killable(sem))) | ||
190 | return -EINTR; | ||
191 | DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & | ||
192 | RWSEM_READER_OWNED), sem); | ||
193 | } else { | ||
194 | rwsem_set_reader_owned(sem); | ||
195 | } | ||
196 | return 0; | ||
197 | } | ||
198 | |||
199 | static inline int __down_read_trylock(struct rw_semaphore *sem) | ||
200 | { | ||
201 | /* | ||
202 | * Optimize for the case when the rwsem is not locked at all. | ||
203 | */ | ||
204 | long tmp = RWSEM_UNLOCKED_VALUE; | ||
205 | |||
206 | lockevent_inc(rwsem_rtrylock); | ||
207 | do { | ||
208 | if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, | ||
209 | tmp + RWSEM_ACTIVE_READ_BIAS)) { | ||
210 | rwsem_set_reader_owned(sem); | ||
211 | return 1; | ||
212 | } | ||
213 | } while (tmp >= 0); | ||
214 | return 0; | ||
215 | } | ||
216 | |||
217 | /* | ||
218 | * lock for writing | ||
219 | */ | ||
220 | static inline void __down_write(struct rw_semaphore *sem) | ||
221 | { | ||
222 | long tmp; | ||
223 | |||
224 | tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS, | ||
225 | &sem->count); | ||
226 | if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS)) | ||
227 | rwsem_down_write_failed(sem); | ||
228 | rwsem_set_owner(sem); | ||
229 | } | ||
230 | |||
231 | static inline int __down_write_killable(struct rw_semaphore *sem) | ||
232 | { | ||
233 | long tmp; | ||
234 | |||
235 | tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS, | ||
236 | &sem->count); | ||
237 | if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS)) | ||
238 | if (IS_ERR(rwsem_down_write_failed_killable(sem))) | ||
239 | return -EINTR; | ||
240 | rwsem_set_owner(sem); | ||
241 | return 0; | ||
242 | } | ||
243 | |||
244 | static inline int __down_write_trylock(struct rw_semaphore *sem) | ||
245 | { | ||
246 | long tmp; | ||
247 | |||
248 | lockevent_inc(rwsem_wtrylock); | ||
249 | tmp = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE, | ||
250 | RWSEM_ACTIVE_WRITE_BIAS); | ||
251 | if (tmp == RWSEM_UNLOCKED_VALUE) { | ||
252 | rwsem_set_owner(sem); | ||
253 | return true; | ||
254 | } | ||
255 | return false; | ||
256 | } | ||
257 | |||
258 | /* | ||
259 | * unlock after reading | ||
260 | */ | ||
261 | static inline void __up_read(struct rw_semaphore *sem) | ||
262 | { | ||
263 | long tmp; | ||
264 | |||
265 | DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED), | ||
266 | sem); | ||
267 | rwsem_clear_reader_owned(sem); | ||
268 | tmp = atomic_long_dec_return_release(&sem->count); | ||
269 | if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0)) | ||
270 | rwsem_wake(sem); | ||
271 | } | ||
272 | |||
273 | /* | ||
274 | * unlock after writing | ||
275 | */ | ||
276 | static inline void __up_write(struct rw_semaphore *sem) | ||
277 | { | ||
278 | DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem); | ||
279 | rwsem_clear_owner(sem); | ||
280 | if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS, | ||
281 | &sem->count) < 0)) | ||
282 | rwsem_wake(sem); | ||
283 | } | ||
284 | |||
285 | /* | ||
286 | * downgrade write lock to read lock | ||
287 | */ | ||
288 | static inline void __downgrade_write(struct rw_semaphore *sem) | ||
289 | { | ||
290 | long tmp; | ||
291 | |||
292 | /* | ||
293 | * When downgrading from exclusive to shared ownership, | ||
294 | * anything inside the write-locked region cannot leak | ||
295 | * into the read side. In contrast, anything in the | ||
296 | * read-locked region is ok to be re-ordered into the | ||
297 | * write side. As such, rely on RELEASE semantics. | ||
298 | */ | ||
299 | DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem); | ||
300 | tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count); | ||
301 | rwsem_set_reader_owned(sem); | ||
302 | if (tmp < 0) | ||
303 | rwsem_downgrade_wake(sem); | ||
304 | } | ||
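The bias constants these helpers use are the ones defined near the top of this header: each reader adds RWSEM_ACTIVE_READ_BIAS, a writer adds RWSEM_ACTIVE_WRITE_BIAS, a non-empty wait list contributes RWSEM_WAITING_BIAS, and __downgrade_write() adds -RWSEM_WAITING_BIAS, which leaves exactly one read bias behind since ACTIVE_WRITE_BIAS - WAITING_BIAS == ACTIVE_BIAS. The sketch below just walks through those sums with the 64-bit values; the sequence of events is made up for illustration.

/*
 * Worked 64-bit values for the rwsem count encoding defined above.
 */
#include <stdio.h>

#define RWSEM_ACTIVE_MASK	0xffffffffL
#define RWSEM_ACTIVE_BIAS	0x00000001L
#define RWSEM_WAITING_BIAS	(-RWSEM_ACTIVE_MASK - 1)
#define RWSEM_ACTIVE_READ_BIAS	RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS	(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)

int main(void)
{
	long count = 0;				/* RWSEM_UNLOCKED_VALUE */

	count += RWSEM_ACTIVE_WRITE_BIAS;	/* __down_write() fast path */
	printf("write-locked: count=%ld, active part=%ld\n",
	       count, count & RWSEM_ACTIVE_MASK);

	count += -RWSEM_WAITING_BIAS;		/* __downgrade_write() */
	printf("downgraded  : count=%ld (one RWSEM_ACTIVE_READ_BIAS)\n", count);

	count += RWSEM_ACTIVE_READ_BIAS;	/* another reader's fast path */
	printf("two readers : count=%ld\n", count);

	count += RWSEM_WAITING_BIAS;		/* a writer queues up and waits */
	printf("plus waiter : count=%ld (negative: fast paths now fall into the slowpaths)\n",
	       count);
	return 0;
}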