Diffstat (file mode, path, lines changed):
-rw-r--r--  MAINTAINERS                              1
-rw-r--r--  arch/Kconfig                             9
-rw-r--r--  arch/alpha/Kconfig                       7
-rw-r--r--  arch/alpha/include/asm/rwsem.h         211
-rw-r--r--  arch/arc/Kconfig                         3
-rw-r--r--  arch/arm/Kconfig                         4
-rw-r--r--  arch/arm/include/asm/Kbuild              1
-rw-r--r--  arch/arm64/Kconfig                       3
-rw-r--r--  arch/arm64/include/asm/Kbuild            1
-rw-r--r--  arch/c6x/Kconfig                         3
-rw-r--r--  arch/csky/Kconfig                        3
-rw-r--r--  arch/h8300/Kconfig                       3
-rw-r--r--  arch/hexagon/Kconfig                     6
-rw-r--r--  arch/hexagon/include/asm/Kbuild          1
-rw-r--r--  arch/ia64/Kconfig                        4
-rw-r--r--  arch/ia64/include/asm/rwsem.h          172
-rw-r--r--  arch/m68k/Kconfig                        7
-rw-r--r--  arch/microblaze/Kconfig                  6
-rw-r--r--  arch/mips/Kconfig                        7
-rw-r--r--  arch/nds32/Kconfig                       3
-rw-r--r--  arch/nios2/Kconfig                       3
-rw-r--r--  arch/openrisc/Kconfig                    6
-rw-r--r--  arch/parisc/Kconfig                      6
-rw-r--r--  arch/powerpc/Kconfig                     7
-rw-r--r--  arch/powerpc/include/asm/Kbuild          1
-rw-r--r--  arch/riscv/Kconfig                       3
-rw-r--r--  arch/s390/Kconfig                        6
-rw-r--r--  arch/s390/include/asm/Kbuild             1
-rw-r--r--  arch/sh/Kconfig                          6
-rw-r--r--  arch/sh/include/asm/Kbuild               1
-rw-r--r--  arch/sparc/Kconfig                       8
-rw-r--r--  arch/sparc/include/asm/Kbuild            1
-rw-r--r--  arch/unicore32/Kconfig                   6
-rw-r--r--  arch/x86/Kconfig                        11
-rw-r--r--  arch/x86/include/asm/rwsem.h           237
-rw-r--r--  arch/x86/lib/Makefile                    1
-rw-r--r--  arch/x86/lib/rwsem.S                   156
-rw-r--r--  arch/x86/um/Kconfig                      6
-rw-r--r--  arch/x86/um/Makefile                     4
-rw-r--r--  arch/xtensa/Kconfig                      3
-rw-r--r--  arch/xtensa/include/asm/Kbuild           1
-rw-r--r--  include/asm-generic/rwsem.h            140
-rw-r--r--  include/linux/jump_label_ratelimit.h    64
-rw-r--r--  include/linux/lockdep.h                  2
-rw-r--r--  include/linux/rwsem-spinlock.h          47
-rw-r--r--  include/linux/rwsem.h                   37
-rw-r--r--  kernel/Kconfig.locks                     2
-rw-r--r--  kernel/jump_label.c                     63
-rw-r--r--  kernel/locking/Makefile                  5
-rw-r--r--  kernel/locking/lock_events.c           179
-rw-r--r--  kernel/locking/lock_events.h            59
-rw-r--r--  kernel/locking/lock_events_list.h       67
-rw-r--r--  kernel/locking/lockdep.c               267
-rw-r--r--  kernel/locking/lockdep_internals.h      34
-rw-r--r--  kernel/locking/percpu-rwsem.c            2
-rw-r--r--  kernel/locking/qspinlock.c               8
-rw-r--r--  kernel/locking/qspinlock_paravirt.h     19
-rw-r--r--  kernel/locking/qspinlock_stat.h        242
-rw-r--r--  kernel/locking/rwsem-spinlock.c        339
-rw-r--r--  kernel/locking/rwsem-xadd.c            204
-rw-r--r--  kernel/locking/rwsem.c                  25
-rw-r--r--  kernel/locking/rwsem.h                 174
62 files changed, 983 insertions(+), 1925 deletions(-)
diff --git a/MAINTAINERS b/MAINTAINERS
index 52cd9341e03c..7be412e1a380 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9100,7 +9100,6 @@ F: arch/*/include/asm/spinlock*.h
 F:	include/linux/rwlock*.h
 F:	include/linux/mutex*.h
 F:	include/linux/rwsem*.h
-F:	arch/*/include/asm/rwsem.h
 F:	include/linux/seqlock.h
 F:	lib/locking*.[ch]
 F:	kernel/locking/
diff --git a/arch/Kconfig b/arch/Kconfig
index a826843470ed..3ab446bd12ef 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -907,6 +907,15 @@ config HAVE_ARCH_PREL32_RELOCATIONS
 config ARCH_USE_MEMREMAP_PROT
 	bool
 
+config LOCK_EVENT_COUNTS
+	bool "Locking event counts collection"
+	depends on DEBUG_FS
+	---help---
+	  Enable light-weight counting of various locking related events
+	  in the system with minimal performance impact. This reduces
+	  the chance of application behavior change because of timing
+	  differences. The counts are reported via debugfs.
+
 source "kernel/gcov/Kconfig"
 
 source "scripts/gcc-plugins/Kconfig"
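
The counting machinery behind this option lives in the new kernel/locking/lock_events.[ch] files listed in the diffstat. A minimal sketch of the technique (one per-CPU counter per event, bumped on the fast path and only summed when read back through debugfs) might look like the following; the event and helper names here are illustrative, not the exact kernel interface:

/* Sketch only: per-CPU event counters, cheap to increment, summed on read. */
enum lock_events {
	LOCKEVENT_rwsem_sleep_reader,		/* hypothetical event names */
	LOCKEVENT_rwsem_sleep_writer,
	lockevent_num,				/* number of events */
};

DECLARE_PER_CPU(unsigned long, lockevents[lockevent_num]);

/* Fast path: a single per-CPU increment, no locks, no shared cachelines. */
#define lockevent_inc(ev)	this_cpu_inc(lockevents[LOCKEVENT_ ## ev])

/* Slow path (debugfs read): sum the per-CPU counters for one event. */
static u64 lockevent_read(int event)
{
	u64 sum = 0;
	int cpu;

	for_each_possible_cpu(cpu)
		sum += per_cpu(lockevents[event], cpu);
	return sum;
}
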
diff --git a/arch/alpha/Kconfig b/arch/alpha/Kconfig
index c7c976eb6407..f7b19b813a70 100644
--- a/arch/alpha/Kconfig
+++ b/arch/alpha/Kconfig
@@ -50,13 +50,6 @@ config MMU
 	bool
 	default y
 
-config RWSEM_GENERIC_SPINLOCK
-	bool
-
-config RWSEM_XCHGADD_ALGORITHM
-	bool
-	default y
-
 config ARCH_HAS_ILOG2_U32
 	bool
 	default n
diff --git a/arch/alpha/include/asm/rwsem.h b/arch/alpha/include/asm/rwsem.h
deleted file mode 100644
index cf8fc8f9a2ed..000000000000
--- a/arch/alpha/include/asm/rwsem.h
+++ /dev/null
@@ -1,211 +0,0 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef _ALPHA_RWSEM_H
3#define _ALPHA_RWSEM_H
4
5/*
6 * Written by Ivan Kokshaysky <ink@jurassic.park.msu.ru>, 2001.
7 * Based on asm-alpha/semaphore.h and asm-i386/rwsem.h
8 */
9
10#ifndef _LINUX_RWSEM_H
11#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead"
12#endif
13
14#ifdef __KERNEL__
15
16#include <linux/compiler.h>
17
18#define RWSEM_UNLOCKED_VALUE 0x0000000000000000L
19#define RWSEM_ACTIVE_BIAS 0x0000000000000001L
20#define RWSEM_ACTIVE_MASK 0x00000000ffffffffL
21#define RWSEM_WAITING_BIAS (-0x0000000100000000L)
22#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
23#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
24
25static inline int ___down_read(struct rw_semaphore *sem)
26{
27 long oldcount;
28#ifndef CONFIG_SMP
29 oldcount = sem->count.counter;
30 sem->count.counter += RWSEM_ACTIVE_READ_BIAS;
31#else
32 long temp;
33 __asm__ __volatile__(
34 "1: ldq_l %0,%1\n"
35 " addq %0,%3,%2\n"
36 " stq_c %2,%1\n"
37 " beq %2,2f\n"
38 " mb\n"
39 ".subsection 2\n"
40 "2: br 1b\n"
41 ".previous"
42 :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp)
43 :"Ir" (RWSEM_ACTIVE_READ_BIAS), "m" (sem->count) : "memory");
44#endif
45 return (oldcount < 0);
46}
47
48static inline void __down_read(struct rw_semaphore *sem)
49{
50 if (unlikely(___down_read(sem)))
51 rwsem_down_read_failed(sem);
52}
53
54static inline int __down_read_killable(struct rw_semaphore *sem)
55{
56 if (unlikely(___down_read(sem)))
57 if (IS_ERR(rwsem_down_read_failed_killable(sem)))
58 return -EINTR;
59
60 return 0;
61}
62
63/*
64 * trylock for reading -- returns 1 if successful, 0 if contention
65 */
66static inline int __down_read_trylock(struct rw_semaphore *sem)
67{
68 long old, new, res;
69
70 res = atomic_long_read(&sem->count);
71 do {
72 new = res + RWSEM_ACTIVE_READ_BIAS;
73 if (new <= 0)
74 break;
75 old = res;
76 res = atomic_long_cmpxchg(&sem->count, old, new);
77 } while (res != old);
78 return res >= 0 ? 1 : 0;
79}
80
81static inline long ___down_write(struct rw_semaphore *sem)
82{
83 long oldcount;
84#ifndef CONFIG_SMP
85 oldcount = sem->count.counter;
86 sem->count.counter += RWSEM_ACTIVE_WRITE_BIAS;
87#else
88 long temp;
89 __asm__ __volatile__(
90 "1: ldq_l %0,%1\n"
91 " addq %0,%3,%2\n"
92 " stq_c %2,%1\n"
93 " beq %2,2f\n"
94 " mb\n"
95 ".subsection 2\n"
96 "2: br 1b\n"
97 ".previous"
98 :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp)
99 :"Ir" (RWSEM_ACTIVE_WRITE_BIAS), "m" (sem->count) : "memory");
100#endif
101 return oldcount;
102}
103
104static inline void __down_write(struct rw_semaphore *sem)
105{
106 if (unlikely(___down_write(sem)))
107 rwsem_down_write_failed(sem);
108}
109
110static inline int __down_write_killable(struct rw_semaphore *sem)
111{
112 if (unlikely(___down_write(sem))) {
113 if (IS_ERR(rwsem_down_write_failed_killable(sem)))
114 return -EINTR;
115 }
116
117 return 0;
118}
119
120/*
121 * trylock for writing -- returns 1 if successful, 0 if contention
122 */
123static inline int __down_write_trylock(struct rw_semaphore *sem)
124{
125 long ret = atomic_long_cmpxchg(&sem->count, RWSEM_UNLOCKED_VALUE,
126 RWSEM_ACTIVE_WRITE_BIAS);
127 if (ret == RWSEM_UNLOCKED_VALUE)
128 return 1;
129 return 0;
130}
131
132static inline void __up_read(struct rw_semaphore *sem)
133{
134 long oldcount;
135#ifndef CONFIG_SMP
136 oldcount = sem->count.counter;
137 sem->count.counter -= RWSEM_ACTIVE_READ_BIAS;
138#else
139 long temp;
140 __asm__ __volatile__(
141 " mb\n"
142 "1: ldq_l %0,%1\n"
143 " subq %0,%3,%2\n"
144 " stq_c %2,%1\n"
145 " beq %2,2f\n"
146 ".subsection 2\n"
147 "2: br 1b\n"
148 ".previous"
149 :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp)
150 :"Ir" (RWSEM_ACTIVE_READ_BIAS), "m" (sem->count) : "memory");
151#endif
152 if (unlikely(oldcount < 0))
153 if ((int)oldcount - RWSEM_ACTIVE_READ_BIAS == 0)
154 rwsem_wake(sem);
155}
156
157static inline void __up_write(struct rw_semaphore *sem)
158{
159 long count;
160#ifndef CONFIG_SMP
161 sem->count.counter -= RWSEM_ACTIVE_WRITE_BIAS;
162 count = sem->count.counter;
163#else
164 long temp;
165 __asm__ __volatile__(
166 " mb\n"
167 "1: ldq_l %0,%1\n"
168 " subq %0,%3,%2\n"
169 " stq_c %2,%1\n"
170 " beq %2,2f\n"
171 " subq %0,%3,%0\n"
172 ".subsection 2\n"
173 "2: br 1b\n"
174 ".previous"
175 :"=&r" (count), "=m" (sem->count), "=&r" (temp)
176 :"Ir" (RWSEM_ACTIVE_WRITE_BIAS), "m" (sem->count) : "memory");
177#endif
178 if (unlikely(count))
179 if ((int)count == 0)
180 rwsem_wake(sem);
181}
182
183/*
184 * downgrade write lock to read lock
185 */
186static inline void __downgrade_write(struct rw_semaphore *sem)
187{
188 long oldcount;
189#ifndef CONFIG_SMP
190 oldcount = sem->count.counter;
191 sem->count.counter -= RWSEM_WAITING_BIAS;
192#else
193 long temp;
194 __asm__ __volatile__(
195 "1: ldq_l %0,%1\n"
196 " addq %0,%3,%2\n"
197 " stq_c %2,%1\n"
198 " beq %2,2f\n"
199 " mb\n"
200 ".subsection 2\n"
201 "2: br 1b\n"
202 ".previous"
203 :"=&r" (oldcount), "=m" (sem->count), "=&r" (temp)
204 :"Ir" (-RWSEM_WAITING_BIAS), "m" (sem->count) : "memory");
205#endif
206 if (unlikely(oldcount < 0))
207 rwsem_downgrade_wake(sem);
208}
209
210#endif /* __KERNEL__ */
211#endif /* _ALPHA_RWSEM_H */
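
The LL/SC sequences above only perform an atomic add of the read (or write) bias. The portable fast path that replaces them, shown in the removed include/asm-generic/rwsem.h further down (the same logic appears to move into kernel/locking/rwsem.h, which grows by 174 lines in the diffstat), does the same thing with a single atomic operation:

/* Generic equivalent of the ldq_l/stq_c loop in ___down_read() above. */
static inline void __down_read(struct rw_semaphore *sem)
{
	if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0))
		rwsem_down_read_failed(sem);	/* waiters or an active writer */
}
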
diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig
index c781e45d1d99..23e063df5d2c 100644
--- a/arch/arc/Kconfig
+++ b/arch/arc/Kconfig
@@ -63,9 +63,6 @@ config SCHED_OMIT_FRAME_POINTER
 config GENERIC_CSUM
 	def_bool y
 
-config RWSEM_GENERIC_SPINLOCK
-	def_bool y
-
 config ARCH_DISCONTIGMEM_ENABLE
 	def_bool n
 
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 9aed25a6019b..dc9855c4a3b4 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -178,10 +178,6 @@ config TRACE_IRQFLAGS_SUPPORT
 	bool
 	default !CPU_V7M
 
-config RWSEM_XCHGADD_ALGORITHM
-	bool
-	default y
-
 config ARCH_HAS_ILOG2_U32
 	bool
 
diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild
index a8a4eb7f6dae..8fb51b7bf1d5 100644
--- a/arch/arm/include/asm/Kbuild
+++ b/arch/arm/include/asm/Kbuild
@@ -12,7 +12,6 @@ generic-y += mm-arch-hooks.h
 generic-y += msi.h
 generic-y += parport.h
 generic-y += preempt.h
-generic-y += rwsem.h
 generic-y += seccomp.h
 generic-y += segment.h
 generic-y += serial.h
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index 78d9fafac983..d81adca1b04d 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -236,9 +236,6 @@ config LOCKDEP_SUPPORT
 config TRACE_IRQFLAGS_SUPPORT
 	def_bool y
 
-config RWSEM_XCHGADD_ALGORITHM
-	def_bool y
-
 config GENERIC_BUG
 	def_bool y
 	depends on BUG
diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild
index 1e17ea5c372b..60a933b07001 100644
--- a/arch/arm64/include/asm/Kbuild
+++ b/arch/arm64/include/asm/Kbuild
@@ -16,7 +16,6 @@ generic-y += mm-arch-hooks.h
 generic-y += msi.h
 generic-y += qrwlock.h
 generic-y += qspinlock.h
-generic-y += rwsem.h
 generic-y += segment.h
 generic-y += serial.h
 generic-y += set_memory.h
diff --git a/arch/c6x/Kconfig b/arch/c6x/Kconfig
index 3bb75e674161..eeb0471268a0 100644
--- a/arch/c6x/Kconfig
+++ b/arch/c6x/Kconfig
@@ -28,9 +28,6 @@ config MMU
 config FPU
 	def_bool n
 
-config RWSEM_GENERIC_SPINLOCK
-	def_bool y
-
 config GENERIC_CALIBRATE_DELAY
 	def_bool y
 
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 725a115759c9..6555d1781132 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -92,9 +92,6 @@ config GENERIC_HWEIGHT
 config MMU
 	def_bool y
 
-config RWSEM_GENERIC_SPINLOCK
-	def_bool y
-
 config STACKTRACE_SUPPORT
 	def_bool y
 
diff --git a/arch/h8300/Kconfig b/arch/h8300/Kconfig
index c071da34e081..61c01db6c292 100644
--- a/arch/h8300/Kconfig
+++ b/arch/h8300/Kconfig
@@ -27,9 +27,6 @@ config H8300
 config CPU_BIG_ENDIAN
 	def_bool y
 
-config RWSEM_GENERIC_SPINLOCK
-	def_bool y
-
 config GENERIC_HWEIGHT
 	def_bool y
 
diff --git a/arch/hexagon/Kconfig b/arch/hexagon/Kconfig
index ac441680dcc0..3e54a53208d5 100644
--- a/arch/hexagon/Kconfig
+++ b/arch/hexagon/Kconfig
@@ -65,12 +65,6 @@ config GENERIC_CSUM
 config GENERIC_IRQ_PROBE
 	def_bool y
 
-config RWSEM_GENERIC_SPINLOCK
-	def_bool n
-
-config RWSEM_XCHGADD_ALGORITHM
-	def_bool y
-
 config GENERIC_HWEIGHT
 	def_bool y
 
diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild
index d046e8ccdf78..3ff5f297acda 100644
--- a/arch/hexagon/include/asm/Kbuild
+++ b/arch/hexagon/include/asm/Kbuild
@@ -27,7 +27,6 @@ generic-y += mm-arch-hooks.h
 generic-y += pci.h
 generic-y += percpu.h
 generic-y += preempt.h
-generic-y += rwsem.h
 generic-y += sections.h
 generic-y += segment.h
 generic-y += serial.h
diff --git a/arch/ia64/Kconfig b/arch/ia64/Kconfig
index 8d7396bd1790..73a26f04644e 100644
--- a/arch/ia64/Kconfig
+++ b/arch/ia64/Kconfig
@@ -83,10 +83,6 @@ config STACKTRACE_SUPPORT
 config GENERIC_LOCKBREAK
 	def_bool n
 
-config RWSEM_XCHGADD_ALGORITHM
-	bool
-	default y
-
 config HUGETLB_PAGE_SIZE_VARIABLE
 	bool
 	depends on HUGETLB_PAGE
diff --git a/arch/ia64/include/asm/rwsem.h b/arch/ia64/include/asm/rwsem.h
deleted file mode 100644
index 917910607e0e..000000000000
--- a/arch/ia64/include/asm/rwsem.h
+++ /dev/null
@@ -1,172 +0,0 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * R/W semaphores for ia64
4 *
5 * Copyright (C) 2003 Ken Chen <kenneth.w.chen@intel.com>
6 * Copyright (C) 2003 Asit Mallick <asit.k.mallick@intel.com>
7 * Copyright (C) 2005 Christoph Lameter <cl@linux.com>
8 *
9 * Based on asm-i386/rwsem.h and other architecture implementation.
10 *
11 * The MSW of the count is the negated number of active writers and
12 * waiting lockers, and the LSW is the total number of active locks.
13 *
14 * The lock count is initialized to 0 (no active and no waiting lockers).
15 *
16 * When a writer subtracts WRITE_BIAS, it'll get 0xffffffff00000001 for
17 * the case of an uncontended lock. Readers increment by 1 and see a positive
18 * value when uncontended, negative if there are writers (and maybe) readers
19 * waiting (in which case it goes to sleep).
20 */
21
22#ifndef _ASM_IA64_RWSEM_H
23#define _ASM_IA64_RWSEM_H
24
25#ifndef _LINUX_RWSEM_H
26#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead."
27#endif
28
29#include <asm/intrinsics.h>
30
31#define RWSEM_UNLOCKED_VALUE __IA64_UL_CONST(0x0000000000000000)
32#define RWSEM_ACTIVE_BIAS (1L)
33#define RWSEM_ACTIVE_MASK (0xffffffffL)
34#define RWSEM_WAITING_BIAS (-0x100000000L)
35#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
36#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
37
38/*
39 * lock for reading
40 */
41static inline int
42___down_read (struct rw_semaphore *sem)
43{
44 long result = ia64_fetchadd8_acq((unsigned long *)&sem->count.counter, 1);
45
46 return (result < 0);
47}
48
49static inline void
50__down_read (struct rw_semaphore *sem)
51{
52 if (___down_read(sem))
53 rwsem_down_read_failed(sem);
54}
55
56static inline int
57__down_read_killable (struct rw_semaphore *sem)
58{
59 if (___down_read(sem))
60 if (IS_ERR(rwsem_down_read_failed_killable(sem)))
61 return -EINTR;
62
63 return 0;
64}
65
66/*
67 * lock for writing
68 */
69static inline long
70___down_write (struct rw_semaphore *sem)
71{
72 long old, new;
73
74 do {
75 old = atomic_long_read(&sem->count);
76 new = old + RWSEM_ACTIVE_WRITE_BIAS;
77 } while (atomic_long_cmpxchg_acquire(&sem->count, old, new) != old);
78
79 return old;
80}
81
82static inline void
83__down_write (struct rw_semaphore *sem)
84{
85 if (___down_write(sem))
86 rwsem_down_write_failed(sem);
87}
88
89static inline int
90__down_write_killable (struct rw_semaphore *sem)
91{
92 if (___down_write(sem)) {
93 if (IS_ERR(rwsem_down_write_failed_killable(sem)))
94 return -EINTR;
95 }
96
97 return 0;
98}
99
100/*
101 * unlock after reading
102 */
103static inline void
104__up_read (struct rw_semaphore *sem)
105{
106 long result = ia64_fetchadd8_rel((unsigned long *)&sem->count.counter, -1);
107
108 if (result < 0 && (--result & RWSEM_ACTIVE_MASK) == 0)
109 rwsem_wake(sem);
110}
111
112/*
113 * unlock after writing
114 */
115static inline void
116__up_write (struct rw_semaphore *sem)
117{
118 long old, new;
119
120 do {
121 old = atomic_long_read(&sem->count);
122 new = old - RWSEM_ACTIVE_WRITE_BIAS;
123 } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old);
124
125 if (new < 0 && (new & RWSEM_ACTIVE_MASK) == 0)
126 rwsem_wake(sem);
127}
128
129/*
130 * trylock for reading -- returns 1 if successful, 0 if contention
131 */
132static inline int
133__down_read_trylock (struct rw_semaphore *sem)
134{
135 long tmp;
136 while ((tmp = atomic_long_read(&sem->count)) >= 0) {
137 if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp, tmp+1)) {
138 return 1;
139 }
140 }
141 return 0;
142}
143
144/*
145 * trylock for writing -- returns 1 if successful, 0 if contention
146 */
147static inline int
148__down_write_trylock (struct rw_semaphore *sem)
149{
150 long tmp = atomic_long_cmpxchg_acquire(&sem->count,
151 RWSEM_UNLOCKED_VALUE, RWSEM_ACTIVE_WRITE_BIAS);
152 return tmp == RWSEM_UNLOCKED_VALUE;
153}
154
155/*
156 * downgrade write lock to read lock
157 */
158static inline void
159__downgrade_write (struct rw_semaphore *sem)
160{
161 long old, new;
162
163 do {
164 old = atomic_long_read(&sem->count);
165 new = old - RWSEM_WAITING_BIAS;
166 } while (atomic_long_cmpxchg_release(&sem->count, old, new) != old);
167
168 if (old < 0)
169 rwsem_downgrade_wake(sem);
170}
171
172#endif /* _ASM_IA64_RWSEM_H */
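
For reference, the 64-bit bias constants defined at the top of this file encode the lock state as follows (the x86 and asm-generic variants removed below use the same scheme):

    count = 0x0000000000000000   unlocked
    count = 0x0000000000000001   one active reader              (+ ACTIVE_READ_BIAS)
    count = 0x0000000000000003   three active readers
    count = 0xffffffff00000001   one active writer, no waiters  (+ WAITING_BIAS + ACTIVE_BIAS)

A reader that increments the count and sees a negative result therefore knows a writer is active or waiting and must take the slow path.
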
diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig
index 4e37efbc9296..735b9679fe6f 100644
--- a/arch/m68k/Kconfig
+++ b/arch/m68k/Kconfig
@@ -33,13 +33,6 @@ config M68K
 config CPU_BIG_ENDIAN
 	def_bool y
 
-config RWSEM_GENERIC_SPINLOCK
-	bool
-	default y
-
-config RWSEM_XCHGADD_ALGORITHM
-	bool
-
 config ARCH_HAS_ILOG2_U32
 	bool
 
diff --git a/arch/microblaze/Kconfig b/arch/microblaze/Kconfig
index 321e398ab6b5..adb179f519f9 100644
--- a/arch/microblaze/Kconfig
+++ b/arch/microblaze/Kconfig
@@ -59,15 +59,9 @@ config CPU_LITTLE_ENDIAN
 
 endchoice
 
-config RWSEM_GENERIC_SPINLOCK
-	def_bool y
-
 config ZONE_DMA
 	def_bool y
 
-config RWSEM_XCHGADD_ALGORITHM
-	bool
-
 config ARCH_HAS_ILOG2_U32
 	def_bool n
 
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index 4a5f5b0ee9a9..b9c48b27162d 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -1037,13 +1037,6 @@ source "arch/mips/paravirt/Kconfig"
 
 endmenu
 
-config RWSEM_GENERIC_SPINLOCK
-	bool
-	default y
-
-config RWSEM_XCHGADD_ALGORITHM
-	bool
-
 config GENERIC_HWEIGHT
 	bool
 	default y
diff --git a/arch/nds32/Kconfig b/arch/nds32/Kconfig
index addb7f5f5264..55559ca0efe4 100644
--- a/arch/nds32/Kconfig
+++ b/arch/nds32/Kconfig
@@ -60,9 +60,6 @@ config GENERIC_LOCKBREAK
 	def_bool y
 	depends on PREEMPT
 
-config RWSEM_GENERIC_SPINLOCK
-	def_bool y
-
 config TRACE_IRQFLAGS_SUPPORT
 	def_bool y
 
diff --git a/arch/nios2/Kconfig b/arch/nios2/Kconfig
index 3633f8144367..ea37394ff3ea 100644
--- a/arch/nios2/Kconfig
+++ b/arch/nios2/Kconfig
@@ -41,9 +41,6 @@ config NO_IOPORT_MAP
 config FPU
 	def_bool n
 
-config RWSEM_GENERIC_SPINLOCK
-	def_bool y
-
 config TRACE_IRQFLAGS_SUPPORT
 	def_bool n
 
diff --git a/arch/openrisc/Kconfig b/arch/openrisc/Kconfig
index c6cf8a49a0ab..7cfb20555b10 100644
--- a/arch/openrisc/Kconfig
+++ b/arch/openrisc/Kconfig
@@ -44,12 +44,6 @@ config CPU_BIG_ENDIAN
 config MMU
 	def_bool y
 
-config RWSEM_GENERIC_SPINLOCK
-	def_bool y
-
-config RWSEM_XCHGADD_ALGORITHM
-	def_bool n
-
 config GENERIC_HWEIGHT
 	def_bool y
 
diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig
index c8e621296092..f1ed8ddfe486 100644
--- a/arch/parisc/Kconfig
+++ b/arch/parisc/Kconfig
@@ -75,12 +75,6 @@ config GENERIC_LOCKBREAK
 	default y
 	depends on SMP && PREEMPT
 
-config RWSEM_GENERIC_SPINLOCK
-	def_bool y
-
-config RWSEM_XCHGADD_ALGORITHM
-	bool
-
 config ARCH_HAS_ILOG2_U32
 	bool
 	default n
diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 8e1e2abf17eb..fab0bf4259c7 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -103,13 +103,6 @@ config LOCKDEP_SUPPORT
 	bool
 	default y
 
-config RWSEM_GENERIC_SPINLOCK
-	bool
-
-config RWSEM_XCHGADD_ALGORITHM
-	bool
-	default y
-
 config GENERIC_LOCKBREAK
 	bool
 	default y
diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild
index a0c132bedfae..36bda391e549 100644
--- a/arch/powerpc/include/asm/Kbuild
+++ b/arch/powerpc/include/asm/Kbuild
@@ -8,6 +8,5 @@ generic-y += irq_regs.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += preempt.h
-generic-y += rwsem.h
 generic-y += vtime.h
 generic-y += msi.h
diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig
index eb56c82d8aa1..0582260fb6c2 100644
--- a/arch/riscv/Kconfig
+++ b/arch/riscv/Kconfig
@@ -69,9 +69,6 @@ config STACKTRACE_SUPPORT
 config TRACE_IRQFLAGS_SUPPORT
 	def_bool y
 
-config RWSEM_GENERIC_SPINLOCK
-	def_bool y
-
 config GENERIC_BUG
 	def_bool y
 	depends on BUG
diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
index cf06e313e103..97b555e772d7 100644
--- a/arch/s390/Kconfig
+++ b/arch/s390/Kconfig
@@ -14,12 +14,6 @@ config LOCKDEP_SUPPORT
 config STACKTRACE_SUPPORT
 	def_bool y
 
-config RWSEM_GENERIC_SPINLOCK
-	bool
-
-config RWSEM_XCHGADD_ALGORITHM
-	def_bool y
-
 config ARCH_HAS_ILOG2_U32
 	def_bool n
 
diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild
index 12d77cb11fe5..d5fadefea33c 100644
--- a/arch/s390/include/asm/Kbuild
+++ b/arch/s390/include/asm/Kbuild
@@ -20,7 +20,6 @@ generic-y += local.h
 generic-y += local64.h
 generic-y += mcs_spinlock.h
 generic-y += mm-arch-hooks.h
-generic-y += rwsem.h
 generic-y += trace_clock.h
 generic-y += unaligned.h
 generic-y += word-at-a-time.h
diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig
index b1c91ea9a958..0be08d586d40 100644
--- a/arch/sh/Kconfig
+++ b/arch/sh/Kconfig
@@ -90,12 +90,6 @@ config ARCH_DEFCONFIG
 	default "arch/sh/configs/shx3_defconfig" if SUPERH32
 	default "arch/sh/configs/cayman_defconfig" if SUPERH64
 
-config RWSEM_GENERIC_SPINLOCK
-	def_bool y
-
-config RWSEM_XCHGADD_ALGORITHM
-	bool
-
 config GENERIC_BUG
 	def_bool y
 	depends on BUG && SUPERH32
diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild
index 7bf2cb680d32..73fff39a0122 100644
--- a/arch/sh/include/asm/Kbuild
+++ b/arch/sh/include/asm/Kbuild
@@ -17,7 +17,6 @@ generic-y += mm-arch-hooks.h
 generic-y += parport.h
 generic-y += percpu.h
 generic-y += preempt.h
-generic-y += rwsem.h
 generic-y += serial.h
 generic-y += sizes.h
 generic-y += trace_clock.h
diff --git a/arch/sparc/Kconfig b/arch/sparc/Kconfig
index db79290ed6d5..f6421c9ce5d3 100644
--- a/arch/sparc/Kconfig
+++ b/arch/sparc/Kconfig
@@ -192,14 +192,6 @@ config NR_CPUS
 
 source "kernel/Kconfig.hz"
 
-config RWSEM_GENERIC_SPINLOCK
-	bool
-	default y if SPARC32
-
-config RWSEM_XCHGADD_ALGORITHM
-	bool
-	default y if SPARC64
-
 config GENERIC_HWEIGHT
 	bool
 	default y
diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild
index a22cfd5c0ee8..2ca3200d3616 100644
--- a/arch/sparc/include/asm/Kbuild
+++ b/arch/sparc/include/asm/Kbuild
@@ -18,7 +18,6 @@ generic-y += mm-arch-hooks.h
 generic-y += module.h
 generic-y += msi.h
 generic-y += preempt.h
-generic-y += rwsem.h
 generic-y += serial.h
 generic-y += trace_clock.h
 generic-y += word-at-a-time.h
diff --git a/arch/unicore32/Kconfig b/arch/unicore32/Kconfig
index d83c8f70900d..2445dfcf6444 100644
--- a/arch/unicore32/Kconfig
+++ b/arch/unicore32/Kconfig
@@ -39,12 +39,6 @@ config STACKTRACE_SUPPORT
 config LOCKDEP_SUPPORT
 	def_bool y
 
-config RWSEM_GENERIC_SPINLOCK
-	def_bool y
-
-config RWSEM_XCHGADD_ALGORITHM
-	bool
-
 config ARCH_HAS_ILOG2_U32
 	bool
 
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index f1162df4a805..90e2640ade75 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -268,9 +268,6 @@ config ARCH_MAY_HAVE_PC_FDC
 	def_bool y
 	depends on ISA_DMA_API
 
-config RWSEM_XCHGADD_ALGORITHM
-	def_bool y
-
 config GENERIC_CALIBRATE_DELAY
 	def_bool y
 
@@ -783,14 +780,6 @@ config PARAVIRT_SPINLOCKS
 
 	  If you are unsure how to answer this question, answer Y.
 
-config QUEUED_LOCK_STAT
-	bool "Paravirt queued spinlock statistics"
-	depends on PARAVIRT_SPINLOCKS && DEBUG_FS
-	---help---
-	  Enable the collection of statistical data on the slowpath
-	  behavior of paravirtualized queued spinlocks and report
-	  them on debugfs.
-
 source "arch/x86/xen/Kconfig"
 
 config KVM_GUEST
diff --git a/arch/x86/include/asm/rwsem.h b/arch/x86/include/asm/rwsem.h
deleted file mode 100644
index 4c25cf6caefa..000000000000
--- a/arch/x86/include/asm/rwsem.h
+++ /dev/null
@@ -1,237 +0,0 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/* rwsem.h: R/W semaphores implemented using XADD/CMPXCHG for i486+
3 *
4 * Written by David Howells (dhowells@redhat.com).
5 *
6 * Derived from asm-x86/semaphore.h
7 *
8 *
9 * The MSW of the count is the negated number of active writers and waiting
10 * lockers, and the LSW is the total number of active locks
11 *
12 * The lock count is initialized to 0 (no active and no waiting lockers).
13 *
14 * When a writer subtracts WRITE_BIAS, it'll get 0xffff0001 for the case of an
15 * uncontended lock. This can be determined because XADD returns the old value.
16 * Readers increment by 1 and see a positive value when uncontended, negative
17 * if there are writers (and maybe) readers waiting (in which case it goes to
18 * sleep).
19 *
20 * The value of WAITING_BIAS supports up to 32766 waiting processes. This can
21 * be extended to 65534 by manually checking the whole MSW rather than relying
22 * on the S flag.
23 *
24 * The value of ACTIVE_BIAS supports up to 65535 active processes.
25 *
26 * This should be totally fair - if anything is waiting, a process that wants a
27 * lock will go to the back of the queue. When the currently active lock is
28 * released, if there's a writer at the front of the queue, then that and only
29 * that will be woken up; if there's a bunch of consecutive readers at the
30 * front, then they'll all be woken up, but no other readers will be.
31 */
32
33#ifndef _ASM_X86_RWSEM_H
34#define _ASM_X86_RWSEM_H
35
36#ifndef _LINUX_RWSEM_H
37#error "please don't include asm/rwsem.h directly, use linux/rwsem.h instead"
38#endif
39
40#ifdef __KERNEL__
41#include <asm/asm.h>
42
43/*
44 * The bias values and the counter type limits the number of
45 * potential readers/writers to 32767 for 32 bits and 2147483647
46 * for 64 bits.
47 */
48
49#ifdef CONFIG_X86_64
50# define RWSEM_ACTIVE_MASK 0xffffffffL
51#else
52# define RWSEM_ACTIVE_MASK 0x0000ffffL
53#endif
54
55#define RWSEM_UNLOCKED_VALUE 0x00000000L
56#define RWSEM_ACTIVE_BIAS 0x00000001L
57#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1)
58#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
59#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
60
61/*
62 * lock for reading
63 */
64#define ____down_read(sem, slow_path) \
65({ \
66 struct rw_semaphore* ret; \
67 asm volatile("# beginning down_read\n\t" \
68 LOCK_PREFIX _ASM_INC "(%[sem])\n\t" \
69 /* adds 0x00000001 */ \
70 " jns 1f\n" \
71 " call " slow_path "\n" \
72 "1:\n\t" \
73 "# ending down_read\n\t" \
74 : "+m" (sem->count), "=a" (ret), \
75 ASM_CALL_CONSTRAINT \
76 : [sem] "a" (sem) \
77 : "memory", "cc"); \
78 ret; \
79})
80
81static inline void __down_read(struct rw_semaphore *sem)
82{
83 ____down_read(sem, "call_rwsem_down_read_failed");
84}
85
86static inline int __down_read_killable(struct rw_semaphore *sem)
87{
88 if (IS_ERR(____down_read(sem, "call_rwsem_down_read_failed_killable")))
89 return -EINTR;
90 return 0;
91}
92
93/*
94 * trylock for reading -- returns 1 if successful, 0 if contention
95 */
96static inline bool __down_read_trylock(struct rw_semaphore *sem)
97{
98 long result, tmp;
99 asm volatile("# beginning __down_read_trylock\n\t"
100 " mov %[count],%[result]\n\t"
101 "1:\n\t"
102 " mov %[result],%[tmp]\n\t"
103 " add %[inc],%[tmp]\n\t"
104 " jle 2f\n\t"
105 LOCK_PREFIX " cmpxchg %[tmp],%[count]\n\t"
106 " jnz 1b\n\t"
107 "2:\n\t"
108 "# ending __down_read_trylock\n\t"
109 : [count] "+m" (sem->count), [result] "=&a" (result),
110 [tmp] "=&r" (tmp)
111 : [inc] "i" (RWSEM_ACTIVE_READ_BIAS)
112 : "memory", "cc");
113 return result >= 0;
114}
115
116/*
117 * lock for writing
118 */
119#define ____down_write(sem, slow_path) \
120({ \
121 long tmp; \
122 struct rw_semaphore* ret; \
123 \
124 asm volatile("# beginning down_write\n\t" \
125 LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t" \
126 /* adds 0xffff0001, returns the old value */ \
127 " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t" \
128 /* was the active mask 0 before? */\
129 " jz 1f\n" \
130 " call " slow_path "\n" \
131 "1:\n" \
132 "# ending down_write" \
133 : "+m" (sem->count), [tmp] "=d" (tmp), \
134 "=a" (ret), ASM_CALL_CONSTRAINT \
135 : [sem] "a" (sem), "[tmp]" (RWSEM_ACTIVE_WRITE_BIAS) \
136 : "memory", "cc"); \
137 ret; \
138})
139
140static inline void __down_write(struct rw_semaphore *sem)
141{
142 ____down_write(sem, "call_rwsem_down_write_failed");
143}
144
145static inline int __down_write_killable(struct rw_semaphore *sem)
146{
147 if (IS_ERR(____down_write(sem, "call_rwsem_down_write_failed_killable")))
148 return -EINTR;
149
150 return 0;
151}
152
153/*
154 * trylock for writing -- returns 1 if successful, 0 if contention
155 */
156static inline bool __down_write_trylock(struct rw_semaphore *sem)
157{
158 bool result;
159 long tmp0, tmp1;
160 asm volatile("# beginning __down_write_trylock\n\t"
161 " mov %[count],%[tmp0]\n\t"
162 "1:\n\t"
163 " test " __ASM_SEL(%w1,%k1) "," __ASM_SEL(%w1,%k1) "\n\t"
164 /* was the active mask 0 before? */
165 " jnz 2f\n\t"
166 " mov %[tmp0],%[tmp1]\n\t"
167 " add %[inc],%[tmp1]\n\t"
168 LOCK_PREFIX " cmpxchg %[tmp1],%[count]\n\t"
169 " jnz 1b\n\t"
170 "2:\n\t"
171 CC_SET(e)
172 "# ending __down_write_trylock\n\t"
173 : [count] "+m" (sem->count), [tmp0] "=&a" (tmp0),
174 [tmp1] "=&r" (tmp1), CC_OUT(e) (result)
175 : [inc] "er" (RWSEM_ACTIVE_WRITE_BIAS)
176 : "memory");
177 return result;
178}
179
180/*
181 * unlock after reading
182 */
183static inline void __up_read(struct rw_semaphore *sem)
184{
185 long tmp;
186 asm volatile("# beginning __up_read\n\t"
187 LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t"
188 /* subtracts 1, returns the old value */
189 " jns 1f\n\t"
190 " call call_rwsem_wake\n" /* expects old value in %edx */
191 "1:\n"
192 "# ending __up_read\n"
193 : "+m" (sem->count), [tmp] "=d" (tmp)
194 : [sem] "a" (sem), "[tmp]" (-RWSEM_ACTIVE_READ_BIAS)
195 : "memory", "cc");
196}
197
198/*
199 * unlock after writing
200 */
201static inline void __up_write(struct rw_semaphore *sem)
202{
203 long tmp;
204 asm volatile("# beginning __up_write\n\t"
205 LOCK_PREFIX " xadd %[tmp],(%[sem])\n\t"
206 /* subtracts 0xffff0001, returns the old value */
207 " jns 1f\n\t"
208 " call call_rwsem_wake\n" /* expects old value in %edx */
209 "1:\n\t"
210 "# ending __up_write\n"
211 : "+m" (sem->count), [tmp] "=d" (tmp)
212 : [sem] "a" (sem), "[tmp]" (-RWSEM_ACTIVE_WRITE_BIAS)
213 : "memory", "cc");
214}
215
216/*
217 * downgrade write lock to read lock
218 */
219static inline void __downgrade_write(struct rw_semaphore *sem)
220{
221 asm volatile("# beginning __downgrade_write\n\t"
222 LOCK_PREFIX _ASM_ADD "%[inc],(%[sem])\n\t"
223 /*
224 * transitions 0xZZZZ0001 -> 0xYYYY0001 (i386)
225 * 0xZZZZZZZZ00000001 -> 0xYYYYYYYY00000001 (x86_64)
226 */
227 " jns 1f\n\t"
228 " call call_rwsem_downgrade_wake\n"
229 "1:\n\t"
230 "# ending __downgrade_write\n"
231 : "+m" (sem->count)
232 : [sem] "a" (sem), [inc] "er" (-RWSEM_WAITING_BIAS)
233 : "memory", "cc");
234}
235
236#endif /* __KERNEL__ */
237#endif /* _ASM_X86_RWSEM_H */
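
The inline-asm write fast path above (a LOCK XADD of the write bias followed by a test of the old active count) corresponds, roughly, to the portable form used by the generic implementation that replaces it (see the removed include/asm-generic/rwsem.h below):

/* Generic counterpart of the ____down_write() xadd fast path above. */
static inline void __down_write(struct rw_semaphore *sem)
{
	long tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
						  &sem->count);
	if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
		rwsem_down_write_failed(sem);	/* lock was not free */
}
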
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 3cb3af51ec89..5246db42de45 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -35,7 +35,6 @@ obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
 lib-y := delay.o misc.o cmdline.o cpu.o
 lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
 lib-y += memcpy_$(BITS).o
-lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
 lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
 lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
 lib-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/x86/lib/rwsem.S b/arch/x86/lib/rwsem.S
deleted file mode 100644
index dc2ab6ea6768..000000000000
--- a/arch/x86/lib/rwsem.S
+++ /dev/null
@@ -1,156 +0,0 @@
1/*
2 * x86 semaphore implementation.
3 *
4 * (C) Copyright 1999 Linus Torvalds
5 *
6 * Portions Copyright 1999 Red Hat, Inc.
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public License
10 * as published by the Free Software Foundation; either version
11 * 2 of the License, or (at your option) any later version.
12 *
13 * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org>
14 */
15
16#include <linux/linkage.h>
17#include <asm/alternative-asm.h>
18#include <asm/frame.h>
19
20#define __ASM_HALF_REG(reg) __ASM_SEL(reg, e##reg)
21#define __ASM_HALF_SIZE(inst) __ASM_SEL(inst##w, inst##l)
22
23#ifdef CONFIG_X86_32
24
25/*
26 * The semaphore operations have a special calling sequence that
27 * allow us to do a simpler in-line version of them. These routines
28 * need to convert that sequence back into the C sequence when
29 * there is contention on the semaphore.
30 *
31 * %eax contains the semaphore pointer on entry. Save the C-clobbered
32 * registers (%eax, %edx and %ecx) except %eax which is either a return
33 * value or just gets clobbered. Same is true for %edx so make sure GCC
34 * reloads it after the slow path, by making it hold a temporary, for
35 * example see ____down_write().
36 */
37
38#define save_common_regs \
39 pushl %ecx
40
41#define restore_common_regs \
42 popl %ecx
43
44 /* Avoid uglifying the argument copying x86-64 needs to do. */
45 .macro movq src, dst
46 .endm
47
48#else
49
50/*
51 * x86-64 rwsem wrappers
52 *
53 * This interfaces the inline asm code to the slow-path
54 * C routines. We need to save the call-clobbered regs
55 * that the asm does not mark as clobbered, and move the
56 * argument from %rax to %rdi.
57 *
58 * NOTE! We don't need to save %rax, because the functions
59 * will always return the semaphore pointer in %rax (which
60 * is also the input argument to these helpers)
61 *
62 * The following can clobber %rdx because the asm clobbers it:
63 * call_rwsem_down_write_failed
64 * call_rwsem_wake
65 * but %rdi, %rsi, %rcx, %r8-r11 always need saving.
66 */
67
68#define save_common_regs \
69 pushq %rdi; \
70 pushq %rsi; \
71 pushq %rcx; \
72 pushq %r8; \
73 pushq %r9; \
74 pushq %r10; \
75 pushq %r11
76
77#define restore_common_regs \
78 popq %r11; \
79 popq %r10; \
80 popq %r9; \
81 popq %r8; \
82 popq %rcx; \
83 popq %rsi; \
84 popq %rdi
85
86#endif
87
88/* Fix up special calling conventions */
89ENTRY(call_rwsem_down_read_failed)
90 FRAME_BEGIN
91 save_common_regs
92 __ASM_SIZE(push,) %__ASM_REG(dx)
93 movq %rax,%rdi
94 call rwsem_down_read_failed
95 __ASM_SIZE(pop,) %__ASM_REG(dx)
96 restore_common_regs
97 FRAME_END
98 ret
99ENDPROC(call_rwsem_down_read_failed)
100
101ENTRY(call_rwsem_down_read_failed_killable)
102 FRAME_BEGIN
103 save_common_regs
104 __ASM_SIZE(push,) %__ASM_REG(dx)
105 movq %rax,%rdi
106 call rwsem_down_read_failed_killable
107 __ASM_SIZE(pop,) %__ASM_REG(dx)
108 restore_common_regs
109 FRAME_END
110 ret
111ENDPROC(call_rwsem_down_read_failed_killable)
112
113ENTRY(call_rwsem_down_write_failed)
114 FRAME_BEGIN
115 save_common_regs
116 movq %rax,%rdi
117 call rwsem_down_write_failed
118 restore_common_regs
119 FRAME_END
120 ret
121ENDPROC(call_rwsem_down_write_failed)
122
123ENTRY(call_rwsem_down_write_failed_killable)
124 FRAME_BEGIN
125 save_common_regs
126 movq %rax,%rdi
127 call rwsem_down_write_failed_killable
128 restore_common_regs
129 FRAME_END
130 ret
131ENDPROC(call_rwsem_down_write_failed_killable)
132
133ENTRY(call_rwsem_wake)
134 FRAME_BEGIN
135 /* do nothing if still outstanding active readers */
136 __ASM_HALF_SIZE(dec) %__ASM_HALF_REG(dx)
137 jnz 1f
138 save_common_regs
139 movq %rax,%rdi
140 call rwsem_wake
141 restore_common_regs
1421: FRAME_END
143 ret
144ENDPROC(call_rwsem_wake)
145
146ENTRY(call_rwsem_downgrade_wake)
147 FRAME_BEGIN
148 save_common_regs
149 __ASM_SIZE(push,) %__ASM_REG(dx)
150 movq %rax,%rdi
151 call rwsem_downgrade_wake
152 __ASM_SIZE(pop,) %__ASM_REG(dx)
153 restore_common_regs
154 FRAME_END
155 ret
156ENDPROC(call_rwsem_downgrade_wake)
diff --git a/arch/x86/um/Kconfig b/arch/x86/um/Kconfig
index a9e80e44178c..a8985e1f7432 100644
--- a/arch/x86/um/Kconfig
+++ b/arch/x86/um/Kconfig
@@ -32,12 +32,6 @@ config ARCH_DEFCONFIG
 	default "arch/um/configs/i386_defconfig" if X86_32
 	default "arch/um/configs/x86_64_defconfig" if X86_64
 
-config RWSEM_XCHGADD_ALGORITHM
-	def_bool 64BIT
-
-config RWSEM_GENERIC_SPINLOCK
-	def_bool !RWSEM_XCHGADD_ALGORITHM
-
 config 3_LEVEL_PGTABLES
 	bool "Three-level pagetables" if !64BIT
 	default 64BIT
diff --git a/arch/x86/um/Makefile b/arch/x86/um/Makefile
index 2d686ae54681..33c51c064c77 100644
--- a/arch/x86/um/Makefile
+++ b/arch/x86/um/Makefile
@@ -21,14 +21,12 @@ obj-y += checksum_32.o syscalls_32.o
 obj-$(CONFIG_ELF_CORE) += elfcore.o
 
 subarch-y = ../lib/string_32.o ../lib/atomic64_32.o ../lib/atomic64_cx8_32.o
-subarch-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += ../lib/rwsem.o
 
 else
 
 obj-y += syscalls_64.o vdso/
 
-subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../entry/thunk_64.o \
-		../lib/rwsem.o
+subarch-y = ../lib/csum-partial_64.o ../lib/memcpy_64.o ../entry/thunk_64.o
 
 endif
 
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 4b9aafe766c5..35c8d91e6106 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -46,9 +46,6 @@ config XTENSA
 	  with reasonable minimum requirements. The Xtensa Linux project has
 	  a home page at <http://www.linux-xtensa.org/>.
 
-config RWSEM_XCHGADD_ALGORITHM
-	def_bool y
-
 config GENERIC_HWEIGHT
 	def_bool y
 
diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild
index 3843198e03d4..4148090cafb0 100644
--- a/arch/xtensa/include/asm/Kbuild
+++ b/arch/xtensa/include/asm/Kbuild
@@ -25,7 +25,6 @@ generic-y += percpu.h
 generic-y += preempt.h
 generic-y += qrwlock.h
 generic-y += qspinlock.h
-generic-y += rwsem.h
 generic-y += sections.h
 generic-y += socket.h
 generic-y += topology.h
diff --git a/include/asm-generic/rwsem.h b/include/asm-generic/rwsem.h
deleted file mode 100644
index 93e67a055a4d..000000000000
--- a/include/asm-generic/rwsem.h
+++ /dev/null
@@ -1,140 +0,0 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2#ifndef _ASM_GENERIC_RWSEM_H
3#define _ASM_GENERIC_RWSEM_H
4
5#ifndef _LINUX_RWSEM_H
6#error "Please don't include <asm/rwsem.h> directly, use <linux/rwsem.h> instead."
7#endif
8
9#ifdef __KERNEL__
10
11/*
12 * R/W semaphores originally for PPC using the stuff in lib/rwsem.c.
13 * Adapted largely from include/asm-i386/rwsem.h
14 * by Paul Mackerras <paulus@samba.org>.
15 */
16
17/*
18 * the semaphore definition
19 */
20#ifdef CONFIG_64BIT
21# define RWSEM_ACTIVE_MASK 0xffffffffL
22#else
23# define RWSEM_ACTIVE_MASK 0x0000ffffL
24#endif
25
26#define RWSEM_UNLOCKED_VALUE 0x00000000L
27#define RWSEM_ACTIVE_BIAS 0x00000001L
28#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1)
29#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
30#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
31
32/*
33 * lock for reading
34 */
35static inline void __down_read(struct rw_semaphore *sem)
36{
37 if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0))
38 rwsem_down_read_failed(sem);
39}
40
41static inline int __down_read_killable(struct rw_semaphore *sem)
42{
43 if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) {
44 if (IS_ERR(rwsem_down_read_failed_killable(sem)))
45 return -EINTR;
46 }
47
48 return 0;
49}
50
51static inline int __down_read_trylock(struct rw_semaphore *sem)
52{
53 long tmp;
54
55 while ((tmp = atomic_long_read(&sem->count)) >= 0) {
56 if (tmp == atomic_long_cmpxchg_acquire(&sem->count, tmp,
57 tmp + RWSEM_ACTIVE_READ_BIAS)) {
58 return 1;
59 }
60 }
61 return 0;
62}
63
64/*
65 * lock for writing
66 */
67static inline void __down_write(struct rw_semaphore *sem)
68{
69 long tmp;
70
71 tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
72 &sem->count);
73 if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
74 rwsem_down_write_failed(sem);
75}
76
77static inline int __down_write_killable(struct rw_semaphore *sem)
78{
79 long tmp;
80
81 tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
82 &sem->count);
83 if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
84 if (IS_ERR(rwsem_down_write_failed_killable(sem)))
85 return -EINTR;
86 return 0;
87}
88
89static inline int __down_write_trylock(struct rw_semaphore *sem)
90{
91 long tmp;
92
93 tmp = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE,
94 RWSEM_ACTIVE_WRITE_BIAS);
95 return tmp == RWSEM_UNLOCKED_VALUE;
96}
97
98/*
99 * unlock after reading
100 */
101static inline void __up_read(struct rw_semaphore *sem)
102{
103 long tmp;
104
105 tmp = atomic_long_dec_return_release(&sem->count);
106 if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0))
107 rwsem_wake(sem);
108}
109
110/*
111 * unlock after writing
112 */
113static inline void __up_write(struct rw_semaphore *sem)
114{
115 if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS,
116 &sem->count) < 0))
117 rwsem_wake(sem);
118}
119
120/*
121 * downgrade write lock to read lock
122 */
123static inline void __downgrade_write(struct rw_semaphore *sem)
124{
125 long tmp;
126
127 /*
128 * When downgrading from exclusive to shared ownership,
129 * anything inside the write-locked region cannot leak
130 * into the read side. In contrast, anything in the
131 * read-locked region is ok to be re-ordered into the
132 * write side. As such, rely on RELEASE semantics.
133 */
134 tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count);
135 if (tmp < 0)
136 rwsem_downgrade_wake(sem);
137}
138
139#endif /* __KERNEL__ */
140#endif /* _ASM_GENERIC_RWSEM_H */
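
A worked example of the __up_read() wake condition above, using the 64-bit constants: with one active reader and a writer queued, count is RWSEM_WAITING_BIAS + 1 = 0xffffffff00000001; the reader's decrement leaves 0xffffffff00000000, which is below -1 and has a zero active mask, so rwsem_wake() runs and the queued writer can be granted the lock. With two active readers the value after one decrement still has active bits set (...0001), so no wakeup is issued until the last reader leaves.
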
diff --git a/include/linux/jump_label_ratelimit.h b/include/linux/jump_label_ratelimit.h
index a49f2b45b3f0..42710d5949ba 100644
--- a/include/linux/jump_label_ratelimit.h
+++ b/include/linux/jump_label_ratelimit.h
@@ -12,21 +12,79 @@ struct static_key_deferred {
 	struct delayed_work work;
 };
 
-extern void static_key_slow_dec_deferred(struct static_key_deferred *key);
-extern void static_key_deferred_flush(struct static_key_deferred *key);
+struct static_key_true_deferred {
+	struct static_key_true key;
+	unsigned long timeout;
+	struct delayed_work work;
+};
+
+struct static_key_false_deferred {
+	struct static_key_false key;
+	unsigned long timeout;
+	struct delayed_work work;
+};
+
+#define static_key_slow_dec_deferred(x)					\
+	__static_key_slow_dec_deferred(&(x)->key, &(x)->work, (x)->timeout)
+#define static_branch_slow_dec_deferred(x)				\
+	__static_key_slow_dec_deferred(&(x)->key.key, &(x)->work, (x)->timeout)
+
+#define static_key_deferred_flush(x)					\
+	__static_key_deferred_flush((x), &(x)->work)
+
+extern void
+__static_key_slow_dec_deferred(struct static_key *key,
+			       struct delayed_work *work,
+			       unsigned long timeout);
+extern void __static_key_deferred_flush(void *key, struct delayed_work *work);
 extern void
 jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl);
 
+extern void jump_label_update_timeout(struct work_struct *work);
+
+#define DEFINE_STATIC_KEY_DEFERRED_TRUE(name, rl)			\
+	struct static_key_true_deferred name = {			\
+		.key =		{ STATIC_KEY_INIT_TRUE },		\
+		.timeout =	(rl),					\
+		.work =	__DELAYED_WORK_INITIALIZER((name).work,		\
+						   jump_label_update_timeout, \
+						   0),			\
+	}
+
+#define DEFINE_STATIC_KEY_DEFERRED_FALSE(name, rl)			\
+	struct static_key_false_deferred name = {			\
+		.key =		{ STATIC_KEY_INIT_FALSE },		\
+		.timeout =	(rl),					\
+		.work =	__DELAYED_WORK_INITIALIZER((name).work,		\
+						   jump_label_update_timeout, \
+						   0),			\
+	}
+
+#define static_branch_deferred_inc(x)	static_branch_inc(&(x)->key)
+
 #else	/* !CONFIG_JUMP_LABEL */
 struct static_key_deferred {
 	struct static_key key;
 };
+struct static_key_true_deferred {
+	struct static_key_true key;
+};
+struct static_key_false_deferred {
+	struct static_key_false key;
+};
+#define DEFINE_STATIC_KEY_DEFERRED_TRUE(name, rl)	\
+	struct static_key_true_deferred name = { STATIC_KEY_TRUE_INIT }
+#define DEFINE_STATIC_KEY_DEFERRED_FALSE(name, rl)	\
+	struct static_key_false_deferred name = { STATIC_KEY_FALSE_INIT }
+
+#define static_branch_slow_dec_deferred(x)	static_branch_dec(&(x)->key)
+
 static inline void static_key_slow_dec_deferred(struct static_key_deferred *key)
 {
 	STATIC_KEY_CHECK_USE(key);
 	static_key_slow_dec(&key->key);
 }
-static inline void static_key_deferred_flush(struct static_key_deferred *key)
+static inline void static_key_deferred_flush(void *key)
 {
 	STATIC_KEY_CHECK_USE(key);
 }
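
The new deferred-key helpers added above are used roughly as follows; the key name, the HZ timeout and the surrounding functions are made up for illustration:

/* Rate-limited static key: enable is immediate, disable is deferred by HZ. */
DEFINE_STATIC_KEY_DEFERRED_FALSE(my_feature_key, HZ);

static void my_feature_start(void)
{
	static_branch_deferred_inc(&my_feature_key);		/* patch in now */
}

static void my_feature_stop(void)
{
	/* decrement is rate-limited; the code patching happens later */
	static_branch_slow_dec_deferred(&my_feature_key);
}

static bool my_feature_active(void)
{
	return static_branch_unlikely(&my_feature_key.key);
}
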
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 6f165d625320..6e2377e6c1d6 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -476,7 +476,7 @@ struct pin_cookie { };
 
 #define NIL_COOKIE (struct pin_cookie){ }
 
-#define lockdep_pin_lock(l)			({ struct pin_cookie cookie; cookie; })
+#define lockdep_pin_lock(l)			({ struct pin_cookie cookie = { }; cookie; })
 #define lockdep_repin_lock(l, c)		do { (void)(l); (void)(c); } while (0)
 #define lockdep_unpin_lock(l, c)		do { (void)(l); (void)(c); } while (0)
 
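
The pattern this macro has to keep well-defined when lock pinning is compiled out is the usual pin/unpin pairing in callers; the lock and function names below are made up for illustration:

/* Illustrative caller only. */
static void example_pin_usage(raw_spinlock_t *my_lock)
{
	struct pin_cookie cookie;

	cookie = lockdep_pin_lock(my_lock);	/* now a zero-initialized cookie when lockdep is off */
	/* ... code that relies on my_lock staying held ... */
	lockdep_unpin_lock(my_lock, cookie);
}
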
diff --git a/include/linux/rwsem-spinlock.h b/include/linux/rwsem-spinlock.h
deleted file mode 100644
index e47568363e5e..000000000000
--- a/include/linux/rwsem-spinlock.h
+++ /dev/null
@@ -1,47 +0,0 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/* rwsem-spinlock.h: fallback C implementation
3 *
4 * Copyright (c) 2001 David Howells (dhowells@redhat.com).
5 * - Derived partially from ideas by Andrea Arcangeli <andrea@suse.de>
6 * - Derived also from comments by Linus
7 */
8
9#ifndef _LINUX_RWSEM_SPINLOCK_H
10#define _LINUX_RWSEM_SPINLOCK_H
11
12#ifndef _LINUX_RWSEM_H
13#error "please don't include linux/rwsem-spinlock.h directly, use linux/rwsem.h instead"
14#endif
15
16#ifdef __KERNEL__
17/*
18 * the rw-semaphore definition
19 * - if count is 0 then there are no active readers or writers
20 * - if count is +ve then that is the number of active readers
21 * - if count is -1 then there is one active writer
22 * - if wait_list is not empty, then there are processes waiting for the semaphore
23 */
24struct rw_semaphore {
25 __s32 count;
26 raw_spinlock_t wait_lock;
27 struct list_head wait_list;
28#ifdef CONFIG_DEBUG_LOCK_ALLOC
29 struct lockdep_map dep_map;
30#endif
31};
32
33#define RWSEM_UNLOCKED_VALUE 0x00000000
34
35extern void __down_read(struct rw_semaphore *sem);
36extern int __must_check __down_read_killable(struct rw_semaphore *sem);
37extern int __down_read_trylock(struct rw_semaphore *sem);
38extern void __down_write(struct rw_semaphore *sem);
39extern int __must_check __down_write_killable(struct rw_semaphore *sem);
40extern int __down_write_trylock(struct rw_semaphore *sem);
41extern void __up_read(struct rw_semaphore *sem);
42extern void __up_write(struct rw_semaphore *sem);
43extern void __downgrade_write(struct rw_semaphore *sem);
44extern int rwsem_is_locked(struct rw_semaphore *sem);
45
46#endif /* __KERNEL__ */
47#endif /* _LINUX_RWSEM_SPINLOCK_H */
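
The counterpart implementation for this header lived in kernel/locking/rwsem-spinlock.c, which is also removed by this patch. As a simplified sketch of the technique being retired (assumptions: only the uncontended path is shown, and the real code handles waiters and interrupts more carefully), every operation took wait_lock and manipulated the plain __s32 count under it, so the architecture needed no atomic read-modify-write support:

/* Sketch of the spinlock-protected read fast path this header supported. */
void __down_read(struct rw_semaphore *sem)
{
	unsigned long flags;

	raw_spin_lock_irqsave(&sem->wait_lock, flags);
	if (sem->count >= 0 && list_empty(&sem->wait_list)) {
		sem->count++;				/* granted */
		raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
		return;
	}
	/* otherwise queue on wait_list and sleep (omitted) */
	raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
}
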
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 67dbb57508b1..2ea18a3def04 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -20,25 +20,30 @@
 #include <linux/osq_lock.h>
 #endif
 
-struct rw_semaphore;
-
-#ifdef CONFIG_RWSEM_GENERIC_SPINLOCK
-#include <linux/rwsem-spinlock.h> /* use a generic implementation */
-#define __RWSEM_INIT_COUNT(name)	.count = RWSEM_UNLOCKED_VALUE
-#else
-/* All arch specific implementations share the same struct */
+/*
+ * For an uncontended rwsem, count and owner are the only fields a task
+ * needs to touch when acquiring the rwsem. So they are put next to each
+ * other to increase the chance that they will share the same cacheline.
+ *
+ * In a contended rwsem, the owner is likely the most frequently accessed
+ * field in the structure as the optimistic waiter that holds the osq lock
+ * will spin on owner. For an embedded rwsem, other hot fields in the
+ * containing structure should be moved further away from the rwsem to
+ * reduce the chance that they will share the same cacheline causing
+ * cacheline bouncing problem.
+ */
 struct rw_semaphore {
 	atomic_long_t count;
-	struct list_head wait_list;
-	raw_spinlock_t wait_lock;
 #ifdef CONFIG_RWSEM_SPIN_ON_OWNER
-	struct optimistic_spin_queue osq; /* spinner MCS lock */
 	/*
 	 * Write owner. Used as a speculative check to see
 	 * if the owner is running on the cpu.
 	 */
 	struct task_struct *owner;
+	struct optimistic_spin_queue osq; /* spinner MCS lock */
 #endif
+	raw_spinlock_t wait_lock;
+	struct list_head wait_list;
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 	struct lockdep_map dep_map;
 #endif
@@ -50,24 +55,14 @@ struct rw_semaphore {
  */
 #define RWSEM_OWNER_UNKNOWN	((struct task_struct *)-2L)
 
-extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem);
-extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *);
-extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
-
-/* Include the arch specific part */
-#include <asm/rwsem.h>
-
 /* In all implementations count != 0 means locked */
 static inline int rwsem_is_locked(struct rw_semaphore *sem)
 {
 	return atomic_long_read(&sem->count) != 0;
 }
 
+#define RWSEM_UNLOCKED_VALUE		0L
 #define __RWSEM_INIT_COUNT(name)	.count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE)
-#endif
 
 /* Common initializer macros and functions */
 
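
With the arch-specific and spinlock-based variants gone, __RWSEM_INIT_COUNT() has a single definition on every architecture. Usage is unchanged; a statically defined semaphore (the name below is only an example) still comes from DECLARE_RWSEM(), and its count field now always expands to the atomic form:

static DECLARE_RWSEM(example_sem);	/* hypothetical semaphore */
/* count is initialized as: .count = ATOMIC_LONG_INIT(RWSEM_UNLOCKED_VALUE), i.e. 0L */
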
diff --git a/kernel/Kconfig.locks b/kernel/Kconfig.locks
index fbba478ae522..e335953fa704 100644
--- a/kernel/Kconfig.locks
+++ b/kernel/Kconfig.locks
@@ -229,7 +229,7 @@ config MUTEX_SPIN_ON_OWNER
 
 config RWSEM_SPIN_ON_OWNER
 	def_bool y
-	depends on SMP && RWSEM_XCHGADD_ALGORITHM && ARCH_SUPPORTS_ATOMIC_RMW
+	depends on SMP && ARCH_SUPPORTS_ATOMIC_RMW
 
 config LOCK_SPIN_ON_OWNER
 	def_bool y
diff --git a/kernel/jump_label.c b/kernel/jump_label.c
index bad96b476eb6..de6efdecc70d 100644
--- a/kernel/jump_label.c
+++ b/kernel/jump_label.c
@@ -202,11 +202,13 @@ void static_key_disable(struct static_key *key)
202} 202}
203EXPORT_SYMBOL_GPL(static_key_disable); 203EXPORT_SYMBOL_GPL(static_key_disable);
204 204
205static void __static_key_slow_dec_cpuslocked(struct static_key *key, 205static bool static_key_slow_try_dec(struct static_key *key)
206 unsigned long rate_limit,
207 struct delayed_work *work)
208{ 206{
209 lockdep_assert_cpus_held(); 207 int val;
208
209 val = atomic_fetch_add_unless(&key->enabled, -1, 1);
210 if (val == 1)
211 return false;
210 212
211 /* 213 /*
212 * The negative count check is valid even when a negative 214 * The negative count check is valid even when a negative
@@ -215,63 +217,70 @@ static void __static_key_slow_dec_cpuslocked(struct static_key *key,
215 * returns is unbalanced, because all other static_key_slow_inc() 217 * returns is unbalanced, because all other static_key_slow_inc()
216 * instances block while the update is in progress. 218 * instances block while the update is in progress.
217 */ 219 */
218 if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) { 220 WARN(val < 0, "jump label: negative count!\n");
219 WARN(atomic_read(&key->enabled) < 0, 221 return true;
220 "jump label: negative count!\n"); 222}
223
224static void __static_key_slow_dec_cpuslocked(struct static_key *key)
225{
226 lockdep_assert_cpus_held();
227
228 if (static_key_slow_try_dec(key))
221 return; 229 return;
222 }
223 230
224 if (rate_limit) { 231 jump_label_lock();
225 atomic_inc(&key->enabled); 232 if (atomic_dec_and_test(&key->enabled))
226 schedule_delayed_work(work, rate_limit);
227 } else {
228 jump_label_update(key); 233 jump_label_update(key);
229 }
230 jump_label_unlock(); 234 jump_label_unlock();
231} 235}
232 236
233static void __static_key_slow_dec(struct static_key *key, 237static void __static_key_slow_dec(struct static_key *key)
234 unsigned long rate_limit,
235 struct delayed_work *work)
236{ 238{
237 cpus_read_lock(); 239 cpus_read_lock();
238 __static_key_slow_dec_cpuslocked(key, rate_limit, work); 240 __static_key_slow_dec_cpuslocked(key);
239 cpus_read_unlock(); 241 cpus_read_unlock();
240} 242}
241 243
242static void jump_label_update_timeout(struct work_struct *work) 244void jump_label_update_timeout(struct work_struct *work)
243{ 245{
244 struct static_key_deferred *key = 246 struct static_key_deferred *key =
245 container_of(work, struct static_key_deferred, work.work); 247 container_of(work, struct static_key_deferred, work.work);
246 __static_key_slow_dec(&key->key, 0, NULL); 248 __static_key_slow_dec(&key->key);
247} 249}
250EXPORT_SYMBOL_GPL(jump_label_update_timeout);
248 251
249void static_key_slow_dec(struct static_key *key) 252void static_key_slow_dec(struct static_key *key)
250{ 253{
251 STATIC_KEY_CHECK_USE(key); 254 STATIC_KEY_CHECK_USE(key);
252 __static_key_slow_dec(key, 0, NULL); 255 __static_key_slow_dec(key);
253} 256}
254EXPORT_SYMBOL_GPL(static_key_slow_dec); 257EXPORT_SYMBOL_GPL(static_key_slow_dec);
255 258
256void static_key_slow_dec_cpuslocked(struct static_key *key) 259void static_key_slow_dec_cpuslocked(struct static_key *key)
257{ 260{
258 STATIC_KEY_CHECK_USE(key); 261 STATIC_KEY_CHECK_USE(key);
259 __static_key_slow_dec_cpuslocked(key, 0, NULL); 262 __static_key_slow_dec_cpuslocked(key);
260} 263}
261 264
262void static_key_slow_dec_deferred(struct static_key_deferred *key) 265void __static_key_slow_dec_deferred(struct static_key *key,
266 struct delayed_work *work,
267 unsigned long timeout)
263{ 268{
264 STATIC_KEY_CHECK_USE(key); 269 STATIC_KEY_CHECK_USE(key);
265 __static_key_slow_dec(&key->key, key->timeout, &key->work); 270
271 if (static_key_slow_try_dec(key))
272 return;
273
274 schedule_delayed_work(work, timeout);
266} 275}
267EXPORT_SYMBOL_GPL(static_key_slow_dec_deferred); 276EXPORT_SYMBOL_GPL(__static_key_slow_dec_deferred);
268 277
269void static_key_deferred_flush(struct static_key_deferred *key) 278void __static_key_deferred_flush(void *key, struct delayed_work *work)
270{ 279{
271 STATIC_KEY_CHECK_USE(key); 280 STATIC_KEY_CHECK_USE(key);
272 flush_delayed_work(&key->work); 281 flush_delayed_work(work);
273} 282}
274EXPORT_SYMBOL_GPL(static_key_deferred_flush); 283EXPORT_SYMBOL_GPL(__static_key_deferred_flush);
275 284
276void jump_label_rate_limit(struct static_key_deferred *key, 285void jump_label_rate_limit(struct static_key_deferred *key,
277 unsigned long rl) 286 unsigned long rl)
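The split above keeps the common case in static_key_slow_try_dec(), so jump_label_lock() is only taken, or the delayed work only scheduled, when the reference count would actually drop from 1 to 0. A usage sketch of the deferred path; sample_key, sample_init, sample_get and sample_put are hypothetical names and the one-second rate limit is arbitrary:

	#include <linux/init.h>
	#include <linux/jiffies.h>
	#include <linux/jump_label_ratelimit.h>

	static struct static_key_deferred sample_key = { .key = STATIC_KEY_INIT_FALSE };

	static int __init sample_init(void)
	{
		/* Collapse bursts of disables into at most one code patch per second. */
		jump_label_rate_limit(&sample_key, HZ);
		return 0;
	}

	static void sample_get(void)
	{
		static_key_slow_inc(&sample_key.key);	/* enable path patches immediately */
	}

	static void sample_put(void)
	{
		/*
		 * Fast path: atomic_fetch_add_unless(&enabled, -1, 1). Only when the
		 * count would hit zero is the delayed work scheduled, and
		 * jump_label_update_timeout() later performs the real decrement.
		 */
		static_key_slow_dec_deferred(&sample_key);
	}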
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index 392c7f23af76..6fe2f333aecb 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -3,7 +3,7 @@
3# and is generally not a function of system call inputs. 3# and is generally not a function of system call inputs.
4KCOV_INSTRUMENT := n 4KCOV_INSTRUMENT := n
5 5
6obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o 6obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o rwsem-xadd.o
7 7
8ifdef CONFIG_FUNCTION_TRACER 8ifdef CONFIG_FUNCTION_TRACER
9CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE) 9CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE)
@@ -25,8 +25,7 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o
25obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o 25obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o
26obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o 26obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o
27obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o 27obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
28obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
29obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o
30obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o 28obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o
31obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o 29obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o
32obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o 30obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o
31obj-$(CONFIG_LOCK_EVENT_COUNTS) += lock_events.o
diff --git a/kernel/locking/lock_events.c b/kernel/locking/lock_events.c
new file mode 100644
index 000000000000..fa2c2f951c6b
--- /dev/null
+++ b/kernel/locking/lock_events.c
@@ -0,0 +1,179 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * Authors: Waiman Long <waiman.long@hpe.com>
14 */
15
16/*
17 * Collect locking event counts
18 */
19#include <linux/debugfs.h>
20#include <linux/sched.h>
21#include <linux/sched/clock.h>
22#include <linux/fs.h>
23
24#include "lock_events.h"
25
26#undef LOCK_EVENT
27#define LOCK_EVENT(name) [LOCKEVENT_ ## name] = #name,
28
29#define LOCK_EVENTS_DIR "lock_event_counts"
30
31/*
32 * When CONFIG_LOCK_EVENT_COUNTS is enabled, event counts of different
33 * types of locks will be reported under the <debugfs>/lock_event_counts/
34 * directory. See lock_events_list.h for the list of available locking
35 * events.
36 *
37 * Writing to the special ".reset_counts" file will reset all the above
38 * locking event counts. This is a very slow operation and so should not
39 * be done frequently.
40 *
41 * These event counts are implemented as per-cpu variables which are
42 * summed and computed whenever the corresponding debugfs files are read. This
43 * minimizes added overhead making the counts usable even in a production
44 * environment.
45 */
46static const char * const lockevent_names[lockevent_num + 1] = {
47
48#include "lock_events_list.h"
49
50 [LOCKEVENT_reset_cnts] = ".reset_counts",
51};
52
53/*
54 * Per-cpu counts
55 */
56DEFINE_PER_CPU(unsigned long, lockevents[lockevent_num]);
57
58/*
59 * The lockevent_read() function can be overridden.
60 */
61ssize_t __weak lockevent_read(struct file *file, char __user *user_buf,
62 size_t count, loff_t *ppos)
63{
64 char buf[64];
65 int cpu, id, len;
66 u64 sum = 0;
67
68 /*
69 * Get the counter ID stored in file->f_inode->i_private
70 */
71 id = (long)file_inode(file)->i_private;
72
73 if (id >= lockevent_num)
74 return -EBADF;
75
76 for_each_possible_cpu(cpu)
77 sum += per_cpu(lockevents[id], cpu);
78 len = snprintf(buf, sizeof(buf) - 1, "%llu\n", sum);
79
80 return simple_read_from_buffer(user_buf, count, ppos, buf, len);
81}
82
83/*
84 * Function to handle write request
85 *
 86 * When the event id is LOCKEVENT_reset_cnts, reset all the counts.
87 */
88static ssize_t lockevent_write(struct file *file, const char __user *user_buf,
89 size_t count, loff_t *ppos)
90{
91 int cpu;
92
93 /*
94 * Get the counter ID stored in file->f_inode->i_private
95 */
96 if ((long)file_inode(file)->i_private != LOCKEVENT_reset_cnts)
97 return count;
98
99 for_each_possible_cpu(cpu) {
100 int i;
101 unsigned long *ptr = per_cpu_ptr(lockevents, cpu);
102
103 for (i = 0 ; i < lockevent_num; i++)
104 WRITE_ONCE(ptr[i], 0);
105 }
106 return count;
107}
108
109/*
110 * Debugfs data structures
111 */
112static const struct file_operations fops_lockevent = {
113 .read = lockevent_read,
114 .write = lockevent_write,
115 .llseek = default_llseek,
116};
117
118#ifdef CONFIG_PARAVIRT_SPINLOCKS
119#include <asm/paravirt.h>
120
121static bool __init skip_lockevent(const char *name)
122{
123 static int pv_on __initdata = -1;
124
125 if (pv_on < 0)
126 pv_on = !pv_is_native_spin_unlock();
127 /*
128 * Skip PV qspinlock events on bare metal.
129 */
130 if (!pv_on && !memcmp(name, "pv_", 3))
131 return true;
132 return false;
133}
134#else
135static inline bool skip_lockevent(const char *name)
136{
137 return false;
138}
139#endif
140
141/*
142 * Initialize debugfs for the locking event counts.
143 */
144static int __init init_lockevent_counts(void)
145{
146 struct dentry *d_counts = debugfs_create_dir(LOCK_EVENTS_DIR, NULL);
147 int i;
148
149 if (!d_counts)
150 goto out;
151
152 /*
153 * Create the debugfs files
154 *
155 * As reading from and writing to the stat files can be slow, only
156 * root is allowed to do the read/write to limit impact to system
157 * performance.
158 */
159 for (i = 0; i < lockevent_num; i++) {
160 if (skip_lockevent(lockevent_names[i]))
161 continue;
162 if (!debugfs_create_file(lockevent_names[i], 0400, d_counts,
163 (void *)(long)i, &fops_lockevent))
164 goto fail_undo;
165 }
166
167 if (!debugfs_create_file(lockevent_names[LOCKEVENT_reset_cnts], 0200,
168 d_counts, (void *)(long)LOCKEVENT_reset_cnts,
169 &fops_lockevent))
170 goto fail_undo;
171
172 return 0;
173fail_undo:
174 debugfs_remove_recursive(d_counts);
175out:
176 pr_warn("Could not create '%s' debugfs entries\n", LOCK_EVENTS_DIR);
177 return -ENOMEM;
178}
179fs_initcall(init_lockevent_counts);
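From userspace, each event is a root-only debugfs file whose read sums the per-cpu counters, and a write to ".reset_counts" zeroes them all. A small sketch of that interface, assuming debugfs is mounted at its usual /sys/kernel/debug location and the program runs as root (the files are created 0400/0200); error handling is trimmed to the minimum:

	#include <fcntl.h>
	#include <stdio.h>
	#include <unistd.h>

	int main(void)
	{
		char buf[64];
		int fd = open("/sys/kernel/debug/lock_event_counts/rwsem_wlock", O_RDONLY);
		ssize_t n = read(fd, buf, sizeof(buf) - 1);	/* sum over all CPUs */

		if (n > 0) {
			buf[n] = '\0';
			printf("rwsem_wlock = %s", buf);
		}
		close(fd);

		/* Writing anything to .reset_counts clears every per-cpu counter. */
		fd = open("/sys/kernel/debug/lock_event_counts/.reset_counts", O_WRONLY);
		write(fd, "1", 1);
		close(fd);
		return 0;
	}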
diff --git a/kernel/locking/lock_events.h b/kernel/locking/lock_events.h
new file mode 100644
index 000000000000..feb1acc54611
--- /dev/null
+++ b/kernel/locking/lock_events.h
@@ -0,0 +1,59 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * Authors: Waiman Long <longman@redhat.com>
14 */
15
16#ifndef __LOCKING_LOCK_EVENTS_H
17#define __LOCKING_LOCK_EVENTS_H
18
19enum lock_events {
20
21#include "lock_events_list.h"
22
23 lockevent_num, /* Total number of lock event counts */
24 LOCKEVENT_reset_cnts = lockevent_num,
25};
26
27#ifdef CONFIG_LOCK_EVENT_COUNTS
28/*
29 * Per-cpu counters
30 */
31DECLARE_PER_CPU(unsigned long, lockevents[lockevent_num]);
32
33/*
 34 * Conditionally increment a per-cpu lock event count
35 */
36static inline void __lockevent_inc(enum lock_events event, bool cond)
37{
38 if (cond)
39 __this_cpu_inc(lockevents[event]);
40}
41
42#define lockevent_inc(ev) __lockevent_inc(LOCKEVENT_ ##ev, true)
43#define lockevent_cond_inc(ev, c) __lockevent_inc(LOCKEVENT_ ##ev, c)
44
45static inline void __lockevent_add(enum lock_events event, int inc)
46{
47 __this_cpu_add(lockevents[event], inc);
48}
49
50#define lockevent_add(ev, c) __lockevent_add(LOCKEVENT_ ##ev, c)
51
52#else /* CONFIG_LOCK_EVENT_COUNTS */
53
54#define lockevent_inc(ev)
55#define lockevent_add(ev, c)
56#define lockevent_cond_inc(ev, c)
57
58#endif /* CONFIG_LOCK_EVENT_COUNTS */
59#endif /* __LOCKING_LOCK_EVENTS_H */
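The three macros above are the whole instrumentation surface; when CONFIG_LOCK_EVENT_COUNTS=n they expand to nothing, so call sites need no #ifdefs. A sketch of how a hypothetical new event would be wired up (rwsem_example is not a real entry; step 1 would add it to lock_events_list.h, and the relative include assumes the caller lives in kernel/locking/):

	/*
	 * 1) In lock_events_list.h (hypothetical new entry):
	 *	LOCK_EVENT(rwsem_example)
	 *
	 * 2) In the lock code:
	 */
	#include "lock_events.h"

	static void example_slowpath(bool contended, int extra)
	{
		lockevent_inc(rwsem_example);			/* always count the call */
		lockevent_cond_inc(rwsem_example, contended);	/* count only if contended */
		lockevent_add(rwsem_example, extra);		/* bump by an arbitrary amount */
	}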
diff --git a/kernel/locking/lock_events_list.h b/kernel/locking/lock_events_list.h
new file mode 100644
index 000000000000..ad7668cfc9da
--- /dev/null
+++ b/kernel/locking/lock_events_list.h
@@ -0,0 +1,67 @@
1/* SPDX-License-Identifier: GPL-2.0 */
2/*
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * Authors: Waiman Long <longman@redhat.com>
14 */
15
16#ifndef LOCK_EVENT
17#define LOCK_EVENT(name) LOCKEVENT_ ## name,
18#endif
19
20#ifdef CONFIG_QUEUED_SPINLOCKS
21#ifdef CONFIG_PARAVIRT_SPINLOCKS
22/*
23 * Locking events for PV qspinlock.
24 */
25LOCK_EVENT(pv_hash_hops) /* Average # of hops per hashing operation */
26LOCK_EVENT(pv_kick_unlock) /* # of vCPU kicks issued at unlock time */
27LOCK_EVENT(pv_kick_wake) /* # of vCPU kicks for pv_latency_wake */
28LOCK_EVENT(pv_latency_kick) /* Average latency (ns) of vCPU kick */
29LOCK_EVENT(pv_latency_wake) /* Average latency (ns) of kick-to-wakeup */
30LOCK_EVENT(pv_lock_stealing) /* # of lock stealing operations */
31LOCK_EVENT(pv_spurious_wakeup) /* # of spurious wakeups in non-head vCPUs */
32LOCK_EVENT(pv_wait_again) /* # of wait's after queue head vCPU kick */
33LOCK_EVENT(pv_wait_early) /* # of early vCPU wait's */
34LOCK_EVENT(pv_wait_head) /* # of vCPU wait's at the queue head */
35LOCK_EVENT(pv_wait_node) /* # of vCPU wait's at non-head queue node */
36#endif /* CONFIG_PARAVIRT_SPINLOCKS */
37
38/*
39 * Locking events for qspinlock
40 *
41 * Subtracting lock_use_node[234] from lock_slowpath will give you
42 * lock_use_node1.
43 */
44LOCK_EVENT(lock_pending) /* # of locking ops via pending code */
45LOCK_EVENT(lock_slowpath) /* # of locking ops via MCS lock queue */
46LOCK_EVENT(lock_use_node2) /* # of locking ops that use 2nd percpu node */
47LOCK_EVENT(lock_use_node3) /* # of locking ops that use 3rd percpu node */
48LOCK_EVENT(lock_use_node4) /* # of locking ops that use 4th percpu node */
49LOCK_EVENT(lock_no_node) /* # of locking ops w/o using percpu node */
50#endif /* CONFIG_QUEUED_SPINLOCKS */
51
52/*
53 * Locking events for rwsem
54 */
55LOCK_EVENT(rwsem_sleep_reader) /* # of reader sleeps */
56LOCK_EVENT(rwsem_sleep_writer) /* # of writer sleeps */
57LOCK_EVENT(rwsem_wake_reader) /* # of reader wakeups */
58LOCK_EVENT(rwsem_wake_writer) /* # of writer wakeups */
59LOCK_EVENT(rwsem_opt_wlock) /* # of write locks opt-spin acquired */
60LOCK_EVENT(rwsem_opt_fail) /* # of failed opt-spinnings */
61LOCK_EVENT(rwsem_rlock) /* # of read locks acquired */
62LOCK_EVENT(rwsem_rlock_fast) /* # of fast read locks acquired */
63LOCK_EVENT(rwsem_rlock_fail) /* # of failed read lock acquisitions */
64LOCK_EVENT(rwsem_rtrylock) /* # of read trylock calls */
65LOCK_EVENT(rwsem_wlock) /* # of write locks acquired */
66LOCK_EVENT(rwsem_wlock_fail) /* # of failed write lock acquisitions */
67LOCK_EVENT(rwsem_wtrylock) /* # of write trylock calls */
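This list is expanded twice with different LOCK_EVENT definitions: lock_events.h includes it with the default definition to build the enum of IDs, and lock_events.c redefines LOCK_EVENT (the #undef/#define near the top of that file above) and includes it again to build the matching name table. A standalone sketch of the pattern, with EV, EVENT_LIST and event_names as made-up stand-ins for the kernel's re-included header:

	#define EVENT_LIST	\
		EV(rlock)	\
		EV(wlock)

	/* First expansion: an enum of event IDs. */
	#define EV(name)	LOCKEVENT_ ## name,
	enum { EVENT_LIST LOCKEVENT_num };
	#undef EV

	/* Second expansion: a name table indexed by the same IDs. */
	#define EV(name)	[LOCKEVENT_ ## name] = #name,
	static const char * const event_names[LOCKEVENT_num] = { EVENT_LIST };
	#undef EV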
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 91c6b89f04df..27b992fe8cec 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -501,11 +501,11 @@ static char get_usage_char(struct lock_class *class, enum lock_usage_bit bit)
501{ 501{
502 char c = '.'; 502 char c = '.';
503 503
504 if (class->usage_mask & lock_flag(bit + 2)) 504 if (class->usage_mask & lock_flag(bit + LOCK_USAGE_DIR_MASK))
505 c = '+'; 505 c = '+';
506 if (class->usage_mask & lock_flag(bit)) { 506 if (class->usage_mask & lock_flag(bit)) {
507 c = '-'; 507 c = '-';
508 if (class->usage_mask & lock_flag(bit + 2)) 508 if (class->usage_mask & lock_flag(bit + LOCK_USAGE_DIR_MASK))
509 c = '?'; 509 c = '?';
510 } 510 }
511 511
@@ -1666,19 +1666,25 @@ check_redundant(struct lock_list *root, struct lock_class *target,
1666} 1666}
1667 1667
1668#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) 1668#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
1669
1670static inline int usage_accumulate(struct lock_list *entry, void *mask)
1671{
1672 *(unsigned long *)mask |= entry->class->usage_mask;
1673
1674 return 0;
1675}
1676
1669/* 1677/*
1670 * Forwards and backwards subgraph searching, for the purposes of 1678 * Forwards and backwards subgraph searching, for the purposes of
1671 * proving that two subgraphs can be connected by a new dependency 1679 * proving that two subgraphs can be connected by a new dependency
1672 * without creating any illegal irq-safe -> irq-unsafe lock dependency. 1680 * without creating any illegal irq-safe -> irq-unsafe lock dependency.
1673 */ 1681 */
1674 1682
1675static inline int usage_match(struct lock_list *entry, void *bit) 1683static inline int usage_match(struct lock_list *entry, void *mask)
1676{ 1684{
1677 return entry->class->usage_mask & (1 << (enum lock_usage_bit)bit); 1685 return entry->class->usage_mask & *(unsigned long *)mask;
1678} 1686}
1679 1687
1680
1681
1682/* 1688/*
1683 * Find a node in the forwards-direction dependency sub-graph starting 1689 * Find a node in the forwards-direction dependency sub-graph starting
1684 * at @root->class that matches @bit. 1690 * at @root->class that matches @bit.
@@ -1690,14 +1696,14 @@ static inline int usage_match(struct lock_list *entry, void *bit)
1690 * Return <0 on error. 1696 * Return <0 on error.
1691 */ 1697 */
1692static int 1698static int
1693find_usage_forwards(struct lock_list *root, enum lock_usage_bit bit, 1699find_usage_forwards(struct lock_list *root, unsigned long usage_mask,
1694 struct lock_list **target_entry) 1700 struct lock_list **target_entry)
1695{ 1701{
1696 int result; 1702 int result;
1697 1703
1698 debug_atomic_inc(nr_find_usage_forwards_checks); 1704 debug_atomic_inc(nr_find_usage_forwards_checks);
1699 1705
1700 result = __bfs_forwards(root, (void *)bit, usage_match, target_entry); 1706 result = __bfs_forwards(root, &usage_mask, usage_match, target_entry);
1701 1707
1702 return result; 1708 return result;
1703} 1709}
@@ -1713,14 +1719,14 @@ find_usage_forwards(struct lock_list *root, enum lock_usage_bit bit,
1713 * Return <0 on error. 1719 * Return <0 on error.
1714 */ 1720 */
1715static int 1721static int
1716find_usage_backwards(struct lock_list *root, enum lock_usage_bit bit, 1722find_usage_backwards(struct lock_list *root, unsigned long usage_mask,
1717 struct lock_list **target_entry) 1723 struct lock_list **target_entry)
1718{ 1724{
1719 int result; 1725 int result;
1720 1726
1721 debug_atomic_inc(nr_find_usage_backwards_checks); 1727 debug_atomic_inc(nr_find_usage_backwards_checks);
1722 1728
1723 result = __bfs_backwards(root, (void *)bit, usage_match, target_entry); 1729 result = __bfs_backwards(root, &usage_mask, usage_match, target_entry);
1724 1730
1725 return result; 1731 return result;
1726} 1732}
@@ -1912,39 +1918,6 @@ print_bad_irq_dependency(struct task_struct *curr,
1912 return 0; 1918 return 0;
1913} 1919}
1914 1920
1915static int
1916check_usage(struct task_struct *curr, struct held_lock *prev,
1917 struct held_lock *next, enum lock_usage_bit bit_backwards,
1918 enum lock_usage_bit bit_forwards, const char *irqclass)
1919{
1920 int ret;
1921 struct lock_list this, that;
1922 struct lock_list *uninitialized_var(target_entry);
1923 struct lock_list *uninitialized_var(target_entry1);
1924
1925 this.parent = NULL;
1926
1927 this.class = hlock_class(prev);
1928 ret = find_usage_backwards(&this, bit_backwards, &target_entry);
1929 if (ret < 0)
1930 return print_bfs_bug(ret);
1931 if (ret == 1)
1932 return ret;
1933
1934 that.parent = NULL;
1935 that.class = hlock_class(next);
1936 ret = find_usage_forwards(&that, bit_forwards, &target_entry1);
1937 if (ret < 0)
1938 return print_bfs_bug(ret);
1939 if (ret == 1)
1940 return ret;
1941
1942 return print_bad_irq_dependency(curr, &this, &that,
1943 target_entry, target_entry1,
1944 prev, next,
1945 bit_backwards, bit_forwards, irqclass);
1946}
1947
1948static const char *state_names[] = { 1921static const char *state_names[] = {
1949#define LOCKDEP_STATE(__STATE) \ 1922#define LOCKDEP_STATE(__STATE) \
1950 __stringify(__STATE), 1923 __stringify(__STATE),
@@ -1961,9 +1934,19 @@ static const char *state_rnames[] = {
1961 1934
1962static inline const char *state_name(enum lock_usage_bit bit) 1935static inline const char *state_name(enum lock_usage_bit bit)
1963{ 1936{
1964 return (bit & LOCK_USAGE_READ_MASK) ? state_rnames[bit >> 2] : state_names[bit >> 2]; 1937 if (bit & LOCK_USAGE_READ_MASK)
1938 return state_rnames[bit >> LOCK_USAGE_DIR_MASK];
1939 else
1940 return state_names[bit >> LOCK_USAGE_DIR_MASK];
1965} 1941}
1966 1942
1943/*
1944 * The bit number is encoded like:
1945 *
1946 * bit0: 0 exclusive, 1 read lock
1947 * bit1: 0 used in irq, 1 irq enabled
1948 * bit2-n: state
1949 */
1967static int exclusive_bit(int new_bit) 1950static int exclusive_bit(int new_bit)
1968{ 1951{
1969 int state = new_bit & LOCK_USAGE_STATE_MASK; 1952 int state = new_bit & LOCK_USAGE_STATE_MASK;
@@ -1975,45 +1958,160 @@ static int exclusive_bit(int new_bit)
1975 return state | (dir ^ LOCK_USAGE_DIR_MASK); 1958 return state | (dir ^ LOCK_USAGE_DIR_MASK);
1976} 1959}
1977 1960
1961/*
1962 * Observe that when given a bitmask where each bitnr is encoded as above, a
1963 * right shift of the mask transforms the individual bitnrs as -1 and
1964 * conversely, a left shift transforms into +1 for the individual bitnrs.
1965 *
1966 * So for all bits whose number have LOCK_ENABLED_* set (bitnr1 == 1), we can
1967 * create the mask with those bit numbers using LOCK_USED_IN_* (bitnr1 == 0)
1968 * instead by subtracting the bit number by 2, or shifting the mask right by 2.
1969 *
1970 * Similarly, bitnr1 == 0 becomes bitnr1 == 1 by adding 2, or shifting left 2.
1971 *
1972 * So split the mask (note that LOCKF_ENABLED_IRQ_ALL|LOCKF_USED_IN_IRQ_ALL is
1973 * all bits set) and recompose with bitnr1 flipped.
1974 */
1975static unsigned long invert_dir_mask(unsigned long mask)
1976{
1977 unsigned long excl = 0;
1978
1979 /* Invert dir */
1980 excl |= (mask & LOCKF_ENABLED_IRQ_ALL) >> LOCK_USAGE_DIR_MASK;
1981 excl |= (mask & LOCKF_USED_IN_IRQ_ALL) << LOCK_USAGE_DIR_MASK;
1982
1983 return excl;
1984}
1985
1986/*
1987 * As above, we clear bitnr0 (LOCK_*_READ off) with bitmask ops. First, for all
1988 * bits with bitnr0 set (LOCK_*_READ), add those with bitnr0 cleared (LOCK_*).
1989 * And then mask out all bitnr0.
1990 */
1991static unsigned long exclusive_mask(unsigned long mask)
1992{
1993 unsigned long excl = invert_dir_mask(mask);
1994
1995 /* Strip read */
1996 excl |= (excl & LOCKF_IRQ_READ) >> LOCK_USAGE_READ_MASK;
1997 excl &= ~LOCKF_IRQ_READ;
1998
1999 return excl;
2000}
2001
2002/*
2003 * Retrieve the _possible_ original mask to which @mask is
2004 * exclusive. Ie: this is the opposite of exclusive_mask().
2005 * Note that 2 possible original bits can match an exclusive
2006 * bit: one has LOCK_USAGE_READ_MASK set, the other has it
2007 * cleared. So both are returned for each exclusive bit.
2008 */
2009static unsigned long original_mask(unsigned long mask)
2010{
2011 unsigned long excl = invert_dir_mask(mask);
2012
2013 /* Include read in existing usages */
2014 excl |= (excl & LOCKF_IRQ) << LOCK_USAGE_READ_MASK;
2015
2016 return excl;
2017}
2018
2019/*
2020 * Find the first pair of bit match between an original
2021 * usage mask and an exclusive usage mask.
2022 */
2023static int find_exclusive_match(unsigned long mask,
2024 unsigned long excl_mask,
2025 enum lock_usage_bit *bitp,
2026 enum lock_usage_bit *excl_bitp)
2027{
2028 int bit, excl;
2029
2030 for_each_set_bit(bit, &mask, LOCK_USED) {
2031 excl = exclusive_bit(bit);
2032 if (excl_mask & lock_flag(excl)) {
2033 *bitp = bit;
2034 *excl_bitp = excl;
2035 return 0;
2036 }
2037 }
2038 return -1;
2039}
2040
2041/*
2042 * Prove that the new dependency does not connect a hardirq-safe(-read)
2043 * lock with a hardirq-unsafe lock - to achieve this we search
2044 * the backwards-subgraph starting at <prev>, and the
2045 * forwards-subgraph starting at <next>:
2046 */
1978static int check_irq_usage(struct task_struct *curr, struct held_lock *prev, 2047static int check_irq_usage(struct task_struct *curr, struct held_lock *prev,
1979 struct held_lock *next, enum lock_usage_bit bit) 2048 struct held_lock *next)
1980{ 2049{
2050 unsigned long usage_mask = 0, forward_mask, backward_mask;
2051 enum lock_usage_bit forward_bit = 0, backward_bit = 0;
2052 struct lock_list *uninitialized_var(target_entry1);
2053 struct lock_list *uninitialized_var(target_entry);
2054 struct lock_list this, that;
2055 int ret;
2056
1981 /* 2057 /*
1982 * Prove that the new dependency does not connect a hardirq-safe 2058 * Step 1: gather all hard/soft IRQs usages backward in an
1983 * lock with a hardirq-unsafe lock - to achieve this we search 2059 * accumulated usage mask.
1984 * the backwards-subgraph starting at <prev>, and the
1985 * forwards-subgraph starting at <next>:
1986 */ 2060 */
1987 if (!check_usage(curr, prev, next, bit, 2061 this.parent = NULL;
1988 exclusive_bit(bit), state_name(bit))) 2062 this.class = hlock_class(prev);
1989 return 0; 2063
2064 ret = __bfs_backwards(&this, &usage_mask, usage_accumulate, NULL);
2065 if (ret < 0)
2066 return print_bfs_bug(ret);
1990 2067
1991 bit++; /* _READ */ 2068 usage_mask &= LOCKF_USED_IN_IRQ_ALL;
2069 if (!usage_mask)
2070 return 1;
1992 2071
1993 /* 2072 /*
1994 * Prove that the new dependency does not connect a hardirq-safe-read 2073 * Step 2: find exclusive uses forward that match the previous
1995 * lock with a hardirq-unsafe lock - to achieve this we search 2074 * backward accumulated mask.
1996 * the backwards-subgraph starting at <prev>, and the
1997 * forwards-subgraph starting at <next>:
1998 */ 2075 */
1999 if (!check_usage(curr, prev, next, bit, 2076 forward_mask = exclusive_mask(usage_mask);
2000 exclusive_bit(bit), state_name(bit)))
2001 return 0;
2002 2077
2003 return 1; 2078 that.parent = NULL;
2004} 2079 that.class = hlock_class(next);
2005 2080
2006static int 2081 ret = find_usage_forwards(&that, forward_mask, &target_entry1);
2007check_prev_add_irq(struct task_struct *curr, struct held_lock *prev, 2082 if (ret < 0)
2008 struct held_lock *next) 2083 return print_bfs_bug(ret);
2009{ 2084 if (ret == 1)
2010#define LOCKDEP_STATE(__STATE) \ 2085 return ret;
2011 if (!check_irq_usage(curr, prev, next, LOCK_USED_IN_##__STATE)) \
2012 return 0;
2013#include "lockdep_states.h"
2014#undef LOCKDEP_STATE
2015 2086
2016 return 1; 2087 /*
2088 * Step 3: we found a bad match! Now retrieve a lock from the backward
2089 * list whose usage mask matches the exclusive usage mask from the
2090 * lock found on the forward list.
2091 */
2092 backward_mask = original_mask(target_entry1->class->usage_mask);
2093
2094 ret = find_usage_backwards(&this, backward_mask, &target_entry);
2095 if (ret < 0)
2096 return print_bfs_bug(ret);
2097 if (DEBUG_LOCKS_WARN_ON(ret == 1))
2098 return 1;
2099
2100 /*
2101 * Step 4: narrow down to a pair of incompatible usage bits
2102 * and report it.
2103 */
2104 ret = find_exclusive_match(target_entry->class->usage_mask,
2105 target_entry1->class->usage_mask,
2106 &backward_bit, &forward_bit);
2107 if (DEBUG_LOCKS_WARN_ON(ret == -1))
2108 return 1;
2109
2110 return print_bad_irq_dependency(curr, &this, &that,
2111 target_entry, target_entry1,
2112 prev, next,
2113 backward_bit, forward_bit,
2114 state_name(backward_bit));
2017} 2115}
2018 2116
2019static void inc_chains(void) 2117static void inc_chains(void)
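A worked instance of the mask algebra above may help. It assumes the usual encoding from lockdep_states.h, where HARDIRQ is state 0 (so LOCK_USED_IN_HARDIRQ_READ == 1 and LOCK_ENABLED_HARDIRQ == 2), and uses LOCK_USAGE_READ_MASK and LOCK_USAGE_DIR_MASK as the shift counts 1 and 2, exactly as the code does:

	usage_mask (step 1, backward)  = BIT(LOCK_USED_IN_HARDIRQ_READ)       = 0x02
	invert_dir_mask(0x02)          = (0x02 & LOCKF_USED_IN_IRQ_ALL) << 2  = 0x08
	                                  (= BIT(LOCK_ENABLED_HARDIRQ_READ))
	exclusive_mask(0x02)           = 0x08 | (0x08 >> 1), read bits cleared
	                               = 0x04 (= BIT(LOCK_ENABLED_HARDIRQ))

So step 2 searches forward for a class that enables hardirqs while holding the lock, which is precisely the class that must not be reachable from a lock already used in hardirq (read) context on the backward side.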
@@ -2030,9 +2128,8 @@ static void inc_chains(void)
2030 2128
2031#else 2129#else
2032 2130
2033static inline int 2131static inline int check_irq_usage(struct task_struct *curr,
2034check_prev_add_irq(struct task_struct *curr, struct held_lock *prev, 2132 struct held_lock *prev, struct held_lock *next)
2035 struct held_lock *next)
2036{ 2133{
2037 return 1; 2134 return 1;
2038} 2135}
@@ -2211,7 +2308,7 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev,
2211 else if (unlikely(ret < 0)) 2308 else if (unlikely(ret < 0))
2212 return print_bfs_bug(ret); 2309 return print_bfs_bug(ret);
2213 2310
2214 if (!check_prev_add_irq(curr, prev, next)) 2311 if (!check_irq_usage(curr, prev, next))
2215 return 0; 2312 return 0;
2216 2313
2217 /* 2314 /*
@@ -2773,6 +2870,12 @@ static void check_chain_key(struct task_struct *curr)
2773#endif 2870#endif
2774} 2871}
2775 2872
2873static int mark_lock(struct task_struct *curr, struct held_lock *this,
2874 enum lock_usage_bit new_bit);
2875
2876#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
2877
2878
2776static void 2879static void
2777print_usage_bug_scenario(struct held_lock *lock) 2880print_usage_bug_scenario(struct held_lock *lock)
2778{ 2881{
@@ -2842,10 +2945,6 @@ valid_state(struct task_struct *curr, struct held_lock *this,
2842 return 1; 2945 return 1;
2843} 2946}
2844 2947
2845static int mark_lock(struct task_struct *curr, struct held_lock *this,
2846 enum lock_usage_bit new_bit);
2847
2848#if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
2849 2948
2850/* 2949/*
2851 * print irq inversion bug: 2950 * print irq inversion bug:
@@ -2925,7 +3024,7 @@ check_usage_forwards(struct task_struct *curr, struct held_lock *this,
2925 3024
2926 root.parent = NULL; 3025 root.parent = NULL;
2927 root.class = hlock_class(this); 3026 root.class = hlock_class(this);
2928 ret = find_usage_forwards(&root, bit, &target_entry); 3027 ret = find_usage_forwards(&root, lock_flag(bit), &target_entry);
2929 if (ret < 0) 3028 if (ret < 0)
2930 return print_bfs_bug(ret); 3029 return print_bfs_bug(ret);
2931 if (ret == 1) 3030 if (ret == 1)
@@ -2949,7 +3048,7 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this,
2949 3048
2950 root.parent = NULL; 3049 root.parent = NULL;
2951 root.class = hlock_class(this); 3050 root.class = hlock_class(this);
2952 ret = find_usage_backwards(&root, bit, &target_entry); 3051 ret = find_usage_backwards(&root, lock_flag(bit), &target_entry);
2953 if (ret < 0) 3052 if (ret < 0)
2954 return print_bfs_bug(ret); 3053 return print_bfs_bug(ret);
2955 if (ret == 1) 3054 if (ret == 1)
@@ -3004,7 +3103,7 @@ static int (*state_verbose_f[])(struct lock_class *class) = {
3004static inline int state_verbose(enum lock_usage_bit bit, 3103static inline int state_verbose(enum lock_usage_bit bit,
3005 struct lock_class *class) 3104 struct lock_class *class)
3006{ 3105{
3007 return state_verbose_f[bit >> 2](class); 3106 return state_verbose_f[bit >> LOCK_USAGE_DIR_MASK](class);
3008} 3107}
3009 3108
3010typedef int (*check_usage_f)(struct task_struct *, struct held_lock *, 3109typedef int (*check_usage_f)(struct task_struct *, struct held_lock *,
@@ -3146,7 +3245,7 @@ void lockdep_hardirqs_on(unsigned long ip)
3146 /* 3245 /*
3147 * See the fine text that goes along with this variable definition. 3246 * See the fine text that goes along with this variable definition.
3148 */ 3247 */
3149 if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled))) 3248 if (DEBUG_LOCKS_WARN_ON(early_boot_irqs_disabled))
3150 return; 3249 return;
3151 3250
3152 /* 3251 /*
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h
index d4c197425f68..150ec3f0c5b5 100644
--- a/kernel/locking/lockdep_internals.h
+++ b/kernel/locking/lockdep_internals.h
@@ -42,13 +42,35 @@ enum {
42 __LOCKF(USED) 42 __LOCKF(USED)
43}; 43};
44 44
45#define LOCKF_ENABLED_IRQ (LOCKF_ENABLED_HARDIRQ | LOCKF_ENABLED_SOFTIRQ) 45#define LOCKDEP_STATE(__STATE) LOCKF_ENABLED_##__STATE |
46#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ) 46static const unsigned long LOCKF_ENABLED_IRQ =
47#include "lockdep_states.h"
48 0;
49#undef LOCKDEP_STATE
50
51#define LOCKDEP_STATE(__STATE) LOCKF_USED_IN_##__STATE |
52static const unsigned long LOCKF_USED_IN_IRQ =
53#include "lockdep_states.h"
54 0;
55#undef LOCKDEP_STATE
56
57#define LOCKDEP_STATE(__STATE) LOCKF_ENABLED_##__STATE##_READ |
58static const unsigned long LOCKF_ENABLED_IRQ_READ =
59#include "lockdep_states.h"
60 0;
61#undef LOCKDEP_STATE
62
63#define LOCKDEP_STATE(__STATE) LOCKF_USED_IN_##__STATE##_READ |
64static const unsigned long LOCKF_USED_IN_IRQ_READ =
65#include "lockdep_states.h"
66 0;
67#undef LOCKDEP_STATE
68
69#define LOCKF_ENABLED_IRQ_ALL (LOCKF_ENABLED_IRQ | LOCKF_ENABLED_IRQ_READ)
70#define LOCKF_USED_IN_IRQ_ALL (LOCKF_USED_IN_IRQ | LOCKF_USED_IN_IRQ_READ)
47 71
48#define LOCKF_ENABLED_IRQ_READ \ 72#define LOCKF_IRQ (LOCKF_ENABLED_IRQ | LOCKF_USED_IN_IRQ)
49 (LOCKF_ENABLED_HARDIRQ_READ | LOCKF_ENABLED_SOFTIRQ_READ) 73#define LOCKF_IRQ_READ (LOCKF_ENABLED_IRQ_READ | LOCKF_USED_IN_IRQ_READ)
50#define LOCKF_USED_IN_IRQ_READ \
51 (LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ)
52 74
53/* 75/*
54 * CONFIG_LOCKDEP_SMALL is defined for sparc. Sparc requires .text, 76 * CONFIG_LOCKDEP_SMALL is defined for sparc. Sparc requires .text,

diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 883cf1b92d90..f17dad99eec8 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -7,6 +7,8 @@
7#include <linux/sched.h> 7#include <linux/sched.h>
8#include <linux/errno.h> 8#include <linux/errno.h>
9 9
10#include "rwsem.h"
11
10int __percpu_init_rwsem(struct percpu_rw_semaphore *sem, 12int __percpu_init_rwsem(struct percpu_rw_semaphore *sem,
11 const char *name, struct lock_class_key *rwsem_key) 13 const char *name, struct lock_class_key *rwsem_key)
12{ 14{
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 5e9247dc2515..e14b32c69639 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -395,7 +395,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
395 * 0,1,0 -> 0,0,1 395 * 0,1,0 -> 0,0,1
396 */ 396 */
397 clear_pending_set_locked(lock); 397 clear_pending_set_locked(lock);
398 qstat_inc(qstat_lock_pending, true); 398 lockevent_inc(lock_pending);
399 return; 399 return;
400 400
401 /* 401 /*
@@ -403,7 +403,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val)
403 * queuing. 403 * queuing.
404 */ 404 */
405queue: 405queue:
406 qstat_inc(qstat_lock_slowpath, true); 406 lockevent_inc(lock_slowpath);
407pv_queue: 407pv_queue:
408 node = this_cpu_ptr(&qnodes[0].mcs); 408 node = this_cpu_ptr(&qnodes[0].mcs);
409 idx = node->count++; 409 idx = node->count++;
@@ -419,7 +419,7 @@ pv_queue:
419 * simple enough. 419 * simple enough.
420 */ 420 */
421 if (unlikely(idx >= MAX_NODES)) { 421 if (unlikely(idx >= MAX_NODES)) {
422 qstat_inc(qstat_lock_no_node, true); 422 lockevent_inc(lock_no_node);
423 while (!queued_spin_trylock(lock)) 423 while (!queued_spin_trylock(lock))
424 cpu_relax(); 424 cpu_relax();
425 goto release; 425 goto release;
@@ -430,7 +430,7 @@ pv_queue:
430 /* 430 /*
431 * Keep counts of non-zero index values: 431 * Keep counts of non-zero index values:
432 */ 432 */
433 qstat_inc(qstat_lock_use_node2 + idx - 1, idx); 433 lockevent_cond_inc(lock_use_node2 + idx - 1, idx);
434 434
435 /* 435 /*
436 * Ensure that we increment the head node->count before initialising 436 * Ensure that we increment the head node->count before initialising
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 8f36c27c1794..89bab079e7a4 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -89,7 +89,7 @@ static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock)
89 89
90 if (!(val & _Q_LOCKED_PENDING_MASK) && 90 if (!(val & _Q_LOCKED_PENDING_MASK) &&
91 (cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) { 91 (cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) {
92 qstat_inc(qstat_pv_lock_stealing, true); 92 lockevent_inc(pv_lock_stealing);
93 return true; 93 return true;
94 } 94 }
95 if (!(val & _Q_TAIL_MASK) || (val & _Q_PENDING_MASK)) 95 if (!(val & _Q_TAIL_MASK) || (val & _Q_PENDING_MASK))
@@ -219,7 +219,7 @@ static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node)
219 hopcnt++; 219 hopcnt++;
220 if (!cmpxchg(&he->lock, NULL, lock)) { 220 if (!cmpxchg(&he->lock, NULL, lock)) {
221 WRITE_ONCE(he->node, node); 221 WRITE_ONCE(he->node, node);
222 qstat_hop(hopcnt); 222 lockevent_pv_hop(hopcnt);
223 return &he->lock; 223 return &he->lock;
224 } 224 }
225 } 225 }
@@ -320,8 +320,8 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
320 smp_store_mb(pn->state, vcpu_halted); 320 smp_store_mb(pn->state, vcpu_halted);
321 321
322 if (!READ_ONCE(node->locked)) { 322 if (!READ_ONCE(node->locked)) {
323 qstat_inc(qstat_pv_wait_node, true); 323 lockevent_inc(pv_wait_node);
324 qstat_inc(qstat_pv_wait_early, wait_early); 324 lockevent_cond_inc(pv_wait_early, wait_early);
325 pv_wait(&pn->state, vcpu_halted); 325 pv_wait(&pn->state, vcpu_halted);
326 } 326 }
327 327
@@ -339,7 +339,8 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev)
339 * So it is better to spin for a while in the hope that the 339 * So it is better to spin for a while in the hope that the
340 * MCS lock will be released soon. 340 * MCS lock will be released soon.
341 */ 341 */
342 qstat_inc(qstat_pv_spurious_wakeup, !READ_ONCE(node->locked)); 342 lockevent_cond_inc(pv_spurious_wakeup,
343 !READ_ONCE(node->locked));
343 } 344 }
344 345
345 /* 346 /*
@@ -416,7 +417,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
416 /* 417 /*
417 * Tracking # of slowpath locking operations 418 * Tracking # of slowpath locking operations
418 */ 419 */
419 qstat_inc(qstat_lock_slowpath, true); 420 lockevent_inc(lock_slowpath);
420 421
421 for (;; waitcnt++) { 422 for (;; waitcnt++) {
422 /* 423 /*
@@ -464,8 +465,8 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node)
464 } 465 }
465 } 466 }
466 WRITE_ONCE(pn->state, vcpu_hashed); 467 WRITE_ONCE(pn->state, vcpu_hashed);
467 qstat_inc(qstat_pv_wait_head, true); 468 lockevent_inc(pv_wait_head);
468 qstat_inc(qstat_pv_wait_again, waitcnt); 469 lockevent_cond_inc(pv_wait_again, waitcnt);
469 pv_wait(&lock->locked, _Q_SLOW_VAL); 470 pv_wait(&lock->locked, _Q_SLOW_VAL);
470 471
471 /* 472 /*
@@ -528,7 +529,7 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked)
528 * vCPU is harmless other than the additional latency in completing 529 * vCPU is harmless other than the additional latency in completing
529 * the unlock. 530 * the unlock.
530 */ 531 */
531 qstat_inc(qstat_pv_kick_unlock, true); 532 lockevent_inc(pv_kick_unlock);
532 pv_kick(node->cpu); 533 pv_kick(node->cpu);
533} 534}
534 535
diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
index d73f85388d5c..54152670ff24 100644
--- a/kernel/locking/qspinlock_stat.h
+++ b/kernel/locking/qspinlock_stat.h
@@ -9,262 +9,105 @@
9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 9 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10 * GNU General Public License for more details. 10 * GNU General Public License for more details.
11 * 11 *
12 * Authors: Waiman Long <waiman.long@hpe.com> 12 * Authors: Waiman Long <longman@redhat.com>
13 */ 13 */
14 14
15/* 15#include "lock_events.h"
16 * When queued spinlock statistical counters are enabled, the following
17 * debugfs files will be created for reporting the counter values:
18 *
19 * <debugfs>/qlockstat/
20 * pv_hash_hops - average # of hops per hashing operation
21 * pv_kick_unlock - # of vCPU kicks issued at unlock time
22 * pv_kick_wake - # of vCPU kicks used for computing pv_latency_wake
23 * pv_latency_kick - average latency (ns) of vCPU kick operation
24 * pv_latency_wake - average latency (ns) from vCPU kick to wakeup
25 * pv_lock_stealing - # of lock stealing operations
26 * pv_spurious_wakeup - # of spurious wakeups in non-head vCPUs
27 * pv_wait_again - # of wait's after a queue head vCPU kick
28 * pv_wait_early - # of early vCPU wait's
29 * pv_wait_head - # of vCPU wait's at the queue head
30 * pv_wait_node - # of vCPU wait's at a non-head queue node
31 * lock_pending - # of locking operations via pending code
32 * lock_slowpath - # of locking operations via MCS lock queue
33 * lock_use_node2 - # of locking operations that use 2nd per-CPU node
34 * lock_use_node3 - # of locking operations that use 3rd per-CPU node
35 * lock_use_node4 - # of locking operations that use 4th per-CPU node
36 * lock_no_node - # of locking operations without using per-CPU node
37 *
38 * Subtracting lock_use_node[234] from lock_slowpath will give you
39 * lock_use_node1.
40 *
41 * Writing to the "reset_counters" file will reset all the above counter
42 * values.
43 *
44 * These statistical counters are implemented as per-cpu variables which are
45 * summed and computed whenever the corresponding debugfs files are read. This
46 * minimizes added overhead making the counters usable even in a production
47 * environment.
48 *
49 * There may be slight difference between pv_kick_wake and pv_kick_unlock.
50 */
51enum qlock_stats {
52 qstat_pv_hash_hops,
53 qstat_pv_kick_unlock,
54 qstat_pv_kick_wake,
55 qstat_pv_latency_kick,
56 qstat_pv_latency_wake,
57 qstat_pv_lock_stealing,
58 qstat_pv_spurious_wakeup,
59 qstat_pv_wait_again,
60 qstat_pv_wait_early,
61 qstat_pv_wait_head,
62 qstat_pv_wait_node,
63 qstat_lock_pending,
64 qstat_lock_slowpath,
65 qstat_lock_use_node2,
66 qstat_lock_use_node3,
67 qstat_lock_use_node4,
68 qstat_lock_no_node,
69 qstat_num, /* Total number of statistical counters */
70 qstat_reset_cnts = qstat_num,
71};
72 16
73#ifdef CONFIG_QUEUED_LOCK_STAT 17#ifdef CONFIG_LOCK_EVENT_COUNTS
18#ifdef CONFIG_PARAVIRT_SPINLOCKS
74/* 19/*
75 * Collect pvqspinlock statistics 20 * Collect pvqspinlock locking event counts
76 */ 21 */
77#include <linux/debugfs.h>
78#include <linux/sched.h> 22#include <linux/sched.h>
79#include <linux/sched/clock.h> 23#include <linux/sched/clock.h>
80#include <linux/fs.h> 24#include <linux/fs.h>
81 25
82static const char * const qstat_names[qstat_num + 1] = { 26#define EVENT_COUNT(ev) lockevents[LOCKEVENT_ ## ev]
83 [qstat_pv_hash_hops] = "pv_hash_hops",
84 [qstat_pv_kick_unlock] = "pv_kick_unlock",
85 [qstat_pv_kick_wake] = "pv_kick_wake",
86 [qstat_pv_spurious_wakeup] = "pv_spurious_wakeup",
87 [qstat_pv_latency_kick] = "pv_latency_kick",
88 [qstat_pv_latency_wake] = "pv_latency_wake",
89 [qstat_pv_lock_stealing] = "pv_lock_stealing",
90 [qstat_pv_wait_again] = "pv_wait_again",
91 [qstat_pv_wait_early] = "pv_wait_early",
92 [qstat_pv_wait_head] = "pv_wait_head",
93 [qstat_pv_wait_node] = "pv_wait_node",
94 [qstat_lock_pending] = "lock_pending",
95 [qstat_lock_slowpath] = "lock_slowpath",
96 [qstat_lock_use_node2] = "lock_use_node2",
97 [qstat_lock_use_node3] = "lock_use_node3",
98 [qstat_lock_use_node4] = "lock_use_node4",
99 [qstat_lock_no_node] = "lock_no_node",
100 [qstat_reset_cnts] = "reset_counters",
101};
102 27
103/* 28/*
104 * Per-cpu counters 29 * PV specific per-cpu counter
105 */ 30 */
106static DEFINE_PER_CPU(unsigned long, qstats[qstat_num]);
107static DEFINE_PER_CPU(u64, pv_kick_time); 31static DEFINE_PER_CPU(u64, pv_kick_time);
108 32
109/* 33/*
110 * Function to read and return the qlock statistical counter values 34 * Function to read and return the PV qspinlock counts.
111 * 35 *
112 * The following counters are handled specially: 36 * The following counters are handled specially:
113 * 1. qstat_pv_latency_kick 37 * 1. pv_latency_kick
114 * Average kick latency (ns) = pv_latency_kick/pv_kick_unlock 38 * Average kick latency (ns) = pv_latency_kick/pv_kick_unlock
115 * 2. qstat_pv_latency_wake 39 * 2. pv_latency_wake
116 * Average wake latency (ns) = pv_latency_wake/pv_kick_wake 40 * Average wake latency (ns) = pv_latency_wake/pv_kick_wake
117 * 3. qstat_pv_hash_hops 41 * 3. pv_hash_hops
118 * Average hops/hash = pv_hash_hops/pv_kick_unlock 42 * Average hops/hash = pv_hash_hops/pv_kick_unlock
119 */ 43 */
120static ssize_t qstat_read(struct file *file, char __user *user_buf, 44ssize_t lockevent_read(struct file *file, char __user *user_buf,
121 size_t count, loff_t *ppos) 45 size_t count, loff_t *ppos)
122{ 46{
123 char buf[64]; 47 char buf[64];
124 int cpu, counter, len; 48 int cpu, id, len;
125 u64 stat = 0, kicks = 0; 49 u64 sum = 0, kicks = 0;
126 50
127 /* 51 /*
128 * Get the counter ID stored in file->f_inode->i_private 52 * Get the counter ID stored in file->f_inode->i_private
129 */ 53 */
130 counter = (long)file_inode(file)->i_private; 54 id = (long)file_inode(file)->i_private;
131 55
132 if (counter >= qstat_num) 56 if (id >= lockevent_num)
133 return -EBADF; 57 return -EBADF;
134 58
135 for_each_possible_cpu(cpu) { 59 for_each_possible_cpu(cpu) {
136 stat += per_cpu(qstats[counter], cpu); 60 sum += per_cpu(lockevents[id], cpu);
137 /* 61 /*
138 * Need to sum additional counter for some of them 62 * Need to sum additional counters for some of them
139 */ 63 */
140 switch (counter) { 64 switch (id) {
141 65
142 case qstat_pv_latency_kick: 66 case LOCKEVENT_pv_latency_kick:
143 case qstat_pv_hash_hops: 67 case LOCKEVENT_pv_hash_hops:
144 kicks += per_cpu(qstats[qstat_pv_kick_unlock], cpu); 68 kicks += per_cpu(EVENT_COUNT(pv_kick_unlock), cpu);
145 break; 69 break;
146 70
147 case qstat_pv_latency_wake: 71 case LOCKEVENT_pv_latency_wake:
148 kicks += per_cpu(qstats[qstat_pv_kick_wake], cpu); 72 kicks += per_cpu(EVENT_COUNT(pv_kick_wake), cpu);
149 break; 73 break;
150 } 74 }
151 } 75 }
152 76
153 if (counter == qstat_pv_hash_hops) { 77 if (id == LOCKEVENT_pv_hash_hops) {
154 u64 frac = 0; 78 u64 frac = 0;
155 79
156 if (kicks) { 80 if (kicks) {
157 frac = 100ULL * do_div(stat, kicks); 81 frac = 100ULL * do_div(sum, kicks);
158 frac = DIV_ROUND_CLOSEST_ULL(frac, kicks); 82 frac = DIV_ROUND_CLOSEST_ULL(frac, kicks);
159 } 83 }
160 84
161 /* 85 /*
162 * Return a X.XX decimal number 86 * Return a X.XX decimal number
163 */ 87 */
164 len = snprintf(buf, sizeof(buf) - 1, "%llu.%02llu\n", stat, frac); 88 len = snprintf(buf, sizeof(buf) - 1, "%llu.%02llu\n",
89 sum, frac);
165 } else { 90 } else {
166 /* 91 /*
167 * Round to the nearest ns 92 * Round to the nearest ns
168 */ 93 */
169 if ((counter == qstat_pv_latency_kick) || 94 if ((id == LOCKEVENT_pv_latency_kick) ||
170 (counter == qstat_pv_latency_wake)) { 95 (id == LOCKEVENT_pv_latency_wake)) {
171 if (kicks) 96 if (kicks)
172 stat = DIV_ROUND_CLOSEST_ULL(stat, kicks); 97 sum = DIV_ROUND_CLOSEST_ULL(sum, kicks);
173 } 98 }
174 len = snprintf(buf, sizeof(buf) - 1, "%llu\n", stat); 99 len = snprintf(buf, sizeof(buf) - 1, "%llu\n", sum);
175 } 100 }
176 101
177 return simple_read_from_buffer(user_buf, count, ppos, buf, len); 102 return simple_read_from_buffer(user_buf, count, ppos, buf, len);
178} 103}
179 104
180/* 105/*
181 * Function to handle write request
182 *
183 * When counter = reset_cnts, reset all the counter values.
184 * Since the counter updates aren't atomic, the resetting is done twice
185 * to make sure that the counters are very likely to be all cleared.
186 */
187static ssize_t qstat_write(struct file *file, const char __user *user_buf,
188 size_t count, loff_t *ppos)
189{
190 int cpu;
191
192 /*
193 * Get the counter ID stored in file->f_inode->i_private
194 */
195 if ((long)file_inode(file)->i_private != qstat_reset_cnts)
196 return count;
197
198 for_each_possible_cpu(cpu) {
199 int i;
200 unsigned long *ptr = per_cpu_ptr(qstats, cpu);
201
202 for (i = 0 ; i < qstat_num; i++)
203 WRITE_ONCE(ptr[i], 0);
204 }
205 return count;
206}
207
208/*
209 * Debugfs data structures
210 */
211static const struct file_operations fops_qstat = {
212 .read = qstat_read,
213 .write = qstat_write,
214 .llseek = default_llseek,
215};
216
217/*
218 * Initialize debugfs for the qspinlock statistical counters
219 */
220static int __init init_qspinlock_stat(void)
221{
222 struct dentry *d_qstat = debugfs_create_dir("qlockstat", NULL);
223 int i;
224
225 if (!d_qstat)
226 goto out;
227
228 /*
229 * Create the debugfs files
230 *
231 * As reading from and writing to the stat files can be slow, only
232 * root is allowed to do the read/write to limit impact to system
233 * performance.
234 */
235 for (i = 0; i < qstat_num; i++)
236 if (!debugfs_create_file(qstat_names[i], 0400, d_qstat,
237 (void *)(long)i, &fops_qstat))
238 goto fail_undo;
239
240 if (!debugfs_create_file(qstat_names[qstat_reset_cnts], 0200, d_qstat,
241 (void *)(long)qstat_reset_cnts, &fops_qstat))
242 goto fail_undo;
243
244 return 0;
245fail_undo:
246 debugfs_remove_recursive(d_qstat);
247out:
248 pr_warn("Could not create 'qlockstat' debugfs entries\n");
249 return -ENOMEM;
250}
251fs_initcall(init_qspinlock_stat);
252
253/*
254 * Increment the PV qspinlock statistical counters
255 */
256static inline void qstat_inc(enum qlock_stats stat, bool cond)
257{
258 if (cond)
259 this_cpu_inc(qstats[stat]);
260}
261
262/*
263 * PV hash hop count 106 * PV hash hop count
264 */ 107 */
265static inline void qstat_hop(int hopcnt) 108static inline void lockevent_pv_hop(int hopcnt)
266{ 109{
267 this_cpu_add(qstats[qstat_pv_hash_hops], hopcnt); 110 this_cpu_add(EVENT_COUNT(pv_hash_hops), hopcnt);
268} 111}
269 112
270/* 113/*
@@ -276,7 +119,7 @@ static inline void __pv_kick(int cpu)
276 119
277 per_cpu(pv_kick_time, cpu) = start; 120 per_cpu(pv_kick_time, cpu) = start;
278 pv_kick(cpu); 121 pv_kick(cpu);
279 this_cpu_add(qstats[qstat_pv_latency_kick], sched_clock() - start); 122 this_cpu_add(EVENT_COUNT(pv_latency_kick), sched_clock() - start);
280} 123}
281 124
282/* 125/*
@@ -289,18 +132,19 @@ static inline void __pv_wait(u8 *ptr, u8 val)
289 *pkick_time = 0; 132 *pkick_time = 0;
290 pv_wait(ptr, val); 133 pv_wait(ptr, val);
291 if (*pkick_time) { 134 if (*pkick_time) {
292 this_cpu_add(qstats[qstat_pv_latency_wake], 135 this_cpu_add(EVENT_COUNT(pv_latency_wake),
293 sched_clock() - *pkick_time); 136 sched_clock() - *pkick_time);
294 qstat_inc(qstat_pv_kick_wake, true); 137 lockevent_inc(pv_kick_wake);
295 } 138 }
296} 139}
297 140
298#define pv_kick(c) __pv_kick(c) 141#define pv_kick(c) __pv_kick(c)
299#define pv_wait(p, v) __pv_wait(p, v) 142#define pv_wait(p, v) __pv_wait(p, v)
300 143
301#else /* CONFIG_QUEUED_LOCK_STAT */ 144#endif /* CONFIG_PARAVIRT_SPINLOCKS */
145
146#else /* CONFIG_LOCK_EVENT_COUNTS */
302 147
303static inline void qstat_inc(enum qlock_stats stat, bool cond) { } 148static inline void lockevent_pv_hop(int hopcnt) { }
304static inline void qstat_hop(int hopcnt) { }
305 149
306#endif /* CONFIG_QUEUED_LOCK_STAT */ 150#endif /* CONFIG_LOCK_EVENT_COUNTS */
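As a quick check of the derived counters described above (the numbers are made up): if pv_hash_hops sums to 53 across all CPUs and pv_kick_unlock to 20, then

	do_div(sum, kicks)                  -> sum = 2, remainder 13
	frac = DIV_ROUND_CLOSEST_ULL(100 * 13, 20) = 65
	output: "2.65"                      (53 / 20 hops per hashing operation)

pv_latency_kick and pv_latency_wake are reported the same way, except the sum is simply divided by the corresponding kick count and rounded to a whole number of nanoseconds.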
diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c
deleted file mode 100644
index a7ffb2a96ede..000000000000
--- a/kernel/locking/rwsem-spinlock.c
+++ /dev/null
@@ -1,339 +0,0 @@
1// SPDX-License-Identifier: GPL-2.0
2/* rwsem-spinlock.c: R/W semaphores: contention handling functions for
3 * generic spinlock implementation
4 *
5 * Copyright (c) 2001 David Howells (dhowells@redhat.com).
6 * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de>
7 * - Derived also from comments by Linus
8 */
9#include <linux/rwsem.h>
10#include <linux/sched/signal.h>
11#include <linux/sched/debug.h>
12#include <linux/export.h>
13
14enum rwsem_waiter_type {
15 RWSEM_WAITING_FOR_WRITE,
16 RWSEM_WAITING_FOR_READ
17};
18
19struct rwsem_waiter {
20 struct list_head list;
21 struct task_struct *task;
22 enum rwsem_waiter_type type;
23};
24
25int rwsem_is_locked(struct rw_semaphore *sem)
26{
27 int ret = 1;
28 unsigned long flags;
29
30 if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) {
31 ret = (sem->count != 0);
32 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
33 }
34 return ret;
35}
36EXPORT_SYMBOL(rwsem_is_locked);
37
38/*
39 * initialise the semaphore
40 */
41void __init_rwsem(struct rw_semaphore *sem, const char *name,
42 struct lock_class_key *key)
43{
44#ifdef CONFIG_DEBUG_LOCK_ALLOC
45 /*
46 * Make sure we are not reinitializing a held semaphore:
47 */
48 debug_check_no_locks_freed((void *)sem, sizeof(*sem));
49 lockdep_init_map(&sem->dep_map, name, key, 0);
50#endif
51 sem->count = 0;
52 raw_spin_lock_init(&sem->wait_lock);
53 INIT_LIST_HEAD(&sem->wait_list);
54}
55EXPORT_SYMBOL(__init_rwsem);
56
57/*
58 * handle the lock release when processes blocked on it that can now run
59 * - if we come here, then:
60 * - the 'active count' _reached_ zero
61 * - the 'waiting count' is non-zero
62 * - the spinlock must be held by the caller
63 * - woken process blocks are discarded from the list after having task zeroed
64 * - writers are only woken if wakewrite is non-zero
65 */
66static inline struct rw_semaphore *
67__rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
68{
69 struct rwsem_waiter *waiter;
70 struct task_struct *tsk;
71 int woken;
72
73 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
74
75 if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
76 if (wakewrite)
77 /* Wake up a writer. Note that we do not grant it the
78 * lock - it will have to acquire it when it runs. */
79 wake_up_process(waiter->task);
80 goto out;
81 }
82
83 /* grant an infinite number of read locks to the front of the queue */
84 woken = 0;
85 do {
86 struct list_head *next = waiter->list.next;
87
88 list_del(&waiter->list);
89 tsk = waiter->task;
90 /*
91 * Make sure we do not wakeup the next reader before
92 * setting the nil condition to grant the next reader;
93 * otherwise we could miss the wakeup on the other
94 * side and end up sleeping again. See the pairing
95 * in rwsem_down_read_failed().
96 */
97 smp_mb();
98 waiter->task = NULL;
99 wake_up_process(tsk);
100 put_task_struct(tsk);
101 woken++;
102 if (next == &sem->wait_list)
103 break;
104 waiter = list_entry(next, struct rwsem_waiter, list);
105 } while (waiter->type != RWSEM_WAITING_FOR_WRITE);
106
107 sem->count += woken;
108
109 out:
110 return sem;
111}
112
113/*
114 * wake a single writer
115 */
116static inline struct rw_semaphore *
117__rwsem_wake_one_writer(struct rw_semaphore *sem)
118{
119 struct rwsem_waiter *waiter;
120
121 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
122 wake_up_process(waiter->task);
123
124 return sem;
125}
126
127/*
128 * get a read lock on the semaphore
129 */
130int __sched __down_read_common(struct rw_semaphore *sem, int state)
131{
132 struct rwsem_waiter waiter;
133 unsigned long flags;
134
135 raw_spin_lock_irqsave(&sem->wait_lock, flags);
136
137 if (sem->count >= 0 && list_empty(&sem->wait_list)) {
138 /* granted */
139 sem->count++;
140 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
141 goto out;
142 }
143
144 /* set up my own style of waitqueue */
145 waiter.task = current;
146 waiter.type = RWSEM_WAITING_FOR_READ;
147 get_task_struct(current);
148
149 list_add_tail(&waiter.list, &sem->wait_list);
150
151 /* wait to be given the lock */
152 for (;;) {
153 if (!waiter.task)
154 break;
155 if (signal_pending_state(state, current))
156 goto out_nolock;
157 set_current_state(state);
158 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
159 schedule();
160 raw_spin_lock_irqsave(&sem->wait_lock, flags);
161 }
162
163 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
164 out:
165 return 0;
166
167out_nolock:
168 /*
169 * We didn't take the lock, so that there is a writer, which
170 * is owner or the first waiter of the sem. If it's a waiter,
171 * it will be woken by current owner. Not need to wake anybody.
172 */
173 list_del(&waiter.list);
174 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
175 return -EINTR;
176}
177
178void __sched __down_read(struct rw_semaphore *sem)
179{
180 __down_read_common(sem, TASK_UNINTERRUPTIBLE);
181}
182
183int __sched __down_read_killable(struct rw_semaphore *sem)
184{
185 return __down_read_common(sem, TASK_KILLABLE);
186}
187
188/*
189 * trylock for reading -- returns 1 if successful, 0 if contention
190 */
191int __down_read_trylock(struct rw_semaphore *sem)
192{
193 unsigned long flags;
194 int ret = 0;
195
197 raw_spin_lock_irqsave(&sem->wait_lock, flags);
198
199 if (sem->count >= 0 && list_empty(&sem->wait_list)) {
200 /* granted */
201 sem->count++;
202 ret = 1;
203 }
204
205 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
206
207 return ret;
208}
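
[Editorial note] A companion sketch for the trylock path, reusing the hypothetical cfg_rwsem from the previous sketch; suitable for contexts that must not sleep.

static bool try_read_cfg(int *out)
{
	if (!down_read_trylock(&cfg_rwsem))
		return false;		/* write-held or otherwise contended */
	*out = cfg_value;
	up_read(&cfg_rwsem);
	return true;
}
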
209
210/*
211 * get a write lock on the semaphore
212 */
213int __sched __down_write_common(struct rw_semaphore *sem, int state)
214{
215 struct rwsem_waiter waiter;
216 unsigned long flags;
217 int ret = 0;
218
219 raw_spin_lock_irqsave(&sem->wait_lock, flags);
220
221 /* set up my own style of waitqueue */
222 waiter.task = current;
223 waiter.type = RWSEM_WAITING_FOR_WRITE;
224 list_add_tail(&waiter.list, &sem->wait_list);
225
226 /* wait for someone to release the lock */
227 for (;;) {
228 /*
229		 * This is the key to supporting write lock stealing: it allows the
230		 * task already running on a CPU to take the lock right away, rather
231		 * than putting itself to sleep and waiting for the system to wake
232		 * either it or the task at the head of the wait list.
233 */
234 if (sem->count == 0)
235 break;
236 if (signal_pending_state(state, current))
237 goto out_nolock;
238
239 set_current_state(state);
240 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
241 schedule();
242 raw_spin_lock_irqsave(&sem->wait_lock, flags);
243 }
244 /* got the lock */
245 sem->count = -1;
246 list_del(&waiter.list);
247
248 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
249
250 return ret;
251
252out_nolock:
253 list_del(&waiter.list);
254 if (!list_empty(&sem->wait_list) && sem->count >= 0)
255 __rwsem_do_wake(sem, 0);
256 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
257
258 return -EINTR;
259}
260
261void __sched __down_write(struct rw_semaphore *sem)
262{
263 __down_write_common(sem, TASK_UNINTERRUPTIBLE);
264}
265
266int __sched __down_write_killable(struct rw_semaphore *sem)
267{
268 return __down_write_common(sem, TASK_KILLABLE);
269}
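
[Editorial note] Writer-side sketch (again with the hypothetical cfg_rwsem): down_write_killable() is the caller-visible reason __down_write_common() takes a task state, since a fatal signal can abort the wait with -EINTR.

static int update_cfg(int new_val)
{
	int ret = down_write_killable(&cfg_rwsem);

	if (ret)
		return ret;		/* -EINTR: killed while waiting */
	cfg_value = new_val;		/* exclusive access */
	up_write(&cfg_rwsem);
	return 0;
}
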
270
271/*
272 * trylock for writing -- returns 1 if successful, 0 if contention
273 */
274int __down_write_trylock(struct rw_semaphore *sem)
275{
276 unsigned long flags;
277 int ret = 0;
278
279 raw_spin_lock_irqsave(&sem->wait_lock, flags);
280
281 if (sem->count == 0) {
282 /* got the lock */
283 sem->count = -1;
284 ret = 1;
285 }
286
287 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
288
289 return ret;
290}
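
[Editorial note] And the write-trylock counterpart, for paths that prefer to back off rather than sleep (same hypothetical names as above).

static bool try_update_cfg(int new_val)
{
	if (!down_write_trylock(&cfg_rwsem))
		return false;		/* a reader or writer currently holds the lock */
	cfg_value = new_val;
	up_write(&cfg_rwsem);
	return true;
}
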
291
292/*
293 * release a read lock on the semaphore
294 */
295void __up_read(struct rw_semaphore *sem)
296{
297 unsigned long flags;
298
299 raw_spin_lock_irqsave(&sem->wait_lock, flags);
300
301 if (--sem->count == 0 && !list_empty(&sem->wait_list))
302 sem = __rwsem_wake_one_writer(sem);
303
304 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
305}
306
307/*
308 * release a write lock on the semaphore
309 */
310void __up_write(struct rw_semaphore *sem)
311{
312 unsigned long flags;
313
314 raw_spin_lock_irqsave(&sem->wait_lock, flags);
315
316 sem->count = 0;
317 if (!list_empty(&sem->wait_list))
318 sem = __rwsem_do_wake(sem, 1);
319
320 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
321}
322
323/*
324 * downgrade a write lock into a read lock
325 * - just wake up any readers at the front of the queue
326 */
327void __downgrade_write(struct rw_semaphore *sem)
328{
329 unsigned long flags;
330
331 raw_spin_lock_irqsave(&sem->wait_lock, flags);
332
333 sem->count = 1;
334 if (!list_empty(&sem->wait_list))
335 sem = __rwsem_do_wake(sem, 0);
336
337 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
338}
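
[Editorial note] The usual reason to downgrade is to publish an update and then keep reading it without letting another writer slip in between; a sketch with the same hypothetical names:

static void update_then_read_cfg(int new_val, int *out)
{
	down_write(&cfg_rwsem);
	cfg_value = new_val;
	downgrade_write(&cfg_rwsem);	/* now a reader; other readers may join */
	*out = cfg_value;
	up_read(&cfg_rwsem);		/* drop the read lock we downgraded to */
}
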
339
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c
index fbe96341beee..6b3ee9948bf1 100644
--- a/kernel/locking/rwsem-xadd.c
+++ b/kernel/locking/rwsem-xadd.c
@@ -147,6 +147,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
147 * will notice the queued writer. 147 * will notice the queued writer.
148 */ 148 */
149 wake_q_add(wake_q, waiter->task); 149 wake_q_add(wake_q, waiter->task);
150 lockevent_inc(rwsem_wake_writer);
150 } 151 }
151 152
152 return; 153 return;
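
[Editorial note] lockevent_inc() and lockevent_cond_inc() come from the new kernel/locking/lock_events.h added by this series (not shown in this excerpt). As a rough, illustrative sketch of the kind of lightweight per-CPU counting involved, with placeholder names rather than the real definitions:

/* Illustrative only: placeholder names, not the real lock_events.h API. */
#ifdef CONFIG_LOCK_EVENT_COUNTS
enum sketch_lockevent { SKETCH_EV_WAKE_READER, SKETCH_EV_WAKE_WRITER, SKETCH_EV_NUM };

DECLARE_PER_CPU(unsigned long, sketch_lockevents[SKETCH_EV_NUM]);

#define sketch_lockevent_inc(ev)	this_cpu_inc(sketch_lockevents[ev])
#define sketch_lockevent_cond_inc(ev, c)	\
	do { if (c) sketch_lockevent_inc(ev); } while (0)
#else	/* counting compiles away when CONFIG_LOCK_EVENT_COUNTS is off */
#define sketch_lockevent_inc(ev)
#define sketch_lockevent_cond_inc(ev, c)
#endif
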
@@ -176,9 +177,8 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
176 goto try_reader_grant; 177 goto try_reader_grant;
177 } 178 }
178 /* 179 /*
179 * It is not really necessary to set it to reader-owned here, 180 * Set it to reader-owned to give spinners an early
180 * but it gives the spinners an early indication that the 181 * indication that readers now have the lock.
181 * readers now have the lock.
182 */ 182 */
183 __rwsem_set_reader_owned(sem, waiter->task); 183 __rwsem_set_reader_owned(sem, waiter->task);
184 } 184 }
@@ -215,6 +215,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
215 } 215 }
216 216
217 adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; 217 adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
218 lockevent_cond_inc(rwsem_wake_reader, woken);
218 if (list_empty(&sem->wait_list)) { 219 if (list_empty(&sem->wait_list)) {
219 /* hit end of list above */ 220 /* hit end of list above */
220 adjustment -= RWSEM_WAITING_BIAS; 221 adjustment -= RWSEM_WAITING_BIAS;
@@ -225,92 +226,6 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem,
225} 226}
226 227
227/* 228/*
228 * Wait for the read lock to be granted
229 */
230static inline struct rw_semaphore __sched *
231__rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
232{
233 long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
234 struct rwsem_waiter waiter;
235 DEFINE_WAKE_Q(wake_q);
236
237 waiter.task = current;
238 waiter.type = RWSEM_WAITING_FOR_READ;
239
240 raw_spin_lock_irq(&sem->wait_lock);
241 if (list_empty(&sem->wait_list)) {
242 /*
243 * In case the wait queue is empty and the lock isn't owned
244 * by a writer, this reader can exit the slowpath and return
245 * immediately as its RWSEM_ACTIVE_READ_BIAS has already
246 * been set in the count.
247 */
248 if (atomic_long_read(&sem->count) >= 0) {
249 raw_spin_unlock_irq(&sem->wait_lock);
250 return sem;
251 }
252 adjustment += RWSEM_WAITING_BIAS;
253 }
254 list_add_tail(&waiter.list, &sem->wait_list);
255
256 /* we're now waiting on the lock, but no longer actively locking */
257 count = atomic_long_add_return(adjustment, &sem->count);
258
259 /*
260 * If there are no active locks, wake the front queued process(es).
261 *
262 * If there are no writers and we are first in the queue,
263 * wake our own waiter to join the existing active readers !
264 */
265 if (count == RWSEM_WAITING_BIAS ||
266 (count > RWSEM_WAITING_BIAS &&
267 adjustment != -RWSEM_ACTIVE_READ_BIAS))
268 __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
269
270 raw_spin_unlock_irq(&sem->wait_lock);
271 wake_up_q(&wake_q);
272
273 /* wait to be given the lock */
274 while (true) {
275 set_current_state(state);
276 if (!waiter.task)
277 break;
278 if (signal_pending_state(state, current)) {
279 raw_spin_lock_irq(&sem->wait_lock);
280 if (waiter.task)
281 goto out_nolock;
282 raw_spin_unlock_irq(&sem->wait_lock);
283 break;
284 }
285 schedule();
286 }
287
288 __set_current_state(TASK_RUNNING);
289 return sem;
290out_nolock:
291 list_del(&waiter.list);
292 if (list_empty(&sem->wait_list))
293 atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
294 raw_spin_unlock_irq(&sem->wait_lock);
295 __set_current_state(TASK_RUNNING);
296 return ERR_PTR(-EINTR);
297}
298
299__visible struct rw_semaphore * __sched
300rwsem_down_read_failed(struct rw_semaphore *sem)
301{
302 return __rwsem_down_read_failed_common(sem, TASK_UNINTERRUPTIBLE);
303}
304EXPORT_SYMBOL(rwsem_down_read_failed);
305
306__visible struct rw_semaphore * __sched
307rwsem_down_read_failed_killable(struct rw_semaphore *sem)
308{
309 return __rwsem_down_read_failed_common(sem, TASK_KILLABLE);
310}
311EXPORT_SYMBOL(rwsem_down_read_failed_killable);
312
313/*
314 * This function must be called with the sem->wait_lock held to prevent 229 * This function must be called with the sem->wait_lock held to prevent
315 * race conditions between checking the rwsem wait list and setting the 230 * race conditions between checking the rwsem wait list and setting the
316 * sem->count accordingly. 231 * sem->count accordingly.
@@ -346,21 +261,17 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem)
346 */ 261 */
347static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem) 262static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem)
348{ 263{
349 long old, count = atomic_long_read(&sem->count); 264 long count = atomic_long_read(&sem->count);
350
351 while (true) {
352 if (!(count == 0 || count == RWSEM_WAITING_BIAS))
353 return false;
354 265
355 old = atomic_long_cmpxchg_acquire(&sem->count, count, 266 while (!count || count == RWSEM_WAITING_BIAS) {
356 count + RWSEM_ACTIVE_WRITE_BIAS); 267 if (atomic_long_try_cmpxchg_acquire(&sem->count, &count,
357 if (old == count) { 268 count + RWSEM_ACTIVE_WRITE_BIAS)) {
358 rwsem_set_owner(sem); 269 rwsem_set_owner(sem);
270 lockevent_inc(rwsem_opt_wlock);
359 return true; 271 return true;
360 } 272 }
361
362 count = old;
363 } 273 }
274 return false;
364} 275}
365 276
366static inline bool owner_on_cpu(struct task_struct *owner) 277static inline bool owner_on_cpu(struct task_struct *owner)
@@ -481,6 +392,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem)
481 osq_unlock(&sem->osq); 392 osq_unlock(&sem->osq);
482done: 393done:
483 preempt_enable(); 394 preempt_enable();
395 lockevent_cond_inc(rwsem_opt_fail, !taken);
484 return taken; 396 return taken;
485} 397}
486 398
@@ -505,6 +417,97 @@ static inline bool rwsem_has_spinner(struct rw_semaphore *sem)
505#endif 417#endif
506 418
507/* 419/*
420 * Wait for the read lock to be granted
421 */
422static inline struct rw_semaphore __sched *
423__rwsem_down_read_failed_common(struct rw_semaphore *sem, int state)
424{
425 long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
426 struct rwsem_waiter waiter;
427 DEFINE_WAKE_Q(wake_q);
428
429 waiter.task = current;
430 waiter.type = RWSEM_WAITING_FOR_READ;
431
432 raw_spin_lock_irq(&sem->wait_lock);
433 if (list_empty(&sem->wait_list)) {
434 /*
435 * In case the wait queue is empty and the lock isn't owned
436 * by a writer, this reader can exit the slowpath and return
437 * immediately as its RWSEM_ACTIVE_READ_BIAS has already
438 * been set in the count.
439 */
440 if (atomic_long_read(&sem->count) >= 0) {
441 raw_spin_unlock_irq(&sem->wait_lock);
442 rwsem_set_reader_owned(sem);
443 lockevent_inc(rwsem_rlock_fast);
444 return sem;
445 }
446 adjustment += RWSEM_WAITING_BIAS;
447 }
448 list_add_tail(&waiter.list, &sem->wait_list);
449
450 /* we're now waiting on the lock, but no longer actively locking */
451 count = atomic_long_add_return(adjustment, &sem->count);
452
453 /*
454 * If there are no active locks, wake the front queued process(es).
455 *
456 * If there are no writers and we are first in the queue,
457 * wake our own waiter to join the existing active readers !
458 */
459 if (count == RWSEM_WAITING_BIAS ||
460 (count > RWSEM_WAITING_BIAS &&
461 adjustment != -RWSEM_ACTIVE_READ_BIAS))
462 __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
463
464 raw_spin_unlock_irq(&sem->wait_lock);
465 wake_up_q(&wake_q);
466
467 /* wait to be given the lock */
468 while (true) {
469 set_current_state(state);
470 if (!waiter.task)
471 break;
472 if (signal_pending_state(state, current)) {
473 raw_spin_lock_irq(&sem->wait_lock);
474 if (waiter.task)
475 goto out_nolock;
476 raw_spin_unlock_irq(&sem->wait_lock);
477 break;
478 }
479 schedule();
480 lockevent_inc(rwsem_sleep_reader);
481 }
482
483 __set_current_state(TASK_RUNNING);
484 lockevent_inc(rwsem_rlock);
485 return sem;
486out_nolock:
487 list_del(&waiter.list);
488 if (list_empty(&sem->wait_list))
489 atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count);
490 raw_spin_unlock_irq(&sem->wait_lock);
491 __set_current_state(TASK_RUNNING);
492 lockevent_inc(rwsem_rlock_fail);
493 return ERR_PTR(-EINTR);
494}
495
496__visible struct rw_semaphore * __sched
497rwsem_down_read_failed(struct rw_semaphore *sem)
498{
499 return __rwsem_down_read_failed_common(sem, TASK_UNINTERRUPTIBLE);
500}
501EXPORT_SYMBOL(rwsem_down_read_failed);
502
503__visible struct rw_semaphore * __sched
504rwsem_down_read_failed_killable(struct rw_semaphore *sem)
505{
506 return __rwsem_down_read_failed_common(sem, TASK_KILLABLE);
507}
508EXPORT_SYMBOL(rwsem_down_read_failed_killable);
509
510/*
508 * Wait until we successfully acquire the write lock 511 * Wait until we successfully acquire the write lock
509 */ 512 */
510static inline struct rw_semaphore * 513static inline struct rw_semaphore *
@@ -580,6 +583,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
580 goto out_nolock; 583 goto out_nolock;
581 584
582 schedule(); 585 schedule();
586 lockevent_inc(rwsem_sleep_writer);
583 set_current_state(state); 587 set_current_state(state);
584 } while ((count = atomic_long_read(&sem->count)) & RWSEM_ACTIVE_MASK); 588 } while ((count = atomic_long_read(&sem->count)) & RWSEM_ACTIVE_MASK);
585 589
@@ -588,6 +592,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state)
588 __set_current_state(TASK_RUNNING); 592 __set_current_state(TASK_RUNNING);
589 list_del(&waiter.list); 593 list_del(&waiter.list);
590 raw_spin_unlock_irq(&sem->wait_lock); 594 raw_spin_unlock_irq(&sem->wait_lock);
595 lockevent_inc(rwsem_wlock);
591 596
592 return ret; 597 return ret;
593 598
@@ -601,6 +606,7 @@ out_nolock:
601 __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q); 606 __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q);
602 raw_spin_unlock_irq(&sem->wait_lock); 607 raw_spin_unlock_irq(&sem->wait_lock);
603 wake_up_q(&wake_q); 608 wake_up_q(&wake_q);
609 lockevent_inc(rwsem_wlock_fail);
604 610
605 return ERR_PTR(-EINTR); 611 return ERR_PTR(-EINTR);
606} 612}
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c
index e586f0d03ad3..ccbf18f560ff 100644
--- a/kernel/locking/rwsem.c
+++ b/kernel/locking/rwsem.c
@@ -24,7 +24,6 @@ void __sched down_read(struct rw_semaphore *sem)
24 rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); 24 rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_);
25 25
26 LOCK_CONTENDED(sem, __down_read_trylock, __down_read); 26 LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
27 rwsem_set_reader_owned(sem);
28} 27}
29 28
30EXPORT_SYMBOL(down_read); 29EXPORT_SYMBOL(down_read);
@@ -39,7 +38,6 @@ int __sched down_read_killable(struct rw_semaphore *sem)
39 return -EINTR; 38 return -EINTR;
40 } 39 }
41 40
42 rwsem_set_reader_owned(sem);
43 return 0; 41 return 0;
44} 42}
45 43
@@ -52,10 +50,8 @@ int down_read_trylock(struct rw_semaphore *sem)
52{ 50{
53 int ret = __down_read_trylock(sem); 51 int ret = __down_read_trylock(sem);
54 52
55 if (ret == 1) { 53 if (ret == 1)
56 rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); 54 rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_);
57 rwsem_set_reader_owned(sem);
58 }
59 return ret; 55 return ret;
60} 56}
61 57
@@ -70,7 +66,6 @@ void __sched down_write(struct rw_semaphore *sem)
70 rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); 66 rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_);
71 67
72 LOCK_CONTENDED(sem, __down_write_trylock, __down_write); 68 LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
73 rwsem_set_owner(sem);
74} 69}
75 70
76EXPORT_SYMBOL(down_write); 71EXPORT_SYMBOL(down_write);
@@ -88,7 +83,6 @@ int __sched down_write_killable(struct rw_semaphore *sem)
88 return -EINTR; 83 return -EINTR;
89 } 84 }
90 85
91 rwsem_set_owner(sem);
92 return 0; 86 return 0;
93} 87}
94 88
@@ -101,10 +95,8 @@ int down_write_trylock(struct rw_semaphore *sem)
101{ 95{
102 int ret = __down_write_trylock(sem); 96 int ret = __down_write_trylock(sem);
103 97
104 if (ret == 1) { 98 if (ret == 1)
105 rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_); 99 rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_);
106 rwsem_set_owner(sem);
107 }
108 100
109 return ret; 101 return ret;
110} 102}
@@ -117,9 +109,7 @@ EXPORT_SYMBOL(down_write_trylock);
117void up_read(struct rw_semaphore *sem) 109void up_read(struct rw_semaphore *sem)
118{ 110{
119 rwsem_release(&sem->dep_map, 1, _RET_IP_); 111 rwsem_release(&sem->dep_map, 1, _RET_IP_);
120 DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED));
121 112
122 rwsem_clear_reader_owned(sem);
123 __up_read(sem); 113 __up_read(sem);
124} 114}
125 115
@@ -131,9 +121,7 @@ EXPORT_SYMBOL(up_read);
131void up_write(struct rw_semaphore *sem) 121void up_write(struct rw_semaphore *sem)
132{ 122{
133 rwsem_release(&sem->dep_map, 1, _RET_IP_); 123 rwsem_release(&sem->dep_map, 1, _RET_IP_);
134 DEBUG_RWSEMS_WARN_ON(sem->owner != current);
135 124
136 rwsem_clear_owner(sem);
137 __up_write(sem); 125 __up_write(sem);
138} 126}
139 127
@@ -145,9 +133,7 @@ EXPORT_SYMBOL(up_write);
145void downgrade_write(struct rw_semaphore *sem) 133void downgrade_write(struct rw_semaphore *sem)
146{ 134{
147 lock_downgrade(&sem->dep_map, _RET_IP_); 135 lock_downgrade(&sem->dep_map, _RET_IP_);
148 DEBUG_RWSEMS_WARN_ON(sem->owner != current);
149 136
150 rwsem_set_reader_owned(sem);
151 __downgrade_write(sem); 137 __downgrade_write(sem);
152} 138}
153 139
@@ -161,7 +147,6 @@ void down_read_nested(struct rw_semaphore *sem, int subclass)
161 rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); 147 rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_);
162 148
163 LOCK_CONTENDED(sem, __down_read_trylock, __down_read); 149 LOCK_CONTENDED(sem, __down_read_trylock, __down_read);
164 rwsem_set_reader_owned(sem);
165} 150}
166 151
167EXPORT_SYMBOL(down_read_nested); 152EXPORT_SYMBOL(down_read_nested);
@@ -172,7 +157,6 @@ void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest)
172 rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_); 157 rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_);
173 158
174 LOCK_CONTENDED(sem, __down_write_trylock, __down_write); 159 LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
175 rwsem_set_owner(sem);
176} 160}
177 161
178EXPORT_SYMBOL(_down_write_nest_lock); 162EXPORT_SYMBOL(_down_write_nest_lock);
@@ -193,7 +177,6 @@ void down_write_nested(struct rw_semaphore *sem, int subclass)
193 rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); 177 rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_);
194 178
195 LOCK_CONTENDED(sem, __down_write_trylock, __down_write); 179 LOCK_CONTENDED(sem, __down_write_trylock, __down_write);
196 rwsem_set_owner(sem);
197} 180}
198 181
199EXPORT_SYMBOL(down_write_nested); 182EXPORT_SYMBOL(down_write_nested);
@@ -208,7 +191,6 @@ int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass)
208 return -EINTR; 191 return -EINTR;
209 } 192 }
210 193
211 rwsem_set_owner(sem);
212 return 0; 194 return 0;
213} 195}
214 196
@@ -216,7 +198,8 @@ EXPORT_SYMBOL(down_write_killable_nested);
216 198
217void up_read_non_owner(struct rw_semaphore *sem) 199void up_read_non_owner(struct rw_semaphore *sem)
218{ 200{
219 DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED)); 201 DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED),
202 sem);
220 __up_read(sem); 203 __up_read(sem);
221} 204}
222 205
diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h
index bad2bca0268b..64877f5294e3 100644
--- a/kernel/locking/rwsem.h
+++ b/kernel/locking/rwsem.h
@@ -23,15 +23,44 @@
23 * is involved. Ideally we would like to track all the readers that own 23 * is involved. Ideally we would like to track all the readers that own
24 * a rwsem, but the overhead is simply too big. 24 * a rwsem, but the overhead is simply too big.
25 */ 25 */
26#include "lock_events.h"
27
26#define RWSEM_READER_OWNED (1UL << 0) 28#define RWSEM_READER_OWNED (1UL << 0)
27#define RWSEM_ANONYMOUSLY_OWNED (1UL << 1) 29#define RWSEM_ANONYMOUSLY_OWNED (1UL << 1)
28 30
29#ifdef CONFIG_DEBUG_RWSEMS 31#ifdef CONFIG_DEBUG_RWSEMS
30# define DEBUG_RWSEMS_WARN_ON(c) DEBUG_LOCKS_WARN_ON(c) 32# define DEBUG_RWSEMS_WARN_ON(c, sem) do { \
33 if (!debug_locks_silent && \
34 WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\
35 #c, atomic_long_read(&(sem)->count), \
36 (long)((sem)->owner), (long)current, \
37 list_empty(&(sem)->wait_list) ? "" : "not ")) \
38 debug_locks_off(); \
39 } while (0)
40#else
41# define DEBUG_RWSEMS_WARN_ON(c, sem)
42#endif
43
44/*
45 * R/W semaphores originally for PPC using the stuff in lib/rwsem.c.
46 * Adapted largely from include/asm-i386/rwsem.h
47 * by Paul Mackerras <paulus@samba.org>.
48 */
49
50/*
51 * the semaphore definition
52 */
53#ifdef CONFIG_64BIT
54# define RWSEM_ACTIVE_MASK 0xffffffffL
31#else 55#else
32# define DEBUG_RWSEMS_WARN_ON(c) 56# define RWSEM_ACTIVE_MASK 0x0000ffffL
33#endif 57#endif
34 58
59#define RWSEM_ACTIVE_BIAS 0x00000001L
60#define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1)
61#define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS
62#define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)
63
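
[Editorial note] To make the bias arithmetic concrete, a standalone user-space sketch (64-bit long assumed, illustrative only) printing sem->count for a few representative states:

#include <stdio.h>

#define ACTIVE_MASK	0xffffffffL
#define ACTIVE_BIAS	0x00000001L
#define WAITING_BIAS	(-ACTIVE_MASK - 1)
#define WRITE_BIAS	(WAITING_BIAS + ACTIVE_BIAS)

int main(void)
{
	printf("unlocked                 : %#lx\n", (unsigned long)0);
	printf("three readers, no waiters: %#lx\n", (unsigned long)(3 * ACTIVE_BIAS));
	printf("one writer, no waiters   : %#lx\n", (unsigned long)WRITE_BIAS);
	printf("two readers + waiter(s)  : %#lx\n", (unsigned long)(2 * ACTIVE_BIAS + WAITING_BIAS));
	printf("waiters only, no lockers : %#lx\n", (unsigned long)WAITING_BIAS);
	return 0;
}

A negative count (top bits set) therefore signals either an active writer or queued waiters, which is exactly what the fast paths below test for.
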
35#ifdef CONFIG_RWSEM_SPIN_ON_OWNER 64#ifdef CONFIG_RWSEM_SPIN_ON_OWNER
36/* 65/*
37 * All writes to owner are protected by WRITE_ONCE() to make sure that 66 * All writes to owner are protected by WRITE_ONCE() to make sure that
@@ -132,3 +161,144 @@ static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem)
132{ 161{
133} 162}
134#endif 163#endif
164
165extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem);
166extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem);
167extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem);
168extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem);
169extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem);
170extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem);
171
172/*
173 * lock for reading
174 */
175static inline void __down_read(struct rw_semaphore *sem)
176{
177 if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) {
178 rwsem_down_read_failed(sem);
179 DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner &
180 RWSEM_READER_OWNED), sem);
181 } else {
182 rwsem_set_reader_owned(sem);
183 }
184}
185
186static inline int __down_read_killable(struct rw_semaphore *sem)
187{
188 if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) {
189 if (IS_ERR(rwsem_down_read_failed_killable(sem)))
190 return -EINTR;
191 DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner &
192 RWSEM_READER_OWNED), sem);
193 } else {
194 rwsem_set_reader_owned(sem);
195 }
196 return 0;
197}
198
199static inline int __down_read_trylock(struct rw_semaphore *sem)
200{
201 /*
202 * Optimize for the case when the rwsem is not locked at all.
203 */
204 long tmp = RWSEM_UNLOCKED_VALUE;
205
206 lockevent_inc(rwsem_rtrylock);
207 do {
208 if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp,
209 tmp + RWSEM_ACTIVE_READ_BIAS)) {
210 rwsem_set_reader_owned(sem);
211 return 1;
212 }
213 } while (tmp >= 0);
214 return 0;
215}
216
217/*
218 * lock for writing
219 */
220static inline void __down_write(struct rw_semaphore *sem)
221{
222 long tmp;
223
224 tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
225 &sem->count);
226 if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
227 rwsem_down_write_failed(sem);
228 rwsem_set_owner(sem);
229}
230
231static inline int __down_write_killable(struct rw_semaphore *sem)
232{
233 long tmp;
234
235 tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS,
236 &sem->count);
237 if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS))
238 if (IS_ERR(rwsem_down_write_failed_killable(sem)))
239 return -EINTR;
240 rwsem_set_owner(sem);
241 return 0;
242}
243
244static inline int __down_write_trylock(struct rw_semaphore *sem)
245{
246 long tmp;
247
248 lockevent_inc(rwsem_wtrylock);
249 tmp = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE,
250 RWSEM_ACTIVE_WRITE_BIAS);
251 if (tmp == RWSEM_UNLOCKED_VALUE) {
252 rwsem_set_owner(sem);
253 return true;
254 }
255 return false;
256}
257
258/*
259 * unlock after reading
260 */
261static inline void __up_read(struct rw_semaphore *sem)
262{
263 long tmp;
264
265 DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED),
266 sem);
267 rwsem_clear_reader_owned(sem);
268 tmp = atomic_long_dec_return_release(&sem->count);
269 if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0))
270 rwsem_wake(sem);
271}
272
273/*
274 * unlock after writing
275 */
276static inline void __up_write(struct rw_semaphore *sem)
277{
278 DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem);
279 rwsem_clear_owner(sem);
280 if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS,
281 &sem->count) < 0))
282 rwsem_wake(sem);
283}
284
285/*
286 * downgrade write lock to read lock
287 */
288static inline void __downgrade_write(struct rw_semaphore *sem)
289{
290 long tmp;
291
292 /*
293 * When downgrading from exclusive to shared ownership,
294 * anything inside the write-locked region cannot leak
295 * into the read side. In contrast, anything in the
296 * read-locked region is ok to be re-ordered into the
297 * write side. As such, rely on RELEASE semantics.
298 */
299 DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem);
300 tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count);
301 rwsem_set_reader_owned(sem);
302 if (tmp < 0)
303 rwsem_downgrade_wake(sem);
304}
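
[Editorial note] A quick worked example of the final check above (64-bit constants, illustrative): with the write lock held and no waiters, count == RWSEM_ACTIVE_WRITE_BIAS == RWSEM_WAITING_BIAS + 1, so adding -RWSEM_WAITING_BIAS leaves count == 1, i.e. one active reader; tmp >= 0 and no wakeup is needed. With waiters queued, count == RWSEM_ACTIVE_WRITE_BIAS + RWSEM_WAITING_BIAS, the addition leaves RWSEM_WAITING_BIAS + 1 < 0, and rwsem_downgrade_wake() is called to wake any readers at the front of the queue.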