author     Linus Torvalds <torvalds@linux-foundation.org>   2019-05-06 16:50:15 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>   2019-05-06 16:50:15 -0400
commit     007dc78fea62610bf06829e38f1d8c69b6ea5af6 (patch)
tree       683af90696ed7a237dedd48030bfd649e5822955 /kernel/locking
parent     2f1835dffa949f560dfa3ed63c0bfc10944b461c (diff)
parent     d671002be6bdd7f77a771e23bf3e95d1f16775e6 (diff)
Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull locking updates from Ingo Molnar:
"Here are the locking changes in this cycle:
- rwsem unification and simpler micro-optimizations to prepare for
more intrusive (and more lucrative) scalability improvements in
v5.3 (Waiman Long)
- Lockdep irq state tracking flag usage cleanups (Frederic
Weisbecker)
- static key improvements (Jakub Kicinski, Peter Zijlstra)
- misc updates, cleanups and smaller fixes"
* 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (26 commits)
locking/lockdep: Remove unnecessary unlikely()
locking/static_key: Don't take sleeping locks in __static_key_slow_dec_deferred()
locking/static_key: Factor out the fast path of static_key_slow_dec()
locking/static_key: Add support for deferred static branches
locking/lockdep: Test all incompatible scenarios at once in check_irq_usage()
locking/lockdep: Avoid bogus Clang warning
locking/lockdep: Generate LOCKF_ bit composites
locking/lockdep: Use expanded masks on find_usage_*() functions
locking/lockdep: Map remaining magic numbers to lock usage mask names
locking/lockdep: Move valid_state() inside CONFIG_TRACE_IRQFLAGS && CONFIG_PROVE_LOCKING
locking/rwsem: Prevent unneeded warning during locking selftest
locking/rwsem: Optimize rwsem structure for uncontended lock acquisition
locking/rwsem: Enable lock event counting
locking/lock_events: Don't show pvqspinlock events on bare metal
locking/lock_events: Make lock_events available for all archs & other locks
locking/qspinlock_stat: Introduce generic lockevent_*() counting APIs
locking/rwsem: Enhance DEBUG_RWSEMS_WARN_ON() macro
locking/rwsem: Add debug check for __down_read*()
locking/rwsem: Micro-optimize rwsem_try_read_lock_unqueued()
locking/rwsem: Move rwsem internal function declarations to rwsem-xadd.h
...
Diffstat (limited to 'kernel/locking')
-rw-r--r--  kernel/locking/Makefile              |   5
-rw-r--r--  kernel/locking/lock_events.c         | 179
-rw-r--r--  kernel/locking/lock_events.h         |  59
-rw-r--r--  kernel/locking/lock_events_list.h    |  67
-rw-r--r--  kernel/locking/lockdep.c             | 267
-rw-r--r--  kernel/locking/lockdep_internals.h   |  34
-rw-r--r--  kernel/locking/percpu-rwsem.c        |   2
-rw-r--r--  kernel/locking/qspinlock.c           |   8
-rw-r--r--  kernel/locking/qspinlock_paravirt.h  |  19
-rw-r--r--  kernel/locking/qspinlock_stat.h      | 242
-rw-r--r--  kernel/locking/rwsem-spinlock.c      | 339
-rw-r--r--  kernel/locking/rwsem-xadd.c          | 204
-rw-r--r--  kernel/locking/rwsem.c               |  25
-rw-r--r--  kernel/locking/rwsem.h               | 174
14 files changed, 858 insertions(+), 766 deletions(-)
diff --git a/kernel/locking/Makefile b/kernel/locking/Makefile
index 392c7f23af76..6fe2f333aecb 100644
--- a/kernel/locking/Makefile
+++ b/kernel/locking/Makefile
@@ -3,7 +3,7 @@ | |||
3 | # and is generally not a function of system call inputs. | 3 | # and is generally not a function of system call inputs. |
4 | KCOV_INSTRUMENT := n | 4 | KCOV_INSTRUMENT := n |
5 | 5 | ||
6 | obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o | 6 | obj-y += mutex.o semaphore.o rwsem.o percpu-rwsem.o rwsem-xadd.o |
7 | 7 | ||
8 | ifdef CONFIG_FUNCTION_TRACER | 8 | ifdef CONFIG_FUNCTION_TRACER |
9 | CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE) | 9 | CFLAGS_REMOVE_lockdep.o = $(CC_FLAGS_FTRACE) |
@@ -25,8 +25,7 @@ obj-$(CONFIG_RT_MUTEXES) += rtmutex.o | |||
25 | obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o | 25 | obj-$(CONFIG_DEBUG_RT_MUTEXES) += rtmutex-debug.o |
26 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o | 26 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock.o |
27 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o | 27 | obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o |
28 | obj-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o | ||
29 | obj-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem-xadd.o | ||
30 | obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o | 28 | obj-$(CONFIG_QUEUED_RWLOCKS) += qrwlock.o |
31 | obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o | 29 | obj-$(CONFIG_LOCK_TORTURE_TEST) += locktorture.o |
32 | obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o | 30 | obj-$(CONFIG_WW_MUTEX_SELFTEST) += test-ww_mutex.o |
31 | obj-$(CONFIG_LOCK_EVENT_COUNTS) += lock_events.o | ||
diff --git a/kernel/locking/lock_events.c b/kernel/locking/lock_events.c
new file mode 100644
index 000000000000..fa2c2f951c6b
--- /dev/null
+++ b/kernel/locking/lock_events.c
@@ -0,0 +1,179 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | /* | ||
3 | * This program is free software; you can redistribute it and/or modify | ||
4 | * it under the terms of the GNU General Public License as published by | ||
5 | * the Free Software Foundation; either version 2 of the License, or | ||
6 | * (at your option) any later version. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | * Authors: Waiman Long <waiman.long@hpe.com> | ||
14 | */ | ||
15 | |||
16 | /* | ||
17 | * Collect locking event counts | ||
18 | */ | ||
19 | #include <linux/debugfs.h> | ||
20 | #include <linux/sched.h> | ||
21 | #include <linux/sched/clock.h> | ||
22 | #include <linux/fs.h> | ||
23 | |||
24 | #include "lock_events.h" | ||
25 | |||
26 | #undef LOCK_EVENT | ||
27 | #define LOCK_EVENT(name) [LOCKEVENT_ ## name] = #name, | ||
28 | |||
29 | #define LOCK_EVENTS_DIR "lock_event_counts" | ||
30 | |||
31 | /* | ||
32 | * When CONFIG_LOCK_EVENT_COUNTS is enabled, event counts of different | ||
33 | * types of locks will be reported under the <debugfs>/lock_event_counts/ | ||
34 | * directory. See lock_events_list.h for the list of available locking | ||
35 | * events. | ||
36 | * | ||
37 | * Writing to the special ".reset_counts" file will reset all the above | ||
38 | * locking event counts. This is a very slow operation and so should not | ||
39 | * be done frequently. | ||
40 | * | ||
41 | * These event counts are implemented as per-cpu variables which are | ||
42 | * summed and computed whenever the corresponding debugfs files are read. This | ||
43 | * minimizes added overhead making the counts usable even in a production | ||
44 | * environment. | ||
45 | */ | ||
46 | static const char * const lockevent_names[lockevent_num + 1] = { | ||
47 | |||
48 | #include "lock_events_list.h" | ||
49 | |||
50 | [LOCKEVENT_reset_cnts] = ".reset_counts", | ||
51 | }; | ||
52 | |||
53 | /* | ||
54 | * Per-cpu counts | ||
55 | */ | ||
56 | DEFINE_PER_CPU(unsigned long, lockevents[lockevent_num]); | ||
57 | |||
58 | /* | ||
59 | * The lockevent_read() function can be overridden. | ||
60 | */ | ||
61 | ssize_t __weak lockevent_read(struct file *file, char __user *user_buf, | ||
62 | size_t count, loff_t *ppos) | ||
63 | { | ||
64 | char buf[64]; | ||
65 | int cpu, id, len; | ||
66 | u64 sum = 0; | ||
67 | |||
68 | /* | ||
69 | * Get the counter ID stored in file->f_inode->i_private | ||
70 | */ | ||
71 | id = (long)file_inode(file)->i_private; | ||
72 | |||
73 | if (id >= lockevent_num) | ||
74 | return -EBADF; | ||
75 | |||
76 | for_each_possible_cpu(cpu) | ||
77 | sum += per_cpu(lockevents[id], cpu); | ||
78 | len = snprintf(buf, sizeof(buf) - 1, "%llu\n", sum); | ||
79 | |||
80 | return simple_read_from_buffer(user_buf, count, ppos, buf, len); | ||
81 | } | ||
82 | |||
83 | /* | ||
84 | * Function to handle write request | ||
85 | * | ||
86 | * When idx = reset_cnts, reset all the counts. | ||
87 | */ | ||
88 | static ssize_t lockevent_write(struct file *file, const char __user *user_buf, | ||
89 | size_t count, loff_t *ppos) | ||
90 | { | ||
91 | int cpu; | ||
92 | |||
93 | /* | ||
94 | * Get the counter ID stored in file->f_inode->i_private | ||
95 | */ | ||
96 | if ((long)file_inode(file)->i_private != LOCKEVENT_reset_cnts) | ||
97 | return count; | ||
98 | |||
99 | for_each_possible_cpu(cpu) { | ||
100 | int i; | ||
101 | unsigned long *ptr = per_cpu_ptr(lockevents, cpu); | ||
102 | |||
103 | for (i = 0 ; i < lockevent_num; i++) | ||
104 | WRITE_ONCE(ptr[i], 0); | ||
105 | } | ||
106 | return count; | ||
107 | } | ||
108 | |||
109 | /* | ||
110 | * Debugfs data structures | ||
111 | */ | ||
112 | static const struct file_operations fops_lockevent = { | ||
113 | .read = lockevent_read, | ||
114 | .write = lockevent_write, | ||
115 | .llseek = default_llseek, | ||
116 | }; | ||
117 | |||
118 | #ifdef CONFIG_PARAVIRT_SPINLOCKS | ||
119 | #include <asm/paravirt.h> | ||
120 | |||
121 | static bool __init skip_lockevent(const char *name) | ||
122 | { | ||
123 | static int pv_on __initdata = -1; | ||
124 | |||
125 | if (pv_on < 0) | ||
126 | pv_on = !pv_is_native_spin_unlock(); | ||
127 | /* | ||
128 | * Skip PV qspinlock events on bare metal. | ||
129 | */ | ||
130 | if (!pv_on && !memcmp(name, "pv_", 3)) | ||
131 | return true; | ||
132 | return false; | ||
133 | } | ||
134 | #else | ||
135 | static inline bool skip_lockevent(const char *name) | ||
136 | { | ||
137 | return false; | ||
138 | } | ||
139 | #endif | ||
140 | |||
141 | /* | ||
142 | * Initialize debugfs for the locking event counts. | ||
143 | */ | ||
144 | static int __init init_lockevent_counts(void) | ||
145 | { | ||
146 | struct dentry *d_counts = debugfs_create_dir(LOCK_EVENTS_DIR, NULL); | ||
147 | int i; | ||
148 | |||
149 | if (!d_counts) | ||
150 | goto out; | ||
151 | |||
152 | /* | ||
153 | * Create the debugfs files | ||
154 | * | ||
155 | * As reading from and writing to the stat files can be slow, only | ||
156 | * root is allowed to do the read/write to limit impact to system | ||
157 | * performance. | ||
158 | */ | ||
159 | for (i = 0; i < lockevent_num; i++) { | ||
160 | if (skip_lockevent(lockevent_names[i])) | ||
161 | continue; | ||
162 | if (!debugfs_create_file(lockevent_names[i], 0400, d_counts, | ||
163 | (void *)(long)i, &fops_lockevent)) | ||
164 | goto fail_undo; | ||
165 | } | ||
166 | |||
167 | if (!debugfs_create_file(lockevent_names[LOCKEVENT_reset_cnts], 0200, | ||
168 | d_counts, (void *)(long)LOCKEVENT_reset_cnts, | ||
169 | &fops_lockevent)) | ||
170 | goto fail_undo; | ||
171 | |||
172 | return 0; | ||
173 | fail_undo: | ||
174 | debugfs_remove_recursive(d_counts); | ||
175 | out: | ||
176 | pr_warn("Could not create '%s' debugfs entries\n", LOCK_EVENTS_DIR); | ||
177 | return -ENOMEM; | ||
178 | } | ||
179 | fs_initcall(init_lockevent_counts); | ||
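The comment block in lock_events.c above describes the debugfs interface these counters get. As a hedged illustration (not part of this merge), a minimal userspace reader for one of the generated files might look like the sketch below; it assumes debugfs is mounted at /sys/kernel/debug, the kernel was built with CONFIG_LOCK_EVENT_COUNTS=y, and it runs as root because the files are created with mode 0400. The event name rwsem_sleep_reader is just one entry from lock_events_list.h.

#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	/* Assumed path: debugfs mount point plus the LOCK_EVENTS_DIR name above. */
	const char *path =
		"/sys/kernel/debug/lock_event_counts/rwsem_sleep_reader";
	FILE *f = fopen(path, "r");
	unsigned long long count;

	if (!f) {
		perror("fopen");
		return EXIT_FAILURE;
	}
	if (fscanf(f, "%llu", &count) == 1)
		printf("rwsem_sleep_reader = %llu\n", count);
	fclose(f);
	return 0;
}

Writing any string to the companion .reset_counts file (mode 0200) zeroes every counter, as handled by lockevent_write() above.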
diff --git a/kernel/locking/lock_events.h b/kernel/locking/lock_events.h
new file mode 100644
index 000000000000..feb1acc54611
--- /dev/null
+++ b/kernel/locking/lock_events.h
@@ -0,0 +1,59 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | /* | ||
3 | * This program is free software; you can redistribute it and/or modify | ||
4 | * it under the terms of the GNU General Public License as published by | ||
5 | * the Free Software Foundation; either version 2 of the License, or | ||
6 | * (at your option) any later version. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | * Authors: Waiman Long <longman@redhat.com> | ||
14 | */ | ||
15 | |||
16 | #ifndef __LOCKING_LOCK_EVENTS_H | ||
17 | #define __LOCKING_LOCK_EVENTS_H | ||
18 | |||
19 | enum lock_events { | ||
20 | |||
21 | #include "lock_events_list.h" | ||
22 | |||
23 | lockevent_num, /* Total number of lock event counts */ | ||
24 | LOCKEVENT_reset_cnts = lockevent_num, | ||
25 | }; | ||
26 | |||
27 | #ifdef CONFIG_LOCK_EVENT_COUNTS | ||
28 | /* | ||
29 | * Per-cpu counters | ||
30 | */ | ||
31 | DECLARE_PER_CPU(unsigned long, lockevents[lockevent_num]); | ||
32 | |||
33 | /* | ||
34 | * Increment the PV qspinlock statistical counters | ||
35 | */ | ||
36 | static inline void __lockevent_inc(enum lock_events event, bool cond) | ||
37 | { | ||
38 | if (cond) | ||
39 | __this_cpu_inc(lockevents[event]); | ||
40 | } | ||
41 | |||
42 | #define lockevent_inc(ev) __lockevent_inc(LOCKEVENT_ ##ev, true) | ||
43 | #define lockevent_cond_inc(ev, c) __lockevent_inc(LOCKEVENT_ ##ev, c) | ||
44 | |||
45 | static inline void __lockevent_add(enum lock_events event, int inc) | ||
46 | { | ||
47 | __this_cpu_add(lockevents[event], inc); | ||
48 | } | ||
49 | |||
50 | #define lockevent_add(ev, c) __lockevent_add(LOCKEVENT_ ##ev, c) | ||
51 | |||
52 | #else /* CONFIG_LOCK_EVENT_COUNTS */ | ||
53 | |||
54 | #define lockevent_inc(ev) | ||
55 | #define lockevent_add(ev, c) | ||
56 | #define lockevent_cond_inc(ev, c) | ||
57 | |||
58 | #endif /* CONFIG_LOCK_EVENT_COUNTS */ | ||
59 | #endif /* __LOCKING_LOCK_EVENTS_H */ | ||
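To make the API introduced by lock_events.h concrete, here is a hedged, stand-alone sketch of how the helpers are meant to be called from a lock slowpath. The per-cpu storage is replaced by plain globals and the event names are invented for the example so it compiles in userspace; in the kernel the same calls compile away to nothing when CONFIG_LOCK_EVENT_COUNTS is off.

#include <stdio.h>
#include <stdbool.h>

/* Invented demo events; the real ones live in lock_events_list.h. */
enum lock_events {
	LOCKEVENT_demo_slowpath,
	LOCKEVENT_demo_contended,
	LOCKEVENT_demo_spins,
	lockevent_num,
};

/* Plain globals stand in for the kernel's DEFINE_PER_CPU() array. */
static unsigned long lockevents[lockevent_num];

#define lockevent_inc(ev)	  (lockevents[LOCKEVENT_ ## ev]++)
#define lockevent_cond_inc(ev, c) do { if (c) lockevents[LOCKEVENT_ ## ev]++; } while (0)
#define lockevent_add(ev, c)	  (lockevents[LOCKEVENT_ ## ev] += (c))

static void demo_lock_slowpath(bool contended, int spins)
{
	lockevent_inc(demo_slowpath);			/* unconditional count */
	lockevent_cond_inc(demo_contended, contended);	/* counted only if true */
	lockevent_add(demo_spins, spins);		/* accumulate an amount */
}

int main(void)
{
	demo_lock_slowpath(false, 0);
	demo_lock_slowpath(true, 3);
	printf("slowpath=%lu contended=%lu spins=%lu\n",
	       lockevents[LOCKEVENT_demo_slowpath],
	       lockevents[LOCKEVENT_demo_contended],
	       lockevents[LOCKEVENT_demo_spins]);
	return 0;
}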
diff --git a/kernel/locking/lock_events_list.h b/kernel/locking/lock_events_list.h
new file mode 100644
index 000000000000..ad7668cfc9da
--- /dev/null
+++ b/kernel/locking/lock_events_list.h
@@ -0,0 +1,67 @@ | |||
1 | /* SPDX-License-Identifier: GPL-2.0 */ | ||
2 | /* | ||
3 | * This program is free software; you can redistribute it and/or modify | ||
4 | * it under the terms of the GNU General Public License as published by | ||
5 | * the Free Software Foundation; either version 2 of the License, or | ||
6 | * (at your option) any later version. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
11 | * GNU General Public License for more details. | ||
12 | * | ||
13 | * Authors: Waiman Long <longman@redhat.com> | ||
14 | */ | ||
15 | |||
16 | #ifndef LOCK_EVENT | ||
17 | #define LOCK_EVENT(name) LOCKEVENT_ ## name, | ||
18 | #endif | ||
19 | |||
20 | #ifdef CONFIG_QUEUED_SPINLOCKS | ||
21 | #ifdef CONFIG_PARAVIRT_SPINLOCKS | ||
22 | /* | ||
23 | * Locking events for PV qspinlock. | ||
24 | */ | ||
25 | LOCK_EVENT(pv_hash_hops) /* Average # of hops per hashing operation */ | ||
26 | LOCK_EVENT(pv_kick_unlock) /* # of vCPU kicks issued at unlock time */ | ||
27 | LOCK_EVENT(pv_kick_wake) /* # of vCPU kicks for pv_latency_wake */ | ||
28 | LOCK_EVENT(pv_latency_kick) /* Average latency (ns) of vCPU kick */ | ||
29 | LOCK_EVENT(pv_latency_wake) /* Average latency (ns) of kick-to-wakeup */ | ||
30 | LOCK_EVENT(pv_lock_stealing) /* # of lock stealing operations */ | ||
31 | LOCK_EVENT(pv_spurious_wakeup) /* # of spurious wakeups in non-head vCPUs */ | ||
32 | LOCK_EVENT(pv_wait_again) /* # of wait's after queue head vCPU kick */ | ||
33 | LOCK_EVENT(pv_wait_early) /* # of early vCPU wait's */ | ||
34 | LOCK_EVENT(pv_wait_head) /* # of vCPU wait's at the queue head */ | ||
35 | LOCK_EVENT(pv_wait_node) /* # of vCPU wait's at non-head queue node */ | ||
36 | #endif /* CONFIG_PARAVIRT_SPINLOCKS */ | ||
37 | |||
38 | /* | ||
39 | * Locking events for qspinlock | ||
40 | * | ||
41 | * Subtracting lock_use_node[234] from lock_slowpath will give you | ||
42 | * lock_use_node1. | ||
43 | */ | ||
44 | LOCK_EVENT(lock_pending) /* # of locking ops via pending code */ | ||
45 | LOCK_EVENT(lock_slowpath) /* # of locking ops via MCS lock queue */ | ||
46 | LOCK_EVENT(lock_use_node2) /* # of locking ops that use 2nd percpu node */ | ||
47 | LOCK_EVENT(lock_use_node3) /* # of locking ops that use 3rd percpu node */ | ||
48 | LOCK_EVENT(lock_use_node4) /* # of locking ops that use 4th percpu node */ | ||
49 | LOCK_EVENT(lock_no_node) /* # of locking ops w/o using percpu node */ | ||
50 | #endif /* CONFIG_QUEUED_SPINLOCKS */ | ||
51 | |||
52 | /* | ||
53 | * Locking events for rwsem | ||
54 | */ | ||
55 | LOCK_EVENT(rwsem_sleep_reader) /* # of reader sleeps */ | ||
56 | LOCK_EVENT(rwsem_sleep_writer) /* # of writer sleeps */ | ||
57 | LOCK_EVENT(rwsem_wake_reader) /* # of reader wakeups */ | ||
58 | LOCK_EVENT(rwsem_wake_writer) /* # of writer wakeups */ | ||
59 | LOCK_EVENT(rwsem_opt_wlock) /* # of write locks opt-spin acquired */ | ||
60 | LOCK_EVENT(rwsem_opt_fail) /* # of failed opt-spinnings */ | ||
61 | LOCK_EVENT(rwsem_rlock) /* # of read locks acquired */ | ||
62 | LOCK_EVENT(rwsem_rlock_fast) /* # of fast read locks acquired */ | ||
63 | LOCK_EVENT(rwsem_rlock_fail) /* # of failed read lock acquisitions */ | ||
64 | LOCK_EVENT(rwsem_rtrylock) /* # of read trylock calls */ | ||
65 | LOCK_EVENT(rwsem_wlock) /* # of write locks acquired */ | ||
66 | LOCK_EVENT(rwsem_wlock_fail) /* # of failed write lock acquisitions */ | ||
67 | LOCK_EVENT(rwsem_wtrylock) /* # of write trylock calls */ | ||
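lock_events_list.h is an x-macro list: it is included once by lock_events.h with LOCK_EVENT() expanding to an enum entry, and once by lock_events.c with LOCK_EVENT() expanding to a name string. A hedged, self-contained sketch of the same pattern follows; it uses an inline list macro instead of a separate header, and the event names are copied only for flavor.

#include <stdio.h>

/* Local stand-in for the list that lock_events_list.h provides. */
#define DEMO_EVENT_LIST(X)	\
	X(lock_pending)		\
	X(lock_slowpath)	\
	X(lock_no_node)

/* First expansion: enum ids (what lock_events.h does). */
#define AS_ENUM(name) DEMOEVENT_ ## name,
enum demo_events { DEMO_EVENT_LIST(AS_ENUM) demoevent_num };
#undef AS_ENUM

/* Second expansion: name strings (what lock_events.c does). */
#define AS_NAME(name) [DEMOEVENT_ ## name] = #name,
static const char * const demo_names[demoevent_num] = {
	DEMO_EVENT_LIST(AS_NAME)
};
#undef AS_NAME

int main(void)
{
	for (int i = 0; i < demoevent_num; i++)
		printf("%d -> %s\n", i, demo_names[i]);
	return 0;
}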
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 91c6b89f04df..27b992fe8cec 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -501,11 +501,11 @@ static char get_usage_char(struct lock_class *class, enum lock_usage_bit bit) | |||
501 | { | 501 | { |
502 | char c = '.'; | 502 | char c = '.'; |
503 | 503 | ||
504 | if (class->usage_mask & lock_flag(bit + 2)) | 504 | if (class->usage_mask & lock_flag(bit + LOCK_USAGE_DIR_MASK)) |
505 | c = '+'; | 505 | c = '+'; |
506 | if (class->usage_mask & lock_flag(bit)) { | 506 | if (class->usage_mask & lock_flag(bit)) { |
507 | c = '-'; | 507 | c = '-'; |
508 | if (class->usage_mask & lock_flag(bit + 2)) | 508 | if (class->usage_mask & lock_flag(bit + LOCK_USAGE_DIR_MASK)) |
509 | c = '?'; | 509 | c = '?'; |
510 | } | 510 | } |
511 | 511 | ||
@@ -1666,19 +1666,25 @@ check_redundant(struct lock_list *root, struct lock_class *target, | |||
1666 | } | 1666 | } |
1667 | 1667 | ||
1668 | #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) | 1668 | #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) |
1669 | |||
1670 | static inline int usage_accumulate(struct lock_list *entry, void *mask) | ||
1671 | { | ||
1672 | *(unsigned long *)mask |= entry->class->usage_mask; | ||
1673 | |||
1674 | return 0; | ||
1675 | } | ||
1676 | |||
1669 | /* | 1677 | /* |
1670 | * Forwards and backwards subgraph searching, for the purposes of | 1678 | * Forwards and backwards subgraph searching, for the purposes of |
1671 | * proving that two subgraphs can be connected by a new dependency | 1679 | * proving that two subgraphs can be connected by a new dependency |
1672 | * without creating any illegal irq-safe -> irq-unsafe lock dependency. | 1680 | * without creating any illegal irq-safe -> irq-unsafe lock dependency. |
1673 | */ | 1681 | */ |
1674 | 1682 | ||
1675 | static inline int usage_match(struct lock_list *entry, void *bit) | 1683 | static inline int usage_match(struct lock_list *entry, void *mask) |
1676 | { | 1684 | { |
1677 | return entry->class->usage_mask & (1 << (enum lock_usage_bit)bit); | 1685 | return entry->class->usage_mask & *(unsigned long *)mask; |
1678 | } | 1686 | } |
1679 | 1687 | ||
1680 | |||
1681 | |||
1682 | /* | 1688 | /* |
1683 | * Find a node in the forwards-direction dependency sub-graph starting | 1689 | * Find a node in the forwards-direction dependency sub-graph starting |
1684 | * at @root->class that matches @bit. | 1690 | * at @root->class that matches @bit. |
@@ -1690,14 +1696,14 @@ static inline int usage_match(struct lock_list *entry, void *bit) | |||
1690 | * Return <0 on error. | 1696 | * Return <0 on error. |
1691 | */ | 1697 | */ |
1692 | static int | 1698 | static int |
1693 | find_usage_forwards(struct lock_list *root, enum lock_usage_bit bit, | 1699 | find_usage_forwards(struct lock_list *root, unsigned long usage_mask, |
1694 | struct lock_list **target_entry) | 1700 | struct lock_list **target_entry) |
1695 | { | 1701 | { |
1696 | int result; | 1702 | int result; |
1697 | 1703 | ||
1698 | debug_atomic_inc(nr_find_usage_forwards_checks); | 1704 | debug_atomic_inc(nr_find_usage_forwards_checks); |
1699 | 1705 | ||
1700 | result = __bfs_forwards(root, (void *)bit, usage_match, target_entry); | 1706 | result = __bfs_forwards(root, &usage_mask, usage_match, target_entry); |
1701 | 1707 | ||
1702 | return result; | 1708 | return result; |
1703 | } | 1709 | } |
@@ -1713,14 +1719,14 @@ find_usage_forwards(struct lock_list *root, enum lock_usage_bit bit, | |||
1713 | * Return <0 on error. | 1719 | * Return <0 on error. |
1714 | */ | 1720 | */ |
1715 | static int | 1721 | static int |
1716 | find_usage_backwards(struct lock_list *root, enum lock_usage_bit bit, | 1722 | find_usage_backwards(struct lock_list *root, unsigned long usage_mask, |
1717 | struct lock_list **target_entry) | 1723 | struct lock_list **target_entry) |
1718 | { | 1724 | { |
1719 | int result; | 1725 | int result; |
1720 | 1726 | ||
1721 | debug_atomic_inc(nr_find_usage_backwards_checks); | 1727 | debug_atomic_inc(nr_find_usage_backwards_checks); |
1722 | 1728 | ||
1723 | result = __bfs_backwards(root, (void *)bit, usage_match, target_entry); | 1729 | result = __bfs_backwards(root, &usage_mask, usage_match, target_entry); |
1724 | 1730 | ||
1725 | return result; | 1731 | return result; |
1726 | } | 1732 | } |
@@ -1912,39 +1918,6 @@ print_bad_irq_dependency(struct task_struct *curr, | |||
1912 | return 0; | 1918 | return 0; |
1913 | } | 1919 | } |
1914 | 1920 | ||
1915 | static int | ||
1916 | check_usage(struct task_struct *curr, struct held_lock *prev, | ||
1917 | struct held_lock *next, enum lock_usage_bit bit_backwards, | ||
1918 | enum lock_usage_bit bit_forwards, const char *irqclass) | ||
1919 | { | ||
1920 | int ret; | ||
1921 | struct lock_list this, that; | ||
1922 | struct lock_list *uninitialized_var(target_entry); | ||
1923 | struct lock_list *uninitialized_var(target_entry1); | ||
1924 | |||
1925 | this.parent = NULL; | ||
1926 | |||
1927 | this.class = hlock_class(prev); | ||
1928 | ret = find_usage_backwards(&this, bit_backwards, &target_entry); | ||
1929 | if (ret < 0) | ||
1930 | return print_bfs_bug(ret); | ||
1931 | if (ret == 1) | ||
1932 | return ret; | ||
1933 | |||
1934 | that.parent = NULL; | ||
1935 | that.class = hlock_class(next); | ||
1936 | ret = find_usage_forwards(&that, bit_forwards, &target_entry1); | ||
1937 | if (ret < 0) | ||
1938 | return print_bfs_bug(ret); | ||
1939 | if (ret == 1) | ||
1940 | return ret; | ||
1941 | |||
1942 | return print_bad_irq_dependency(curr, &this, &that, | ||
1943 | target_entry, target_entry1, | ||
1944 | prev, next, | ||
1945 | bit_backwards, bit_forwards, irqclass); | ||
1946 | } | ||
1947 | |||
1948 | static const char *state_names[] = { | 1921 | static const char *state_names[] = { |
1949 | #define LOCKDEP_STATE(__STATE) \ | 1922 | #define LOCKDEP_STATE(__STATE) \ |
1950 | __stringify(__STATE), | 1923 | __stringify(__STATE), |
@@ -1961,9 +1934,19 @@ static const char *state_rnames[] = { | |||
1961 | 1934 | ||
1962 | static inline const char *state_name(enum lock_usage_bit bit) | 1935 | static inline const char *state_name(enum lock_usage_bit bit) |
1963 | { | 1936 | { |
1964 | return (bit & LOCK_USAGE_READ_MASK) ? state_rnames[bit >> 2] : state_names[bit >> 2]; | 1937 | if (bit & LOCK_USAGE_READ_MASK) |
1938 | return state_rnames[bit >> LOCK_USAGE_DIR_MASK]; | ||
1939 | else | ||
1940 | return state_names[bit >> LOCK_USAGE_DIR_MASK]; | ||
1965 | } | 1941 | } |
1966 | 1942 | ||
1943 | /* | ||
1944 | * The bit number is encoded like: | ||
1945 | * | ||
1946 | * bit0: 0 exclusive, 1 read lock | ||
1947 | * bit1: 0 used in irq, 1 irq enabled | ||
1948 | * bit2-n: state | ||
1949 | */ | ||
1967 | static int exclusive_bit(int new_bit) | 1950 | static int exclusive_bit(int new_bit) |
1968 | { | 1951 | { |
1969 | int state = new_bit & LOCK_USAGE_STATE_MASK; | 1952 | int state = new_bit & LOCK_USAGE_STATE_MASK; |
@@ -1975,45 +1958,160 @@ static int exclusive_bit(int new_bit) | |||
1975 | return state | (dir ^ LOCK_USAGE_DIR_MASK); | 1958 | return state | (dir ^ LOCK_USAGE_DIR_MASK); |
1976 | } | 1959 | } |
1977 | 1960 | ||
1961 | /* | ||
1962 | * Observe that when given a bitmask where each bitnr is encoded as above, a | ||
1963 | * right shift of the mask transforms the individual bitnrs as -1 and | ||
1964 | * conversely, a left shift transforms into +1 for the individual bitnrs. | ||
1965 | * | ||
1966 | * So for all bits whose number have LOCK_ENABLED_* set (bitnr1 == 1), we can | ||
1967 | * create the mask with those bit numbers using LOCK_USED_IN_* (bitnr1 == 0) | ||
1968 | * instead by subtracting the bit number by 2, or shifting the mask right by 2. | ||
1969 | * | ||
1970 | * Similarly, bitnr1 == 0 becomes bitnr1 == 1 by adding 2, or shifting left 2. | ||
1971 | * | ||
1972 | * So split the mask (note that LOCKF_ENABLED_IRQ_ALL|LOCKF_USED_IN_IRQ_ALL is | ||
1973 | * all bits set) and recompose with bitnr1 flipped. | ||
1974 | */ | ||
1975 | static unsigned long invert_dir_mask(unsigned long mask) | ||
1976 | { | ||
1977 | unsigned long excl = 0; | ||
1978 | |||
1979 | /* Invert dir */ | ||
1980 | excl |= (mask & LOCKF_ENABLED_IRQ_ALL) >> LOCK_USAGE_DIR_MASK; | ||
1981 | excl |= (mask & LOCKF_USED_IN_IRQ_ALL) << LOCK_USAGE_DIR_MASK; | ||
1982 | |||
1983 | return excl; | ||
1984 | } | ||
1985 | |||
1986 | /* | ||
1987 | * As above, we clear bitnr0 (LOCK_*_READ off) with bitmask ops. First, for all | ||
1988 | * bits with bitnr0 set (LOCK_*_READ), add those with bitnr0 cleared (LOCK_*). | ||
1989 | * And then mask out all bitnr0. | ||
1990 | */ | ||
1991 | static unsigned long exclusive_mask(unsigned long mask) | ||
1992 | { | ||
1993 | unsigned long excl = invert_dir_mask(mask); | ||
1994 | |||
1995 | /* Strip read */ | ||
1996 | excl |= (excl & LOCKF_IRQ_READ) >> LOCK_USAGE_READ_MASK; | ||
1997 | excl &= ~LOCKF_IRQ_READ; | ||
1998 | |||
1999 | return excl; | ||
2000 | } | ||
2001 | |||
2002 | /* | ||
2003 | * Retrieve the _possible_ original mask to which @mask is | ||
2004 | * exclusive. Ie: this is the opposite of exclusive_mask(). | ||
2005 | * Note that 2 possible original bits can match an exclusive | ||
2006 | * bit: one has LOCK_USAGE_READ_MASK set, the other has it | ||
2007 | * cleared. So both are returned for each exclusive bit. | ||
2008 | */ | ||
2009 | static unsigned long original_mask(unsigned long mask) | ||
2010 | { | ||
2011 | unsigned long excl = invert_dir_mask(mask); | ||
2012 | |||
2013 | /* Include read in existing usages */ | ||
2014 | excl |= (excl & LOCKF_IRQ) << LOCK_USAGE_READ_MASK; | ||
2015 | |||
2016 | return excl; | ||
2017 | } | ||
2018 | |||
2019 | /* | ||
2020 | * Find the first pair of bit match between an original | ||
2021 | * usage mask and an exclusive usage mask. | ||
2022 | */ | ||
2023 | static int find_exclusive_match(unsigned long mask, | ||
2024 | unsigned long excl_mask, | ||
2025 | enum lock_usage_bit *bitp, | ||
2026 | enum lock_usage_bit *excl_bitp) | ||
2027 | { | ||
2028 | int bit, excl; | ||
2029 | |||
2030 | for_each_set_bit(bit, &mask, LOCK_USED) { | ||
2031 | excl = exclusive_bit(bit); | ||
2032 | if (excl_mask & lock_flag(excl)) { | ||
2033 | *bitp = bit; | ||
2034 | *excl_bitp = excl; | ||
2035 | return 0; | ||
2036 | } | ||
2037 | } | ||
2038 | return -1; | ||
2039 | } | ||
2040 | |||
2041 | /* | ||
2042 | * Prove that the new dependency does not connect a hardirq-safe(-read) | ||
2043 | * lock with a hardirq-unsafe lock - to achieve this we search | ||
2044 | * the backwards-subgraph starting at <prev>, and the | ||
2045 | * forwards-subgraph starting at <next>: | ||
2046 | */ | ||
1978 | static int check_irq_usage(struct task_struct *curr, struct held_lock *prev, | 2047 | static int check_irq_usage(struct task_struct *curr, struct held_lock *prev, |
1979 | struct held_lock *next, enum lock_usage_bit bit) | 2048 | struct held_lock *next) |
1980 | { | 2049 | { |
2050 | unsigned long usage_mask = 0, forward_mask, backward_mask; | ||
2051 | enum lock_usage_bit forward_bit = 0, backward_bit = 0; | ||
2052 | struct lock_list *uninitialized_var(target_entry1); | ||
2053 | struct lock_list *uninitialized_var(target_entry); | ||
2054 | struct lock_list this, that; | ||
2055 | int ret; | ||
2056 | |||
1981 | /* | 2057 | /* |
1982 | * Prove that the new dependency does not connect a hardirq-safe | 2058 | * Step 1: gather all hard/soft IRQs usages backward in an |
1983 | * lock with a hardirq-unsafe lock - to achieve this we search | 2059 | * accumulated usage mask. |
1984 | * the backwards-subgraph starting at <prev>, and the | ||
1985 | * forwards-subgraph starting at <next>: | ||
1986 | */ | 2060 | */ |
1987 | if (!check_usage(curr, prev, next, bit, | 2061 | this.parent = NULL; |
1988 | exclusive_bit(bit), state_name(bit))) | 2062 | this.class = hlock_class(prev); |
1989 | return 0; | 2063 | |
2064 | ret = __bfs_backwards(&this, &usage_mask, usage_accumulate, NULL); | ||
2065 | if (ret < 0) | ||
2066 | return print_bfs_bug(ret); | ||
1990 | 2067 | ||
1991 | bit++; /* _READ */ | 2068 | usage_mask &= LOCKF_USED_IN_IRQ_ALL; |
2069 | if (!usage_mask) | ||
2070 | return 1; | ||
1992 | 2071 | ||
1993 | /* | 2072 | /* |
1994 | * Prove that the new dependency does not connect a hardirq-safe-read | 2073 | * Step 2: find exclusive uses forward that match the previous |
1995 | * lock with a hardirq-unsafe lock - to achieve this we search | 2074 | * backward accumulated mask. |
1996 | * the backwards-subgraph starting at <prev>, and the | ||
1997 | * forwards-subgraph starting at <next>: | ||
1998 | */ | 2075 | */ |
1999 | if (!check_usage(curr, prev, next, bit, | 2076 | forward_mask = exclusive_mask(usage_mask); |
2000 | exclusive_bit(bit), state_name(bit))) | ||
2001 | return 0; | ||
2002 | 2077 | ||
2003 | return 1; | 2078 | that.parent = NULL; |
2004 | } | 2079 | that.class = hlock_class(next); |
2005 | 2080 | ||
2006 | static int | 2081 | ret = find_usage_forwards(&that, forward_mask, &target_entry1); |
2007 | check_prev_add_irq(struct task_struct *curr, struct held_lock *prev, | 2082 | if (ret < 0) |
2008 | struct held_lock *next) | 2083 | return print_bfs_bug(ret); |
2009 | { | 2084 | if (ret == 1) |
2010 | #define LOCKDEP_STATE(__STATE) \ | 2085 | return ret; |
2011 | if (!check_irq_usage(curr, prev, next, LOCK_USED_IN_##__STATE)) \ | ||
2012 | return 0; | ||
2013 | #include "lockdep_states.h" | ||
2014 | #undef LOCKDEP_STATE | ||
2015 | 2086 | ||
2016 | return 1; | 2087 | /* |
2088 | * Step 3: we found a bad match! Now retrieve a lock from the backward | ||
2089 | * list whose usage mask matches the exclusive usage mask from the | ||
2090 | * lock found on the forward list. | ||
2091 | */ | ||
2092 | backward_mask = original_mask(target_entry1->class->usage_mask); | ||
2093 | |||
2094 | ret = find_usage_backwards(&this, backward_mask, &target_entry); | ||
2095 | if (ret < 0) | ||
2096 | return print_bfs_bug(ret); | ||
2097 | if (DEBUG_LOCKS_WARN_ON(ret == 1)) | ||
2098 | return 1; | ||
2099 | |||
2100 | /* | ||
2101 | * Step 4: narrow down to a pair of incompatible usage bits | ||
2102 | * and report it. | ||
2103 | */ | ||
2104 | ret = find_exclusive_match(target_entry->class->usage_mask, | ||
2105 | target_entry1->class->usage_mask, | ||
2106 | &backward_bit, &forward_bit); | ||
2107 | if (DEBUG_LOCKS_WARN_ON(ret == -1)) | ||
2108 | return 1; | ||
2109 | |||
2110 | return print_bad_irq_dependency(curr, &this, &that, | ||
2111 | target_entry, target_entry1, | ||
2112 | prev, next, | ||
2113 | backward_bit, forward_bit, | ||
2114 | state_name(backward_bit)); | ||
2017 | } | 2115 | } |
2018 | 2116 | ||
2019 | static void inc_chains(void) | 2117 | static void inc_chains(void) |
@@ -2030,9 +2128,8 @@ static void inc_chains(void) | |||
2030 | 2128 | ||
2031 | #else | 2129 | #else |
2032 | 2130 | ||
2033 | static inline int | 2131 | static inline int check_irq_usage(struct task_struct *curr, |
2034 | check_prev_add_irq(struct task_struct *curr, struct held_lock *prev, | 2132 | struct held_lock *prev, struct held_lock *next) |
2035 | struct held_lock *next) | ||
2036 | { | 2133 | { |
2037 | return 1; | 2134 | return 1; |
2038 | } | 2135 | } |
@@ -2211,7 +2308,7 @@ check_prev_add(struct task_struct *curr, struct held_lock *prev, | |||
2211 | else if (unlikely(ret < 0)) | 2308 | else if (unlikely(ret < 0)) |
2212 | return print_bfs_bug(ret); | 2309 | return print_bfs_bug(ret); |
2213 | 2310 | ||
2214 | if (!check_prev_add_irq(curr, prev, next)) | 2311 | if (!check_irq_usage(curr, prev, next)) |
2215 | return 0; | 2312 | return 0; |
2216 | 2313 | ||
2217 | /* | 2314 | /* |
@@ -2773,6 +2870,12 @@ static void check_chain_key(struct task_struct *curr) | |||
2773 | #endif | 2870 | #endif |
2774 | } | 2871 | } |
2775 | 2872 | ||
2873 | static int mark_lock(struct task_struct *curr, struct held_lock *this, | ||
2874 | enum lock_usage_bit new_bit); | ||
2875 | |||
2876 | #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) | ||
2877 | |||
2878 | |||
2776 | static void | 2879 | static void |
2777 | print_usage_bug_scenario(struct held_lock *lock) | 2880 | print_usage_bug_scenario(struct held_lock *lock) |
2778 | { | 2881 | { |
@@ -2842,10 +2945,6 @@ valid_state(struct task_struct *curr, struct held_lock *this, | |||
2842 | return 1; | 2945 | return 1; |
2843 | } | 2946 | } |
2844 | 2947 | ||
2845 | static int mark_lock(struct task_struct *curr, struct held_lock *this, | ||
2846 | enum lock_usage_bit new_bit); | ||
2847 | |||
2848 | #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING) | ||
2849 | 2948 | ||
2850 | /* | 2949 | /* |
2851 | * print irq inversion bug: | 2950 | * print irq inversion bug: |
@@ -2925,7 +3024,7 @@ check_usage_forwards(struct task_struct *curr, struct held_lock *this, | |||
2925 | 3024 | ||
2926 | root.parent = NULL; | 3025 | root.parent = NULL; |
2927 | root.class = hlock_class(this); | 3026 | root.class = hlock_class(this); |
2928 | ret = find_usage_forwards(&root, bit, &target_entry); | 3027 | ret = find_usage_forwards(&root, lock_flag(bit), &target_entry); |
2929 | if (ret < 0) | 3028 | if (ret < 0) |
2930 | return print_bfs_bug(ret); | 3029 | return print_bfs_bug(ret); |
2931 | if (ret == 1) | 3030 | if (ret == 1) |
@@ -2949,7 +3048,7 @@ check_usage_backwards(struct task_struct *curr, struct held_lock *this, | |||
2949 | 3048 | ||
2950 | root.parent = NULL; | 3049 | root.parent = NULL; |
2951 | root.class = hlock_class(this); | 3050 | root.class = hlock_class(this); |
2952 | ret = find_usage_backwards(&root, bit, &target_entry); | 3051 | ret = find_usage_backwards(&root, lock_flag(bit), &target_entry); |
2953 | if (ret < 0) | 3052 | if (ret < 0) |
2954 | return print_bfs_bug(ret); | 3053 | return print_bfs_bug(ret); |
2955 | if (ret == 1) | 3054 | if (ret == 1) |
@@ -3004,7 +3103,7 @@ static int (*state_verbose_f[])(struct lock_class *class) = { | |||
3004 | static inline int state_verbose(enum lock_usage_bit bit, | 3103 | static inline int state_verbose(enum lock_usage_bit bit, |
3005 | struct lock_class *class) | 3104 | struct lock_class *class) |
3006 | { | 3105 | { |
3007 | return state_verbose_f[bit >> 2](class); | 3106 | return state_verbose_f[bit >> LOCK_USAGE_DIR_MASK](class); |
3008 | } | 3107 | } |
3009 | 3108 | ||
3010 | typedef int (*check_usage_f)(struct task_struct *, struct held_lock *, | 3109 | typedef int (*check_usage_f)(struct task_struct *, struct held_lock *, |
@@ -3146,7 +3245,7 @@ void lockdep_hardirqs_on(unsigned long ip) | |||
3146 | /* | 3245 | /* |
3147 | * See the fine text that goes along with this variable definition. | 3246 | * See the fine text that goes along with this variable definition. |
3148 | */ | 3247 | */ |
3149 | if (DEBUG_LOCKS_WARN_ON(unlikely(early_boot_irqs_disabled))) | 3248 | if (DEBUG_LOCKS_WARN_ON(early_boot_irqs_disabled)) |
3150 | return; | 3249 | return; |
3151 | 3250 | ||
3152 | /* | 3251 | /* |
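The check_irq_usage() rewrite above leans on the usage-bit encoding documented next to exclusive_bit(): bit 0 is the read flag, bit 1 the direction (USED_IN vs ENABLED), and the remaining bits select the state. A hedged, stand-alone sketch of that arithmetic follows; the mask constants and the HARDIRQ bit values are redefined locally as assumptions mirroring lockdep_internals.h and lockdep.h, so the example compiles on its own.

#include <stdio.h>

/* Assumed values mirroring lockdep_internals.h; redefined for self-containment. */
#define LOCK_USAGE_READ_MASK	1
#define LOCK_USAGE_DIR_MASK	2
#define LOCK_USAGE_STATE_MASK	(~(LOCK_USAGE_READ_MASK | LOCK_USAGE_DIR_MASK))

/* HARDIRQ is the first state in lockdep_states.h, hence bits 0..3. */
enum lock_usage_bit {
	LOCK_USED_IN_HARDIRQ		= 0,
	LOCK_USED_IN_HARDIRQ_READ	= 1,
	LOCK_ENABLED_HARDIRQ		= 2,
	LOCK_ENABLED_HARDIRQ_READ	= 3,
};

/* Same body as exclusive_bit() in the hunk above: flip direction, drop read. */
static int exclusive_bit(int new_bit)
{
	int state = new_bit & LOCK_USAGE_STATE_MASK;
	int dir = new_bit & LOCK_USAGE_DIR_MASK;

	return state | (dir ^ LOCK_USAGE_DIR_MASK);
}

int main(void)
{
	/* An ENABLED_HARDIRQ_READ usage conflicts with USED_IN_HARDIRQ (0). */
	printf("exclusive_bit(LOCK_ENABLED_HARDIRQ_READ) = %d\n",
	       exclusive_bit(LOCK_ENABLED_HARDIRQ_READ));
	/* ...and a USED_IN_HARDIRQ usage conflicts with ENABLED_HARDIRQ (2). */
	printf("exclusive_bit(LOCK_USED_IN_HARDIRQ) = %d\n",
	       exclusive_bit(LOCK_USED_IN_HARDIRQ));
	return 0;
}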
diff --git a/kernel/locking/lockdep_internals.h b/kernel/locking/lockdep_internals.h
index d4c197425f68..150ec3f0c5b5 100644
--- a/kernel/locking/lockdep_internals.h
+++ b/kernel/locking/lockdep_internals.h
@@ -42,13 +42,35 @@ enum {
 	__LOCKF(USED)
 };
 
-#define LOCKF_ENABLED_IRQ (LOCKF_ENABLED_HARDIRQ | LOCKF_ENABLED_SOFTIRQ)
-#define LOCKF_USED_IN_IRQ (LOCKF_USED_IN_HARDIRQ | LOCKF_USED_IN_SOFTIRQ)
+#define LOCKDEP_STATE(__STATE)	LOCKF_ENABLED_##__STATE |
+static const unsigned long LOCKF_ENABLED_IRQ =
+#include "lockdep_states.h"
+	0;
+#undef LOCKDEP_STATE
+
+#define LOCKDEP_STATE(__STATE)	LOCKF_USED_IN_##__STATE |
+static const unsigned long LOCKF_USED_IN_IRQ =
+#include "lockdep_states.h"
+	0;
+#undef LOCKDEP_STATE
+
+#define LOCKDEP_STATE(__STATE)	LOCKF_ENABLED_##__STATE##_READ |
+static const unsigned long LOCKF_ENABLED_IRQ_READ =
+#include "lockdep_states.h"
+	0;
+#undef LOCKDEP_STATE
+
+#define LOCKDEP_STATE(__STATE)	LOCKF_USED_IN_##__STATE##_READ |
+static const unsigned long LOCKF_USED_IN_IRQ_READ =
+#include "lockdep_states.h"
+	0;
+#undef LOCKDEP_STATE
+
+#define LOCKF_ENABLED_IRQ_ALL (LOCKF_ENABLED_IRQ | LOCKF_ENABLED_IRQ_READ)
+#define LOCKF_USED_IN_IRQ_ALL (LOCKF_USED_IN_IRQ | LOCKF_USED_IN_IRQ_READ)
 
-#define LOCKF_ENABLED_IRQ_READ \
-		(LOCKF_ENABLED_HARDIRQ_READ | LOCKF_ENABLED_SOFTIRQ_READ)
-#define LOCKF_USED_IN_IRQ_READ \
-		(LOCKF_USED_IN_HARDIRQ_READ | LOCKF_USED_IN_SOFTIRQ_READ)
+#define LOCKF_IRQ (LOCKF_ENABLED_IRQ | LOCKF_USED_IN_IRQ)
+#define LOCKF_IRQ_READ (LOCKF_ENABLED_IRQ_READ | LOCKF_USED_IN_IRQ_READ)
 
 /*
  * CONFIG_LOCKDEP_SMALL is defined for sparc. Sparc requires .text,
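The generated composites above work because lockdep_states.h contains one LOCKDEP_STATE(x) line per IRQ state (HARDIRQ and SOFTIRQ in this kernel), so each include pastes a trailing-OR term and the final 0 closes the expression. A hedged, self-contained analog of the trick, with the states list simulated by a local macro and invented flag values instead of the __LOCKF() enum, is:

#include <stdio.h>

/* Invented flag values; in lockdep they come from the __LOCKF() enum. */
#define DEMO_ENABLED_HARDIRQ	0x1UL
#define DEMO_ENABLED_SOFTIRQ	0x2UL

/* Stand-in for '#include "lockdep_states.h"' expanded under LOCKDEP_STATE(). */
#define DEMO_STATES(LOCKDEP_STATE)	\
	LOCKDEP_STATE(HARDIRQ)		\
	LOCKDEP_STATE(SOFTIRQ)

/* Each state pastes "DEMO_ENABLED_<state> |"; the trailing 0 ends the chain. */
#define AS_TERM(__STATE) DEMO_ENABLED_##__STATE |
static const unsigned long DEMO_ENABLED_IRQ = DEMO_STATES(AS_TERM) 0;
#undef AS_TERM

int main(void)
{
	printf("DEMO_ENABLED_IRQ = 0x%lx\n", DEMO_ENABLED_IRQ);	/* prints 0x3 */
	return 0;
}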
diff --git a/kernel/locking/percpu-rwsem.c b/kernel/locking/percpu-rwsem.c
index 883cf1b92d90..f17dad99eec8 100644
--- a/kernel/locking/percpu-rwsem.c
+++ b/kernel/locking/percpu-rwsem.c
@@ -7,6 +7,8 @@ | |||
7 | #include <linux/sched.h> | 7 | #include <linux/sched.h> |
8 | #include <linux/errno.h> | 8 | #include <linux/errno.h> |
9 | 9 | ||
10 | #include "rwsem.h" | ||
11 | |||
10 | int __percpu_init_rwsem(struct percpu_rw_semaphore *sem, | 12 | int __percpu_init_rwsem(struct percpu_rw_semaphore *sem, |
11 | const char *name, struct lock_class_key *rwsem_key) | 13 | const char *name, struct lock_class_key *rwsem_key) |
12 | { | 14 | { |
diff --git a/kernel/locking/qspinlock.c b/kernel/locking/qspinlock.c
index 5e9247dc2515..e14b32c69639 100644
--- a/kernel/locking/qspinlock.c
+++ b/kernel/locking/qspinlock.c
@@ -395,7 +395,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) | |||
395 | * 0,1,0 -> 0,0,1 | 395 | * 0,1,0 -> 0,0,1 |
396 | */ | 396 | */ |
397 | clear_pending_set_locked(lock); | 397 | clear_pending_set_locked(lock); |
398 | qstat_inc(qstat_lock_pending, true); | 398 | lockevent_inc(lock_pending); |
399 | return; | 399 | return; |
400 | 400 | ||
401 | /* | 401 | /* |
@@ -403,7 +403,7 @@ void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val) | |||
403 | * queuing. | 403 | * queuing. |
404 | */ | 404 | */ |
405 | queue: | 405 | queue: |
406 | qstat_inc(qstat_lock_slowpath, true); | 406 | lockevent_inc(lock_slowpath); |
407 | pv_queue: | 407 | pv_queue: |
408 | node = this_cpu_ptr(&qnodes[0].mcs); | 408 | node = this_cpu_ptr(&qnodes[0].mcs); |
409 | idx = node->count++; | 409 | idx = node->count++; |
@@ -419,7 +419,7 @@ pv_queue: | |||
419 | * simple enough. | 419 | * simple enough. |
420 | */ | 420 | */ |
421 | if (unlikely(idx >= MAX_NODES)) { | 421 | if (unlikely(idx >= MAX_NODES)) { |
422 | qstat_inc(qstat_lock_no_node, true); | 422 | lockevent_inc(lock_no_node); |
423 | while (!queued_spin_trylock(lock)) | 423 | while (!queued_spin_trylock(lock)) |
424 | cpu_relax(); | 424 | cpu_relax(); |
425 | goto release; | 425 | goto release; |
@@ -430,7 +430,7 @@ pv_queue: | |||
430 | /* | 430 | /* |
431 | * Keep counts of non-zero index values: | 431 | * Keep counts of non-zero index values: |
432 | */ | 432 | */ |
433 | qstat_inc(qstat_lock_use_node2 + idx - 1, idx); | 433 | lockevent_cond_inc(lock_use_node2 + idx - 1, idx); |
434 | 434 | ||
435 | /* | 435 | /* |
436 | * Ensure that we increment the head node->count before initialising | 436 | * Ensure that we increment the head node->count before initialising |
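The lockevent_cond_inc(lock_use_node2 + idx - 1, idx) call above exploits the fact that the lock_use_node2/3/4 events are declared consecutively in lock_events_list.h, so the MCS nesting level indexes the right counter and nothing is counted when idx == 0. A hedged, stand-alone sketch of the idiom, with simplified counters and invented names:

#include <stdio.h>

/* Invented counters; the real events are consecutive in lock_events_list.h. */
enum { EV_use_node2, EV_use_node3, EV_use_node4, EV_num };
static unsigned long events[EV_num];

#define cond_inc(ev, c) do { if (c) events[ev]++; } while (0)

/* idx is the per-CPU MCS node nesting level, 0..3. */
static void count_nesting(int idx)
{
	/*
	 * Mirrors lockevent_cond_inc(lock_use_node2 + idx - 1, idx):
	 * when idx == 0 the condition is false and the out-of-range
	 * index is never used.
	 */
	cond_inc(EV_use_node2 + idx - 1, idx);
}

int main(void)
{
	count_nesting(0);	/* common case: nothing counted */
	count_nesting(2);	/* bumps EV_use_node3 */
	count_nesting(3);	/* bumps EV_use_node4 */
	printf("node2=%lu node3=%lu node4=%lu\n",
	       events[EV_use_node2], events[EV_use_node3],
	       events[EV_use_node4]);
	return 0;
}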
diff --git a/kernel/locking/qspinlock_paravirt.h b/kernel/locking/qspinlock_paravirt.h
index 8f36c27c1794..89bab079e7a4 100644
--- a/kernel/locking/qspinlock_paravirt.h
+++ b/kernel/locking/qspinlock_paravirt.h
@@ -89,7 +89,7 @@ static inline bool pv_hybrid_queued_unfair_trylock(struct qspinlock *lock) | |||
89 | 89 | ||
90 | if (!(val & _Q_LOCKED_PENDING_MASK) && | 90 | if (!(val & _Q_LOCKED_PENDING_MASK) && |
91 | (cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) { | 91 | (cmpxchg_acquire(&lock->locked, 0, _Q_LOCKED_VAL) == 0)) { |
92 | qstat_inc(qstat_pv_lock_stealing, true); | 92 | lockevent_inc(pv_lock_stealing); |
93 | return true; | 93 | return true; |
94 | } | 94 | } |
95 | if (!(val & _Q_TAIL_MASK) || (val & _Q_PENDING_MASK)) | 95 | if (!(val & _Q_TAIL_MASK) || (val & _Q_PENDING_MASK)) |
@@ -219,7 +219,7 @@ static struct qspinlock **pv_hash(struct qspinlock *lock, struct pv_node *node) | |||
219 | hopcnt++; | 219 | hopcnt++; |
220 | if (!cmpxchg(&he->lock, NULL, lock)) { | 220 | if (!cmpxchg(&he->lock, NULL, lock)) { |
221 | WRITE_ONCE(he->node, node); | 221 | WRITE_ONCE(he->node, node); |
222 | qstat_hop(hopcnt); | 222 | lockevent_pv_hop(hopcnt); |
223 | return &he->lock; | 223 | return &he->lock; |
224 | } | 224 | } |
225 | } | 225 | } |
@@ -320,8 +320,8 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev) | |||
320 | smp_store_mb(pn->state, vcpu_halted); | 320 | smp_store_mb(pn->state, vcpu_halted); |
321 | 321 | ||
322 | if (!READ_ONCE(node->locked)) { | 322 | if (!READ_ONCE(node->locked)) { |
323 | qstat_inc(qstat_pv_wait_node, true); | 323 | lockevent_inc(pv_wait_node); |
324 | qstat_inc(qstat_pv_wait_early, wait_early); | 324 | lockevent_cond_inc(pv_wait_early, wait_early); |
325 | pv_wait(&pn->state, vcpu_halted); | 325 | pv_wait(&pn->state, vcpu_halted); |
326 | } | 326 | } |
327 | 327 | ||
@@ -339,7 +339,8 @@ static void pv_wait_node(struct mcs_spinlock *node, struct mcs_spinlock *prev) | |||
339 | * So it is better to spin for a while in the hope that the | 339 | * So it is better to spin for a while in the hope that the |
340 | * MCS lock will be released soon. | 340 | * MCS lock will be released soon. |
341 | */ | 341 | */ |
342 | qstat_inc(qstat_pv_spurious_wakeup, !READ_ONCE(node->locked)); | 342 | lockevent_cond_inc(pv_spurious_wakeup, |
343 | !READ_ONCE(node->locked)); | ||
343 | } | 344 | } |
344 | 345 | ||
345 | /* | 346 | /* |
@@ -416,7 +417,7 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) | |||
416 | /* | 417 | /* |
417 | * Tracking # of slowpath locking operations | 418 | * Tracking # of slowpath locking operations |
418 | */ | 419 | */ |
419 | qstat_inc(qstat_lock_slowpath, true); | 420 | lockevent_inc(lock_slowpath); |
420 | 421 | ||
421 | for (;; waitcnt++) { | 422 | for (;; waitcnt++) { |
422 | /* | 423 | /* |
@@ -464,8 +465,8 @@ pv_wait_head_or_lock(struct qspinlock *lock, struct mcs_spinlock *node) | |||
464 | } | 465 | } |
465 | } | 466 | } |
466 | WRITE_ONCE(pn->state, vcpu_hashed); | 467 | WRITE_ONCE(pn->state, vcpu_hashed); |
467 | qstat_inc(qstat_pv_wait_head, true); | 468 | lockevent_inc(pv_wait_head); |
468 | qstat_inc(qstat_pv_wait_again, waitcnt); | 469 | lockevent_cond_inc(pv_wait_again, waitcnt); |
469 | pv_wait(&lock->locked, _Q_SLOW_VAL); | 470 | pv_wait(&lock->locked, _Q_SLOW_VAL); |
470 | 471 | ||
471 | /* | 472 | /* |
@@ -528,7 +529,7 @@ __pv_queued_spin_unlock_slowpath(struct qspinlock *lock, u8 locked) | |||
528 | * vCPU is harmless other than the additional latency in completing | 529 | * vCPU is harmless other than the additional latency in completing |
529 | * the unlock. | 530 | * the unlock. |
530 | */ | 531 | */ |
531 | qstat_inc(qstat_pv_kick_unlock, true); | 532 | lockevent_inc(pv_kick_unlock); |
532 | pv_kick(node->cpu); | 533 | pv_kick(node->cpu); |
533 | } | 534 | } |
534 | 535 | ||
diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
index d73f85388d5c..54152670ff24 100644
--- a/kernel/locking/qspinlock_stat.h
+++ b/kernel/locking/qspinlock_stat.h
@@ -9,262 +9,105 @@ | |||
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | 9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
10 | * GNU General Public License for more details. | 10 | * GNU General Public License for more details. |
11 | * | 11 | * |
12 | * Authors: Waiman Long <waiman.long@hpe.com> | 12 | * Authors: Waiman Long <longman@redhat.com> |
13 | */ | 13 | */ |
14 | 14 | ||
15 | /* | 15 | #include "lock_events.h" |
16 | * When queued spinlock statistical counters are enabled, the following | ||
17 | * debugfs files will be created for reporting the counter values: | ||
18 | * | ||
19 | * <debugfs>/qlockstat/ | ||
20 | * pv_hash_hops - average # of hops per hashing operation | ||
21 | * pv_kick_unlock - # of vCPU kicks issued at unlock time | ||
22 | * pv_kick_wake - # of vCPU kicks used for computing pv_latency_wake | ||
23 | * pv_latency_kick - average latency (ns) of vCPU kick operation | ||
24 | * pv_latency_wake - average latency (ns) from vCPU kick to wakeup | ||
25 | * pv_lock_stealing - # of lock stealing operations | ||
26 | * pv_spurious_wakeup - # of spurious wakeups in non-head vCPUs | ||
27 | * pv_wait_again - # of wait's after a queue head vCPU kick | ||
28 | * pv_wait_early - # of early vCPU wait's | ||
29 | * pv_wait_head - # of vCPU wait's at the queue head | ||
30 | * pv_wait_node - # of vCPU wait's at a non-head queue node | ||
31 | * lock_pending - # of locking operations via pending code | ||
32 | * lock_slowpath - # of locking operations via MCS lock queue | ||
33 | * lock_use_node2 - # of locking operations that use 2nd per-CPU node | ||
34 | * lock_use_node3 - # of locking operations that use 3rd per-CPU node | ||
35 | * lock_use_node4 - # of locking operations that use 4th per-CPU node | ||
36 | * lock_no_node - # of locking operations without using per-CPU node | ||
37 | * | ||
38 | * Subtracting lock_use_node[234] from lock_slowpath will give you | ||
39 | * lock_use_node1. | ||
40 | * | ||
41 | * Writing to the "reset_counters" file will reset all the above counter | ||
42 | * values. | ||
43 | * | ||
44 | * These statistical counters are implemented as per-cpu variables which are | ||
45 | * summed and computed whenever the corresponding debugfs files are read. This | ||
46 | * minimizes added overhead making the counters usable even in a production | ||
47 | * environment. | ||
48 | * | ||
49 | * There may be slight difference between pv_kick_wake and pv_kick_unlock. | ||
50 | */ | ||
51 | enum qlock_stats { | ||
52 | qstat_pv_hash_hops, | ||
53 | qstat_pv_kick_unlock, | ||
54 | qstat_pv_kick_wake, | ||
55 | qstat_pv_latency_kick, | ||
56 | qstat_pv_latency_wake, | ||
57 | qstat_pv_lock_stealing, | ||
58 | qstat_pv_spurious_wakeup, | ||
59 | qstat_pv_wait_again, | ||
60 | qstat_pv_wait_early, | ||
61 | qstat_pv_wait_head, | ||
62 | qstat_pv_wait_node, | ||
63 | qstat_lock_pending, | ||
64 | qstat_lock_slowpath, | ||
65 | qstat_lock_use_node2, | ||
66 | qstat_lock_use_node3, | ||
67 | qstat_lock_use_node4, | ||
68 | qstat_lock_no_node, | ||
69 | qstat_num, /* Total number of statistical counters */ | ||
70 | qstat_reset_cnts = qstat_num, | ||
71 | }; | ||
72 | 16 | ||
73 | #ifdef CONFIG_QUEUED_LOCK_STAT | 17 | #ifdef CONFIG_LOCK_EVENT_COUNTS |
18 | #ifdef CONFIG_PARAVIRT_SPINLOCKS | ||
74 | /* | 19 | /* |
75 | * Collect pvqspinlock statistics | 20 | * Collect pvqspinlock locking event counts |
76 | */ | 21 | */ |
77 | #include <linux/debugfs.h> | ||
78 | #include <linux/sched.h> | 22 | #include <linux/sched.h> |
79 | #include <linux/sched/clock.h> | 23 | #include <linux/sched/clock.h> |
80 | #include <linux/fs.h> | 24 | #include <linux/fs.h> |
81 | 25 | ||
82 | static const char * const qstat_names[qstat_num + 1] = { | 26 | #define EVENT_COUNT(ev) lockevents[LOCKEVENT_ ## ev] |
83 | [qstat_pv_hash_hops] = "pv_hash_hops", | ||
84 | [qstat_pv_kick_unlock] = "pv_kick_unlock", | ||
85 | [qstat_pv_kick_wake] = "pv_kick_wake", | ||
86 | [qstat_pv_spurious_wakeup] = "pv_spurious_wakeup", | ||
87 | [qstat_pv_latency_kick] = "pv_latency_kick", | ||
88 | [qstat_pv_latency_wake] = "pv_latency_wake", | ||
89 | [qstat_pv_lock_stealing] = "pv_lock_stealing", | ||
90 | [qstat_pv_wait_again] = "pv_wait_again", | ||
91 | [qstat_pv_wait_early] = "pv_wait_early", | ||
92 | [qstat_pv_wait_head] = "pv_wait_head", | ||
93 | [qstat_pv_wait_node] = "pv_wait_node", | ||
94 | [qstat_lock_pending] = "lock_pending", | ||
95 | [qstat_lock_slowpath] = "lock_slowpath", | ||
96 | [qstat_lock_use_node2] = "lock_use_node2", | ||
97 | [qstat_lock_use_node3] = "lock_use_node3", | ||
98 | [qstat_lock_use_node4] = "lock_use_node4", | ||
99 | [qstat_lock_no_node] = "lock_no_node", | ||
100 | [qstat_reset_cnts] = "reset_counters", | ||
101 | }; | ||
102 | 27 | ||
103 | /* | 28 | /* |
104 | * Per-cpu counters | 29 | * PV specific per-cpu counter |
105 | */ | 30 | */ |
106 | static DEFINE_PER_CPU(unsigned long, qstats[qstat_num]); | ||
107 | static DEFINE_PER_CPU(u64, pv_kick_time); | 31 | static DEFINE_PER_CPU(u64, pv_kick_time); |
108 | 32 | ||
109 | /* | 33 | /* |
110 | * Function to read and return the qlock statistical counter values | 34 | * Function to read and return the PV qspinlock counts. |
111 | * | 35 | * |
112 | * The following counters are handled specially: | 36 | * The following counters are handled specially: |
113 | * 1. qstat_pv_latency_kick | 37 | * 1. pv_latency_kick |
114 | * Average kick latency (ns) = pv_latency_kick/pv_kick_unlock | 38 | * Average kick latency (ns) = pv_latency_kick/pv_kick_unlock |
115 | * 2. qstat_pv_latency_wake | 39 | * 2. pv_latency_wake |
116 | * Average wake latency (ns) = pv_latency_wake/pv_kick_wake | 40 | * Average wake latency (ns) = pv_latency_wake/pv_kick_wake |
117 | * 3. qstat_pv_hash_hops | 41 | * 3. pv_hash_hops |
118 | * Average hops/hash = pv_hash_hops/pv_kick_unlock | 42 | * Average hops/hash = pv_hash_hops/pv_kick_unlock |
119 | */ | 43 | */ |
120 | static ssize_t qstat_read(struct file *file, char __user *user_buf, | 44 | ssize_t lockevent_read(struct file *file, char __user *user_buf, |
121 | size_t count, loff_t *ppos) | 45 | size_t count, loff_t *ppos) |
122 | { | 46 | { |
123 | char buf[64]; | 47 | char buf[64]; |
124 | int cpu, counter, len; | 48 | int cpu, id, len; |
125 | u64 stat = 0, kicks = 0; | 49 | u64 sum = 0, kicks = 0; |
126 | 50 | ||
127 | /* | 51 | /* |
128 | * Get the counter ID stored in file->f_inode->i_private | 52 | * Get the counter ID stored in file->f_inode->i_private |
129 | */ | 53 | */ |
130 | counter = (long)file_inode(file)->i_private; | 54 | id = (long)file_inode(file)->i_private; |
131 | 55 | ||
132 | if (counter >= qstat_num) | 56 | if (id >= lockevent_num) |
133 | return -EBADF; | 57 | return -EBADF; |
134 | 58 | ||
135 | for_each_possible_cpu(cpu) { | 59 | for_each_possible_cpu(cpu) { |
136 | stat += per_cpu(qstats[counter], cpu); | 60 | sum += per_cpu(lockevents[id], cpu); |
137 | /* | 61 | /* |
138 | * Need to sum additional counter for some of them | 62 | * Need to sum additional counters for some of them |
139 | */ | 63 | */ |
140 | switch (counter) { | 64 | switch (id) { |
141 | 65 | ||
142 | case qstat_pv_latency_kick: | 66 | case LOCKEVENT_pv_latency_kick: |
143 | case qstat_pv_hash_hops: | 67 | case LOCKEVENT_pv_hash_hops: |
144 | kicks += per_cpu(qstats[qstat_pv_kick_unlock], cpu); | 68 | kicks += per_cpu(EVENT_COUNT(pv_kick_unlock), cpu); |
145 | break; | 69 | break; |
146 | 70 | ||
147 | case qstat_pv_latency_wake: | 71 | case LOCKEVENT_pv_latency_wake: |
148 | kicks += per_cpu(qstats[qstat_pv_kick_wake], cpu); | 72 | kicks += per_cpu(EVENT_COUNT(pv_kick_wake), cpu); |
149 | break; | 73 | break; |
150 | } | 74 | } |
151 | } | 75 | } |
152 | 76 | ||
153 | if (counter == qstat_pv_hash_hops) { | 77 | if (id == LOCKEVENT_pv_hash_hops) { |
154 | u64 frac = 0; | 78 | u64 frac = 0; |
155 | 79 | ||
156 | if (kicks) { | 80 | if (kicks) { |
157 | frac = 100ULL * do_div(stat, kicks); | 81 | frac = 100ULL * do_div(sum, kicks); |
158 | frac = DIV_ROUND_CLOSEST_ULL(frac, kicks); | 82 | frac = DIV_ROUND_CLOSEST_ULL(frac, kicks); |
159 | } | 83 | } |
160 | 84 | ||
161 | /* | 85 | /* |
162 | * Return a X.XX decimal number | 86 | * Return a X.XX decimal number |
163 | */ | 87 | */ |
164 | len = snprintf(buf, sizeof(buf) - 1, "%llu.%02llu\n", stat, frac); | 88 | len = snprintf(buf, sizeof(buf) - 1, "%llu.%02llu\n", |
89 | sum, frac); | ||
165 | } else { | 90 | } else { |
166 | /* | 91 | /* |
167 | * Round to the nearest ns | 92 | * Round to the nearest ns |
168 | */ | 93 | */ |
169 | if ((counter == qstat_pv_latency_kick) || | 94 | if ((id == LOCKEVENT_pv_latency_kick) || |
170 | (counter == qstat_pv_latency_wake)) { | 95 | (id == LOCKEVENT_pv_latency_wake)) { |
171 | if (kicks) | 96 | if (kicks) |
172 | stat = DIV_ROUND_CLOSEST_ULL(stat, kicks); | 97 | sum = DIV_ROUND_CLOSEST_ULL(sum, kicks); |
173 | } | 98 | } |
174 | len = snprintf(buf, sizeof(buf) - 1, "%llu\n", stat); | 99 | len = snprintf(buf, sizeof(buf) - 1, "%llu\n", sum); |
175 | } | 100 | } |
176 | 101 | ||
177 | return simple_read_from_buffer(user_buf, count, ppos, buf, len); | 102 | return simple_read_from_buffer(user_buf, count, ppos, buf, len); |
178 | } | 103 | } |
179 | 104 | ||
180 | /* | 105 | /* |
181 | * Function to handle write request | ||
182 | * | ||
183 | * When counter = reset_cnts, reset all the counter values. | ||
184 | * Since the counter updates aren't atomic, the resetting is done twice | ||
185 | * to make sure that the counters are very likely to be all cleared. | ||
186 | */ | ||
187 | static ssize_t qstat_write(struct file *file, const char __user *user_buf, | ||
188 | size_t count, loff_t *ppos) | ||
189 | { | ||
190 | int cpu; | ||
191 | |||
192 | /* | ||
193 | * Get the counter ID stored in file->f_inode->i_private | ||
194 | */ | ||
195 | if ((long)file_inode(file)->i_private != qstat_reset_cnts) | ||
196 | return count; | ||
197 | |||
198 | for_each_possible_cpu(cpu) { | ||
199 | int i; | ||
200 | unsigned long *ptr = per_cpu_ptr(qstats, cpu); | ||
201 | |||
202 | for (i = 0 ; i < qstat_num; i++) | ||
203 | WRITE_ONCE(ptr[i], 0); | ||
204 | } | ||
205 | return count; | ||
206 | } | ||
207 | |||
208 | /* | ||
209 | * Debugfs data structures | ||
210 | */ | ||
211 | static const struct file_operations fops_qstat = { | ||
212 | .read = qstat_read, | ||
213 | .write = qstat_write, | ||
214 | .llseek = default_llseek, | ||
215 | }; | ||
216 | |||
217 | /* | ||
218 | * Initialize debugfs for the qspinlock statistical counters | ||
219 | */ | ||
220 | static int __init init_qspinlock_stat(void) | ||
221 | { | ||
222 | struct dentry *d_qstat = debugfs_create_dir("qlockstat", NULL); | ||
223 | int i; | ||
224 | |||
225 | if (!d_qstat) | ||
226 | goto out; | ||
227 | |||
228 | /* | ||
229 | * Create the debugfs files | ||
230 | * | ||
231 | * As reading from and writing to the stat files can be slow, only | ||
232 | * root is allowed to do the read/write to limit impact to system | ||
233 | * performance. | ||
234 | */ | ||
235 | for (i = 0; i < qstat_num; i++) | ||
236 | if (!debugfs_create_file(qstat_names[i], 0400, d_qstat, | ||
237 | (void *)(long)i, &fops_qstat)) | ||
238 | goto fail_undo; | ||
239 | |||
240 | if (!debugfs_create_file(qstat_names[qstat_reset_cnts], 0200, d_qstat, | ||
241 | (void *)(long)qstat_reset_cnts, &fops_qstat)) | ||
242 | goto fail_undo; | ||
243 | |||
244 | return 0; | ||
245 | fail_undo: | ||
246 | debugfs_remove_recursive(d_qstat); | ||
247 | out: | ||
248 | pr_warn("Could not create 'qlockstat' debugfs entries\n"); | ||
249 | return -ENOMEM; | ||
250 | } | ||
251 | fs_initcall(init_qspinlock_stat); | ||
252 | |||
253 | /* | ||
254 | * Increment the PV qspinlock statistical counters | ||
255 | */ | ||
256 | static inline void qstat_inc(enum qlock_stats stat, bool cond) | ||
257 | { | ||
258 | if (cond) | ||
259 | this_cpu_inc(qstats[stat]); | ||
260 | } | ||
261 | |||
262 | /* | ||
263 | * PV hash hop count | 106 | * PV hash hop count |
264 | */ | 107 | */ |
265 | static inline void qstat_hop(int hopcnt) | 108 | static inline void lockevent_pv_hop(int hopcnt) |
266 | { | 109 | { |
267 | this_cpu_add(qstats[qstat_pv_hash_hops], hopcnt); | 110 | this_cpu_add(EVENT_COUNT(pv_hash_hops), hopcnt); |
268 | } | 111 | } |
269 | 112 | ||
270 | /* | 113 | /* |
@@ -276,7 +119,7 @@ static inline void __pv_kick(int cpu) | |||
276 | 119 | ||
277 | per_cpu(pv_kick_time, cpu) = start; | 120 | per_cpu(pv_kick_time, cpu) = start; |
278 | pv_kick(cpu); | 121 | pv_kick(cpu); |
279 | this_cpu_add(qstats[qstat_pv_latency_kick], sched_clock() - start); | 122 | this_cpu_add(EVENT_COUNT(pv_latency_kick), sched_clock() - start); |
280 | } | 123 | } |
281 | 124 | ||
282 | /* | 125 | /* |
@@ -289,18 +132,19 @@ static inline void __pv_wait(u8 *ptr, u8 val) | |||
289 | *pkick_time = 0; | 132 | *pkick_time = 0; |
290 | pv_wait(ptr, val); | 133 | pv_wait(ptr, val); |
291 | if (*pkick_time) { | 134 | if (*pkick_time) { |
292 | this_cpu_add(qstats[qstat_pv_latency_wake], | 135 | this_cpu_add(EVENT_COUNT(pv_latency_wake), |
293 | sched_clock() - *pkick_time); | 136 | sched_clock() - *pkick_time); |
294 | qstat_inc(qstat_pv_kick_wake, true); | 137 | lockevent_inc(pv_kick_wake); |
295 | } | 138 | } |
296 | } | 139 | } |
297 | 140 | ||
298 | #define pv_kick(c) __pv_kick(c) | 141 | #define pv_kick(c) __pv_kick(c) |
299 | #define pv_wait(p, v) __pv_wait(p, v) | 142 | #define pv_wait(p, v) __pv_wait(p, v) |
300 | 143 | ||
301 | #else /* CONFIG_QUEUED_LOCK_STAT */ | 144 | #endif /* CONFIG_PARAVIRT_SPINLOCKS */ |
145 | |||
146 | #else /* CONFIG_LOCK_EVENT_COUNTS */ | ||
302 | 147 | ||
303 | static inline void qstat_inc(enum qlock_stats stat, bool cond) { } | 148 | static inline void lockevent_pv_hop(int hopcnt) { } |
304 | static inline void qstat_hop(int hopcnt) { } | ||
305 | 149 | ||
306 | #endif /* CONFIG_QUEUED_LOCK_STAT */ | 150 | #endif /* CONFIG_LOCK_EVENT_COUNTS */ |
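The hunks above replace the qstat_*() helpers, which bump per-CPU counters via this_cpu_inc()/this_cpu_add(), with the generic lockevent_*() API and its EVENT_COUNT() accessor. As a rough illustration of the counting pattern only (a userspace sketch; the event names are invented and per-thread counters stand in for the kernel's per-CPU ones):

/*
 * Userspace sketch of the lockevent counting pattern (illustration only;
 * not the kernel implementation).
 */
#include <stdio.h>

enum lock_events { EV_RLOCK, EV_WLOCK, EV_SLEEP_READER, NR_EVENTS };

static _Thread_local unsigned long event_counts[NR_EVENTS];

static inline void event_inc(enum lock_events ev)
{
	event_counts[ev]++;			/* cf. lockevent_inc() */
}

static inline void event_cond_inc(enum lock_events ev, int cond)
{
	if (cond)				/* cf. lockevent_cond_inc() */
		event_counts[ev]++;
}

int main(void)
{
	event_inc(EV_RLOCK);
	event_cond_inc(EV_SLEEP_READER, 0);	/* condition false: not counted */
	event_cond_inc(EV_WLOCK, 1);		/* condition true: counted */
	printf("rlock=%lu wlock=%lu sleep_reader=%lu\n",
	       event_counts[EV_RLOCK], event_counts[EV_WLOCK],
	       event_counts[EV_SLEEP_READER]);
	return 0;
}

When CONFIG_LOCK_EVENT_COUNTS is off, the kernel helpers compile away entirely, which is why the #else branch above reduces lockevent_pv_hop() to an empty stub.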
diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c deleted file mode 100644 index a7ffb2a96ede..000000000000 --- a/kernel/locking/rwsem-spinlock.c +++ /dev/null | |||
@@ -1,339 +0,0 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /* rwsem-spinlock.c: R/W semaphores: contention handling functions for | ||
3 | * generic spinlock implementation | ||
4 | * | ||
5 | * Copyright (c) 2001 David Howells (dhowells@redhat.com). | ||
6 | * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de> | ||
7 | * - Derived also from comments by Linus | ||
8 | */ | ||
9 | #include <linux/rwsem.h> | ||
10 | #include <linux/sched/signal.h> | ||
11 | #include <linux/sched/debug.h> | ||
12 | #include <linux/export.h> | ||
13 | |||
14 | enum rwsem_waiter_type { | ||
15 | RWSEM_WAITING_FOR_WRITE, | ||
16 | RWSEM_WAITING_FOR_READ | ||
17 | }; | ||
18 | |||
19 | struct rwsem_waiter { | ||
20 | struct list_head list; | ||
21 | struct task_struct *task; | ||
22 | enum rwsem_waiter_type type; | ||
23 | }; | ||
24 | |||
25 | int rwsem_is_locked(struct rw_semaphore *sem) | ||
26 | { | ||
27 | int ret = 1; | ||
28 | unsigned long flags; | ||
29 | |||
30 | if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) { | ||
31 | ret = (sem->count != 0); | ||
32 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
33 | } | ||
34 | return ret; | ||
35 | } | ||
36 | EXPORT_SYMBOL(rwsem_is_locked); | ||
37 | |||
38 | /* | ||
39 | * initialise the semaphore | ||
40 | */ | ||
41 | void __init_rwsem(struct rw_semaphore *sem, const char *name, | ||
42 | struct lock_class_key *key) | ||
43 | { | ||
44 | #ifdef CONFIG_DEBUG_LOCK_ALLOC | ||
45 | /* | ||
46 | * Make sure we are not reinitializing a held semaphore: | ||
47 | */ | ||
48 | debug_check_no_locks_freed((void *)sem, sizeof(*sem)); | ||
49 | lockdep_init_map(&sem->dep_map, name, key, 0); | ||
50 | #endif | ||
51 | sem->count = 0; | ||
52 | raw_spin_lock_init(&sem->wait_lock); | ||
53 | INIT_LIST_HEAD(&sem->wait_list); | ||
54 | } | ||
55 | EXPORT_SYMBOL(__init_rwsem); | ||
56 | |||
57 | /* | ||
58 | * handle the lock release when there are processes blocked on it that can now run | ||
59 | * - if we come here, then: | ||
60 | * - the 'active count' _reached_ zero | ||
61 | * - the 'waiting count' is non-zero | ||
62 | * - the spinlock must be held by the caller | ||
63 | * - woken process blocks are discarded from the list after having their task zeroed | ||
64 | * - writers are only woken if wakewrite is non-zero | ||
65 | */ | ||
66 | static inline struct rw_semaphore * | ||
67 | __rwsem_do_wake(struct rw_semaphore *sem, int wakewrite) | ||
68 | { | ||
69 | struct rwsem_waiter *waiter; | ||
70 | struct task_struct *tsk; | ||
71 | int woken; | ||
72 | |||
73 | waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); | ||
74 | |||
75 | if (waiter->type == RWSEM_WAITING_FOR_WRITE) { | ||
76 | if (wakewrite) | ||
77 | /* Wake up a writer. Note that we do not grant it the | ||
78 | * lock - it will have to acquire it when it runs. */ | ||
79 | wake_up_process(waiter->task); | ||
80 | goto out; | ||
81 | } | ||
82 | |||
83 | /* grant an infinite number of read locks to the front of the queue */ | ||
84 | woken = 0; | ||
85 | do { | ||
86 | struct list_head *next = waiter->list.next; | ||
87 | |||
88 | list_del(&waiter->list); | ||
89 | tsk = waiter->task; | ||
90 | /* | ||
91 | * Make sure we do not wake up the next reader before | ||
92 | * setting waiter->task to NULL to grant it the lock; | ||
93 | * otherwise we could miss the wakeup on the other | ||
94 | * side and end up sleeping again. See the pairing | ||
95 | * in rwsem_down_read_failed(). | ||
96 | */ | ||
97 | smp_mb(); | ||
98 | waiter->task = NULL; | ||
99 | wake_up_process(tsk); | ||
100 | put_task_struct(tsk); | ||
101 | woken++; | ||
102 | if (next == &sem->wait_list) | ||
103 | break; | ||
104 | waiter = list_entry(next, struct rwsem_waiter, list); | ||
105 | } while (waiter->type != RWSEM_WAITING_FOR_WRITE); | ||
106 | |||
107 | sem->count += woken; | ||
108 | |||
109 | out: | ||
110 | return sem; | ||
111 | } | ||
112 | |||
113 | /* | ||
114 | * wake a single writer | ||
115 | */ | ||
116 | static inline struct rw_semaphore * | ||
117 | __rwsem_wake_one_writer(struct rw_semaphore *sem) | ||
118 | { | ||
119 | struct rwsem_waiter *waiter; | ||
120 | |||
121 | waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list); | ||
122 | wake_up_process(waiter->task); | ||
123 | |||
124 | return sem; | ||
125 | } | ||
126 | |||
127 | /* | ||
128 | * get a read lock on the semaphore | ||
129 | */ | ||
130 | int __sched __down_read_common(struct rw_semaphore *sem, int state) | ||
131 | { | ||
132 | struct rwsem_waiter waiter; | ||
133 | unsigned long flags; | ||
134 | |||
135 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | ||
136 | |||
137 | if (sem->count >= 0 && list_empty(&sem->wait_list)) { | ||
138 | /* granted */ | ||
139 | sem->count++; | ||
140 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
141 | goto out; | ||
142 | } | ||
143 | |||
144 | /* set up my own style of waitqueue */ | ||
145 | waiter.task = current; | ||
146 | waiter.type = RWSEM_WAITING_FOR_READ; | ||
147 | get_task_struct(current); | ||
148 | |||
149 | list_add_tail(&waiter.list, &sem->wait_list); | ||
150 | |||
151 | /* wait to be given the lock */ | ||
152 | for (;;) { | ||
153 | if (!waiter.task) | ||
154 | break; | ||
155 | if (signal_pending_state(state, current)) | ||
156 | goto out_nolock; | ||
157 | set_current_state(state); | ||
158 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
159 | schedule(); | ||
160 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | ||
161 | } | ||
162 | |||
163 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
164 | out: | ||
165 | return 0; | ||
166 | |||
167 | out_nolock: | ||
168 | /* | ||
169 | * We didn't take the lock, so there must be a writer, either | ||
170 | * the owner or the first waiter of the sem. If it's a waiter, | ||
171 | * it will be woken by the current owner; no need to wake anybody. | ||
172 | */ | ||
173 | list_del(&waiter.list); | ||
174 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
175 | return -EINTR; | ||
176 | } | ||
177 | |||
178 | void __sched __down_read(struct rw_semaphore *sem) | ||
179 | { | ||
180 | __down_read_common(sem, TASK_UNINTERRUPTIBLE); | ||
181 | } | ||
182 | |||
183 | int __sched __down_read_killable(struct rw_semaphore *sem) | ||
184 | { | ||
185 | return __down_read_common(sem, TASK_KILLABLE); | ||
186 | } | ||
187 | |||
188 | /* | ||
189 | * trylock for reading -- returns 1 if successful, 0 if contention | ||
190 | */ | ||
191 | int __down_read_trylock(struct rw_semaphore *sem) | ||
192 | { | ||
193 | unsigned long flags; | ||
194 | int ret = 0; | ||
195 | |||
196 | |||
197 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | ||
198 | |||
199 | if (sem->count >= 0 && list_empty(&sem->wait_list)) { | ||
200 | /* granted */ | ||
201 | sem->count++; | ||
202 | ret = 1; | ||
203 | } | ||
204 | |||
205 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
206 | |||
207 | return ret; | ||
208 | } | ||
209 | |||
210 | /* | ||
211 | * get a write lock on the semaphore | ||
212 | */ | ||
213 | int __sched __down_write_common(struct rw_semaphore *sem, int state) | ||
214 | { | ||
215 | struct rwsem_waiter waiter; | ||
216 | unsigned long flags; | ||
217 | int ret = 0; | ||
218 | |||
219 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | ||
220 | |||
221 | /* set up my own style of waitqueue */ | ||
222 | waiter.task = current; | ||
223 | waiter.type = RWSEM_WAITING_FOR_WRITE; | ||
224 | list_add_tail(&waiter.list, &sem->wait_list); | ||
225 | |||
226 | /* wait for someone to release the lock */ | ||
227 | for (;;) { | ||
228 | /* | ||
229 | * This is the key to supporting write lock stealing: it lets a | ||
230 | * task already on a CPU take the lock quickly rather than go | ||
231 | * to sleep and wait for the system to wake it, or someone else | ||
232 | * at the head of the wait list, up. | ||
233 | */ | ||
234 | if (sem->count == 0) | ||
235 | break; | ||
236 | if (signal_pending_state(state, current)) | ||
237 | goto out_nolock; | ||
238 | |||
239 | set_current_state(state); | ||
240 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
241 | schedule(); | ||
242 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | ||
243 | } | ||
244 | /* got the lock */ | ||
245 | sem->count = -1; | ||
246 | list_del(&waiter.list); | ||
247 | |||
248 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
249 | |||
250 | return ret; | ||
251 | |||
252 | out_nolock: | ||
253 | list_del(&waiter.list); | ||
254 | if (!list_empty(&sem->wait_list) && sem->count >= 0) | ||
255 | __rwsem_do_wake(sem, 0); | ||
256 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
257 | |||
258 | return -EINTR; | ||
259 | } | ||
260 | |||
261 | void __sched __down_write(struct rw_semaphore *sem) | ||
262 | { | ||
263 | __down_write_common(sem, TASK_UNINTERRUPTIBLE); | ||
264 | } | ||
265 | |||
266 | int __sched __down_write_killable(struct rw_semaphore *sem) | ||
267 | { | ||
268 | return __down_write_common(sem, TASK_KILLABLE); | ||
269 | } | ||
270 | |||
271 | /* | ||
272 | * trylock for writing -- returns 1 if successful, 0 if contention | ||
273 | */ | ||
274 | int __down_write_trylock(struct rw_semaphore *sem) | ||
275 | { | ||
276 | unsigned long flags; | ||
277 | int ret = 0; | ||
278 | |||
279 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | ||
280 | |||
281 | if (sem->count == 0) { | ||
282 | /* got the lock */ | ||
283 | sem->count = -1; | ||
284 | ret = 1; | ||
285 | } | ||
286 | |||
287 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
288 | |||
289 | return ret; | ||
290 | } | ||
291 | |||
292 | /* | ||
293 | * release a read lock on the semaphore | ||
294 | */ | ||
295 | void __up_read(struct rw_semaphore *sem) | ||
296 | { | ||
297 | unsigned long flags; | ||
298 | |||
299 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | ||
300 | |||
301 | if (--sem->count == 0 && !list_empty(&sem->wait_list)) | ||
302 | sem = __rwsem_wake_one_writer(sem); | ||
303 | |||
304 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
305 | } | ||
306 | |||
307 | /* | ||
308 | * release a write lock on the semaphore | ||
309 | */ | ||
310 | void __up_write(struct rw_semaphore *sem) | ||
311 | { | ||
312 | unsigned long flags; | ||
313 | |||
314 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | ||
315 | |||
316 | sem->count = 0; | ||
317 | if (!list_empty(&sem->wait_list)) | ||
318 | sem = __rwsem_do_wake(sem, 1); | ||
319 | |||
320 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
321 | } | ||
322 | |||
323 | /* | ||
324 | * downgrade a write lock into a read lock | ||
325 | * - just wake up any readers at the front of the queue | ||
326 | */ | ||
327 | void __downgrade_write(struct rw_semaphore *sem) | ||
328 | { | ||
329 | unsigned long flags; | ||
330 | |||
331 | raw_spin_lock_irqsave(&sem->wait_lock, flags); | ||
332 | |||
333 | sem->count = 1; | ||
334 | if (!list_empty(&sem->wait_list)) | ||
335 | sem = __rwsem_do_wake(sem, 0); | ||
336 | |||
337 | raw_spin_unlock_irqrestore(&sem->wait_lock, flags); | ||
338 | } | ||
339 | |||
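The deleted rwsem-spinlock.c above keeps all of its state in a plain integer protected by sem->wait_lock: count == 0 means unlocked, count > 0 counts the readers, and count == -1 marks a writer. A minimal userspace sketch of just the two trylock paths, assuming a pthread mutex in place of the wait_lock and ignoring the waiter list that the kernel code also checks (build with -pthread):

/*
 * Illustration of the count convention used by the deleted spinlock-based
 * rwsem; not the kernel code, and the waiter queue is omitted.
 */
#include <pthread.h>
#include <stdio.h>

struct simple_rwsem {
	pthread_mutex_t wait_lock;
	int count;		/* 0: unlocked, >0: readers, -1: writer */
};

static int down_read_trylock(struct simple_rwsem *sem)
{
	int ret = 0;

	pthread_mutex_lock(&sem->wait_lock);
	if (sem->count >= 0) {		/* no writer: admit another reader */
		sem->count++;
		ret = 1;
	}
	pthread_mutex_unlock(&sem->wait_lock);
	return ret;
}

static int down_write_trylock(struct simple_rwsem *sem)
{
	int ret = 0;

	pthread_mutex_lock(&sem->wait_lock);
	if (sem->count == 0) {		/* unlocked: take it exclusively */
		sem->count = -1;
		ret = 1;
	}
	pthread_mutex_unlock(&sem->wait_lock);
	return ret;
}

int main(void)
{
	struct simple_rwsem sem = { PTHREAD_MUTEX_INITIALIZER, 0 };

	printf("read trylock:  %d\n", down_read_trylock(&sem));	/* 1 */
	printf("write trylock: %d\n", down_write_trylock(&sem));	/* 0: reader holds it */
	return 0;
}

Every operation in this scheme serializes on the wait_lock, which is exactly what the atomic xadd fast paths in rwsem.h avoid.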
diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index fbe96341beee..6b3ee9948bf1 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c | |||
@@ -147,6 +147,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, | |||
147 | * will notice the queued writer. | 147 | * will notice the queued writer. |
148 | */ | 148 | */ |
149 | wake_q_add(wake_q, waiter->task); | 149 | wake_q_add(wake_q, waiter->task); |
150 | lockevent_inc(rwsem_wake_writer); | ||
150 | } | 151 | } |
151 | 152 | ||
152 | return; | 153 | return; |
@@ -176,9 +177,8 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, | |||
176 | goto try_reader_grant; | 177 | goto try_reader_grant; |
177 | } | 178 | } |
178 | /* | 179 | /* |
179 | * It is not really necessary to set it to reader-owned here, | 180 | * Set it to reader-owned to give spinners an early |
180 | * but it gives the spinners an early indication that the | 181 | * indication that readers now have the lock. |
181 | * readers now have the lock. | ||
182 | */ | 182 | */ |
183 | __rwsem_set_reader_owned(sem, waiter->task); | 183 | __rwsem_set_reader_owned(sem, waiter->task); |
184 | } | 184 | } |
@@ -215,6 +215,7 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, | |||
215 | } | 215 | } |
216 | 216 | ||
217 | adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; | 217 | adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment; |
218 | lockevent_cond_inc(rwsem_wake_reader, woken); | ||
218 | if (list_empty(&sem->wait_list)) { | 219 | if (list_empty(&sem->wait_list)) { |
219 | /* hit end of list above */ | 220 | /* hit end of list above */ |
220 | adjustment -= RWSEM_WAITING_BIAS; | 221 | adjustment -= RWSEM_WAITING_BIAS; |
@@ -225,92 +226,6 @@ static void __rwsem_mark_wake(struct rw_semaphore *sem, | |||
225 | } | 226 | } |
226 | 227 | ||
227 | /* | 228 | /* |
228 | * Wait for the read lock to be granted | ||
229 | */ | ||
230 | static inline struct rw_semaphore __sched * | ||
231 | __rwsem_down_read_failed_common(struct rw_semaphore *sem, int state) | ||
232 | { | ||
233 | long count, adjustment = -RWSEM_ACTIVE_READ_BIAS; | ||
234 | struct rwsem_waiter waiter; | ||
235 | DEFINE_WAKE_Q(wake_q); | ||
236 | |||
237 | waiter.task = current; | ||
238 | waiter.type = RWSEM_WAITING_FOR_READ; | ||
239 | |||
240 | raw_spin_lock_irq(&sem->wait_lock); | ||
241 | if (list_empty(&sem->wait_list)) { | ||
242 | /* | ||
243 | * In case the wait queue is empty and the lock isn't owned | ||
244 | * by a writer, this reader can exit the slowpath and return | ||
245 | * immediately as its RWSEM_ACTIVE_READ_BIAS has already | ||
246 | * been set in the count. | ||
247 | */ | ||
248 | if (atomic_long_read(&sem->count) >= 0) { | ||
249 | raw_spin_unlock_irq(&sem->wait_lock); | ||
250 | return sem; | ||
251 | } | ||
252 | adjustment += RWSEM_WAITING_BIAS; | ||
253 | } | ||
254 | list_add_tail(&waiter.list, &sem->wait_list); | ||
255 | |||
256 | /* we're now waiting on the lock, but no longer actively locking */ | ||
257 | count = atomic_long_add_return(adjustment, &sem->count); | ||
258 | |||
259 | /* | ||
260 | * If there are no active locks, wake the front queued process(es). | ||
261 | * | ||
262 | * If there are no writers and we are first in the queue, | ||
263 | * wake our own waiter to join the existing active readers! | ||
264 | */ | ||
265 | if (count == RWSEM_WAITING_BIAS || | ||
266 | (count > RWSEM_WAITING_BIAS && | ||
267 | adjustment != -RWSEM_ACTIVE_READ_BIAS)) | ||
268 | __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q); | ||
269 | |||
270 | raw_spin_unlock_irq(&sem->wait_lock); | ||
271 | wake_up_q(&wake_q); | ||
272 | |||
273 | /* wait to be given the lock */ | ||
274 | while (true) { | ||
275 | set_current_state(state); | ||
276 | if (!waiter.task) | ||
277 | break; | ||
278 | if (signal_pending_state(state, current)) { | ||
279 | raw_spin_lock_irq(&sem->wait_lock); | ||
280 | if (waiter.task) | ||
281 | goto out_nolock; | ||
282 | raw_spin_unlock_irq(&sem->wait_lock); | ||
283 | break; | ||
284 | } | ||
285 | schedule(); | ||
286 | } | ||
287 | |||
288 | __set_current_state(TASK_RUNNING); | ||
289 | return sem; | ||
290 | out_nolock: | ||
291 | list_del(&waiter.list); | ||
292 | if (list_empty(&sem->wait_list)) | ||
293 | atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count); | ||
294 | raw_spin_unlock_irq(&sem->wait_lock); | ||
295 | __set_current_state(TASK_RUNNING); | ||
296 | return ERR_PTR(-EINTR); | ||
297 | } | ||
298 | |||
299 | __visible struct rw_semaphore * __sched | ||
300 | rwsem_down_read_failed(struct rw_semaphore *sem) | ||
301 | { | ||
302 | return __rwsem_down_read_failed_common(sem, TASK_UNINTERRUPTIBLE); | ||
303 | } | ||
304 | EXPORT_SYMBOL(rwsem_down_read_failed); | ||
305 | |||
306 | __visible struct rw_semaphore * __sched | ||
307 | rwsem_down_read_failed_killable(struct rw_semaphore *sem) | ||
308 | { | ||
309 | return __rwsem_down_read_failed_common(sem, TASK_KILLABLE); | ||
310 | } | ||
311 | EXPORT_SYMBOL(rwsem_down_read_failed_killable); | ||
312 | |||
313 | /* | ||
314 | * This function must be called with the sem->wait_lock held to prevent | 229 | * This function must be called with the sem->wait_lock held to prevent |
315 | * race conditions between checking the rwsem wait list and setting the | 230 | * race conditions between checking the rwsem wait list and setting the |
316 | * sem->count accordingly. | 231 | * sem->count accordingly. |
@@ -346,21 +261,17 @@ static inline bool rwsem_try_write_lock(long count, struct rw_semaphore *sem) | |||
346 | */ | 261 | */ |
347 | static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem) | 262 | static inline bool rwsem_try_write_lock_unqueued(struct rw_semaphore *sem) |
348 | { | 263 | { |
349 | long old, count = atomic_long_read(&sem->count); | 264 | long count = atomic_long_read(&sem->count); |
350 | |||
351 | while (true) { | ||
352 | if (!(count == 0 || count == RWSEM_WAITING_BIAS)) | ||
353 | return false; | ||
354 | 265 | ||
355 | old = atomic_long_cmpxchg_acquire(&sem->count, count, | 266 | while (!count || count == RWSEM_WAITING_BIAS) { |
356 | count + RWSEM_ACTIVE_WRITE_BIAS); | 267 | if (atomic_long_try_cmpxchg_acquire(&sem->count, &count, |
357 | if (old == count) { | 268 | count + RWSEM_ACTIVE_WRITE_BIAS)) { |
358 | rwsem_set_owner(sem); | 269 | rwsem_set_owner(sem); |
270 | lockevent_inc(rwsem_opt_wlock); | ||
359 | return true; | 271 | return true; |
360 | } | 272 | } |
361 | |||
362 | count = old; | ||
363 | } | 273 | } |
274 | return false; | ||
364 | } | 275 | } |
365 | 276 | ||
366 | static inline bool owner_on_cpu(struct task_struct *owner) | 277 | static inline bool owner_on_cpu(struct task_struct *owner) |
@@ -481,6 +392,7 @@ static bool rwsem_optimistic_spin(struct rw_semaphore *sem) | |||
481 | osq_unlock(&sem->osq); | 392 | osq_unlock(&sem->osq); |
482 | done: | 393 | done: |
483 | preempt_enable(); | 394 | preempt_enable(); |
395 | lockevent_cond_inc(rwsem_opt_fail, !taken); | ||
484 | return taken; | 396 | return taken; |
485 | } | 397 | } |
486 | 398 | ||
@@ -505,6 +417,97 @@ static inline bool rwsem_has_spinner(struct rw_semaphore *sem) | |||
505 | #endif | 417 | #endif |
506 | 418 | ||
507 | /* | 419 | /* |
420 | * Wait for the read lock to be granted | ||
421 | */ | ||
422 | static inline struct rw_semaphore __sched * | ||
423 | __rwsem_down_read_failed_common(struct rw_semaphore *sem, int state) | ||
424 | { | ||
425 | long count, adjustment = -RWSEM_ACTIVE_READ_BIAS; | ||
426 | struct rwsem_waiter waiter; | ||
427 | DEFINE_WAKE_Q(wake_q); | ||
428 | |||
429 | waiter.task = current; | ||
430 | waiter.type = RWSEM_WAITING_FOR_READ; | ||
431 | |||
432 | raw_spin_lock_irq(&sem->wait_lock); | ||
433 | if (list_empty(&sem->wait_list)) { | ||
434 | /* | ||
435 | * In case the wait queue is empty and the lock isn't owned | ||
436 | * by a writer, this reader can exit the slowpath and return | ||
437 | * immediately as its RWSEM_ACTIVE_READ_BIAS has already | ||
438 | * been set in the count. | ||
439 | */ | ||
440 | if (atomic_long_read(&sem->count) >= 0) { | ||
441 | raw_spin_unlock_irq(&sem->wait_lock); | ||
442 | rwsem_set_reader_owned(sem); | ||
443 | lockevent_inc(rwsem_rlock_fast); | ||
444 | return sem; | ||
445 | } | ||
446 | adjustment += RWSEM_WAITING_BIAS; | ||
447 | } | ||
448 | list_add_tail(&waiter.list, &sem->wait_list); | ||
449 | |||
450 | /* we're now waiting on the lock, but no longer actively locking */ | ||
451 | count = atomic_long_add_return(adjustment, &sem->count); | ||
452 | |||
453 | /* | ||
454 | * If there are no active locks, wake the front queued process(es). | ||
455 | * | ||
456 | * If there are no writers and we are first in the queue, | ||
457 | * wake our own waiter to join the existing active readers! | ||
458 | */ | ||
459 | if (count == RWSEM_WAITING_BIAS || | ||
460 | (count > RWSEM_WAITING_BIAS && | ||
461 | adjustment != -RWSEM_ACTIVE_READ_BIAS)) | ||
462 | __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q); | ||
463 | |||
464 | raw_spin_unlock_irq(&sem->wait_lock); | ||
465 | wake_up_q(&wake_q); | ||
466 | |||
467 | /* wait to be given the lock */ | ||
468 | while (true) { | ||
469 | set_current_state(state); | ||
470 | if (!waiter.task) | ||
471 | break; | ||
472 | if (signal_pending_state(state, current)) { | ||
473 | raw_spin_lock_irq(&sem->wait_lock); | ||
474 | if (waiter.task) | ||
475 | goto out_nolock; | ||
476 | raw_spin_unlock_irq(&sem->wait_lock); | ||
477 | break; | ||
478 | } | ||
479 | schedule(); | ||
480 | lockevent_inc(rwsem_sleep_reader); | ||
481 | } | ||
482 | |||
483 | __set_current_state(TASK_RUNNING); | ||
484 | lockevent_inc(rwsem_rlock); | ||
485 | return sem; | ||
486 | out_nolock: | ||
487 | list_del(&waiter.list); | ||
488 | if (list_empty(&sem->wait_list)) | ||
489 | atomic_long_add(-RWSEM_WAITING_BIAS, &sem->count); | ||
490 | raw_spin_unlock_irq(&sem->wait_lock); | ||
491 | __set_current_state(TASK_RUNNING); | ||
492 | lockevent_inc(rwsem_rlock_fail); | ||
493 | return ERR_PTR(-EINTR); | ||
494 | } | ||
495 | |||
496 | __visible struct rw_semaphore * __sched | ||
497 | rwsem_down_read_failed(struct rw_semaphore *sem) | ||
498 | { | ||
499 | return __rwsem_down_read_failed_common(sem, TASK_UNINTERRUPTIBLE); | ||
500 | } | ||
501 | EXPORT_SYMBOL(rwsem_down_read_failed); | ||
502 | |||
503 | __visible struct rw_semaphore * __sched | ||
504 | rwsem_down_read_failed_killable(struct rw_semaphore *sem) | ||
505 | { | ||
506 | return __rwsem_down_read_failed_common(sem, TASK_KILLABLE); | ||
507 | } | ||
508 | EXPORT_SYMBOL(rwsem_down_read_failed_killable); | ||
509 | |||
510 | /* | ||
508 | * Wait until we successfully acquire the write lock | 511 | * Wait until we successfully acquire the write lock |
509 | */ | 512 | */ |
510 | static inline struct rw_semaphore * | 513 | static inline struct rw_semaphore * |
@@ -580,6 +583,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) | |||
580 | goto out_nolock; | 583 | goto out_nolock; |
581 | 584 | ||
582 | schedule(); | 585 | schedule(); |
586 | lockevent_inc(rwsem_sleep_writer); | ||
583 | set_current_state(state); | 587 | set_current_state(state); |
584 | } while ((count = atomic_long_read(&sem->count)) & RWSEM_ACTIVE_MASK); | 588 | } while ((count = atomic_long_read(&sem->count)) & RWSEM_ACTIVE_MASK); |
585 | 589 | ||
@@ -588,6 +592,7 @@ __rwsem_down_write_failed_common(struct rw_semaphore *sem, int state) | |||
588 | __set_current_state(TASK_RUNNING); | 592 | __set_current_state(TASK_RUNNING); |
589 | list_del(&waiter.list); | 593 | list_del(&waiter.list); |
590 | raw_spin_unlock_irq(&sem->wait_lock); | 594 | raw_spin_unlock_irq(&sem->wait_lock); |
595 | lockevent_inc(rwsem_wlock); | ||
591 | 596 | ||
592 | return ret; | 597 | return ret; |
593 | 598 | ||
@@ -601,6 +606,7 @@ out_nolock: | |||
601 | __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q); | 606 | __rwsem_mark_wake(sem, RWSEM_WAKE_ANY, &wake_q); |
602 | raw_spin_unlock_irq(&sem->wait_lock); | 607 | raw_spin_unlock_irq(&sem->wait_lock); |
603 | wake_up_q(&wake_q); | 608 | wake_up_q(&wake_q); |
609 | lockevent_inc(rwsem_wlock_fail); | ||
604 | 610 | ||
605 | return ERR_PTR(-EINTR); | 611 | return ERR_PTR(-EINTR); |
606 | } | 612 | } |
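One hunk above converts rwsem_try_write_lock_unqueued() from an explicit cmpxchg retry loop to atomic_long_try_cmpxchg_acquire(), which writes the observed value back into its expected argument on failure and so removes the old/count bookkeeping. The same pattern with C11 atomics (a standalone sketch with made-up bias values, not the kernel constants):

/*
 * Userspace sketch of the try_cmpxchg retry pattern; illustration only.
 */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define WAITING_BIAS	(-65536L)		/* illustrative value only */
#define WRITE_BIAS	(WAITING_BIAS + 1L)

static bool try_write_lock(_Atomic long *count)
{
	long old = atomic_load(count);

	/* Lock is free (0) or has only waiters (WAITING_BIAS): try to take it. */
	while (old == 0 || old == WAITING_BIAS) {
		/* On failure, 'old' is refreshed with the current value. */
		if (atomic_compare_exchange_weak(count, &old,
						 old + WRITE_BIAS))
			return true;
	}
	return false;
}

int main(void)
{
	_Atomic long count = 0;

	printf("first  try_write_lock: %d\n", try_write_lock(&count));	/* 1 */
	printf("second try_write_lock: %d\n", try_write_lock(&count));	/* 0 */
	return 0;
}

Using the weak variant inside a loop is fine here: a spurious failure simply retries with the refreshed value.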
diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index e586f0d03ad3..ccbf18f560ff 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c | |||
@@ -24,7 +24,6 @@ void __sched down_read(struct rw_semaphore *sem) | |||
24 | rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); | 24 | rwsem_acquire_read(&sem->dep_map, 0, 0, _RET_IP_); |
25 | 25 | ||
26 | LOCK_CONTENDED(sem, __down_read_trylock, __down_read); | 26 | LOCK_CONTENDED(sem, __down_read_trylock, __down_read); |
27 | rwsem_set_reader_owned(sem); | ||
28 | } | 27 | } |
29 | 28 | ||
30 | EXPORT_SYMBOL(down_read); | 29 | EXPORT_SYMBOL(down_read); |
@@ -39,7 +38,6 @@ int __sched down_read_killable(struct rw_semaphore *sem) | |||
39 | return -EINTR; | 38 | return -EINTR; |
40 | } | 39 | } |
41 | 40 | ||
42 | rwsem_set_reader_owned(sem); | ||
43 | return 0; | 41 | return 0; |
44 | } | 42 | } |
45 | 43 | ||
@@ -52,10 +50,8 @@ int down_read_trylock(struct rw_semaphore *sem) | |||
52 | { | 50 | { |
53 | int ret = __down_read_trylock(sem); | 51 | int ret = __down_read_trylock(sem); |
54 | 52 | ||
55 | if (ret == 1) { | 53 | if (ret == 1) |
56 | rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); | 54 | rwsem_acquire_read(&sem->dep_map, 0, 1, _RET_IP_); |
57 | rwsem_set_reader_owned(sem); | ||
58 | } | ||
59 | return ret; | 55 | return ret; |
60 | } | 56 | } |
61 | 57 | ||
@@ -70,7 +66,6 @@ void __sched down_write(struct rw_semaphore *sem) | |||
70 | rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); | 66 | rwsem_acquire(&sem->dep_map, 0, 0, _RET_IP_); |
71 | 67 | ||
72 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); | 68 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); |
73 | rwsem_set_owner(sem); | ||
74 | } | 69 | } |
75 | 70 | ||
76 | EXPORT_SYMBOL(down_write); | 71 | EXPORT_SYMBOL(down_write); |
@@ -88,7 +83,6 @@ int __sched down_write_killable(struct rw_semaphore *sem) | |||
88 | return -EINTR; | 83 | return -EINTR; |
89 | } | 84 | } |
90 | 85 | ||
91 | rwsem_set_owner(sem); | ||
92 | return 0; | 86 | return 0; |
93 | } | 87 | } |
94 | 88 | ||
@@ -101,10 +95,8 @@ int down_write_trylock(struct rw_semaphore *sem) | |||
101 | { | 95 | { |
102 | int ret = __down_write_trylock(sem); | 96 | int ret = __down_write_trylock(sem); |
103 | 97 | ||
104 | if (ret == 1) { | 98 | if (ret == 1) |
105 | rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_); | 99 | rwsem_acquire(&sem->dep_map, 0, 1, _RET_IP_); |
106 | rwsem_set_owner(sem); | ||
107 | } | ||
108 | 100 | ||
109 | return ret; | 101 | return ret; |
110 | } | 102 | } |
@@ -117,9 +109,7 @@ EXPORT_SYMBOL(down_write_trylock); | |||
117 | void up_read(struct rw_semaphore *sem) | 109 | void up_read(struct rw_semaphore *sem) |
118 | { | 110 | { |
119 | rwsem_release(&sem->dep_map, 1, _RET_IP_); | 111 | rwsem_release(&sem->dep_map, 1, _RET_IP_); |
120 | DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED)); | ||
121 | 112 | ||
122 | rwsem_clear_reader_owned(sem); | ||
123 | __up_read(sem); | 113 | __up_read(sem); |
124 | } | 114 | } |
125 | 115 | ||
@@ -131,9 +121,7 @@ EXPORT_SYMBOL(up_read); | |||
131 | void up_write(struct rw_semaphore *sem) | 121 | void up_write(struct rw_semaphore *sem) |
132 | { | 122 | { |
133 | rwsem_release(&sem->dep_map, 1, _RET_IP_); | 123 | rwsem_release(&sem->dep_map, 1, _RET_IP_); |
134 | DEBUG_RWSEMS_WARN_ON(sem->owner != current); | ||
135 | 124 | ||
136 | rwsem_clear_owner(sem); | ||
137 | __up_write(sem); | 125 | __up_write(sem); |
138 | } | 126 | } |
139 | 127 | ||
@@ -145,9 +133,7 @@ EXPORT_SYMBOL(up_write); | |||
145 | void downgrade_write(struct rw_semaphore *sem) | 133 | void downgrade_write(struct rw_semaphore *sem) |
146 | { | 134 | { |
147 | lock_downgrade(&sem->dep_map, _RET_IP_); | 135 | lock_downgrade(&sem->dep_map, _RET_IP_); |
148 | DEBUG_RWSEMS_WARN_ON(sem->owner != current); | ||
149 | 136 | ||
150 | rwsem_set_reader_owned(sem); | ||
151 | __downgrade_write(sem); | 137 | __downgrade_write(sem); |
152 | } | 138 | } |
153 | 139 | ||
@@ -161,7 +147,6 @@ void down_read_nested(struct rw_semaphore *sem, int subclass) | |||
161 | rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); | 147 | rwsem_acquire_read(&sem->dep_map, subclass, 0, _RET_IP_); |
162 | 148 | ||
163 | LOCK_CONTENDED(sem, __down_read_trylock, __down_read); | 149 | LOCK_CONTENDED(sem, __down_read_trylock, __down_read); |
164 | rwsem_set_reader_owned(sem); | ||
165 | } | 150 | } |
166 | 151 | ||
167 | EXPORT_SYMBOL(down_read_nested); | 152 | EXPORT_SYMBOL(down_read_nested); |
@@ -172,7 +157,6 @@ void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest) | |||
172 | rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_); | 157 | rwsem_acquire_nest(&sem->dep_map, 0, 0, nest, _RET_IP_); |
173 | 158 | ||
174 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); | 159 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); |
175 | rwsem_set_owner(sem); | ||
176 | } | 160 | } |
177 | 161 | ||
178 | EXPORT_SYMBOL(_down_write_nest_lock); | 162 | EXPORT_SYMBOL(_down_write_nest_lock); |
@@ -193,7 +177,6 @@ void down_write_nested(struct rw_semaphore *sem, int subclass) | |||
193 | rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); | 177 | rwsem_acquire(&sem->dep_map, subclass, 0, _RET_IP_); |
194 | 178 | ||
195 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); | 179 | LOCK_CONTENDED(sem, __down_write_trylock, __down_write); |
196 | rwsem_set_owner(sem); | ||
197 | } | 180 | } |
198 | 181 | ||
199 | EXPORT_SYMBOL(down_write_nested); | 182 | EXPORT_SYMBOL(down_write_nested); |
@@ -208,7 +191,6 @@ int __sched down_write_killable_nested(struct rw_semaphore *sem, int subclass) | |||
208 | return -EINTR; | 191 | return -EINTR; |
209 | } | 192 | } |
210 | 193 | ||
211 | rwsem_set_owner(sem); | ||
212 | return 0; | 194 | return 0; |
213 | } | 195 | } |
214 | 196 | ||
@@ -216,7 +198,8 @@ EXPORT_SYMBOL(down_write_killable_nested); | |||
216 | 198 | ||
217 | void up_read_non_owner(struct rw_semaphore *sem) | 199 | void up_read_non_owner(struct rw_semaphore *sem) |
218 | { | 200 | { |
219 | DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED)); | 201 | DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED), |
202 | sem); | ||
220 | __up_read(sem); | 203 | __up_read(sem); |
221 | } | 204 | } |
222 | 205 | ||
diff --git a/kernel/locking/rwsem.h b/kernel/locking/rwsem.h index bad2bca0268b..64877f5294e3 100644 --- a/kernel/locking/rwsem.h +++ b/kernel/locking/rwsem.h | |||
@@ -23,15 +23,44 @@ | |||
23 | * is involved. Ideally we would like to track all the readers that own | 23 | * is involved. Ideally we would like to track all the readers that own |
24 | * a rwsem, but the overhead is simply too big. | 24 | * a rwsem, but the overhead is simply too big. |
25 | */ | 25 | */ |
26 | #include "lock_events.h" | ||
27 | |||
26 | #define RWSEM_READER_OWNED (1UL << 0) | 28 | #define RWSEM_READER_OWNED (1UL << 0) |
27 | #define RWSEM_ANONYMOUSLY_OWNED (1UL << 1) | 29 | #define RWSEM_ANONYMOUSLY_OWNED (1UL << 1) |
28 | 30 | ||
29 | #ifdef CONFIG_DEBUG_RWSEMS | 31 | #ifdef CONFIG_DEBUG_RWSEMS |
30 | # define DEBUG_RWSEMS_WARN_ON(c) DEBUG_LOCKS_WARN_ON(c) | 32 | # define DEBUG_RWSEMS_WARN_ON(c, sem) do { \ |
33 | if (!debug_locks_silent && \ | ||
34 | WARN_ONCE(c, "DEBUG_RWSEMS_WARN_ON(%s): count = 0x%lx, owner = 0x%lx, curr 0x%lx, list %sempty\n",\ | ||
35 | #c, atomic_long_read(&(sem)->count), \ | ||
36 | (long)((sem)->owner), (long)current, \ | ||
37 | list_empty(&(sem)->wait_list) ? "" : "not ")) \ | ||
38 | debug_locks_off(); \ | ||
39 | } while (0) | ||
40 | #else | ||
41 | # define DEBUG_RWSEMS_WARN_ON(c, sem) | ||
42 | #endif | ||
43 | |||
44 | /* | ||
45 | * R/W semaphores originally for PPC using the stuff in lib/rwsem.c. | ||
46 | * Adapted largely from include/asm-i386/rwsem.h | ||
47 | * by Paul Mackerras <paulus@samba.org>. | ||
48 | */ | ||
49 | |||
50 | /* | ||
51 | * the semaphore definition | ||
52 | */ | ||
53 | #ifdef CONFIG_64BIT | ||
54 | # define RWSEM_ACTIVE_MASK 0xffffffffL | ||
31 | #else | 55 | #else |
32 | # define DEBUG_RWSEMS_WARN_ON(c) | 56 | # define RWSEM_ACTIVE_MASK 0x0000ffffL |
33 | #endif | 57 | #endif |
34 | 58 | ||
59 | #define RWSEM_ACTIVE_BIAS 0x00000001L | ||
60 | #define RWSEM_WAITING_BIAS (-RWSEM_ACTIVE_MASK-1) | ||
61 | #define RWSEM_ACTIVE_READ_BIAS RWSEM_ACTIVE_BIAS | ||
62 | #define RWSEM_ACTIVE_WRITE_BIAS (RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS) | ||
63 | |||
35 | #ifdef CONFIG_RWSEM_SPIN_ON_OWNER | 64 | #ifdef CONFIG_RWSEM_SPIN_ON_OWNER |
36 | /* | 65 | /* |
37 | * All writes to owner are protected by WRITE_ONCE() to make sure that | 66 | * All writes to owner are protected by WRITE_ONCE() to make sure that |
@@ -132,3 +161,144 @@ static inline void rwsem_clear_reader_owned(struct rw_semaphore *sem) | |||
132 | { | 161 | { |
133 | } | 162 | } |
134 | #endif | 163 | #endif |
164 | |||
165 | extern struct rw_semaphore *rwsem_down_read_failed(struct rw_semaphore *sem); | ||
166 | extern struct rw_semaphore *rwsem_down_read_failed_killable(struct rw_semaphore *sem); | ||
167 | extern struct rw_semaphore *rwsem_down_write_failed(struct rw_semaphore *sem); | ||
168 | extern struct rw_semaphore *rwsem_down_write_failed_killable(struct rw_semaphore *sem); | ||
169 | extern struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem); | ||
170 | extern struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem); | ||
171 | |||
172 | /* | ||
173 | * lock for reading | ||
174 | */ | ||
175 | static inline void __down_read(struct rw_semaphore *sem) | ||
176 | { | ||
177 | if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) { | ||
178 | rwsem_down_read_failed(sem); | ||
179 | DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & | ||
180 | RWSEM_READER_OWNED), sem); | ||
181 | } else { | ||
182 | rwsem_set_reader_owned(sem); | ||
183 | } | ||
184 | } | ||
185 | |||
186 | static inline int __down_read_killable(struct rw_semaphore *sem) | ||
187 | { | ||
188 | if (unlikely(atomic_long_inc_return_acquire(&sem->count) <= 0)) { | ||
189 | if (IS_ERR(rwsem_down_read_failed_killable(sem))) | ||
190 | return -EINTR; | ||
191 | DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & | ||
192 | RWSEM_READER_OWNED), sem); | ||
193 | } else { | ||
194 | rwsem_set_reader_owned(sem); | ||
195 | } | ||
196 | return 0; | ||
197 | } | ||
198 | |||
199 | static inline int __down_read_trylock(struct rw_semaphore *sem) | ||
200 | { | ||
201 | /* | ||
202 | * Optimize for the case when the rwsem is not locked at all. | ||
203 | */ | ||
204 | long tmp = RWSEM_UNLOCKED_VALUE; | ||
205 | |||
206 | lockevent_inc(rwsem_rtrylock); | ||
207 | do { | ||
208 | if (atomic_long_try_cmpxchg_acquire(&sem->count, &tmp, | ||
209 | tmp + RWSEM_ACTIVE_READ_BIAS)) { | ||
210 | rwsem_set_reader_owned(sem); | ||
211 | return 1; | ||
212 | } | ||
213 | } while (tmp >= 0); | ||
214 | return 0; | ||
215 | } | ||
216 | |||
217 | /* | ||
218 | * lock for writing | ||
219 | */ | ||
220 | static inline void __down_write(struct rw_semaphore *sem) | ||
221 | { | ||
222 | long tmp; | ||
223 | |||
224 | tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS, | ||
225 | &sem->count); | ||
226 | if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS)) | ||
227 | rwsem_down_write_failed(sem); | ||
228 | rwsem_set_owner(sem); | ||
229 | } | ||
230 | |||
231 | static inline int __down_write_killable(struct rw_semaphore *sem) | ||
232 | { | ||
233 | long tmp; | ||
234 | |||
235 | tmp = atomic_long_add_return_acquire(RWSEM_ACTIVE_WRITE_BIAS, | ||
236 | &sem->count); | ||
237 | if (unlikely(tmp != RWSEM_ACTIVE_WRITE_BIAS)) | ||
238 | if (IS_ERR(rwsem_down_write_failed_killable(sem))) | ||
239 | return -EINTR; | ||
240 | rwsem_set_owner(sem); | ||
241 | return 0; | ||
242 | } | ||
243 | |||
244 | static inline int __down_write_trylock(struct rw_semaphore *sem) | ||
245 | { | ||
246 | long tmp; | ||
247 | |||
248 | lockevent_inc(rwsem_wtrylock); | ||
249 | tmp = atomic_long_cmpxchg_acquire(&sem->count, RWSEM_UNLOCKED_VALUE, | ||
250 | RWSEM_ACTIVE_WRITE_BIAS); | ||
251 | if (tmp == RWSEM_UNLOCKED_VALUE) { | ||
252 | rwsem_set_owner(sem); | ||
253 | return true; | ||
254 | } | ||
255 | return false; | ||
256 | } | ||
257 | |||
258 | /* | ||
259 | * unlock after reading | ||
260 | */ | ||
261 | static inline void __up_read(struct rw_semaphore *sem) | ||
262 | { | ||
263 | long tmp; | ||
264 | |||
265 | DEBUG_RWSEMS_WARN_ON(!((unsigned long)sem->owner & RWSEM_READER_OWNED), | ||
266 | sem); | ||
267 | rwsem_clear_reader_owned(sem); | ||
268 | tmp = atomic_long_dec_return_release(&sem->count); | ||
269 | if (unlikely(tmp < -1 && (tmp & RWSEM_ACTIVE_MASK) == 0)) | ||
270 | rwsem_wake(sem); | ||
271 | } | ||
272 | |||
273 | /* | ||
274 | * unlock after writing | ||
275 | */ | ||
276 | static inline void __up_write(struct rw_semaphore *sem) | ||
277 | { | ||
278 | DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem); | ||
279 | rwsem_clear_owner(sem); | ||
280 | if (unlikely(atomic_long_sub_return_release(RWSEM_ACTIVE_WRITE_BIAS, | ||
281 | &sem->count) < 0)) | ||
282 | rwsem_wake(sem); | ||
283 | } | ||
284 | |||
285 | /* | ||
286 | * downgrade write lock to read lock | ||
287 | */ | ||
288 | static inline void __downgrade_write(struct rw_semaphore *sem) | ||
289 | { | ||
290 | long tmp; | ||
291 | |||
292 | /* | ||
293 | * When downgrading from exclusive to shared ownership, | ||
294 | * anything inside the write-locked region cannot leak | ||
295 | * into the read side. In contrast, anything in the | ||
296 | * read-locked region is ok to be re-ordered into the | ||
297 | * write side. As such, rely on RELEASE semantics. | ||
298 | */ | ||
299 | DEBUG_RWSEMS_WARN_ON(sem->owner != current, sem); | ||
300 | tmp = atomic_long_add_return_release(-RWSEM_WAITING_BIAS, &sem->count); | ||
301 | rwsem_set_reader_owned(sem); | ||
302 | if (tmp < 0) | ||
303 | rwsem_downgrade_wake(sem); | ||
304 | } | ||
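The bias constants defined earlier in this header encode the whole lock state in sem->count, and the fast paths above test for specific values: __down_read() expects the post-increment result to be positive, __down_write() expects exactly RWSEM_ACTIVE_WRITE_BIAS back, and __downgrade_write() subtracts RWSEM_WAITING_BIAS to leave a one-reader count. A small userspace program (64-bit, illustration only) walking through those values:

/* Illustration of the rwsem-xadd count encoding; not kernel code. */
#include <stdio.h>

#define RWSEM_ACTIVE_MASK	0xffffffffL
#define RWSEM_ACTIVE_BIAS	0x00000001L
#define RWSEM_WAITING_BIAS	(-RWSEM_ACTIVE_MASK - 1)
#define RWSEM_ACTIVE_READ_BIAS	RWSEM_ACTIVE_BIAS
#define RWSEM_ACTIVE_WRITE_BIAS	(RWSEM_WAITING_BIAS + RWSEM_ACTIVE_BIAS)

int main(void)
{
	long count = 0;					/* unlocked */

	/* Reader fast path: increment and check the result is positive. */
	count += RWSEM_ACTIVE_READ_BIAS;
	printf("one reader:  %#lx (fast path ok: %d)\n",
	       (unsigned long)count, count > 0);

	count -= RWSEM_ACTIVE_READ_BIAS;		/* reader releases */

	/* Writer fast path: add WRITE_BIAS and expect exactly that value back. */
	count += RWSEM_ACTIVE_WRITE_BIAS;
	printf("writer held: %#lx (fast path ok: %d)\n",
	       (unsigned long)count, count == RWSEM_ACTIVE_WRITE_BIAS);

	/* Downgrade: subtracting WAITING_BIAS leaves a single reader count. */
	count -= RWSEM_WAITING_BIAS;
	printf("downgraded:  %#lx (one reader: %d)\n",
	       (unsigned long)count, count == RWSEM_ACTIVE_READ_BIAS);
	return 0;
}

With the 64-bit mask, the reader/writer activity stays in the low 32 bits of the count while the waiting and write biases live in the upper half of the word.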