Diffstat (limited to 'kernel/locking/qspinlock_stat.h')
-rw-r--r--   kernel/locking/qspinlock_stat.h | 242
1 file changed, 43 insertions(+), 199 deletions(-)
diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h
index d73f85388d5c..54152670ff24 100644
--- a/kernel/locking/qspinlock_stat.h
+++ b/kernel/locking/qspinlock_stat.h
@@ -9,262 +9,105 @@
  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  * GNU General Public License for more details.
  *
- * Authors: Waiman Long <waiman.long@hpe.com>
+ * Authors: Waiman Long <longman@redhat.com>
  */
 
-/*
- * When queued spinlock statistical counters are enabled, the following
- * debugfs files will be created for reporting the counter values:
- *
- * <debugfs>/qlockstat/
- *   pv_hash_hops	- average # of hops per hashing operation
- *   pv_kick_unlock	- # of vCPU kicks issued at unlock time
- *   pv_kick_wake	- # of vCPU kicks used for computing pv_latency_wake
- *   pv_latency_kick	- average latency (ns) of vCPU kick operation
- *   pv_latency_wake	- average latency (ns) from vCPU kick to wakeup
- *   pv_lock_stealing	- # of lock stealing operations
- *   pv_spurious_wakeup	- # of spurious wakeups in non-head vCPUs
- *   pv_wait_again	- # of wait's after a queue head vCPU kick
- *   pv_wait_early	- # of early vCPU wait's
- *   pv_wait_head	- # of vCPU wait's at the queue head
- *   pv_wait_node	- # of vCPU wait's at a non-head queue node
- *   lock_pending	- # of locking operations via pending code
- *   lock_slowpath	- # of locking operations via MCS lock queue
- *   lock_use_node2	- # of locking operations that use 2nd per-CPU node
- *   lock_use_node3	- # of locking operations that use 3rd per-CPU node
- *   lock_use_node4	- # of locking operations that use 4th per-CPU node
- *   lock_no_node	- # of locking operations without using per-CPU node
- *
- * Subtracting lock_use_node[234] from lock_slowpath will give you
- * lock_use_node1.
- *
- * Writing to the "reset_counters" file will reset all the above counter
- * values.
- *
- * These statistical counters are implemented as per-cpu variables which are
- * summed and computed whenever the corresponding debugfs files are read. This
- * minimizes added overhead making the counters usable even in a production
- * environment.
- *
- * There may be slight difference between pv_kick_wake and pv_kick_unlock.
- */
-enum qlock_stats {
-	qstat_pv_hash_hops,
-	qstat_pv_kick_unlock,
-	qstat_pv_kick_wake,
-	qstat_pv_latency_kick,
-	qstat_pv_latency_wake,
-	qstat_pv_lock_stealing,
-	qstat_pv_spurious_wakeup,
-	qstat_pv_wait_again,
-	qstat_pv_wait_early,
-	qstat_pv_wait_head,
-	qstat_pv_wait_node,
-	qstat_lock_pending,
-	qstat_lock_slowpath,
-	qstat_lock_use_node2,
-	qstat_lock_use_node3,
-	qstat_lock_use_node4,
-	qstat_lock_no_node,
-	qstat_num,	/* Total number of statistical counters */
-	qstat_reset_cnts = qstat_num,
-};
+#include "lock_events.h"
 
-#ifdef CONFIG_QUEUED_LOCK_STAT
+#ifdef CONFIG_LOCK_EVENT_COUNTS
+#ifdef CONFIG_PARAVIRT_SPINLOCKS
 /*
- * Collect pvqspinlock statistics
+ * Collect pvqspinlock locking event counts
  */
-#include <linux/debugfs.h>
 #include <linux/sched.h>
 #include <linux/sched/clock.h>
 #include <linux/fs.h>
 
-static const char * const qstat_names[qstat_num + 1] = {
-	[qstat_pv_hash_hops]	   = "pv_hash_hops",
-	[qstat_pv_kick_unlock]	   = "pv_kick_unlock",
-	[qstat_pv_kick_wake]	   = "pv_kick_wake",
-	[qstat_pv_spurious_wakeup] = "pv_spurious_wakeup",
-	[qstat_pv_latency_kick]	   = "pv_latency_kick",
-	[qstat_pv_latency_wake]	   = "pv_latency_wake",
-	[qstat_pv_lock_stealing]   = "pv_lock_stealing",
-	[qstat_pv_wait_again]	   = "pv_wait_again",
-	[qstat_pv_wait_early]	   = "pv_wait_early",
-	[qstat_pv_wait_head]	   = "pv_wait_head",
-	[qstat_pv_wait_node]	   = "pv_wait_node",
-	[qstat_lock_pending]	   = "lock_pending",
-	[qstat_lock_slowpath]	   = "lock_slowpath",
-	[qstat_lock_use_node2]	   = "lock_use_node2",
-	[qstat_lock_use_node3]	   = "lock_use_node3",
-	[qstat_lock_use_node4]	   = "lock_use_node4",
-	[qstat_lock_no_node]	   = "lock_no_node",
-	[qstat_reset_cnts]	   = "reset_counters",
-};
+#define EVENT_COUNT(ev)	lockevents[LOCKEVENT_ ## ev]
 
 /*
- * Per-cpu counters
+ * PV specific per-cpu counter
  */
-static DEFINE_PER_CPU(unsigned long, qstats[qstat_num]);
 static DEFINE_PER_CPU(u64, pv_kick_time);
 
 /*
- * Function to read and return the qlock statistical counter values
+ * Function to read and return the PV qspinlock counts.
  *
  * The following counters are handled specially:
- * 1. qstat_pv_latency_kick
+ * 1. pv_latency_kick
  *    Average kick latency (ns) = pv_latency_kick/pv_kick_unlock
- * 2. qstat_pv_latency_wake
+ * 2. pv_latency_wake
  *    Average wake latency (ns) = pv_latency_wake/pv_kick_wake
- * 3. qstat_pv_hash_hops
+ * 3. pv_hash_hops
  *    Average hops/hash = pv_hash_hops/pv_kick_unlock
  */
-static ssize_t qstat_read(struct file *file, char __user *user_buf,
-			  size_t count, loff_t *ppos)
+ssize_t lockevent_read(struct file *file, char __user *user_buf,
+		       size_t count, loff_t *ppos)
 {
 	char buf[64];
-	int cpu, counter, len;
-	u64 stat = 0, kicks = 0;
+	int cpu, id, len;
+	u64 sum = 0, kicks = 0;
 
 	/*
 	 * Get the counter ID stored in file->f_inode->i_private
 	 */
-	counter = (long)file_inode(file)->i_private;
+	id = (long)file_inode(file)->i_private;
 
-	if (counter >= qstat_num)
+	if (id >= lockevent_num)
 		return -EBADF;
 
 	for_each_possible_cpu(cpu) {
-		stat += per_cpu(qstats[counter], cpu);
+		sum += per_cpu(lockevents[id], cpu);
 		/*
-		 * Need to sum additional counter for some of them
+		 * Need to sum additional counters for some of them
 		 */
-		switch (counter) {
+		switch (id) {
 
-		case qstat_pv_latency_kick:
-		case qstat_pv_hash_hops:
-			kicks += per_cpu(qstats[qstat_pv_kick_unlock], cpu);
+		case LOCKEVENT_pv_latency_kick:
+		case LOCKEVENT_pv_hash_hops:
+			kicks += per_cpu(EVENT_COUNT(pv_kick_unlock), cpu);
 			break;
 
-		case qstat_pv_latency_wake:
-			kicks += per_cpu(qstats[qstat_pv_kick_wake], cpu);
+		case LOCKEVENT_pv_latency_wake:
+			kicks += per_cpu(EVENT_COUNT(pv_kick_wake), cpu);
 			break;
 		}
 	}
 
-	if (counter == qstat_pv_hash_hops) {
+	if (id == LOCKEVENT_pv_hash_hops) {
 		u64 frac = 0;
 
 		if (kicks) {
-			frac = 100ULL * do_div(stat, kicks);
+			frac = 100ULL * do_div(sum, kicks);
 			frac = DIV_ROUND_CLOSEST_ULL(frac, kicks);
 		}
 
 		/*
 		 * Return a X.XX decimal number
 		 */
-		len = snprintf(buf, sizeof(buf) - 1, "%llu.%02llu\n", stat, frac);
+		len = snprintf(buf, sizeof(buf) - 1, "%llu.%02llu\n",
+			       sum, frac);
 	} else {
 		/*
 		 * Round to the nearest ns
 		 */
-		if ((counter == qstat_pv_latency_kick) ||
-		    (counter == qstat_pv_latency_wake)) {
+		if ((id == LOCKEVENT_pv_latency_kick) ||
+		    (id == LOCKEVENT_pv_latency_wake)) {
 			if (kicks)
-				stat = DIV_ROUND_CLOSEST_ULL(stat, kicks);
+				sum = DIV_ROUND_CLOSEST_ULL(sum, kicks);
 		}
-		len = snprintf(buf, sizeof(buf) - 1, "%llu\n", stat);
+		len = snprintf(buf, sizeof(buf) - 1, "%llu\n", sum);
 	}
 
 	return simple_read_from_buffer(user_buf, count, ppos, buf, len);
 }
 
 /*
- * Function to handle write request
- *
- * When counter = reset_cnts, reset all the counter values.
- * Since the counter updates aren't atomic, the resetting is done twice
- * to make sure that the counters are very likely to be all cleared.
- */
-static ssize_t qstat_write(struct file *file, const char __user *user_buf,
-			   size_t count, loff_t *ppos)
-{
-	int cpu;
-
-	/*
-	 * Get the counter ID stored in file->f_inode->i_private
-	 */
-	if ((long)file_inode(file)->i_private != qstat_reset_cnts)
-		return count;
-
-	for_each_possible_cpu(cpu) {
-		int i;
-		unsigned long *ptr = per_cpu_ptr(qstats, cpu);
-
-		for (i = 0 ; i < qstat_num; i++)
-			WRITE_ONCE(ptr[i], 0);
-	}
-	return count;
-}
-
-/*
- * Debugfs data structures
- */
-static const struct file_operations fops_qstat = {
-	.read = qstat_read,
-	.write = qstat_write,
-	.llseek = default_llseek,
-};
-
-/*
- * Initialize debugfs for the qspinlock statistical counters
- */
-static int __init init_qspinlock_stat(void)
-{
-	struct dentry *d_qstat = debugfs_create_dir("qlockstat", NULL);
-	int i;
-
-	if (!d_qstat)
-		goto out;
-
-	/*
-	 * Create the debugfs files
-	 *
-	 * As reading from and writing to the stat files can be slow, only
-	 * root is allowed to do the read/write to limit impact to system
-	 * performance.
-	 */
-	for (i = 0; i < qstat_num; i++)
-		if (!debugfs_create_file(qstat_names[i], 0400, d_qstat,
-					 (void *)(long)i, &fops_qstat))
-			goto fail_undo;
-
-	if (!debugfs_create_file(qstat_names[qstat_reset_cnts], 0200, d_qstat,
-				 (void *)(long)qstat_reset_cnts, &fops_qstat))
-		goto fail_undo;
-
-	return 0;
-fail_undo:
-	debugfs_remove_recursive(d_qstat);
-out:
-	pr_warn("Could not create 'qlockstat' debugfs entries\n");
-	return -ENOMEM;
-}
-fs_initcall(init_qspinlock_stat);
-
-/*
- * Increment the PV qspinlock statistical counters
- */
-static inline void qstat_inc(enum qlock_stats stat, bool cond)
-{
-	if (cond)
-		this_cpu_inc(qstats[stat]);
-}
-
-/*
  * PV hash hop count
  */
-static inline void qstat_hop(int hopcnt)
+static inline void lockevent_pv_hop(int hopcnt)
 {
-	this_cpu_add(qstats[qstat_pv_hash_hops], hopcnt);
+	this_cpu_add(EVENT_COUNT(pv_hash_hops), hopcnt);
 }
 
 /*
@@ -276,7 +119,7 @@ static inline void __pv_kick(int cpu)
 
 	per_cpu(pv_kick_time, cpu) = start;
 	pv_kick(cpu);
-	this_cpu_add(qstats[qstat_pv_latency_kick], sched_clock() - start);
+	this_cpu_add(EVENT_COUNT(pv_latency_kick), sched_clock() - start);
 }
 
 /*
@@ -289,18 +132,19 @@ static inline void __pv_wait(u8 *ptr, u8 val)
 	*pkick_time = 0;
 	pv_wait(ptr, val);
 	if (*pkick_time) {
-		this_cpu_add(qstats[qstat_pv_latency_wake],
+		this_cpu_add(EVENT_COUNT(pv_latency_wake),
 			     sched_clock() - *pkick_time);
-		qstat_inc(qstat_pv_kick_wake, true);
+		lockevent_inc(pv_kick_wake);
 	}
 }
 
 #define pv_kick(c)	__pv_kick(c)
 #define pv_wait(p, v)	__pv_wait(p, v)
 
-#else /* CONFIG_QUEUED_LOCK_STAT */
+#endif /* CONFIG_PARAVIRT_SPINLOCKS */
+
+#else /* CONFIG_LOCK_EVENT_COUNTS */
 
-static inline void qstat_inc(enum qlock_stats stat, bool cond) { }
-static inline void qstat_hop(int hopcnt) { }
+static inline void lockevent_pv_hop(int hopcnt) { }
 
-#endif /* CONFIG_QUEUED_LOCK_STAT */
+#endif /* CONFIG_LOCK_EVENT_COUNTS */
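
Note on the interface assumed by this patch: the shared lock_events.h header included above is not part of this diff, so the sketch below is only an illustration of what the converted code appears to rely on (a per-CPU lockevents[] array indexed by LOCKEVENT_* ids, a lockevent_num terminator, and lockevent_inc()-style helpers). The names are inferred from the identifiers used in the patch; the authoritative definitions live in kernel/locking/lock_events.h and lock_events_list.h.

/*
 * Illustrative sketch only -- not part of this patch. The real definitions
 * live in lock_events.h / lock_events_list.h (not shown in this diff); the
 * names below are inferred from the identifiers used above.
 */
enum lock_events {
	LOCKEVENT_pv_hash_hops,			/* one id per defined event */
	LOCKEVENT_pv_kick_unlock,
	LOCKEVENT_pv_kick_wake,
	LOCKEVENT_pv_latency_kick,
	LOCKEVENT_pv_latency_wake,
	/* ... remaining pv_* and lock_* events elided ... */
	lockevent_num,				/* total number of lock events */
	LOCKEVENT_reset_cnts = lockevent_num,
};

/* One counter array per CPU; lockevent_read() sums them when a file is read. */
DECLARE_PER_CPU(unsigned long, lockevents[lockevent_num]);

/* Count an event on the local CPU. */
#define lockevent_inc(ev)	this_cpu_inc(lockevents[LOCKEVENT_ ## ev])

/* Conditional form, replacing the old qstat_inc(stat, cond). */
#define lockevent_cond_inc(ev, cond)				\
do {								\
	if (cond)						\
		lockevent_inc(ev);				\
} while (0)

Under a layout like this, the patch's EVENT_COUNT(ev) macro simply names one slot of the per-CPU lockevents[] array, and the debugfs plumbing removed from this file (qstat_read(), qstat_write(), init_qspinlock_stat()) is expected to reappear in a shared lock_events implementation so other lock types can reuse the same counters, with the per-event files exposed under a common debugfs directory rather than <debugfs>/qlockstat/.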