summaryrefslogtreecommitdiffstats
path: root/kernel/irq/timings.c
diff options
context:
space:
mode:
authorDaniel Lezcano <daniel.lezcano@linaro.org>2019-03-28 11:13:36 -0400
committerThomas Gleixner <tglx@linutronix.de>2019-04-05 16:51:29 -0400
commitbbba0e7c5cdadb47a91edea1d5cd0caadbbb016f (patch)
tree18df2204805ddb0ef0d6972cab6b1f2607c2867c /kernel/irq/timings.c
parentbfe83844987a52dc1f71f757b60523811502dc93 (diff)
genirq/timings: Add array suffix computation code
The previous approach based on the variance was discarding values from the timings when they were considered as anomalies as stated by the normal law statistical model. However in the interrupt life, there can be multiple anomalies due to the nature of the device generating the interrupts, and most of the time a repeating pattern can be observed, that is particulary true for network, console, MMC or SSD devices. The variance approach missed the patterns and it was only able to deal with the interrupt coming in regular intervals, thus reducing considerably the scope of what is predictable. In order to find out the repeating patterns, the interrupt intervals are grouped in a ilog2 basis to create a suite of numbers with small amplitude. Every group contains an exponential moving average of the values belonging to the group. The array suffix, a data structure used for string searching, data compression, etc ..., is built from the suite of numbers and the suffixes are then searched in this suite. The tests showed the algorithm is able to find all repeating patterns, as well as regular interval in less than 1us on x86-i7. Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org> Signed-off-by: Thomas Gleixner <tglx@linutronix.de> Cc: rjw@rjwysocki.net Cc: ulf.hansson@linaro.org Cc: linux-pm@vger.kernel.org Link: https://lkml.kernel.org/r/20190328151336.5316-2-daniel.lezcano@linaro.org
Diffstat (limited to 'kernel/irq/timings.c')
-rw-r--r--kernel/irq/timings.c462
1 files changed, 457 insertions, 5 deletions
diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c
index 3cde046a2bc8..90c735da15d0 100644
--- a/kernel/irq/timings.c
+++ b/kernel/irq/timings.c
@@ -8,6 +8,8 @@
8#include <linux/interrupt.h> 8#include <linux/interrupt.h>
9#include <linux/idr.h> 9#include <linux/idr.h>
10#include <linux/irq.h> 10#include <linux/irq.h>
11#include <linux/math64.h>
12#include <linux/log2.h>
11 13
12#include <trace/events/irq.h> 14#include <trace/events/irq.h>
13 15
@@ -17,10 +19,6 @@ DEFINE_STATIC_KEY_FALSE(irq_timing_enabled);
17 19
18DEFINE_PER_CPU(struct irq_timings, irq_timings); 20DEFINE_PER_CPU(struct irq_timings, irq_timings);
19 21
20struct irqt_stat {
21 u64 next_evt;
22};
23
24static DEFINE_IDR(irqt_stats); 22static DEFINE_IDR(irqt_stats);
25 23
26void irq_timings_enable(void) 24void irq_timings_enable(void)
@@ -33,6 +31,410 @@ void irq_timings_disable(void)
33 static_branch_disable(&irq_timing_enabled); 31 static_branch_disable(&irq_timing_enabled);
34} 32}
35 33
34/*
35 * The main goal of this algorithm is to predict the next interrupt
36 * occurrence on the current CPU.
37 *
38 * Currently, the interrupt timings are stored in a circular array
39 * buffer every time there is an interrupt, as a tuple: the interrupt
40 * number and the associated timestamp when the event occurred <irq,
41 * timestamp>.
42 *
43 * For every interrupt occurring in a short period of time, we can
44 * measure the elapsed time between the occurrences for the same
45 * interrupt and we end up with a suite of intervals. The experience
46 * showed the interrupts are often coming following a periodic
47 * pattern.
48 *
49 * The objective of the algorithm is to find out this periodic pattern
50 * in a fastest way and use its period to predict the next irq event.
51 *
52 * When the next interrupt event is requested, we are in the situation
53 * where the interrupts are disabled and the circular buffer
54 * containing the timings is filled with the events which happened
55 * after the previous next-interrupt-event request.
56 *
57 * At this point, we read the circular buffer and we fill the irq
58 * related statistics structure. After this step, the circular array
59 * containing the timings is empty because all the values are
60 * dispatched in their corresponding buffers.
61 *
62 * Now for each interrupt, we can predict the next event by using the
63 * suffix array, log interval and exponential moving average
64 *
65 * 1. Suffix array
66 *
67 * Suffix array is an array of all the suffixes of a string. It is
68 * widely used as a data structure for compression, text search, ...
69 * For instance for the word 'banana', the suffixes will be: 'banana'
70 * 'anana' 'nana' 'ana' 'na' 'a'
71 *
72 * Usually, the suffix array is sorted but for our purpose it is
73 * not necessary and won't provide any improvement in the context of
74 * the solved problem where we clearly define the boundaries of the
75 * search by a max period and min period.
76 *
77 * The suffix array will build a suite of intervals of different
78 * length and will look for the repetition of each suite. If the suite
79 * is repeating then we have the period because it is the length of
80 * the suite whatever its position in the buffer.
81 *
82 * 2. Log interval
83 *
84 * We saw the irq timings allow to compute the interval of the
85 * occurrences for a specific interrupt. We can reasonibly assume the
86 * longer is the interval, the higher is the error for the next event
87 * and we can consider storing those interval values into an array
88 * where each slot in the array correspond to an interval at the power
89 * of 2 of the index. For example, index 12 will contain values
90 * between 2^11 and 2^12.
91 *
92 * At the end we have an array of values where at each index defines a
93 * [2^index - 1, 2 ^ index] interval values allowing to store a large
94 * number of values inside a small array.
95 *
96 * For example, if we have the value 1123, then we store it at
97 * ilog2(1123) = 10 index value.
98 *
99 * Storing those value at the specific index is done by computing an
100 * exponential moving average for this specific slot. For instance,
101 * for values 1800, 1123, 1453, ... fall under the same slot (10) and
102 * the exponential moving average is computed every time a new value
103 * is stored at this slot.
104 *
105 * 3. Exponential Moving Average
106 *
107 * The EMA is largely used to track a signal for stocks or as a low
108 * pass filter. The magic of the formula, is it is very simple and the
109 * reactivity of the average can be tuned with the factors called
110 * alpha.
111 *
112 * The higher the alphas are, the faster the average respond to the
113 * signal change. In our case, if a slot in the array is a big
114 * interval, we can have numbers with a big difference between
115 * them. The impact of those differences in the average computation
116 * can be tuned by changing the alpha value.
117 *
118 *
119 * -- The algorithm --
120 *
121 * We saw the different processing above, now let's see how they are
122 * used together.
123 *
124 * For each interrupt:
125 * For each interval:
126 * Compute the index = ilog2(interval)
127 * Compute a new_ema(buffer[index], interval)
128 * Store the index in a circular buffer
129 *
130 * Compute the suffix array of the indexes
131 *
132 * For each suffix:
133 * If the suffix is reverse-found 3 times
134 * Return suffix
135 *
136 * Return Not found
137 *
138 * However we can not have endless suffix array to be build, it won't
139 * make sense and it will add an extra overhead, so we can restrict
140 * this to a maximum suffix length of 5 and a minimum suffix length of
141 * 2. The experience showed 5 is the majority of the maximum pattern
142 * period found for different devices.
143 *
144 * The result is a pattern finding less than 1us for an interrupt.
145 *
146 * Example based on real values:
147 *
148 * Example 1 : MMC write/read interrupt interval:
149 *
150 * 223947, 1240, 1384, 1386, 1386,
151 * 217416, 1236, 1384, 1386, 1387,
152 * 214719, 1241, 1386, 1387, 1384,
153 * 213696, 1234, 1384, 1386, 1388,
154 * 219904, 1240, 1385, 1389, 1385,
155 * 212240, 1240, 1386, 1386, 1386,
156 * 214415, 1236, 1384, 1386, 1387,
157 * 214276, 1234, 1384, 1388, ?
158 *
159 * For each element, apply ilog2(value)
160 *
161 * 15, 8, 8, 8, 8,
162 * 15, 8, 8, 8, 8,
163 * 15, 8, 8, 8, 8,
164 * 15, 8, 8, 8, 8,
165 * 15, 8, 8, 8, 8,
166 * 15, 8, 8, 8, 8,
167 * 15, 8, 8, 8, 8,
168 * 15, 8, 8, 8, ?
169 *
170 * Max period of 5, we take the last (max_period * 3) 15 elements as
171 * we can be confident if the pattern repeats itself three times it is
172 * a repeating pattern.
173 *
174 * 8,
175 * 15, 8, 8, 8, 8,
176 * 15, 8, 8, 8, 8,
177 * 15, 8, 8, 8, ?
178 *
179 * Suffixes are:
180 *
181 * 1) 8, 15, 8, 8, 8 <- max period
182 * 2) 8, 15, 8, 8
183 * 3) 8, 15, 8
184 * 4) 8, 15 <- min period
185 *
186 * From there we search the repeating pattern for each suffix.
187 *
188 * buffer: 8, 15, 8, 8, 8, 8, 15, 8, 8, 8, 8, 15, 8, 8, 8
189 * | | | | | | | | | | | | | | |
190 * 8, 15, 8, 8, 8 | | | | | | | | | |
191 * 8, 15, 8, 8, 8 | | | | |
192 * 8, 15, 8, 8, 8
193 *
194 * When moving the suffix, we found exactly 3 matches.
195 *
196 * The first suffix with period 5 is repeating.
197 *
198 * The next event is (3 * max_period) % suffix_period
199 *
200 * In this example, the result 0, so the next event is suffix[0] => 8
201 *
202 * However, 8 is the index in the array of exponential moving average
203 * which was calculated on the fly when storing the values, so the
204 * interval is ema[8] = 1366
205 *
206 *
207 * Example 2:
208 *
209 * 4, 3, 5, 100,
210 * 3, 3, 5, 117,
211 * 4, 4, 5, 112,
212 * 4, 3, 4, 110,
213 * 3, 5, 3, 117,
214 * 4, 4, 5, 112,
215 * 4, 3, 4, 110,
216 * 3, 4, 5, 112,
217 * 4, 3, 4, 110
218 *
219 * ilog2
220 *
221 * 0, 0, 0, 4,
222 * 0, 0, 0, 4,
223 * 0, 0, 0, 4,
224 * 0, 0, 0, 4,
225 * 0, 0, 0, 4,
226 * 0, 0, 0, 4,
227 * 0, 0, 0, 4,
228 * 0, 0, 0, 4,
229 * 0, 0, 0, 4
230 *
231 * Max period 5:
232 * 0, 0, 4,
233 * 0, 0, 0, 4,
234 * 0, 0, 0, 4,
235 * 0, 0, 0, 4
236 *
237 * Suffixes:
238 *
239 * 1) 0, 0, 4, 0, 0
240 * 2) 0, 0, 4, 0
241 * 3) 0, 0, 4
242 * 4) 0, 0
243 *
244 * buffer: 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4
245 * | | | | | | X
246 * 0, 0, 4, 0, 0, | X
247 * 0, 0
248 *
249 * buffer: 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4
250 * | | | | | | | | | | | | | | |
251 * 0, 0, 4, 0, | | | | | | | | | | |
252 * 0, 0, 4, 0, | | | | | | |
253 * 0, 0, 4, 0, | | |
254 * 0 0 4
255 *
256 * Pattern is found 3 times, the remaining is 1 which results from
257 * (max_period * 3) % suffix_period. This value is the index in the
258 * suffix arrays. The suffix array for a period 4 has the value 4
259 * at index 1.
260 */
261#define EMA_ALPHA_VAL 64
262#define EMA_ALPHA_SHIFT 7
263
264#define PREDICTION_PERIOD_MIN 2
265#define PREDICTION_PERIOD_MAX 5
266#define PREDICTION_FACTOR 4
267#define PREDICTION_MAX 10 /* 2 ^ PREDICTION_MAX useconds */
268#define PREDICTION_BUFFER_SIZE 16 /* slots for EMAs, hardly more than 16 */
269
270struct irqt_stat {
271 u64 last_ts;
272 u64 ema_time[PREDICTION_BUFFER_SIZE];
273 int timings[IRQ_TIMINGS_SIZE];
274 int circ_timings[IRQ_TIMINGS_SIZE];
275 int count;
276};
277
278/*
279 * Exponential moving average computation
280 */
281static u64 irq_timings_ema_new(u64 value, u64 ema_old)
282{
283 s64 diff;
284
285 if (unlikely(!ema_old))
286 return value;
287
288 diff = (value - ema_old) * EMA_ALPHA_VAL;
289 /*
290 * We can use a s64 type variable to be added with the u64
291 * ema_old variable as this one will never have its topmost
292 * bit set, it will be always smaller than 2^63 nanosec
293 * interrupt interval (292 years).
294 */
295 return ema_old + (diff >> EMA_ALPHA_SHIFT);
296}
297
298static int irq_timings_next_event_index(int *buffer, size_t len, int period_max)
299{
300 int i;
301
302 /*
303 * The buffer contains the suite of intervals, in a ilog2
304 * basis, we are looking for a repetition. We point the
305 * beginning of the search three times the length of the
306 * period beginning at the end of the buffer. We do that for
307 * each suffix.
308 */
309 for (i = period_max; i >= PREDICTION_PERIOD_MIN ; i--) {
310
311 int *begin = &buffer[len - (i * 3)];
312 int *ptr = begin;
313
314 /*
315 * We look if the suite with period 'i' repeat
316 * itself. If it is truncated at the end, as it
317 * repeats we can use the period to find out the next
318 * element.
319 */
320 while (!memcmp(ptr, begin, i * sizeof(*ptr))) {
321 ptr += i;
322 if (ptr >= &buffer[len])
323 return begin[((i * 3) % i)];
324 }
325 }
326
327 return -1;
328}
329
330static u64 __irq_timings_next_event(struct irqt_stat *irqs, int irq, u64 now)
331{
332 int index, i, period_max, count, start, min = INT_MAX;
333
334 if ((now - irqs->last_ts) >= NSEC_PER_SEC) {
335 irqs->count = irqs->last_ts = 0;
336 return U64_MAX;
337 }
338
339 /*
340 * As we want to find three times the repetition, we need a
341 * number of intervals greater or equal to three times the
342 * maximum period, otherwise we truncate the max period.
343 */
344 period_max = irqs->count > (3 * PREDICTION_PERIOD_MAX) ?
345 PREDICTION_PERIOD_MAX : irqs->count / 3;
346
347 /*
348 * If we don't have enough irq timings for this prediction,
349 * just bail out.
350 */
351 if (period_max <= PREDICTION_PERIOD_MIN)
352 return U64_MAX;
353
354 /*
355 * 'count' will depends if the circular buffer wrapped or not
356 */
357 count = irqs->count < IRQ_TIMINGS_SIZE ?
358 irqs->count : IRQ_TIMINGS_SIZE;
359
360 start = irqs->count < IRQ_TIMINGS_SIZE ?
361 0 : (irqs->count & IRQ_TIMINGS_MASK);
362
363 /*
364 * Copy the content of the circular buffer into another buffer
365 * in order to linearize the buffer instead of dealing with
366 * wrapping indexes and shifted array which will be prone to
367 * error and extremelly difficult to debug.
368 */
369 for (i = 0; i < count; i++) {
370 int index = (start + i) & IRQ_TIMINGS_MASK;
371
372 irqs->timings[i] = irqs->circ_timings[index];
373 min = min_t(int, irqs->timings[i], min);
374 }
375
376 index = irq_timings_next_event_index(irqs->timings, count, period_max);
377 if (index < 0)
378 return irqs->last_ts + irqs->ema_time[min];
379
380 return irqs->last_ts + irqs->ema_time[index];
381}
382
383static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts)
384{
385 u64 old_ts = irqs->last_ts;
386 u64 interval;
387 int index;
388
389 /*
390 * The timestamps are absolute time values, we need to compute
391 * the timing interval between two interrupts.
392 */
393 irqs->last_ts = ts;
394
395 /*
396 * The interval type is u64 in order to deal with the same
397 * type in our computation, that prevent mindfuck issues with
398 * overflow, sign and division.
399 */
400 interval = ts - old_ts;
401
402 /*
403 * The interrupt triggered more than one second apart, that
404 * ends the sequence as predictible for our purpose. In this
405 * case, assume we have the beginning of a sequence and the
406 * timestamp is the first value. As it is impossible to
407 * predict anything at this point, return.
408 *
409 * Note the first timestamp of the sequence will always fall
410 * in this test because the old_ts is zero. That is what we
411 * want as we need another timestamp to compute an interval.
412 */
413 if (interval >= NSEC_PER_SEC) {
414 irqs->count = 0;
415 return;
416 }
417
418 /*
419 * Get the index in the ema table for this interrupt. The
420 * PREDICTION_FACTOR increase the interval size for the array
421 * of exponential average.
422 */
423 index = likely(interval) ?
424 ilog2((interval >> 10) / PREDICTION_FACTOR) : 0;
425
426 /*
427 * Store the index as an element of the pattern in another
428 * circular array.
429 */
430 irqs->circ_timings[irqs->count & IRQ_TIMINGS_MASK] = index;
431
432 irqs->ema_time[index] = irq_timings_ema_new(interval,
433 irqs->ema_time[index]);
434
435 irqs->count++;
436}
437
36/** 438/**
37 * irq_timings_next_event - Return when the next event is supposed to arrive 439 * irq_timings_next_event - Return when the next event is supposed to arrive
38 * 440 *
@@ -61,6 +463,12 @@ void irq_timings_disable(void)
61 */ 463 */
62u64 irq_timings_next_event(u64 now) 464u64 irq_timings_next_event(u64 now)
63{ 465{
466 struct irq_timings *irqts = this_cpu_ptr(&irq_timings);
467 struct irqt_stat *irqs;
468 struct irqt_stat __percpu *s;
469 u64 ts, next_evt = U64_MAX;
470 int i, irq = 0;
471
64 /* 472 /*
65 * This function must be called with the local irq disabled in 473 * This function must be called with the local irq disabled in
66 * order to prevent the timings circular buffer to be updated 474 * order to prevent the timings circular buffer to be updated
@@ -68,7 +476,51 @@ u64 irq_timings_next_event(u64 now)
68 */ 476 */
69 lockdep_assert_irqs_disabled(); 477 lockdep_assert_irqs_disabled();
70 478
71 return 0; 479 if (!irqts->count)
480 return next_evt;
481
482 /*
483 * Number of elements in the circular buffer: If it happens it
484 * was flushed before, then the number of elements could be
485 * smaller than IRQ_TIMINGS_SIZE, so the count is used,
486 * otherwise the array size is used as we wrapped. The index
487 * begins from zero when we did not wrap. That could be done
488 * in a nicer way with the proper circular array structure
489 * type but with the cost of extra computation in the
490 * interrupt handler hot path. We choose efficiency.
491 *
492 * Inject measured irq/timestamp to the pattern prediction
493 * model while decrementing the counter because we consume the
494 * data from our circular buffer.
495 */
496
497 i = (irqts->count & IRQ_TIMINGS_MASK) - 1;
498 irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count);
499
500 for (; irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) {
501 irq = irq_timing_decode(irqts->values[i], &ts);
502 s = idr_find(&irqt_stats, irq);
503 if (s)
504 irq_timings_store(irq, this_cpu_ptr(s), ts);
505 }
506
507 /*
508 * Look in the list of interrupts' statistics, the earliest
509 * next event.
510 */
511 idr_for_each_entry(&irqt_stats, s, i) {
512
513 irqs = this_cpu_ptr(s);
514
515 ts = __irq_timings_next_event(irqs, i, now);
516 if (ts <= now)
517 return now;
518
519 if (ts < next_evt)
520 next_evt = ts;
521 }
522
523 return next_evt;
72} 524}
73 525
74void irq_timings_free(int irq) 526void irq_timings_free(int irq)