diff options
Diffstat (limited to 'kernel/irq/timings.c')
-rw-r--r-- | kernel/irq/timings.c | 522 |
1 files changed, 363 insertions, 159 deletions
diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c index 1e4cb63a5c82..90c735da15d0 100644 --- a/kernel/irq/timings.c +++ b/kernel/irq/timings.c | |||
@@ -9,6 +9,7 @@ | |||
9 | #include <linux/idr.h> | 9 | #include <linux/idr.h> |
10 | #include <linux/irq.h> | 10 | #include <linux/irq.h> |
11 | #include <linux/math64.h> | 11 | #include <linux/math64.h> |
12 | #include <linux/log2.h> | ||
12 | 13 | ||
13 | #include <trace/events/irq.h> | 14 | #include <trace/events/irq.h> |
14 | 15 | ||
@@ -18,16 +19,6 @@ DEFINE_STATIC_KEY_FALSE(irq_timing_enabled); | |||
18 | 19 | ||
19 | DEFINE_PER_CPU(struct irq_timings, irq_timings); | 20 | DEFINE_PER_CPU(struct irq_timings, irq_timings); |
20 | 21 | ||
21 | struct irqt_stat { | ||
22 | u64 next_evt; | ||
23 | u64 last_ts; | ||
24 | u64 variance; | ||
25 | u32 avg; | ||
26 | u32 nr_samples; | ||
27 | int anomalies; | ||
28 | int valid; | ||
29 | }; | ||
30 | |||
31 | static DEFINE_IDR(irqt_stats); | 22 | static DEFINE_IDR(irqt_stats); |
32 | 23 | ||
33 | void irq_timings_enable(void) | 24 | void irq_timings_enable(void) |
@@ -40,75 +31,360 @@ void irq_timings_disable(void) | |||
40 | static_branch_disable(&irq_timing_enabled); | 31 | static_branch_disable(&irq_timing_enabled); |
41 | } | 32 | } |
42 | 33 | ||
43 | /** | 34 | /* |
44 | * irqs_update - update the irq timing statistics with a new timestamp | 35 | * The main goal of this algorithm is to predict the next interrupt |
36 | * occurrence on the current CPU. | ||
37 | * | ||
38 | * Currently, the interrupt timings are stored in a circular array | ||
39 | * buffer every time there is an interrupt, as a tuple: the interrupt | ||
40 | * number and the associated timestamp when the event occurred <irq, | ||
41 | * timestamp>. | ||
42 | * | ||
43 | * For every interrupt occurring in a short period of time, we can | ||
44 | * measure the elapsed time between the occurrences for the same | ||
45 | * interrupt and we end up with a suite of intervals. The experience | ||
46 | * showed the interrupts are often coming following a periodic | ||
47 | * pattern. | ||
48 | * | ||
49 | * The objective of the algorithm is to find out this periodic pattern | ||
50 | * in a fastest way and use its period to predict the next irq event. | ||
51 | * | ||
52 | * When the next interrupt event is requested, we are in the situation | ||
53 | * where the interrupts are disabled and the circular buffer | ||
54 | * containing the timings is filled with the events which happened | ||
55 | * after the previous next-interrupt-event request. | ||
56 | * | ||
57 | * At this point, we read the circular buffer and we fill the irq | ||
58 | * related statistics structure. After this step, the circular array | ||
59 | * containing the timings is empty because all the values are | ||
60 | * dispatched in their corresponding buffers. | ||
61 | * | ||
62 | * Now for each interrupt, we can predict the next event by using the | ||
63 | * suffix array, log interval and exponential moving average | ||
64 | * | ||
65 | * 1. Suffix array | ||
66 | * | ||
67 | * Suffix array is an array of all the suffixes of a string. It is | ||
68 | * widely used as a data structure for compression, text search, ... | ||
69 | * For instance for the word 'banana', the suffixes will be: 'banana' | ||
70 | * 'anana' 'nana' 'ana' 'na' 'a' | ||
71 | * | ||
72 | * Usually, the suffix array is sorted but for our purpose it is | ||
73 | * not necessary and won't provide any improvement in the context of | ||
74 | * the solved problem where we clearly define the boundaries of the | ||
75 | * search by a max period and min period. | ||
76 | * | ||
77 | * The suffix array will build a suite of intervals of different | ||
78 | * length and will look for the repetition of each suite. If the suite | ||
79 | * is repeating then we have the period because it is the length of | ||
80 | * the suite whatever its position in the buffer. | ||
81 | * | ||
82 | * 2. Log interval | ||
83 | * | ||
84 | * We saw the irq timings allow to compute the interval of the | ||
85 | * occurrences for a specific interrupt. We can reasonibly assume the | ||
86 | * longer is the interval, the higher is the error for the next event | ||
87 | * and we can consider storing those interval values into an array | ||
88 | * where each slot in the array correspond to an interval at the power | ||
89 | * of 2 of the index. For example, index 12 will contain values | ||
90 | * between 2^11 and 2^12. | ||
91 | * | ||
92 | * At the end we have an array of values where at each index defines a | ||
93 | * [2^index - 1, 2 ^ index] interval values allowing to store a large | ||
94 | * number of values inside a small array. | ||
95 | * | ||
96 | * For example, if we have the value 1123, then we store it at | ||
97 | * ilog2(1123) = 10 index value. | ||
98 | * | ||
99 | * Storing those value at the specific index is done by computing an | ||
100 | * exponential moving average for this specific slot. For instance, | ||
101 | * for values 1800, 1123, 1453, ... fall under the same slot (10) and | ||
102 | * the exponential moving average is computed every time a new value | ||
103 | * is stored at this slot. | ||
104 | * | ||
105 | * 3. Exponential Moving Average | ||
106 | * | ||
107 | * The EMA is largely used to track a signal for stocks or as a low | ||
108 | * pass filter. The magic of the formula, is it is very simple and the | ||
109 | * reactivity of the average can be tuned with the factors called | ||
110 | * alpha. | ||
111 | * | ||
112 | * The higher the alphas are, the faster the average respond to the | ||
113 | * signal change. In our case, if a slot in the array is a big | ||
114 | * interval, we can have numbers with a big difference between | ||
115 | * them. The impact of those differences in the average computation | ||
116 | * can be tuned by changing the alpha value. | ||
117 | * | ||
118 | * | ||
119 | * -- The algorithm -- | ||
120 | * | ||
121 | * We saw the different processing above, now let's see how they are | ||
122 | * used together. | ||
123 | * | ||
124 | * For each interrupt: | ||
125 | * For each interval: | ||
126 | * Compute the index = ilog2(interval) | ||
127 | * Compute a new_ema(buffer[index], interval) | ||
128 | * Store the index in a circular buffer | ||
129 | * | ||
130 | * Compute the suffix array of the indexes | ||
131 | * | ||
132 | * For each suffix: | ||
133 | * If the suffix is reverse-found 3 times | ||
134 | * Return suffix | ||
135 | * | ||
136 | * Return Not found | ||
137 | * | ||
138 | * However we can not have endless suffix array to be build, it won't | ||
139 | * make sense and it will add an extra overhead, so we can restrict | ||
140 | * this to a maximum suffix length of 5 and a minimum suffix length of | ||
141 | * 2. The experience showed 5 is the majority of the maximum pattern | ||
142 | * period found for different devices. | ||
143 | * | ||
144 | * The result is a pattern finding less than 1us for an interrupt. | ||
45 | * | 145 | * |
46 | * @irqs: an irqt_stat struct pointer | 146 | * Example based on real values: |
47 | * @ts: the new timestamp | ||
48 | * | 147 | * |
49 | * The statistics are computed online, in other words, the code is | 148 | * Example 1 : MMC write/read interrupt interval: |
50 | * designed to compute the statistics on a stream of values rather | ||
51 | * than doing multiple passes on the values to compute the average, | ||
52 | * then the variance. The integer division introduces a loss of | ||
53 | * precision but with an acceptable error margin regarding the results | ||
54 | * we would have with the double floating precision: we are dealing | ||
55 | * with nanosec, so big numbers, consequently the mantisse is | ||
56 | * negligeable, especially when converting the time in usec | ||
57 | * afterwards. | ||
58 | * | 149 | * |
59 | * The computation happens at idle time. When the CPU is not idle, the | 150 | * 223947, 1240, 1384, 1386, 1386, |
60 | * interrupts' timestamps are stored in the circular buffer, when the | 151 | * 217416, 1236, 1384, 1386, 1387, |
61 | * CPU goes idle and this routine is called, all the buffer's values | 152 | * 214719, 1241, 1386, 1387, 1384, |
62 | * are injected in the statistical model continuying to extend the | 153 | * 213696, 1234, 1384, 1386, 1388, |
63 | * statistics from the previous busy-idle cycle. | 154 | * 219904, 1240, 1385, 1389, 1385, |
155 | * 212240, 1240, 1386, 1386, 1386, | ||
156 | * 214415, 1236, 1384, 1386, 1387, | ||
157 | * 214276, 1234, 1384, 1388, ? | ||
64 | * | 158 | * |
65 | * The observations showed a device will trigger a burst of periodic | 159 | * For each element, apply ilog2(value) |
66 | * interrupts followed by one or two peaks of longer time, for | ||
67 | * instance when a SD card device flushes its cache, then the periodic | ||
68 | * intervals occur again. A one second inactivity period resets the | ||
69 | * stats, that gives us the certitude the statistical values won't | ||
70 | * exceed 1x10^9, thus the computation won't overflow. | ||
71 | * | 160 | * |
72 | * Basically, the purpose of the algorithm is to watch the periodic | 161 | * 15, 8, 8, 8, 8, |
73 | * interrupts and eliminate the peaks. | 162 | * 15, 8, 8, 8, 8, |
163 | * 15, 8, 8, 8, 8, | ||
164 | * 15, 8, 8, 8, 8, | ||
165 | * 15, 8, 8, 8, 8, | ||
166 | * 15, 8, 8, 8, 8, | ||
167 | * 15, 8, 8, 8, 8, | ||
168 | * 15, 8, 8, 8, ? | ||
74 | * | 169 | * |
75 | * An interrupt is considered periodically stable if the interval of | 170 | * Max period of 5, we take the last (max_period * 3) 15 elements as |
76 | * its occurences follow the normal distribution, thus the values | 171 | * we can be confident if the pattern repeats itself three times it is |
77 | * comply with: | 172 | * a repeating pattern. |
78 | * | 173 | * |
79 | * avg - 3 x stddev < value < avg + 3 x stddev | 174 | * 8, |
175 | * 15, 8, 8, 8, 8, | ||
176 | * 15, 8, 8, 8, 8, | ||
177 | * 15, 8, 8, 8, ? | ||
80 | * | 178 | * |
81 | * Which can be simplified to: | 179 | * Suffixes are: |
82 | * | 180 | * |
83 | * -3 x stddev < value - avg < 3 x stddev | 181 | * 1) 8, 15, 8, 8, 8 <- max period |
182 | * 2) 8, 15, 8, 8 | ||
183 | * 3) 8, 15, 8 | ||
184 | * 4) 8, 15 <- min period | ||
84 | * | 185 | * |
85 | * abs(value - avg) < 3 x stddev | 186 | * From there we search the repeating pattern for each suffix. |
86 | * | 187 | * |
87 | * In order to save a costly square root computation, we use the | 188 | * buffer: 8, 15, 8, 8, 8, 8, 15, 8, 8, 8, 8, 15, 8, 8, 8 |
88 | * variance. For the record, stddev = sqrt(variance). The equation | 189 | * | | | | | | | | | | | | | | | |
89 | * above becomes: | 190 | * 8, 15, 8, 8, 8 | | | | | | | | | | |
191 | * 8, 15, 8, 8, 8 | | | | | | ||
192 | * 8, 15, 8, 8, 8 | ||
90 | * | 193 | * |
91 | * abs(value - avg) < 3 x sqrt(variance) | 194 | * When moving the suffix, we found exactly 3 matches. |
92 | * | 195 | * |
93 | * And finally we square it: | 196 | * The first suffix with period 5 is repeating. |
94 | * | 197 | * |
95 | * (value - avg) ^ 2 < (3 x sqrt(variance)) ^ 2 | 198 | * The next event is (3 * max_period) % suffix_period |
96 | * | 199 | * |
97 | * (value - avg) x (value - avg) < 9 x variance | 200 | * In this example, the result 0, so the next event is suffix[0] => 8 |
98 | * | 201 | * |
99 | * Statistically speaking, any values out of this interval is | 202 | * However, 8 is the index in the array of exponential moving average |
100 | * considered as an anomaly and is discarded. However, a normal | 203 | * which was calculated on the fly when storing the values, so the |
101 | * distribution appears when the number of samples is 30 (it is the | 204 | * interval is ema[8] = 1366 |
102 | * rule of thumb in statistics, cf. "30 samples" on Internet). When | ||
103 | * there are three consecutive anomalies, the statistics are resetted. | ||
104 | * | 205 | * |
206 | * | ||
207 | * Example 2: | ||
208 | * | ||
209 | * 4, 3, 5, 100, | ||
210 | * 3, 3, 5, 117, | ||
211 | * 4, 4, 5, 112, | ||
212 | * 4, 3, 4, 110, | ||
213 | * 3, 5, 3, 117, | ||
214 | * 4, 4, 5, 112, | ||
215 | * 4, 3, 4, 110, | ||
216 | * 3, 4, 5, 112, | ||
217 | * 4, 3, 4, 110 | ||
218 | * | ||
219 | * ilog2 | ||
220 | * | ||
221 | * 0, 0, 0, 4, | ||
222 | * 0, 0, 0, 4, | ||
223 | * 0, 0, 0, 4, | ||
224 | * 0, 0, 0, 4, | ||
225 | * 0, 0, 0, 4, | ||
226 | * 0, 0, 0, 4, | ||
227 | * 0, 0, 0, 4, | ||
228 | * 0, 0, 0, 4, | ||
229 | * 0, 0, 0, 4 | ||
230 | * | ||
231 | * Max period 5: | ||
232 | * 0, 0, 4, | ||
233 | * 0, 0, 0, 4, | ||
234 | * 0, 0, 0, 4, | ||
235 | * 0, 0, 0, 4 | ||
236 | * | ||
237 | * Suffixes: | ||
238 | * | ||
239 | * 1) 0, 0, 4, 0, 0 | ||
240 | * 2) 0, 0, 4, 0 | ||
241 | * 3) 0, 0, 4 | ||
242 | * 4) 0, 0 | ||
243 | * | ||
244 | * buffer: 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4 | ||
245 | * | | | | | | X | ||
246 | * 0, 0, 4, 0, 0, | X | ||
247 | * 0, 0 | ||
248 | * | ||
249 | * buffer: 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4, 0, 0, 0, 4 | ||
250 | * | | | | | | | | | | | | | | | | ||
251 | * 0, 0, 4, 0, | | | | | | | | | | | | ||
252 | * 0, 0, 4, 0, | | | | | | | | ||
253 | * 0, 0, 4, 0, | | | | ||
254 | * 0 0 4 | ||
255 | * | ||
256 | * Pattern is found 3 times, the remaining is 1 which results from | ||
257 | * (max_period * 3) % suffix_period. This value is the index in the | ||
258 | * suffix arrays. The suffix array for a period 4 has the value 4 | ||
259 | * at index 1. | ||
260 | */ | ||
261 | #define EMA_ALPHA_VAL 64 | ||
262 | #define EMA_ALPHA_SHIFT 7 | ||
263 | |||
264 | #define PREDICTION_PERIOD_MIN 2 | ||
265 | #define PREDICTION_PERIOD_MAX 5 | ||
266 | #define PREDICTION_FACTOR 4 | ||
267 | #define PREDICTION_MAX 10 /* 2 ^ PREDICTION_MAX useconds */ | ||
268 | #define PREDICTION_BUFFER_SIZE 16 /* slots for EMAs, hardly more than 16 */ | ||
269 | |||
270 | struct irqt_stat { | ||
271 | u64 last_ts; | ||
272 | u64 ema_time[PREDICTION_BUFFER_SIZE]; | ||
273 | int timings[IRQ_TIMINGS_SIZE]; | ||
274 | int circ_timings[IRQ_TIMINGS_SIZE]; | ||
275 | int count; | ||
276 | }; | ||
277 | |||
278 | /* | ||
279 | * Exponential moving average computation | ||
105 | */ | 280 | */ |
106 | static void irqs_update(struct irqt_stat *irqs, u64 ts) | 281 | static u64 irq_timings_ema_new(u64 value, u64 ema_old) |
282 | { | ||
283 | s64 diff; | ||
284 | |||
285 | if (unlikely(!ema_old)) | ||
286 | return value; | ||
287 | |||
288 | diff = (value - ema_old) * EMA_ALPHA_VAL; | ||
289 | /* | ||
290 | * We can use a s64 type variable to be added with the u64 | ||
291 | * ema_old variable as this one will never have its topmost | ||
292 | * bit set, it will be always smaller than 2^63 nanosec | ||
293 | * interrupt interval (292 years). | ||
294 | */ | ||
295 | return ema_old + (diff >> EMA_ALPHA_SHIFT); | ||
296 | } | ||
297 | |||
298 | static int irq_timings_next_event_index(int *buffer, size_t len, int period_max) | ||
299 | { | ||
300 | int i; | ||
301 | |||
302 | /* | ||
303 | * The buffer contains the suite of intervals, in a ilog2 | ||
304 | * basis, we are looking for a repetition. We point the | ||
305 | * beginning of the search three times the length of the | ||
306 | * period beginning at the end of the buffer. We do that for | ||
307 | * each suffix. | ||
308 | */ | ||
309 | for (i = period_max; i >= PREDICTION_PERIOD_MIN ; i--) { | ||
310 | |||
311 | int *begin = &buffer[len - (i * 3)]; | ||
312 | int *ptr = begin; | ||
313 | |||
314 | /* | ||
315 | * We look if the suite with period 'i' repeat | ||
316 | * itself. If it is truncated at the end, as it | ||
317 | * repeats we can use the period to find out the next | ||
318 | * element. | ||
319 | */ | ||
320 | while (!memcmp(ptr, begin, i * sizeof(*ptr))) { | ||
321 | ptr += i; | ||
322 | if (ptr >= &buffer[len]) | ||
323 | return begin[((i * 3) % i)]; | ||
324 | } | ||
325 | } | ||
326 | |||
327 | return -1; | ||
328 | } | ||
329 | |||
330 | static u64 __irq_timings_next_event(struct irqt_stat *irqs, int irq, u64 now) | ||
331 | { | ||
332 | int index, i, period_max, count, start, min = INT_MAX; | ||
333 | |||
334 | if ((now - irqs->last_ts) >= NSEC_PER_SEC) { | ||
335 | irqs->count = irqs->last_ts = 0; | ||
336 | return U64_MAX; | ||
337 | } | ||
338 | |||
339 | /* | ||
340 | * As we want to find three times the repetition, we need a | ||
341 | * number of intervals greater or equal to three times the | ||
342 | * maximum period, otherwise we truncate the max period. | ||
343 | */ | ||
344 | period_max = irqs->count > (3 * PREDICTION_PERIOD_MAX) ? | ||
345 | PREDICTION_PERIOD_MAX : irqs->count / 3; | ||
346 | |||
347 | /* | ||
348 | * If we don't have enough irq timings for this prediction, | ||
349 | * just bail out. | ||
350 | */ | ||
351 | if (period_max <= PREDICTION_PERIOD_MIN) | ||
352 | return U64_MAX; | ||
353 | |||
354 | /* | ||
355 | * 'count' will depends if the circular buffer wrapped or not | ||
356 | */ | ||
357 | count = irqs->count < IRQ_TIMINGS_SIZE ? | ||
358 | irqs->count : IRQ_TIMINGS_SIZE; | ||
359 | |||
360 | start = irqs->count < IRQ_TIMINGS_SIZE ? | ||
361 | 0 : (irqs->count & IRQ_TIMINGS_MASK); | ||
362 | |||
363 | /* | ||
364 | * Copy the content of the circular buffer into another buffer | ||
365 | * in order to linearize the buffer instead of dealing with | ||
366 | * wrapping indexes and shifted array which will be prone to | ||
367 | * error and extremelly difficult to debug. | ||
368 | */ | ||
369 | for (i = 0; i < count; i++) { | ||
370 | int index = (start + i) & IRQ_TIMINGS_MASK; | ||
371 | |||
372 | irqs->timings[i] = irqs->circ_timings[index]; | ||
373 | min = min_t(int, irqs->timings[i], min); | ||
374 | } | ||
375 | |||
376 | index = irq_timings_next_event_index(irqs->timings, count, period_max); | ||
377 | if (index < 0) | ||
378 | return irqs->last_ts + irqs->ema_time[min]; | ||
379 | |||
380 | return irqs->last_ts + irqs->ema_time[index]; | ||
381 | } | ||
382 | |||
383 | static inline void irq_timings_store(int irq, struct irqt_stat *irqs, u64 ts) | ||
107 | { | 384 | { |
108 | u64 old_ts = irqs->last_ts; | 385 | u64 old_ts = irqs->last_ts; |
109 | u64 variance = 0; | ||
110 | u64 interval; | 386 | u64 interval; |
111 | s64 diff; | 387 | int index; |
112 | 388 | ||
113 | /* | 389 | /* |
114 | * The timestamps are absolute time values, we need to compute | 390 | * The timestamps are absolute time values, we need to compute |
@@ -135,87 +411,28 @@ static void irqs_update(struct irqt_stat *irqs, u64 ts) | |||
135 | * want as we need another timestamp to compute an interval. | 411 | * want as we need another timestamp to compute an interval. |
136 | */ | 412 | */ |
137 | if (interval >= NSEC_PER_SEC) { | 413 | if (interval >= NSEC_PER_SEC) { |
138 | memset(irqs, 0, sizeof(*irqs)); | 414 | irqs->count = 0; |
139 | irqs->last_ts = ts; | ||
140 | return; | 415 | return; |
141 | } | 416 | } |
142 | 417 | ||
143 | /* | 418 | /* |
144 | * Pre-compute the delta with the average as the result is | 419 | * Get the index in the ema table for this interrupt. The |
145 | * used several times in this function. | 420 | * PREDICTION_FACTOR increase the interval size for the array |
146 | */ | 421 | * of exponential average. |
147 | diff = interval - irqs->avg; | ||
148 | |||
149 | /* | ||
150 | * Increment the number of samples. | ||
151 | */ | ||
152 | irqs->nr_samples++; | ||
153 | |||
154 | /* | ||
155 | * Online variance divided by the number of elements if there | ||
156 | * is more than one sample. Normally the formula is division | ||
157 | * by nr_samples - 1 but we assume the number of element will be | ||
158 | * more than 32 and dividing by 32 instead of 31 is enough | ||
159 | * precise. | ||
160 | */ | ||
161 | if (likely(irqs->nr_samples > 1)) | ||
162 | variance = irqs->variance >> IRQ_TIMINGS_SHIFT; | ||
163 | |||
164 | /* | ||
165 | * The rule of thumb in statistics for the normal distribution | ||
166 | * is having at least 30 samples in order to have the model to | ||
167 | * apply. Values outside the interval are considered as an | ||
168 | * anomaly. | ||
169 | */ | ||
170 | if ((irqs->nr_samples >= 30) && ((diff * diff) > (9 * variance))) { | ||
171 | /* | ||
172 | * After three consecutive anomalies, we reset the | ||
173 | * stats as it is no longer stable enough. | ||
174 | */ | ||
175 | if (irqs->anomalies++ >= 3) { | ||
176 | memset(irqs, 0, sizeof(*irqs)); | ||
177 | irqs->last_ts = ts; | ||
178 | return; | ||
179 | } | ||
180 | } else { | ||
181 | /* | ||
182 | * The anomalies must be consecutives, so at this | ||
183 | * point, we reset the anomalies counter. | ||
184 | */ | ||
185 | irqs->anomalies = 0; | ||
186 | } | ||
187 | |||
188 | /* | ||
189 | * The interrupt is considered stable enough to try to predict | ||
190 | * the next event on it. | ||
191 | */ | 422 | */ |
192 | irqs->valid = 1; | 423 | index = likely(interval) ? |
424 | ilog2((interval >> 10) / PREDICTION_FACTOR) : 0; | ||
193 | 425 | ||
194 | /* | 426 | /* |
195 | * Online average algorithm: | 427 | * Store the index as an element of the pattern in another |
196 | * | 428 | * circular array. |
197 | * new_average = average + ((value - average) / count) | ||
198 | * | ||
199 | * The variance computation depends on the new average | ||
200 | * to be computed here first. | ||
201 | * | ||
202 | */ | 429 | */ |
203 | irqs->avg = irqs->avg + (diff >> IRQ_TIMINGS_SHIFT); | 430 | irqs->circ_timings[irqs->count & IRQ_TIMINGS_MASK] = index; |
204 | 431 | ||
205 | /* | 432 | irqs->ema_time[index] = irq_timings_ema_new(interval, |
206 | * Online variance algorithm: | 433 | irqs->ema_time[index]); |
207 | * | ||
208 | * new_variance = variance + (value - average) x (value - new_average) | ||
209 | * | ||
210 | * Warning: irqs->avg is updated with the line above, hence | ||
211 | * 'interval - irqs->avg' is no longer equal to 'diff' | ||
212 | */ | ||
213 | irqs->variance = irqs->variance + (diff * (interval - irqs->avg)); | ||
214 | 434 | ||
215 | /* | 435 | irqs->count++; |
216 | * Update the next event | ||
217 | */ | ||
218 | irqs->next_evt = ts + irqs->avg; | ||
219 | } | 436 | } |
220 | 437 | ||
221 | /** | 438 | /** |
@@ -259,6 +476,9 @@ u64 irq_timings_next_event(u64 now) | |||
259 | */ | 476 | */ |
260 | lockdep_assert_irqs_disabled(); | 477 | lockdep_assert_irqs_disabled(); |
261 | 478 | ||
479 | if (!irqts->count) | ||
480 | return next_evt; | ||
481 | |||
262 | /* | 482 | /* |
263 | * Number of elements in the circular buffer: If it happens it | 483 | * Number of elements in the circular buffer: If it happens it |
264 | * was flushed before, then the number of elements could be | 484 | * was flushed before, then the number of elements could be |
@@ -269,21 +489,19 @@ u64 irq_timings_next_event(u64 now) | |||
269 | * type but with the cost of extra computation in the | 489 | * type but with the cost of extra computation in the |
270 | * interrupt handler hot path. We choose efficiency. | 490 | * interrupt handler hot path. We choose efficiency. |
271 | * | 491 | * |
272 | * Inject measured irq/timestamp to the statistical model | 492 | * Inject measured irq/timestamp to the pattern prediction |
273 | * while decrementing the counter because we consume the data | 493 | * model while decrementing the counter because we consume the |
274 | * from our circular buffer. | 494 | * data from our circular buffer. |
275 | */ | 495 | */ |
276 | for (i = irqts->count & IRQ_TIMINGS_MASK, | ||
277 | irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count); | ||
278 | irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) { | ||
279 | 496 | ||
280 | irq = irq_timing_decode(irqts->values[i], &ts); | 497 | i = (irqts->count & IRQ_TIMINGS_MASK) - 1; |
498 | irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count); | ||
281 | 499 | ||
500 | for (; irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) { | ||
501 | irq = irq_timing_decode(irqts->values[i], &ts); | ||
282 | s = idr_find(&irqt_stats, irq); | 502 | s = idr_find(&irqt_stats, irq); |
283 | if (s) { | 503 | if (s) |
284 | irqs = this_cpu_ptr(s); | 504 | irq_timings_store(irq, this_cpu_ptr(s), ts); |
285 | irqs_update(irqs, ts); | ||
286 | } | ||
287 | } | 505 | } |
288 | 506 | ||
289 | /* | 507 | /* |
@@ -294,26 +512,12 @@ u64 irq_timings_next_event(u64 now) | |||
294 | 512 | ||
295 | irqs = this_cpu_ptr(s); | 513 | irqs = this_cpu_ptr(s); |
296 | 514 | ||
297 | if (!irqs->valid) | 515 | ts = __irq_timings_next_event(irqs, i, now); |
298 | continue; | 516 | if (ts <= now) |
517 | return now; | ||
299 | 518 | ||
300 | if (irqs->next_evt <= now) { | 519 | if (ts < next_evt) |
301 | irq = i; | 520 | next_evt = ts; |
302 | next_evt = now; | ||
303 | |||
304 | /* | ||
305 | * This interrupt mustn't use in the future | ||
306 | * until new events occur and update the | ||
307 | * statistics. | ||
308 | */ | ||
309 | irqs->valid = 0; | ||
310 | break; | ||
311 | } | ||
312 | |||
313 | if (irqs->next_evt < next_evt) { | ||
314 | irq = i; | ||
315 | next_evt = irqs->next_evt; | ||
316 | } | ||
317 | } | 521 | } |
318 | 522 | ||
319 | return next_evt; | 523 | return next_evt; |