author	Daniel Lezcano <daniel.lezcano@linaro.org>	2017-06-23 10:11:08 -0400
committer	Thomas Gleixner <tglx@linutronix.de>	2017-06-24 05:44:39 -0400
commit	e1c921495534002d727b15a76a2f8c20b6b108b5 (patch)
tree	28500774c5b8ff667a304d9a74ead820590f85e3
parent	b2d3d61adb7b73cfe5f82404f7a130a76fc64232 (diff)
genirq/timings: Add infrastructure for estimating the next interrupt arrival time
An interrupt typically behaves as a burst of activity at periodic intervals
of time, followed by one or two peaks of longer interval. As the time
intervals are periodic, statistically speaking they follow a normal
distribution, and each interrupt can be tracked individually.

Add a mechanism to compute the statistics on all interrupts, except the
timers, which are deterministic from a prediction point of view as their
expiry time is known.

The goal is to extract the periodicity of each interrupt, add it to the
last timestamp, and so predict the next event to a certain extent. Taking
the earliest prediction gives the expected wakeup of the system (assuming
a timer won't expire before).

Signed-off-by: Daniel Lezcano <daniel.lezcano@linaro.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Nicolas Pitre <nicolas.pitre@linaro.org>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Cc: "Rafael J . Wysocki" <rafael@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Link: http://lkml.kernel.org/r/1498227072-5980-2-git-send-email-daniel.lezcano@linaro.org
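[Editor's note: to make the statistical model concrete, below is a minimal,
self-contained userspace sketch of the online average/variance update and
the squared 3-sigma test that the new kernel/irq/timings.c code implements.
All names here are hypothetical and not part of the patch; SHIFT stands in
for the kernel's IRQ_TIMINGS_SHIFT (dividing by 2^5 = 32 instead of
nr_samples - 1), and unlike the kernel code, which tolerates up to three
consecutive anomalies before resetting, this sketch simply skips an
anomalous sample.]

#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>

#define SHIFT	5

struct demo_stat {
	int64_t variance;
	int64_t avg;
	unsigned int nr_samples;
};

/* Feed one interval (in ns); returns 1 if the value is an anomaly. */
static int demo_update(struct demo_stat *s, int64_t interval)
{
	int64_t diff = interval - s->avg;
	int64_t variance = s->nr_samples > 1 ? s->variance >> SHIFT : 0;

	s->nr_samples++;

	/* squared 3-sigma test: (value - avg)^2 < 9 x variance */
	if (s->nr_samples >= 30 && diff * diff > 9 * variance)
		return 1;

	/* online average: avg += (value - avg) / 32 */
	s->avg += diff >> SHIFT;

	/* online variance: var += (value - old_avg) x (value - new_avg) */
	s->variance += diff * (interval - s->avg);

	return 0;
}

int main(void)
{
	struct demo_stat s = { 0 };

	/* ~1 ms periodic intervals with one long 100 ms peak */
	for (int i = 0; i < 40; i++) {
		int64_t interval = (i == 35) ? 100000000 :
				   1000000 + rand() % 2000;

		if (demo_update(&s, interval))
			printf("sample %d: %lld ns discarded as anomaly\n",
			       i, (long long)interval);
	}
	printf("running average: %lld ns\n", (long long)s.avg);
	return 0;
}

[The long peak at sample 35 fails the 3-sigma test and is skipped, while
the periodic samples keep refining the shift-based running average, which
converges toward the ~1 ms period.]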
-rw-r--r--	include/linux/interrupt.h	1
-rw-r--r--	kernel/irq/internals.h	19
-rw-r--r--	kernel/irq/timings.c	339
3 files changed, 359 insertions, 0 deletions
diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
index 9f617238a2f7..37f8e354f564 100644
--- a/include/linux/interrupt.h
+++ b/include/linux/interrupt.h
@@ -706,6 +706,7 @@ static inline void init_irq_proc(void)
 #ifdef CONFIG_IRQ_TIMINGS
 void irq_timings_enable(void);
 void irq_timings_disable(void);
+u64 irq_timings_next_event(u64 now);
 #endif
 
 struct seq_file;
diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h
index b95b74920433..9da14d125df4 100644
--- a/kernel/irq/internals.h
+++ b/kernel/irq/internals.h
@@ -275,13 +275,21 @@ struct irq_timings {
 
 DECLARE_PER_CPU(struct irq_timings, irq_timings);
 
+extern void irq_timings_free(int irq);
+extern int irq_timings_alloc(int irq);
+
 static inline void irq_remove_timings(struct irq_desc *desc)
 {
 	desc->istate &= ~IRQS_TIMINGS;
+
+	irq_timings_free(irq_desc_get_irq(desc));
 }
 
 static inline void irq_setup_timings(struct irq_desc *desc, struct irqaction *act)
 {
+	int irq = irq_desc_get_irq(desc);
+	int ret;
+
 	/*
 	 * We don't need the measurement because the idle code already
 	 * knows the next expiry event.
@@ -289,6 +297,17 @@ static inline void irq_setup_timings(struct irq_desc *desc, struct irqaction *ac
 	if (act->flags & __IRQF_TIMER)
 		return;
 
+	/*
+	 * In case the timing allocation fails, we just want to warn,
+	 * not fail, so let the system boot anyway.
+	 */
+	ret = irq_timings_alloc(irq);
+	if (ret) {
+		pr_warn("Failed to allocate irq timing stats for irq%d (%d)",
+			irq, ret);
+		return;
+	}
+
 	desc->istate |= IRQS_TIMINGS;
 }
 
diff --git a/kernel/irq/timings.c b/kernel/irq/timings.c
index 56cf6870fa26..c8c1d073fbf1 100644
--- a/kernel/irq/timings.c
+++ b/kernel/irq/timings.c
@@ -8,10 +8,16 @@
  * published by the Free Software Foundation.
  *
  */
+#include <linux/kernel.h>
 #include <linux/percpu.h>
+#include <linux/slab.h>
 #include <linux/static_key.h>
 #include <linux/interrupt.h>
+#include <linux/idr.h>
 #include <linux/irq.h>
+#include <linux/math64.h>
+
+#include <trace/events/irq.h>
 
 #include "internals.h"
 
@@ -19,6 +25,18 @@ DEFINE_STATIC_KEY_FALSE(irq_timing_enabled);
 
 DEFINE_PER_CPU(struct irq_timings, irq_timings);
 
+struct irqt_stat {
+	u64	next_evt;
+	u64	last_ts;
+	u64	variance;
+	u32	avg;
+	u32	nr_samples;
+	int	anomalies;
+	int	valid;
+};
+
+static DEFINE_IDR(irqt_stats);
+
 void irq_timings_enable(void)
 {
 	static_branch_enable(&irq_timing_enabled);
@@ -28,3 +46,324 @@ void irq_timings_disable(void)
 {
 	static_branch_disable(&irq_timing_enabled);
 }
+
+/**
+ * irqs_update - update the irq timing statistics with a new timestamp
+ *
+ * @irqs: an irqt_stat struct pointer
+ * @ts: the new timestamp
+ *
+ * The statistics are computed online, in other words, the code is
+ * designed to compute the statistics on a stream of values rather
+ * than doing multiple passes on the values to compute the average,
+ * then the variance. The integer division introduces a loss of
+ * precision but with an acceptable error margin compared to the
+ * results we would have with double precision floating point: we
+ * are dealing with nanoseconds, so big numbers, consequently the
+ * mantissa is negligible, especially when converting the time to
+ * microseconds afterwards.
+ *
+ * The computation happens at idle time. When the CPU is not idle,
+ * the interrupts' timestamps are stored in the circular buffer;
+ * when the CPU goes idle and this routine is called, all the
+ * buffer's values are injected into the statistical model,
+ * continuing to extend the statistics from the previous busy-idle
+ * cycle.
+ *
+ * Observation showed a device will trigger a burst of periodic
+ * interrupts followed by one or two peaks of longer time, for
+ * instance when an SD card device flushes its cache; then the
+ * periodic intervals occur again. A one second inactivity period
+ * resets the stats, which guarantees the statistical values won't
+ * exceed 1x10^9, thus the computation won't overflow.
+ *
+ * Basically, the purpose of the algorithm is to watch the periodic
+ * interrupts and eliminate the peaks.
+ *
+ * An interrupt is considered periodically stable if the intervals
+ * of its occurrences follow the normal distribution, thus the
+ * values comply with:
+ *
+ *      avg - 3 x stddev < value < avg + 3 x stddev
+ *
+ * Which can be simplified to:
+ *
+ *      -3 x stddev < value - avg < 3 x stddev
+ *
+ *      abs(value - avg) < 3 x stddev
+ *
+ * In order to save a costly square root computation, we use the
+ * variance. For the record, stddev = sqrt(variance). The equation
+ * above becomes:
+ *
+ *      abs(value - avg) < 3 x sqrt(variance)
+ *
+ * And finally we square it:
+ *
+ *      (value - avg) ^ 2 < (3 x sqrt(variance)) ^ 2
+ *
+ *      (value - avg) x (value - avg) < 9 x variance
+ *
+ * Statistically speaking, any value outside this interval is
+ * considered an anomaly and is discarded. However, a normal
+ * distribution only appears when the number of samples is at least
+ * 30 (the usual rule of thumb in statistics). When there are three
+ * consecutive anomalies, the statistics are reset.
+ *
+ */
+static void irqs_update(struct irqt_stat *irqs, u64 ts)
+{
+	u64 old_ts = irqs->last_ts;
+	u64 variance = 0;
+	u64 interval;
+	s64 diff;
+
+	/*
+	 * The timestamps are absolute time values; we need to compute
+	 * the timing interval between two interrupts.
+	 */
+	irqs->last_ts = ts;
+
+	/*
+	 * The interval type is u64 in order to deal with the same
+	 * type in our computation, which prevents tricky issues with
+	 * overflow, sign and division.
+	 */
+	interval = ts - old_ts;
+
+	/*
+	 * The interrupt triggered more than one second apart; that
+	 * ends the sequence as predictable for our purpose. In this
+	 * case, assume we have the beginning of a sequence and the
+	 * timestamp is the first value. As it is impossible to
+	 * predict anything at this point, return.
+	 *
+	 * Note the first timestamp of the sequence will always fall
+	 * in this test because old_ts is zero. That is what we want,
+	 * as we need another timestamp to compute an interval.
+	 */
+	if (interval >= NSEC_PER_SEC) {
+		memset(irqs, 0, sizeof(*irqs));
+		irqs->last_ts = ts;
+		return;
+	}
+
+	/*
+	 * Pre-compute the delta with the average as the result is
+	 * used several times in this function.
+	 */
+	diff = interval - irqs->avg;
+
+	/*
+	 * Increment the number of samples.
+	 */
+	irqs->nr_samples++;
+
+	/*
+	 * Online variance divided by the number of elements if there
+	 * is more than one sample. Normally the formula is division
+	 * by nr_samples - 1, but we assume the number of elements
+	 * will be more than 32, and dividing by 32 instead of 31 is
+	 * precise enough.
+	 */
+	if (likely(irqs->nr_samples > 1))
+		variance = irqs->variance >> IRQ_TIMINGS_SHIFT;
+
+	/*
+	 * The rule of thumb in statistics for the normal distribution
+	 * is having at least 30 samples in order for the model to
+	 * apply. Values outside the interval are considered an
+	 * anomaly.
+	 */
+	if ((irqs->nr_samples >= 30) && ((diff * diff) > (9 * variance))) {
+		/*
+		 * After three consecutive anomalies, we reset the
+		 * stats as the series is no longer stable enough.
+		 */
+		if (irqs->anomalies++ >= 3) {
+			memset(irqs, 0, sizeof(*irqs));
+			irqs->last_ts = ts;
+			return;
+		}
+	} else {
+		/*
+		 * The anomalies must be consecutive, so at this
+		 * point, we reset the anomalies counter.
+		 */
+		irqs->anomalies = 0;
+	}
+
+	/*
+	 * The interrupt is considered stable enough to try to predict
+	 * the next event on it.
+	 */
+	irqs->valid = 1;
+
+	/*
+	 * Online average algorithm:
+	 *
+	 *  new_average = average + ((value - average) / count)
+	 *
+	 * The variance computation depends on the new average
+	 * to be computed here first.
+	 *
+	 */
+	irqs->avg = irqs->avg + (diff >> IRQ_TIMINGS_SHIFT);
+
+	/*
+	 * Online variance algorithm:
+	 *
+	 *  new_variance = variance + (value - average) x (value - new_average)
+	 *
+	 * Warning: irqs->avg is updated with the line above, hence
+	 * 'interval - irqs->avg' is no longer equal to 'diff'.
+	 */
+	irqs->variance = irqs->variance + (diff * (interval - irqs->avg));
+
+	/*
+	 * Update the next event.
+	 */
+	irqs->next_evt = ts + irqs->avg;
+}
+
+/**
+ * irq_timings_next_event - Return when the next event is supposed to arrive
+ *
+ * During the last busy cycle, the number of interrupts is incremented
+ * and stored in the irq_timings structure. This information is
+ * necessary to:
+ *
+ * - know if the index in the table wrapped up:
+ *
+ *      If more than the array size interrupts happened during the
+ *      last busy/idle cycle, the index wrapped up and we have to
+ *      begin with the next element in the array, which is the last
+ *      one in the sequence; otherwise it is at index 0.
+ *
+ * - have an indication of the interrupts' activity on this CPU
+ *   (eg. irq/sec)
+ *
+ * The values are 'consumed' after being inserted into the statistical
+ * model, thus the count is reinitialized.
+ *
+ * The array of values **must** be browsed in the time direction, the
+ * timestamp must increase between an element and the next one.
+ *
+ * Returns a nanosecond-based estimate of the earliest interrupt,
+ * U64_MAX otherwise.
+ */
+u64 irq_timings_next_event(u64 now)
+{
+	struct irq_timings *irqts = this_cpu_ptr(&irq_timings);
+	struct irqt_stat *irqs;
+	struct irqt_stat __percpu *s;
+	u64 ts, next_evt = U64_MAX;
+	int i, irq = 0;
+
+	/*
+	 * This function must be called with local irqs disabled in
+	 * order to prevent the timings circular buffer from being
+	 * updated while we are reading it.
+	 */
+	WARN_ON_ONCE(!irqs_disabled());
+
+	/*
+	 * Number of elements in the circular buffer: if it happens it
+	 * was flushed before, then the number of elements could be
+	 * smaller than IRQ_TIMINGS_SIZE, so the count is used;
+	 * otherwise the array size is used as we wrapped. The index
+	 * begins from zero when we did not wrap. That could be done
+	 * in a nicer way with the proper circular array structure
+	 * type, but at the cost of extra computation in the
+	 * interrupt handler hot path. We choose efficiency.
+	 *
+	 * Inject the measured irq/timestamp into the statistical
+	 * model while decrementing the counter because we consume the
+	 * data from our circular buffer.
+	 */
+	for (i = irqts->count & IRQ_TIMINGS_MASK,
+	     irqts->count = min(IRQ_TIMINGS_SIZE, irqts->count);
+	     irqts->count > 0; irqts->count--, i = (i + 1) & IRQ_TIMINGS_MASK) {
+
+		irq = irq_timing_decode(irqts->values[i], &ts);
+
+		s = idr_find(&irqt_stats, irq);
+		if (s) {
+			irqs = this_cpu_ptr(s);
+			irqs_update(irqs, ts);
+		}
+	}
+
+	/*
+	 * Look in the list of interrupts' statistics for the earliest
+	 * next event.
+	 */
+	idr_for_each_entry(&irqt_stats, s, i) {
+
+		irqs = this_cpu_ptr(s);
+
+		if (!irqs->valid)
+			continue;
+
+		if (irqs->next_evt <= now) {
+			irq = i;
+			next_evt = now;
+
+			/*
+			 * This interrupt must not be used in the
+			 * future until new events occur and update
+			 * the statistics.
+			 */
+			irqs->valid = 0;
+			break;
+		}
+
+		if (irqs->next_evt < next_evt) {
+			irq = i;
+			next_evt = irqs->next_evt;
+		}
+	}
+
+	return next_evt;
+}
+
+void irq_timings_free(int irq)
+{
+	struct irqt_stat __percpu *s;
+
+	s = idr_find(&irqt_stats, irq);
+	if (s) {
+		free_percpu(s);
+		idr_remove(&irqt_stats, irq);
+	}
+}
+
+int irq_timings_alloc(int irq)
+{
+	struct irqt_stat __percpu *s;
+	int id;
+
+	/*
+	 * Some platforms can have the same private interrupt per cpu,
+	 * so this function may be called several times with the
+	 * same interrupt number. Just bail out in case the per cpu
+	 * stat structure is already allocated.
+	 */
+	s = idr_find(&irqt_stats, irq);
+	if (s)
+		return 0;
+
+	s = alloc_percpu(*s);
+	if (!s)
+		return -ENOMEM;
+
+	idr_preload(GFP_KERNEL);
+	id = idr_alloc(&irqt_stats, s, irq, irq + 1, GFP_NOWAIT);
+	idr_preload_end();
+
+	if (id < 0) {
+		free_percpu(s);
+		return id;
+	}
+
+	return 0;
+}
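
[Editor's note: for context, a hedged sketch of the consumer side this
patch prepares for: an idle-path helper that turns the prediction into a
sleep-length estimate. Only irq_timings_next_event(), ktime_get_ns() and
the irqs-disabled requirement come from this patch and existing kernel
APIs; the helper name and its surrounding use are hypothetical. Assumes
CONFIG_IRQ_TIMINGS=y and that the statistics were turned on with
irq_timings_enable().]

#include <linux/interrupt.h>
#include <linux/ktime.h>

static u64 predicted_sleep_length_ns(void)
{
	u64 now = ktime_get_ns();
	u64 next;

	/*
	 * The caller (the idle loop) runs with local interrupts
	 * disabled, which irq_timings_next_event() requires so the
	 * per-CPU circular buffer cannot change under it.
	 */
	next = irq_timings_next_event(now);
	if (next == U64_MAX)
		return U64_MAX;	/* no usable prediction */

	return next > now ? next - now : 0;
}

[An idle governor could compare this estimate with the next timer expiry
and pick the shallower C-state when an interrupt is predicted sooner.]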