aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Ingo Molnar <mingo@elte.hu> 2009-09-22 09:50:24 -0400
committer: Ingo Molnar <mingo@elte.hu> 2009-09-22 11:30:45 -0400
commit: b417c9fd8690637f0c91479435ab3e2bf450c038 (patch)
tree: 54ee0e5a92a867e5ceebc14e75cdf3dfc45191fa
parent: 3967684006f30c253bc6d4a6604d1bad4a7fc672 (diff)
x86: mce: Fix thermal throttling message storm
If a system switches back and forth between hot and cold mode, the MCE code will print a stream of critical kernel messages.

Extend the throttling code to properly notice this, by only printing the first hot + cold transition and omitting the rest up to CHECK_INTERVAL (5 minutes).

This way we'll only get a single incident of:

  [ 102.356584] CPU0: Temperature above threshold, cpu clock throttled (total events = 1)
  [ 102.357000] Disabling lock debugging due to kernel taint
  [ 102.369223] CPU0: Temperature/speed normal

Every 5 minutes. The 'total events' count tells the number of cold/hot transitions detected, should overheating occur after 5 minutes again:

  [ 402.357580] CPU0: Temperature above threshold, cpu clock throttled (total events = 24891)
  [ 402.358001] CPU0: Temperature/speed normal
  [ 450.704142] Machine check events logged

Cc: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- arch/x86/kernel/cpu/mcheck/therm_throt.c | 6
1 files changed, 4 insertions, 2 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c
index db80b577f601..b3a1dba75330 100644
--- a/arch/x86/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c
@@ -42,6 +42,7 @@ struct thermal_state {
42 42
43 u64 next_check; 43 u64 next_check;
44 unsigned long throttle_count; 44 unsigned long throttle_count;
45 unsigned long last_throttle_count;
45}; 46};
46 47
47static DEFINE_PER_CPU(struct thermal_state, thermal_state); 48static DEFINE_PER_CPU(struct thermal_state, thermal_state);
@@ -120,11 +121,12 @@ static int therm_throt_process(bool is_throttled)
120 if (is_throttled) 121 if (is_throttled)
121 state->throttle_count++; 122 state->throttle_count++;
122 123
123 if (!(was_throttled ^ is_throttled) && 124 if (time_before64(now, state->next_check) &&
124 time_before64(now, state->next_check)) 125 state->throttle_count != state->last_throttle_count)
125 return 0; 126 return 0;
126 127
127 state->next_check = now + CHECK_INTERVAL; 128 state->next_check = now + CHECK_INTERVAL;
129 state->last_throttle_count = state->throttle_count;
128 130
129 /* if we just entered the thermal event */ 131 /* if we just entered the thermal event */
130 if (is_throttled) { 132 if (is_throttled) {