diff options
author | Ingo Molnar <mingo@elte.hu> | 2009-09-22 09:50:24 -0400 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2009-09-22 11:30:45 -0400 |
commit | b417c9fd8690637f0c91479435ab3e2bf450c038 (patch) | |
tree | 54ee0e5a92a867e5ceebc14e75cdf3dfc45191fa | |
parent | 3967684006f30c253bc6d4a6604d1bad4a7fc672 (diff) |
x86: mce: Fix thermal throttling message storm
If a system switches back and forth between hot and cold mode,
the MCE code will print a stream of critical kernel messages.
Extend the throttling code to properly notice this, by
only printing the first hot + cold transition and omitting
the rest up to CHECK_INTERVAL (5 minutes).
This way we'll only get a single incident of:
[ 102.356584] CPU0: Temperature above threshold, cpu clock throttled (total events = 1)
[ 102.357000] Disabling lock debugging due to kernel taint
[ 102.369223] CPU0: Temperature/speed normal
These messages are printed at most once every 5 minutes. The 'total events' count gives the number of cold/hot
transitions detected. Should overheating occur again after the 5 minutes have passed, we get:
[ 402.357580] CPU0: Temperature above threshold, cpu clock throttled (total events = 24891)
[ 402.358001] CPU0: Temperature/speed normal
[ 450.704142] Machine check events logged
Cc: Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
Cc: Huang Ying <ying.huang@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
LKML-Reference: <new-submission>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/therm_throt.c | 6 |
1 files changed, 4 insertions, 2 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index db80b577f601..b3a1dba75330 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -42,6 +42,7 @@ struct thermal_state { | |||
42 | 42 | ||
43 | u64 next_check; | 43 | u64 next_check; |
44 | unsigned long throttle_count; | 44 | unsigned long throttle_count; |
45 | unsigned long last_throttle_count; | ||
45 | }; | 46 | }; |
46 | 47 | ||
47 | static DEFINE_PER_CPU(struct thermal_state, thermal_state); | 48 | static DEFINE_PER_CPU(struct thermal_state, thermal_state); |
@@ -120,11 +121,12 @@ static int therm_throt_process(bool is_throttled) | |||
120 | if (is_throttled) | 121 | if (is_throttled) |
121 | state->throttle_count++; | 122 | state->throttle_count++; |
122 | 123 | ||
123 | if (!(was_throttled ^ is_throttled) && | 124 | if (time_before64(now, state->next_check) && |
124 | time_before64(now, state->next_check)) | 125 | state->throttle_count != state->last_throttle_count) |
125 | return 0; | 126 | return 0; |
126 | 127 | ||
127 | state->next_check = now + CHECK_INTERVAL; | 128 | state->next_check = now + CHECK_INTERVAL; |
129 | state->last_throttle_count = state->throttle_count; | ||
128 | 130 | ||
129 | /* if we just entered the thermal event */ | 131 | /* if we just entered the thermal event */ |
130 | if (is_throttled) { | 132 | if (is_throttled) { |