diff options
Diffstat (limited to 'arch/x86/kernel/cpu/mcheck')
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/mce.c | 3 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/therm_throt.c | 206 |
2 files changed, 169 insertions, 40 deletions
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 18cc42562250..e1269d62c569 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c | |||
@@ -51,7 +51,7 @@ | |||
51 | static DEFINE_MUTEX(mce_read_mutex); | 51 | static DEFINE_MUTEX(mce_read_mutex); |
52 | 52 | ||
53 | #define rcu_dereference_check_mce(p) \ | 53 | #define rcu_dereference_check_mce(p) \ |
54 | rcu_dereference_check((p), \ | 54 | rcu_dereference_index_check((p), \ |
55 | rcu_read_lock_sched_held() || \ | 55 | rcu_read_lock_sched_held() || \ |
56 | lockdep_is_held(&mce_read_mutex)) | 56 | lockdep_is_held(&mce_read_mutex)) |
57 | 57 | ||
@@ -600,6 +600,7 @@ void machine_check_poll(enum mcp_flags flags, mce_banks_t *b) | |||
600 | */ | 600 | */ |
601 | if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { | 601 | if (!(flags & MCP_DONTLOG) && !mce_dont_log_ce) { |
602 | mce_log(&m); | 602 | mce_log(&m); |
603 | atomic_notifier_call_chain(&x86_mce_decoder_chain, 0, &m); | ||
603 | add_taint(TAINT_MACHINE_CHECK); | 604 | add_taint(TAINT_MACHINE_CHECK); |
604 | } | 605 | } |
605 | 606 | ||
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index e1a0a3bf9716..c2a8b26d4fea 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -34,15 +34,25 @@ | |||
34 | /* How long to wait between reporting thermal events */ | 34 | /* How long to wait between reporting thermal events */ |
35 | #define CHECK_INTERVAL (300 * HZ) | 35 | #define CHECK_INTERVAL (300 * HZ) |
36 | 36 | ||
37 | #define THERMAL_THROTTLING_EVENT 0 | ||
38 | #define POWER_LIMIT_EVENT 1 | ||
39 | |||
37 | /* | 40 | /* |
38 | * Current thermal throttling state: | 41 | * Current thermal event state: |
39 | */ | 42 | */ |
40 | struct thermal_state { | 43 | struct _thermal_state { |
41 | bool is_throttled; | 44 | bool new_event; |
42 | 45 | int event; | |
43 | u64 next_check; | 46 | u64 next_check; |
44 | unsigned long throttle_count; | 47 | unsigned long count; |
45 | unsigned long last_throttle_count; | 48 | unsigned long last_count; |
49 | }; | ||
50 | |||
51 | struct thermal_state { | ||
52 | struct _thermal_state core_throttle; | ||
53 | struct _thermal_state core_power_limit; | ||
54 | struct _thermal_state package_throttle; | ||
55 | struct _thermal_state package_power_limit; | ||
46 | }; | 56 | }; |
47 | 57 | ||
48 | static DEFINE_PER_CPU(struct thermal_state, thermal_state); | 58 | static DEFINE_PER_CPU(struct thermal_state, thermal_state); |
@@ -53,11 +63,13 @@ static u32 lvtthmr_init __read_mostly; | |||
53 | 63 | ||
54 | #ifdef CONFIG_SYSFS | 64 | #ifdef CONFIG_SYSFS |
55 | #define define_therm_throt_sysdev_one_ro(_name) \ | 65 | #define define_therm_throt_sysdev_one_ro(_name) \ |
56 | static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) | 66 | static SYSDEV_ATTR(_name, 0444, \ |
67 | therm_throt_sysdev_show_##_name, \ | ||
68 | NULL) \ | ||
57 | 69 | ||
58 | #define define_therm_throt_sysdev_show_func(name) \ | 70 | #define define_therm_throt_sysdev_show_func(event, name) \ |
59 | \ | 71 | \ |
60 | static ssize_t therm_throt_sysdev_show_##name( \ | 72 | static ssize_t therm_throt_sysdev_show_##event##_##name( \ |
61 | struct sys_device *dev, \ | 73 | struct sys_device *dev, \ |
62 | struct sysdev_attribute *attr, \ | 74 | struct sysdev_attribute *attr, \ |
63 | char *buf) \ | 75 | char *buf) \ |
@@ -66,30 +78,42 @@ static ssize_t therm_throt_sysdev_show_##name( \ | |||
66 | ssize_t ret; \ | 78 | ssize_t ret; \ |
67 | \ | 79 | \ |
68 | preempt_disable(); /* CPU hotplug */ \ | 80 | preempt_disable(); /* CPU hotplug */ \ |
69 | if (cpu_online(cpu)) \ | 81 | if (cpu_online(cpu)) { \ |
70 | ret = sprintf(buf, "%lu\n", \ | 82 | ret = sprintf(buf, "%lu\n", \ |
71 | per_cpu(thermal_state, cpu).name); \ | 83 | per_cpu(thermal_state, cpu).event.name); \ |
72 | else \ | 84 | } else \ |
73 | ret = 0; \ | 85 | ret = 0; \ |
74 | preempt_enable(); \ | 86 | preempt_enable(); \ |
75 | \ | 87 | \ |
76 | return ret; \ | 88 | return ret; \ |
77 | } | 89 | } |
78 | 90 | ||
79 | define_therm_throt_sysdev_show_func(throttle_count); | 91 | define_therm_throt_sysdev_show_func(core_throttle, count); |
80 | define_therm_throt_sysdev_one_ro(throttle_count); | 92 | define_therm_throt_sysdev_one_ro(core_throttle_count); |
93 | |||
94 | define_therm_throt_sysdev_show_func(core_power_limit, count); | ||
95 | define_therm_throt_sysdev_one_ro(core_power_limit_count); | ||
96 | |||
97 | define_therm_throt_sysdev_show_func(package_throttle, count); | ||
98 | define_therm_throt_sysdev_one_ro(package_throttle_count); | ||
99 | |||
100 | define_therm_throt_sysdev_show_func(package_power_limit, count); | ||
101 | define_therm_throt_sysdev_one_ro(package_power_limit_count); | ||
81 | 102 | ||
82 | static struct attribute *thermal_throttle_attrs[] = { | 103 | static struct attribute *thermal_throttle_attrs[] = { |
83 | &attr_throttle_count.attr, | 104 | &attr_core_throttle_count.attr, |
84 | NULL | 105 | NULL |
85 | }; | 106 | }; |
86 | 107 | ||
87 | static struct attribute_group thermal_throttle_attr_group = { | 108 | static struct attribute_group thermal_attr_group = { |
88 | .attrs = thermal_throttle_attrs, | 109 | .attrs = thermal_throttle_attrs, |
89 | .name = "thermal_throttle" | 110 | .name = "thermal_throttle" |
90 | }; | 111 | }; |
91 | #endif /* CONFIG_SYSFS */ | 112 | #endif /* CONFIG_SYSFS */ |
92 | 113 | ||
114 | #define CORE_LEVEL 0 | ||
115 | #define PACKAGE_LEVEL 1 | ||
116 | |||
93 | /*** | 117 | /*** |
94 | * therm_throt_process - Process thermal throttling event from interrupt | 118 | * therm_throt_process - Process thermal throttling event from interrupt |
95 | * @curr: Whether the condition is current or not (boolean), since the | 119 | * @curr: Whether the condition is current or not (boolean), since the |
@@ -106,39 +130,70 @@ static struct attribute_group thermal_throttle_attr_group = { | |||
106 | * 1 : Event should be logged further, and a message has been | 130 | * 1 : Event should be logged further, and a message has been |
107 | * printed to the syslog. | 131 | * printed to the syslog. |
108 | */ | 132 | */ |
109 | static int therm_throt_process(bool is_throttled) | 133 | static int therm_throt_process(bool new_event, int event, int level) |
110 | { | 134 | { |
111 | struct thermal_state *state; | 135 | struct _thermal_state *state; |
112 | unsigned int this_cpu; | 136 | unsigned int this_cpu = smp_processor_id(); |
113 | bool was_throttled; | 137 | bool old_event; |
114 | u64 now; | 138 | u64 now; |
139 | struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu); | ||
115 | 140 | ||
116 | this_cpu = smp_processor_id(); | ||
117 | now = get_jiffies_64(); | 141 | now = get_jiffies_64(); |
118 | state = &per_cpu(thermal_state, this_cpu); | 142 | if (level == CORE_LEVEL) { |
143 | if (event == THERMAL_THROTTLING_EVENT) | ||
144 | state = &pstate->core_throttle; | ||
145 | else if (event == POWER_LIMIT_EVENT) | ||
146 | state = &pstate->core_power_limit; | ||
147 | else | ||
148 | return 0; | ||
149 | } else if (level == PACKAGE_LEVEL) { | ||
150 | if (event == THERMAL_THROTTLING_EVENT) | ||
151 | state = &pstate->package_throttle; | ||
152 | else if (event == POWER_LIMIT_EVENT) | ||
153 | state = &pstate->package_power_limit; | ||
154 | else | ||
155 | return 0; | ||
156 | } else | ||
157 | return 0; | ||
119 | 158 | ||
120 | was_throttled = state->is_throttled; | 159 | old_event = state->new_event; |
121 | state->is_throttled = is_throttled; | 160 | state->new_event = new_event; |
122 | 161 | ||
123 | if (is_throttled) | 162 | if (new_event) |
124 | state->throttle_count++; | 163 | state->count++; |
125 | 164 | ||
126 | if (time_before64(now, state->next_check) && | 165 | if (time_before64(now, state->next_check) && |
127 | state->throttle_count != state->last_throttle_count) | 166 | state->count != state->last_count) |
128 | return 0; | 167 | return 0; |
129 | 168 | ||
130 | state->next_check = now + CHECK_INTERVAL; | 169 | state->next_check = now + CHECK_INTERVAL; |
131 | state->last_throttle_count = state->throttle_count; | 170 | state->last_count = state->count; |
132 | 171 | ||
133 | /* if we just entered the thermal event */ | 172 | /* if we just entered the thermal event */ |
134 | if (is_throttled) { | 173 | if (new_event) { |
135 | printk(KERN_CRIT "CPU%d: Temperature above threshold, cpu clock throttled (total events = %lu)\n", this_cpu, state->throttle_count); | 174 | if (event == THERMAL_THROTTLING_EVENT) |
175 | printk(KERN_CRIT "CPU%d: %s temperature above threshold, cpu clock throttled (total events = %lu)\n", | ||
176 | this_cpu, | ||
177 | level == CORE_LEVEL ? "Core" : "Package", | ||
178 | state->count); | ||
179 | else | ||
180 | printk(KERN_CRIT "CPU%d: %s power limit notification (total events = %lu)\n", | ||
181 | this_cpu, | ||
182 | level == CORE_LEVEL ? "Core" : "Package", | ||
183 | state->count); | ||
136 | 184 | ||
137 | add_taint(TAINT_MACHINE_CHECK); | 185 | add_taint(TAINT_MACHINE_CHECK); |
138 | return 1; | 186 | return 1; |
139 | } | 187 | } |
140 | if (was_throttled) { | 188 | if (old_event) { |
141 | printk(KERN_INFO "CPU%d: Temperature/speed normal\n", this_cpu); | 189 | if (event == THERMAL_THROTTLING_EVENT) |
190 | printk(KERN_INFO "CPU%d: %s temperature/speed normal\n", | ||
191 | this_cpu, | ||
192 | level == CORE_LEVEL ? "Core" : "Package"); | ||
193 | else | ||
194 | printk(KERN_INFO "CPU%d: %s power limit normal\n", | ||
195 | this_cpu, | ||
196 | level == CORE_LEVEL ? "Core" : "Package"); | ||
142 | return 1; | 197 | return 1; |
143 | } | 198 | } |
144 | 199 | ||
@@ -149,13 +204,32 @@ static int therm_throt_process(bool is_throttled) | |||
149 | /* Add/Remove thermal_throttle interface for CPU device: */ | 204 | /* Add/Remove thermal_throttle interface for CPU device: */ |
150 | static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) | 205 | static __cpuinit int thermal_throttle_add_dev(struct sys_device *sys_dev) |
151 | { | 206 | { |
152 | return sysfs_create_group(&sys_dev->kobj, | 207 | int err; |
153 | &thermal_throttle_attr_group); | 208 | struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); |
209 | |||
210 | err = sysfs_create_group(&sys_dev->kobj, &thermal_attr_group); | ||
211 | if (err) | ||
212 | return err; | ||
213 | |||
214 | if (cpu_has(c, X86_FEATURE_PLN)) | ||
215 | err = sysfs_add_file_to_group(&sys_dev->kobj, | ||
216 | &attr_core_power_limit_count.attr, | ||
217 | thermal_attr_group.name); | ||
218 | if (cpu_has(c, X86_FEATURE_PTS)) | ||
219 | err = sysfs_add_file_to_group(&sys_dev->kobj, | ||
220 | &attr_package_throttle_count.attr, | ||
221 | thermal_attr_group.name); | ||
222 | if (cpu_has(c, X86_FEATURE_PLN)) | ||
223 | err = sysfs_add_file_to_group(&sys_dev->kobj, | ||
224 | &attr_package_power_limit_count.attr, | ||
225 | thermal_attr_group.name); | ||
226 | |||
227 | return err; | ||
154 | } | 228 | } |
155 | 229 | ||
156 | static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) | 230 | static __cpuinit void thermal_throttle_remove_dev(struct sys_device *sys_dev) |
157 | { | 231 | { |
158 | sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); | 232 | sysfs_remove_group(&sys_dev->kobj, &thermal_attr_group); |
159 | } | 233 | } |
160 | 234 | ||
161 | /* Mutex protecting device creation against CPU hotplug: */ | 235 | /* Mutex protecting device creation against CPU hotplug: */ |
@@ -226,14 +300,50 @@ device_initcall(thermal_throttle_init_device); | |||
226 | 300 | ||
227 | #endif /* CONFIG_SYSFS */ | 301 | #endif /* CONFIG_SYSFS */ |
228 | 302 | ||
303 | /* | ||
304 | * Set up the most two significant bit to notify mce log that this thermal | ||
305 | * event type. | ||
306 | * This is a temp solution. May be changed in the future with mce log | ||
307 | * infrasture. | ||
308 | */ | ||
309 | #define CORE_THROTTLED (0) | ||
310 | #define CORE_POWER_LIMIT ((__u64)1 << 62) | ||
311 | #define PACKAGE_THROTTLED ((__u64)2 << 62) | ||
312 | #define PACKAGE_POWER_LIMIT ((__u64)3 << 62) | ||
313 | |||
229 | /* Thermal transition interrupt handler */ | 314 | /* Thermal transition interrupt handler */ |
230 | static void intel_thermal_interrupt(void) | 315 | static void intel_thermal_interrupt(void) |
231 | { | 316 | { |
232 | __u64 msr_val; | 317 | __u64 msr_val; |
318 | struct cpuinfo_x86 *c = &cpu_data(smp_processor_id()); | ||
233 | 319 | ||
234 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); | 320 | rdmsrl(MSR_IA32_THERM_STATUS, msr_val); |
235 | if (therm_throt_process((msr_val & THERM_STATUS_PROCHOT) != 0)) | 321 | |
236 | mce_log_therm_throt_event(msr_val); | 322 | if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, |
323 | THERMAL_THROTTLING_EVENT, | ||
324 | CORE_LEVEL) != 0) | ||
325 | mce_log_therm_throt_event(CORE_THROTTLED | msr_val); | ||
326 | |||
327 | if (cpu_has(c, X86_FEATURE_PLN)) | ||
328 | if (therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, | ||
329 | POWER_LIMIT_EVENT, | ||
330 | CORE_LEVEL) != 0) | ||
331 | mce_log_therm_throt_event(CORE_POWER_LIMIT | msr_val); | ||
332 | |||
333 | if (cpu_has(c, X86_FEATURE_PTS)) { | ||
334 | rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); | ||
335 | if (therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, | ||
336 | THERMAL_THROTTLING_EVENT, | ||
337 | PACKAGE_LEVEL) != 0) | ||
338 | mce_log_therm_throt_event(PACKAGE_THROTTLED | msr_val); | ||
339 | if (cpu_has(c, X86_FEATURE_PLN)) | ||
340 | if (therm_throt_process(msr_val & | ||
341 | PACKAGE_THERM_STATUS_POWER_LIMIT, | ||
342 | POWER_LIMIT_EVENT, | ||
343 | PACKAGE_LEVEL) != 0) | ||
344 | mce_log_therm_throt_event(PACKAGE_POWER_LIMIT | ||
345 | | msr_val); | ||
346 | } | ||
237 | } | 347 | } |
238 | 348 | ||
239 | static void unexpected_thermal_interrupt(void) | 349 | static void unexpected_thermal_interrupt(void) |
@@ -335,8 +445,26 @@ void intel_init_thermal(struct cpuinfo_x86 *c) | |||
335 | apic_write(APIC_LVTTHMR, h); | 445 | apic_write(APIC_LVTTHMR, h); |
336 | 446 | ||
337 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); | 447 | rdmsr(MSR_IA32_THERM_INTERRUPT, l, h); |
338 | wrmsr(MSR_IA32_THERM_INTERRUPT, | 448 | if (cpu_has(c, X86_FEATURE_PLN)) |
339 | l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); | 449 | wrmsr(MSR_IA32_THERM_INTERRUPT, |
450 | l | (THERM_INT_LOW_ENABLE | ||
451 | | THERM_INT_HIGH_ENABLE | THERM_INT_PLN_ENABLE), h); | ||
452 | else | ||
453 | wrmsr(MSR_IA32_THERM_INTERRUPT, | ||
454 | l | (THERM_INT_LOW_ENABLE | THERM_INT_HIGH_ENABLE), h); | ||
455 | |||
456 | if (cpu_has(c, X86_FEATURE_PTS)) { | ||
457 | rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); | ||
458 | if (cpu_has(c, X86_FEATURE_PLN)) | ||
459 | wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, | ||
460 | l | (PACKAGE_THERM_INT_LOW_ENABLE | ||
461 | | PACKAGE_THERM_INT_HIGH_ENABLE | ||
462 | | PACKAGE_THERM_INT_PLN_ENABLE), h); | ||
463 | else | ||
464 | wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, | ||
465 | l | (PACKAGE_THERM_INT_LOW_ENABLE | ||
466 | | PACKAGE_THERM_INT_HIGH_ENABLE), h); | ||
467 | } | ||
340 | 468 | ||
341 | smp_thermal_vector = intel_thermal_interrupt; | 469 | smp_thermal_vector = intel_thermal_interrupt; |
342 | 470 | ||