aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDmitriy Zavin <dmitriyz@google.com>2006-09-26 04:52:42 -0400
committerAndi Kleen <andi@basil.nowhere.org>2006-09-26 04:52:42 -0400
commit3222b36f46c22f46697a0a53fa8804153a32669f (patch)
treecf3625063b02ac9854ec57682ac8cad06f92267d
parent66aea9913cf435fe92ebb7bf869b4f15901ab993 (diff)
[PATCH] x86: Add a cumulative thermal throttle event counter.
Export a counter in /sys that keeps track of the number of thermal events, so that the user knows how bad the thermal problem might be (since the logging to syslog and mcelog is rate limited).

AK: Fixed cpu hotplug locking

Signed-off-by: Dmitriy Zavin <dmitriyz@google.com>
Signed-off-by: Andi Kleen <ak@suse.de>
-rw-r--r--arch/i386/kernel/cpu/mcheck/p4.c3
-rw-r--r--arch/i386/kernel/cpu/mcheck/therm_throt.c133
-rw-r--r--arch/x86_64/kernel/mce_intel.c3
-rw-r--r--include/asm-i386/therm_throt.h3
4 files changed, 136 insertions, 6 deletions
diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c
index d83a669d376f..504434a46011 100644
--- a/arch/i386/kernel/cpu/mcheck/p4.c
+++ b/arch/i386/kernel/cpu/mcheck/p4.c
@@ -115,6 +115,9 @@ static void intel_init_thermal(struct cpuinfo_x86 *c)
115 l = apic_read (APIC_LVTTHMR); 115 l = apic_read (APIC_LVTTHMR);
116 apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); 116 apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
117 printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); 117 printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu);
118
119 /* enable thermal throttle processing */
120 atomic_set(&therm_throt_en, 1);
118 return; 121 return;
119} 122}
120#endif /* CONFIG_X86_MCE_P4THERMAL */ 123#endif /* CONFIG_X86_MCE_P4THERMAL */
diff --git a/arch/i386/kernel/cpu/mcheck/therm_throt.c b/arch/i386/kernel/cpu/mcheck/therm_throt.c
index 101f7ace00ce..4f43047de406 100644
--- a/arch/i386/kernel/cpu/mcheck/therm_throt.c
+++ b/arch/i386/kernel/cpu/mcheck/therm_throt.c
@@ -1,15 +1,22 @@
1/* 1/*
2 * linux/arch/i386/kerne/cpu/mcheck/therm_throt.c 2 * linux/arch/i386/kerne/cpu/mcheck/therm_throt.c
3 * 3 *
4 * Thermal throttle event support code. 4 * Thermal throttle event support code (such as syslog messaging and rate
5 * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c).
6 * This allows consistent reporting of CPU thermal throttle events.
7 *
8 * Maintains a counter in /sys that keeps track of the number of thermal
9 * events, such that the user knows how bad the thermal problem might be
10 * (since the logging to syslog and mcelog is rate limited).
5 * 11 *
6 * Author: Dmitriy Zavin (dmitriyz@google.com) 12 * Author: Dmitriy Zavin (dmitriyz@google.com)
7 * 13 *
8 * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. 14 * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c.
9 * 15 * Inspired by Ross Biro's and Al Borchers' counter code.
10 */ 16 */
11 17
12#include <linux/percpu.h> 18#include <linux/percpu.h>
19#include <linux/sysdev.h>
13#include <linux/cpu.h> 20#include <linux/cpu.h>
14#include <asm/cpu.h> 21#include <asm/cpu.h>
15#include <linux/notifier.h> 22#include <linux/notifier.h>
@@ -18,15 +25,53 @@
18/* How long to wait between reporting thermal events */ 25/* How long to wait between reporting thermal events */
19#define CHECK_INTERVAL (300 * HZ) 26#define CHECK_INTERVAL (300 * HZ)
20 27
21static DEFINE_PER_CPU(__u64, next_check); 28static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES;
29static DEFINE_PER_CPU(unsigned long, thermal_throttle_count);
30atomic_t therm_throt_en = ATOMIC_INIT(0);
31
32#ifdef CONFIG_SYSFS
/*
 * Declare a read-only (mode 0444) sysdev attribute called _name whose show
 * callback is therm_throt_sysdev_show_<_name>; no store callback is given.
 */
#define define_therm_throt_sysdev_one_ro(_name)				\
	static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL)

/*
 * Generate therm_throt_sysdev_show_<name>().
 *
 * The generated function formats the per-CPU variable
 * thermal_throttle_<name> belonging to the CPU numbered dev->id into buf
 * as "%lu\n" and returns what sprintf() returned. If that CPU is not
 * online, nothing is written and 0 is returned. Preemption is disabled
 * around the online check and the read.
 */
#define define_therm_throt_sysdev_show_func(name)			\
static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev,	\
					      char *buf)		\
{									\
	unsigned int cpu_id = dev->id;					\
	ssize_t len = 0;						\
									\
	preempt_disable();	/* CPU hotplug */			\
	if (cpu_online(cpu_id))						\
		len = sprintf(buf, "%lu\n",				\
			      per_cpu(thermal_throttle_##name, cpu_id)); \
	preempt_enable();						\
									\
	return len;							\
}
53
54define_therm_throt_sysdev_show_func(count);
55define_therm_throt_sysdev_one_ro(count);
56
57static struct attribute *thermal_throttle_attrs[] = {
58 &attr_count.attr,
59 NULL
60};
61
62static struct attribute_group thermal_throttle_attr_group = {
63 .attrs = thermal_throttle_attrs,
64 .name = "thermal_throttle"
65};
66#endif /* CONFIG_SYSFS */
22 67
23/*** 68/***
24 * therm_throt_process - Process thermal throttling event 69 * therm_throt_process - Process thermal throttling event from interrupt
25 * @curr: Whether the condition is current or not (boolean), since the 70 * @curr: Whether the condition is current or not (boolean), since the
26 * thermal interrupt normally gets called both when the thermal 71 * thermal interrupt normally gets called both when the thermal
27 * event begins and once the event has ended. 72 * event begins and once the event has ended.
28 * 73 *
29 * This function is normally called by the thermal interrupt after the 74 * This function is called by the thermal interrupt after the
30 * IRQ has been acknowledged. 75 * IRQ has been acknowledged.
31 * 76 *
32 * It will take care of rate limiting and printing messages to the syslog. 77 * It will take care of rate limiting and printing messages to the syslog.
@@ -41,6 +86,9 @@ int therm_throt_process(int curr)
41 unsigned int cpu = smp_processor_id(); 86 unsigned int cpu = smp_processor_id();
42 __u64 tmp_jiffs = get_jiffies_64(); 87 __u64 tmp_jiffs = get_jiffies_64();
43 88
89 if (curr)
90 __get_cpu_var(thermal_throttle_count)++;
91
44 if (time_before64(tmp_jiffs, __get_cpu_var(next_check))) 92 if (time_before64(tmp_jiffs, __get_cpu_var(next_check)))
45 return 0; 93 return 0;
46 94
@@ -49,7 +97,9 @@ int therm_throt_process(int curr)
49 /* if we just entered the thermal event */ 97 /* if we just entered the thermal event */
50 if (curr) { 98 if (curr) {
51 printk(KERN_CRIT "CPU%d: Temperature above threshold, " 99 printk(KERN_CRIT "CPU%d: Temperature above threshold, "
52 "cpu clock throttled\n", cpu); 100 "cpu clock throttled (total events = %lu)\n", cpu,
101 __get_cpu_var(thermal_throttle_count));
102
53 add_taint(TAINT_MACHINE_CHECK); 103 add_taint(TAINT_MACHINE_CHECK);
54 } else { 104 } else {
55 printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu); 105 printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu);
@@ -57,3 +107,74 @@ int therm_throt_process(int curr)
57 107
58 return 1; 108 return 1;
59} 109}
110
111#ifdef CONFIG_SYSFS
112/* Add/Remove thermal_throttle interface for CPU device */
113static __cpuinit int thermal_throttle_add_dev(struct sys_device * sys_dev)
114{
115 sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group);
116 return 0;
117}
118
119#ifdef CONFIG_HOTPLUG_CPU
120static __cpuinit int thermal_throttle_remove_dev(struct sys_device * sys_dev)
121{
122 sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group);
123 return 0;
124}
125
126/* Mutex protecting device creation against CPU hotplug */
127static DEFINE_MUTEX(therm_cpu_lock);
128
129/* Get notified when a cpu comes on/off. Be hotplug friendly. */
130static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb,
131 unsigned long action,
132 void *hcpu)
133{
134 unsigned int cpu = (unsigned long)hcpu;
135 struct sys_device *sys_dev;
136
137 sys_dev = get_cpu_sysdev(cpu);
138 mutex_lock(&therm_cpu_lock);
139 switch (action) {
140 case CPU_ONLINE:
141 thermal_throttle_add_dev(sys_dev);
142 break;
143 case CPU_DEAD:
144 thermal_throttle_remove_dev(sys_dev);
145 break;
146 }
147 mutex_unlock(&therm_cpu_lock);
148 return NOTIFY_OK;
149}
150
151static struct notifier_block thermal_throttle_cpu_notifier =
152{
153 .notifier_call = thermal_throttle_cpu_callback,
154};
155#endif /* CONFIG_HOTPLUG_CPU */
156
157static __init int thermal_throttle_init_device(void)
158{
159 unsigned int cpu = 0;
160
161 if (!atomic_read(&therm_throt_en))
162 return 0;
163
164 register_hotcpu_notifier(&thermal_throttle_cpu_notifier);
165
166#ifdef CONFIG_HOTPLUG_CPU
167 mutex_lock(&therm_cpu_lock);
168#endif
169 /* connect live CPUs to sysfs */
170 for_each_online_cpu(cpu)
171 thermal_throttle_add_dev(get_cpu_sysdev(cpu));
172#ifdef CONFIG_HOTPLUG_CPU
173 mutex_unlock(&therm_cpu_lock);
174#endif
175
176 return 0;
177}
178
179device_initcall(thermal_throttle_init_device);
180#endif /* CONFIG_SYSFS */
diff --git a/arch/x86_64/kernel/mce_intel.c b/arch/x86_64/kernel/mce_intel.c
index dec11219e278..6551505d8a2c 100644
--- a/arch/x86_64/kernel/mce_intel.c
+++ b/arch/x86_64/kernel/mce_intel.c
@@ -77,6 +77,9 @@ static void __cpuinit intel_init_thermal(struct cpuinfo_x86 *c)
77 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED); 77 apic_write(APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
78 printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n", 78 printk(KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
79 cpu, tm2 ? "TM2" : "TM1"); 79 cpu, tm2 ? "TM2" : "TM1");
80
81 /* enable thermal throttle processing */
82 atomic_set(&therm_throt_en, 1);
80 return; 83 return;
81} 84}
82 85
diff --git a/include/asm-i386/therm_throt.h b/include/asm-i386/therm_throt.h
index 3c9c22cc10c9..399bf6026b16 100644
--- a/include/asm-i386/therm_throt.h
+++ b/include/asm-i386/therm_throt.h
@@ -1,6 +1,9 @@
1#ifndef __ASM_I386_THERM_THROT_H__ 1#ifndef __ASM_I386_THERM_THROT_H__
2#define __ASM_I386_THERM_THROT_H__ 1 2#define __ASM_I386_THERM_THROT_H__ 1
3 3
4#include <asm/atomic.h>
5
6extern atomic_t therm_throt_en;
4int therm_throt_process(int curr); 7int therm_throt_process(int curr);
5 8
6#endif /* __ASM_I386_THERM_THROT_H__ */ 9#endif /* __ASM_I386_THERM_THROT_H__ */