diff options
author | Dmitriy Zavin <dmitriyz@google.com> | 2006-09-26 04:52:42 -0400 |
---|---|---|
committer | Andi Kleen <andi@basil.nowhere.org> | 2006-09-26 04:52:42 -0400 |
commit | 3222b36f46c22f46697a0a53fa8804153a32669f (patch) | |
tree | cf3625063b02ac9854ec57682ac8cad06f92267d /arch/i386 | |
parent | 66aea9913cf435fe92ebb7bf869b4f15901ab993 (diff) |
[PATCH] x86: Add a cumulative thermal throttle event counter.
The counter, exported through /sys, keeps track of the
number of thermal events so that the user knows how bad the
thermal problem might be (since the logging to syslog and mcelog
is rate limited).
AK: Fixed cpu hotplug locking
Signed-off-by: Dmitriy Zavin <dmitriyz@google.com>
Signed-off-by: Andi Kleen <ak@suse.de>
Diffstat (limited to 'arch/i386')
-rw-r--r-- | arch/i386/kernel/cpu/mcheck/p4.c | 3 | ||||
-rw-r--r-- | arch/i386/kernel/cpu/mcheck/therm_throt.c | 133 |
2 files changed, 130 insertions, 6 deletions
diff --git a/arch/i386/kernel/cpu/mcheck/p4.c b/arch/i386/kernel/cpu/mcheck/p4.c index d83a669d376f..504434a46011 100644 --- a/arch/i386/kernel/cpu/mcheck/p4.c +++ b/arch/i386/kernel/cpu/mcheck/p4.c | |||
@@ -115,6 +115,9 @@ static void intel_init_thermal(struct cpuinfo_x86 *c) | |||
115 | l = apic_read (APIC_LVTTHMR); | 115 | l = apic_read (APIC_LVTTHMR); |
116 | apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); | 116 | apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); |
117 | printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); | 117 | printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); |
118 | |||
119 | /* enable thermal throttle processing */ | ||
120 | atomic_set(&therm_throt_en, 1); | ||
118 | return; | 121 | return; |
119 | } | 122 | } |
120 | #endif /* CONFIG_X86_MCE_P4THERMAL */ | 123 | #endif /* CONFIG_X86_MCE_P4THERMAL */ |
diff --git a/arch/i386/kernel/cpu/mcheck/therm_throt.c b/arch/i386/kernel/cpu/mcheck/therm_throt.c index 101f7ace00ce..4f43047de406 100644 --- a/arch/i386/kernel/cpu/mcheck/therm_throt.c +++ b/arch/i386/kernel/cpu/mcheck/therm_throt.c | |||
@@ -1,15 +1,22 @@ | |||
1 | /* | 1 | /* |
2 | * linux/arch/i386/kernel/cpu/mcheck/therm_throt.c | 2 | * linux/arch/i386/kernel/cpu/mcheck/therm_throt.c |
3 | * | 3 | * |
4 | * Thermal throttle event support code. | 4 | * Thermal throttle event support code (such as syslog messaging and rate |
5 | * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c). | ||
6 | * This allows consistent reporting of CPU thermal throttle events. | ||
7 | * | ||
8 | * Maintains a counter in /sys that keeps track of the number of thermal | ||
9 | * events, such that the user knows how bad the thermal problem might be | ||
10 | * (since the logging to syslog and mcelog is rate limited). | ||
5 | * | 11 | * |
6 | * Author: Dmitriy Zavin (dmitriyz@google.com) | 12 | * Author: Dmitriy Zavin (dmitriyz@google.com) |
7 | * | 13 | * |
8 | * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. | 14 | * Credits: Adapted from Zwane Mwaikambo's original code in mce_intel.c. |
9 | * | 15 | * Inspired by Ross Biro's and Al Borchers' counter code. |
10 | */ | 16 | */ |
11 | 17 | ||
12 | #include <linux/percpu.h> | 18 | #include <linux/percpu.h> |
19 | #include <linux/sysdev.h> | ||
13 | #include <linux/cpu.h> | 20 | #include <linux/cpu.h> |
14 | #include <asm/cpu.h> | 21 | #include <asm/cpu.h> |
15 | #include <linux/notifier.h> | 22 | #include <linux/notifier.h> |
@@ -18,15 +25,53 @@ | |||
18 | /* How long to wait between reporting thermal events */ | 25 | /* How long to wait between reporting thermal events */ |
19 | #define CHECK_INTERVAL (300 * HZ) | 26 | #define CHECK_INTERVAL (300 * HZ) |
20 | 27 | ||
21 | static DEFINE_PER_CPU(__u64, next_check); | 28 | static DEFINE_PER_CPU(__u64, next_check) = INITIAL_JIFFIES; |
29 | static DEFINE_PER_CPU(unsigned long, thermal_throttle_count); | ||
30 | atomic_t therm_throt_en = ATOMIC_INIT(0); | ||
31 | |||
32 | #ifdef CONFIG_SYSFS | ||
33 | #define define_therm_throt_sysdev_one_ro(_name) \ | ||
34 | static SYSDEV_ATTR(_name, 0444, therm_throt_sysdev_show_##_name, NULL) | ||
35 | |||
36 | #define define_therm_throt_sysdev_show_func(name) \ | ||
37 | static ssize_t therm_throt_sysdev_show_##name(struct sys_device *dev, \ | ||
38 | char *buf) \ | ||
39 | { \ | ||
40 | unsigned int cpu = dev->id; \ | ||
41 | ssize_t ret; \ | ||
42 | \ | ||
43 | preempt_disable(); /* CPU hotplug */ \ | ||
44 | if (cpu_online(cpu)) \ | ||
45 | ret = sprintf(buf, "%lu\n", \ | ||
46 | per_cpu(thermal_throttle_##name, cpu)); \ | ||
47 | else \ | ||
48 | ret = 0; \ | ||
49 | preempt_enable(); \ | ||
50 | \ | ||
51 | return ret; \ | ||
52 | } | ||
53 | |||
54 | define_therm_throt_sysdev_show_func(count); | ||
55 | define_therm_throt_sysdev_one_ro(count); | ||
56 | |||
57 | static struct attribute *thermal_throttle_attrs[] = { | ||
58 | &attr_count.attr, | ||
59 | NULL | ||
60 | }; | ||
61 | |||
62 | static struct attribute_group thermal_throttle_attr_group = { | ||
63 | .attrs = thermal_throttle_attrs, | ||
64 | .name = "thermal_throttle" | ||
65 | }; | ||
66 | #endif /* CONFIG_SYSFS */ | ||
22 | 67 | ||
23 | /*** | 68 | /*** |
24 | * therm_throt_process - Process thermal throttling event | 69 | * therm_throt_process - Process thermal throttling event from interrupt |
25 | * @curr: Whether the condition is current or not (boolean), since the | 70 | * @curr: Whether the condition is current or not (boolean), since the |
26 | * thermal interrupt normally gets called both when the thermal | 71 | * thermal interrupt normally gets called both when the thermal |
27 | * event begins and once the event has ended. | 72 | * event begins and once the event has ended. |
28 | * | 73 | * |
29 | * This function is normally called by the thermal interrupt after the | 74 | * This function is called by the thermal interrupt after the |
30 | * IRQ has been acknowledged. | 75 | * IRQ has been acknowledged. |
31 | * | 76 | * |
32 | * It will take care of rate limiting and printing messages to the syslog. | 77 | * It will take care of rate limiting and printing messages to the syslog. |
@@ -41,6 +86,9 @@ int therm_throt_process(int curr) | |||
41 | unsigned int cpu = smp_processor_id(); | 86 | unsigned int cpu = smp_processor_id(); |
42 | __u64 tmp_jiffs = get_jiffies_64(); | 87 | __u64 tmp_jiffs = get_jiffies_64(); |
43 | 88 | ||
89 | if (curr) | ||
90 | __get_cpu_var(thermal_throttle_count)++; | ||
91 | |||
44 | if (time_before64(tmp_jiffs, __get_cpu_var(next_check))) | 92 | if (time_before64(tmp_jiffs, __get_cpu_var(next_check))) |
45 | return 0; | 93 | return 0; |
46 | 94 | ||
@@ -49,7 +97,9 @@ int therm_throt_process(int curr) | |||
49 | /* if we just entered the thermal event */ | 97 | /* if we just entered the thermal event */ |
50 | if (curr) { | 98 | if (curr) { |
51 | printk(KERN_CRIT "CPU%d: Temperature above threshold, " | 99 | printk(KERN_CRIT "CPU%d: Temperature above threshold, " |
52 | "cpu clock throttled\n", cpu); | 100 | "cpu clock throttled (total events = %lu)\n", cpu, |
101 | __get_cpu_var(thermal_throttle_count)); | ||
102 | |||
53 | add_taint(TAINT_MACHINE_CHECK); | 103 | add_taint(TAINT_MACHINE_CHECK); |
54 | } else { | 104 | } else { |
55 | printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu); | 105 | printk(KERN_CRIT "CPU%d: Temperature/speed normal\n", cpu); |
@@ -57,3 +107,74 @@ int therm_throt_process(int curr) | |||
57 | 107 | ||
58 | return 1; | 108 | return 1; |
59 | } | 109 | } |
110 | |||
111 | #ifdef CONFIG_SYSFS | ||
112 | /* Add/Remove thermal_throttle interface for CPU device */ | ||
113 | static __cpuinit int thermal_throttle_add_dev(struct sys_device * sys_dev) | ||
114 | { | ||
115 | sysfs_create_group(&sys_dev->kobj, &thermal_throttle_attr_group); | ||
116 | return 0; | ||
117 | } | ||
118 | |||
119 | #ifdef CONFIG_HOTPLUG_CPU | ||
120 | static __cpuinit int thermal_throttle_remove_dev(struct sys_device * sys_dev) | ||
121 | { | ||
122 | sysfs_remove_group(&sys_dev->kobj, &thermal_throttle_attr_group); | ||
123 | return 0; | ||
124 | } | ||
125 | |||
126 | /* Mutex protecting device creation against CPU hotplug */ | ||
127 | static DEFINE_MUTEX(therm_cpu_lock); | ||
128 | |||
129 | /* Get notified when a cpu comes on/off. Be hotplug friendly. */ | ||
130 | static __cpuinit int thermal_throttle_cpu_callback(struct notifier_block *nfb, | ||
131 | unsigned long action, | ||
132 | void *hcpu) | ||
133 | { | ||
134 | unsigned int cpu = (unsigned long)hcpu; | ||
135 | struct sys_device *sys_dev; | ||
136 | |||
137 | sys_dev = get_cpu_sysdev(cpu); | ||
138 | mutex_lock(&therm_cpu_lock); | ||
139 | switch (action) { | ||
140 | case CPU_ONLINE: | ||
141 | thermal_throttle_add_dev(sys_dev); | ||
142 | break; | ||
143 | case CPU_DEAD: | ||
144 | thermal_throttle_remove_dev(sys_dev); | ||
145 | break; | ||
146 | } | ||
147 | mutex_unlock(&therm_cpu_lock); | ||
148 | return NOTIFY_OK; | ||
149 | } | ||
150 | |||
151 | static struct notifier_block thermal_throttle_cpu_notifier = | ||
152 | { | ||
153 | .notifier_call = thermal_throttle_cpu_callback, | ||
154 | }; | ||
155 | #endif /* CONFIG_HOTPLUG_CPU */ | ||
156 | |||
157 | static __init int thermal_throttle_init_device(void) | ||
158 | { | ||
159 | unsigned int cpu = 0; | ||
160 | |||
161 | if (!atomic_read(&therm_throt_en)) | ||
162 | return 0; | ||
163 | |||
164 | register_hotcpu_notifier(&thermal_throttle_cpu_notifier); | ||
165 | |||
166 | #ifdef CONFIG_HOTPLUG_CPU | ||
167 | mutex_lock(&therm_cpu_lock); | ||
168 | #endif | ||
169 | /* connect live CPUs to sysfs */ | ||
170 | for_each_online_cpu(cpu) | ||
171 | thermal_throttle_add_dev(get_cpu_sysdev(cpu)); | ||
172 | #ifdef CONFIG_HOTPLUG_CPU | ||
173 | mutex_unlock(&therm_cpu_lock); | ||
174 | #endif | ||
175 | |||
176 | return 0; | ||
177 | } | ||
178 | |||
179 | device_initcall(thermal_throttle_init_device); | ||
180 | #endif /* CONFIG_SYSFS */ | ||