diff options
-rw-r--r-- | Documentation/thermal/x86_pkg_temperature_thermal | 47 | ||||
-rw-r--r-- | arch/x86/include/asm/mce.h | 7 | ||||
-rw-r--r-- | arch/x86/kernel/cpu/mcheck/therm_throt.c | 63 | ||||
-rw-r--r-- | drivers/thermal/Kconfig | 13 | ||||
-rw-r--r-- | drivers/thermal/Makefile | 1 | ||||
-rw-r--r-- | drivers/thermal/x86_pkg_temp_thermal.c | 642 |
6 files changed, 769 insertions, 4 deletions
diff --git a/Documentation/thermal/x86_pkg_temperature_thermal b/Documentation/thermal/x86_pkg_temperature_thermal new file mode 100644 index 000000000000..17a3a4c0a0ca --- /dev/null +++ b/Documentation/thermal/x86_pkg_temperature_thermal | |||
@@ -0,0 +1,47 @@ | |||
1 | Kernel driver: x86_pkg_temp_thermal | ||
2 | =================== | ||
3 | |||
4 | Supported chips: | ||
5 | * x86: with package level thermal management | ||
6 | (Verify using: CPUID.06H:EAX[bit 6] =1) | ||
7 | |||
8 | Authors: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com> | ||
9 | |||
10 | Reference | ||
11 | --- | ||
12 | Intel® 64 and IA-32 Architectures Software Developer’s Manual (Jan, 2013): | ||
13 | Chapter 14.6: PACKAGE LEVEL THERMAL MANAGEMENT | ||
14 | |||
15 | Description | ||
16 | --------- | ||
17 | |||
18 | This driver registers the CPU package-level digital temperature sensor as a | ||
19 | thermal zone with at most two user-mode configurable trip points. The number of | ||
20 | trip points depends on the capability of the package. Once a trip point is | ||
21 | violated, user mode can receive a notification via the thermal notification | ||
22 | mechanism and can take any action to control the temperature. | ||
23 | |||
24 | |||
25 | Threshold management | ||
26 | -------------------- | ||
27 | Each package will register as a thermal zone under /sys/class/thermal. | ||
28 | Example: | ||
29 | /sys/class/thermal/thermal_zone1 | ||
30 | |||
31 | This contains two trip points: | ||
32 | - trip_point_0_temp | ||
33 | - trip_point_1_temp | ||
34 | |||
35 | The user can set any temperature from 0 up to, but not including, the TJ-Max | ||
36 | temperature. Temperatures are in milli-degrees Celsius. Refer to | ||
37 | "Documentation/thermal/sysfs-api.txt" for thermal sysfs details. | ||
38 | |||
39 | Any value other than 0 in these trip points can trigger thermal notifications. | ||
40 | Setting a trip point to 0 stops thermal notifications for that trip point. | ||
41 | |||
42 | Thermal notifications: To get kobject-uevent notifications, set the thermal zone | ||
43 | policy to "user_space". For example: echo -n "user_space" > policy | ||
44 | |||
45 | |||
46 | |||
47 | |||
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index fa5f71e021d5..16a214557a58 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h | |||
@@ -214,6 +214,13 @@ void mce_log_therm_throt_event(__u64 status); | |||
214 | /* Interrupt Handler for core thermal thresholds */ | 214 | /* Interrupt Handler for core thermal thresholds */ |
215 | extern int (*platform_thermal_notify)(__u64 msr_val); | 215 | extern int (*platform_thermal_notify)(__u64 msr_val); |
216 | 216 | ||
217 | /* Interrupt Handler for package thermal thresholds */ | ||
218 | extern int (*platform_thermal_package_notify)(__u64 msr_val); | ||
219 | |||
220 | /* Callback to query rate control support: returns true if the | ||
221 | * package notify callback implements its own rate control */ | ||
222 | extern bool (*platform_thermal_package_rate_control)(void); | ||
223 | |||
217 | #ifdef CONFIG_X86_THERMAL_VECTOR | 224 | #ifdef CONFIG_X86_THERMAL_VECTOR |
218 | extern void mcheck_intel_therm_init(void); | 225 | extern void mcheck_intel_therm_init(void); |
219 | #else | 226 | #else |
diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 47a1870279aa..4131c0393594 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c | |||
@@ -54,12 +54,24 @@ struct thermal_state { | |||
54 | struct _thermal_state package_power_limit; | 54 | struct _thermal_state package_power_limit; |
55 | struct _thermal_state core_thresh0; | 55 | struct _thermal_state core_thresh0; |
56 | struct _thermal_state core_thresh1; | 56 | struct _thermal_state core_thresh1; |
57 | struct _thermal_state pkg_thresh0; | ||
58 | struct _thermal_state pkg_thresh1; | ||
57 | }; | 59 | }; |
58 | 60 | ||
59 | /* Callback to handle core threshold interrupts */ | 61 | /* Callback to handle core threshold interrupts */ |
60 | int (*platform_thermal_notify)(__u64 msr_val); | 62 | int (*platform_thermal_notify)(__u64 msr_val); |
61 | EXPORT_SYMBOL(platform_thermal_notify); | 63 | EXPORT_SYMBOL(platform_thermal_notify); |
62 | 64 | ||
65 | /* Callback to handle package threshold interrupts */ | ||
66 | int (*platform_thermal_package_notify)(__u64 msr_val); | ||
67 | EXPORT_SYMBOL_GPL(platform_thermal_package_notify); | ||
68 | |||
69 | /* Callback support of rate control, return true, if | ||
70 | * callback has rate control */ | ||
71 | bool (*platform_thermal_package_rate_control)(void); | ||
72 | EXPORT_SYMBOL_GPL(platform_thermal_package_rate_control); | ||
73 | |||
74 | |||
63 | static DEFINE_PER_CPU(struct thermal_state, thermal_state); | 75 | static DEFINE_PER_CPU(struct thermal_state, thermal_state); |
64 | 76 | ||
65 | static atomic_t therm_throt_en = ATOMIC_INIT(0); | 77 | static atomic_t therm_throt_en = ATOMIC_INIT(0); |
@@ -203,19 +215,25 @@ static int therm_throt_process(bool new_event, int event, int level) | |||
203 | return 0; | 215 | return 0; |
204 | } | 216 | } |
205 | 217 | ||
206 | static int thresh_event_valid(int event) | 218 | static int thresh_event_valid(int level, int event) |
207 | { | 219 | { |
208 | struct _thermal_state *state; | 220 | struct _thermal_state *state; |
209 | unsigned int this_cpu = smp_processor_id(); | 221 | unsigned int this_cpu = smp_processor_id(); |
210 | struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu); | 222 | struct thermal_state *pstate = &per_cpu(thermal_state, this_cpu); |
211 | u64 now = get_jiffies_64(); | 223 | u64 now = get_jiffies_64(); |
212 | 224 | ||
213 | state = (event == 0) ? &pstate->core_thresh0 : &pstate->core_thresh1; | 225 | if (level == PACKAGE_LEVEL) |
226 | state = (event == 0) ? &pstate->pkg_thresh0 : | ||
227 | &pstate->pkg_thresh1; | ||
228 | else | ||
229 | state = (event == 0) ? &pstate->core_thresh0 : | ||
230 | &pstate->core_thresh1; | ||
214 | 231 | ||
215 | if (time_before64(now, state->next_check)) | 232 | if (time_before64(now, state->next_check)) |
216 | return 0; | 233 | return 0; |
217 | 234 | ||
218 | state->next_check = now + CHECK_INTERVAL; | 235 | state->next_check = now + CHECK_INTERVAL; |
236 | |||
219 | return 1; | 237 | return 1; |
220 | } | 238 | } |
221 | 239 | ||
@@ -321,6 +339,39 @@ device_initcall(thermal_throttle_init_device); | |||
321 | 339 | ||
322 | #endif /* CONFIG_SYSFS */ | 340 | #endif /* CONFIG_SYSFS */ |
323 | 341 | ||
342 | static void notify_package_thresholds(__u64 msr_val) | ||
343 | { | ||
344 | bool notify_thres_0 = false; | ||
345 | bool notify_thres_1 = false; | ||
346 | |||
347 | if (!platform_thermal_package_notify) | ||
348 | return; | ||
349 | |||
350 | /* lower threshold check */ | ||
351 | if (msr_val & THERM_LOG_THRESHOLD0) | ||
352 | notify_thres_0 = true; | ||
353 | /* higher threshold check */ | ||
354 | if (msr_val & THERM_LOG_THRESHOLD1) | ||
355 | notify_thres_1 = true; | ||
356 | |||
357 | if (!notify_thres_0 && !notify_thres_1) | ||
358 | return; | ||
359 | |||
360 | if (platform_thermal_package_rate_control && | ||
361 | platform_thermal_package_rate_control()) { | ||
362 | /* Rate control is implemented in callback */ | ||
363 | platform_thermal_package_notify(msr_val); | ||
364 | return; | ||
365 | } | ||
366 | |||
367 | /* lower threshold reached */ | ||
368 | if (notify_thres_0 && thresh_event_valid(PACKAGE_LEVEL, 0)) | ||
369 | platform_thermal_package_notify(msr_val); | ||
370 | /* higher threshold reached */ | ||
371 | if (notify_thres_1 && thresh_event_valid(PACKAGE_LEVEL, 1)) | ||
372 | platform_thermal_package_notify(msr_val); | ||
373 | } | ||
374 | |||
324 | static void notify_thresholds(__u64 msr_val) | 375 | static void notify_thresholds(__u64 msr_val) |
325 | { | 376 | { |
326 | /* check whether the interrupt handler is defined; | 377 | /* check whether the interrupt handler is defined; |
@@ -330,10 +381,12 @@ static void notify_thresholds(__u64 msr_val) | |||
330 | return; | 381 | return; |
331 | 382 | ||
332 | /* lower threshold reached */ | 383 | /* lower threshold reached */ |
333 | if ((msr_val & THERM_LOG_THRESHOLD0) && thresh_event_valid(0)) | 384 | if ((msr_val & THERM_LOG_THRESHOLD0) && |
385 | thresh_event_valid(CORE_LEVEL, 0)) | ||
334 | platform_thermal_notify(msr_val); | 386 | platform_thermal_notify(msr_val); |
335 | /* higher threshold reached */ | 387 | /* higher threshold reached */ |
336 | if ((msr_val & THERM_LOG_THRESHOLD1) && thresh_event_valid(1)) | 388 | if ((msr_val & THERM_LOG_THRESHOLD1) && |
389 | thresh_event_valid(CORE_LEVEL, 1)) | ||
337 | platform_thermal_notify(msr_val); | 390 | platform_thermal_notify(msr_val); |
338 | } | 391 | } |
339 | 392 | ||
@@ -359,6 +412,8 @@ static void intel_thermal_interrupt(void) | |||
359 | 412 | ||
360 | if (this_cpu_has(X86_FEATURE_PTS)) { | 413 | if (this_cpu_has(X86_FEATURE_PTS)) { |
361 | rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); | 414 | rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); |
415 | /* check violations of package thermal thresholds */ | ||
416 | notify_package_thresholds(msr_val); | ||
362 | therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, | 417 | therm_throt_process(msr_val & PACKAGE_THERM_STATUS_PROCHOT, |
363 | THERMAL_THROTTLING_EVENT, | 418 | THERMAL_THROTTLING_EVENT, |
364 | PACKAGE_LEVEL); | 419 | PACKAGE_LEVEL); |
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 7205c70a46a3..b13c2bcccb72 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig | |||
@@ -169,7 +169,20 @@ config INTEL_POWERCLAMP | |||
169 | enforce idle time which results in more package C-state residency. The | 169 | enforce idle time which results in more package C-state residency. The |
170 | user interface is exposed via generic thermal framework. | 170 | user interface is exposed via generic thermal framework. |
171 | 171 | ||
172 | config X86_PKG_TEMP_THERMAL | ||
173 | tristate "X86 package temperature thermal driver" | ||
174 | depends on THERMAL | ||
175 | depends on X86 | ||
176 | select THERMAL_GOV_USER_SPACE | ||
177 | default m | ||
178 | help | ||
179 | Enable this to register CPU digital sensor for package temperature as | ||
180 | thermal zone. Each package will have its own thermal zone. There are | ||
181 | two trip points which can be set by user to get notifications via thermal | ||
182 | notification methods. | ||
183 | |||
172 | menu "Texas Instruments thermal drivers" | 184 | menu "Texas Instruments thermal drivers" |
173 | source "drivers/thermal/ti-soc-thermal/Kconfig" | 185 | source "drivers/thermal/ti-soc-thermal/Kconfig" |
174 | endmenu | 186 | endmenu |
187 | |||
175 | endif | 188 | endif |
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index 85693941fda0..67184a293e3f 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile | |||
@@ -23,4 +23,5 @@ obj-$(CONFIG_DB8500_THERMAL) += db8500_thermal.o | |||
23 | obj-$(CONFIG_ARMADA_THERMAL) += armada_thermal.o | 23 | obj-$(CONFIG_ARMADA_THERMAL) += armada_thermal.o |
24 | obj-$(CONFIG_DB8500_CPUFREQ_COOLING) += db8500_cpufreq_cooling.o | 24 | obj-$(CONFIG_DB8500_CPUFREQ_COOLING) += db8500_cpufreq_cooling.o |
25 | obj-$(CONFIG_INTEL_POWERCLAMP) += intel_powerclamp.o | 25 | obj-$(CONFIG_INTEL_POWERCLAMP) += intel_powerclamp.o |
26 | obj-$(CONFIG_X86_PKG_TEMP_THERMAL) += x86_pkg_temp_thermal.o | ||
26 | obj-$(CONFIG_TI_SOC_THERMAL) += ti-soc-thermal/ | 27 | obj-$(CONFIG_TI_SOC_THERMAL) += ti-soc-thermal/ |
diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c new file mode 100644 index 000000000000..5de56f671a9d --- /dev/null +++ b/drivers/thermal/x86_pkg_temp_thermal.c | |||
@@ -0,0 +1,642 @@ | |||
1 | /* | ||
2 | * x86_pkg_temp_thermal driver | ||
3 | * Copyright (c) 2013, Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License along with | ||
15 | * this program; if not, write to the Free Software Foundation, Inc. | ||
16 | * | ||
17 | */ | ||
18 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
19 | |||
20 | #include <linux/module.h> | ||
21 | #include <linux/init.h> | ||
22 | #include <linux/err.h> | ||
23 | #include <linux/param.h> | ||
24 | #include <linux/device.h> | ||
25 | #include <linux/platform_device.h> | ||
26 | #include <linux/cpu.h> | ||
27 | #include <linux/smp.h> | ||
28 | #include <linux/slab.h> | ||
29 | #include <linux/pm.h> | ||
30 | #include <linux/thermal.h> | ||
31 | #include <linux/debugfs.h> | ||
32 | #include <asm/cpu_device_id.h> | ||
33 | #include <asm/mce.h> | ||
34 | |||
35 | /* | ||
36 | * Rate control delay: Idea is to introduce denounce effect | ||
37 | * This should be long enough to avoid reduce events, when | ||
38 | * threshold is set to a temperature, which is constantly | ||
39 | * violated, but at the short enough to take any action. | ||
40 | * The action can be remove threshold or change it to next | ||
41 | * interesting setting. Based on experiments, in around | ||
42 | * every 5 seconds under load will give us a significant | ||
43 | * temperature change. | ||
44 | */ | ||
45 | #define PKG_TEMP_THERMAL_NOTIFY_DELAY 5000 | ||
46 | static int notify_delay_ms = PKG_TEMP_THERMAL_NOTIFY_DELAY; | ||
47 | module_param(notify_delay_ms, int, 0644); | ||
48 | MODULE_PARM_DESC(notify_delay_ms, | ||
49 | "User space notification delay in milli seconds."); | ||
50 | |||
/*
 * Upper bound on the number of trip points in a thermal zone.  The MSR
 * only allows setting, and getting notifications for, two thresholds, so
 * this can currently never be more than 2.  The define enforces that limit
 * in case cpuid returns a wrong value for the number of thresholds.
 */
#define MAX_NUMBER_OF_TRIPS	2
57 | |||
58 | struct phy_dev_entry { | ||
59 | struct list_head list; | ||
60 | u16 phys_proc_id; | ||
61 | u16 first_cpu; | ||
62 | u32 tj_max; | ||
63 | int ref_cnt; | ||
64 | u32 start_pkg_therm_low; | ||
65 | u32 start_pkg_therm_high; | ||
66 | struct thermal_zone_device *tzone; | ||
67 | }; | ||
68 | |||
69 | /* List maintaining number of package instances */ | ||
70 | static LIST_HEAD(phy_dev_list); | ||
71 | static DEFINE_MUTEX(phy_dev_list_mutex); | ||
72 | |||
73 | /* Interrupt to work function schedule queue */ | ||
74 | static DEFINE_PER_CPU(struct delayed_work, pkg_temp_thermal_threshold_work); | ||
75 | |||
76 | /* To track if the work is already scheduled on a package */ | ||
77 | static u8 *pkg_work_scheduled; | ||
78 | |||
79 | /* Spin lock to prevent races with pkg_work_scheduled */ | ||
80 | static spinlock_t pkg_work_lock; | ||
81 | static u16 max_phy_id; | ||
82 | |||
83 | /* Debug counters to show using debugfs */ | ||
84 | static struct dentry *debugfs; | ||
85 | static unsigned int pkg_interrupt_cnt; | ||
86 | static unsigned int pkg_work_cnt; | ||
87 | |||
88 | static int pkg_temp_debugfs_init(void) | ||
89 | { | ||
90 | struct dentry *d; | ||
91 | |||
92 | debugfs = debugfs_create_dir("pkg_temp_thermal", NULL); | ||
93 | if (!debugfs) | ||
94 | return -ENOENT; | ||
95 | |||
96 | d = debugfs_create_u32("pkg_thres_interrupt", S_IRUGO, debugfs, | ||
97 | (u32 *)&pkg_interrupt_cnt); | ||
98 | if (!d) | ||
99 | goto err_out; | ||
100 | |||
101 | d = debugfs_create_u32("pkg_thres_work", S_IRUGO, debugfs, | ||
102 | (u32 *)&pkg_work_cnt); | ||
103 | if (!d) | ||
104 | goto err_out; | ||
105 | |||
106 | return 0; | ||
107 | |||
108 | err_out: | ||
109 | debugfs_remove_recursive(debugfs); | ||
110 | return -ENOENT; | ||
111 | } | ||
112 | |||
113 | static struct phy_dev_entry | ||
114 | *pkg_temp_thermal_get_phy_entry(unsigned int cpu) | ||
115 | { | ||
116 | u16 phys_proc_id = topology_physical_package_id(cpu); | ||
117 | struct phy_dev_entry *phy_ptr; | ||
118 | |||
119 | mutex_lock(&phy_dev_list_mutex); | ||
120 | |||
121 | list_for_each_entry(phy_ptr, &phy_dev_list, list) | ||
122 | if (phy_ptr->phys_proc_id == phys_proc_id) { | ||
123 | mutex_unlock(&phy_dev_list_mutex); | ||
124 | return phy_ptr; | ||
125 | } | ||
126 | |||
127 | mutex_unlock(&phy_dev_list_mutex); | ||
128 | |||
129 | return NULL; | ||
130 | } | ||
131 | |||
132 | /* | ||
133 | * tj-max is is interesting because threshold is set relative to this | ||
134 | * temperature. | ||
135 | */ | ||
136 | static int get_tj_max(int cpu, u32 *tj_max) | ||
137 | { | ||
138 | u32 eax, edx; | ||
139 | u32 val; | ||
140 | int err; | ||
141 | |||
142 | err = rdmsr_safe_on_cpu(cpu, MSR_IA32_TEMPERATURE_TARGET, &eax, &edx); | ||
143 | if (err) | ||
144 | goto err_ret; | ||
145 | else { | ||
146 | val = (eax >> 16) & 0xff; | ||
147 | if (val) | ||
148 | *tj_max = val * 1000; | ||
149 | else { | ||
150 | err = -EINVAL; | ||
151 | goto err_ret; | ||
152 | } | ||
153 | } | ||
154 | |||
155 | return 0; | ||
156 | err_ret: | ||
157 | *tj_max = 0; | ||
158 | return err; | ||
159 | } | ||
160 | |||
161 | static int sys_get_curr_temp(struct thermal_zone_device *tzd, unsigned long *temp) | ||
162 | { | ||
163 | u32 eax, edx; | ||
164 | struct phy_dev_entry *phy_dev_entry; | ||
165 | |||
166 | phy_dev_entry = tzd->devdata; | ||
167 | rdmsr_on_cpu(phy_dev_entry->first_cpu, MSR_IA32_PACKAGE_THERM_STATUS, | ||
168 | &eax, &edx); | ||
169 | if (eax & 0x80000000) { | ||
170 | *temp = phy_dev_entry->tj_max - | ||
171 | ((eax >> 16) & 0x7f) * 1000; | ||
172 | pr_debug("sys_get_curr_temp %ld\n", *temp); | ||
173 | return 0; | ||
174 | } | ||
175 | |||
176 | return -EINVAL; | ||
177 | } | ||
178 | |||
179 | static int sys_get_trip_temp(struct thermal_zone_device *tzd, | ||
180 | int trip, unsigned long *temp) | ||
181 | { | ||
182 | u32 eax, edx; | ||
183 | struct phy_dev_entry *phy_dev_entry; | ||
184 | u32 mask, shift; | ||
185 | unsigned long thres_reg_value; | ||
186 | int ret; | ||
187 | |||
188 | if (trip >= MAX_NUMBER_OF_TRIPS) | ||
189 | return -EINVAL; | ||
190 | |||
191 | phy_dev_entry = tzd->devdata; | ||
192 | |||
193 | if (trip) { | ||
194 | mask = THERM_MASK_THRESHOLD1; | ||
195 | shift = THERM_SHIFT_THRESHOLD1; | ||
196 | } else { | ||
197 | mask = THERM_MASK_THRESHOLD0; | ||
198 | shift = THERM_SHIFT_THRESHOLD0; | ||
199 | } | ||
200 | |||
201 | ret = rdmsr_on_cpu(phy_dev_entry->first_cpu, | ||
202 | MSR_IA32_PACKAGE_THERM_INTERRUPT, &eax, &edx); | ||
203 | if (ret < 0) | ||
204 | return -EINVAL; | ||
205 | |||
206 | thres_reg_value = (eax & mask) >> shift; | ||
207 | if (thres_reg_value) | ||
208 | *temp = phy_dev_entry->tj_max - thres_reg_value * 1000; | ||
209 | else | ||
210 | *temp = 0; | ||
211 | pr_debug("sys_get_trip_temp %ld\n", *temp); | ||
212 | |||
213 | return 0; | ||
214 | } | ||
215 | |||
216 | int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, | ||
217 | unsigned long temp) | ||
218 | { | ||
219 | u32 l, h; | ||
220 | struct phy_dev_entry *phy_dev_entry; | ||
221 | u32 mask, shift, intr; | ||
222 | int ret; | ||
223 | |||
224 | phy_dev_entry = tzd->devdata; | ||
225 | |||
226 | if (trip >= MAX_NUMBER_OF_TRIPS || temp >= phy_dev_entry->tj_max) | ||
227 | return -EINVAL; | ||
228 | |||
229 | ret = rdmsr_on_cpu(phy_dev_entry->first_cpu, | ||
230 | MSR_IA32_PACKAGE_THERM_INTERRUPT, | ||
231 | &l, &h); | ||
232 | if (ret < 0) | ||
233 | return -EINVAL; | ||
234 | |||
235 | if (trip) { | ||
236 | mask = THERM_MASK_THRESHOLD1; | ||
237 | shift = THERM_SHIFT_THRESHOLD1; | ||
238 | intr = THERM_INT_THRESHOLD1_ENABLE; | ||
239 | } else { | ||
240 | mask = THERM_MASK_THRESHOLD0; | ||
241 | shift = THERM_SHIFT_THRESHOLD0; | ||
242 | intr = THERM_INT_THRESHOLD0_ENABLE; | ||
243 | } | ||
244 | l &= ~mask; | ||
245 | /* | ||
246 | * When users space sets a trip temperature == 0, which is indication | ||
247 | * that, it is no longer interested in receiving notifications. | ||
248 | */ | ||
249 | if (!temp) | ||
250 | l &= ~intr; | ||
251 | else { | ||
252 | l |= (phy_dev_entry->tj_max - temp)/1000 << shift; | ||
253 | l |= intr; | ||
254 | } | ||
255 | |||
256 | return wrmsr_on_cpu(phy_dev_entry->first_cpu, | ||
257 | MSR_IA32_PACKAGE_THERM_INTERRUPT, | ||
258 | l, h); | ||
259 | } | ||
260 | |||
261 | static int sys_get_trip_type(struct thermal_zone_device *thermal, | ||
262 | int trip, enum thermal_trip_type *type) | ||
263 | { | ||
264 | |||
265 | *type = THERMAL_TRIP_PASSIVE; | ||
266 | |||
267 | return 0; | ||
268 | } | ||
269 | |||
270 | /* Thermal zone callback registry */ | ||
271 | static struct thermal_zone_device_ops tzone_ops = { | ||
272 | .get_temp = sys_get_curr_temp, | ||
273 | .get_trip_temp = sys_get_trip_temp, | ||
274 | .get_trip_type = sys_get_trip_type, | ||
275 | .set_trip_temp = sys_set_trip_temp, | ||
276 | }; | ||
277 | |||
/*
 * Rate control query callback: always reports true, i.e. this driver's
 * notify callback does its own rate limiting.
 */
static bool pkg_temp_thermal_platform_thermal_rate_control(void)
{
	return true;
}
282 | |||
283 | /* Enable threshold interrupt on local package/cpu */ | ||
284 | static inline void enable_pkg_thres_interrupt(void) | ||
285 | { | ||
286 | u32 l, h; | ||
287 | u8 thres_0, thres_1; | ||
288 | |||
289 | rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); | ||
290 | /* only enable/disable if it had valid threshold value */ | ||
291 | thres_0 = (l & THERM_MASK_THRESHOLD0) >> THERM_SHIFT_THRESHOLD0; | ||
292 | thres_1 = (l & THERM_MASK_THRESHOLD1) >> THERM_SHIFT_THRESHOLD1; | ||
293 | if (thres_0) | ||
294 | l |= THERM_INT_THRESHOLD0_ENABLE; | ||
295 | if (thres_1) | ||
296 | l |= THERM_INT_THRESHOLD1_ENABLE; | ||
297 | wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); | ||
298 | } | ||
299 | |||
300 | /* Disable threshold interrupt on local package/cpu */ | ||
301 | static inline void disable_pkg_thres_interrupt(void) | ||
302 | { | ||
303 | u32 l, h; | ||
304 | rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); | ||
305 | wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, | ||
306 | l & (~THERM_INT_THRESHOLD0_ENABLE) & | ||
307 | (~THERM_INT_THRESHOLD1_ENABLE), h); | ||
308 | } | ||
309 | |||
310 | static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work) | ||
311 | { | ||
312 | __u64 msr_val; | ||
313 | int cpu = smp_processor_id(); | ||
314 | int phy_id = topology_physical_package_id(cpu); | ||
315 | struct phy_dev_entry *phdev = pkg_temp_thermal_get_phy_entry(cpu); | ||
316 | bool notify = false; | ||
317 | |||
318 | if (!phdev) | ||
319 | return; | ||
320 | |||
321 | spin_lock(&pkg_work_lock); | ||
322 | ++pkg_work_cnt; | ||
323 | if (unlikely(phy_id > max_phy_id)) { | ||
324 | spin_unlock(&pkg_work_lock); | ||
325 | return; | ||
326 | } | ||
327 | pkg_work_scheduled[phy_id] = 0; | ||
328 | spin_unlock(&pkg_work_lock); | ||
329 | |||
330 | enable_pkg_thres_interrupt(); | ||
331 | rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); | ||
332 | if (msr_val & THERM_LOG_THRESHOLD0) { | ||
333 | wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, | ||
334 | msr_val & ~THERM_LOG_THRESHOLD0); | ||
335 | notify = true; | ||
336 | } | ||
337 | if (msr_val & THERM_LOG_THRESHOLD1) { | ||
338 | wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, | ||
339 | msr_val & ~THERM_LOG_THRESHOLD1); | ||
340 | notify = true; | ||
341 | } | ||
342 | if (notify) { | ||
343 | pr_debug("thermal_zone_device_update\n"); | ||
344 | thermal_zone_device_update(phdev->tzone); | ||
345 | } | ||
346 | } | ||
347 | |||
348 | static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) | ||
349 | { | ||
350 | unsigned long flags; | ||
351 | int cpu = smp_processor_id(); | ||
352 | int phy_id = topology_physical_package_id(cpu); | ||
353 | |||
354 | /* | ||
355 | * When a package is in interrupted state, all CPU's in that package | ||
356 | * are in the same interrupt state. So scheduling on any one CPU in | ||
357 | * the package is enough and simply return for others. | ||
358 | */ | ||
359 | spin_lock_irqsave(&pkg_work_lock, flags); | ||
360 | ++pkg_interrupt_cnt; | ||
361 | if (unlikely(phy_id > max_phy_id) || unlikely(!pkg_work_scheduled) || | ||
362 | pkg_work_scheduled[phy_id]) { | ||
363 | disable_pkg_thres_interrupt(); | ||
364 | spin_unlock_irqrestore(&pkg_work_lock, flags); | ||
365 | return -EINVAL; | ||
366 | } | ||
367 | pkg_work_scheduled[phy_id] = 1; | ||
368 | spin_unlock_irqrestore(&pkg_work_lock, flags); | ||
369 | |||
370 | disable_pkg_thres_interrupt(); | ||
371 | schedule_delayed_work_on(cpu, | ||
372 | &per_cpu(pkg_temp_thermal_threshold_work, cpu), | ||
373 | msecs_to_jiffies(notify_delay_ms)); | ||
374 | return 0; | ||
375 | } | ||
376 | |||
377 | static int find_siblings_cpu(int cpu) | ||
378 | { | ||
379 | int i; | ||
380 | int id = topology_physical_package_id(cpu); | ||
381 | |||
382 | for_each_online_cpu(i) | ||
383 | if (i != cpu && topology_physical_package_id(i) == id) | ||
384 | return i; | ||
385 | |||
386 | return 0; | ||
387 | } | ||
388 | |||
389 | static int pkg_temp_thermal_device_add(unsigned int cpu) | ||
390 | { | ||
391 | int err; | ||
392 | u32 tj_max; | ||
393 | struct phy_dev_entry *phy_dev_entry; | ||
394 | char buffer[30]; | ||
395 | int thres_count; | ||
396 | u32 eax, ebx, ecx, edx; | ||
397 | |||
398 | cpuid(6, &eax, &ebx, &ecx, &edx); | ||
399 | thres_count = ebx & 0x07; | ||
400 | if (!thres_count) | ||
401 | return -ENODEV; | ||
402 | |||
403 | thres_count = clamp_val(thres_count, 0, MAX_NUMBER_OF_TRIPS); | ||
404 | |||
405 | err = get_tj_max(cpu, &tj_max); | ||
406 | if (err) | ||
407 | goto err_ret; | ||
408 | |||
409 | mutex_lock(&phy_dev_list_mutex); | ||
410 | |||
411 | phy_dev_entry = kzalloc(sizeof(*phy_dev_entry), GFP_KERNEL); | ||
412 | if (!phy_dev_entry) { | ||
413 | err = -ENOMEM; | ||
414 | goto err_ret_unlock; | ||
415 | } | ||
416 | |||
417 | spin_lock(&pkg_work_lock); | ||
418 | if (topology_physical_package_id(cpu) > max_phy_id) | ||
419 | max_phy_id = topology_physical_package_id(cpu); | ||
420 | pkg_work_scheduled = krealloc(pkg_work_scheduled, | ||
421 | (max_phy_id+1) * sizeof(u8), GFP_ATOMIC); | ||
422 | if (!pkg_work_scheduled) { | ||
423 | spin_unlock(&pkg_work_lock); | ||
424 | err = -ENOMEM; | ||
425 | goto err_ret_free; | ||
426 | } | ||
427 | pkg_work_scheduled[topology_physical_package_id(cpu)] = 0; | ||
428 | spin_unlock(&pkg_work_lock); | ||
429 | |||
430 | phy_dev_entry->phys_proc_id = topology_physical_package_id(cpu); | ||
431 | phy_dev_entry->first_cpu = cpu; | ||
432 | phy_dev_entry->tj_max = tj_max; | ||
433 | phy_dev_entry->ref_cnt = 1; | ||
434 | snprintf(buffer, sizeof(buffer), "pkg-temp-%d\n", | ||
435 | phy_dev_entry->phys_proc_id); | ||
436 | phy_dev_entry->tzone = thermal_zone_device_register(buffer, | ||
437 | thres_count, | ||
438 | (thres_count == MAX_NUMBER_OF_TRIPS) ? | ||
439 | 0x03 : 0x01, | ||
440 | phy_dev_entry, &tzone_ops, NULL, 0, 0); | ||
441 | if (IS_ERR(phy_dev_entry->tzone)) { | ||
442 | err = PTR_ERR(phy_dev_entry->tzone); | ||
443 | goto err_ret_free; | ||
444 | } | ||
445 | /* Store MSR value for package thermal interrupt, to restore at exit */ | ||
446 | rdmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, | ||
447 | &phy_dev_entry->start_pkg_therm_low, | ||
448 | &phy_dev_entry->start_pkg_therm_high); | ||
449 | |||
450 | list_add_tail(&phy_dev_entry->list, &phy_dev_list); | ||
451 | pr_debug("pkg_temp_thermal_device_add :phy_id %d cpu %d\n", | ||
452 | phy_dev_entry->phys_proc_id, cpu); | ||
453 | |||
454 | mutex_unlock(&phy_dev_list_mutex); | ||
455 | |||
456 | return 0; | ||
457 | |||
458 | err_ret_free: | ||
459 | kfree(phy_dev_entry); | ||
460 | err_ret_unlock: | ||
461 | mutex_unlock(&phy_dev_list_mutex); | ||
462 | |||
463 | err_ret: | ||
464 | return err; | ||
465 | } | ||
466 | |||
467 | static int pkg_temp_thermal_device_remove(unsigned int cpu) | ||
468 | { | ||
469 | struct phy_dev_entry *n; | ||
470 | u16 phys_proc_id = topology_physical_package_id(cpu); | ||
471 | struct phy_dev_entry *phdev = | ||
472 | pkg_temp_thermal_get_phy_entry(cpu); | ||
473 | |||
474 | if (!phdev) | ||
475 | return -ENODEV; | ||
476 | |||
477 | mutex_lock(&phy_dev_list_mutex); | ||
478 | /* If we are loosing the first cpu for this package, we need change */ | ||
479 | if (phdev->first_cpu == cpu) { | ||
480 | phdev->first_cpu = find_siblings_cpu(cpu); | ||
481 | pr_debug("thermal_device_remove: first cpu switched %d\n", | ||
482 | phdev->first_cpu); | ||
483 | } | ||
484 | /* | ||
485 | * It is possible that no siblings left as this was the last cpu | ||
486 | * going offline. We don't need to worry about this assignment | ||
487 | * as the phydev entry will be removed in this case and | ||
488 | * thermal zone is removed. | ||
489 | */ | ||
490 | --phdev->ref_cnt; | ||
491 | pr_debug("thermal_device_remove: pkg: %d cpu %d ref_cnt %d\n", | ||
492 | phys_proc_id, cpu, phdev->ref_cnt); | ||
493 | if (!phdev->ref_cnt) | ||
494 | list_for_each_entry_safe(phdev, n, &phy_dev_list, list) { | ||
495 | if (phdev->phys_proc_id == phys_proc_id) { | ||
496 | thermal_zone_device_unregister(phdev->tzone); | ||
497 | list_del(&phdev->list); | ||
498 | kfree(phdev); | ||
499 | break; | ||
500 | } | ||
501 | } | ||
502 | mutex_unlock(&phy_dev_list_mutex); | ||
503 | |||
504 | return 0; | ||
505 | } | ||
506 | |||
507 | static int get_core_online(unsigned int cpu) | ||
508 | { | ||
509 | struct cpuinfo_x86 *c = &cpu_data(cpu); | ||
510 | struct phy_dev_entry *phdev = pkg_temp_thermal_get_phy_entry(cpu); | ||
511 | |||
512 | /* Check if there is already an instance for this package */ | ||
513 | if (!phdev) { | ||
514 | if (!cpu_has(c, X86_FEATURE_DTHERM) && | ||
515 | !cpu_has(c, X86_FEATURE_PTS)) | ||
516 | return -ENODEV; | ||
517 | if (pkg_temp_thermal_device_add(cpu)) | ||
518 | return -ENODEV; | ||
519 | } else { | ||
520 | mutex_lock(&phy_dev_list_mutex); | ||
521 | ++phdev->ref_cnt; | ||
522 | pr_debug("get_core_online: cpu %d ref_cnt %d\n", | ||
523 | cpu, phdev->ref_cnt); | ||
524 | mutex_unlock(&phy_dev_list_mutex); | ||
525 | } | ||
526 | INIT_DELAYED_WORK(&per_cpu(pkg_temp_thermal_threshold_work, cpu), | ||
527 | pkg_temp_thermal_threshold_work_fn); | ||
528 | |||
529 | pr_debug("get_core_online: cpu %d successful\n", cpu); | ||
530 | |||
531 | return 0; | ||
532 | } | ||
533 | |||
534 | static void put_core_offline(unsigned int cpu) | ||
535 | { | ||
536 | if (!pkg_temp_thermal_device_remove(cpu)) | ||
537 | cancel_delayed_work_sync( | ||
538 | &per_cpu(pkg_temp_thermal_threshold_work, cpu)); | ||
539 | |||
540 | pr_debug("put_core_offline: cpu %d\n", cpu); | ||
541 | } | ||
542 | |||
543 | static int pkg_temp_thermal_cpu_callback(struct notifier_block *nfb, | ||
544 | unsigned long action, void *hcpu) | ||
545 | { | ||
546 | unsigned int cpu = (unsigned long) hcpu; | ||
547 | |||
548 | switch (action) { | ||
549 | case CPU_ONLINE: | ||
550 | case CPU_DOWN_FAILED: | ||
551 | get_core_online(cpu); | ||
552 | break; | ||
553 | case CPU_DOWN_PREPARE: | ||
554 | put_core_offline(cpu); | ||
555 | break; | ||
556 | } | ||
557 | return NOTIFY_OK; | ||
558 | } | ||
559 | |||
560 | static struct notifier_block pkg_temp_thermal_notifier __refdata = { | ||
561 | .notifier_call = pkg_temp_thermal_cpu_callback, | ||
562 | }; | ||
563 | |||
564 | static const struct x86_cpu_id __initconst pkg_temp_thermal_ids[] = { | ||
565 | { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_DTHERM }, | ||
566 | {} | ||
567 | }; | ||
568 | MODULE_DEVICE_TABLE(x86cpu, pkg_temp_thermal_ids); | ||
569 | |||
570 | static int __init pkg_temp_thermal_init(void) | ||
571 | { | ||
572 | int i; | ||
573 | |||
574 | if (!x86_match_cpu(pkg_temp_thermal_ids)) | ||
575 | return -ENODEV; | ||
576 | |||
577 | spin_lock_init(&pkg_work_lock); | ||
578 | platform_thermal_package_notify = | ||
579 | pkg_temp_thermal_platform_thermal_notify; | ||
580 | platform_thermal_package_rate_control = | ||
581 | pkg_temp_thermal_platform_thermal_rate_control; | ||
582 | |||
583 | get_online_cpus(); | ||
584 | for_each_online_cpu(i) | ||
585 | if (get_core_online(i)) | ||
586 | goto err_ret; | ||
587 | register_hotcpu_notifier(&pkg_temp_thermal_notifier); | ||
588 | put_online_cpus(); | ||
589 | |||
590 | pkg_temp_debugfs_init(); /* Don't care if fails */ | ||
591 | |||
592 | return 0; | ||
593 | |||
594 | err_ret: | ||
595 | get_online_cpus(); | ||
596 | for_each_online_cpu(i) | ||
597 | put_core_offline(i); | ||
598 | put_online_cpus(); | ||
599 | kfree(pkg_work_scheduled); | ||
600 | platform_thermal_package_notify = NULL; | ||
601 | platform_thermal_package_rate_control = NULL; | ||
602 | |||
603 | return -ENODEV; | ||
604 | } | ||
605 | |||
606 | static void __exit pkg_temp_thermal_exit(void) | ||
607 | { | ||
608 | struct phy_dev_entry *phdev, *n; | ||
609 | int i; | ||
610 | |||
611 | get_online_cpus(); | ||
612 | unregister_hotcpu_notifier(&pkg_temp_thermal_notifier); | ||
613 | mutex_lock(&phy_dev_list_mutex); | ||
614 | list_for_each_entry_safe(phdev, n, &phy_dev_list, list) { | ||
615 | /* Retore old MSR value for package thermal interrupt */ | ||
616 | wrmsr_on_cpu(phdev->first_cpu, | ||
617 | MSR_IA32_PACKAGE_THERM_INTERRUPT, | ||
618 | phdev->start_pkg_therm_low, | ||
619 | phdev->start_pkg_therm_high); | ||
620 | thermal_zone_device_unregister(phdev->tzone); | ||
621 | list_del(&phdev->list); | ||
622 | kfree(phdev); | ||
623 | } | ||
624 | mutex_unlock(&phy_dev_list_mutex); | ||
625 | platform_thermal_package_notify = NULL; | ||
626 | platform_thermal_package_rate_control = NULL; | ||
627 | for_each_online_cpu(i) | ||
628 | cancel_delayed_work_sync( | ||
629 | &per_cpu(pkg_temp_thermal_threshold_work, i)); | ||
630 | put_online_cpus(); | ||
631 | |||
632 | kfree(pkg_work_scheduled); | ||
633 | |||
634 | debugfs_remove_recursive(debugfs); | ||
635 | } | ||
636 | |||
637 | module_init(pkg_temp_thermal_init) | ||
638 | module_exit(pkg_temp_thermal_exit) | ||
639 | |||
640 | MODULE_DESCRIPTION("X86 PKG TEMP Thermal Driver"); | ||
641 | MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>"); | ||
642 | MODULE_LICENSE("GPL v2"); | ||