diff options
author | Zhang Rui <rui.zhang@intel.com> | 2013-06-17 18:31:26 -0400 |
---|---|---|
committer | Zhang Rui <rui.zhang@intel.com> | 2013-06-17 18:31:26 -0400 |
commit | f157f5964bdc604d8cbf0b71780a0d47ea9f7371 (patch) | |
tree | 8667f1068b026c640cae1a6cb229ca4944c8c3c7 /drivers/thermal | |
parent | 30072fb91e1447b00fa148500c49010265b530c6 (diff) | |
parent | 23be63f48d928cd5a21db58f73c731357e895250 (diff) |
Merge branch 'cpu-package-thermal' of .git into next
Conflicts:
drivers/thermal/Kconfig
drivers/thermal/Makefile
Diffstat (limited to 'drivers/thermal')
-rw-r--r-- | drivers/thermal/Kconfig | 13 | ||||
-rw-r--r-- | drivers/thermal/Makefile | 1 | ||||
-rw-r--r-- | drivers/thermal/x86_pkg_temp_thermal.c | 642 |
3 files changed, 656 insertions, 0 deletions
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 7205c70a46a3..b13c2bcccb72 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig | |||
@@ -169,7 +169,20 @@ config INTEL_POWERCLAMP | |||
169 | enforce idle time which results in more package C-state residency. The | 169 | enforce idle time which results in more package C-state residency. The |
170 | user interface is exposed via generic thermal framework. | 170 | user interface is exposed via generic thermal framework. |
171 | 171 | ||
172 | config X86_PKG_TEMP_THERMAL | ||
173 | tristate "X86 package temperature thermal driver" | ||
174 | depends on THERMAL | ||
175 | depends on X86 | ||
176 | select THERMAL_GOV_USER_SPACE | ||
177 | default m | ||
178 | help | ||
179 | Enable this to register CPU digital sensor for package temperature as | ||
180 | thermal zone. Each package will have its own thermal zone. There are | ||
181 | two trip points which can be set by user to get notifications via thermal | ||
182 | notification methods. | ||
183 | |||
172 | menu "Texas Instruments thermal drivers" | 184 | menu "Texas Instruments thermal drivers" |
173 | source "drivers/thermal/ti-soc-thermal/Kconfig" | 185 | source "drivers/thermal/ti-soc-thermal/Kconfig" |
174 | endmenu | 186 | endmenu |
187 | |||
175 | endif | 188 | endif |
diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index 85693941fda0..67184a293e3f 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile | |||
@@ -23,4 +23,5 @@ obj-$(CONFIG_DB8500_THERMAL) += db8500_thermal.o | |||
23 | obj-$(CONFIG_ARMADA_THERMAL) += armada_thermal.o | 23 | obj-$(CONFIG_ARMADA_THERMAL) += armada_thermal.o |
24 | obj-$(CONFIG_DB8500_CPUFREQ_COOLING) += db8500_cpufreq_cooling.o | 24 | obj-$(CONFIG_DB8500_CPUFREQ_COOLING) += db8500_cpufreq_cooling.o |
25 | obj-$(CONFIG_INTEL_POWERCLAMP) += intel_powerclamp.o | 25 | obj-$(CONFIG_INTEL_POWERCLAMP) += intel_powerclamp.o |
26 | obj-$(CONFIG_X86_PKG_TEMP_THERMAL) += x86_pkg_temp_thermal.o | ||
26 | obj-$(CONFIG_TI_SOC_THERMAL) += ti-soc-thermal/ | 27 | obj-$(CONFIG_TI_SOC_THERMAL) += ti-soc-thermal/ |
diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c new file mode 100644 index 000000000000..5de56f671a9d --- /dev/null +++ b/drivers/thermal/x86_pkg_temp_thermal.c | |||
@@ -0,0 +1,642 @@ | |||
1 | /* | ||
2 | * x86_pkg_temp_thermal driver | ||
3 | * Copyright (c) 2013, Intel Corporation. | ||
4 | * | ||
5 | * This program is free software; you can redistribute it and/or modify it | ||
6 | * under the terms and conditions of the GNU General Public License, | ||
7 | * version 2, as published by the Free Software Foundation. | ||
8 | * | ||
9 | * This program is distributed in the hope it will be useful, but WITHOUT | ||
10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or | ||
11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for | ||
12 | * more details. | ||
13 | * | ||
14 | * You should have received a copy of the GNU General Public License along with | ||
15 | * this program; if not, write to the Free Software Foundation, Inc. | ||
16 | * | ||
17 | */ | ||
18 | #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt | ||
19 | |||
20 | #include <linux/module.h> | ||
21 | #include <linux/init.h> | ||
22 | #include <linux/err.h> | ||
23 | #include <linux/param.h> | ||
24 | #include <linux/device.h> | ||
25 | #include <linux/platform_device.h> | ||
26 | #include <linux/cpu.h> | ||
27 | #include <linux/smp.h> | ||
28 | #include <linux/slab.h> | ||
29 | #include <linux/pm.h> | ||
30 | #include <linux/thermal.h> | ||
31 | #include <linux/debugfs.h> | ||
32 | #include <asm/cpu_device_id.h> | ||
33 | #include <asm/mce.h> | ||
34 | |||
35 | /* | ||
36 | * Rate control delay: Idea is to introduce debounce effect | ||
37 | * This should be long enough to avoid reduce events, when | ||
38 | * threshold is set to a temperature, which is constantly | ||
39 | * violated, but short enough to take any action. | ||
40 | * The action can be remove threshold or change it to next | ||
41 | * interesting setting. Based on experiments, in around | ||
42 | * every 5 seconds under load will give us a significant | ||
43 | * temperature change. | ||
44 | */ | ||
45 | #define PKG_TEMP_THERMAL_NOTIFY_DELAY 5000 | ||
46 | static int notify_delay_ms = PKG_TEMP_THERMAL_NOTIFY_DELAY; | ||
47 | module_param(notify_delay_ms, int, 0644); | ||
48 | MODULE_PARM_DESC(notify_delay_ms, | ||
49 | "User space notification delay in milli seconds."); | ||
50 | |||
51 | /* Number of trip points in thermal zone. Currently it can't | ||
52 | * be more than 2. MSR can allow setting and getting notifications | ||
53 | * for only 2 thresholds. This define enforces this, if there | ||
54 | * are wrong values returned by cpuid for number of thresholds. | ||
55 | */ | ||
56 | #define MAX_NUMBER_OF_TRIPS 2 | ||
57 | |||
58 | struct phy_dev_entry { | ||
59 | struct list_head list; | ||
60 | u16 phys_proc_id; | ||
61 | u16 first_cpu; | ||
62 | u32 tj_max; | ||
63 | int ref_cnt; | ||
64 | u32 start_pkg_therm_low; | ||
65 | u32 start_pkg_therm_high; | ||
66 | struct thermal_zone_device *tzone; | ||
67 | }; | ||
68 | |||
69 | /* List maintaining number of package instances */ | ||
70 | static LIST_HEAD(phy_dev_list); | ||
71 | static DEFINE_MUTEX(phy_dev_list_mutex); | ||
72 | |||
73 | /* Interrupt to work function schedule queue */ | ||
74 | static DEFINE_PER_CPU(struct delayed_work, pkg_temp_thermal_threshold_work); | ||
75 | |||
76 | /* To track if the work is already scheduled on a package */ | ||
77 | static u8 *pkg_work_scheduled; | ||
78 | |||
79 | /* Spin lock to prevent races with pkg_work_scheduled */ | ||
80 | static spinlock_t pkg_work_lock; | ||
81 | static u16 max_phy_id; | ||
82 | |||
83 | /* Debug counters to show using debugfs */ | ||
84 | static struct dentry *debugfs; | ||
85 | static unsigned int pkg_interrupt_cnt; | ||
86 | static unsigned int pkg_work_cnt; | ||
87 | |||
88 | static int pkg_temp_debugfs_init(void) | ||
89 | { | ||
90 | struct dentry *d; | ||
91 | |||
92 | debugfs = debugfs_create_dir("pkg_temp_thermal", NULL); | ||
93 | if (!debugfs) | ||
94 | return -ENOENT; | ||
95 | |||
96 | d = debugfs_create_u32("pkg_thres_interrupt", S_IRUGO, debugfs, | ||
97 | (u32 *)&pkg_interrupt_cnt); | ||
98 | if (!d) | ||
99 | goto err_out; | ||
100 | |||
101 | d = debugfs_create_u32("pkg_thres_work", S_IRUGO, debugfs, | ||
102 | (u32 *)&pkg_work_cnt); | ||
103 | if (!d) | ||
104 | goto err_out; | ||
105 | |||
106 | return 0; | ||
107 | |||
108 | err_out: | ||
109 | debugfs_remove_recursive(debugfs); | ||
110 | return -ENOENT; | ||
111 | } | ||
112 | |||
113 | static struct phy_dev_entry | ||
114 | *pkg_temp_thermal_get_phy_entry(unsigned int cpu) | ||
115 | { | ||
116 | u16 phys_proc_id = topology_physical_package_id(cpu); | ||
117 | struct phy_dev_entry *phy_ptr; | ||
118 | |||
119 | mutex_lock(&phy_dev_list_mutex); | ||
120 | |||
121 | list_for_each_entry(phy_ptr, &phy_dev_list, list) | ||
122 | if (phy_ptr->phys_proc_id == phys_proc_id) { | ||
123 | mutex_unlock(&phy_dev_list_mutex); | ||
124 | return phy_ptr; | ||
125 | } | ||
126 | |||
127 | mutex_unlock(&phy_dev_list_mutex); | ||
128 | |||
129 | return NULL; | ||
130 | } | ||
131 | |||
132 | /* | ||
133 | * tj-max is interesting because threshold is set relative to this | ||
134 | * temperature. | ||
135 | */ | ||
136 | static int get_tj_max(int cpu, u32 *tj_max) | ||
137 | { | ||
138 | u32 eax, edx; | ||
139 | u32 val; | ||
140 | int err; | ||
141 | |||
142 | err = rdmsr_safe_on_cpu(cpu, MSR_IA32_TEMPERATURE_TARGET, &eax, &edx); | ||
143 | if (err) | ||
144 | goto err_ret; | ||
145 | else { | ||
146 | val = (eax >> 16) & 0xff; | ||
147 | if (val) | ||
148 | *tj_max = val * 1000; | ||
149 | else { | ||
150 | err = -EINVAL; | ||
151 | goto err_ret; | ||
152 | } | ||
153 | } | ||
154 | |||
155 | return 0; | ||
156 | err_ret: | ||
157 | *tj_max = 0; | ||
158 | return err; | ||
159 | } | ||
160 | |||
161 | static int sys_get_curr_temp(struct thermal_zone_device *tzd, unsigned long *temp) | ||
162 | { | ||
163 | u32 eax, edx; | ||
164 | struct phy_dev_entry *phy_dev_entry; | ||
165 | |||
166 | phy_dev_entry = tzd->devdata; | ||
167 | rdmsr_on_cpu(phy_dev_entry->first_cpu, MSR_IA32_PACKAGE_THERM_STATUS, | ||
168 | &eax, &edx); | ||
169 | if (eax & 0x80000000) { | ||
170 | *temp = phy_dev_entry->tj_max - | ||
171 | ((eax >> 16) & 0x7f) * 1000; | ||
172 | pr_debug("sys_get_curr_temp %ld\n", *temp); | ||
173 | return 0; | ||
174 | } | ||
175 | |||
176 | return -EINVAL; | ||
177 | } | ||
178 | |||
179 | static int sys_get_trip_temp(struct thermal_zone_device *tzd, | ||
180 | int trip, unsigned long *temp) | ||
181 | { | ||
182 | u32 eax, edx; | ||
183 | struct phy_dev_entry *phy_dev_entry; | ||
184 | u32 mask, shift; | ||
185 | unsigned long thres_reg_value; | ||
186 | int ret; | ||
187 | |||
188 | if (trip >= MAX_NUMBER_OF_TRIPS) | ||
189 | return -EINVAL; | ||
190 | |||
191 | phy_dev_entry = tzd->devdata; | ||
192 | |||
193 | if (trip) { | ||
194 | mask = THERM_MASK_THRESHOLD1; | ||
195 | shift = THERM_SHIFT_THRESHOLD1; | ||
196 | } else { | ||
197 | mask = THERM_MASK_THRESHOLD0; | ||
198 | shift = THERM_SHIFT_THRESHOLD0; | ||
199 | } | ||
200 | |||
201 | ret = rdmsr_on_cpu(phy_dev_entry->first_cpu, | ||
202 | MSR_IA32_PACKAGE_THERM_INTERRUPT, &eax, &edx); | ||
203 | if (ret < 0) | ||
204 | return -EINVAL; | ||
205 | |||
206 | thres_reg_value = (eax & mask) >> shift; | ||
207 | if (thres_reg_value) | ||
208 | *temp = phy_dev_entry->tj_max - thres_reg_value * 1000; | ||
209 | else | ||
210 | *temp = 0; | ||
211 | pr_debug("sys_get_trip_temp %ld\n", *temp); | ||
212 | |||
213 | return 0; | ||
214 | } | ||
215 | |||
216 | int sys_set_trip_temp(struct thermal_zone_device *tzd, int trip, | ||
217 | unsigned long temp) | ||
218 | { | ||
219 | u32 l, h; | ||
220 | struct phy_dev_entry *phy_dev_entry; | ||
221 | u32 mask, shift, intr; | ||
222 | int ret; | ||
223 | |||
224 | phy_dev_entry = tzd->devdata; | ||
225 | |||
226 | if (trip >= MAX_NUMBER_OF_TRIPS || temp >= phy_dev_entry->tj_max) | ||
227 | return -EINVAL; | ||
228 | |||
229 | ret = rdmsr_on_cpu(phy_dev_entry->first_cpu, | ||
230 | MSR_IA32_PACKAGE_THERM_INTERRUPT, | ||
231 | &l, &h); | ||
232 | if (ret < 0) | ||
233 | return -EINVAL; | ||
234 | |||
235 | if (trip) { | ||
236 | mask = THERM_MASK_THRESHOLD1; | ||
237 | shift = THERM_SHIFT_THRESHOLD1; | ||
238 | intr = THERM_INT_THRESHOLD1_ENABLE; | ||
239 | } else { | ||
240 | mask = THERM_MASK_THRESHOLD0; | ||
241 | shift = THERM_SHIFT_THRESHOLD0; | ||
242 | intr = THERM_INT_THRESHOLD0_ENABLE; | ||
243 | } | ||
244 | l &= ~mask; | ||
245 | /* | ||
246 | * When user space sets a trip temperature == 0, it is an indication | ||
247 | * that it is no longer interested in receiving notifications. | ||
248 | */ | ||
249 | if (!temp) | ||
250 | l &= ~intr; | ||
251 | else { | ||
252 | l |= (phy_dev_entry->tj_max - temp)/1000 << shift; | ||
253 | l |= intr; | ||
254 | } | ||
255 | |||
256 | return wrmsr_on_cpu(phy_dev_entry->first_cpu, | ||
257 | MSR_IA32_PACKAGE_THERM_INTERRUPT, | ||
258 | l, h); | ||
259 | } | ||
260 | |||
261 | static int sys_get_trip_type(struct thermal_zone_device *thermal, | ||
262 | int trip, enum thermal_trip_type *type) | ||
263 | { | ||
264 | |||
265 | *type = THERMAL_TRIP_PASSIVE; | ||
266 | |||
267 | return 0; | ||
268 | } | ||
269 | |||
270 | /* Thermal zone callback registry */ | ||
271 | static struct thermal_zone_device_ops tzone_ops = { | ||
272 | .get_temp = sys_get_curr_temp, | ||
273 | .get_trip_temp = sys_get_trip_temp, | ||
274 | .get_trip_type = sys_get_trip_type, | ||
275 | .set_trip_temp = sys_set_trip_temp, | ||
276 | }; | ||
277 | |||
278 | static bool pkg_temp_thermal_platform_thermal_rate_control(void) | ||
279 | { | ||
280 | return true; | ||
281 | } | ||
282 | |||
283 | /* Enable threshold interrupt on local package/cpu */ | ||
284 | static inline void enable_pkg_thres_interrupt(void) | ||
285 | { | ||
286 | u32 l, h; | ||
287 | u8 thres_0, thres_1; | ||
288 | |||
289 | rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); | ||
290 | /* only enable/disable if it had valid threshold value */ | ||
291 | thres_0 = (l & THERM_MASK_THRESHOLD0) >> THERM_SHIFT_THRESHOLD0; | ||
292 | thres_1 = (l & THERM_MASK_THRESHOLD1) >> THERM_SHIFT_THRESHOLD1; | ||
293 | if (thres_0) | ||
294 | l |= THERM_INT_THRESHOLD0_ENABLE; | ||
295 | if (thres_1) | ||
296 | l |= THERM_INT_THRESHOLD1_ENABLE; | ||
297 | wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); | ||
298 | } | ||
299 | |||
300 | /* Disable threshold interrupt on local package/cpu */ | ||
301 | static inline void disable_pkg_thres_interrupt(void) | ||
302 | { | ||
303 | u32 l, h; | ||
304 | rdmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); | ||
305 | wrmsr(MSR_IA32_PACKAGE_THERM_INTERRUPT, | ||
306 | l & (~THERM_INT_THRESHOLD0_ENABLE) & | ||
307 | (~THERM_INT_THRESHOLD1_ENABLE), h); | ||
308 | } | ||
309 | |||
310 | static void pkg_temp_thermal_threshold_work_fn(struct work_struct *work) | ||
311 | { | ||
312 | __u64 msr_val; | ||
313 | int cpu = smp_processor_id(); | ||
314 | int phy_id = topology_physical_package_id(cpu); | ||
315 | struct phy_dev_entry *phdev = pkg_temp_thermal_get_phy_entry(cpu); | ||
316 | bool notify = false; | ||
317 | |||
318 | if (!phdev) | ||
319 | return; | ||
320 | |||
321 | spin_lock(&pkg_work_lock); | ||
322 | ++pkg_work_cnt; | ||
323 | if (unlikely(phy_id > max_phy_id)) { | ||
324 | spin_unlock(&pkg_work_lock); | ||
325 | return; | ||
326 | } | ||
327 | pkg_work_scheduled[phy_id] = 0; | ||
328 | spin_unlock(&pkg_work_lock); | ||
329 | |||
330 | enable_pkg_thres_interrupt(); | ||
331 | rdmsrl(MSR_IA32_PACKAGE_THERM_STATUS, msr_val); | ||
332 | if (msr_val & THERM_LOG_THRESHOLD0) { | ||
333 | wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, | ||
334 | msr_val & ~THERM_LOG_THRESHOLD0); | ||
335 | notify = true; | ||
336 | } | ||
337 | if (msr_val & THERM_LOG_THRESHOLD1) { | ||
338 | wrmsrl(MSR_IA32_PACKAGE_THERM_STATUS, | ||
339 | msr_val & ~THERM_LOG_THRESHOLD1); | ||
340 | notify = true; | ||
341 | } | ||
342 | if (notify) { | ||
343 | pr_debug("thermal_zone_device_update\n"); | ||
344 | thermal_zone_device_update(phdev->tzone); | ||
345 | } | ||
346 | } | ||
347 | |||
348 | static int pkg_temp_thermal_platform_thermal_notify(__u64 msr_val) | ||
349 | { | ||
350 | unsigned long flags; | ||
351 | int cpu = smp_processor_id(); | ||
352 | int phy_id = topology_physical_package_id(cpu); | ||
353 | |||
354 | /* | ||
355 | * When a package is in interrupted state, all CPUs in that package | ||
356 | * are in the same interrupt state. So scheduling on any one CPU in | ||
357 | * the package is enough and simply return for others. | ||
358 | */ | ||
359 | spin_lock_irqsave(&pkg_work_lock, flags); | ||
360 | ++pkg_interrupt_cnt; | ||
361 | if (unlikely(phy_id > max_phy_id) || unlikely(!pkg_work_scheduled) || | ||
362 | pkg_work_scheduled[phy_id]) { | ||
363 | disable_pkg_thres_interrupt(); | ||
364 | spin_unlock_irqrestore(&pkg_work_lock, flags); | ||
365 | return -EINVAL; | ||
366 | } | ||
367 | pkg_work_scheduled[phy_id] = 1; | ||
368 | spin_unlock_irqrestore(&pkg_work_lock, flags); | ||
369 | |||
370 | disable_pkg_thres_interrupt(); | ||
371 | schedule_delayed_work_on(cpu, | ||
372 | &per_cpu(pkg_temp_thermal_threshold_work, cpu), | ||
373 | msecs_to_jiffies(notify_delay_ms)); | ||
374 | return 0; | ||
375 | } | ||
376 | |||
377 | static int find_siblings_cpu(int cpu) | ||
378 | { | ||
379 | int i; | ||
380 | int id = topology_physical_package_id(cpu); | ||
381 | |||
382 | for_each_online_cpu(i) | ||
383 | if (i != cpu && topology_physical_package_id(i) == id) | ||
384 | return i; | ||
385 | |||
386 | return 0; | ||
387 | } | ||
388 | |||
389 | static int pkg_temp_thermal_device_add(unsigned int cpu) | ||
390 | { | ||
391 | int err; | ||
392 | u32 tj_max; | ||
393 | struct phy_dev_entry *phy_dev_entry; | ||
394 | char buffer[30]; | ||
395 | int thres_count; | ||
396 | u32 eax, ebx, ecx, edx; | ||
397 | |||
398 | cpuid(6, &eax, &ebx, &ecx, &edx); | ||
399 | thres_count = ebx & 0x07; | ||
400 | if (!thres_count) | ||
401 | return -ENODEV; | ||
402 | |||
403 | thres_count = clamp_val(thres_count, 0, MAX_NUMBER_OF_TRIPS); | ||
404 | |||
405 | err = get_tj_max(cpu, &tj_max); | ||
406 | if (err) | ||
407 | goto err_ret; | ||
408 | |||
409 | mutex_lock(&phy_dev_list_mutex); | ||
410 | |||
411 | phy_dev_entry = kzalloc(sizeof(*phy_dev_entry), GFP_KERNEL); | ||
412 | if (!phy_dev_entry) { | ||
413 | err = -ENOMEM; | ||
414 | goto err_ret_unlock; | ||
415 | } | ||
416 | |||
417 | spin_lock(&pkg_work_lock); | ||
418 | if (topology_physical_package_id(cpu) > max_phy_id) | ||
419 | max_phy_id = topology_physical_package_id(cpu); | ||
420 | pkg_work_scheduled = krealloc(pkg_work_scheduled, | ||
421 | (max_phy_id+1) * sizeof(u8), GFP_ATOMIC); | ||
422 | if (!pkg_work_scheduled) { | ||
423 | spin_unlock(&pkg_work_lock); | ||
424 | err = -ENOMEM; | ||
425 | goto err_ret_free; | ||
426 | } | ||
427 | pkg_work_scheduled[topology_physical_package_id(cpu)] = 0; | ||
428 | spin_unlock(&pkg_work_lock); | ||
429 | |||
430 | phy_dev_entry->phys_proc_id = topology_physical_package_id(cpu); | ||
431 | phy_dev_entry->first_cpu = cpu; | ||
432 | phy_dev_entry->tj_max = tj_max; | ||
433 | phy_dev_entry->ref_cnt = 1; | ||
434 | snprintf(buffer, sizeof(buffer), "pkg-temp-%d\n", | ||
435 | phy_dev_entry->phys_proc_id); | ||
436 | phy_dev_entry->tzone = thermal_zone_device_register(buffer, | ||
437 | thres_count, | ||
438 | (thres_count == MAX_NUMBER_OF_TRIPS) ? | ||
439 | 0x03 : 0x01, | ||
440 | phy_dev_entry, &tzone_ops, NULL, 0, 0); | ||
441 | if (IS_ERR(phy_dev_entry->tzone)) { | ||
442 | err = PTR_ERR(phy_dev_entry->tzone); | ||
443 | goto err_ret_free; | ||
444 | } | ||
445 | /* Store MSR value for package thermal interrupt, to restore at exit */ | ||
446 | rdmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, | ||
447 | &phy_dev_entry->start_pkg_therm_low, | ||
448 | &phy_dev_entry->start_pkg_therm_high); | ||
449 | |||
450 | list_add_tail(&phy_dev_entry->list, &phy_dev_list); | ||
451 | pr_debug("pkg_temp_thermal_device_add :phy_id %d cpu %d\n", | ||
452 | phy_dev_entry->phys_proc_id, cpu); | ||
453 | |||
454 | mutex_unlock(&phy_dev_list_mutex); | ||
455 | |||
456 | return 0; | ||
457 | |||
458 | err_ret_free: | ||
459 | kfree(phy_dev_entry); | ||
460 | err_ret_unlock: | ||
461 | mutex_unlock(&phy_dev_list_mutex); | ||
462 | |||
463 | err_ret: | ||
464 | return err; | ||
465 | } | ||
466 | |||
467 | static int pkg_temp_thermal_device_remove(unsigned int cpu) | ||
468 | { | ||
469 | struct phy_dev_entry *n; | ||
470 | u16 phys_proc_id = topology_physical_package_id(cpu); | ||
471 | struct phy_dev_entry *phdev = | ||
472 | pkg_temp_thermal_get_phy_entry(cpu); | ||
473 | |||
474 | if (!phdev) | ||
475 | return -ENODEV; | ||
476 | |||
477 | mutex_lock(&phy_dev_list_mutex); | ||
478 | /* If we are losing the first cpu for this package, we need change */ | ||
479 | if (phdev->first_cpu == cpu) { | ||
480 | phdev->first_cpu = find_siblings_cpu(cpu); | ||
481 | pr_debug("thermal_device_remove: first cpu switched %d\n", | ||
482 | phdev->first_cpu); | ||
483 | } | ||
484 | /* | ||
485 | * It is possible that no siblings left as this was the last cpu | ||
486 | * going offline. We don't need to worry about this assignment | ||
487 | * as the phydev entry will be removed in this case and | ||
488 | * thermal zone is removed. | ||
489 | */ | ||
490 | --phdev->ref_cnt; | ||
491 | pr_debug("thermal_device_remove: pkg: %d cpu %d ref_cnt %d\n", | ||
492 | phys_proc_id, cpu, phdev->ref_cnt); | ||
493 | if (!phdev->ref_cnt) | ||
494 | list_for_each_entry_safe(phdev, n, &phy_dev_list, list) { | ||
495 | if (phdev->phys_proc_id == phys_proc_id) { | ||
496 | thermal_zone_device_unregister(phdev->tzone); | ||
497 | list_del(&phdev->list); | ||
498 | kfree(phdev); | ||
499 | break; | ||
500 | } | ||
501 | } | ||
502 | mutex_unlock(&phy_dev_list_mutex); | ||
503 | |||
504 | return 0; | ||
505 | } | ||
506 | |||
507 | static int get_core_online(unsigned int cpu) | ||
508 | { | ||
509 | struct cpuinfo_x86 *c = &cpu_data(cpu); | ||
510 | struct phy_dev_entry *phdev = pkg_temp_thermal_get_phy_entry(cpu); | ||
511 | |||
512 | /* Check if there is already an instance for this package */ | ||
513 | if (!phdev) { | ||
514 | if (!cpu_has(c, X86_FEATURE_DTHERM) && | ||
515 | !cpu_has(c, X86_FEATURE_PTS)) | ||
516 | return -ENODEV; | ||
517 | if (pkg_temp_thermal_device_add(cpu)) | ||
518 | return -ENODEV; | ||
519 | } else { | ||
520 | mutex_lock(&phy_dev_list_mutex); | ||
521 | ++phdev->ref_cnt; | ||
522 | pr_debug("get_core_online: cpu %d ref_cnt %d\n", | ||
523 | cpu, phdev->ref_cnt); | ||
524 | mutex_unlock(&phy_dev_list_mutex); | ||
525 | } | ||
526 | INIT_DELAYED_WORK(&per_cpu(pkg_temp_thermal_threshold_work, cpu), | ||
527 | pkg_temp_thermal_threshold_work_fn); | ||
528 | |||
529 | pr_debug("get_core_online: cpu %d successful\n", cpu); | ||
530 | |||
531 | return 0; | ||
532 | } | ||
533 | |||
534 | static void put_core_offline(unsigned int cpu) | ||
535 | { | ||
536 | if (!pkg_temp_thermal_device_remove(cpu)) | ||
537 | cancel_delayed_work_sync( | ||
538 | &per_cpu(pkg_temp_thermal_threshold_work, cpu)); | ||
539 | |||
540 | pr_debug("put_core_offline: cpu %d\n", cpu); | ||
541 | } | ||
542 | |||
543 | static int pkg_temp_thermal_cpu_callback(struct notifier_block *nfb, | ||
544 | unsigned long action, void *hcpu) | ||
545 | { | ||
546 | unsigned int cpu = (unsigned long) hcpu; | ||
547 | |||
548 | switch (action) { | ||
549 | case CPU_ONLINE: | ||
550 | case CPU_DOWN_FAILED: | ||
551 | get_core_online(cpu); | ||
552 | break; | ||
553 | case CPU_DOWN_PREPARE: | ||
554 | put_core_offline(cpu); | ||
555 | break; | ||
556 | } | ||
557 | return NOTIFY_OK; | ||
558 | } | ||
559 | |||
560 | static struct notifier_block pkg_temp_thermal_notifier __refdata = { | ||
561 | .notifier_call = pkg_temp_thermal_cpu_callback, | ||
562 | }; | ||
563 | |||
564 | static const struct x86_cpu_id __initconst pkg_temp_thermal_ids[] = { | ||
565 | { X86_VENDOR_INTEL, X86_FAMILY_ANY, X86_MODEL_ANY, X86_FEATURE_DTHERM }, | ||
566 | {} | ||
567 | }; | ||
568 | MODULE_DEVICE_TABLE(x86cpu, pkg_temp_thermal_ids); | ||
569 | |||
570 | static int __init pkg_temp_thermal_init(void) | ||
571 | { | ||
572 | int i; | ||
573 | |||
574 | if (!x86_match_cpu(pkg_temp_thermal_ids)) | ||
575 | return -ENODEV; | ||
576 | |||
577 | spin_lock_init(&pkg_work_lock); | ||
578 | platform_thermal_package_notify = | ||
579 | pkg_temp_thermal_platform_thermal_notify; | ||
580 | platform_thermal_package_rate_control = | ||
581 | pkg_temp_thermal_platform_thermal_rate_control; | ||
582 | |||
583 | get_online_cpus(); | ||
584 | for_each_online_cpu(i) | ||
585 | if (get_core_online(i)) | ||
586 | goto err_ret; | ||
587 | register_hotcpu_notifier(&pkg_temp_thermal_notifier); | ||
588 | put_online_cpus(); | ||
589 | |||
590 | pkg_temp_debugfs_init(); /* Don't care if fails */ | ||
591 | |||
592 | return 0; | ||
593 | |||
594 | err_ret: | ||
595 | get_online_cpus(); | ||
596 | for_each_online_cpu(i) | ||
597 | put_core_offline(i); | ||
598 | put_online_cpus(); | ||
599 | kfree(pkg_work_scheduled); | ||
600 | platform_thermal_package_notify = NULL; | ||
601 | platform_thermal_package_rate_control = NULL; | ||
602 | |||
603 | return -ENODEV; | ||
604 | } | ||
605 | |||
606 | static void __exit pkg_temp_thermal_exit(void) | ||
607 | { | ||
608 | struct phy_dev_entry *phdev, *n; | ||
609 | int i; | ||
610 | |||
611 | get_online_cpus(); | ||
612 | unregister_hotcpu_notifier(&pkg_temp_thermal_notifier); | ||
613 | mutex_lock(&phy_dev_list_mutex); | ||
614 | list_for_each_entry_safe(phdev, n, &phy_dev_list, list) { | ||
615 | /* Restore old MSR value for package thermal interrupt */ | ||
616 | wrmsr_on_cpu(phdev->first_cpu, | ||
617 | MSR_IA32_PACKAGE_THERM_INTERRUPT, | ||
618 | phdev->start_pkg_therm_low, | ||
619 | phdev->start_pkg_therm_high); | ||
620 | thermal_zone_device_unregister(phdev->tzone); | ||
621 | list_del(&phdev->list); | ||
622 | kfree(phdev); | ||
623 | } | ||
624 | mutex_unlock(&phy_dev_list_mutex); | ||
625 | platform_thermal_package_notify = NULL; | ||
626 | platform_thermal_package_rate_control = NULL; | ||
627 | for_each_online_cpu(i) | ||
628 | cancel_delayed_work_sync( | ||
629 | &per_cpu(pkg_temp_thermal_threshold_work, i)); | ||
630 | put_online_cpus(); | ||
631 | |||
632 | kfree(pkg_work_scheduled); | ||
633 | |||
634 | debugfs_remove_recursive(debugfs); | ||
635 | } | ||
636 | |||
637 | module_init(pkg_temp_thermal_init) | ||
638 | module_exit(pkg_temp_thermal_exit) | ||
639 | |||
640 | MODULE_DESCRIPTION("X86 PKG TEMP Thermal Driver"); | ||
641 | MODULE_AUTHOR("Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>"); | ||
642 | MODULE_LICENSE("GPL v2"); | ||