aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/platform/x86/intel_ips.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2010-08-04 13:44:06 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2010-08-04 13:44:06 -0400
commitc145307a110c14d09d5d92ff3c49dc0940e44b80 (patch)
treecba923818dea8857022de06ffd94ec6b2967aa1f /drivers/platform/x86/intel_ips.c
parent5e83f6fbdb020b70c0e413312801424d13c58d68 (diff)
parent1a14703d6b20010401ca273ac1f07bff7992aa2c (diff)
Merge branch 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mjg59/platform-drivers-x86
* 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mjg59/platform-drivers-x86: (88 commits) ips driver: make it less chatty intel_scu_ipc: fix size field for intel_scu_ipc_command intel_scu_ipc: return -EIO for error condition in busy_loop intel_scu_ipc: fix data packing of PMIC command on Moorestown Clean up command packing on MRST. zero the stack buffer before giving random garbage to the SCU Fix stack buffer size for IPC writev messages intel_scu_ipc: Use the new cpu identification function intel_scu_ipc: tidy up unused bits Remove indirect read write api support. intel_scu_ipc: Support Medfield processors intel_scu_ipc: detect CPU type automatically x86 plat: limit x86 platform driver menu to X86 acpi ec_sys: Be more cautious about ec write access acpi ec: Fix possible double io port registration hp-wmi: acpi_drivers.h is already included through acpi.h two lines below hp-wmi: Fix mixing up of and/or directive dell-laptop: make dell_laptop_i8042_filter() static asus-laptop: fix asus_input_init error path msi-wmi: make needlessly global symbols static ...
Diffstat (limited to 'drivers/platform/x86/intel_ips.c')
-rw-r--r--drivers/platform/x86/intel_ips.c1660
1 files changed, 1660 insertions, 0 deletions
diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c
new file mode 100644
index 00000000000..afe82e50dfe
--- /dev/null
+++ b/drivers/platform/x86/intel_ips.c
@@ -0,0 +1,1660 @@
1/*
2 * Copyright (c) 2009-2010 Intel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
16 *
17 * The full GNU General Public License is included in this distribution in
18 * the file called "COPYING".
19 *
20 * Authors:
21 * Jesse Barnes <jbarnes@virtuousgeek.org>
22 */
23
24/*
25 * Some Intel Ibex Peak based platforms support so-called "intelligent
26 * power sharing", which allows the CPU and GPU to cooperate to maximize
27 * performance within a given TDP (thermal design point). This driver
28 * performs the coordination between the CPU and GPU, monitors thermal and
29 * power statistics in the platform, and initializes power monitoring
30 * hardware. It also provides a few tunables to control behavior. Its
31 * primary purpose is to safely allow CPU and GPU turbo modes to be enabled
32 * by tracking power and thermal budget; secondarily it can boost turbo
33 * performance by allocating more power or thermal budget to the CPU or GPU
34 * based on available headroom and activity.
35 *
36 * The basic algorithm is driven by a 5s moving average of temperature. If
37 * thermal headroom is available, the CPU and/or GPU power clamps may be
38 * adjusted upwards. If we hit the thermal ceiling or a thermal trigger,
39 * we scale back the clamp. Aside from trigger events (when we're critically
40 * close or over our TDP) we don't adjust the clamps more than once every
41 * five seconds.
42 *
43 * The thermal device (device 31, function 6) has a set of registers that
44 * are updated by the ME firmware. The ME should also take the clamp values
45 * written to those registers and write them to the CPU, but we currently
46 * bypass that functionality and write the CPU MSR directly.
47 *
48 * UNSUPPORTED:
49 * - dual MCP configs
50 *
51 * TODO:
52 * - handle CPU hotplug
53 * - provide turbo enable/disable api
54 * - make sure we can write turbo enable/disable reg based on MISC_EN
55 *
56 * Related documents:
57 * - CDI 403777, 403778 - Auburndale EDS vol 1 & 2
58 * - CDI 401376 - Ibex Peak EDS
59 * - ref 26037, 26641 - IPS BIOS spec
60 * - ref 26489 - Nehalem BIOS writer's guide
61 * - ref 26921 - Ibex Peak BIOS Specification
62 */
63
64#include <linux/debugfs.h>
65#include <linux/delay.h>
66#include <linux/interrupt.h>
67#include <linux/kernel.h>
68#include <linux/kthread.h>
69#include <linux/module.h>
70#include <linux/pci.h>
71#include <linux/sched.h>
72#include <linux/seq_file.h>
73#include <linux/string.h>
74#include <linux/tick.h>
75#include <linux/timer.h>
76#include <drm/i915_drm.h>
77#include <asm/msr.h>
78#include <asm/processor.h>
79
80#define PCI_DEVICE_ID_INTEL_THERMAL_SENSOR 0x3b32
81
82/*
83 * Package level MSRs for monitor/control
84 */
85#define PLATFORM_INFO 0xce
86#define PLATFORM_TDP (1<<29)
87#define PLATFORM_RATIO (1<<28)
88
89#define IA32_MISC_ENABLE 0x1a0
90#define IA32_MISC_TURBO_EN (1ULL<<38)
91
92#define TURBO_POWER_CURRENT_LIMIT 0x1ac
93#define TURBO_TDC_OVR_EN (1UL<<31)
94#define TURBO_TDC_MASK (0x000000007fff0000UL)
95#define TURBO_TDC_SHIFT (16)
96#define TURBO_TDP_OVR_EN (1UL<<15)
97#define TURBO_TDP_MASK (0x0000000000003fffUL)
98
99/*
100 * Core/thread MSRs for monitoring
101 */
102#define IA32_PERF_CTL 0x199
103#define IA32_PERF_TURBO_DIS (1ULL<<32)
104
105/*
106 * Thermal PCI device regs
107 */
108#define THM_CFG_TBAR 0x10
109#define THM_CFG_TBAR_HI 0x14
110
111#define THM_TSIU 0x00
112#define THM_TSE 0x01
113#define TSE_EN 0xb8
114#define THM_TSS 0x02
115#define THM_TSTR 0x03
116#define THM_TSTTP 0x04
117#define THM_TSCO 0x08
118#define THM_TSES 0x0c
119#define THM_TSGPEN 0x0d
120#define TSGPEN_HOT_LOHI (1<<1)
121#define TSGPEN_CRIT_LOHI (1<<2)
122#define THM_TSPC 0x0e
123#define THM_PPEC 0x10
124#define THM_CTA 0x12
125#define THM_PTA 0x14
126#define PTA_SLOPE_MASK (0xff00)
127#define PTA_SLOPE_SHIFT 8
128#define PTA_OFFSET_MASK (0x00ff)
129#define THM_MGTA 0x16
130#define MGTA_SLOPE_MASK (0xff00)
131#define MGTA_SLOPE_SHIFT 8
132#define MGTA_OFFSET_MASK (0x00ff)
133#define THM_TRC 0x1a
134#define TRC_CORE2_EN (1<<15)
135#define TRC_THM_EN (1<<12)
136#define TRC_C6_WAR (1<<8)
137#define TRC_CORE1_EN (1<<7)
138#define TRC_CORE_PWR (1<<6)
139#define TRC_PCH_EN (1<<5)
140#define TRC_MCH_EN (1<<4)
141#define TRC_DIMM4 (1<<3)
142#define TRC_DIMM3 (1<<2)
143#define TRC_DIMM2 (1<<1)
144#define TRC_DIMM1 (1<<0)
145#define THM_TES 0x20
146#define THM_TEN 0x21
147#define TEN_UPDATE_EN 1
148#define THM_PSC 0x24
149#define PSC_NTG (1<<0) /* No GFX turbo support */
150#define PSC_NTPC (1<<1) /* No CPU turbo support */
151#define PSC_PP_DEF (0<<2) /* Perf policy up to driver */
152#define PSP_PP_PC (1<<2) /* BIOS prefers CPU perf */
153#define PSP_PP_BAL (2<<2) /* BIOS wants balanced perf */
154#define PSP_PP_GFX (3<<2) /* BIOS prefers GFX perf */
155#define PSP_PBRT (1<<4) /* BIOS run time support */
156#define THM_CTV1 0x30
157#define CTV_TEMP_ERROR (1<<15)
158#define CTV_TEMP_MASK 0x3f
159#define CTV_
160#define THM_CTV2 0x32
161#define THM_CEC 0x34 /* undocumented power accumulator in joules */
162#define THM_AE 0x3f
163#define THM_HTS 0x50 /* 32 bits */
164#define HTS_PCPL_MASK (0x7fe00000)
165#define HTS_PCPL_SHIFT 21
166#define HTS_GPL_MASK (0x001ff000)
167#define HTS_GPL_SHIFT 12
168#define HTS_PP_MASK (0x00000c00)
169#define HTS_PP_SHIFT 10
170#define HTS_PP_DEF 0
171#define HTS_PP_PROC 1
172#define HTS_PP_BAL 2
173#define HTS_PP_GFX 3
174#define HTS_PCTD_DIS (1<<9)
175#define HTS_GTD_DIS (1<<8)
176#define HTS_PTL_MASK (0x000000fe)
177#define HTS_PTL_SHIFT 1
178#define HTS_NVV (1<<0)
179#define THM_HTSHI 0x54 /* 16 bits */
180#define HTS2_PPL_MASK (0x03ff)
181#define HTS2_PRST_MASK (0x3c00)
182#define HTS2_PRST_SHIFT 10
183#define HTS2_PRST_UNLOADED 0
184#define HTS2_PRST_RUNNING 1
185#define HTS2_PRST_TDISOP 2 /* turbo disabled due to power */
186#define HTS2_PRST_TDISHT 3 /* turbo disabled due to high temp */
187#define HTS2_PRST_TDISUSR 4 /* user disabled turbo */
188#define HTS2_PRST_TDISPLAT 5 /* platform disabled turbo */
189#define HTS2_PRST_TDISPM 6 /* power management disabled turbo */
190#define HTS2_PRST_TDISERR 7 /* some kind of error disabled turbo */
191#define THM_PTL 0x56
192#define THM_MGTV 0x58
193#define TV_MASK 0x000000000000ff00
194#define TV_SHIFT 8
195#define THM_PTV 0x60
196#define PTV_MASK 0x00ff
197#define THM_MMGPC 0x64
198#define THM_MPPC 0x66
199#define THM_MPCPC 0x68
200#define THM_TSPIEN 0x82
201#define TSPIEN_AUX_LOHI (1<<0)
202#define TSPIEN_HOT_LOHI (1<<1)
203#define TSPIEN_CRIT_LOHI (1<<2)
204#define TSPIEN_AUX2_LOHI (1<<3)
205#define THM_TSLOCK 0x83
206#define THM_ATR 0x84
207#define THM_TOF 0x87
208#define THM_STS 0x98
209#define STS_PCPL_MASK (0x7fe00000)
210#define STS_PCPL_SHIFT 21
211#define STS_GPL_MASK (0x001ff000)
212#define STS_GPL_SHIFT 12
213#define STS_PP_MASK (0x00000c00)
214#define STS_PP_SHIFT 10
215#define STS_PP_DEF 0
216#define STS_PP_PROC 1
217#define STS_PP_BAL 2
218#define STS_PP_GFX 3
219#define STS_PCTD_DIS (1<<9)
220#define STS_GTD_DIS (1<<8)
221#define STS_PTL_MASK (0x000000fe)
222#define STS_PTL_SHIFT 1
223#define STS_NVV (1<<0)
224#define THM_SEC 0x9c
225#define SEC_ACK (1<<0)
226#define THM_TC3 0xa4
227#define THM_TC1 0xa8
228#define STS_PPL_MASK (0x0003ff00)
229#define STS_PPL_SHIFT 16
230#define THM_TC2 0xac
231#define THM_DTV 0xb0
232#define THM_ITV 0xd8
233#define ITV_ME_SEQNO_MASK 0x000f0000 /* ME should update every ~200ms */
234#define ITV_ME_SEQNO_SHIFT (16)
235#define ITV_MCH_TEMP_MASK 0x0000ff00
236#define ITV_MCH_TEMP_SHIFT (8)
237#define ITV_PCH_TEMP_MASK 0x000000ff
238
239#define thm_readb(off) readb(ips->regmap + (off))
240#define thm_readw(off) readw(ips->regmap + (off))
241#define thm_readl(off) readl(ips->regmap + (off))
242#define thm_readq(off) readq(ips->regmap + (off))
243
244#define thm_writeb(off, val) writeb((val), ips->regmap + (off))
245#define thm_writew(off, val) writew((val), ips->regmap + (off))
246#define thm_writel(off, val) writel((val), ips->regmap + (off))
247
248static const int IPS_ADJUST_PERIOD = 5000; /* ms */
249
250/* For initial average collection */
251static const int IPS_SAMPLE_PERIOD = 200; /* ms */
252static const int IPS_SAMPLE_WINDOW = 5000; /* 5s moving window of samples */
253#define IPS_SAMPLE_COUNT (IPS_SAMPLE_WINDOW / IPS_SAMPLE_PERIOD)
254
/*
 * Per-SKU limits.
 *
 * Describes the power and temperature ceilings for one MCP SKU.  The
 * family/model fields are left zero-initialized by the tables below.
 */
struct ips_mcp_limits {
	int cpu_family;
	int cpu_model;		/* includes extended model... */
	int mcp_power_limit;	/* mW units */
	int core_power_limit;
	int mch_power_limit;
	int core_temp_limit;	/* degrees C */
	int mch_temp_limit;
};

/*
 * Limit tables.  Max temps are -10 degrees C to avoid PROCHOT#.
 */

struct ips_mcp_limits ips_sv_limits = {
	.mcp_power_limit	= 35000,
	.core_power_limit	= 29000,
	.mch_power_limit	= 20000,
	.core_temp_limit	= 95,
	.mch_temp_limit		= 90,
};

struct ips_mcp_limits ips_lv_limits = {
	.mcp_power_limit	= 25000,
	.core_power_limit	= 21000,
	.mch_power_limit	= 13000,
	.core_temp_limit	= 95,
	.mch_temp_limit		= 90,
};

struct ips_mcp_limits ips_ulv_limits = {
	.mcp_power_limit	= 18000,
	.core_power_limit	= 14000,
	.mch_power_limit	= 11000,
	.core_temp_limit	= 95,
	.mch_temp_limit		= 90,
};
291
292struct ips_driver {
293 struct pci_dev *dev;
294 void *regmap;
295 struct task_struct *monitor;
296 struct task_struct *adjust;
297 struct dentry *debug_root;
298
299 /* Average CPU core temps (all averages in .01 degrees C for precision) */
300 u16 ctv1_avg_temp;
301 u16 ctv2_avg_temp;
302 /* GMCH average */
303 u16 mch_avg_temp;
304 /* Average for the CPU (both cores?) */
305 u16 mcp_avg_temp;
306 /* Average power consumption (in mW) */
307 u32 cpu_avg_power;
308 u32 mch_avg_power;
309
310 /* Offset values */
311 u16 cta_val;
312 u16 pta_val;
313 u16 mgta_val;
314
315 /* Maximums & prefs, protected by turbo status lock */
316 spinlock_t turbo_status_lock;
317 u16 mcp_temp_limit;
318 u16 mcp_power_limit;
319 u16 core_power_limit;
320 u16 mch_power_limit;
321 bool cpu_turbo_enabled;
322 bool __cpu_turbo_on;
323 bool gpu_turbo_enabled;
324 bool __gpu_turbo_on;
325 bool gpu_preferred;
326 bool poll_turbo_status;
327 bool second_cpu;
328 struct ips_mcp_limits *limits;
329
330 /* Optional MCH interfaces for if i915 is in use */
331 unsigned long (*read_mch_val)(void);
332 bool (*gpu_raise)(void);
333 bool (*gpu_lower)(void);
334 bool (*gpu_busy)(void);
335 bool (*gpu_turbo_disable)(void);
336
337 /* For restoration at unload */
338 u64 orig_turbo_limit;
339 u64 orig_turbo_ratios;
340};
341
342/**
343 * ips_cpu_busy - is CPU busy?
344 * @ips: IPS driver struct
345 *
346 * Check CPU for load to see whether we should increase its thermal budget.
347 *
348 * RETURNS:
349 * True if the CPU could use more power, false otherwise.
350 */
351static bool ips_cpu_busy(struct ips_driver *ips)
352{
353 if ((avenrun[0] >> FSHIFT) > 1)
354 return true;
355
356 return false;
357}
358
359/**
360 * ips_cpu_raise - raise CPU power clamp
361 * @ips: IPS driver struct
362 *
363 * Raise the CPU power clamp by %IPS_CPU_STEP, in accordance with TDP for
364 * this platform.
365 *
366 * We do this by adjusting the TURBO_POWER_CURRENT_LIMIT MSR upwards (as
367 * long as we haven't hit the TDP limit for the SKU).
368 */
369static void ips_cpu_raise(struct ips_driver *ips)
370{
371 u64 turbo_override;
372 u16 cur_tdp_limit, new_tdp_limit;
373
374 if (!ips->cpu_turbo_enabled)
375 return;
376
377 rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
378
379 cur_tdp_limit = turbo_override & TURBO_TDP_MASK;
380 new_tdp_limit = cur_tdp_limit + 8; /* 1W increase */
381
382 /* Clamp to SKU TDP limit */
383 if (((new_tdp_limit * 10) / 8) > ips->core_power_limit)
384 new_tdp_limit = cur_tdp_limit;
385
386 thm_writew(THM_MPCPC, (new_tdp_limit * 10) / 8);
387
388 turbo_override |= TURBO_TDC_OVR_EN | TURBO_TDC_OVR_EN;
389 wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
390
391 turbo_override &= ~TURBO_TDP_MASK;
392 turbo_override |= new_tdp_limit;
393
394 wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
395}
396
397/**
398 * ips_cpu_lower - lower CPU power clamp
399 * @ips: IPS driver struct
400 *
401 * Lower CPU power clamp b %IPS_CPU_STEP if possible.
402 *
403 * We do this by adjusting the TURBO_POWER_CURRENT_LIMIT MSR down, going
404 * as low as the platform limits will allow (though we could go lower there
405 * wouldn't be much point).
406 */
407static void ips_cpu_lower(struct ips_driver *ips)
408{
409 u64 turbo_override;
410 u16 cur_limit, new_limit;
411
412 rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
413
414 cur_limit = turbo_override & TURBO_TDP_MASK;
415 new_limit = cur_limit - 8; /* 1W decrease */
416
417 /* Clamp to SKU TDP limit */
418 if (((new_limit * 10) / 8) < (ips->orig_turbo_limit & TURBO_TDP_MASK))
419 new_limit = ips->orig_turbo_limit & TURBO_TDP_MASK;
420
421 thm_writew(THM_MPCPC, (new_limit * 10) / 8);
422
423 turbo_override |= TURBO_TDC_OVR_EN | TURBO_TDC_OVR_EN;
424 wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
425
426 turbo_override &= ~TURBO_TDP_MASK;
427 turbo_override |= new_limit;
428
429 wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
430}
431
432/**
433 * do_enable_cpu_turbo - internal turbo enable function
434 * @data: unused
435 *
436 * Internal function for actually updating MSRs. When we enable/disable
437 * turbo, we need to do it on each CPU; this function is the one called
438 * by on_each_cpu() when needed.
439 */
440static void do_enable_cpu_turbo(void *data)
441{
442 u64 perf_ctl;
443
444 rdmsrl(IA32_PERF_CTL, perf_ctl);
445 if (perf_ctl & IA32_PERF_TURBO_DIS) {
446 perf_ctl &= ~IA32_PERF_TURBO_DIS;
447 wrmsrl(IA32_PERF_CTL, perf_ctl);
448 }
449}
450
451/**
452 * ips_enable_cpu_turbo - enable turbo mode on all CPUs
453 * @ips: IPS driver struct
454 *
455 * Enable turbo mode by clearing the disable bit in IA32_PERF_CTL on
456 * all logical threads.
457 */
458static void ips_enable_cpu_turbo(struct ips_driver *ips)
459{
460 /* Already on, no need to mess with MSRs */
461 if (ips->__cpu_turbo_on)
462 return;
463
464 on_each_cpu(do_enable_cpu_turbo, ips, 1);
465
466 ips->__cpu_turbo_on = true;
467}
468
469/**
470 * do_disable_cpu_turbo - internal turbo disable function
471 * @data: unused
472 *
473 * Internal function for actually updating MSRs. When we enable/disable
474 * turbo, we need to do it on each CPU; this function is the one called
475 * by on_each_cpu() when needed.
476 */
477static void do_disable_cpu_turbo(void *data)
478{
479 u64 perf_ctl;
480
481 rdmsrl(IA32_PERF_CTL, perf_ctl);
482 if (!(perf_ctl & IA32_PERF_TURBO_DIS)) {
483 perf_ctl |= IA32_PERF_TURBO_DIS;
484 wrmsrl(IA32_PERF_CTL, perf_ctl);
485 }
486}
487
488/**
489 * ips_disable_cpu_turbo - disable turbo mode on all CPUs
490 * @ips: IPS driver struct
491 *
492 * Disable turbo mode by setting the disable bit in IA32_PERF_CTL on
493 * all logical threads.
494 */
495static void ips_disable_cpu_turbo(struct ips_driver *ips)
496{
497 /* Already off, leave it */
498 if (!ips->__cpu_turbo_on)
499 return;
500
501 on_each_cpu(do_disable_cpu_turbo, ips, 1);
502
503 ips->__cpu_turbo_on = false;
504}
505
506/**
507 * ips_gpu_busy - is GPU busy?
508 * @ips: IPS driver struct
509 *
510 * Check GPU for load to see whether we should increase its thermal budget.
511 * We need to call into the i915 driver in this case.
512 *
513 * RETURNS:
514 * True if the GPU could use more power, false otherwise.
515 */
516static bool ips_gpu_busy(struct ips_driver *ips)
517{
518 if (!ips->gpu_turbo_enabled)
519 return false;
520
521 return ips->gpu_busy();
522}
523
524/**
525 * ips_gpu_raise - raise GPU power clamp
526 * @ips: IPS driver struct
527 *
528 * Raise the GPU frequency/power if possible. We need to call into the
529 * i915 driver in this case.
530 */
531static void ips_gpu_raise(struct ips_driver *ips)
532{
533 if (!ips->gpu_turbo_enabled)
534 return;
535
536 if (!ips->gpu_raise())
537 ips->gpu_turbo_enabled = false;
538
539 return;
540}
541
542/**
543 * ips_gpu_lower - lower GPU power clamp
544 * @ips: IPS driver struct
545 *
546 * Lower GPU frequency/power if possible. Need to call i915.
547 */
548static void ips_gpu_lower(struct ips_driver *ips)
549{
550 if (!ips->gpu_turbo_enabled)
551 return;
552
553 if (!ips->gpu_lower())
554 ips->gpu_turbo_enabled = false;
555
556 return;
557}
558
559/**
560 * ips_enable_gpu_turbo - notify the gfx driver turbo is available
561 * @ips: IPS driver struct
562 *
563 * Call into the graphics driver indicating that it can safely use
564 * turbo mode.
565 */
566static void ips_enable_gpu_turbo(struct ips_driver *ips)
567{
568 if (ips->__gpu_turbo_on)
569 return;
570 ips->__gpu_turbo_on = true;
571}
572
573/**
574 * ips_disable_gpu_turbo - notify the gfx driver to disable turbo mode
575 * @ips: IPS driver struct
576 *
577 * Request that the graphics driver disable turbo mode.
578 */
579static void ips_disable_gpu_turbo(struct ips_driver *ips)
580{
581 /* Avoid calling i915 if turbo is already disabled */
582 if (!ips->__gpu_turbo_on)
583 return;
584
585 if (!ips->gpu_turbo_disable())
586 dev_err(&ips->dev->dev, "failed to disable graphis turbo\n");
587 else
588 ips->__gpu_turbo_on = false;
589}
590
591/**
592 * mcp_exceeded - check whether we're outside our thermal & power limits
593 * @ips: IPS driver struct
594 *
595 * Check whether the MCP is over its thermal or power budget.
596 */
597static bool mcp_exceeded(struct ips_driver *ips)
598{
599 unsigned long flags;
600 bool ret = false;
601
602 spin_lock_irqsave(&ips->turbo_status_lock, flags);
603 if (ips->mcp_avg_temp > (ips->mcp_temp_limit * 100))
604 ret = true;
605 if (ips->cpu_avg_power + ips->mch_avg_power > ips->mcp_power_limit)
606 ret = true;
607 spin_unlock_irqrestore(&ips->turbo_status_lock, flags);
608
609 if (ret)
610 dev_info(&ips->dev->dev,
611 "MCP power or thermal limit exceeded\n");
612
613 return ret;
614}
615
616/**
617 * cpu_exceeded - check whether a CPU core is outside its limits
618 * @ips: IPS driver struct
619 * @cpu: CPU number to check
620 *
621 * Check a given CPU's average temp or power is over its limit.
622 */
623static bool cpu_exceeded(struct ips_driver *ips, int cpu)
624{
625 unsigned long flags;
626 int avg;
627 bool ret = false;
628
629 spin_lock_irqsave(&ips->turbo_status_lock, flags);
630 avg = cpu ? ips->ctv2_avg_temp : ips->ctv1_avg_temp;
631 if (avg > (ips->limits->core_temp_limit * 100))
632 ret = true;
633 if (ips->cpu_avg_power > ips->core_power_limit * 100)
634 ret = true;
635 spin_unlock_irqrestore(&ips->turbo_status_lock, flags);
636
637 if (ret)
638 dev_info(&ips->dev->dev,
639 "CPU power or thermal limit exceeded\n");
640
641 return ret;
642}
643
644/**
645 * mch_exceeded - check whether the GPU is over budget
646 * @ips: IPS driver struct
647 *
648 * Check the MCH temp & power against their maximums.
649 */
650static bool mch_exceeded(struct ips_driver *ips)
651{
652 unsigned long flags;
653 bool ret = false;
654
655 spin_lock_irqsave(&ips->turbo_status_lock, flags);
656 if (ips->mch_avg_temp > (ips->limits->mch_temp_limit * 100))
657 ret = true;
658 if (ips->mch_avg_power > ips->mch_power_limit)
659 ret = true;
660 spin_unlock_irqrestore(&ips->turbo_status_lock, flags);
661
662 return ret;
663}
664
665/**
666 * update_turbo_limits - get various limits & settings from regs
667 * @ips: IPS driver struct
668 *
669 * Update the IPS power & temp limits, along with turbo enable flags,
670 * based on latest register contents.
671 *
672 * Used at init time and for runtime BIOS support, which requires polling
673 * the regs for updates (as a result of AC->DC transition for example).
674 *
675 * LOCKING:
676 * Caller must hold turbo_status_lock (outside of init)
677 */
678static void update_turbo_limits(struct ips_driver *ips)
679{
680 u32 hts = thm_readl(THM_HTS);
681
682 ips->cpu_turbo_enabled = !(hts & HTS_PCTD_DIS);
683 ips->gpu_turbo_enabled = !(hts & HTS_GTD_DIS);
684 ips->core_power_limit = thm_readw(THM_MPCPC);
685 ips->mch_power_limit = thm_readw(THM_MMGPC);
686 ips->mcp_temp_limit = thm_readw(THM_PTL);
687 ips->mcp_power_limit = thm_readw(THM_MPPC);
688
689 /* Ignore BIOS CPU vs GPU pref */
690}
691
692/**
693 * ips_adjust - adjust power clamp based on thermal state
694 * @data: ips driver structure
695 *
696 * Wake up every 5s or so and check whether we should adjust the power clamp.
697 * Check CPU and GPU load to determine which needs adjustment. There are
698 * several things to consider here:
699 * - do we need to adjust up or down?
700 * - is CPU busy?
701 * - is GPU busy?
702 * - is CPU in turbo?
703 * - is GPU in turbo?
704 * - is CPU or GPU preferred? (CPU is default)
705 *
706 * So, given the above, we do the following:
707 * - up (TDP available)
708 * - CPU not busy, GPU not busy - nothing
709 * - CPU busy, GPU not busy - adjust CPU up
710 * - CPU not busy, GPU busy - adjust GPU up
711 * - CPU busy, GPU busy - adjust preferred unit up, taking headroom from
712 * non-preferred unit if necessary
713 * - down (at TDP limit)
714 * - adjust both CPU and GPU down if possible
715 *
716 cpu+ gpu+ cpu+gpu- cpu-gpu+ cpu-gpu-
717cpu < gpu < cpu+gpu+ cpu+ gpu+ nothing
718cpu < gpu >= cpu+gpu-(mcp<) cpu+gpu-(mcp<) gpu- gpu-
719cpu >= gpu < cpu-gpu+(mcp<) cpu- cpu-gpu+(mcp<) cpu-
720cpu >= gpu >= cpu-gpu- cpu-gpu- cpu-gpu- cpu-gpu-
721 *
722 */
723static int ips_adjust(void *data)
724{
725 struct ips_driver *ips = data;
726 unsigned long flags;
727
728 dev_dbg(&ips->dev->dev, "starting ips-adjust thread\n");
729
730 /*
731 * Adjust CPU and GPU clamps every 5s if needed. Doing it more
732 * often isn't recommended due to ME interaction.
733 */
734 do {
735 bool cpu_busy = ips_cpu_busy(ips);
736 bool gpu_busy = ips_gpu_busy(ips);
737
738 spin_lock_irqsave(&ips->turbo_status_lock, flags);
739 if (ips->poll_turbo_status)
740 update_turbo_limits(ips);
741 spin_unlock_irqrestore(&ips->turbo_status_lock, flags);
742
743 /* Update turbo status if necessary */
744 if (ips->cpu_turbo_enabled)
745 ips_enable_cpu_turbo(ips);
746 else
747 ips_disable_cpu_turbo(ips);
748
749 if (ips->gpu_turbo_enabled)
750 ips_enable_gpu_turbo(ips);
751 else
752 ips_disable_gpu_turbo(ips);
753
754 /* We're outside our comfort zone, crank them down */
755 if (mcp_exceeded(ips)) {
756 ips_cpu_lower(ips);
757 ips_gpu_lower(ips);
758 goto sleep;
759 }
760
761 if (!cpu_exceeded(ips, 0) && cpu_busy)
762 ips_cpu_raise(ips);
763 else
764 ips_cpu_lower(ips);
765
766 if (!mch_exceeded(ips) && gpu_busy)
767 ips_gpu_raise(ips);
768 else
769 ips_gpu_lower(ips);
770
771sleep:
772 schedule_timeout_interruptible(msecs_to_jiffies(IPS_ADJUST_PERIOD));
773 } while (!kthread_should_stop());
774
775 dev_dbg(&ips->dev->dev, "ips-adjust thread stopped\n");
776
777 return 0;
778}
779
780/*
781 * Helpers for reading out temp/power values and calculating their
782 * averages for the decision making and monitoring functions.
783 */
784
785static u16 calc_avg_temp(struct ips_driver *ips, u16 *array)
786{
787 u64 total = 0;
788 int i;
789 u16 avg;
790
791 for (i = 0; i < IPS_SAMPLE_COUNT; i++)
792 total += (u64)(array[i] * 100);
793
794 do_div(total, IPS_SAMPLE_COUNT);
795
796 avg = (u16)total;
797
798 return avg;
799}
800
801static u16 read_mgtv(struct ips_driver *ips)
802{
803 u16 ret;
804 u64 slope, offset;
805 u64 val;
806
807 val = thm_readq(THM_MGTV);
808 val = (val & TV_MASK) >> TV_SHIFT;
809
810 slope = offset = thm_readw(THM_MGTA);
811 slope = (slope & MGTA_SLOPE_MASK) >> MGTA_SLOPE_SHIFT;
812 offset = offset & MGTA_OFFSET_MASK;
813
814 ret = ((val * slope + 0x40) >> 7) + offset;
815
816 return 0; /* MCH temp reporting buggy */
817}
818
819static u16 read_ptv(struct ips_driver *ips)
820{
821 u16 val, slope, offset;
822
823 slope = (ips->pta_val & PTA_SLOPE_MASK) >> PTA_SLOPE_SHIFT;
824 offset = ips->pta_val & PTA_OFFSET_MASK;
825
826 val = thm_readw(THM_PTV) & PTV_MASK;
827
828 return val;
829}
830
831static u16 read_ctv(struct ips_driver *ips, int cpu)
832{
833 int reg = cpu ? THM_CTV2 : THM_CTV1;
834 u16 val;
835
836 val = thm_readw(reg);
837 if (!(val & CTV_TEMP_ERROR))
838 val = (val) >> 6; /* discard fractional component */
839 else
840 val = 0;
841
842 return val;
843}
844
845static u32 get_cpu_power(struct ips_driver *ips, u32 *last, int period)
846{
847 u32 val;
848 u32 ret;
849
850 /*
851 * CEC is in joules/65535. Take difference over time to
852 * get watts.
853 */
854 val = thm_readl(THM_CEC);
855
856 /* period is in ms and we want mW */
857 ret = (((val - *last) * 1000) / period);
858 ret = (ret * 1000) / 65535;
859 *last = val;
860
861 return ret;
862}
863
864static const u16 temp_decay_factor = 2;
865static u16 update_average_temp(u16 avg, u16 val)
866{
867 u16 ret;
868
869 /* Multiply by 100 for extra precision */
870 ret = (val * 100 / temp_decay_factor) +
871 (((temp_decay_factor - 1) * avg) / temp_decay_factor);
872 return ret;
873}
874
875static const u16 power_decay_factor = 2;
876static u16 update_average_power(u32 avg, u32 val)
877{
878 u32 ret;
879
880 ret = (val / power_decay_factor) +
881 (((power_decay_factor - 1) * avg) / power_decay_factor);
882
883 return ret;
884}
885
886static u32 calc_avg_power(struct ips_driver *ips, u32 *array)
887{
888 u64 total = 0;
889 u32 avg;
890 int i;
891
892 for (i = 0; i < IPS_SAMPLE_COUNT; i++)
893 total += array[i];
894
895 do_div(total, IPS_SAMPLE_COUNT);
896 avg = (u32)total;
897
898 return avg;
899}
900
/*
 * Timer callback: @arg carries the task_struct pointer of the task to wake.
 */
static void monitor_timeout(unsigned long arg)
{
	struct task_struct *sleeper = (struct task_struct *)arg;

	wake_up_process(sleeper);
}
905
906/**
907 * ips_monitor - temp/power monitoring thread
908 * @data: ips driver structure
909 *
910 * This is the main function for the IPS driver. It monitors power and
911 * tempurature in the MCP and adjusts CPU and GPU power clams accordingly.
912 *
913 * We keep a 5s moving average of power consumption and tempurature. Using
914 * that data, along with CPU vs GPU preference, we adjust the power clamps
915 * up or down.
916 */
917static int ips_monitor(void *data)
918{
919 struct ips_driver *ips = data;
920 struct timer_list timer;
921 unsigned long seqno_timestamp, expire, last_msecs, last_sample_period;
922 int i;
923 u32 *cpu_samples, *mchp_samples, old_cpu_power;
924 u16 *mcp_samples, *ctv1_samples, *ctv2_samples, *mch_samples;
925 u8 cur_seqno, last_seqno;
926
927 mcp_samples = kzalloc(sizeof(u16) * IPS_SAMPLE_COUNT, GFP_KERNEL);
928 ctv1_samples = kzalloc(sizeof(u16) * IPS_SAMPLE_COUNT, GFP_KERNEL);
929 ctv2_samples = kzalloc(sizeof(u16) * IPS_SAMPLE_COUNT, GFP_KERNEL);
930 mch_samples = kzalloc(sizeof(u16) * IPS_SAMPLE_COUNT, GFP_KERNEL);
931 cpu_samples = kzalloc(sizeof(u32) * IPS_SAMPLE_COUNT, GFP_KERNEL);
932 mchp_samples = kzalloc(sizeof(u32) * IPS_SAMPLE_COUNT, GFP_KERNEL);
933 if (!mcp_samples || !ctv1_samples || !ctv2_samples || !mch_samples ||
934 !cpu_samples || !mchp_samples) {
935 dev_err(&ips->dev->dev,
936 "failed to allocate sample array, ips disabled\n");
937 kfree(mcp_samples);
938 kfree(ctv1_samples);
939 kfree(ctv2_samples);
940 kfree(mch_samples);
941 kfree(cpu_samples);
942 kfree(mchp_samples);
943 kthread_stop(ips->adjust);
944 return -ENOMEM;
945 }
946
947 last_seqno = (thm_readl(THM_ITV) & ITV_ME_SEQNO_MASK) >>
948 ITV_ME_SEQNO_SHIFT;
949 seqno_timestamp = get_jiffies_64();
950
951 old_cpu_power = thm_readl(THM_CEC) / 65535;
952 schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD));
953
954 /* Collect an initial average */
955 for (i = 0; i < IPS_SAMPLE_COUNT; i++) {
956 u32 mchp, cpu_power;
957 u16 val;
958
959 mcp_samples[i] = read_ptv(ips);
960
961 val = read_ctv(ips, 0);
962 ctv1_samples[i] = val;
963
964 val = read_ctv(ips, 1);
965 ctv2_samples[i] = val;
966
967 val = read_mgtv(ips);
968 mch_samples[i] = val;
969
970 cpu_power = get_cpu_power(ips, &old_cpu_power,
971 IPS_SAMPLE_PERIOD);
972 cpu_samples[i] = cpu_power;
973
974 if (ips->read_mch_val) {
975 mchp = ips->read_mch_val();
976 mchp_samples[i] = mchp;
977 }
978
979 schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD));
980 if (kthread_should_stop())
981 break;
982 }
983
984 ips->mcp_avg_temp = calc_avg_temp(ips, mcp_samples);
985 ips->ctv1_avg_temp = calc_avg_temp(ips, ctv1_samples);
986 ips->ctv2_avg_temp = calc_avg_temp(ips, ctv2_samples);
987 ips->mch_avg_temp = calc_avg_temp(ips, mch_samples);
988 ips->cpu_avg_power = calc_avg_power(ips, cpu_samples);
989 ips->mch_avg_power = calc_avg_power(ips, mchp_samples);
990 kfree(mcp_samples);
991 kfree(ctv1_samples);
992 kfree(ctv2_samples);
993 kfree(mch_samples);
994 kfree(cpu_samples);
995 kfree(mchp_samples);
996
997 /* Start the adjustment thread now that we have data */
998 wake_up_process(ips->adjust);
999
1000 /*
1001 * Ok, now we have an initial avg. From here on out, we track the
1002 * running avg using a decaying average calculation. This allows
1003 * us to reduce the sample frequency if the CPU and GPU are idle.
1004 */
1005 old_cpu_power = thm_readl(THM_CEC);
1006 schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD));
1007 last_sample_period = IPS_SAMPLE_PERIOD;
1008
1009 setup_deferrable_timer_on_stack(&timer, monitor_timeout,
1010 (unsigned long)current);
1011 do {
1012 u32 cpu_val, mch_val;
1013 u16 val;
1014
1015 /* MCP itself */
1016 val = read_ptv(ips);
1017 ips->mcp_avg_temp = update_average_temp(ips->mcp_avg_temp, val);
1018
1019 /* Processor 0 */
1020 val = read_ctv(ips, 0);
1021 ips->ctv1_avg_temp =
1022 update_average_temp(ips->ctv1_avg_temp, val);
1023 /* Power */
1024 cpu_val = get_cpu_power(ips, &old_cpu_power,
1025 last_sample_period);
1026 ips->cpu_avg_power =
1027 update_average_power(ips->cpu_avg_power, cpu_val);
1028
1029 if (ips->second_cpu) {
1030 /* Processor 1 */
1031 val = read_ctv(ips, 1);
1032 ips->ctv2_avg_temp =
1033 update_average_temp(ips->ctv2_avg_temp, val);
1034 }
1035
1036 /* MCH */
1037 val = read_mgtv(ips);
1038 ips->mch_avg_temp = update_average_temp(ips->mch_avg_temp, val);
1039 /* Power */
1040 if (ips->read_mch_val) {
1041 mch_val = ips->read_mch_val();
1042 ips->mch_avg_power =
1043 update_average_power(ips->mch_avg_power,
1044 mch_val);
1045 }
1046
1047 /*
1048 * Make sure ME is updating thermal regs.
1049 * Note:
1050 * If it's been more than a second since the last update,
1051 * the ME is probably hung.
1052 */
1053 cur_seqno = (thm_readl(THM_ITV) & ITV_ME_SEQNO_MASK) >>
1054 ITV_ME_SEQNO_SHIFT;
1055 if (cur_seqno == last_seqno &&
1056 time_after(jiffies, seqno_timestamp + HZ)) {
1057 dev_warn(&ips->dev->dev, "ME failed to update for more than 1s, likely hung\n");
1058 } else {
1059 seqno_timestamp = get_jiffies_64();
1060 last_seqno = cur_seqno;
1061 }
1062
1063 last_msecs = jiffies_to_msecs(jiffies);
1064 expire = jiffies + msecs_to_jiffies(IPS_SAMPLE_PERIOD);
1065
1066 __set_current_state(TASK_UNINTERRUPTIBLE);
1067 mod_timer(&timer, expire);
1068 schedule();
1069
1070 /* Calculate actual sample period for power averaging */
1071 last_sample_period = jiffies_to_msecs(jiffies) - last_msecs;
1072 if (!last_sample_period)
1073 last_sample_period = 1;
1074 } while (!kthread_should_stop());
1075
1076 del_timer_sync(&timer);
1077 destroy_timer_on_stack(&timer);
1078
1079 dev_dbg(&ips->dev->dev, "ips-monitor thread stopped\n");
1080
1081 return 0;
1082}
1083
#if 0
/*
 * Debug-only register dump helpers (currently compiled out).
 *
 * Each macro reads one thermal register of the given width and logs it as
 * "<REG>: 0x<value>".  They rely on a local variable named `ips` being in
 * scope at the expansion site.  The bodies are wrapped in do { } while (0)
 * so that "THM_DUMPW(reg);" behaves as a single statement.
 *
 * THM_DUMPQ prints a u64, which needs the "ll" length modifier; the value
 * is cast to unsigned long long so the format matches on every arch.
 */
#define THM_DUMPW(reg) \
	do { \
		u16 val = thm_readw(reg); \
		dev_dbg(&ips->dev->dev, #reg ": 0x%04x\n", val); \
	} while (0)
#define THM_DUMPL(reg) \
	do { \
		u32 val = thm_readl(reg); \
		dev_dbg(&ips->dev->dev, #reg ": 0x%08x\n", val); \
	} while (0)
#define THM_DUMPQ(reg) \
	do { \
		u64 val = thm_readq(reg); \
		dev_dbg(&ips->dev->dev, #reg ": 0x%016llx\n", \
			(unsigned long long)val); \
	} while (0)

/* Log the processor temperature limit and a handful of thermal registers. */
static void dump_thermal_info(struct ips_driver *ips)
{
	u16 ptl;

	ptl = thm_readw(THM_PTL);
	dev_dbg(&ips->dev->dev, "Processor temp limit: %d\n", ptl);

	THM_DUMPW(THM_CTA);
	THM_DUMPW(THM_TRC);
	THM_DUMPW(THM_CTV1);
	THM_DUMPL(THM_STS);
	THM_DUMPW(THM_PTV);
	THM_DUMPQ(THM_MGTV);
}
#endif
1116
1117/**
1118 * ips_irq_handler - handle temperature triggers and other IPS events
1119 * @irq: irq number
1120 * @arg: unused
1121 *
1122 * Handle temperature limit trigger events, generally by lowering the clamps.
1123 * If we're at a critical limit, we clamp back to the lowest possible value
1124 * to prevent emergency shutdown.
1125 */
1126static irqreturn_t ips_irq_handler(int irq, void *arg)
1127{
1128 struct ips_driver *ips = arg;
1129 u8 tses = thm_readb(THM_TSES);
1130 u8 tes = thm_readb(THM_TES);
1131
1132 if (!tses && !tes)
1133 return IRQ_NONE;
1134
1135 dev_info(&ips->dev->dev, "TSES: 0x%02x\n", tses);
1136 dev_info(&ips->dev->dev, "TES: 0x%02x\n", tes);
1137
1138 /* STS update from EC? */
1139 if (tes & 1) {
1140 u32 sts, tc1;
1141
1142 sts = thm_readl(THM_STS);
1143 tc1 = thm_readl(THM_TC1);
1144
1145 if (sts & STS_NVV) {
1146 spin_lock(&ips->turbo_status_lock);
1147 ips->core_power_limit = (sts & STS_PCPL_MASK) >>
1148 STS_PCPL_SHIFT;
1149 ips->mch_power_limit = (sts & STS_GPL_MASK) >>
1150 STS_GPL_SHIFT;
1151 /* ignore EC CPU vs GPU pref */
1152 ips->cpu_turbo_enabled = !(sts & STS_PCTD_DIS);
1153 ips->gpu_turbo_enabled = !(sts & STS_GTD_DIS);
1154 ips->mcp_temp_limit = (sts & STS_PTL_MASK) >>
1155 STS_PTL_SHIFT;
1156 ips->mcp_power_limit = (tc1 & STS_PPL_MASK) >>
1157 STS_PPL_SHIFT;
1158 spin_unlock(&ips->turbo_status_lock);
1159
1160 thm_writeb(THM_SEC, SEC_ACK);
1161 }
1162 thm_writeb(THM_TES, tes);
1163 }
1164
1165 /* Thermal trip */
1166 if (tses) {
1167 dev_warn(&ips->dev->dev,
1168 "thermal trip occurred, tses: 0x%04x\n", tses);
1169 thm_writeb(THM_TSES, tses);
1170 }
1171
1172 return IRQ_HANDLED;
1173}
1174
1175#ifndef CONFIG_DEBUG_FS
/* debugfs support is compiled out: setup and teardown are no-ops. */
static void ips_debugfs_init(struct ips_driver *ips)
{
}

static void ips_debugfs_cleanup(struct ips_driver *ips)
{
}
1178#else
1179
/* Expose current state and limits in debugfs if possible */

/*
 * One debugfs file: the driver instance it reports on, the file name and
 * the seq_file callback that produces its contents.  The member order is
 * relied upon by positional initializers, so keep it stable.
 */
struct ips_debugfs_node {
	struct ips_driver *ips;		/* filled in when the file is created */
	const char *name;		/* file name; never modified */
	int (*show)(struct seq_file *m, void *data);
};
1187
1188static int show_cpu_temp(struct seq_file *m, void *data)
1189{
1190 struct ips_driver *ips = m->private;
1191
1192 seq_printf(m, "%d.%02d\n", ips->ctv1_avg_temp / 100,
1193 ips->ctv1_avg_temp % 100);
1194
1195 return 0;
1196}
1197
1198static int show_cpu_power(struct seq_file *m, void *data)
1199{
1200 struct ips_driver *ips = m->private;
1201
1202 seq_printf(m, "%dmW\n", ips->cpu_avg_power);
1203
1204 return 0;
1205}
1206
1207static int show_cpu_clamp(struct seq_file *m, void *data)
1208{
1209 u64 turbo_override;
1210 int tdp, tdc;
1211
1212 rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
1213
1214 tdp = (int)(turbo_override & TURBO_TDP_MASK);
1215 tdc = (int)((turbo_override & TURBO_TDC_MASK) >> TURBO_TDC_SHIFT);
1216
1217 /* Convert to .1W/A units */
1218 tdp = tdp * 10 / 8;
1219 tdc = tdc * 10 / 8;
1220
1221 /* Watts Amperes */
1222 seq_printf(m, "%d.%dW %d.%dA\n", tdp / 10, tdp % 10,
1223 tdc / 10, tdc % 10);
1224
1225 return 0;
1226}
1227
1228static int show_mch_temp(struct seq_file *m, void *data)
1229{
1230 struct ips_driver *ips = m->private;
1231
1232 seq_printf(m, "%d.%02d\n", ips->mch_avg_temp / 100,
1233 ips->mch_avg_temp % 100);
1234
1235 return 0;
1236}
1237
1238static int show_mch_power(struct seq_file *m, void *data)
1239{
1240 struct ips_driver *ips = m->private;
1241
1242 seq_printf(m, "%dmW\n", ips->mch_avg_power);
1243
1244 return 0;
1245}
1246
1247static struct ips_debugfs_node ips_debug_files[] = {
1248 { NULL, "cpu_temp", show_cpu_temp },
1249 { NULL, "cpu_power", show_cpu_power },
1250 { NULL, "cpu_clamp", show_cpu_clamp },
1251 { NULL, "mch_temp", show_mch_temp },
1252 { NULL, "mch_power", show_mch_power },
1253};
1254
1255static int ips_debugfs_open(struct inode *inode, struct file *file)
1256{
1257 struct ips_debugfs_node *node = inode->i_private;
1258
1259 return single_open(file, node->show, node->ips);
1260}
1261
1262static const struct file_operations ips_debugfs_ops = {
1263 .owner = THIS_MODULE,
1264 .open = ips_debugfs_open,
1265 .read = seq_read,
1266 .llseek = seq_lseek,
1267 .release = single_release,
1268};
1269
1270static void ips_debugfs_cleanup(struct ips_driver *ips)
1271{
1272 if (ips->debug_root)
1273 debugfs_remove_recursive(ips->debug_root);
1274 return;
1275}
1276
1277static void ips_debugfs_init(struct ips_driver *ips)
1278{
1279 int i;
1280
1281 ips->debug_root = debugfs_create_dir("ips", NULL);
1282 if (!ips->debug_root) {
1283 dev_err(&ips->dev->dev,
1284 "failed to create debugfs entries: %ld\n",
1285 PTR_ERR(ips->debug_root));
1286 return;
1287 }
1288
1289 for (i = 0; i < ARRAY_SIZE(ips_debug_files); i++) {
1290 struct dentry *ent;
1291 struct ips_debugfs_node *node = &ips_debug_files[i];
1292
1293 node->ips = ips;
1294 ent = debugfs_create_file(node->name, S_IFREG | S_IRUGO,
1295 ips->debug_root, node,
1296 &ips_debugfs_ops);
1297 if (!ent) {
1298 dev_err(&ips->dev->dev,
1299 "failed to create debug file: %ld\n",
1300 PTR_ERR(ent));
1301 goto err_cleanup;
1302 }
1303 }
1304
1305 return;
1306
1307err_cleanup:
1308 ips_debugfs_cleanup(ips);
1309 return;
1310}
1311#endif /* CONFIG_DEBUG_FS */
1312
1313/**
1314 * ips_detect_cpu - detect whether CPU supports IPS
1315 *
1316 * Walk our list and see if we're on a supported CPU. If we find one,
1317 * return the limits for it.
1318 */
1319static struct ips_mcp_limits *ips_detect_cpu(struct ips_driver *ips)
1320{
1321 u64 turbo_power, misc_en;
1322 struct ips_mcp_limits *limits = NULL;
1323 u16 tdp;
1324
1325 if (!(boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 37)) {
1326 dev_info(&ips->dev->dev, "Non-IPS CPU detected.\n");
1327 goto out;
1328 }
1329
1330 rdmsrl(IA32_MISC_ENABLE, misc_en);
1331 /*
1332 * If the turbo enable bit isn't set, we shouldn't try to enable/disable
1333 * turbo manually or we'll get an illegal MSR access, even though
1334 * turbo will still be available.
1335 */
1336 if (!(misc_en & IA32_MISC_TURBO_EN))
1337 ; /* add turbo MSR write allowed flag if necessary */
1338
1339 if (strstr(boot_cpu_data.x86_model_id, "CPU M"))
1340 limits = &ips_sv_limits;
1341 else if (strstr(boot_cpu_data.x86_model_id, "CPU L"))
1342 limits = &ips_lv_limits;
1343 else if (strstr(boot_cpu_data.x86_model_id, "CPU U"))
1344 limits = &ips_ulv_limits;
1345 else
1346 dev_info(&ips->dev->dev, "No CPUID match found.\n");
1347
1348 rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_power);
1349 tdp = turbo_power & TURBO_TDP_MASK;
1350
1351 /* Sanity check TDP against CPU */
1352 if (limits->mcp_power_limit != (tdp / 8) * 1000) {
1353 dev_warn(&ips->dev->dev, "Warning: CPU TDP doesn't match expected value (found %d, expected %d)\n",
1354 tdp / 8, limits->mcp_power_limit / 1000);
1355 }
1356
1357out:
1358 return limits;
1359}
1360
1361/**
1362 * ips_get_i915_syms - try to get GPU control methods from i915 driver
1363 * @ips: IPS driver
1364 *
1365 * The i915 driver exports several interfaces to allow the IPS driver to
1366 * monitor and control graphics turbo mode. If we can find them, we can
1367 * enable graphics turbo, otherwise we must disable it to avoid exceeding
1368 * thermal and power limits in the MCP.
1369 */
1370static bool ips_get_i915_syms(struct ips_driver *ips)
1371{
1372 ips->read_mch_val = symbol_get(i915_read_mch_val);
1373 if (!ips->read_mch_val)
1374 goto out_err;
1375 ips->gpu_raise = symbol_get(i915_gpu_raise);
1376 if (!ips->gpu_raise)
1377 goto out_put_mch;
1378 ips->gpu_lower = symbol_get(i915_gpu_lower);
1379 if (!ips->gpu_lower)
1380 goto out_put_raise;
1381 ips->gpu_busy = symbol_get(i915_gpu_busy);
1382 if (!ips->gpu_busy)
1383 goto out_put_lower;
1384 ips->gpu_turbo_disable = symbol_get(i915_gpu_turbo_disable);
1385 if (!ips->gpu_turbo_disable)
1386 goto out_put_busy;
1387
1388 return true;
1389
1390out_put_busy:
1391 symbol_put(i915_gpu_turbo_disable);
1392out_put_lower:
1393 symbol_put(i915_gpu_lower);
1394out_put_raise:
1395 symbol_put(i915_gpu_raise);
1396out_put_mch:
1397 symbol_put(i915_read_mch_val);
1398out_err:
1399 return false;
1400}
1401
1402static DEFINE_PCI_DEVICE_TABLE(ips_id_table) = {
1403 { PCI_DEVICE(PCI_VENDOR_ID_INTEL,
1404 PCI_DEVICE_ID_INTEL_THERMAL_SENSOR), },
1405 { 0, }
1406};
1407
1408MODULE_DEVICE_TABLE(pci, ips_id_table);
1409
1410static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id)
1411{
1412 u64 platform_info;
1413 struct ips_driver *ips;
1414 u32 hts;
1415 int ret = 0;
1416 u16 htshi, trc, trc_required_mask;
1417 u8 tse;
1418
1419 ips = kzalloc(sizeof(struct ips_driver), GFP_KERNEL);
1420 if (!ips)
1421 return -ENOMEM;
1422
1423 pci_set_drvdata(dev, ips);
1424 ips->dev = dev;
1425
1426 ips->limits = ips_detect_cpu(ips);
1427 if (!ips->limits) {
1428 dev_info(&dev->dev, "IPS not supported on this CPU\n");
1429 ret = -ENXIO;
1430 goto error_free;
1431 }
1432
1433 spin_lock_init(&ips->turbo_status_lock);
1434
1435 if (!pci_resource_start(dev, 0)) {
1436 dev_err(&dev->dev, "TBAR not assigned, aborting\n");
1437 ret = -ENXIO;
1438 goto error_free;
1439 }
1440
1441 ret = pci_request_regions(dev, "ips thermal sensor");
1442 if (ret) {
1443 dev_err(&dev->dev, "thermal resource busy, aborting\n");
1444 goto error_free;
1445 }
1446
1447 ret = pci_enable_device(dev);
1448 if (ret) {
1449 dev_err(&dev->dev, "can't enable PCI device, aborting\n");
1450 goto error_free;
1451 }
1452
1453 ips->regmap = ioremap(pci_resource_start(dev, 0),
1454 pci_resource_len(dev, 0));
1455 if (!ips->regmap) {
1456 dev_err(&dev->dev, "failed to map thermal regs, aborting\n");
1457 ret = -EBUSY;
1458 goto error_release;
1459 }
1460
1461 tse = thm_readb(THM_TSE);
1462 if (tse != TSE_EN) {
1463 dev_err(&dev->dev, "thermal device not enabled (0x%02x), aborting\n", tse);
1464 ret = -ENXIO;
1465 goto error_unmap;
1466 }
1467
1468 trc = thm_readw(THM_TRC);
1469 trc_required_mask = TRC_CORE1_EN | TRC_CORE_PWR | TRC_MCH_EN;
1470 if ((trc & trc_required_mask) != trc_required_mask) {
1471 dev_err(&dev->dev, "thermal reporting for required devices not enabled, aborting\n");
1472 ret = -ENXIO;
1473 goto error_unmap;
1474 }
1475
1476 if (trc & TRC_CORE2_EN)
1477 ips->second_cpu = true;
1478
1479 update_turbo_limits(ips);
1480 dev_dbg(&dev->dev, "max cpu power clamp: %dW\n",
1481 ips->mcp_power_limit / 10);
1482 dev_dbg(&dev->dev, "max core power clamp: %dW\n",
1483 ips->core_power_limit / 10);
1484 /* BIOS may update limits at runtime */
1485 if (thm_readl(THM_PSC) & PSP_PBRT)
1486 ips->poll_turbo_status = true;
1487
1488 if (!ips_get_i915_syms(ips)) {
1489 dev_err(&dev->dev, "failed to get i915 symbols, graphics turbo disabled\n");
1490 ips->gpu_turbo_enabled = false;
1491 } else {
1492 dev_dbg(&dev->dev, "graphics turbo enabled\n");
1493 ips->gpu_turbo_enabled = true;
1494 }
1495
1496 /*
1497 * Check PLATFORM_INFO MSR to make sure this chip is
1498 * turbo capable.
1499 */
1500 rdmsrl(PLATFORM_INFO, platform_info);
1501 if (!(platform_info & PLATFORM_TDP)) {
1502 dev_err(&dev->dev, "platform indicates TDP override unavailable, aborting\n");
1503 ret = -ENODEV;
1504 goto error_unmap;
1505 }
1506
1507 /*
1508 * IRQ handler for ME interaction
1509 * Note: don't use MSI here as the PCH has bugs.
1510 */
1511 pci_disable_msi(dev);
1512 ret = request_irq(dev->irq, ips_irq_handler, IRQF_SHARED, "ips",
1513 ips);
1514 if (ret) {
1515 dev_err(&dev->dev, "request irq failed, aborting\n");
1516 goto error_unmap;
1517 }
1518
1519 /* Enable aux, hot & critical interrupts */
1520 thm_writeb(THM_TSPIEN, TSPIEN_AUX2_LOHI | TSPIEN_CRIT_LOHI |
1521 TSPIEN_HOT_LOHI | TSPIEN_AUX_LOHI);
1522 thm_writeb(THM_TEN, TEN_UPDATE_EN);
1523
1524 /* Collect adjustment values */
1525 ips->cta_val = thm_readw(THM_CTA);
1526 ips->pta_val = thm_readw(THM_PTA);
1527 ips->mgta_val = thm_readw(THM_MGTA);
1528
1529 /* Save turbo limits & ratios */
1530 rdmsrl(TURBO_POWER_CURRENT_LIMIT, ips->orig_turbo_limit);
1531
1532 ips_enable_cpu_turbo(ips);
1533 ips->cpu_turbo_enabled = true;
1534
1535 /* Set up the work queue and monitor/adjust threads */
1536 ips->monitor = kthread_run(ips_monitor, ips, "ips-monitor");
1537 if (IS_ERR(ips->monitor)) {
1538 dev_err(&dev->dev,
1539 "failed to create thermal monitor thread, aborting\n");
1540 ret = -ENOMEM;
1541 goto error_free_irq;
1542 }
1543
1544 ips->adjust = kthread_create(ips_adjust, ips, "ips-adjust");
1545 if (IS_ERR(ips->adjust)) {
1546 dev_err(&dev->dev,
1547 "failed to create thermal adjust thread, aborting\n");
1548 ret = -ENOMEM;
1549 goto error_thread_cleanup;
1550 }
1551
1552 hts = (ips->core_power_limit << HTS_PCPL_SHIFT) |
1553 (ips->mcp_temp_limit << HTS_PTL_SHIFT) | HTS_NVV;
1554 htshi = HTS2_PRST_RUNNING << HTS2_PRST_SHIFT;
1555
1556 thm_writew(THM_HTSHI, htshi);
1557 thm_writel(THM_HTS, hts);
1558
1559 ips_debugfs_init(ips);
1560
1561 dev_info(&dev->dev, "IPS driver initialized, MCP temp limit %d\n",
1562 ips->mcp_temp_limit);
1563 return ret;
1564
1565error_thread_cleanup:
1566 kthread_stop(ips->monitor);
1567error_free_irq:
1568 free_irq(ips->dev->irq, ips);
1569error_unmap:
1570 iounmap(ips->regmap);
1571error_release:
1572 pci_release_regions(dev);
1573error_free:
1574 kfree(ips);
1575 return ret;
1576}
1577
1578static void ips_remove(struct pci_dev *dev)
1579{
1580 struct ips_driver *ips = pci_get_drvdata(dev);
1581 u64 turbo_override;
1582
1583 if (!ips)
1584 return;
1585
1586 ips_debugfs_cleanup(ips);
1587
1588 /* Release i915 driver */
1589 if (ips->read_mch_val)
1590 symbol_put(i915_read_mch_val);
1591 if (ips->gpu_raise)
1592 symbol_put(i915_gpu_raise);
1593 if (ips->gpu_lower)
1594 symbol_put(i915_gpu_lower);
1595 if (ips->gpu_busy)
1596 symbol_put(i915_gpu_busy);
1597 if (ips->gpu_turbo_disable)
1598 symbol_put(i915_gpu_turbo_disable);
1599
1600 rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
1601 turbo_override &= ~(TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN);
1602 wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
1603 wrmsrl(TURBO_POWER_CURRENT_LIMIT, ips->orig_turbo_limit);
1604
1605 free_irq(ips->dev->irq, ips);
1606 if (ips->adjust)
1607 kthread_stop(ips->adjust);
1608 if (ips->monitor)
1609 kthread_stop(ips->monitor);
1610 iounmap(ips->regmap);
1611 pci_release_regions(dev);
1612 kfree(ips);
1613 dev_dbg(&dev->dev, "IPS driver removed\n");
1614}
1615
#ifdef CONFIG_PM
/*
 * Power management hooks.  Neither does any work; both simply report
 * success.
 */
static int ips_suspend(struct pci_dev *dev, pm_message_t state)
{
	return 0;
}

static int ips_resume(struct pci_dev *dev)
{
	return 0;
}
#else
/* Without CONFIG_PM the driver structure gets no PM callbacks at all */
#define ips_suspend NULL
#define ips_resume NULL
#endif /* CONFIG_PM */
1630
/* Shutdown hook: intentionally empty, nothing is done at shutdown. */
static void ips_shutdown(struct pci_dev *dev)
{
}
1634
1635static struct pci_driver ips_pci_driver = {
1636 .name = "intel ips",
1637 .id_table = ips_id_table,
1638 .probe = ips_probe,
1639 .remove = ips_remove,
1640 .suspend = ips_suspend,
1641 .resume = ips_resume,
1642 .shutdown = ips_shutdown,
1643};
1644
1645static int __init ips_init(void)
1646{
1647 return pci_register_driver(&ips_pci_driver);
1648}
1649module_init(ips_init);
1650
1651static void ips_exit(void)
1652{
1653 pci_unregister_driver(&ips_pci_driver);
1654 return;
1655}
1656module_exit(ips_exit);
1657
1658MODULE_LICENSE("GPL");
1659MODULE_AUTHOR("Jesse Barnes <jbarnes@virtuousgeek.org>");
1660MODULE_DESCRIPTION("Intelligent Power Sharing Driver");