aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJesse Barnes <jbarnes@virtuousgeek.org>2010-05-14 18:41:14 -0400
committerMatthew Garrett <mjg@redhat.com>2010-08-03 09:48:45 -0400
commitaa7ffc01d254c91a36bf854d57a14049c6134c72 (patch)
tree589fb5fbaf42a41de2915818e589e7368df67778
parent8cadd2831bf3abc94f4530e7fdbab7bb39b6b27d (diff)
x86 platform driver: intelligent power sharing driver
Intel Core i3/5 platforms with integrated graphics support both CPU and GPU turbo mode. CPU turbo mode is opportunistic: the CPU will use any available power to increase core frequencies if thermal headroom is available. The GPU side is more manual however; the graphics driver must monitor GPU power and temperature and coordinate with a core thermal driver to take advantage of available thermal and power headroom in the package. The intelligent power sharing (IPS) driver is intended to coordinate this activity by monitoring MCP (multi-chip package) temperature and power, allowing the CPU and/or GPU to increase their power consumption, and thus performance, when possible. The goal is to maximize performance within a given platform's TDP (thermal design point). Signed-off-by: Jesse Barnes <jbarnes@virtuousgeek.org> Signed-off-by: Matthew Garrett <mjg@redhat.com>
-rw-r--r--drivers/platform/x86/Kconfig10
-rw-r--r--drivers/platform/x86/Makefile1
-rw-r--r--drivers/platform/x86/intel_ips.c1655
-rw-r--r--include/drm/i915_drm.h9
4 files changed, 1675 insertions, 0 deletions
diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig
index fd060016b7e9..724b2ed1a3cb 100644
--- a/drivers/platform/x86/Kconfig
+++ b/drivers/platform/x86/Kconfig
@@ -539,4 +539,14 @@ config INTEL_SCU_IPC
539 some embedded Intel x86 platforms. This is not needed for PC-type 539 some embedded Intel x86 platforms. This is not needed for PC-type
540 machines. 540 machines.
541 541
542config INTEL_IPS
543 tristate "Intel Intelligent Power Sharing"
544 depends on ACPI
545 ---help---
546 Intel Calpella platforms support dynamic power sharing between the
547 CPU and GPU, maximizing performance in a given TDP. This driver,
548 along with the CPU frequency and i915 drivers, provides that
549 functionality. If in doubt, say Y here; it will only load on
550 supported platforms.
551
542endif # X86_PLATFORM_DEVICES 552endif # X86_PLATFORM_DEVICES
diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile
index 8770bfe71431..7318fc2c1629 100644
--- a/drivers/platform/x86/Makefile
+++ b/drivers/platform/x86/Makefile
@@ -26,3 +26,4 @@ obj-$(CONFIG_TOPSTAR_LAPTOP) += topstar-laptop.o
26obj-$(CONFIG_ACPI_TOSHIBA) += toshiba_acpi.o 26obj-$(CONFIG_ACPI_TOSHIBA) += toshiba_acpi.o
27obj-$(CONFIG_TOSHIBA_BT_RFKILL) += toshiba_bluetooth.o 27obj-$(CONFIG_TOSHIBA_BT_RFKILL) += toshiba_bluetooth.o
28obj-$(CONFIG_INTEL_SCU_IPC) += intel_scu_ipc.o 28obj-$(CONFIG_INTEL_SCU_IPC) += intel_scu_ipc.o
29obj-$(CONFIG_INTEL_IPS) += intel_ips.o
diff --git a/drivers/platform/x86/intel_ips.c b/drivers/platform/x86/intel_ips.c
new file mode 100644
index 000000000000..f1dce3b8372d
--- /dev/null
+++ b/drivers/platform/x86/intel_ips.c
@@ -0,0 +1,1655 @@
1/*
2 * Copyright (c) 2009-2010 Intel Corporation
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms and conditions of the GNU General Public License,
6 * version 2, as published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope it will be useful, but WITHOUT
9 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
11 * more details.
12 *
13 * You should have received a copy of the GNU General Public License along with
14 * this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
16 *
17 * The full GNU General Public License is included in this distribution in
18 * the file called "COPYING".
19 *
20 * Authors:
21 * Jesse Barnes <jbarnes@virtuousgeek.org>
22 */
23
24/*
25 * Some Intel Ibex Peak based platforms support so-called "intelligent
26 * power sharing", which allows the CPU and GPU to cooperate to maximize
27 * performance within a given TDP (thermal design point). This driver
28 * performs the coordination between the CPU and GPU, monitors thermal and
29 * power statistics in the platform, and initializes power monitoring
30 * hardware. It also provides a few tunables to control behavior. Its
31 * primary purpose is to safely allow CPU and GPU turbo modes to be enabled
32 * by tracking power and thermal budget; secondarily it can boost turbo
33 * performance by allocating more power or thermal budget to the CPU or GPU
34 * based on available headroom and activity.
35 *
 36 * The basic algorithm is driven by a 5s moving average of temperature. If
37 * thermal headroom is available, the CPU and/or GPU power clamps may be
38 * adjusted upwards. If we hit the thermal ceiling or a thermal trigger,
39 * we scale back the clamp. Aside from trigger events (when we're critically
40 * close or over our TDP) we don't adjust the clamps more than once every
41 * five seconds.
42 *
43 * The thermal device (device 31, function 6) has a set of registers that
44 * are updated by the ME firmware. The ME should also take the clamp values
45 * written to those registers and write them to the CPU, but we currently
46 * bypass that functionality and write the CPU MSR directly.
47 *
48 * UNSUPPORTED:
49 * - dual MCP configs
50 *
51 * TODO:
52 * - handle CPU hotplug
53 * - provide turbo enable/disable api
54 * - make sure we can write turbo enable/disable reg based on MISC_EN
55 *
56 * Related documents:
57 * - CDI 403777, 403778 - Auburndale EDS vol 1 & 2
58 * - CDI 401376 - Ibex Peak EDS
59 * - ref 26037, 26641 - IPS BIOS spec
60 * - ref 26489 - Nehalem BIOS writer's guide
61 * - ref 26921 - Ibex Peak BIOS Specification
62 */
63
64#include <linux/debugfs.h>
65#include <linux/delay.h>
66#include <linux/interrupt.h>
67#include <linux/kernel.h>
68#include <linux/kthread.h>
69#include <linux/module.h>
70#include <linux/pci.h>
71#include <linux/sched.h>
72#include <linux/seq_file.h>
73#include <linux/string.h>
74#include <linux/tick.h>
75#include <linux/timer.h>
76#include <drm/i915_drm.h>
77#include <asm/msr.h>
78#include <asm/processor.h>
79
80#define PCI_DEVICE_ID_INTEL_THERMAL_SENSOR 0x3b32
81
82/*
83 * Package level MSRs for monitor/control
84 */
85#define PLATFORM_INFO 0xce
86#define PLATFORM_TDP (1<<29)
87#define PLATFORM_RATIO (1<<28)
88
89#define IA32_MISC_ENABLE 0x1a0
90#define IA32_MISC_TURBO_EN (1ULL<<38)
91
92#define TURBO_POWER_CURRENT_LIMIT 0x1ac
93#define TURBO_TDC_OVR_EN (1UL<<31)
94#define TURBO_TDC_MASK (0x000000007fff0000UL)
95#define TURBO_TDC_SHIFT (16)
96#define TURBO_TDP_OVR_EN (1UL<<15)
97#define TURBO_TDP_MASK (0x0000000000003fffUL)
98
99/*
100 * Core/thread MSRs for monitoring
101 */
102#define IA32_PERF_CTL 0x199
103#define IA32_PERF_TURBO_DIS (1ULL<<32)
104
105/*
106 * Thermal PCI device regs
107 */
108#define THM_CFG_TBAR 0x10
109#define THM_CFG_TBAR_HI 0x14
110
111#define THM_TSIU 0x00
112#define THM_TSE 0x01
113#define TSE_EN 0xb8
114#define THM_TSS 0x02
115#define THM_TSTR 0x03
116#define THM_TSTTP 0x04
117#define THM_TSCO 0x08
118#define THM_TSES 0x0c
119#define THM_TSGPEN 0x0d
120#define TSGPEN_HOT_LOHI (1<<1)
121#define TSGPEN_CRIT_LOHI (1<<2)
122#define THM_TSPC 0x0e
123#define THM_PPEC 0x10
124#define THM_CTA 0x12
125#define THM_PTA 0x14
126#define PTA_SLOPE_MASK (0xff00)
127#define PTA_SLOPE_SHIFT 8
128#define PTA_OFFSET_MASK (0x00ff)
129#define THM_MGTA 0x16
130#define MGTA_SLOPE_MASK (0xff00)
131#define MGTA_SLOPE_SHIFT 8
132#define MGTA_OFFSET_MASK (0x00ff)
133#define THM_TRC 0x1a
134#define TRC_CORE2_EN (1<<15)
135#define TRC_THM_EN (1<<12)
136#define TRC_C6_WAR (1<<8)
137#define TRC_CORE1_EN (1<<7)
138#define TRC_CORE_PWR (1<<6)
139#define TRC_PCH_EN (1<<5)
140#define TRC_MCH_EN (1<<4)
141#define TRC_DIMM4 (1<<3)
142#define TRC_DIMM3 (1<<2)
143#define TRC_DIMM2 (1<<1)
144#define TRC_DIMM1 (1<<0)
145#define THM_TES 0x20
146#define THM_TEN 0x21
147#define TEN_UPDATE_EN 1
148#define THM_PSC 0x24
149#define PSC_NTG (1<<0) /* No GFX turbo support */
150#define PSC_NTPC (1<<1) /* No CPU turbo support */
151#define PSC_PP_DEF (0<<2) /* Perf policy up to driver */
152#define PSP_PP_PC (1<<2) /* BIOS prefers CPU perf */
153#define PSP_PP_BAL (2<<2) /* BIOS wants balanced perf */
154#define PSP_PP_GFX (3<<2) /* BIOS prefers GFX perf */
155#define PSP_PBRT (1<<4) /* BIOS run time support */
156#define THM_CTV1 0x30
157#define CTV_TEMP_ERROR (1<<15)
158#define CTV_TEMP_MASK 0x3f
159#define CTV_
160#define THM_CTV2 0x32
161#define THM_CEC 0x34 /* undocumented power accumulator in joules */
162#define THM_AE 0x3f
163#define THM_HTS 0x50 /* 32 bits */
164#define HTS_PCPL_MASK (0x7fe00000)
165#define HTS_PCPL_SHIFT 21
166#define HTS_GPL_MASK (0x001ff000)
167#define HTS_GPL_SHIFT 12
168#define HTS_PP_MASK (0x00000c00)
169#define HTS_PP_SHIFT 10
170#define HTS_PP_DEF 0
171#define HTS_PP_PROC 1
172#define HTS_PP_BAL 2
173#define HTS_PP_GFX 3
174#define HTS_PCTD_DIS (1<<9)
175#define HTS_GTD_DIS (1<<8)
176#define HTS_PTL_MASK (0x000000fe)
177#define HTS_PTL_SHIFT 1
178#define HTS_NVV (1<<0)
179#define THM_HTSHI 0x54 /* 16 bits */
180#define HTS2_PPL_MASK (0x03ff)
181#define HTS2_PRST_MASK (0x3c00)
182#define HTS2_PRST_SHIFT 10
183#define HTS2_PRST_UNLOADED 0
184#define HTS2_PRST_RUNNING 1
185#define HTS2_PRST_TDISOP 2 /* turbo disabled due to power */
186#define HTS2_PRST_TDISHT 3 /* turbo disabled due to high temp */
187#define HTS2_PRST_TDISUSR 4 /* user disabled turbo */
188#define HTS2_PRST_TDISPLAT 5 /* platform disabled turbo */
189#define HTS2_PRST_TDISPM 6 /* power management disabled turbo */
190#define HTS2_PRST_TDISERR 7 /* some kind of error disabled turbo */
191#define THM_PTL 0x56
192#define THM_MGTV 0x58
193#define TV_MASK 0x000000000000ff00
194#define TV_SHIFT 8
195#define THM_PTV 0x60
196#define PTV_MASK 0x00ff
197#define THM_MMGPC 0x64
198#define THM_MPPC 0x66
199#define THM_MPCPC 0x68
200#define THM_TSPIEN 0x82
201#define TSPIEN_AUX_LOHI (1<<0)
202#define TSPIEN_HOT_LOHI (1<<1)
203#define TSPIEN_CRIT_LOHI (1<<2)
204#define TSPIEN_AUX2_LOHI (1<<3)
205#define THM_TSLOCK 0x83
206#define THM_ATR 0x84
207#define THM_TOF 0x87
208#define THM_STS 0x98
209#define STS_PCPL_MASK (0x7fe00000)
210#define STS_PCPL_SHIFT 21
211#define STS_GPL_MASK (0x001ff000)
212#define STS_GPL_SHIFT 12
213#define STS_PP_MASK (0x00000c00)
214#define STS_PP_SHIFT 10
215#define STS_PP_DEF 0
216#define STS_PP_PROC 1
217#define STS_PP_BAL 2
218#define STS_PP_GFX 3
219#define STS_PCTD_DIS (1<<9)
220#define STS_GTD_DIS (1<<8)
221#define STS_PTL_MASK (0x000000fe)
222#define STS_PTL_SHIFT 1
223#define STS_NVV (1<<0)
224#define THM_SEC 0x9c
225#define SEC_ACK (1<<0)
226#define THM_TC3 0xa4
227#define THM_TC1 0xa8
228#define STS_PPL_MASK (0x0003ff00)
229#define STS_PPL_SHIFT 16
230#define THM_TC2 0xac
231#define THM_DTV 0xb0
232#define THM_ITV 0xd8
233#define ITV_ME_SEQNO_MASK 0x000f0000 /* ME should update every ~200ms */
234#define ITV_ME_SEQNO_SHIFT (16)
235#define ITV_MCH_TEMP_MASK 0x0000ff00
236#define ITV_MCH_TEMP_SHIFT (8)
237#define ITV_PCH_TEMP_MASK 0x000000ff
238
239#define thm_readb(off) readb(ips->regmap + (off))
240#define thm_readw(off) readw(ips->regmap + (off))
241#define thm_readl(off) readl(ips->regmap + (off))
242#define thm_readq(off) readq(ips->regmap + (off))
243
244#define thm_writeb(off, val) writeb((val), ips->regmap + (off))
245#define thm_writew(off, val) writew((val), ips->regmap + (off))
246#define thm_writel(off, val) writel((val), ips->regmap + (off))
247
248static const int IPS_ADJUST_PERIOD = 5000; /* ms */
249
250/* For initial average collection */
251static const int IPS_SAMPLE_PERIOD = 200; /* ms */
252static const int IPS_SAMPLE_WINDOW = 5000; /* 5s moving window of samples */
253#define IPS_SAMPLE_COUNT (IPS_SAMPLE_WINDOW / IPS_SAMPLE_PERIOD)
254
255/* Per-SKU limits */
256struct ips_mcp_limits {
257 int cpu_family;
258 int cpu_model; /* includes extended model... */
259 int mcp_power_limit; /* mW units */
260 int core_power_limit;
261 int mch_power_limit;
262 int core_temp_limit; /* degrees C */
263 int mch_temp_limit;
264};
265
266/* Max temps are -10 degrees C to avoid PROCHOT# */
267
268struct ips_mcp_limits ips_sv_limits = {
269 .mcp_power_limit = 35000,
270 .core_power_limit = 29000,
271 .mch_power_limit = 20000,
272 .core_temp_limit = 95,
273 .mch_temp_limit = 90
274};
275
276struct ips_mcp_limits ips_lv_limits = {
277 .mcp_power_limit = 25000,
278 .core_power_limit = 21000,
279 .mch_power_limit = 13000,
280 .core_temp_limit = 95,
281 .mch_temp_limit = 90
282};
283
284struct ips_mcp_limits ips_ulv_limits = {
285 .mcp_power_limit = 18000,
286 .core_power_limit = 14000,
287 .mch_power_limit = 11000,
288 .core_temp_limit = 95,
289 .mch_temp_limit = 90
290};
291
292struct ips_driver {
293 struct pci_dev *dev;
294 void *regmap;
295 struct task_struct *monitor;
296 struct task_struct *adjust;
297 struct dentry *debug_root;
298
299 /* Average CPU core temps (all averages in .01 degrees C for precision) */
300 u16 ctv1_avg_temp;
301 u16 ctv2_avg_temp;
302 /* GMCH average */
303 u16 mch_avg_temp;
304 /* Average for the CPU (both cores?) */
305 u16 mcp_avg_temp;
306 /* Average power consumption (in mW) */
307 u32 cpu_avg_power;
308 u32 mch_avg_power;
309
310 /* Offset values */
311 u16 cta_val;
312 u16 pta_val;
313 u16 mgta_val;
314
315 /* Maximums & prefs, protected by turbo status lock */
316 spinlock_t turbo_status_lock;
317 u16 mcp_temp_limit;
318 u16 mcp_power_limit;
319 u16 core_power_limit;
320 u16 mch_power_limit;
321 bool cpu_turbo_enabled;
322 bool __cpu_turbo_on;
323 bool gpu_turbo_enabled;
324 bool __gpu_turbo_on;
325 bool gpu_preferred;
326 bool poll_turbo_status;
327 bool second_cpu;
328 struct ips_mcp_limits *limits;
329
330 /* Optional MCH interfaces for if i915 is in use */
331 unsigned long (*read_mch_val)(void);
332 bool (*gpu_raise)(void);
333 bool (*gpu_lower)(void);
334 bool (*gpu_busy)(void);
335 bool (*gpu_turbo_disable)(void);
336
337 /* For restoration at unload */
338 u64 orig_turbo_limit;
339 u64 orig_turbo_ratios;
340};
341
342/**
343 * ips_cpu_busy - is CPU busy?
344 * @ips: IPS driver struct
345 *
346 * Check CPU for load to see whether we should increase its thermal budget.
347 *
348 * RETURNS:
349 * True if the CPU could use more power, false otherwise.
350 */
351static bool ips_cpu_busy(struct ips_driver *ips)
352{
353 if ((avenrun[0] >> FSHIFT) > 1)
354 return true;
355
356 return false;
357}
358
359/**
360 * ips_cpu_raise - raise CPU power clamp
361 * @ips: IPS driver struct
362 *
363 * Raise the CPU power clamp by %IPS_CPU_STEP, in accordance with TDP for
364 * this platform.
365 *
366 * We do this by adjusting the TURBO_POWER_CURRENT_LIMIT MSR upwards (as
367 * long as we haven't hit the TDP limit for the SKU).
368 */
369static void ips_cpu_raise(struct ips_driver *ips)
370{
371 u64 turbo_override;
372 u16 cur_tdp_limit, new_tdp_limit;
373
374 if (!ips->cpu_turbo_enabled)
375 return;
376
377 rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
378
379 cur_tdp_limit = turbo_override & TURBO_TDP_MASK;
380 new_tdp_limit = cur_tdp_limit + 8; /* 1W increase */
381
382 /* Clamp to SKU TDP limit */
383 if (((new_tdp_limit * 10) / 8) > ips->core_power_limit)
384 new_tdp_limit = cur_tdp_limit;
385
386 thm_writew(THM_MPCPC, (new_tdp_limit * 10) / 8);
387
388 turbo_override |= TURBO_TDC_OVR_EN | TURBO_TDC_OVR_EN;
389 wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
390
391 turbo_override &= ~TURBO_TDP_MASK;
392 turbo_override |= new_tdp_limit;
393
394 wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
395}
396
397/**
398 * ips_cpu_lower - lower CPU power clamp
399 * @ips: IPS driver struct
400 *
401 * Lower CPU power clamp b %IPS_CPU_STEP if possible.
402 *
403 * We do this by adjusting the TURBO_POWER_CURRENT_LIMIT MSR down, going
404 * as low as the platform limits will allow (though we could go lower there
405 * wouldn't be much point).
406 */
407static void ips_cpu_lower(struct ips_driver *ips)
408{
409 u64 turbo_override;
410 u16 cur_limit, new_limit;
411
412 rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
413
414 cur_limit = turbo_override & TURBO_TDP_MASK;
415 new_limit = cur_limit - 8; /* 1W decrease */
416
417 /* Clamp to SKU TDP limit */
418 if (((new_limit * 10) / 8) < (ips->orig_turbo_limit & TURBO_TDP_MASK))
419 new_limit = ips->orig_turbo_limit & TURBO_TDP_MASK;
420
421 thm_writew(THM_MPCPC, (new_limit * 10) / 8);
422
423 turbo_override |= TURBO_TDC_OVR_EN | TURBO_TDC_OVR_EN;
424 wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
425
426 turbo_override &= ~TURBO_TDP_MASK;
427 turbo_override |= new_limit;
428
429 wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
430}
431
432/**
433 * do_enable_cpu_turbo - internal turbo enable function
434 * @data: unused
435 *
436 * Internal function for actually updating MSRs. When we enable/disable
437 * turbo, we need to do it on each CPU; this function is the one called
438 * by on_each_cpu() when needed.
439 */
440static void do_enable_cpu_turbo(void *data)
441{
442 u64 perf_ctl;
443
444 rdmsrl(IA32_PERF_CTL, perf_ctl);
445 if (perf_ctl & IA32_PERF_TURBO_DIS) {
446 perf_ctl &= ~IA32_PERF_TURBO_DIS;
447 wrmsrl(IA32_PERF_CTL, perf_ctl);
448 }
449}
450
451/**
452 * ips_enable_cpu_turbo - enable turbo mode on all CPUs
453 * @ips: IPS driver struct
454 *
455 * Enable turbo mode by clearing the disable bit in IA32_PERF_CTL on
456 * all logical threads.
457 */
458static void ips_enable_cpu_turbo(struct ips_driver *ips)
459{
460 /* Already on, no need to mess with MSRs */
461 if (ips->__cpu_turbo_on)
462 return;
463
464 on_each_cpu(do_enable_cpu_turbo, ips, 1);
465
466 ips->__cpu_turbo_on = true;
467}
468
469/**
470 * do_disable_cpu_turbo - internal turbo disable function
471 * @data: unused
472 *
473 * Internal function for actually updating MSRs. When we enable/disable
474 * turbo, we need to do it on each CPU; this function is the one called
475 * by on_each_cpu() when needed.
476 */
477static void do_disable_cpu_turbo(void *data)
478{
479 u64 perf_ctl;
480
481 rdmsrl(IA32_PERF_CTL, perf_ctl);
482 if (!(perf_ctl & IA32_PERF_TURBO_DIS)) {
483 perf_ctl |= IA32_PERF_TURBO_DIS;
484 wrmsrl(IA32_PERF_CTL, perf_ctl);
485 }
486}
487
488/**
489 * ips_disable_cpu_turbo - disable turbo mode on all CPUs
490 * @ips: IPS driver struct
491 *
492 * Disable turbo mode by setting the disable bit in IA32_PERF_CTL on
493 * all logical threads.
494 */
495static void ips_disable_cpu_turbo(struct ips_driver *ips)
496{
497 /* Already off, leave it */
498 if (!ips->__cpu_turbo_on)
499 return;
500
501 on_each_cpu(do_disable_cpu_turbo, ips, 1);
502
503 ips->__cpu_turbo_on = false;
504}
505
/**
 * ips_gpu_busy - is GPU busy?
 * @ips: IPS driver struct
 *
 * Check GPU for load to see whether we should increase its thermal budget.
 * We need to call into the i915 driver in this case.
 *
 * RETURNS:
 * True if the GPU could use more power, false otherwise.
 */
static bool ips_gpu_busy(struct ips_driver *ips)
{
	/* i915 hook (ips->gpu_busy) is not consulted yet; always report idle */
	return false;
}
520
521/**
522 * ips_gpu_raise - raise GPU power clamp
523 * @ips: IPS driver struct
524 *
525 * Raise the GPU frequency/power if possible. We need to call into the
526 * i915 driver in this case.
527 */
528static void ips_gpu_raise(struct ips_driver *ips)
529{
530 if (!ips->gpu_turbo_enabled)
531 return;
532
533 if (!ips->gpu_raise())
534 ips->gpu_turbo_enabled = false;
535
536 return;
537}
538
539/**
540 * ips_gpu_lower - lower GPU power clamp
541 * @ips: IPS driver struct
542 *
543 * Lower GPU frequency/power if possible. Need to call i915.
544 */
545static void ips_gpu_lower(struct ips_driver *ips)
546{
547 if (!ips->gpu_turbo_enabled)
548 return;
549
550 if (!ips->gpu_lower())
551 ips->gpu_turbo_enabled = false;
552
553 return;
554}
555
556/**
557 * ips_enable_gpu_turbo - notify the gfx driver turbo is available
558 * @ips: IPS driver struct
559 *
560 * Call into the graphics driver indicating that it can safely use
561 * turbo mode.
562 */
563static void ips_enable_gpu_turbo(struct ips_driver *ips)
564{
565 if (ips->__gpu_turbo_on)
566 return;
567 ips->__gpu_turbo_on = true;
568}
569
570/**
571 * ips_disable_gpu_turbo - notify the gfx driver to disable turbo mode
572 * @ips: IPS driver struct
573 *
574 * Request that the graphics driver disable turbo mode.
575 */
576static void ips_disable_gpu_turbo(struct ips_driver *ips)
577{
578 /* Avoid calling i915 if turbo is already disabled */
579 if (!ips->__gpu_turbo_on)
580 return;
581
582 if (!ips->gpu_turbo_disable())
583 dev_err(&ips->dev->dev, "failed to disable graphis turbo\n");
584 else
585 ips->__gpu_turbo_on = false;
586}
587
588/**
589 * mcp_exceeded - check whether we're outside our thermal & power limits
590 * @ips: IPS driver struct
591 *
592 * Check whether the MCP is over its thermal or power budget.
593 */
594static bool mcp_exceeded(struct ips_driver *ips)
595{
596 unsigned long flags;
597 bool ret = false;
598
599 spin_lock_irqsave(&ips->turbo_status_lock, flags);
600 if (ips->mcp_avg_temp > (ips->mcp_temp_limit * 100))
601 ret = true;
602 if (ips->cpu_avg_power + ips->mch_avg_power > ips->mcp_power_limit)
603 ret = true;
604 spin_unlock_irqrestore(&ips->turbo_status_lock, flags);
605
606 if (ret)
607 dev_warn(&ips->dev->dev,
608 "MCP power or thermal limit exceeded\n");
609
610 return ret;
611}
612
613/**
614 * cpu_exceeded - check whether a CPU core is outside its limits
615 * @ips: IPS driver struct
616 * @cpu: CPU number to check
617 *
618 * Check a given CPU's average temp or power is over its limit.
619 */
620static bool cpu_exceeded(struct ips_driver *ips, int cpu)
621{
622 unsigned long flags;
623 int avg;
624 bool ret = false;
625
626 spin_lock_irqsave(&ips->turbo_status_lock, flags);
627 avg = cpu ? ips->ctv2_avg_temp : ips->ctv1_avg_temp;
628 if (avg > (ips->limits->core_temp_limit * 100))
629 ret = true;
630 if (ips->cpu_avg_power > ips->core_power_limit)
631 ret = true;
632 spin_unlock_irqrestore(&ips->turbo_status_lock, flags);
633
634 if (ret)
635 dev_warn(&ips->dev->dev,
636 "CPU power or thermal limit exceeded\n");
637
638 return ret;
639}
640
641/**
642 * mch_exceeded - check whether the GPU is over budget
643 * @ips: IPS driver struct
644 *
645 * Check the MCH temp & power against their maximums.
646 */
647static bool mch_exceeded(struct ips_driver *ips)
648{
649 unsigned long flags;
650 bool ret = false;
651
652 spin_lock_irqsave(&ips->turbo_status_lock, flags);
653 if (ips->mch_avg_temp > (ips->limits->mch_temp_limit * 100))
654 ret = true;
655 spin_unlock_irqrestore(&ips->turbo_status_lock, flags);
656
657 return ret;
658}
659
/**
 * update_turbo_limits - get various limits & settings from regs
 * @ips: IPS driver struct
 *
 * Update the IPS power & temp limits, along with turbo enable flags,
 * based on latest register contents.
 *
 * Used at init time and for runtime BIOS support, which requires polling
 * the regs for updates (as a result of AC->DC transition for example).
 *
 * LOCKING:
 * Caller must hold turbo_status_lock (outside of init)
 */
static void update_turbo_limits(struct ips_driver *ips)
{
	u32 hts = thm_readl(THM_HTS);	/* hardware turbo status word */

	/* Turbo is allowed unless the corresponding disable bit is set */
	ips->cpu_turbo_enabled = !(hts & HTS_PCTD_DIS);
	ips->gpu_turbo_enabled = !(hts & HTS_GTD_DIS);
	/* Power limits are in mW, temp limit in degrees C */
	ips->core_power_limit = thm_readw(THM_MPCPC);
	ips->mch_power_limit = thm_readw(THM_MMGPC);
	ips->mcp_temp_limit = thm_readw(THM_PTL);
	ips->mcp_power_limit = thm_readw(THM_MPPC);

	/* Ignore BIOS CPU vs GPU pref */
}
686
687/**
688 * ips_adjust - adjust power clamp based on thermal state
689 * @data: ips driver structure
690 *
691 * Wake up every 5s or so and check whether we should adjust the power clamp.
692 * Check CPU and GPU load to determine which needs adjustment. There are
693 * several things to consider here:
694 * - do we need to adjust up or down?
695 * - is CPU busy?
696 * - is GPU busy?
697 * - is CPU in turbo?
698 * - is GPU in turbo?
699 * - is CPU or GPU preferred? (CPU is default)
700 *
701 * So, given the above, we do the following:
702 * - up (TDP available)
703 * - CPU not busy, GPU not busy - nothing
704 * - CPU busy, GPU not busy - adjust CPU up
705 * - CPU not busy, GPU busy - adjust GPU up
706 * - CPU busy, GPU busy - adjust preferred unit up, taking headroom from
707 * non-preferred unit if necessary
708 * - down (at TDP limit)
709 * - adjust both CPU and GPU down if possible
710 *
711 cpu+ gpu+ cpu+gpu- cpu-gpu+ cpu-gpu-
712cpu < gpu < cpu+gpu+ cpu+ gpu+ nothing
713cpu < gpu >= cpu+gpu-(mcp<) cpu+gpu-(mcp<) gpu- gpu-
714cpu >= gpu < cpu-gpu+(mcp<) cpu- cpu-gpu+(mcp<) cpu-
715cpu >= gpu >= cpu-gpu- cpu-gpu- cpu-gpu- cpu-gpu-
716 *
717 */
718static int ips_adjust(void *data)
719{
720 struct ips_driver *ips = data;
721 unsigned long flags;
722
723 dev_dbg(&ips->dev->dev, "starting ips-adjust thread\n");
724
725 /*
726 * Adjust CPU and GPU clamps every 5s if needed. Doing it more
727 * often isn't recommended due to ME interaction.
728 */
729 do {
730 bool cpu_busy = ips_cpu_busy(ips);
731 bool gpu_busy = ips_gpu_busy(ips);
732
733 spin_lock_irqsave(&ips->turbo_status_lock, flags);
734 if (ips->poll_turbo_status)
735 update_turbo_limits(ips);
736 spin_unlock_irqrestore(&ips->turbo_status_lock, flags);
737
738 /* Update turbo status if necessary */
739 if (ips->cpu_turbo_enabled)
740 ips_enable_cpu_turbo(ips);
741 else
742 ips_disable_cpu_turbo(ips);
743
744 if (ips->gpu_turbo_enabled)
745 ips_enable_gpu_turbo(ips);
746 else
747 ips_disable_gpu_turbo(ips);
748
749 /* We're outside our comfort zone, crank them down */
750 if (!mcp_exceeded(ips)) {
751 ips_cpu_lower(ips);
752 ips_gpu_lower(ips);
753 goto sleep;
754 }
755
756 if (!cpu_exceeded(ips, 0) && cpu_busy)
757 ips_cpu_raise(ips);
758 else
759 ips_cpu_lower(ips);
760
761 if (!mch_exceeded(ips) && gpu_busy)
762 ips_gpu_raise(ips);
763 else
764 ips_gpu_lower(ips);
765
766sleep:
767 schedule_timeout_interruptible(msecs_to_jiffies(IPS_ADJUST_PERIOD));
768 } while (!kthread_should_stop());
769
770 dev_dbg(&ips->dev->dev, "ips-adjust thread stopped\n");
771
772 return 0;
773}
774
775/*
776 * Helpers for reading out temp/power values and calculating their
777 * averages for the decision making and monitoring functions.
778 */
779
780static u16 calc_avg_temp(struct ips_driver *ips, u16 *array)
781{
782 u64 total = 0;
783 int i;
784 u16 avg;
785
786 for (i = 0; i < IPS_SAMPLE_COUNT; i++)
787 total += (u64)(array[i] * 100);
788
789 do_div(total, IPS_SAMPLE_COUNT);
790
791 avg = (u16)total;
792
793 return avg;
794}
795
796static u16 read_mgtv(struct ips_driver *ips)
797{
798 u16 ret;
799 u64 slope, offset;
800 u64 val;
801
802 val = thm_readq(THM_MGTV);
803 val = (val & TV_MASK) >> TV_SHIFT;
804
805 slope = offset = thm_readw(THM_MGTA);
806 slope = (slope & MGTA_SLOPE_MASK) >> MGTA_SLOPE_SHIFT;
807 offset = offset & MGTA_OFFSET_MASK;
808
809 ret = ((val * slope + 0x40) >> 7) + offset;
810
811
812 return ret;
813}
814
815static u16 read_ptv(struct ips_driver *ips)
816{
817 u16 val, slope, offset;
818
819 slope = (ips->pta_val & PTA_SLOPE_MASK) >> PTA_SLOPE_SHIFT;
820 offset = ips->pta_val & PTA_OFFSET_MASK;
821
822 val = thm_readw(THM_PTV) & PTV_MASK;
823
824 return val;
825}
826
827static u16 read_ctv(struct ips_driver *ips, int cpu)
828{
829 int reg = cpu ? THM_CTV2 : THM_CTV1;
830 u16 val;
831
832 val = thm_readw(reg);
833 if (!(val & CTV_TEMP_ERROR))
834 val = (val) >> 6; /* discard fractional component */
835 else
836 val = 0;
837
838 return val;
839}
840
/*
 * get_cpu_power - compute CPU power over the last sample period
 * @ips: IPS driver struct
 * @last: in/out: previous CEC reading, updated to the current one
 * @period: time since @last was taken, in ms
 *
 * RETURNS:
 * Average power consumption in mW over @period.
 */
static u32 get_cpu_power(struct ips_driver *ips, u32 *last, int period)
{
	u32 val;
	u32 ret;

	/*
	 * CEC is in joules/65535. Take difference over time to
	 * get watts.
	 */
	val = thm_readl(THM_CEC);	/* cumulative energy counter */

	/* period is in ms and we want mW */
	ret = (((val - *last) * 1000) / period);
	ret = (ret * 1000) / 65535;	/* scale from the 1/65535 J units */
	*last = val;	/* remember this reading for the next delta */

	return ret;
}
859
/* Decaying-average weight: 1/factor new sample, (factor-1)/factor history */
static const u16 temp_decay_factor = 2;
/*
 * update_average_temp - fold a new sample into the decaying temp average
 * @avg: current running average, in .01 degrees C
 * @val: new sample, in whole degrees C
 *
 * Returns the updated average in .01 degrees C.
 */
static u16 update_average_temp(u16 avg, u16 val)
{
	u16 ret;

	/* Multiply by 100 for extra precision */
	ret = (val * 100 / temp_decay_factor) +
		(((temp_decay_factor - 1) * avg) / temp_decay_factor);
	return ret;
}
870
871static const u16 power_decay_factor = 2;
872static u16 update_average_power(u32 avg, u32 val)
873{
874 u32 ret;
875
876 ret = (val / power_decay_factor) +
877 (((power_decay_factor - 1) * avg) / power_decay_factor);
878
879 return ret;
880}
881
882static u32 calc_avg_power(struct ips_driver *ips, u32 *array)
883{
884 u64 total = 0;
885 u32 avg;
886 int i;
887
888 for (i = 0; i < IPS_SAMPLE_COUNT; i++)
889 total += array[i];
890
891 do_div(total, IPS_SAMPLE_COUNT);
892 avg = (u32)total;
893
894 return avg;
895}
896
/* Deferrable-timer callback: wake the monitor thread (@arg is its task) */
static void monitor_timeout(unsigned long arg)
{
	wake_up_process((struct task_struct *)arg);
}
901
902/**
903 * ips_monitor - temp/power monitoring thread
904 * @data: ips driver structure
905 *
906 * This is the main function for the IPS driver. It monitors power and
 907 * temperature in the MCP and adjusts CPU and GPU power clamps accordingly.
908 *
 909 * We keep a 5s moving average of power consumption and temperature. Using
910 * that data, along with CPU vs GPU preference, we adjust the power clamps
911 * up or down.
912 */
913static int ips_monitor(void *data)
914{
915 struct ips_driver *ips = data;
916 struct timer_list timer;
917 unsigned long seqno_timestamp, expire, last_msecs, last_sample_period;
918 int i;
919 u32 *cpu_samples = NULL, *mchp_samples = NULL, old_cpu_power;
920 u16 *mcp_samples = NULL, *ctv1_samples = NULL, *ctv2_samples = NULL,
921 *mch_samples = NULL;
922 u8 cur_seqno, last_seqno;
923
924 mcp_samples = kzalloc(sizeof(u16) * IPS_SAMPLE_COUNT, GFP_KERNEL);
925 ctv1_samples = kzalloc(sizeof(u16) * IPS_SAMPLE_COUNT, GFP_KERNEL);
926 ctv2_samples = kzalloc(sizeof(u16) * IPS_SAMPLE_COUNT, GFP_KERNEL);
927 mch_samples = kzalloc(sizeof(u16) * IPS_SAMPLE_COUNT, GFP_KERNEL);
928 cpu_samples = kzalloc(sizeof(u32) * IPS_SAMPLE_COUNT, GFP_KERNEL);
929 mchp_samples = kzalloc(sizeof(u32) * IPS_SAMPLE_COUNT, GFP_KERNEL);
930 if (!mcp_samples || !ctv1_samples || !ctv2_samples || !mch_samples) {
931 dev_err(&ips->dev->dev,
932 "failed to allocate sample array, ips disabled\n");
933 kfree(mcp_samples);
934 kfree(ctv1_samples);
935 kfree(ctv2_samples);
936 kfree(mch_samples);
937 kfree(cpu_samples);
938 kthread_stop(ips->adjust);
939 return -ENOMEM;
940 }
941
942 last_seqno = (thm_readl(THM_ITV) & ITV_ME_SEQNO_MASK) >>
943 ITV_ME_SEQNO_SHIFT;
944 seqno_timestamp = get_jiffies_64();
945
946 old_cpu_power = thm_readl(THM_CEC) / 65535;
947 schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD));
948
949 /* Collect an initial average */
950 for (i = 0; i < IPS_SAMPLE_COUNT; i++) {
951 u32 mchp, cpu_power;
952 u16 val;
953
954 mcp_samples[i] = read_ptv(ips);
955
956 val = read_ctv(ips, 0);
957 ctv1_samples[i] = val;
958
959 val = read_ctv(ips, 1);
960 ctv2_samples[i] = val;
961
962 val = read_mgtv(ips);
963 mch_samples[i] = val;
964
965 cpu_power = get_cpu_power(ips, &old_cpu_power,
966 IPS_SAMPLE_PERIOD);
967 cpu_samples[i] = cpu_power;
968
969 if (ips->read_mch_val) {
970 mchp = ips->read_mch_val();
971 mchp_samples[i] = mchp;
972 }
973
974 schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD));
975 if (kthread_should_stop())
976 break;
977 }
978
979 ips->mcp_avg_temp = calc_avg_temp(ips, mcp_samples);
980 ips->ctv1_avg_temp = calc_avg_temp(ips, ctv1_samples);
981 ips->ctv2_avg_temp = calc_avg_temp(ips, ctv2_samples);
982 ips->mch_avg_temp = calc_avg_temp(ips, mch_samples);
983 ips->cpu_avg_power = calc_avg_power(ips, cpu_samples);
984 ips->mch_avg_power = calc_avg_power(ips, mchp_samples);
985 kfree(mcp_samples);
986 kfree(ctv1_samples);
987 kfree(ctv2_samples);
988 kfree(mch_samples);
989 kfree(cpu_samples);
990 kfree(mchp_samples);
991
992 /* Start the adjustment thread now that we have data */
993 wake_up_process(ips->adjust);
994
995 /*
996 * Ok, now we have an initial avg. From here on out, we track the
997 * running avg using a decaying average calculation. This allows
998 * us to reduce the sample frequency if the CPU and GPU are idle.
999 */
1000 old_cpu_power = thm_readl(THM_CEC);
1001 schedule_timeout_interruptible(msecs_to_jiffies(IPS_SAMPLE_PERIOD));
1002 last_sample_period = IPS_SAMPLE_PERIOD;
1003
1004 setup_deferrable_timer_on_stack(&timer, monitor_timeout,
1005 (unsigned long)current);
1006 do {
1007 u32 cpu_val, mch_val;
1008 u16 val;
1009
1010 /* MCP itself */
1011 val = read_ptv(ips);
1012 ips->mcp_avg_temp = update_average_temp(ips->mcp_avg_temp, val);
1013
1014 /* Processor 0 */
1015 val = read_ctv(ips, 0);
1016 ips->ctv1_avg_temp =
1017 update_average_temp(ips->ctv1_avg_temp, val);
1018 /* Power */
1019 cpu_val = get_cpu_power(ips, &old_cpu_power,
1020 last_sample_period);
1021 ips->cpu_avg_power =
1022 update_average_power(ips->cpu_avg_power, cpu_val);
1023
1024 if (ips->second_cpu) {
1025 /* Processor 1 */
1026 val = read_ctv(ips, 1);
1027 ips->ctv2_avg_temp =
1028 update_average_temp(ips->ctv2_avg_temp, val);
1029 }
1030
1031 /* MCH */
1032 val = read_mgtv(ips);
1033 ips->mch_avg_temp = update_average_temp(ips->mch_avg_temp, val);
1034 /* Power */
1035 if (ips->read_mch_val) {
1036 mch_val = ips->read_mch_val();
1037 ips->mch_avg_power =
1038 update_average_power(ips->mch_avg_power,
1039 mch_val);
1040 }
1041
1042 /*
1043 * Make sure ME is updating thermal regs.
1044 * Note:
1045 * If it's been more than a second since the last update,
1046 * the ME is probably hung.
1047 */
1048 cur_seqno = (thm_readl(THM_ITV) & ITV_ME_SEQNO_MASK) >>
1049 ITV_ME_SEQNO_SHIFT;
1050 if (cur_seqno == last_seqno &&
1051 time_after(jiffies, seqno_timestamp + HZ)) {
1052 dev_warn(&ips->dev->dev, "ME failed to update for more than 1s, likely hung\n");
1053 } else {
1054 seqno_timestamp = get_jiffies_64();
1055 last_seqno = cur_seqno;
1056 }
1057
1058 last_msecs = jiffies_to_msecs(jiffies);
1059 expire = jiffies + msecs_to_jiffies(IPS_SAMPLE_PERIOD);
1060
1061 __set_current_state(TASK_UNINTERRUPTIBLE);
1062 mod_timer(&timer, expire);
1063 schedule();
1064
1065 /* Calculate actual sample period for power averaging */
1066 last_sample_period = jiffies_to_msecs(jiffies) - last_msecs;
1067 if (!last_sample_period)
1068 last_sample_period = 1;
1069 } while (!kthread_should_stop());
1070
1071 del_timer_sync(&timer);
1072 destroy_timer_on_stack(&timer);
1073
1074 dev_dbg(&ips->dev->dev, "ips-monitor thread stopped\n");
1075
1076 return 0;
1077}
1078
#if 0
/* Debug helpers: dump interesting THM registers.  Compiled out by default. */
#define THM_DUMPW(reg) \
	do { \
		u16 val = thm_readw(reg); \
		dev_dbg(&ips->dev->dev, #reg ": 0x%04x\n", val); \
	} while (0)
#define THM_DUMPL(reg) \
	do { \
		u32 val = thm_readl(reg); \
		dev_dbg(&ips->dev->dev, #reg ": 0x%08x\n", val); \
	} while (0)
#define THM_DUMPQ(reg) \
	do { \
		u64 val = thm_readq(reg); \
		/* u64 needs the "ll" length modifier; plain %x is wrong on 32-bit */ \
		dev_dbg(&ips->dev->dev, #reg ": 0x%016llx\n", \
			(unsigned long long)val); \
	} while (0)

/**
 * dump_thermal_info - log the current thermal sensor state
 * @ips: IPS driver struct
 *
 * Debug-only helper: dumps the processor temp limit and a handful of
 * THM registers via dev_dbg().
 */
static void dump_thermal_info(struct ips_driver *ips)
{
	u16 ptl;

	ptl = thm_readw(THM_PTL);
	dev_dbg(&ips->dev->dev, "Processor temp limit: %d\n", ptl);

	THM_DUMPW(THM_CTA);
	THM_DUMPW(THM_TRC);
	THM_DUMPW(THM_CTV1);
	THM_DUMPL(THM_STS);
	THM_DUMPW(THM_PTV);
	THM_DUMPQ(THM_MGTV);
}
#endif
1111
1112/**
1113 * ips_irq_handler - handle temperature triggers and other IPS events
1114 * @irq: irq number
1115 * @arg: unused
1116 *
1117 * Handle temperature limit trigger events, generally by lowering the clamps.
1118 * If we're at a critical limit, we clamp back to the lowest possible value
1119 * to prevent emergency shutdown.
1120 */
1121static irqreturn_t ips_irq_handler(int irq, void *arg)
1122{
1123 struct ips_driver *ips = arg;
1124 u8 tses = thm_readb(THM_TSES);
1125 u8 tes = thm_readb(THM_TES);
1126
1127 if (!tses && !tes)
1128 return IRQ_NONE;
1129
1130 dev_info(&ips->dev->dev, "TSES: 0x%02x\n", tses);
1131 dev_info(&ips->dev->dev, "TES: 0x%02x\n", tes);
1132
1133 /* STS update from EC? */
1134 if (tes & 1) {
1135 u32 sts, tc1;
1136
1137 sts = thm_readl(THM_STS);
1138 tc1 = thm_readl(THM_TC1);
1139
1140 if (sts & STS_NVV) {
1141 spin_lock(&ips->turbo_status_lock);
1142 ips->core_power_limit = (sts & STS_PCPL_MASK) >>
1143 STS_PCPL_SHIFT;
1144 ips->mch_power_limit = (sts & STS_GPL_MASK) >>
1145 STS_GPL_SHIFT;
1146 /* ignore EC CPU vs GPU pref */
1147 ips->cpu_turbo_enabled = !(sts & STS_PCTD_DIS);
1148 ips->gpu_turbo_enabled = !(sts & STS_GTD_DIS);
1149 ips->mcp_temp_limit = (sts & STS_PTL_MASK) >>
1150 STS_PTL_SHIFT;
1151 ips->mcp_power_limit = (tc1 & STS_PPL_MASK) >>
1152 STS_PPL_SHIFT;
1153 spin_unlock(&ips->turbo_status_lock);
1154
1155 thm_writeb(THM_SEC, SEC_ACK);
1156 }
1157 thm_writeb(THM_TES, tes);
1158 }
1159
1160 /* Thermal trip */
1161 if (tses) {
1162 dev_warn(&ips->dev->dev,
1163 "thermal trip occurred, tses: 0x%04x\n", tses);
1164 thm_writeb(THM_TSES, tses);
1165 }
1166
1167 return IRQ_HANDLED;
1168}
1169
1170#ifndef CONFIG_DEBUG_FS
/* No-op stubs when debugfs isn't configured */
static void ips_debugfs_init(struct ips_driver *ips) { }
static void ips_debugfs_cleanup(struct ips_driver *ips) { }
1173#else
1174
1175/* Expose current state and limits in debugfs if possible */
1176
/* Describes one read-only debugfs file exposed by the driver */
struct ips_debugfs_node {
	struct ips_driver *ips;	/* owning driver; filled in by ips_debugfs_init() */
	char *name;		/* debugfs file name */
	int (*show)(struct seq_file *m, void *data);	/* seq_file show callback */
};
1182
/* debugfs show: running-average CPU core 1 temperature */
static int show_cpu_temp(struct seq_file *m, void *data)
{
	struct ips_driver *ips = m->private;

	/* Average temps appear to be kept in 1/100 degree units — print as X.YY */
	seq_printf(m, "%d.%02d\n", ips->ctv1_avg_temp / 100,
		   ips->ctv1_avg_temp % 100);

	return 0;
}
1192
/* debugfs show: running-average CPU power draw in milliwatts */
static int show_cpu_power(struct seq_file *m, void *data)
{
	struct ips_driver *ips = m->private;

	seq_printf(m, "%dmW\n", ips->cpu_avg_power);

	return 0;
}
1201
1202static int show_cpu_clamp(struct seq_file *m, void *data)
1203{
1204 u64 turbo_override;
1205 int tdp, tdc;
1206
1207 rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
1208
1209 tdp = (int)(turbo_override & TURBO_TDP_MASK);
1210 tdc = (int)((turbo_override & TURBO_TDC_MASK) >> TURBO_TDC_SHIFT);
1211
1212 /* Convert to .1W/A units */
1213 tdp = tdp * 10 / 8;
1214 tdc = tdc * 10 / 8;
1215
1216 /* Watts Amperes */
1217 seq_printf(m, "%d.%dW %d.%dA\n", tdp / 10, tdp % 10,
1218 tdc / 10, tdc % 10);
1219
1220 return 0;
1221}
1222
/* debugfs show: running-average MCH (graphics/memory hub) temperature */
static int show_mch_temp(struct seq_file *m, void *data)
{
	struct ips_driver *ips = m->private;

	/* Average temps appear to be kept in 1/100 degree units — print as X.YY */
	seq_printf(m, "%d.%02d\n", ips->mch_avg_temp / 100,
		   ips->mch_avg_temp % 100);

	return 0;
}
1232
/* debugfs show: running-average MCH power draw in milliwatts */
static int show_mch_power(struct seq_file *m, void *data)
{
	struct ips_driver *ips = m->private;

	seq_printf(m, "%dmW\n", ips->mch_avg_power);

	return 0;
}
1241
/* Table of debugfs files; the .ips back-pointer is set by ips_debugfs_init() */
static struct ips_debugfs_node ips_debug_files[] = {
	{ NULL, "cpu_temp", show_cpu_temp },
	{ NULL, "cpu_power", show_cpu_power },
	{ NULL, "cpu_clamp", show_cpu_clamp },
	{ NULL, "mch_temp", show_mch_temp },
	{ NULL, "mch_power", show_mch_power },
};
1249
/* debugfs open: hand the node's show callback and driver to seq_file */
static int ips_debugfs_open(struct inode *inode, struct file *file)
{
	struct ips_debugfs_node *node = inode->i_private;

	return single_open(file, node->show, node->ips);
}
1256
/* All IPS debugfs files are read-only, single-shot seq_files */
static const struct file_operations ips_debugfs_ops = {
	.owner = THIS_MODULE,
	.open = ips_debugfs_open,
	.read = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
1264
1265static void ips_debugfs_cleanup(struct ips_driver *ips)
1266{
1267 if (ips->debug_root)
1268 debugfs_remove_recursive(ips->debug_root);
1269 return;
1270}
1271
1272static void ips_debugfs_init(struct ips_driver *ips)
1273{
1274 int i;
1275
1276 ips->debug_root = debugfs_create_dir("ips", NULL);
1277 if (!ips->debug_root) {
1278 dev_err(&ips->dev->dev,
1279 "failed to create debugfs entries: %ld\n",
1280 PTR_ERR(ips->debug_root));
1281 return;
1282 }
1283
1284 for (i = 0; i < ARRAY_SIZE(ips_debug_files); i++) {
1285 struct dentry *ent;
1286 struct ips_debugfs_node *node = &ips_debug_files[i];
1287
1288 node->ips = ips;
1289 ent = debugfs_create_file(node->name, S_IFREG | S_IRUGO,
1290 ips->debug_root, node,
1291 &ips_debugfs_ops);
1292 if (!ent) {
1293 dev_err(&ips->dev->dev,
1294 "failed to create debug file: %ld\n",
1295 PTR_ERR(ent));
1296 goto err_cleanup;
1297 }
1298 }
1299
1300 return;
1301
1302err_cleanup:
1303 ips_debugfs_cleanup(ips);
1304 return;
1305}
1306#endif /* CONFIG_DEBUG_FS */
1307
1308/**
1309 * ips_detect_cpu - detect whether CPU supports IPS
1310 *
1311 * Walk our list and see if we're on a supported CPU. If we find one,
1312 * return the limits for it.
1313 */
1314static struct ips_mcp_limits *ips_detect_cpu(struct ips_driver *ips)
1315{
1316 u64 turbo_power, misc_en;
1317 struct ips_mcp_limits *limits = NULL;
1318 u16 tdp;
1319
1320 if (!(boot_cpu_data.x86 == 6 && boot_cpu_data.x86_model == 37)) {
1321 dev_info(&ips->dev->dev, "Non-IPS CPU detected.\n");
1322 goto out;
1323 }
1324
1325 rdmsrl(IA32_MISC_ENABLE, misc_en);
1326 /*
1327 * If the turbo enable bit isn't set, we shouldn't try to enable/disable
1328 * turbo manually or we'll get an illegal MSR access, even though
1329 * turbo will still be available.
1330 */
1331 if (!(misc_en & IA32_MISC_TURBO_EN))
1332 ; /* add turbo MSR write allowed flag if necessary */
1333
1334 if (strstr(boot_cpu_data.x86_model_id, "CPU M"))
1335 limits = &ips_sv_limits;
1336 else if (strstr(boot_cpu_data.x86_model_id, "CPU L"))
1337 limits = &ips_lv_limits;
1338 else if (strstr(boot_cpu_data.x86_model_id, "CPU U"))
1339 limits = &ips_ulv_limits;
1340 else
1341 dev_info(&ips->dev->dev, "No CPUID match found.\n");
1342
1343 rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_power);
1344 tdp = turbo_power & TURBO_TDP_MASK;
1345
1346 /* Sanity check TDP against CPU */
1347 if (limits->mcp_power_limit != (tdp / 8) * 1000) {
1348 dev_warn(&ips->dev->dev, "Warning: CPU TDP doesn't match expected value (found %d, expected %d)\n",
1349 tdp / 8, limits->mcp_power_limit / 1000);
1350 }
1351
1352out:
1353 return limits;
1354}
1355
1356/**
1357 * ips_get_i915_syms - try to get GPU control methods from i915 driver
1358 * @ips: IPS driver
1359 *
1360 * The i915 driver exports several interfaces to allow the IPS driver to
1361 * monitor and control graphics turbo mode. If we can find them, we can
1362 * enable graphics turbo, otherwise we must disable it to avoid exceeding
1363 * thermal and power limits in the MCP.
1364 */
1365static bool ips_get_i915_syms(struct ips_driver *ips)
1366{
1367 ips->read_mch_val = symbol_get(i915_read_mch_val);
1368 if (!ips->read_mch_val)
1369 goto out_err;
1370 ips->gpu_raise = symbol_get(i915_gpu_raise);
1371 if (!ips->gpu_raise)
1372 goto out_put_mch;
1373 ips->gpu_lower = symbol_get(i915_gpu_lower);
1374 if (!ips->gpu_lower)
1375 goto out_put_raise;
1376 ips->gpu_busy = symbol_get(i915_gpu_busy);
1377 if (!ips->gpu_busy)
1378 goto out_put_lower;
1379 ips->gpu_turbo_disable = symbol_get(i915_gpu_turbo_disable);
1380 if (!ips->gpu_turbo_disable)
1381 goto out_put_busy;
1382
1383 return true;
1384
1385out_put_busy:
1386 symbol_put(i915_gpu_turbo_disable);
1387out_put_lower:
1388 symbol_put(i915_gpu_lower);
1389out_put_raise:
1390 symbol_put(i915_gpu_raise);
1391out_put_mch:
1392 symbol_put(i915_read_mch_val);
1393out_err:
1394 return false;
1395}
1396
/* This driver binds only to the PCH thermal sensor device */
static DEFINE_PCI_DEVICE_TABLE(ips_id_table) = {
	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL,
	  PCI_DEVICE_ID_INTEL_THERMAL_SENSOR), },
	{ 0, }
};

MODULE_DEVICE_TABLE(pci, ips_id_table);
1404
1405static int ips_probe(struct pci_dev *dev, const struct pci_device_id *id)
1406{
1407 u64 platform_info;
1408 struct ips_driver *ips;
1409 u32 hts;
1410 int ret = 0;
1411 u16 htshi, trc, trc_required_mask;
1412 u8 tse;
1413
1414 ips = kzalloc(sizeof(struct ips_driver), GFP_KERNEL);
1415 if (!ips)
1416 return -ENOMEM;
1417
1418 pci_set_drvdata(dev, ips);
1419 ips->dev = dev;
1420
1421 ips->limits = ips_detect_cpu(ips);
1422 if (!ips->limits) {
1423 dev_info(&dev->dev, "IPS not supported on this CPU\n");
1424 ret = -ENXIO;
1425 goto error_free;
1426 }
1427
1428 spin_lock_init(&ips->turbo_status_lock);
1429
1430 if (!pci_resource_start(dev, 0)) {
1431 dev_err(&dev->dev, "TBAR not assigned, aborting\n");
1432 ret = -ENXIO;
1433 goto error_free;
1434 }
1435
1436 ret = pci_request_regions(dev, "ips thermal sensor");
1437 if (ret) {
1438 dev_err(&dev->dev, "thermal resource busy, aborting\n");
1439 goto error_free;
1440 }
1441
1442 ret = pci_enable_device(dev);
1443 if (ret) {
1444 dev_err(&dev->dev, "can't enable PCI device, aborting\n");
1445 goto error_free;
1446 }
1447
1448 ips->regmap = ioremap(pci_resource_start(dev, 0),
1449 pci_resource_len(dev, 0));
1450 if (!ips->regmap) {
1451 dev_err(&dev->dev, "failed to map thermal regs, aborting\n");
1452 ret = -EBUSY;
1453 goto error_release;
1454 }
1455
1456 tse = thm_readb(THM_TSE);
1457 if (tse != TSE_EN) {
1458 dev_err(&dev->dev, "thermal device not enabled (0x%02x), aborting\n", tse);
1459 ret = -ENXIO;
1460 goto error_unmap;
1461 }
1462
1463 trc = thm_readw(THM_TRC);
1464 trc_required_mask = TRC_CORE1_EN | TRC_CORE_PWR | TRC_MCH_EN;
1465 if ((trc & trc_required_mask) != trc_required_mask) {
1466 dev_err(&dev->dev, "thermal reporting for required devices not enabled, aborting\n");
1467 ret = -ENXIO;
1468 goto error_unmap;
1469 }
1470
1471 if (trc & TRC_CORE2_EN)
1472 ips->second_cpu = true;
1473
1474 if (!ips_get_i915_syms(ips)) {
1475 dev_err(&dev->dev, "failed to get i915 symbols, graphics turbo disabled\n");
1476 ips->gpu_turbo_enabled = false;
1477 } else {
1478 dev_dbg(&dev->dev, "graphics turbo enabled\n");
1479 ips->gpu_turbo_enabled = true;
1480 }
1481
1482 update_turbo_limits(ips);
1483 dev_dbg(&dev->dev, "max cpu power clamp: %dW\n",
1484 ips->mcp_power_limit / 10);
1485 dev_dbg(&dev->dev, "max core power clamp: %dW\n",
1486 ips->core_power_limit / 10);
1487 /* BIOS may update limits at runtime */
1488 if (thm_readl(THM_PSC) & PSP_PBRT)
1489 ips->poll_turbo_status = true;
1490
1491 /*
1492 * Check PLATFORM_INFO MSR to make sure this chip is
1493 * turbo capable.
1494 */
1495 rdmsrl(PLATFORM_INFO, platform_info);
1496 if (!(platform_info & PLATFORM_TDP)) {
1497 dev_err(&dev->dev, "platform indicates TDP override unavailable, aborting\n");
1498 ret = -ENODEV;
1499 goto error_unmap;
1500 }
1501
1502 /*
1503 * IRQ handler for ME interaction
1504 * Note: don't use MSI here as the PCH has bugs.
1505 */
1506 pci_disable_msi(dev);
1507 ret = request_irq(dev->irq, ips_irq_handler, IRQF_SHARED, "ips",
1508 ips);
1509 if (ret) {
1510 dev_err(&dev->dev, "request irq failed, aborting\n");
1511 goto error_unmap;
1512 }
1513
1514 /* Enable aux, hot & critical interrupts */
1515 thm_writeb(THM_TSPIEN, TSPIEN_AUX2_LOHI | TSPIEN_CRIT_LOHI |
1516 TSPIEN_HOT_LOHI | TSPIEN_AUX_LOHI);
1517 thm_writeb(THM_TEN, TEN_UPDATE_EN);
1518
1519 /* Collect adjustment values */
1520 ips->cta_val = thm_readw(THM_CTA);
1521 ips->pta_val = thm_readw(THM_PTA);
1522 ips->mgta_val = thm_readw(THM_MGTA);
1523
1524 /* Save turbo limits & ratios */
1525 rdmsrl(TURBO_POWER_CURRENT_LIMIT, ips->orig_turbo_limit);
1526
1527 ips_enable_cpu_turbo(ips);
1528 ips->cpu_turbo_enabled = true;
1529
1530 /* Set up the work queue and monitor/adjust threads */
1531 ips->monitor = kthread_run(ips_monitor, ips, "ips-monitor");
1532 if (IS_ERR(ips->monitor)) {
1533 dev_err(&dev->dev,
1534 "failed to create thermal monitor thread, aborting\n");
1535 ret = -ENOMEM;
1536 goto error_free_irq;
1537 }
1538
1539 ips->adjust = kthread_create(ips_adjust, ips, "ips-adjust");
1540 if (IS_ERR(ips->adjust)) {
1541 dev_err(&dev->dev,
1542 "failed to create thermal adjust thread, aborting\n");
1543 ret = -ENOMEM;
1544 goto error_thread_cleanup;
1545 }
1546
1547 hts = (ips->core_power_limit << HTS_PCPL_SHIFT) |
1548 (ips->mcp_temp_limit << HTS_PTL_SHIFT) | HTS_NVV;
1549 htshi = HTS2_PRST_RUNNING << HTS2_PRST_SHIFT;
1550
1551 thm_writew(THM_HTSHI, htshi);
1552 thm_writel(THM_HTS, hts);
1553
1554 ips_debugfs_init(ips);
1555
1556 dev_info(&dev->dev, "IPS driver initialized, MCP temp limit %d\n",
1557 ips->mcp_temp_limit);
1558 return ret;
1559
1560error_thread_cleanup:
1561 kthread_stop(ips->monitor);
1562error_free_irq:
1563 free_irq(ips->dev->irq, ips);
1564error_unmap:
1565 iounmap(ips->regmap);
1566error_release:
1567 pci_release_regions(dev);
1568error_free:
1569 kfree(ips);
1570 return ret;
1571}
1572
/**
 * ips_remove - undo ips_probe()
 * @dev: PCI device being removed
 *
 * Drops the i915 symbol references, restores the turbo limit MSR saved at
 * probe time, stops the monitor/adjust threads and frees all resources.
 */
static void ips_remove(struct pci_dev *dev)
{
	struct ips_driver *ips = pci_get_drvdata(dev);
	u64 turbo_override;

	if (!ips)
		return;

	ips_debugfs_cleanup(ips);

	/* Release i915 driver */
	if (ips->read_mch_val)
		symbol_put(i915_read_mch_val);
	if (ips->gpu_raise)
		symbol_put(i915_gpu_raise);
	if (ips->gpu_lower)
		symbol_put(i915_gpu_lower);
	if (ips->gpu_busy)
		symbol_put(i915_gpu_busy);
	if (ips->gpu_turbo_disable)
		symbol_put(i915_gpu_turbo_disable);

	/* Clear the TDC/TDP override bits, then restore the original limits */
	rdmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
	turbo_override &= ~(TURBO_TDC_OVR_EN | TURBO_TDP_OVR_EN);
	wrmsrl(TURBO_POWER_CURRENT_LIMIT, turbo_override);
	wrmsrl(TURBO_POWER_CURRENT_LIMIT, ips->orig_turbo_limit);

	/* Free the IRQ before stopping the threads that it may interact with */
	free_irq(ips->dev->irq, ips);
	if (ips->adjust)
		kthread_stop(ips->adjust);
	if (ips->monitor)
		kthread_stop(ips->monitor);
	iounmap(ips->regmap);
	pci_release_regions(dev);
	kfree(ips);
	dev_dbg(&dev->dev, "IPS driver removed\n");
}
1610
1611#ifdef CONFIG_PM
/* Suspend/resume are currently no-ops; always report success */
static int ips_suspend(struct pci_dev *dev, pm_message_t state)
{
	return 0;
}

static int ips_resume(struct pci_dev *dev)
{
	return 0;
}
1621#else
1622#define ips_suspend NULL
1623#define ips_resume NULL
1624#endif /* CONFIG_PM */
1625
/* Intentionally empty: no special handling at system shutdown */
static void ips_shutdown(struct pci_dev *dev)
{
}
1629
/* PCI glue: binds the IPS driver to the PCH thermal sensor device */
static struct pci_driver ips_pci_driver = {
	.name = "intel ips",
	.id_table = ips_id_table,
	.probe = ips_probe,
	.remove = ips_remove,
	.suspend = ips_suspend,
	.resume = ips_resume,
	.shutdown = ips_shutdown,
};
1639
/* Module init: register the PCI driver; devices bind via ips_probe() */
static int __init ips_init(void)
{
	return pci_register_driver(&ips_pci_driver);
}
module_init(ips_init);
1645
1646static void ips_exit(void)
1647{
1648 pci_unregister_driver(&ips_pci_driver);
1649 return;
1650}
1651module_exit(ips_exit);
1652
1653MODULE_LICENSE("GPL");
1654MODULE_AUTHOR("Jesse Barnes <jbarnes@virtuousgeek.org>");
1655MODULE_DESCRIPTION("Intelligent Power Sharing Driver");
diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h
index 7f0028e1010b..8f8b072c4c7b 100644
--- a/include/drm/i915_drm.h
+++ b/include/drm/i915_drm.h
@@ -33,6 +33,15 @@
33 * subject to backwards-compatibility constraints. 33 * subject to backwards-compatibility constraints.
34 */ 34 */
35 35
36#ifdef __KERNEL__
37/* For use by IPS driver */
38extern unsigned long i915_read_mch_val(void);
39extern bool i915_gpu_raise(void);
40extern bool i915_gpu_lower(void);
41extern bool i915_gpu_busy(void);
42extern bool i915_gpu_turbo_disable(void);
43#endif
44
36/* Each region is a minimum of 16k, and there are at most 255 of them. 45/* Each region is a minimum of 16k, and there are at most 255 of them.
37 */ 46 */
38#define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use 47#define I915_NR_TEX_REGIONS 255 /* table size 2k - maximum due to use