aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/powercap/intel_rapl.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/powercap/intel_rapl.c')
-rw-r--r--drivers/powercap/intel_rapl.c264
1 files changed, 177 insertions, 87 deletions
diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
index 45e05b32f9b6..c71443c4f265 100644
--- a/drivers/powercap/intel_rapl.c
+++ b/drivers/powercap/intel_rapl.c
@@ -29,6 +29,7 @@
29#include <linux/sysfs.h> 29#include <linux/sysfs.h>
30#include <linux/cpu.h> 30#include <linux/cpu.h>
31#include <linux/powercap.h> 31#include <linux/powercap.h>
32#include <asm/iosf_mbi.h>
32 33
33#include <asm/processor.h> 34#include <asm/processor.h>
34#include <asm/cpu_device_id.h> 35#include <asm/cpu_device_id.h>
@@ -70,11 +71,6 @@
70#define RAPL_PRIMITIVE_DERIVED BIT(1) /* not from raw data */ 71#define RAPL_PRIMITIVE_DERIVED BIT(1) /* not from raw data */
71#define RAPL_PRIMITIVE_DUMMY BIT(2) 72#define RAPL_PRIMITIVE_DUMMY BIT(2)
72 73
73/* scale RAPL units to avoid floating point math inside kernel */
74#define POWER_UNIT_SCALE (1000000)
75#define ENERGY_UNIT_SCALE (1000000)
76#define TIME_UNIT_SCALE (1000000)
77
78#define TIME_WINDOW_MAX_MSEC 40000 74#define TIME_WINDOW_MAX_MSEC 40000
79#define TIME_WINDOW_MIN_MSEC 250 75#define TIME_WINDOW_MIN_MSEC 250
80 76
@@ -175,9 +171,9 @@ struct rapl_package {
175 unsigned int id; /* physical package/socket id */ 171 unsigned int id; /* physical package/socket id */
176 unsigned int nr_domains; 172 unsigned int nr_domains;
177 unsigned long domain_map; /* bit map of active domains */ 173 unsigned long domain_map; /* bit map of active domains */
178 unsigned int power_unit_divisor; 174 unsigned int power_unit;
179 unsigned int energy_unit_divisor; 175 unsigned int energy_unit;
180 unsigned int time_unit_divisor; 176 unsigned int time_unit;
181 struct rapl_domain *domains; /* array of domains, sized at runtime */ 177 struct rapl_domain *domains; /* array of domains, sized at runtime */
182 struct powercap_zone *power_zone; /* keep track of parent zone */ 178 struct powercap_zone *power_zone; /* keep track of parent zone */
183 int nr_cpus; /* active cpus on the package, topology info is lost during 179 int nr_cpus; /* active cpus on the package, topology info is lost during
@@ -188,6 +184,18 @@ struct rapl_package {
188 */ 184 */
189 struct list_head plist; 185 struct list_head plist;
190}; 186};
187
188struct rapl_defaults {
189 int (*check_unit)(struct rapl_package *rp, int cpu);
190 void (*set_floor_freq)(struct rapl_domain *rd, bool mode);
191 u64 (*compute_time_window)(struct rapl_package *rp, u64 val,
192 bool to_raw);
193};
194static struct rapl_defaults *rapl_defaults;
195
196/* Sideband MBI registers */
197#define IOSF_CPU_POWER_BUDGET_CTL (0x2)
198
191#define PACKAGE_PLN_INT_SAVED BIT(0) 199#define PACKAGE_PLN_INT_SAVED BIT(0)
192#define MAX_PRIM_NAME (32) 200#define MAX_PRIM_NAME (32)
193 201
@@ -339,23 +347,13 @@ static int find_nr_power_limit(struct rapl_domain *rd)
339static int set_domain_enable(struct powercap_zone *power_zone, bool mode) 347static int set_domain_enable(struct powercap_zone *power_zone, bool mode)
340{ 348{
341 struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone); 349 struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone);
342 int nr_powerlimit;
343 350
344 if (rd->state & DOMAIN_STATE_BIOS_LOCKED) 351 if (rd->state & DOMAIN_STATE_BIOS_LOCKED)
345 return -EACCES; 352 return -EACCES;
353
346 get_online_cpus(); 354 get_online_cpus();
347 nr_powerlimit = find_nr_power_limit(rd);
348 /* here we activate/deactivate the hardware for power limiting */
349 rapl_write_data_raw(rd, PL1_ENABLE, mode); 355 rapl_write_data_raw(rd, PL1_ENABLE, mode);
350 /* always enable clamp such that p-state can go below OS requested 356 rapl_defaults->set_floor_freq(rd, mode);
351 * range. power capping priority over guranteed frequency.
352 */
353 rapl_write_data_raw(rd, PL1_CLAMP, mode);
354 /* some domains have pl2 */
355 if (nr_powerlimit > 1) {
356 rapl_write_data_raw(rd, PL2_ENABLE, mode);
357 rapl_write_data_raw(rd, PL2_CLAMP, mode);
358 }
359 put_online_cpus(); 357 put_online_cpus();
360 358
361 return 0; 359 return 0;
@@ -653,9 +651,7 @@ static void rapl_init_domains(struct rapl_package *rp)
653static u64 rapl_unit_xlate(int package, enum unit_type type, u64 value, 651static u64 rapl_unit_xlate(int package, enum unit_type type, u64 value,
654 int to_raw) 652 int to_raw)
655{ 653{
656 u64 divisor = 1; 654 u64 units = 1;
657 int scale = 1; /* scale to user friendly data without floating point */
658 u64 f, y; /* fraction and exp. used for time unit */
659 struct rapl_package *rp; 655 struct rapl_package *rp;
660 656
661 rp = find_package_by_id(package); 657 rp = find_package_by_id(package);
@@ -664,42 +660,24 @@ static u64 rapl_unit_xlate(int package, enum unit_type type, u64 value,
664 660
665 switch (type) { 661 switch (type) {
666 case POWER_UNIT: 662 case POWER_UNIT:
667 divisor = rp->power_unit_divisor; 663 units = rp->power_unit;
668 scale = POWER_UNIT_SCALE;
669 break; 664 break;
670 case ENERGY_UNIT: 665 case ENERGY_UNIT:
671 scale = ENERGY_UNIT_SCALE; 666 units = rp->energy_unit;
672 divisor = rp->energy_unit_divisor;
673 break; 667 break;
674 case TIME_UNIT: 668 case TIME_UNIT:
675 divisor = rp->time_unit_divisor; 669 return rapl_defaults->compute_time_window(rp, value, to_raw);
676 scale = TIME_UNIT_SCALE;
677 /* special processing based on 2^Y*(1+F)/4 = val/divisor, refer
678 * to Intel Software Developer's manual Vol. 3a, CH 14.7.4.
679 */
680 if (!to_raw) {
681 f = (value & 0x60) >> 5;
682 y = value & 0x1f;
683 value = (1 << y) * (4 + f) * scale / 4;
684 return div64_u64(value, divisor);
685 } else {
686 do_div(value, scale);
687 value *= divisor;
688 y = ilog2(value);
689 f = div64_u64(4 * (value - (1 << y)), 1 << y);
690 value = (y & 0x1f) | ((f & 0x3) << 5);
691 return value;
692 }
693 break;
694 case ARBITRARY_UNIT: 670 case ARBITRARY_UNIT:
695 default: 671 default:
696 return value; 672 return value;
697 }; 673 };
698 674
699 if (to_raw) 675 if (to_raw)
700 return div64_u64(value * divisor, scale); 676 return div64_u64(value, units);
701 else 677
702 return div64_u64(value * scale, divisor); 678 value *= units;
679
680 return value;
703} 681}
704 682
705/* in the order of enum rapl_primitives */ 683/* in the order of enum rapl_primitives */
@@ -833,12 +811,18 @@ static int rapl_write_data_raw(struct rapl_domain *rd,
833 return 0; 811 return 0;
834} 812}
835 813
836static const struct x86_cpu_id energy_unit_quirk_ids[] = { 814/*
837 { X86_VENDOR_INTEL, 6, 0x37},/* Valleyview */ 815 * Raw RAPL data stored in MSRs are in certain scales. We need to
838 {} 816 * convert them into standard units based on the units reported in
839}; 817 * the RAPL unit MSRs. This is specific to CPUs as the method to
840 818 * calculate units differ on different CPUs.
841static int rapl_check_unit(struct rapl_package *rp, int cpu) 819 * We convert the units to below format based on CPUs.
820 * i.e.
821 * energy unit: microJoules : Represented in microJoules by default
822 * power unit : microWatts : Represented in milliWatts by default
823 * time unit : microseconds: Represented in seconds by default
824 */
825static int rapl_check_unit_core(struct rapl_package *rp, int cpu)
842{ 826{
843 u64 msr_val; 827 u64 msr_val;
844 u32 value; 828 u32 value;
@@ -849,36 +833,47 @@ static int rapl_check_unit(struct rapl_package *rp, int cpu)
849 return -ENODEV; 833 return -ENODEV;
850 } 834 }
851 835
852 /* Raw RAPL data stored in MSRs are in certain scales. We need to
853 * convert them into standard units based on the divisors reported in
854 * the RAPL unit MSRs.
855 * i.e.
856 * energy unit: 1/enery_unit_divisor Joules
857 * power unit: 1/power_unit_divisor Watts
858 * time unit: 1/time_unit_divisor Seconds
859 */
860 value = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET; 836 value = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
861 /* some CPUs have different way to calculate energy unit */ 837 rp->energy_unit = 1000000 / (1 << value);
862 if (x86_match_cpu(energy_unit_quirk_ids))
863 rp->energy_unit_divisor = 1000000 / (1 << value);
864 else
865 rp->energy_unit_divisor = 1 << value;
866 838
867 value = (msr_val & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET; 839 value = (msr_val & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
868 rp->power_unit_divisor = 1 << value; 840 rp->power_unit = 1000000 / (1 << value);
869 841
870 value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET; 842 value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
871 rp->time_unit_divisor = 1 << value; 843 rp->time_unit = 1000000 / (1 << value);
872 844
873 pr_debug("Physical package %d units: energy=%d, time=%d, power=%d\n", 845 pr_debug("Core CPU package %d energy=%duJ, time=%dus, power=%duW\n",
874 rp->id, 846 rp->id, rp->energy_unit, rp->time_unit, rp->power_unit);
875 rp->energy_unit_divisor,
876 rp->time_unit_divisor,
877 rp->power_unit_divisor);
878 847
879 return 0; 848 return 0;
880} 849}
881 850
851static int rapl_check_unit_atom(struct rapl_package *rp, int cpu)
852{
853 u64 msr_val;
854 u32 value;
855
856 if (rdmsrl_safe_on_cpu(cpu, MSR_RAPL_POWER_UNIT, &msr_val)) {
857 pr_err("Failed to read power unit MSR 0x%x on CPU %d, exit.\n",
858 MSR_RAPL_POWER_UNIT, cpu);
859 return -ENODEV;
860 }
861 value = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET;
862 rp->energy_unit = 1 << value;
863
864 value = (msr_val & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET;
865 rp->power_unit = (1 << value) * 1000;
866
867 value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET;
868 rp->time_unit = 1000000 / (1 << value);
869
870 pr_debug("Atom package %d energy=%duJ, time=%dus, power=%duW\n",
871 rp->id, rp->energy_unit, rp->time_unit, rp->power_unit);
872
873 return 0;
874}
875
876
882/* REVISIT: 877/* REVISIT:
883 * When package power limit is set artificially low by RAPL, LVT 878 * When package power limit is set artificially low by RAPL, LVT
884 * thermal interrupt for package power limit should be ignored 879 * thermal interrupt for package power limit should be ignored
@@ -946,16 +941,107 @@ static void package_power_limit_irq_restore(int package_id)
946 wrmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); 941 wrmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h);
947} 942}
948 943
944static void set_floor_freq_default(struct rapl_domain *rd, bool mode)
945{
946 int nr_powerlimit = find_nr_power_limit(rd);
947
948 /* always enable clamp such that p-state can go below OS requested
949 * range. power capping priority over guranteed frequency.
950 */
951 rapl_write_data_raw(rd, PL1_CLAMP, mode);
952
953 /* some domains have pl2 */
954 if (nr_powerlimit > 1) {
955 rapl_write_data_raw(rd, PL2_ENABLE, mode);
956 rapl_write_data_raw(rd, PL2_CLAMP, mode);
957 }
958}
959
960static void set_floor_freq_atom(struct rapl_domain *rd, bool enable)
961{
962 static u32 power_ctrl_orig_val;
963 u32 mdata;
964
965 if (!power_ctrl_orig_val)
966 iosf_mbi_read(BT_MBI_UNIT_PMC, BT_MBI_PMC_READ,
967 IOSF_CPU_POWER_BUDGET_CTL, &power_ctrl_orig_val);
968 mdata = power_ctrl_orig_val;
969 if (enable) {
970 mdata &= ~(0x7f << 8);
971 mdata |= 1 << 8;
972 }
973 iosf_mbi_write(BT_MBI_UNIT_PMC, BT_MBI_PMC_WRITE,
974 IOSF_CPU_POWER_BUDGET_CTL, mdata);
975}
976
977static u64 rapl_compute_time_window_core(struct rapl_package *rp, u64 value,
978 bool to_raw)
979{
980 u64 f, y; /* fraction and exp. used for time unit */
981
982 /*
983 * Special processing based on 2^Y*(1+F/4), refer
984 * to Intel Software Developer's manual Vol.3B: CH 14.9.3.
985 */
986 if (!to_raw) {
987 f = (value & 0x60) >> 5;
988 y = value & 0x1f;
989 value = (1 << y) * (4 + f) * rp->time_unit / 4;
990 } else {
991 do_div(value, rp->time_unit);
992 y = ilog2(value);
993 f = div64_u64(4 * (value - (1 << y)), 1 << y);
994 value = (y & 0x1f) | ((f & 0x3) << 5);
995 }
996 return value;
997}
998
999static u64 rapl_compute_time_window_atom(struct rapl_package *rp, u64 value,
1000 bool to_raw)
1001{
1002 /*
1003 * Atom time unit encoding is straight forward val * time_unit,
1004 * where time_unit is default to 1 sec. Never 0.
1005 */
1006 if (!to_raw)
1007 return (value) ? value *= rp->time_unit : rp->time_unit;
1008 else
1009 value = div64_u64(value, rp->time_unit);
1010
1011 return value;
1012}
1013
1014static const struct rapl_defaults rapl_defaults_core = {
1015 .check_unit = rapl_check_unit_core,
1016 .set_floor_freq = set_floor_freq_default,
1017 .compute_time_window = rapl_compute_time_window_core,
1018};
1019
1020static const struct rapl_defaults rapl_defaults_atom = {
1021 .check_unit = rapl_check_unit_atom,
1022 .set_floor_freq = set_floor_freq_atom,
1023 .compute_time_window = rapl_compute_time_window_atom,
1024};
1025
/*
 * Build one x86_cpu_id initializer for an Intel family 6 CPU.
 * @_model: CPU model number
 * @_ops:   hook table object; its address is stored in driver_data
 *
 * Both arguments are parenthesized so that any expression passed in is
 * used as a whole.
 */
#define RAPL_CPU(_model, _ops) {			\
		.vendor = X86_VENDOR_INTEL,		\
		.family = 6,				\
		.model = (_model),			\
		.driver_data = (kernel_ulong_t)&(_ops),	\
		}
1032
949static const struct x86_cpu_id rapl_ids[] = { 1033static const struct x86_cpu_id rapl_ids[] = {
950 { X86_VENDOR_INTEL, 6, 0x2a},/* Sandy Bridge */ 1034 RAPL_CPU(0x2a, rapl_defaults_core),/* Sandy Bridge */
951 { X86_VENDOR_INTEL, 6, 0x2d},/* Sandy Bridge EP */ 1035 RAPL_CPU(0x2d, rapl_defaults_core),/* Sandy Bridge EP */
952 { X86_VENDOR_INTEL, 6, 0x37},/* Valleyview */ 1036 RAPL_CPU(0x37, rapl_defaults_atom),/* Valleyview */
953 { X86_VENDOR_INTEL, 6, 0x3a},/* Ivy Bridge */ 1037 RAPL_CPU(0x3a, rapl_defaults_core),/* Ivy Bridge */
954 { X86_VENDOR_INTEL, 6, 0x3c},/* Haswell */ 1038 RAPL_CPU(0x3c, rapl_defaults_core),/* Haswell */
955 { X86_VENDOR_INTEL, 6, 0x3d},/* Broadwell */ 1039 RAPL_CPU(0x3d, rapl_defaults_core),/* Broadwell */
956 { X86_VENDOR_INTEL, 6, 0x3f},/* Haswell */ 1040 RAPL_CPU(0x3f, rapl_defaults_core),/* Haswell */
957 { X86_VENDOR_INTEL, 6, 0x45},/* Haswell ULT */ 1041 RAPL_CPU(0x45, rapl_defaults_core),/* Haswell ULT */
958 /* TODO: Add more CPU IDs after testing */ 1042 RAPL_CPU(0x4C, rapl_defaults_atom),/* Braswell */
1043 RAPL_CPU(0x4A, rapl_defaults_atom),/* Tangier */
1044 RAPL_CPU(0x5A, rapl_defaults_atom),/* Annidale */
959 {} 1045 {}
960}; 1046};
961MODULE_DEVICE_TABLE(x86cpu, rapl_ids); 1047MODULE_DEVICE_TABLE(x86cpu, rapl_ids);
@@ -1241,7 +1327,7 @@ static int rapl_detect_topology(void)
1241 1327
1242 /* check if the package contains valid domains */ 1328 /* check if the package contains valid domains */
1243 if (rapl_detect_domains(new_package, i) || 1329 if (rapl_detect_domains(new_package, i) ||
1244 rapl_check_unit(new_package, i)) { 1330 rapl_defaults->check_unit(new_package, i)) {
1245 kfree(new_package->domains); 1331 kfree(new_package->domains);
1246 kfree(new_package); 1332 kfree(new_package);
1247 /* free up the packages already initialized */ 1333 /* free up the packages already initialized */
@@ -1296,7 +1382,7 @@ static int rapl_add_package(int cpu)
1296 rp->nr_cpus = 1; 1382 rp->nr_cpus = 1;
1297 /* check if the package contains valid domains */ 1383 /* check if the package contains valid domains */
1298 if (rapl_detect_domains(rp, cpu) || 1384 if (rapl_detect_domains(rp, cpu) ||
1299 rapl_check_unit(rp, cpu)) { 1385 rapl_defaults->check_unit(rp, cpu)) {
1300 ret = -ENODEV; 1386 ret = -ENODEV;
1301 goto err_free_package; 1387 goto err_free_package;
1302 } 1388 }
@@ -1358,14 +1444,18 @@ static struct notifier_block rapl_cpu_notifier = {
1358static int __init rapl_init(void) 1444static int __init rapl_init(void)
1359{ 1445{
1360 int ret = 0; 1446 int ret = 0;
1447 const struct x86_cpu_id *id;
1361 1448
1362 if (!x86_match_cpu(rapl_ids)) { 1449 id = x86_match_cpu(rapl_ids);
1450 if (!id) {
1363 pr_err("driver does not support CPU family %d model %d\n", 1451 pr_err("driver does not support CPU family %d model %d\n",
1364 boot_cpu_data.x86, boot_cpu_data.x86_model); 1452 boot_cpu_data.x86, boot_cpu_data.x86_model);
1365 1453
1366 return -ENODEV; 1454 return -ENODEV;
1367 } 1455 }
1368 1456
1457 rapl_defaults = (struct rapl_defaults *)id->driver_data;
1458
1369 cpu_notifier_register_begin(); 1459 cpu_notifier_register_begin();
1370 1460
1371 /* prevent CPU hotplug during detection */ 1461 /* prevent CPU hotplug during detection */