diff options
Diffstat (limited to 'drivers/powercap/intel_rapl.c')
-rw-r--r-- | drivers/powercap/intel_rapl.c | 264 |
1 files changed, 177 insertions, 87 deletions
diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c index 45e05b32f9b6..c71443c4f265 100644 --- a/drivers/powercap/intel_rapl.c +++ b/drivers/powercap/intel_rapl.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/sysfs.h> | 29 | #include <linux/sysfs.h> |
30 | #include <linux/cpu.h> | 30 | #include <linux/cpu.h> |
31 | #include <linux/powercap.h> | 31 | #include <linux/powercap.h> |
32 | #include <asm/iosf_mbi.h> | ||
32 | 33 | ||
33 | #include <asm/processor.h> | 34 | #include <asm/processor.h> |
34 | #include <asm/cpu_device_id.h> | 35 | #include <asm/cpu_device_id.h> |
@@ -70,11 +71,6 @@ | |||
70 | #define RAPL_PRIMITIVE_DERIVED BIT(1) /* not from raw data */ | 71 | #define RAPL_PRIMITIVE_DERIVED BIT(1) /* not from raw data */ |
71 | #define RAPL_PRIMITIVE_DUMMY BIT(2) | 72 | #define RAPL_PRIMITIVE_DUMMY BIT(2) |
72 | 73 | ||
73 | /* scale RAPL units to avoid floating point math inside kernel */ | ||
74 | #define POWER_UNIT_SCALE (1000000) | ||
75 | #define ENERGY_UNIT_SCALE (1000000) | ||
76 | #define TIME_UNIT_SCALE (1000000) | ||
77 | |||
78 | #define TIME_WINDOW_MAX_MSEC 40000 | 74 | #define TIME_WINDOW_MAX_MSEC 40000 |
79 | #define TIME_WINDOW_MIN_MSEC 250 | 75 | #define TIME_WINDOW_MIN_MSEC 250 |
80 | 76 | ||
@@ -175,9 +171,9 @@ struct rapl_package { | |||
175 | unsigned int id; /* physical package/socket id */ | 171 | unsigned int id; /* physical package/socket id */ |
176 | unsigned int nr_domains; | 172 | unsigned int nr_domains; |
177 | unsigned long domain_map; /* bit map of active domains */ | 173 | unsigned long domain_map; /* bit map of active domains */ |
178 | unsigned int power_unit_divisor; | 174 | unsigned int power_unit; |
179 | unsigned int energy_unit_divisor; | 175 | unsigned int energy_unit; |
180 | unsigned int time_unit_divisor; | 176 | unsigned int time_unit; |
181 | struct rapl_domain *domains; /* array of domains, sized at runtime */ | 177 | struct rapl_domain *domains; /* array of domains, sized at runtime */ |
182 | struct powercap_zone *power_zone; /* keep track of parent zone */ | 178 | struct powercap_zone *power_zone; /* keep track of parent zone */ |
183 | int nr_cpus; /* active cpus on the package, topology info is lost during | 179 | int nr_cpus; /* active cpus on the package, topology info is lost during |
@@ -188,6 +184,18 @@ struct rapl_package { | |||
188 | */ | 184 | */ |
189 | struct list_head plist; | 185 | struct list_head plist; |
190 | }; | 186 | }; |
187 | |||
188 | struct rapl_defaults { | ||
189 | int (*check_unit)(struct rapl_package *rp, int cpu); | ||
190 | void (*set_floor_freq)(struct rapl_domain *rd, bool mode); | ||
191 | u64 (*compute_time_window)(struct rapl_package *rp, u64 val, | ||
192 | bool to_raw); | ||
193 | }; | ||
194 | static struct rapl_defaults *rapl_defaults; | ||
195 | |||
196 | /* Sideband MBI registers */ | ||
197 | #define IOSF_CPU_POWER_BUDGET_CTL (0x2) | ||
198 | |||
191 | #define PACKAGE_PLN_INT_SAVED BIT(0) | 199 | #define PACKAGE_PLN_INT_SAVED BIT(0) |
192 | #define MAX_PRIM_NAME (32) | 200 | #define MAX_PRIM_NAME (32) |
193 | 201 | ||
@@ -339,23 +347,13 @@ static int find_nr_power_limit(struct rapl_domain *rd) | |||
339 | static int set_domain_enable(struct powercap_zone *power_zone, bool mode) | 347 | static int set_domain_enable(struct powercap_zone *power_zone, bool mode) |
340 | { | 348 | { |
341 | struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone); | 349 | struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone); |
342 | int nr_powerlimit; | ||
343 | 350 | ||
344 | if (rd->state & DOMAIN_STATE_BIOS_LOCKED) | 351 | if (rd->state & DOMAIN_STATE_BIOS_LOCKED) |
345 | return -EACCES; | 352 | return -EACCES; |
353 | |||
346 | get_online_cpus(); | 354 | get_online_cpus(); |
347 | nr_powerlimit = find_nr_power_limit(rd); | ||
348 | /* here we activate/deactivate the hardware for power limiting */ | ||
349 | rapl_write_data_raw(rd, PL1_ENABLE, mode); | 355 | rapl_write_data_raw(rd, PL1_ENABLE, mode); |
350 | /* always enable clamp such that p-state can go below OS requested | 356 | rapl_defaults->set_floor_freq(rd, mode); |
351 | * range. Power capping takes priority over guaranteed frequency. | ||
352 | */ | ||
353 | rapl_write_data_raw(rd, PL1_CLAMP, mode); | ||
354 | /* some domains have pl2 */ | ||
355 | if (nr_powerlimit > 1) { | ||
356 | rapl_write_data_raw(rd, PL2_ENABLE, mode); | ||
357 | rapl_write_data_raw(rd, PL2_CLAMP, mode); | ||
358 | } | ||
359 | put_online_cpus(); | 357 | put_online_cpus(); |
360 | 358 | ||
361 | return 0; | 359 | return 0; |
@@ -653,9 +651,7 @@ static void rapl_init_domains(struct rapl_package *rp) | |||
653 | static u64 rapl_unit_xlate(int package, enum unit_type type, u64 value, | 651 | static u64 rapl_unit_xlate(int package, enum unit_type type, u64 value, |
654 | int to_raw) | 652 | int to_raw) |
655 | { | 653 | { |
656 | u64 divisor = 1; | 654 | u64 units = 1; |
657 | int scale = 1; /* scale to user friendly data without floating point */ | ||
658 | u64 f, y; /* fraction and exp. used for time unit */ | ||
659 | struct rapl_package *rp; | 655 | struct rapl_package *rp; |
660 | 656 | ||
661 | rp = find_package_by_id(package); | 657 | rp = find_package_by_id(package); |
@@ -664,42 +660,24 @@ static u64 rapl_unit_xlate(int package, enum unit_type type, u64 value, | |||
664 | 660 | ||
665 | switch (type) { | 661 | switch (type) { |
666 | case POWER_UNIT: | 662 | case POWER_UNIT: |
667 | divisor = rp->power_unit_divisor; | 663 | units = rp->power_unit; |
668 | scale = POWER_UNIT_SCALE; | ||
669 | break; | 664 | break; |
670 | case ENERGY_UNIT: | 665 | case ENERGY_UNIT: |
671 | scale = ENERGY_UNIT_SCALE; | 666 | units = rp->energy_unit; |
672 | divisor = rp->energy_unit_divisor; | ||
673 | break; | 667 | break; |
674 | case TIME_UNIT: | 668 | case TIME_UNIT: |
675 | divisor = rp->time_unit_divisor; | 669 | return rapl_defaults->compute_time_window(rp, value, to_raw); |
676 | scale = TIME_UNIT_SCALE; | ||
677 | /* special processing based on 2^Y*(1+F)/4 = val/divisor, refer | ||
678 | * to Intel Software Developer's manual Vol. 3a, CH 14.7.4. | ||
679 | */ | ||
680 | if (!to_raw) { | ||
681 | f = (value & 0x60) >> 5; | ||
682 | y = value & 0x1f; | ||
683 | value = (1 << y) * (4 + f) * scale / 4; | ||
684 | return div64_u64(value, divisor); | ||
685 | } else { | ||
686 | do_div(value, scale); | ||
687 | value *= divisor; | ||
688 | y = ilog2(value); | ||
689 | f = div64_u64(4 * (value - (1 << y)), 1 << y); | ||
690 | value = (y & 0x1f) | ((f & 0x3) << 5); | ||
691 | return value; | ||
692 | } | ||
693 | break; | ||
694 | case ARBITRARY_UNIT: | 670 | case ARBITRARY_UNIT: |
695 | default: | 671 | default: |
696 | return value; | 672 | return value; |
697 | }; | 673 | }; |
698 | 674 | ||
699 | if (to_raw) | 675 | if (to_raw) |
700 | return div64_u64(value * divisor, scale); | 676 | return div64_u64(value, units); |
701 | else | 677 | |
702 | return div64_u64(value * scale, divisor); | 678 | value *= units; |
679 | |||
680 | return value; | ||
703 | } | 681 | } |
704 | 682 | ||
705 | /* in the order of enum rapl_primitives */ | 683 | /* in the order of enum rapl_primitives */ |
@@ -833,12 +811,18 @@ static int rapl_write_data_raw(struct rapl_domain *rd, | |||
833 | return 0; | 811 | return 0; |
834 | } | 812 | } |
835 | 813 | ||
836 | static const struct x86_cpu_id energy_unit_quirk_ids[] = { | 814 | /* |
837 | { X86_VENDOR_INTEL, 6, 0x37},/* Valleyview */ | 815 | * Raw RAPL data stored in MSRs are in certain scales. We need to |
838 | {} | 816 | * convert them into standard units based on the units reported in |
839 | }; | 817 | * the RAPL unit MSRs. This is specific to CPUs as the method to |
840 | 818 | * calculate units differs on different CPUs. | |
841 | static int rapl_check_unit(struct rapl_package *rp, int cpu) | 819 | * We convert the units to below format based on CPUs. |
820 | * i.e. | ||
821 | * energy unit: microJoules : Represented in microJoules by default | ||
822 | * power unit : microWatts : Represented in milliWatts by default | ||
823 | * time unit : microseconds: Represented in seconds by default | ||
824 | */ | ||
825 | static int rapl_check_unit_core(struct rapl_package *rp, int cpu) | ||
842 | { | 826 | { |
843 | u64 msr_val; | 827 | u64 msr_val; |
844 | u32 value; | 828 | u32 value; |
@@ -849,36 +833,47 @@ static int rapl_check_unit(struct rapl_package *rp, int cpu) | |||
849 | return -ENODEV; | 833 | return -ENODEV; |
850 | } | 834 | } |
851 | 835 | ||
852 | /* Raw RAPL data stored in MSRs are in certain scales. We need to | ||
853 | * convert them into standard units based on the divisors reported in | ||
854 | * the RAPL unit MSRs. | ||
855 | * i.e. | ||
856 | * energy unit: 1/energy_unit_divisor Joules | ||
857 | * power unit: 1/power_unit_divisor Watts | ||
858 | * time unit: 1/time_unit_divisor Seconds | ||
859 | */ | ||
860 | value = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET; | 836 | value = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET; |
861 | /* some CPUs have different way to calculate energy unit */ | 837 | rp->energy_unit = 1000000 / (1 << value); |
862 | if (x86_match_cpu(energy_unit_quirk_ids)) | ||
863 | rp->energy_unit_divisor = 1000000 / (1 << value); | ||
864 | else | ||
865 | rp->energy_unit_divisor = 1 << value; | ||
866 | 838 | ||
867 | value = (msr_val & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET; | 839 | value = (msr_val & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET; |
868 | rp->power_unit_divisor = 1 << value; | 840 | rp->power_unit = 1000000 / (1 << value); |
869 | 841 | ||
870 | value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET; | 842 | value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET; |
871 | rp->time_unit_divisor = 1 << value; | 843 | rp->time_unit = 1000000 / (1 << value); |
872 | 844 | ||
873 | pr_debug("Physical package %d units: energy=%d, time=%d, power=%d\n", | 845 | pr_debug("Core CPU package %d energy=%duJ, time=%dus, power=%duW\n", |
874 | rp->id, | 846 | rp->id, rp->energy_unit, rp->time_unit, rp->power_unit); |
875 | rp->energy_unit_divisor, | ||
876 | rp->time_unit_divisor, | ||
877 | rp->power_unit_divisor); | ||
878 | 847 | ||
879 | return 0; | 848 | return 0; |
880 | } | 849 | } |
881 | 850 | ||
851 | static int rapl_check_unit_atom(struct rapl_package *rp, int cpu) | ||
852 | { | ||
853 | u64 msr_val; | ||
854 | u32 value; | ||
855 | |||
856 | if (rdmsrl_safe_on_cpu(cpu, MSR_RAPL_POWER_UNIT, &msr_val)) { | ||
857 | pr_err("Failed to read power unit MSR 0x%x on CPU %d, exit.\n", | ||
858 | MSR_RAPL_POWER_UNIT, cpu); | ||
859 | return -ENODEV; | ||
860 | } | ||
861 | value = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET; | ||
862 | rp->energy_unit = 1 << value; | ||
863 | |||
864 | value = (msr_val & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET; | ||
865 | rp->power_unit = (1 << value) * 1000; | ||
866 | |||
867 | value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET; | ||
868 | rp->time_unit = 1000000 / (1 << value); | ||
869 | |||
870 | pr_debug("Atom package %d energy=%duJ, time=%dus, power=%duW\n", | ||
871 | rp->id, rp->energy_unit, rp->time_unit, rp->power_unit); | ||
872 | |||
873 | return 0; | ||
874 | } | ||
875 | |||
876 | |||
882 | /* REVISIT: | 877 | /* REVISIT: |
883 | * When package power limit is set artificially low by RAPL, LVT | 878 | * When package power limit is set artificially low by RAPL, LVT |
884 | * thermal interrupt for package power limit should be ignored | 879 | * thermal interrupt for package power limit should be ignored |
@@ -946,16 +941,107 @@ static void package_power_limit_irq_restore(int package_id) | |||
946 | wrmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); | 941 | wrmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); |
947 | } | 942 | } |
948 | 943 | ||
944 | static void set_floor_freq_default(struct rapl_domain *rd, bool mode) | ||
945 | { | ||
946 | int nr_powerlimit = find_nr_power_limit(rd); | ||
947 | |||
948 | /* always enable clamp such that p-state can go below OS requested | ||
949 | * range. Power capping takes priority over guaranteed frequency. | ||
950 | */ | ||
951 | rapl_write_data_raw(rd, PL1_CLAMP, mode); | ||
952 | |||
953 | /* some domains have pl2 */ | ||
954 | if (nr_powerlimit > 1) { | ||
955 | rapl_write_data_raw(rd, PL2_ENABLE, mode); | ||
956 | rapl_write_data_raw(rd, PL2_CLAMP, mode); | ||
957 | } | ||
958 | } | ||
959 | |||
960 | static void set_floor_freq_atom(struct rapl_domain *rd, bool enable) | ||
961 | { | ||
962 | static u32 power_ctrl_orig_val; | ||
963 | u32 mdata; | ||
964 | |||
965 | if (!power_ctrl_orig_val) | ||
966 | iosf_mbi_read(BT_MBI_UNIT_PMC, BT_MBI_PMC_READ, | ||
967 | IOSF_CPU_POWER_BUDGET_CTL, &power_ctrl_orig_val); | ||
968 | mdata = power_ctrl_orig_val; | ||
969 | if (enable) { | ||
970 | mdata &= ~(0x7f << 8); | ||
971 | mdata |= 1 << 8; | ||
972 | } | ||
973 | iosf_mbi_write(BT_MBI_UNIT_PMC, BT_MBI_PMC_WRITE, | ||
974 | IOSF_CPU_POWER_BUDGET_CTL, mdata); | ||
975 | } | ||
976 | |||
977 | static u64 rapl_compute_time_window_core(struct rapl_package *rp, u64 value, | ||
978 | bool to_raw) | ||
979 | { | ||
980 | u64 f, y; /* fraction and exp. used for time unit */ | ||
981 | |||
982 | /* | ||
983 | * Special processing based on 2^Y*(1+F/4), refer | ||
984 | * to Intel Software Developer's manual Vol.3B: CH 14.9.3. | ||
985 | */ | ||
986 | if (!to_raw) { | ||
987 | f = (value & 0x60) >> 5; | ||
988 | y = value & 0x1f; | ||
989 | value = (1 << y) * (4 + f) * rp->time_unit / 4; | ||
990 | } else { | ||
991 | do_div(value, rp->time_unit); | ||
992 | y = ilog2(value); | ||
993 | f = div64_u64(4 * (value - (1 << y)), 1 << y); | ||
994 | value = (y & 0x1f) | ((f & 0x3) << 5); | ||
995 | } | ||
996 | return value; | ||
997 | } | ||
998 | |||
999 | static u64 rapl_compute_time_window_atom(struct rapl_package *rp, u64 value, | ||
1000 | bool to_raw) | ||
1001 | { | ||
1002 | /* | ||
1003 | * Atom time unit encoding is straightforward: val * time_unit, | ||
1004 | * where time_unit defaults to 1 sec. Never 0. | ||
1005 | */ | ||
1006 | if (!to_raw) | ||
1007 | return (value) ? value *= rp->time_unit : rp->time_unit; | ||
1008 | else | ||
1009 | value = div64_u64(value, rp->time_unit); | ||
1010 | |||
1011 | return value; | ||
1012 | } | ||
1013 | |||
1014 | static const struct rapl_defaults rapl_defaults_core = { | ||
1015 | .check_unit = rapl_check_unit_core, | ||
1016 | .set_floor_freq = set_floor_freq_default, | ||
1017 | .compute_time_window = rapl_compute_time_window_core, | ||
1018 | }; | ||
1019 | |||
1020 | static const struct rapl_defaults rapl_defaults_atom = { | ||
1021 | .check_unit = rapl_check_unit_atom, | ||
1022 | .set_floor_freq = set_floor_freq_atom, | ||
1023 | .compute_time_window = rapl_compute_time_window_atom, | ||
1024 | }; | ||
1025 | |||
1026 | #define RAPL_CPU(_model, _ops) { \ | ||
1027 | .vendor = X86_VENDOR_INTEL, \ | ||
1028 | .family = 6, \ | ||
1029 | .model = _model, \ | ||
1030 | .driver_data = (kernel_ulong_t)&_ops, \ | ||
1031 | } | ||
1032 | |||
949 | static const struct x86_cpu_id rapl_ids[] = { | 1033 | static const struct x86_cpu_id rapl_ids[] = { |
950 | { X86_VENDOR_INTEL, 6, 0x2a},/* Sandy Bridge */ | 1034 | RAPL_CPU(0x2a, rapl_defaults_core),/* Sandy Bridge */ |
951 | { X86_VENDOR_INTEL, 6, 0x2d},/* Sandy Bridge EP */ | 1035 | RAPL_CPU(0x2d, rapl_defaults_core),/* Sandy Bridge EP */ |
952 | { X86_VENDOR_INTEL, 6, 0x37},/* Valleyview */ | 1036 | RAPL_CPU(0x37, rapl_defaults_atom),/* Valleyview */ |
953 | { X86_VENDOR_INTEL, 6, 0x3a},/* Ivy Bridge */ | 1037 | RAPL_CPU(0x3a, rapl_defaults_core),/* Ivy Bridge */ |
954 | { X86_VENDOR_INTEL, 6, 0x3c},/* Haswell */ | 1038 | RAPL_CPU(0x3c, rapl_defaults_core),/* Haswell */ |
955 | { X86_VENDOR_INTEL, 6, 0x3d},/* Broadwell */ | 1039 | RAPL_CPU(0x3d, rapl_defaults_core),/* Broadwell */ |
956 | { X86_VENDOR_INTEL, 6, 0x3f},/* Haswell */ | 1040 | RAPL_CPU(0x3f, rapl_defaults_core),/* Haswell */ |
957 | { X86_VENDOR_INTEL, 6, 0x45},/* Haswell ULT */ | 1041 | RAPL_CPU(0x45, rapl_defaults_core),/* Haswell ULT */ |
958 | /* TODO: Add more CPU IDs after testing */ | 1042 | RAPL_CPU(0x4C, rapl_defaults_atom),/* Braswell */ |
1043 | RAPL_CPU(0x4A, rapl_defaults_atom),/* Tangier */ | ||
1044 | RAPL_CPU(0x5A, rapl_defaults_atom),/* Annidale */ | ||
959 | {} | 1045 | {} |
960 | }; | 1046 | }; |
961 | MODULE_DEVICE_TABLE(x86cpu, rapl_ids); | 1047 | MODULE_DEVICE_TABLE(x86cpu, rapl_ids); |
@@ -1241,7 +1327,7 @@ static int rapl_detect_topology(void) | |||
1241 | 1327 | ||
1242 | /* check if the package contains valid domains */ | 1328 | /* check if the package contains valid domains */ |
1243 | if (rapl_detect_domains(new_package, i) || | 1329 | if (rapl_detect_domains(new_package, i) || |
1244 | rapl_check_unit(new_package, i)) { | 1330 | rapl_defaults->check_unit(new_package, i)) { |
1245 | kfree(new_package->domains); | 1331 | kfree(new_package->domains); |
1246 | kfree(new_package); | 1332 | kfree(new_package); |
1247 | /* free up the packages already initialized */ | 1333 | /* free up the packages already initialized */ |
@@ -1296,7 +1382,7 @@ static int rapl_add_package(int cpu) | |||
1296 | rp->nr_cpus = 1; | 1382 | rp->nr_cpus = 1; |
1297 | /* check if the package contains valid domains */ | 1383 | /* check if the package contains valid domains */ |
1298 | if (rapl_detect_domains(rp, cpu) || | 1384 | if (rapl_detect_domains(rp, cpu) || |
1299 | rapl_check_unit(rp, cpu)) { | 1385 | rapl_defaults->check_unit(rp, cpu)) { |
1300 | ret = -ENODEV; | 1386 | ret = -ENODEV; |
1301 | goto err_free_package; | 1387 | goto err_free_package; |
1302 | } | 1388 | } |
@@ -1358,14 +1444,18 @@ static struct notifier_block rapl_cpu_notifier = { | |||
1358 | static int __init rapl_init(void) | 1444 | static int __init rapl_init(void) |
1359 | { | 1445 | { |
1360 | int ret = 0; | 1446 | int ret = 0; |
1447 | const struct x86_cpu_id *id; | ||
1361 | 1448 | ||
1362 | if (!x86_match_cpu(rapl_ids)) { | 1449 | id = x86_match_cpu(rapl_ids); |
1450 | if (!id) { | ||
1363 | pr_err("driver does not support CPU family %d model %d\n", | 1451 | pr_err("driver does not support CPU family %d model %d\n", |
1364 | boot_cpu_data.x86, boot_cpu_data.x86_model); | 1452 | boot_cpu_data.x86, boot_cpu_data.x86_model); |
1365 | 1453 | ||
1366 | return -ENODEV; | 1454 | return -ENODEV; |
1367 | } | 1455 | } |
1368 | 1456 | ||
1457 | rapl_defaults = (struct rapl_defaults *)id->driver_data; | ||
1458 | |||
1369 | cpu_notifier_register_begin(); | 1459 | cpu_notifier_register_begin(); |
1370 | 1460 | ||
1371 | /* prevent CPU hotplug during detection */ | 1461 | /* prevent CPU hotplug during detection */ |