diff options
-rw-r--r-- | drivers/powercap/intel_rapl.c | 216 |
1 files changed, 139 insertions, 77 deletions
diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c index 256efed5b88f..4631696a7ccf 100644 --- a/drivers/powercap/intel_rapl.c +++ b/drivers/powercap/intel_rapl.c | |||
@@ -29,6 +29,7 @@ | |||
29 | #include <linux/sysfs.h> | 29 | #include <linux/sysfs.h> |
30 | #include <linux/cpu.h> | 30 | #include <linux/cpu.h> |
31 | #include <linux/powercap.h> | 31 | #include <linux/powercap.h> |
32 | #include <asm/iosf_mbi.h> | ||
32 | 33 | ||
33 | #include <asm/processor.h> | 34 | #include <asm/processor.h> |
34 | #include <asm/cpu_device_id.h> | 35 | #include <asm/cpu_device_id.h> |
@@ -70,11 +71,6 @@ | |||
70 | #define RAPL_PRIMITIVE_DERIVED BIT(1) /* not from raw data */ | 71 | #define RAPL_PRIMITIVE_DERIVED BIT(1) /* not from raw data */ |
71 | #define RAPL_PRIMITIVE_DUMMY BIT(2) | 72 | #define RAPL_PRIMITIVE_DUMMY BIT(2) |
72 | 73 | ||
73 | /* scale RAPL units to avoid floating point math inside kernel */ | ||
74 | #define POWER_UNIT_SCALE (1000000) | ||
75 | #define ENERGY_UNIT_SCALE (1000000) | ||
76 | #define TIME_UNIT_SCALE (1000000) | ||
77 | |||
78 | #define TIME_WINDOW_MAX_MSEC 40000 | 74 | #define TIME_WINDOW_MAX_MSEC 40000 |
79 | #define TIME_WINDOW_MIN_MSEC 250 | 75 | #define TIME_WINDOW_MIN_MSEC 250 |
80 | 76 | ||
@@ -175,9 +171,9 @@ struct rapl_package { | |||
175 | unsigned int id; /* physical package/socket id */ | 171 | unsigned int id; /* physical package/socket id */ |
176 | unsigned int nr_domains; | 172 | unsigned int nr_domains; |
177 | unsigned long domain_map; /* bit map of active domains */ | 173 | unsigned long domain_map; /* bit map of active domains */ |
178 | unsigned int power_unit_divisor; | 174 | unsigned int power_unit; |
179 | unsigned int energy_unit_divisor; | 175 | unsigned int energy_unit; |
180 | unsigned int time_unit_divisor; | 176 | unsigned int time_unit; |
181 | struct rapl_domain *domains; /* array of domains, sized at runtime */ | 177 | struct rapl_domain *domains; /* array of domains, sized at runtime */ |
182 | struct powercap_zone *power_zone; /* keep track of parent zone */ | 178 | struct powercap_zone *power_zone; /* keep track of parent zone */ |
183 | int nr_cpus; /* active cpus on the package, topology info is lost during | 179 | int nr_cpus; /* active cpus on the package, topology info is lost during |
@@ -197,6 +193,9 @@ struct rapl_defaults { | |||
197 | }; | 193 | }; |
198 | static struct rapl_defaults *rapl_defaults; | 194 | static struct rapl_defaults *rapl_defaults; |
199 | 195 | ||
196 | /* Sideband MBI registers */ | ||
197 | #define IOSF_CPU_POWER_BUDGET_CTL (0x2) | ||
198 | |||
200 | #define PACKAGE_PLN_INT_SAVED BIT(0) | 199 | #define PACKAGE_PLN_INT_SAVED BIT(0) |
201 | #define MAX_PRIM_NAME (32) | 200 | #define MAX_PRIM_NAME (32) |
202 | 201 | ||
@@ -348,23 +347,13 @@ static int find_nr_power_limit(struct rapl_domain *rd) | |||
348 | static int set_domain_enable(struct powercap_zone *power_zone, bool mode) | 347 | static int set_domain_enable(struct powercap_zone *power_zone, bool mode) |
349 | { | 348 | { |
350 | struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone); | 349 | struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone); |
351 | int nr_powerlimit; | ||
352 | 350 | ||
353 | if (rd->state & DOMAIN_STATE_BIOS_LOCKED) | 351 | if (rd->state & DOMAIN_STATE_BIOS_LOCKED) |
354 | return -EACCES; | 352 | return -EACCES; |
353 | |||
355 | get_online_cpus(); | 354 | get_online_cpus(); |
356 | nr_powerlimit = find_nr_power_limit(rd); | ||
357 | /* here we activate/deactivate the hardware for power limiting */ | ||
358 | rapl_write_data_raw(rd, PL1_ENABLE, mode); | 355 | rapl_write_data_raw(rd, PL1_ENABLE, mode); |
359 | /* always enable clamp such that p-state can go below OS requested | 356 | rapl_defaults->set_floor_freq(rd, mode); |
360 | * range. power capping priority over guranteed frequency. | ||
361 | */ | ||
362 | rapl_write_data_raw(rd, PL1_CLAMP, mode); | ||
363 | /* some domains have pl2 */ | ||
364 | if (nr_powerlimit > 1) { | ||
365 | rapl_write_data_raw(rd, PL2_ENABLE, mode); | ||
366 | rapl_write_data_raw(rd, PL2_CLAMP, mode); | ||
367 | } | ||
368 | put_online_cpus(); | 357 | put_online_cpus(); |
369 | 358 | ||
370 | return 0; | 359 | return 0; |
@@ -662,9 +651,7 @@ static void rapl_init_domains(struct rapl_package *rp) | |||
662 | static u64 rapl_unit_xlate(int package, enum unit_type type, u64 value, | 651 | static u64 rapl_unit_xlate(int package, enum unit_type type, u64 value, |
663 | int to_raw) | 652 | int to_raw) |
664 | { | 653 | { |
665 | u64 divisor = 1; | 654 | u64 units = 1; |
666 | int scale = 1; /* scale to user friendly data without floating point */ | ||
667 | u64 f, y; /* fraction and exp. used for time unit */ | ||
668 | struct rapl_package *rp; | 655 | struct rapl_package *rp; |
669 | 656 | ||
670 | rp = find_package_by_id(package); | 657 | rp = find_package_by_id(package); |
@@ -673,42 +660,24 @@ static u64 rapl_unit_xlate(int package, enum unit_type type, u64 value, | |||
673 | 660 | ||
674 | switch (type) { | 661 | switch (type) { |
675 | case POWER_UNIT: | 662 | case POWER_UNIT: |
676 | divisor = rp->power_unit_divisor; | 663 | units = rp->power_unit; |
677 | scale = POWER_UNIT_SCALE; | ||
678 | break; | 664 | break; |
679 | case ENERGY_UNIT: | 665 | case ENERGY_UNIT: |
680 | scale = ENERGY_UNIT_SCALE; | 666 | units = rp->energy_unit; |
681 | divisor = rp->energy_unit_divisor; | ||
682 | break; | 667 | break; |
683 | case TIME_UNIT: | 668 | case TIME_UNIT: |
684 | divisor = rp->time_unit_divisor; | 669 | return rapl_defaults->compute_time_window(rp, value, to_raw); |
685 | scale = TIME_UNIT_SCALE; | ||
686 | /* special processing based on 2^Y*(1+F)/4 = val/divisor, refer | ||
687 | * to Intel Software Developer's manual Vol. 3a, CH 14.7.4. | ||
688 | */ | ||
689 | if (!to_raw) { | ||
690 | f = (value & 0x60) >> 5; | ||
691 | y = value & 0x1f; | ||
692 | value = (1 << y) * (4 + f) * scale / 4; | ||
693 | return div64_u64(value, divisor); | ||
694 | } else { | ||
695 | do_div(value, scale); | ||
696 | value *= divisor; | ||
697 | y = ilog2(value); | ||
698 | f = div64_u64(4 * (value - (1 << y)), 1 << y); | ||
699 | value = (y & 0x1f) | ((f & 0x3) << 5); | ||
700 | return value; | ||
701 | } | ||
702 | break; | ||
703 | case ARBITRARY_UNIT: | 670 | case ARBITRARY_UNIT: |
704 | default: | 671 | default: |
705 | return value; | 672 | return value; |
706 | }; | 673 | }; |
707 | 674 | ||
708 | if (to_raw) | 675 | if (to_raw) |
709 | return div64_u64(value * divisor, scale); | 676 | return div64_u64(value, units); |
710 | else | 677 | |
711 | return div64_u64(value * scale, divisor); | 678 | value *= units; |
679 | |||
680 | return value; | ||
712 | } | 681 | } |
713 | 682 | ||
714 | /* in the order of enum rapl_primitives */ | 683 | /* in the order of enum rapl_primitives */ |
@@ -842,12 +811,18 @@ static int rapl_write_data_raw(struct rapl_domain *rd, | |||
842 | return 0; | 811 | return 0; |
843 | } | 812 | } |
844 | 813 | ||
845 | static const struct x86_cpu_id energy_unit_quirk_ids[] = { | 814 | /* |
846 | { X86_VENDOR_INTEL, 6, 0x37},/* Valleyview */ | 815 | * Raw RAPL data stored in MSRs are in certain scales. We need to |
847 | {} | 816 | * convert them into standard units based on the units reported in |
848 | }; | 817 | * the RAPL unit MSRs. This is specific to CPUs as the method to |
849 | 818 | * calculate units differs on different CPUs. | |
850 | static int rapl_check_unit(struct rapl_package *rp, int cpu) | 819 | * We convert the units to below format based on CPUs. |
820 | * i.e. | ||
821 | * energy unit: microJoules : Represented in microJoules by default | ||
822 | * power unit : microWatts : Represented in milliWatts by default | ||
823 | * time unit : microseconds: Represented in seconds by default | ||
824 | */ | ||
825 | static int rapl_check_unit_core(struct rapl_package *rp, int cpu) | ||
851 | { | 826 | { |
852 | u64 msr_val; | 827 | u64 msr_val; |
853 | u32 value; | 828 | u32 value; |
@@ -858,36 +833,47 @@ static int rapl_check_unit(struct rapl_package *rp, int cpu) | |||
858 | return -ENODEV; | 833 | return -ENODEV; |
859 | } | 834 | } |
860 | 835 | ||
861 | /* Raw RAPL data stored in MSRs are in certain scales. We need to | ||
862 | * convert them into standard units based on the divisors reported in | ||
863 | * the RAPL unit MSRs. | ||
864 | * i.e. | ||
865 | * energy unit: 1/enery_unit_divisor Joules | ||
866 | * power unit: 1/power_unit_divisor Watts | ||
867 | * time unit: 1/time_unit_divisor Seconds | ||
868 | */ | ||
869 | value = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET; | 836 | value = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET; |
870 | /* some CPUs have different way to calculate energy unit */ | 837 | rp->energy_unit = 1000000 / (1 << value); |
871 | if (x86_match_cpu(energy_unit_quirk_ids)) | ||
872 | rp->energy_unit_divisor = 1000000 / (1 << value); | ||
873 | else | ||
874 | rp->energy_unit_divisor = 1 << value; | ||
875 | 838 | ||
876 | value = (msr_val & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET; | 839 | value = (msr_val & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET; |
877 | rp->power_unit_divisor = 1 << value; | 840 | rp->power_unit = 1000000 / (1 << value); |
878 | 841 | ||
879 | value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET; | 842 | value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET; |
880 | rp->time_unit_divisor = 1 << value; | 843 | rp->time_unit = 1000000 / (1 << value); |
881 | 844 | ||
882 | pr_debug("Physical package %d units: energy=%d, time=%d, power=%d\n", | 845 | pr_debug("Core CPU package %d energy=%duJ, time=%dus, power=%duW\n", |
883 | rp->id, | 846 | rp->id, rp->energy_unit, rp->time_unit, rp->power_unit); |
884 | rp->energy_unit_divisor, | ||
885 | rp->time_unit_divisor, | ||
886 | rp->power_unit_divisor); | ||
887 | 847 | ||
888 | return 0; | 848 | return 0; |
889 | } | 849 | } |
890 | 850 | ||
851 | static int rapl_check_unit_atom(struct rapl_package *rp, int cpu) | ||
852 | { | ||
853 | u64 msr_val; | ||
854 | u32 value; | ||
855 | |||
856 | if (rdmsrl_safe_on_cpu(cpu, MSR_RAPL_POWER_UNIT, &msr_val)) { | ||
857 | pr_err("Failed to read power unit MSR 0x%x on CPU %d, exit.\n", | ||
858 | MSR_RAPL_POWER_UNIT, cpu); | ||
859 | return -ENODEV; | ||
860 | } | ||
861 | value = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET; | ||
862 | rp->energy_unit = 1 << value; | ||
863 | |||
864 | value = (msr_val & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET; | ||
865 | rp->power_unit = (1 << value) * 1000; | ||
866 | |||
867 | value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET; | ||
868 | rp->time_unit = 1000000 / (1 << value); | ||
869 | |||
870 | pr_debug("Atom package %d energy=%duJ, time=%dus, power=%duW\n", | ||
871 | rp->id, rp->energy_unit, rp->time_unit, rp->power_unit); | ||
872 | |||
873 | return 0; | ||
874 | } | ||
875 | |||
876 | |||
891 | /* REVISIT: | 877 | /* REVISIT: |
892 | * When package power limit is set artificially low by RAPL, LVT | 878 | * When package power limit is set artificially low by RAPL, LVT |
893 | * thermal interrupt for package power limit should be ignored | 879 | * thermal interrupt for package power limit should be ignored |
@@ -955,10 +941,86 @@ static void package_power_limit_irq_restore(int package_id) | |||
955 | wrmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); | 941 | wrmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); |
956 | } | 942 | } |
957 | 943 | ||
944 | static void set_floor_freq_default(struct rapl_domain *rd, bool mode) | ||
945 | { | ||
946 | int nr_powerlimit = find_nr_power_limit(rd); | ||
947 | |||
948 | /* always enable clamp such that p-state can go below OS requested | ||
949 | * range. power capping priority over guaranteed frequency. | ||
950 | */ | ||
951 | rapl_write_data_raw(rd, PL1_CLAMP, mode); | ||
952 | |||
953 | /* some domains have pl2 */ | ||
954 | if (nr_powerlimit > 1) { | ||
955 | rapl_write_data_raw(rd, PL2_ENABLE, mode); | ||
956 | rapl_write_data_raw(rd, PL2_CLAMP, mode); | ||
957 | } | ||
958 | } | ||
959 | |||
960 | static void set_floor_freq_atom(struct rapl_domain *rd, bool enable) | ||
961 | { | ||
962 | static u32 power_ctrl_orig_val; | ||
963 | u32 mdata; | ||
964 | |||
965 | if (!power_ctrl_orig_val) | ||
966 | iosf_mbi_read(BT_MBI_UNIT_PMC, BT_MBI_PMC_READ, | ||
967 | IOSF_CPU_POWER_BUDGET_CTL, &power_ctrl_orig_val); | ||
968 | mdata = power_ctrl_orig_val; | ||
969 | if (enable) { | ||
970 | mdata &= ~(0x7f << 8); | ||
971 | mdata |= 1 << 8; | ||
972 | } | ||
973 | iosf_mbi_write(BT_MBI_UNIT_PMC, BT_MBI_PMC_WRITE, | ||
974 | IOSF_CPU_POWER_BUDGET_CTL, mdata); | ||
975 | } | ||
976 | |||
977 | static u64 rapl_compute_time_window_core(struct rapl_package *rp, u64 value, | ||
978 | bool to_raw) | ||
979 | { | ||
980 | u64 f, y; /* fraction and exp. used for time unit */ | ||
981 | |||
982 | /* | ||
983 | * Special processing based on 2^Y*(1+F/4), refer | ||
984 | * to Intel Software Developer's manual Vol.3B: CH 14.9.3. | ||
985 | */ | ||
986 | if (!to_raw) { | ||
987 | f = (value & 0x60) >> 5; | ||
988 | y = value & 0x1f; | ||
989 | value = (1 << y) * (4 + f) * rp->time_unit / 4; | ||
990 | } else { | ||
991 | do_div(value, rp->time_unit); | ||
992 | y = ilog2(value); | ||
993 | f = div64_u64(4 * (value - (1 << y)), 1 << y); | ||
994 | value = (y & 0x1f) | ((f & 0x3) << 5); | ||
995 | } | ||
996 | return value; | ||
997 | } | ||
998 | |||
999 | static u64 rapl_compute_time_window_atom(struct rapl_package *rp, u64 value, | ||
1000 | bool to_raw) | ||
1001 | { | ||
1002 | /* | ||
1003 | * Atom time unit encoding is straightforward: val * time_unit, | ||
1004 | * where time_unit defaults to 1 sec. Never 0. | ||
1005 | */ | ||
1006 | if (!to_raw) | ||
1007 | return (value) ? value *= rp->time_unit : rp->time_unit; | ||
1008 | else | ||
1009 | value = div64_u64(value, rp->time_unit); | ||
1010 | |||
1011 | return value; | ||
1012 | } | ||
1013 | |||
958 | static const struct rapl_defaults rapl_defaults_core = { | 1014 | static const struct rapl_defaults rapl_defaults_core = { |
1015 | .check_unit = rapl_check_unit_core, | ||
1016 | .set_floor_freq = set_floor_freq_default, | ||
1017 | .compute_time_window = rapl_compute_time_window_core, | ||
959 | }; | 1018 | }; |
960 | 1019 | ||
961 | static const struct rapl_defaults rapl_defaults_atom = { | 1020 | static const struct rapl_defaults rapl_defaults_atom = { |
1021 | .check_unit = rapl_check_unit_atom, | ||
1022 | .set_floor_freq = set_floor_freq_atom, | ||
1023 | .compute_time_window = rapl_compute_time_window_atom, | ||
962 | }; | 1024 | }; |
963 | 1025 | ||
964 | #define RAPL_CPU(_model, _ops) { \ | 1026 | #define RAPL_CPU(_model, _ops) { \ |
@@ -1262,7 +1324,7 @@ static int rapl_detect_topology(void) | |||
1262 | 1324 | ||
1263 | /* check if the package contains valid domains */ | 1325 | /* check if the package contains valid domains */ |
1264 | if (rapl_detect_domains(new_package, i) || | 1326 | if (rapl_detect_domains(new_package, i) || |
1265 | rapl_check_unit(new_package, i)) { | 1327 | rapl_defaults->check_unit(new_package, i)) { |
1266 | kfree(new_package->domains); | 1328 | kfree(new_package->domains); |
1267 | kfree(new_package); | 1329 | kfree(new_package); |
1268 | /* free up the packages already initialized */ | 1330 | /* free up the packages already initialized */ |
@@ -1317,7 +1379,7 @@ static int rapl_add_package(int cpu) | |||
1317 | rp->nr_cpus = 1; | 1379 | rp->nr_cpus = 1; |
1318 | /* check if the package contains valid domains */ | 1380 | /* check if the package contains valid domains */ |
1319 | if (rapl_detect_domains(rp, cpu) || | 1381 | if (rapl_detect_domains(rp, cpu) || |
1320 | rapl_check_unit(rp, cpu)) { | 1382 | rapl_defaults->check_unit(rp, cpu)) { |
1321 | ret = -ENODEV; | 1383 | ret = -ENODEV; |
1322 | goto err_free_package; | 1384 | goto err_free_package; |
1323 | } | 1385 | } |