aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorRafael J. Wysocki <rafael.j.wysocki@intel.com>2016-10-01 19:42:45 -0400
committerRafael J. Wysocki <rafael.j.wysocki@intel.com>2016-10-01 19:42:45 -0400
commit7005f6dc69948685b846c895e80fca2646476b83 (patch)
treeb30009dd75caed344a5d03d791398ce5bb88496b
parent2dc3c72cd0a3ea85b8b7ae469904cfc24af1de60 (diff)
parentb6e251178286eff8bbd2c01a1b6aa7a317eb3c67 (diff)
Merge branch 'pm-cpufreq'
* pm-cpufreq: (24 commits) cpufreq: st: add missing \n to end of dev_err message cpufreq: kirkwood: add missing \n to end of dev_err messages cpufreq: CPPC: Avoid overflow when calculating desired_perf cpufreq: ti: Use generic platdev driver cpufreq: intel_pstate: Add io_boost trace cpufreq: intel_pstate: Use IOWAIT flag in Atom algorithm cpufreq: schedutil: Add iowait boosting cpufreq / sched: SCHED_CPUFREQ_IOWAIT flag to indicate iowait condition cpufreq: CPPC: Force reporting values in KHz to fix user space interface cpufreq: create link to policy only for registered CPUs intel_pstate: constify local structures cpufreq: dt: Support governor tunables per policy cpufreq: dt: Update kconfig description cpufreq: dt: Remove unused code MAINTAINERS: Add Documentation/cpu-freq/ cpufreq: dt: Add support for r8a7792 cpufreq / sched: ignore SMT when determining max cpu capacity cpufreq: Drop unnecessary check from cpufreq_policy_alloc() ARM: multi_v7_defconfig: Don't attempt to enable schedutil governor as module ARM: exynos_defconfig: Don't attempt to enable schedutil governor as module ...
-rw-r--r--MAINTAINERS1
-rw-r--r--arch/arm/configs/exynos_defconfig2
-rw-r--r--arch/arm/configs/multi_v7_defconfig2
-rw-r--r--drivers/cpufreq/Kconfig7
-rw-r--r--drivers/cpufreq/cppc_cpufreq.c53
-rw-r--r--drivers/cpufreq/cpufreq-dt-platdev.c11
-rw-r--r--drivers/cpufreq/cpufreq-dt.c6
-rw-r--r--drivers/cpufreq/cpufreq-dt.h19
-rw-r--r--drivers/cpufreq/cpufreq.c93
-rw-r--r--drivers/cpufreq/cpufreq_governor.c2
-rw-r--r--drivers/cpufreq/intel_pstate.c71
-rw-r--r--drivers/cpufreq/kirkwood-cpufreq.c8
-rw-r--r--drivers/cpufreq/scpi-cpufreq.c1
-rw-r--r--drivers/cpufreq/sti-cpufreq.c2
-rw-r--r--include/linux/sched.h13
-rw-r--r--include/trace/events/power.h13
-rw-r--r--kernel/sched/cpufreq.c2
-rw-r--r--kernel/sched/cpufreq_schedutil.c122
-rw-r--r--kernel/sched/deadline.c5
-rw-r--r--kernel/sched/fair.c23
-rw-r--r--kernel/sched/rt.c5
-rw-r--r--kernel/sched/sched.h40
22 files changed, 299 insertions, 202 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 01bff8ea28d8..e750d4a9306c 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3282,6 +3282,7 @@ L: linux-pm@vger.kernel.org
3282S: Maintained 3282S: Maintained
3283T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git 3283T: git git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm.git
3284T: git git://git.linaro.org/people/vireshk/linux.git (For ARM Updates) 3284T: git git://git.linaro.org/people/vireshk/linux.git (For ARM Updates)
3285F: Documentation/cpu-freq/
3285F: drivers/cpufreq/ 3286F: drivers/cpufreq/
3286F: include/linux/cpufreq.h 3287F: include/linux/cpufreq.h
3287 3288
diff --git a/arch/arm/configs/exynos_defconfig b/arch/arm/configs/exynos_defconfig
index 01986deef7c5..36cc7cc012f9 100644
--- a/arch/arm/configs/exynos_defconfig
+++ b/arch/arm/configs/exynos_defconfig
@@ -28,7 +28,7 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
28CONFIG_CPU_FREQ_GOV_POWERSAVE=m 28CONFIG_CPU_FREQ_GOV_POWERSAVE=m
29CONFIG_CPU_FREQ_GOV_USERSPACE=m 29CONFIG_CPU_FREQ_GOV_USERSPACE=m
30CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m 30CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m
31CONFIG_CPU_FREQ_GOV_SCHEDUTIL=m 31CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
32CONFIG_CPUFREQ_DT=y 32CONFIG_CPUFREQ_DT=y
33CONFIG_CPU_IDLE=y 33CONFIG_CPU_IDLE=y
34CONFIG_ARM_EXYNOS_CPUIDLE=y 34CONFIG_ARM_EXYNOS_CPUIDLE=y
diff --git a/arch/arm/configs/multi_v7_defconfig b/arch/arm/configs/multi_v7_defconfig
index ea3566fb92e2..58459105cadc 100644
--- a/arch/arm/configs/multi_v7_defconfig
+++ b/arch/arm/configs/multi_v7_defconfig
@@ -135,7 +135,7 @@ CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND=y
135CONFIG_CPU_FREQ_GOV_POWERSAVE=m 135CONFIG_CPU_FREQ_GOV_POWERSAVE=m
136CONFIG_CPU_FREQ_GOV_USERSPACE=m 136CONFIG_CPU_FREQ_GOV_USERSPACE=m
137CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m 137CONFIG_CPU_FREQ_GOV_CONSERVATIVE=m
138CONFIG_CPU_FREQ_GOV_SCHEDUTIL=m 138CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
139CONFIG_QORIQ_CPUFREQ=y 139CONFIG_QORIQ_CPUFREQ=y
140CONFIG_CPU_IDLE=y 140CONFIG_CPU_IDLE=y
141CONFIG_ARM_CPUIDLE=y 141CONFIG_ARM_CPUIDLE=y
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig
index 74919aa81dcb..d8b164a7c4e5 100644
--- a/drivers/cpufreq/Kconfig
+++ b/drivers/cpufreq/Kconfig
@@ -194,7 +194,7 @@ config CPU_FREQ_GOV_CONSERVATIVE
194 If in doubt, say N. 194 If in doubt, say N.
195 195
196config CPU_FREQ_GOV_SCHEDUTIL 196config CPU_FREQ_GOV_SCHEDUTIL
197 tristate "'schedutil' cpufreq policy governor" 197 bool "'schedutil' cpufreq policy governor"
198 depends on CPU_FREQ && SMP 198 depends on CPU_FREQ && SMP
199 select CPU_FREQ_GOV_ATTR_SET 199 select CPU_FREQ_GOV_ATTR_SET
200 select IRQ_WORK 200 select IRQ_WORK
@@ -208,9 +208,6 @@ config CPU_FREQ_GOV_SCHEDUTIL
208 frequency tipping point is at utilization/capacity equal to 80% in 208 frequency tipping point is at utilization/capacity equal to 80% in
209 both cases. 209 both cases.
210 210
211 To compile this driver as a module, choose M here: the module will
212 be called cpufreq_schedutil.
213
214 If in doubt, say N. 211 If in doubt, say N.
215 212
216comment "CPU frequency scaling drivers" 213comment "CPU frequency scaling drivers"
@@ -225,7 +222,7 @@ config CPUFREQ_DT
225 help 222 help
226 This adds a generic DT based cpufreq driver for frequency management. 223 This adds a generic DT based cpufreq driver for frequency management.
227 It supports both uniprocessor (UP) and symmetric multiprocessor (SMP) 224 It supports both uniprocessor (UP) and symmetric multiprocessor (SMP)
228 systems which share clock and voltage across all CPUs. 225 systems.
229 226
230 If in doubt, say N. 227 If in doubt, say N.
231 228
diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 8882b8e2ecd0..99db4227ae38 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -19,10 +19,19 @@
19#include <linux/delay.h> 19#include <linux/delay.h>
20#include <linux/cpu.h> 20#include <linux/cpu.h>
21#include <linux/cpufreq.h> 21#include <linux/cpufreq.h>
22#include <linux/dmi.h>
22#include <linux/vmalloc.h> 23#include <linux/vmalloc.h>
23 24
25#include <asm/unaligned.h>
26
24#include <acpi/cppc_acpi.h> 27#include <acpi/cppc_acpi.h>
25 28
29/* Minimum struct length needed for the DMI processor entry we want */
30#define DMI_ENTRY_PROCESSOR_MIN_LENGTH 48
31
32/* Offest in the DMI processor structure for the max frequency */
33#define DMI_PROCESSOR_MAX_SPEED 0x14
34
26/* 35/*
27 * These structs contain information parsed from per CPU 36 * These structs contain information parsed from per CPU
28 * ACPI _CPC structures. 37 * ACPI _CPC structures.
@@ -32,6 +41,39 @@
32 */ 41 */
33static struct cpudata **all_cpu_data; 42static struct cpudata **all_cpu_data;
34 43
44/* Capture the max KHz from DMI */
45static u64 cppc_dmi_max_khz;
46
47/* Callback function used to retrieve the max frequency from DMI */
48static void cppc_find_dmi_mhz(const struct dmi_header *dm, void *private)
49{
50 const u8 *dmi_data = (const u8 *)dm;
51 u16 *mhz = (u16 *)private;
52
53 if (dm->type == DMI_ENTRY_PROCESSOR &&
54 dm->length >= DMI_ENTRY_PROCESSOR_MIN_LENGTH) {
55 u16 val = (u16)get_unaligned((const u16 *)
56 (dmi_data + DMI_PROCESSOR_MAX_SPEED));
57 *mhz = val > *mhz ? val : *mhz;
58 }
59}
60
61/* Look up the max frequency in DMI */
62static u64 cppc_get_dmi_max_khz(void)
63{
64 u16 mhz = 0;
65
66 dmi_walk(cppc_find_dmi_mhz, &mhz);
67
68 /*
69 * Real stupid fallback value, just in case there is no
70 * actual value set.
71 */
72 mhz = mhz ? mhz : 1;
73
74 return (1000 * mhz);
75}
76
35static int cppc_cpufreq_set_target(struct cpufreq_policy *policy, 77static int cppc_cpufreq_set_target(struct cpufreq_policy *policy,
36 unsigned int target_freq, 78 unsigned int target_freq,
37 unsigned int relation) 79 unsigned int relation)
@@ -42,7 +84,7 @@ static int cppc_cpufreq_set_target(struct cpufreq_policy *policy,
42 84
43 cpu = all_cpu_data[policy->cpu]; 85 cpu = all_cpu_data[policy->cpu];
44 86
45 cpu->perf_ctrls.desired_perf = target_freq; 87 cpu->perf_ctrls.desired_perf = (u64)target_freq * policy->max / cppc_dmi_max_khz;
46 freqs.old = policy->cur; 88 freqs.old = policy->cur;
47 freqs.new = target_freq; 89 freqs.new = target_freq;
48 90
@@ -94,8 +136,10 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
94 return ret; 136 return ret;
95 } 137 }
96 138
97 policy->min = cpu->perf_caps.lowest_perf; 139 cppc_dmi_max_khz = cppc_get_dmi_max_khz();
98 policy->max = cpu->perf_caps.highest_perf; 140
141 policy->min = cpu->perf_caps.lowest_perf * cppc_dmi_max_khz / cpu->perf_caps.highest_perf;
142 policy->max = cppc_dmi_max_khz;
99 policy->cpuinfo.min_freq = policy->min; 143 policy->cpuinfo.min_freq = policy->min;
100 policy->cpuinfo.max_freq = policy->max; 144 policy->cpuinfo.max_freq = policy->max;
101 policy->shared_type = cpu->shared_type; 145 policy->shared_type = cpu->shared_type;
@@ -112,7 +156,8 @@ static int cppc_cpufreq_cpu_init(struct cpufreq_policy *policy)
112 cpu->cur_policy = policy; 156 cpu->cur_policy = policy;
113 157
114 /* Set policy->cur to max now. The governors will adjust later. */ 158 /* Set policy->cur to max now. The governors will adjust later. */
115 policy->cur = cpu->perf_ctrls.desired_perf = cpu->perf_caps.highest_perf; 159 policy->cur = cppc_dmi_max_khz;
160 cpu->perf_ctrls.desired_perf = cpu->perf_caps.highest_perf;
116 161
117 ret = cppc_set_perf(cpu_num, &cpu->perf_ctrls); 162 ret = cppc_set_perf(cpu_num, &cpu->perf_ctrls);
118 if (ret) 163 if (ret)
diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index 2ee40fd360ca..71267626456b 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -11,6 +11,8 @@
11#include <linux/of.h> 11#include <linux/of.h>
12#include <linux/platform_device.h> 12#include <linux/platform_device.h>
13 13
14#include "cpufreq-dt.h"
15
14static const struct of_device_id machines[] __initconst = { 16static const struct of_device_id machines[] __initconst = {
15 { .compatible = "allwinner,sun4i-a10", }, 17 { .compatible = "allwinner,sun4i-a10", },
16 { .compatible = "allwinner,sun5i-a10s", }, 18 { .compatible = "allwinner,sun5i-a10s", },
@@ -40,6 +42,7 @@ static const struct of_device_id machines[] __initconst = {
40 { .compatible = "samsung,exynos5250", }, 42 { .compatible = "samsung,exynos5250", },
41#ifndef CONFIG_BL_SWITCHER 43#ifndef CONFIG_BL_SWITCHER
42 { .compatible = "samsung,exynos5420", }, 44 { .compatible = "samsung,exynos5420", },
45 { .compatible = "samsung,exynos5433", },
43 { .compatible = "samsung,exynos5800", }, 46 { .compatible = "samsung,exynos5800", },
44#endif 47#endif
45 48
@@ -51,6 +54,7 @@ static const struct of_device_id machines[] __initconst = {
51 { .compatible = "renesas,r8a7779", }, 54 { .compatible = "renesas,r8a7779", },
52 { .compatible = "renesas,r8a7790", }, 55 { .compatible = "renesas,r8a7790", },
53 { .compatible = "renesas,r8a7791", }, 56 { .compatible = "renesas,r8a7791", },
57 { .compatible = "renesas,r8a7792", },
54 { .compatible = "renesas,r8a7793", }, 58 { .compatible = "renesas,r8a7793", },
55 { .compatible = "renesas,r8a7794", }, 59 { .compatible = "renesas,r8a7794", },
56 { .compatible = "renesas,sh73a0", }, 60 { .compatible = "renesas,sh73a0", },
@@ -68,6 +72,8 @@ static const struct of_device_id machines[] __initconst = {
68 72
69 { .compatible = "sigma,tango4" }, 73 { .compatible = "sigma,tango4" },
70 74
75 { .compatible = "ti,am33xx", },
76 { .compatible = "ti,dra7", },
71 { .compatible = "ti,omap2", }, 77 { .compatible = "ti,omap2", },
72 { .compatible = "ti,omap3", }, 78 { .compatible = "ti,omap3", },
73 { .compatible = "ti,omap4", }, 79 { .compatible = "ti,omap4", },
@@ -91,7 +97,8 @@ static int __init cpufreq_dt_platdev_init(void)
91 if (!match) 97 if (!match)
92 return -ENODEV; 98 return -ENODEV;
93 99
94 return PTR_ERR_OR_ZERO(platform_device_register_simple("cpufreq-dt", -1, 100 return PTR_ERR_OR_ZERO(platform_device_register_data(NULL, "cpufreq-dt",
95 NULL, 0)); 101 -1, match->data,
102 sizeof(struct cpufreq_dt_platform_data)));
96} 103}
97device_initcall(cpufreq_dt_platdev_init); 104device_initcall(cpufreq_dt_platdev_init);
diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index 3957de801ae8..5c07ae05d69a 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -25,6 +25,8 @@
25#include <linux/slab.h> 25#include <linux/slab.h>
26#include <linux/thermal.h> 26#include <linux/thermal.h>
27 27
28#include "cpufreq-dt.h"
29
28struct private_data { 30struct private_data {
29 struct device *cpu_dev; 31 struct device *cpu_dev;
30 struct thermal_cooling_device *cdev; 32 struct thermal_cooling_device *cdev;
@@ -353,6 +355,7 @@ static struct cpufreq_driver dt_cpufreq_driver = {
353 355
354static int dt_cpufreq_probe(struct platform_device *pdev) 356static int dt_cpufreq_probe(struct platform_device *pdev)
355{ 357{
358 struct cpufreq_dt_platform_data *data = dev_get_platdata(&pdev->dev);
356 int ret; 359 int ret;
357 360
358 /* 361 /*
@@ -366,7 +369,8 @@ static int dt_cpufreq_probe(struct platform_device *pdev)
366 if (ret) 369 if (ret)
367 return ret; 370 return ret;
368 371
369 dt_cpufreq_driver.driver_data = dev_get_platdata(&pdev->dev); 372 if (data && data->have_governor_per_policy)
373 dt_cpufreq_driver.flags |= CPUFREQ_HAVE_GOVERNOR_PER_POLICY;
370 374
371 ret = cpufreq_register_driver(&dt_cpufreq_driver); 375 ret = cpufreq_register_driver(&dt_cpufreq_driver);
372 if (ret) 376 if (ret)
diff --git a/drivers/cpufreq/cpufreq-dt.h b/drivers/cpufreq/cpufreq-dt.h
new file mode 100644
index 000000000000..54d774e46c43
--- /dev/null
+++ b/drivers/cpufreq/cpufreq-dt.h
@@ -0,0 +1,19 @@
1/*
2 * Copyright (C) 2016 Linaro
3 * Viresh Kumar <viresh.kumar@linaro.org>
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License version 2 as
7 * published by the Free Software Foundation.
8 */
9
10#ifndef __CPUFREQ_DT_H__
11#define __CPUFREQ_DT_H__
12
13#include <linux/types.h>
14
15struct cpufreq_dt_platform_data {
16 bool have_governor_per_policy;
17};
18
19#endif /* __CPUFREQ_DT_H__ */
diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index 3dd4884c6f9e..3a64136bf21b 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -916,58 +916,18 @@ static struct kobj_type ktype_cpufreq = {
916 .release = cpufreq_sysfs_release, 916 .release = cpufreq_sysfs_release,
917}; 917};
918 918
919static int add_cpu_dev_symlink(struct cpufreq_policy *policy, int cpu) 919static int add_cpu_dev_symlink(struct cpufreq_policy *policy,
920 struct device *dev)
920{ 921{
921 struct device *cpu_dev; 922 dev_dbg(dev, "%s: Adding symlink\n", __func__);
922 923 return sysfs_create_link(&dev->kobj, &policy->kobj, "cpufreq");
923 pr_debug("%s: Adding symlink for CPU: %u\n", __func__, cpu);
924
925 if (!policy)
926 return 0;
927
928 cpu_dev = get_cpu_device(cpu);
929 if (WARN_ON(!cpu_dev))
930 return 0;
931
932 return sysfs_create_link(&cpu_dev->kobj, &policy->kobj, "cpufreq");
933}
934
935static void remove_cpu_dev_symlink(struct cpufreq_policy *policy, int cpu)
936{
937 struct device *cpu_dev;
938
939 pr_debug("%s: Removing symlink for CPU: %u\n", __func__, cpu);
940
941 cpu_dev = get_cpu_device(cpu);
942 if (WARN_ON(!cpu_dev))
943 return;
944
945 sysfs_remove_link(&cpu_dev->kobj, "cpufreq");
946} 924}
947 925
948/* Add/remove symlinks for all related CPUs */ 926static void remove_cpu_dev_symlink(struct cpufreq_policy *policy,
949static int cpufreq_add_dev_symlink(struct cpufreq_policy *policy) 927 struct device *dev)
950{ 928{
951 unsigned int j; 929 dev_dbg(dev, "%s: Removing symlink\n", __func__);
952 int ret = 0; 930 sysfs_remove_link(&dev->kobj, "cpufreq");
953
954 /* Some related CPUs might not be present (physically hotplugged) */
955 for_each_cpu(j, policy->real_cpus) {
956 ret = add_cpu_dev_symlink(policy, j);
957 if (ret)
958 break;
959 }
960
961 return ret;
962}
963
964static void cpufreq_remove_dev_symlink(struct cpufreq_policy *policy)
965{
966 unsigned int j;
967
968 /* Some related CPUs might not be present (physically hotplugged) */
969 for_each_cpu(j, policy->real_cpus)
970 remove_cpu_dev_symlink(policy, j);
971} 931}
972 932
973static int cpufreq_add_dev_interface(struct cpufreq_policy *policy) 933static int cpufreq_add_dev_interface(struct cpufreq_policy *policy)
@@ -999,7 +959,7 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy)
999 return ret; 959 return ret;
1000 } 960 }
1001 961
1002 return cpufreq_add_dev_symlink(policy); 962 return 0;
1003} 963}
1004 964
1005__weak struct cpufreq_governor *cpufreq_default_governor(void) 965__weak struct cpufreq_governor *cpufreq_default_governor(void)
@@ -1073,13 +1033,9 @@ static void handle_update(struct work_struct *work)
1073 1033
1074static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) 1034static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu)
1075{ 1035{
1076 struct device *dev = get_cpu_device(cpu);
1077 struct cpufreq_policy *policy; 1036 struct cpufreq_policy *policy;
1078 int ret; 1037 int ret;
1079 1038
1080 if (WARN_ON(!dev))
1081 return NULL;
1082
1083 policy = kzalloc(sizeof(*policy), GFP_KERNEL); 1039 policy = kzalloc(sizeof(*policy), GFP_KERNEL);
1084 if (!policy) 1040 if (!policy)
1085 return NULL; 1041 return NULL;
@@ -1133,7 +1089,6 @@ static void cpufreq_policy_put_kobj(struct cpufreq_policy *policy, bool notify)
1133 1089
1134 down_write(&policy->rwsem); 1090 down_write(&policy->rwsem);
1135 cpufreq_stats_free_table(policy); 1091 cpufreq_stats_free_table(policy);
1136 cpufreq_remove_dev_symlink(policy);
1137 kobj = &policy->kobj; 1092 kobj = &policy->kobj;
1138 cmp = &policy->kobj_unregister; 1093 cmp = &policy->kobj_unregister;
1139 up_write(&policy->rwsem); 1094 up_write(&policy->rwsem);
@@ -1215,8 +1170,8 @@ static int cpufreq_online(unsigned int cpu)
1215 if (new_policy) { 1170 if (new_policy) {
1216 /* related_cpus should at least include policy->cpus. */ 1171 /* related_cpus should at least include policy->cpus. */
1217 cpumask_copy(policy->related_cpus, policy->cpus); 1172 cpumask_copy(policy->related_cpus, policy->cpus);
1218 /* Remember CPUs present at the policy creation time. */ 1173 /* Clear mask of registered CPUs */
1219 cpumask_and(policy->real_cpus, policy->cpus, cpu_present_mask); 1174 cpumask_clear(policy->real_cpus);
1220 } 1175 }
1221 1176
1222 /* 1177 /*
@@ -1331,6 +1286,8 @@ out_free_policy:
1331 return ret; 1286 return ret;
1332} 1287}
1333 1288
1289static void cpufreq_offline(unsigned int cpu);
1290
1334/** 1291/**
1335 * cpufreq_add_dev - the cpufreq interface for a CPU device. 1292 * cpufreq_add_dev - the cpufreq interface for a CPU device.
1336 * @dev: CPU device. 1293 * @dev: CPU device.
@@ -1340,22 +1297,28 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif)
1340{ 1297{
1341 struct cpufreq_policy *policy; 1298 struct cpufreq_policy *policy;
1342 unsigned cpu = dev->id; 1299 unsigned cpu = dev->id;
1300 int ret;
1343 1301
1344 dev_dbg(dev, "%s: adding CPU%u\n", __func__, cpu); 1302 dev_dbg(dev, "%s: adding CPU%u\n", __func__, cpu);
1345 1303
1346 if (cpu_online(cpu)) 1304 if (cpu_online(cpu)) {
1347 return cpufreq_online(cpu); 1305 ret = cpufreq_online(cpu);
1306 if (ret)
1307 return ret;
1308 }
1348 1309
1349 /* 1310 /* Create sysfs link on CPU registration */
1350 * A hotplug notifier will follow and we will handle it as CPU online
1351 * then. For now, just create the sysfs link, unless there is no policy
1352 * or the link is already present.
1353 */
1354 policy = per_cpu(cpufreq_cpu_data, cpu); 1311 policy = per_cpu(cpufreq_cpu_data, cpu);
1355 if (!policy || cpumask_test_and_set_cpu(cpu, policy->real_cpus)) 1312 if (!policy || cpumask_test_and_set_cpu(cpu, policy->real_cpus))
1356 return 0; 1313 return 0;
1357 1314
1358 return add_cpu_dev_symlink(policy, cpu); 1315 ret = add_cpu_dev_symlink(policy, dev);
1316 if (ret) {
1317 cpumask_clear_cpu(cpu, policy->real_cpus);
1318 cpufreq_offline(cpu);
1319 }
1320
1321 return ret;
1359} 1322}
1360 1323
1361static void cpufreq_offline(unsigned int cpu) 1324static void cpufreq_offline(unsigned int cpu)
@@ -1436,7 +1399,7 @@ static void cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif)
1436 cpufreq_offline(cpu); 1399 cpufreq_offline(cpu);
1437 1400
1438 cpumask_clear_cpu(cpu, policy->real_cpus); 1401 cpumask_clear_cpu(cpu, policy->real_cpus);
1439 remove_cpu_dev_symlink(policy, cpu); 1402 remove_cpu_dev_symlink(policy, dev);
1440 1403
1441 if (cpumask_empty(policy->real_cpus)) 1404 if (cpumask_empty(policy->real_cpus))
1442 cpufreq_policy_free(policy, true); 1405 cpufreq_policy_free(policy, true);
diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index e415349ab31b..642dd0f183a8 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -260,7 +260,7 @@ static void dbs_irq_work(struct irq_work *irq_work)
260} 260}
261 261
262static void dbs_update_util_handler(struct update_util_data *data, u64 time, 262static void dbs_update_util_handler(struct update_util_data *data, u64 time,
263 unsigned long util, unsigned long max) 263 unsigned int flags)
264{ 264{
265 struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info, update_util); 265 struct cpu_dbs_info *cdbs = container_of(data, struct cpu_dbs_info, update_util);
266 struct policy_dbs_info *policy_dbs = cdbs->policy_dbs; 266 struct policy_dbs_info *policy_dbs = cdbs->policy_dbs;
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index be9eade147f2..806f2039571e 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -181,6 +181,8 @@ struct _pid {
181 * @cpu: CPU number for this instance data 181 * @cpu: CPU number for this instance data
182 * @update_util: CPUFreq utility callback information 182 * @update_util: CPUFreq utility callback information
183 * @update_util_set: CPUFreq utility callback is set 183 * @update_util_set: CPUFreq utility callback is set
184 * @iowait_boost: iowait-related boost fraction
185 * @last_update: Time of the last update.
184 * @pstate: Stores P state limits for this CPU 186 * @pstate: Stores P state limits for this CPU
185 * @vid: Stores VID limits for this CPU 187 * @vid: Stores VID limits for this CPU
186 * @pid: Stores PID parameters for this CPU 188 * @pid: Stores PID parameters for this CPU
@@ -206,6 +208,7 @@ struct cpudata {
206 struct vid_data vid; 208 struct vid_data vid;
207 struct _pid pid; 209 struct _pid pid;
208 210
211 u64 last_update;
209 u64 last_sample_time; 212 u64 last_sample_time;
210 u64 prev_aperf; 213 u64 prev_aperf;
211 u64 prev_mperf; 214 u64 prev_mperf;
@@ -216,6 +219,7 @@ struct cpudata {
216 struct acpi_processor_performance acpi_perf_data; 219 struct acpi_processor_performance acpi_perf_data;
217 bool valid_pss_table; 220 bool valid_pss_table;
218#endif 221#endif
222 unsigned int iowait_boost;
219}; 223};
220 224
221static struct cpudata **all_cpu_data; 225static struct cpudata **all_cpu_data;
@@ -229,6 +233,7 @@ static struct cpudata **all_cpu_data;
229 * @p_gain_pct: PID proportional gain 233 * @p_gain_pct: PID proportional gain
230 * @i_gain_pct: PID integral gain 234 * @i_gain_pct: PID integral gain
231 * @d_gain_pct: PID derivative gain 235 * @d_gain_pct: PID derivative gain
236 * @boost_iowait: Whether or not to use iowait boosting.
232 * 237 *
233 * Stores per CPU model static PID configuration data. 238 * Stores per CPU model static PID configuration data.
234 */ 239 */
@@ -240,6 +245,7 @@ struct pstate_adjust_policy {
240 int p_gain_pct; 245 int p_gain_pct;
241 int d_gain_pct; 246 int d_gain_pct;
242 int i_gain_pct; 247 int i_gain_pct;
248 bool boost_iowait;
243}; 249};
244 250
245/** 251/**
@@ -1029,7 +1035,7 @@ static struct cpu_defaults core_params = {
1029 }, 1035 },
1030}; 1036};
1031 1037
1032static struct cpu_defaults silvermont_params = { 1038static const struct cpu_defaults silvermont_params = {
1033 .pid_policy = { 1039 .pid_policy = {
1034 .sample_rate_ms = 10, 1040 .sample_rate_ms = 10,
1035 .deadband = 0, 1041 .deadband = 0,
@@ -1037,6 +1043,7 @@ static struct cpu_defaults silvermont_params = {
1037 .p_gain_pct = 14, 1043 .p_gain_pct = 14,
1038 .d_gain_pct = 0, 1044 .d_gain_pct = 0,
1039 .i_gain_pct = 4, 1045 .i_gain_pct = 4,
1046 .boost_iowait = true,
1040 }, 1047 },
1041 .funcs = { 1048 .funcs = {
1042 .get_max = atom_get_max_pstate, 1049 .get_max = atom_get_max_pstate,
@@ -1050,7 +1057,7 @@ static struct cpu_defaults silvermont_params = {
1050 }, 1057 },
1051}; 1058};
1052 1059
1053static struct cpu_defaults airmont_params = { 1060static const struct cpu_defaults airmont_params = {
1054 .pid_policy = { 1061 .pid_policy = {
1055 .sample_rate_ms = 10, 1062 .sample_rate_ms = 10,
1056 .deadband = 0, 1063 .deadband = 0,
@@ -1058,6 +1065,7 @@ static struct cpu_defaults airmont_params = {
1058 .p_gain_pct = 14, 1065 .p_gain_pct = 14,
1059 .d_gain_pct = 0, 1066 .d_gain_pct = 0,
1060 .i_gain_pct = 4, 1067 .i_gain_pct = 4,
1068 .boost_iowait = true,
1061 }, 1069 },
1062 .funcs = { 1070 .funcs = {
1063 .get_max = atom_get_max_pstate, 1071 .get_max = atom_get_max_pstate,
@@ -1071,7 +1079,7 @@ static struct cpu_defaults airmont_params = {
1071 }, 1079 },
1072}; 1080};
1073 1081
1074static struct cpu_defaults knl_params = { 1082static const struct cpu_defaults knl_params = {
1075 .pid_policy = { 1083 .pid_policy = {
1076 .sample_rate_ms = 10, 1084 .sample_rate_ms = 10,
1077 .deadband = 0, 1085 .deadband = 0,
@@ -1091,7 +1099,7 @@ static struct cpu_defaults knl_params = {
1091 }, 1099 },
1092}; 1100};
1093 1101
1094static struct cpu_defaults bxt_params = { 1102static const struct cpu_defaults bxt_params = {
1095 .pid_policy = { 1103 .pid_policy = {
1096 .sample_rate_ms = 10, 1104 .sample_rate_ms = 10,
1097 .deadband = 0, 1105 .deadband = 0,
@@ -1099,6 +1107,7 @@ static struct cpu_defaults bxt_params = {
1099 .p_gain_pct = 14, 1107 .p_gain_pct = 14,
1100 .d_gain_pct = 0, 1108 .d_gain_pct = 0,
1101 .i_gain_pct = 4, 1109 .i_gain_pct = 4,
1110 .boost_iowait = true,
1102 }, 1111 },
1103 .funcs = { 1112 .funcs = {
1104 .get_max = core_get_max_pstate, 1113 .get_max = core_get_max_pstate,
@@ -1222,36 +1231,18 @@ static inline int32_t get_avg_pstate(struct cpudata *cpu)
1222static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu) 1231static inline int32_t get_target_pstate_use_cpu_load(struct cpudata *cpu)
1223{ 1232{
1224 struct sample *sample = &cpu->sample; 1233 struct sample *sample = &cpu->sample;
1225 u64 cummulative_iowait, delta_iowait_us; 1234 int32_t busy_frac, boost;
1226 u64 delta_iowait_mperf;
1227 u64 mperf, now;
1228 int32_t cpu_load;
1229 1235
1230 cummulative_iowait = get_cpu_iowait_time_us(cpu->cpu, &now); 1236 busy_frac = div_fp(sample->mperf, sample->tsc);
1231 1237
1232 /* 1238 boost = cpu->iowait_boost;
1233 * Convert iowait time into number of IO cycles spent at max_freq. 1239 cpu->iowait_boost >>= 1;
1234 * IO is considered as busy only for the cpu_load algorithm. For
1235 * performance this is not needed since we always try to reach the
1236 * maximum P-State, so we are already boosting the IOs.
1237 */
1238 delta_iowait_us = cummulative_iowait - cpu->prev_cummulative_iowait;
1239 delta_iowait_mperf = div64_u64(delta_iowait_us * cpu->pstate.scaling *
1240 cpu->pstate.max_pstate, MSEC_PER_SEC);
1241 1240
1242 mperf = cpu->sample.mperf + delta_iowait_mperf; 1241 if (busy_frac < boost)
1243 cpu->prev_cummulative_iowait = cummulative_iowait; 1242 busy_frac = boost;
1244 1243
1245 /* 1244 sample->busy_scaled = busy_frac * 100;
1246 * The load can be estimated as the ratio of the mperf counter 1245 return get_avg_pstate(cpu) - pid_calc(&cpu->pid, sample->busy_scaled);
1247 * running at a constant frequency during active periods
1248 * (C0) and the time stamp counter running at the same frequency
1249 * also during C-states.
1250 */
1251 cpu_load = div64_u64(int_tofp(100) * mperf, sample->tsc);
1252 cpu->sample.busy_scaled = cpu_load;
1253
1254 return get_avg_pstate(cpu) - pid_calc(&cpu->pid, cpu_load);
1255} 1246}
1256 1247
1257static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu) 1248static inline int32_t get_target_pstate_use_performance(struct cpudata *cpu)
@@ -1325,15 +1316,29 @@ static inline void intel_pstate_adjust_busy_pstate(struct cpudata *cpu)
1325 sample->mperf, 1316 sample->mperf,
1326 sample->aperf, 1317 sample->aperf,
1327 sample->tsc, 1318 sample->tsc,
1328 get_avg_frequency(cpu)); 1319 get_avg_frequency(cpu),
1320 fp_toint(cpu->iowait_boost * 100));
1329} 1321}
1330 1322
1331static void intel_pstate_update_util(struct update_util_data *data, u64 time, 1323static void intel_pstate_update_util(struct update_util_data *data, u64 time,
1332 unsigned long util, unsigned long max) 1324 unsigned int flags)
1333{ 1325{
1334 struct cpudata *cpu = container_of(data, struct cpudata, update_util); 1326 struct cpudata *cpu = container_of(data, struct cpudata, update_util);
1335 u64 delta_ns = time - cpu->sample.time; 1327 u64 delta_ns;
1328
1329 if (pid_params.boost_iowait) {
1330 if (flags & SCHED_CPUFREQ_IOWAIT) {
1331 cpu->iowait_boost = int_tofp(1);
1332 } else if (cpu->iowait_boost) {
1333 /* Clear iowait_boost if the CPU may have been idle. */
1334 delta_ns = time - cpu->last_update;
1335 if (delta_ns > TICK_NSEC)
1336 cpu->iowait_boost = 0;
1337 }
1338 cpu->last_update = time;
1339 }
1336 1340
1341 delta_ns = time - cpu->sample.time;
1337 if ((s64)delta_ns >= pid_params.sample_rate_ns) { 1342 if ((s64)delta_ns >= pid_params.sample_rate_ns) {
1338 bool sample_taken = intel_pstate_sample(cpu, time); 1343 bool sample_taken = intel_pstate_sample(cpu, time);
1339 1344
diff --git a/drivers/cpufreq/kirkwood-cpufreq.c b/drivers/cpufreq/kirkwood-cpufreq.c
index be42f103db60..1b9bcd76c60e 100644
--- a/drivers/cpufreq/kirkwood-cpufreq.c
+++ b/drivers/cpufreq/kirkwood-cpufreq.c
@@ -123,7 +123,7 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev)
123 123
124 priv.cpu_clk = of_clk_get_by_name(np, "cpu_clk"); 124 priv.cpu_clk = of_clk_get_by_name(np, "cpu_clk");
125 if (IS_ERR(priv.cpu_clk)) { 125 if (IS_ERR(priv.cpu_clk)) {
126 dev_err(priv.dev, "Unable to get cpuclk"); 126 dev_err(priv.dev, "Unable to get cpuclk\n");
127 return PTR_ERR(priv.cpu_clk); 127 return PTR_ERR(priv.cpu_clk);
128 } 128 }
129 129
@@ -132,7 +132,7 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev)
132 132
133 priv.ddr_clk = of_clk_get_by_name(np, "ddrclk"); 133 priv.ddr_clk = of_clk_get_by_name(np, "ddrclk");
134 if (IS_ERR(priv.ddr_clk)) { 134 if (IS_ERR(priv.ddr_clk)) {
135 dev_err(priv.dev, "Unable to get ddrclk"); 135 dev_err(priv.dev, "Unable to get ddrclk\n");
136 err = PTR_ERR(priv.ddr_clk); 136 err = PTR_ERR(priv.ddr_clk);
137 goto out_cpu; 137 goto out_cpu;
138 } 138 }
@@ -142,7 +142,7 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev)
142 142
143 priv.powersave_clk = of_clk_get_by_name(np, "powersave"); 143 priv.powersave_clk = of_clk_get_by_name(np, "powersave");
144 if (IS_ERR(priv.powersave_clk)) { 144 if (IS_ERR(priv.powersave_clk)) {
145 dev_err(priv.dev, "Unable to get powersave"); 145 dev_err(priv.dev, "Unable to get powersave\n");
146 err = PTR_ERR(priv.powersave_clk); 146 err = PTR_ERR(priv.powersave_clk);
147 goto out_ddr; 147 goto out_ddr;
148 } 148 }
@@ -155,7 +155,7 @@ static int kirkwood_cpufreq_probe(struct platform_device *pdev)
155 if (!err) 155 if (!err)
156 return 0; 156 return 0;
157 157
158 dev_err(priv.dev, "Failed to register cpufreq driver"); 158 dev_err(priv.dev, "Failed to register cpufreq driver\n");
159 159
160 clk_disable_unprepare(priv.powersave_clk); 160 clk_disable_unprepare(priv.powersave_clk);
161out_ddr: 161out_ddr:
diff --git a/drivers/cpufreq/scpi-cpufreq.c b/drivers/cpufreq/scpi-cpufreq.c
index e8a7bf57b31b..ea7a4e1b68c2 100644
--- a/drivers/cpufreq/scpi-cpufreq.c
+++ b/drivers/cpufreq/scpi-cpufreq.c
@@ -105,7 +105,6 @@ static int scpi_cpufreq_remove(struct platform_device *pdev)
105static struct platform_driver scpi_cpufreq_platdrv = { 105static struct platform_driver scpi_cpufreq_platdrv = {
106 .driver = { 106 .driver = {
107 .name = "scpi-cpufreq", 107 .name = "scpi-cpufreq",
108 .owner = THIS_MODULE,
109 }, 108 },
110 .probe = scpi_cpufreq_probe, 109 .probe = scpi_cpufreq_probe,
111 .remove = scpi_cpufreq_remove, 110 .remove = scpi_cpufreq_remove,
diff --git a/drivers/cpufreq/sti-cpufreq.c b/drivers/cpufreq/sti-cpufreq.c
index 04042038ec4b..b366e6d830ea 100644
--- a/drivers/cpufreq/sti-cpufreq.c
+++ b/drivers/cpufreq/sti-cpufreq.c
@@ -163,7 +163,7 @@ static int sti_cpufreq_set_opp_info(void)
163 163
164 reg_fields = sti_cpufreq_match(); 164 reg_fields = sti_cpufreq_match();
165 if (!reg_fields) { 165 if (!reg_fields) {
166 dev_err(dev, "This SoC doesn't support voltage scaling"); 166 dev_err(dev, "This SoC doesn't support voltage scaling\n");
167 return -ENODEV; 167 return -ENODEV;
168 } 168 }
169 169
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 62c68e513e39..98fe95fea30c 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -3469,15 +3469,20 @@ static inline unsigned long rlimit_max(unsigned int limit)
3469 return task_rlimit_max(current, limit); 3469 return task_rlimit_max(current, limit);
3470} 3470}
3471 3471
3472#define SCHED_CPUFREQ_RT (1U << 0)
3473#define SCHED_CPUFREQ_DL (1U << 1)
3474#define SCHED_CPUFREQ_IOWAIT (1U << 2)
3475
3476#define SCHED_CPUFREQ_RT_DL (SCHED_CPUFREQ_RT | SCHED_CPUFREQ_DL)
3477
3472#ifdef CONFIG_CPU_FREQ 3478#ifdef CONFIG_CPU_FREQ
3473struct update_util_data { 3479struct update_util_data {
3474 void (*func)(struct update_util_data *data, 3480 void (*func)(struct update_util_data *data, u64 time, unsigned int flags);
3475 u64 time, unsigned long util, unsigned long max);
3476}; 3481};
3477 3482
3478void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data, 3483void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
3479 void (*func)(struct update_util_data *data, u64 time, 3484 void (*func)(struct update_util_data *data, u64 time,
3480 unsigned long util, unsigned long max)); 3485 unsigned int flags));
3481void cpufreq_remove_update_util_hook(int cpu); 3486void cpufreq_remove_update_util_hook(int cpu);
3482#endif /* CONFIG_CPU_FREQ */ 3487#endif /* CONFIG_CPU_FREQ */
3483 3488
diff --git a/include/trace/events/power.h b/include/trace/events/power.h
index 19e50300ce7d..54e3aad32806 100644
--- a/include/trace/events/power.h
+++ b/include/trace/events/power.h
@@ -69,7 +69,8 @@ TRACE_EVENT(pstate_sample,
69 u64 mperf, 69 u64 mperf,
70 u64 aperf, 70 u64 aperf,
71 u64 tsc, 71 u64 tsc,
72 u32 freq 72 u32 freq,
73 u32 io_boost
73 ), 74 ),
74 75
75 TP_ARGS(core_busy, 76 TP_ARGS(core_busy,
@@ -79,7 +80,8 @@ TRACE_EVENT(pstate_sample,
79 mperf, 80 mperf,
80 aperf, 81 aperf,
81 tsc, 82 tsc,
82 freq 83 freq,
84 io_boost
83 ), 85 ),
84 86
85 TP_STRUCT__entry( 87 TP_STRUCT__entry(
@@ -91,6 +93,7 @@ TRACE_EVENT(pstate_sample,
91 __field(u64, aperf) 93 __field(u64, aperf)
92 __field(u64, tsc) 94 __field(u64, tsc)
93 __field(u32, freq) 95 __field(u32, freq)
96 __field(u32, io_boost)
94 ), 97 ),
95 98
96 TP_fast_assign( 99 TP_fast_assign(
@@ -102,9 +105,10 @@ TRACE_EVENT(pstate_sample,
102 __entry->aperf = aperf; 105 __entry->aperf = aperf;
103 __entry->tsc = tsc; 106 __entry->tsc = tsc;
104 __entry->freq = freq; 107 __entry->freq = freq;
108 __entry->io_boost = io_boost;
105 ), 109 ),
106 110
107 TP_printk("core_busy=%lu scaled=%lu from=%lu to=%lu mperf=%llu aperf=%llu tsc=%llu freq=%lu ", 111 TP_printk("core_busy=%lu scaled=%lu from=%lu to=%lu mperf=%llu aperf=%llu tsc=%llu freq=%lu io_boost=%lu",
108 (unsigned long)__entry->core_busy, 112 (unsigned long)__entry->core_busy,
109 (unsigned long)__entry->scaled_busy, 113 (unsigned long)__entry->scaled_busy,
110 (unsigned long)__entry->from, 114 (unsigned long)__entry->from,
@@ -112,7 +116,8 @@ TRACE_EVENT(pstate_sample,
112 (unsigned long long)__entry->mperf, 116 (unsigned long long)__entry->mperf,
113 (unsigned long long)__entry->aperf, 117 (unsigned long long)__entry->aperf,
114 (unsigned long long)__entry->tsc, 118 (unsigned long long)__entry->tsc,
115 (unsigned long)__entry->freq 119 (unsigned long)__entry->freq,
120 (unsigned long)__entry->io_boost
116 ) 121 )
117 122
118); 123);
diff --git a/kernel/sched/cpufreq.c b/kernel/sched/cpufreq.c
index 1141954e73b4..dbc51442ecbc 100644
--- a/kernel/sched/cpufreq.c
+++ b/kernel/sched/cpufreq.c
@@ -33,7 +33,7 @@ DEFINE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
33 */ 33 */
34void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data, 34void cpufreq_add_update_util_hook(int cpu, struct update_util_data *data,
35 void (*func)(struct update_util_data *data, u64 time, 35 void (*func)(struct update_util_data *data, u64 time,
36 unsigned long util, unsigned long max)) 36 unsigned int flags))
37{ 37{
38 if (WARN_ON(!data || !func)) 38 if (WARN_ON(!data || !func))
39 return; 39 return;
diff --git a/kernel/sched/cpufreq_schedutil.c b/kernel/sched/cpufreq_schedutil.c
index a84641b222c1..69e06898997d 100644
--- a/kernel/sched/cpufreq_schedutil.c
+++ b/kernel/sched/cpufreq_schedutil.c
@@ -12,7 +12,6 @@
12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt 12#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
13 13
14#include <linux/cpufreq.h> 14#include <linux/cpufreq.h>
15#include <linux/module.h>
16#include <linux/slab.h> 15#include <linux/slab.h>
17#include <trace/events/power.h> 16#include <trace/events/power.h>
18 17
@@ -48,11 +47,14 @@ struct sugov_cpu {
48 struct sugov_policy *sg_policy; 47 struct sugov_policy *sg_policy;
49 48
50 unsigned int cached_raw_freq; 49 unsigned int cached_raw_freq;
50 unsigned long iowait_boost;
51 unsigned long iowait_boost_max;
52 u64 last_update;
51 53
52 /* The fields below are only needed when sharing a policy. */ 54 /* The fields below are only needed when sharing a policy. */
53 unsigned long util; 55 unsigned long util;
54 unsigned long max; 56 unsigned long max;
55 u64 last_update; 57 unsigned int flags;
56}; 58};
57 59
58static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu); 60static DEFINE_PER_CPU(struct sugov_cpu, sugov_cpu);
@@ -144,24 +146,75 @@ static unsigned int get_next_freq(struct sugov_cpu *sg_cpu, unsigned long util,
144 return cpufreq_driver_resolve_freq(policy, freq); 146 return cpufreq_driver_resolve_freq(policy, freq);
145} 147}
146 148
149static void sugov_get_util(unsigned long *util, unsigned long *max)
150{
151 struct rq *rq = this_rq();
152 unsigned long cfs_max;
153
154 cfs_max = arch_scale_cpu_capacity(NULL, smp_processor_id());
155
156 *util = min(rq->cfs.avg.util_avg, cfs_max);
157 *max = cfs_max;
158}
159
160static void sugov_set_iowait_boost(struct sugov_cpu *sg_cpu, u64 time,
161 unsigned int flags)
162{
163 if (flags & SCHED_CPUFREQ_IOWAIT) {
164 sg_cpu->iowait_boost = sg_cpu->iowait_boost_max;
165 } else if (sg_cpu->iowait_boost) {
166 s64 delta_ns = time - sg_cpu->last_update;
167
168 /* Clear iowait_boost if the CPU appears to have been idle. */
169 if (delta_ns > TICK_NSEC)
170 sg_cpu->iowait_boost = 0;
171 }
172}
173
174static void sugov_iowait_boost(struct sugov_cpu *sg_cpu, unsigned long *util,
175 unsigned long *max)
176{
177 unsigned long boost_util = sg_cpu->iowait_boost;
178 unsigned long boost_max = sg_cpu->iowait_boost_max;
179
180 if (!boost_util)
181 return;
182
183 if (*util * boost_max < *max * boost_util) {
184 *util = boost_util;
185 *max = boost_max;
186 }
187 sg_cpu->iowait_boost >>= 1;
188}
189
147static void sugov_update_single(struct update_util_data *hook, u64 time, 190static void sugov_update_single(struct update_util_data *hook, u64 time,
148 unsigned long util, unsigned long max) 191 unsigned int flags)
149{ 192{
150 struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); 193 struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
151 struct sugov_policy *sg_policy = sg_cpu->sg_policy; 194 struct sugov_policy *sg_policy = sg_cpu->sg_policy;
152 struct cpufreq_policy *policy = sg_policy->policy; 195 struct cpufreq_policy *policy = sg_policy->policy;
196 unsigned long util, max;
153 unsigned int next_f; 197 unsigned int next_f;
154 198
199 sugov_set_iowait_boost(sg_cpu, time, flags);
200 sg_cpu->last_update = time;
201
155 if (!sugov_should_update_freq(sg_policy, time)) 202 if (!sugov_should_update_freq(sg_policy, time))
156 return; 203 return;
157 204
158 next_f = util == ULONG_MAX ? policy->cpuinfo.max_freq : 205 if (flags & SCHED_CPUFREQ_RT_DL) {
159 get_next_freq(sg_cpu, util, max); 206 next_f = policy->cpuinfo.max_freq;
207 } else {
208 sugov_get_util(&util, &max);
209 sugov_iowait_boost(sg_cpu, &util, &max);
210 next_f = get_next_freq(sg_cpu, util, max);
211 }
160 sugov_update_commit(sg_policy, time, next_f); 212 sugov_update_commit(sg_policy, time, next_f);
161} 213}
162 214
163static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu, 215static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
164 unsigned long util, unsigned long max) 216 unsigned long util, unsigned long max,
217 unsigned int flags)
165{ 218{
166 struct sugov_policy *sg_policy = sg_cpu->sg_policy; 219 struct sugov_policy *sg_policy = sg_cpu->sg_policy;
167 struct cpufreq_policy *policy = sg_policy->policy; 220 struct cpufreq_policy *policy = sg_policy->policy;
@@ -169,9 +222,11 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
169 u64 last_freq_update_time = sg_policy->last_freq_update_time; 222 u64 last_freq_update_time = sg_policy->last_freq_update_time;
170 unsigned int j; 223 unsigned int j;
171 224
172 if (util == ULONG_MAX) 225 if (flags & SCHED_CPUFREQ_RT_DL)
173 return max_f; 226 return max_f;
174 227
228 sugov_iowait_boost(sg_cpu, &util, &max);
229
175 for_each_cpu(j, policy->cpus) { 230 for_each_cpu(j, policy->cpus) {
176 struct sugov_cpu *j_sg_cpu; 231 struct sugov_cpu *j_sg_cpu;
177 unsigned long j_util, j_max; 232 unsigned long j_util, j_max;
@@ -186,41 +241,50 @@ static unsigned int sugov_next_freq_shared(struct sugov_cpu *sg_cpu,
186 * frequency update and the time elapsed between the last update 241 * frequency update and the time elapsed between the last update
187 * of the CPU utilization and the last frequency update is long 242 * of the CPU utilization and the last frequency update is long
188 * enough, don't take the CPU into account as it probably is 243 * enough, don't take the CPU into account as it probably is
189 * idle now. 244 * idle now (and clear iowait_boost for it).
190 */ 245 */
191 delta_ns = last_freq_update_time - j_sg_cpu->last_update; 246 delta_ns = last_freq_update_time - j_sg_cpu->last_update;
192 if (delta_ns > TICK_NSEC) 247 if (delta_ns > TICK_NSEC) {
248 j_sg_cpu->iowait_boost = 0;
193 continue; 249 continue;
194 250 }
195 j_util = j_sg_cpu->util; 251 if (j_sg_cpu->flags & SCHED_CPUFREQ_RT_DL)
196 if (j_util == ULONG_MAX)
197 return max_f; 252 return max_f;
198 253
254 j_util = j_sg_cpu->util;
199 j_max = j_sg_cpu->max; 255 j_max = j_sg_cpu->max;
200 if (j_util * max > j_max * util) { 256 if (j_util * max > j_max * util) {
201 util = j_util; 257 util = j_util;
202 max = j_max; 258 max = j_max;
203 } 259 }
260
261 sugov_iowait_boost(j_sg_cpu, &util, &max);
204 } 262 }
205 263
206 return get_next_freq(sg_cpu, util, max); 264 return get_next_freq(sg_cpu, util, max);
207} 265}
208 266
209static void sugov_update_shared(struct update_util_data *hook, u64 time, 267static void sugov_update_shared(struct update_util_data *hook, u64 time,
210 unsigned long util, unsigned long max) 268 unsigned int flags)
211{ 269{
212 struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util); 270 struct sugov_cpu *sg_cpu = container_of(hook, struct sugov_cpu, update_util);
213 struct sugov_policy *sg_policy = sg_cpu->sg_policy; 271 struct sugov_policy *sg_policy = sg_cpu->sg_policy;
272 unsigned long util, max;
214 unsigned int next_f; 273 unsigned int next_f;
215 274
275 sugov_get_util(&util, &max);
276
216 raw_spin_lock(&sg_policy->update_lock); 277 raw_spin_lock(&sg_policy->update_lock);
217 278
218 sg_cpu->util = util; 279 sg_cpu->util = util;
219 sg_cpu->max = max; 280 sg_cpu->max = max;
281 sg_cpu->flags = flags;
282
283 sugov_set_iowait_boost(sg_cpu, time, flags);
220 sg_cpu->last_update = time; 284 sg_cpu->last_update = time;
221 285
222 if (sugov_should_update_freq(sg_policy, time)) { 286 if (sugov_should_update_freq(sg_policy, time)) {
223 next_f = sugov_next_freq_shared(sg_cpu, util, max); 287 next_f = sugov_next_freq_shared(sg_cpu, util, max, flags);
224 sugov_update_commit(sg_policy, time, next_f); 288 sugov_update_commit(sg_policy, time, next_f);
225 } 289 }
226 290
@@ -444,10 +508,13 @@ static int sugov_start(struct cpufreq_policy *policy)
444 508
445 sg_cpu->sg_policy = sg_policy; 509 sg_cpu->sg_policy = sg_policy;
446 if (policy_is_shared(policy)) { 510 if (policy_is_shared(policy)) {
447 sg_cpu->util = ULONG_MAX; 511 sg_cpu->util = 0;
448 sg_cpu->max = 0; 512 sg_cpu->max = 0;
513 sg_cpu->flags = SCHED_CPUFREQ_RT;
449 sg_cpu->last_update = 0; 514 sg_cpu->last_update = 0;
450 sg_cpu->cached_raw_freq = 0; 515 sg_cpu->cached_raw_freq = 0;
516 sg_cpu->iowait_boost = 0;
517 sg_cpu->iowait_boost_max = policy->cpuinfo.max_freq;
451 cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util, 518 cpufreq_add_update_util_hook(cpu, &sg_cpu->update_util,
452 sugov_update_shared); 519 sugov_update_shared);
453 } else { 520 } else {
@@ -495,28 +562,15 @@ static struct cpufreq_governor schedutil_gov = {
495 .limits = sugov_limits, 562 .limits = sugov_limits,
496}; 563};
497 564
498static int __init sugov_module_init(void)
499{
500 return cpufreq_register_governor(&schedutil_gov);
501}
502
503static void __exit sugov_module_exit(void)
504{
505 cpufreq_unregister_governor(&schedutil_gov);
506}
507
508MODULE_AUTHOR("Rafael J. Wysocki <rafael.j.wysocki@intel.com>");
509MODULE_DESCRIPTION("Utilization-based CPU frequency selection");
510MODULE_LICENSE("GPL");
511
512#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL 565#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_SCHEDUTIL
513struct cpufreq_governor *cpufreq_default_governor(void) 566struct cpufreq_governor *cpufreq_default_governor(void)
514{ 567{
515 return &schedutil_gov; 568 return &schedutil_gov;
516} 569}
517
518fs_initcall(sugov_module_init);
519#else
520module_init(sugov_module_init);
521#endif 570#endif
522module_exit(sugov_module_exit); 571
572static int __init sugov_register(void)
573{
574 return cpufreq_register_governor(&schedutil_gov);
575}
576fs_initcall(sugov_register);
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 1ce8867283dc..974779656999 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -735,9 +735,8 @@ static void update_curr_dl(struct rq *rq)
735 return; 735 return;
736 } 736 }
737 737
738 /* kick cpufreq (see the comment in linux/cpufreq.h). */ 738 /* kick cpufreq (see the comment in kernel/sched/sched.h). */
739 if (cpu_of(rq) == smp_processor_id()) 739 cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_DL);
740 cpufreq_trigger_update(rq_clock(rq));
741 740
742 schedstat_set(curr->se.statistics.exec_max, 741 schedstat_set(curr->se.statistics.exec_max,
743 max(curr->se.statistics.exec_max, delta_exec)); 742 max(curr->se.statistics.exec_max, delta_exec));
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 039de34f1521..a5cd07b25aa1 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -2875,12 +2875,7 @@ static inline void update_tg_load_avg(struct cfs_rq *cfs_rq, int force) {}
2875 2875
2876static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq) 2876static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
2877{ 2877{
2878 struct rq *rq = rq_of(cfs_rq); 2878 if (&this_rq()->cfs == cfs_rq) {
2879 int cpu = cpu_of(rq);
2880
2881 if (cpu == smp_processor_id() && &rq->cfs == cfs_rq) {
2882 unsigned long max = rq->cpu_capacity_orig;
2883
2884 /* 2879 /*
2885 * There are a few boundary cases this might miss but it should 2880 * There are a few boundary cases this might miss but it should
2886 * get called often enough that that should (hopefully) not be 2881 * get called often enough that that should (hopefully) not be
@@ -2897,8 +2892,7 @@ static inline void cfs_rq_util_change(struct cfs_rq *cfs_rq)
2897 * 2892 *
2898 * See cpu_util(). 2893 * See cpu_util().
2899 */ 2894 */
2900 cpufreq_update_util(rq_clock(rq), 2895 cpufreq_update_util(rq_of(cfs_rq), 0);
2901 min(cfs_rq->avg.util_avg, max), max);
2902 } 2896 }
2903} 2897}
2904 2898
@@ -3159,10 +3153,7 @@ update_cfs_rq_load_avg(u64 now, struct cfs_rq *cfs_rq, bool update_freq)
3159 3153
3160static inline void update_load_avg(struct sched_entity *se, int not_used) 3154static inline void update_load_avg(struct sched_entity *se, int not_used)
3161{ 3155{
3162 struct cfs_rq *cfs_rq = cfs_rq_of(se); 3156 cpufreq_update_util(rq_of(cfs_rq_of(se)), 0);
3163 struct rq *rq = rq_of(cfs_rq);
3164
3165 cpufreq_trigger_update(rq_clock(rq));
3166} 3157}
3167 3158
3168static inline void 3159static inline void
@@ -4509,6 +4500,14 @@ enqueue_task_fair(struct rq *rq, struct task_struct *p, int flags)
4509 struct cfs_rq *cfs_rq; 4500 struct cfs_rq *cfs_rq;
4510 struct sched_entity *se = &p->se; 4501 struct sched_entity *se = &p->se;
4511 4502
4503 /*
4504 * If in_iowait is set, the code below may not trigger any cpufreq
4505 * utilization updates, so do it here explicitly with the IOWAIT flag
4506 * passed.
4507 */
4508 if (p->in_iowait)
4509 cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_IOWAIT);
4510
4512 for_each_sched_entity(se) { 4511 for_each_sched_entity(se) {
4513 if (se->on_rq) 4512 if (se->on_rq)
4514 break; 4513 break;
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index d5690b722691..2516b8df6dbb 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -957,9 +957,8 @@ static void update_curr_rt(struct rq *rq)
957 if (unlikely((s64)delta_exec <= 0)) 957 if (unlikely((s64)delta_exec <= 0))
958 return; 958 return;
959 959
960 /* Kick cpufreq (see the comment in linux/cpufreq.h). */ 960 /* Kick cpufreq (see the comment in kernel/sched/sched.h). */
961 if (cpu_of(rq) == smp_processor_id()) 961 cpufreq_update_this_cpu(rq, SCHED_CPUFREQ_RT);
962 cpufreq_trigger_update(rq_clock(rq));
963 962
964 schedstat_set(curr->se.statistics.exec_max, 963 schedstat_set(curr->se.statistics.exec_max,
965 max(curr->se.statistics.exec_max, delta_exec)); 964 max(curr->se.statistics.exec_max, delta_exec));
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index c64fc5114004..b7fc1ced4380 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -1763,27 +1763,13 @@ DECLARE_PER_CPU(struct update_util_data *, cpufreq_update_util_data);
1763 1763
1764/** 1764/**
1765 * cpufreq_update_util - Take a note about CPU utilization changes. 1765 * cpufreq_update_util - Take a note about CPU utilization changes.
1766 * @time: Current time. 1766 * @rq: Runqueue to carry out the update for.
1767 * @util: Current utilization. 1767 * @flags: Update reason flags.
1768 * @max: Utilization ceiling.
1769 * 1768 *
1770 * This function is called by the scheduler on every invocation of 1769 * This function is called by the scheduler on the CPU whose utilization is
1771 * update_load_avg() on the CPU whose utilization is being updated. 1770 * being updated.
1772 * 1771 *
1773 * It can only be called from RCU-sched read-side critical sections. 1772 * It can only be called from RCU-sched read-side critical sections.
1774 */
1775static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max)
1776{
1777 struct update_util_data *data;
1778
1779 data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));
1780 if (data)
1781 data->func(data, time, util, max);
1782}
1783
1784/**
1785 * cpufreq_trigger_update - Trigger CPU performance state evaluation if needed.
1786 * @time: Current time.
1787 * 1773 *
1788 * The way cpufreq is currently arranged requires it to evaluate the CPU 1774 * The way cpufreq is currently arranged requires it to evaluate the CPU
1789 * performance state (frequency/voltage) on a regular basis to prevent it from 1775 * performance state (frequency/voltage) on a regular basis to prevent it from
@@ -1797,13 +1783,23 @@ static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned lo
1797 * but that really is a band-aid. Going forward it should be replaced with 1783 * but that really is a band-aid. Going forward it should be replaced with
1798 * solutions targeted more specifically at RT and DL tasks. 1784 * solutions targeted more specifically at RT and DL tasks.
1799 */ 1785 */
1800static inline void cpufreq_trigger_update(u64 time) 1786static inline void cpufreq_update_util(struct rq *rq, unsigned int flags)
1787{
1788 struct update_util_data *data;
1789
1790 data = rcu_dereference_sched(*this_cpu_ptr(&cpufreq_update_util_data));
1791 if (data)
1792 data->func(data, rq_clock(rq), flags);
1793}
1794
1795static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags)
1801{ 1796{
1802 cpufreq_update_util(time, ULONG_MAX, 0); 1797 if (cpu_of(rq) == smp_processor_id())
1798 cpufreq_update_util(rq, flags);
1803} 1799}
1804#else 1800#else
1805static inline void cpufreq_update_util(u64 time, unsigned long util, unsigned long max) {} 1801static inline void cpufreq_update_util(struct rq *rq, unsigned int flags) {}
1806static inline void cpufreq_trigger_update(u64 time) {} 1802static inline void cpufreq_update_this_cpu(struct rq *rq, unsigned int flags) {}
1807#endif /* CONFIG_CPU_FREQ */ 1803#endif /* CONFIG_CPU_FREQ */
1808 1804
1809#ifdef arch_scale_freq_capacity 1805#ifdef arch_scale_freq_capacity