aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2018-10-30 12:08:07 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2018-10-30 12:08:07 -0400
commit: 6ef746769ef5cfef84cdfdf61ecbab5a6aa4651a (patch)
tree: 12baa185ef3b52c4fce8f8fc64a74c4773ca78de
parent: 85b5d4bcab8b46664f8e1993bd5919cb0f24a3ca (diff)
parent: c4ac6889930d027ffa5cf77e0c202e7e97a4be06 (diff)
Merge tag 'pm-4.20-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull more power management updates from Rafael Wysocki:

 "These remove a questionable heuristic from the menu cpuidle governor,
  fix a recent build regression in the intel_pstate driver, clean up ARM
  big-Little support in cpufreq and fix up hung task watchdog's
  interaction with system-wide power management transitions.

  Specifics:

   - Fix build regression in the intel_pstate driver that doesn't build
     without CONFIG_ACPI after recent changes (Dominik Brodowski).

   - One of the heuristics in the menu cpuidle governor is based on a
     function returning 0 most of the time, so drop it and clean up the
     scheduler code related to it (Daniel Lezcano).

   - Prevent the arm_big_little cpufreq driver from being used on ARM64
     which is not suitable for it and drop the arm_big_little_dt driver
     that is not used any more (Sudeep Holla).

   - Prevent the hung task watchdog from triggering during resume from
     system-wide sleep states by disabling it before freezing tasks and
     enabling it again after they have been thawed (Vitaly Kuznetsov)"

* tag 'pm-4.20-rc1-2' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm:
  kernel: hung_task.c: disable on suspend
  cpufreq: remove unused arm_big_little_dt driver
  cpufreq: drop ARM_BIG_LITTLE_CPUFREQ support for ARM64
  cpufreq: intel_pstate: Fix compilation for !CONFIG_ACPI
  cpuidle: menu: Remove get_loadavg() from the performance multiplier
  sched: Factor out nr_iowait and nr_iowait_cpu
-rw-r--r-- MAINTAINERS | 1
-rw-r--r-- drivers/cpufreq/Kconfig.arm | 9
-rw-r--r-- drivers/cpufreq/Makefile | 3
-rw-r--r-- drivers/cpufreq/arm_big_little_dt.c | 100
-rw-r--r-- drivers/cpufreq/intel_pstate.c | 20
-rw-r--r-- drivers/cpuidle/governors/menu.c | 25
-rw-r--r-- include/linux/sched/stat.h | 1
-rw-r--r-- kernel/hung_task.c | 30
-rw-r--r-- kernel/sched/core.c | 34
9 files changed, 60 insertions, 163 deletions
diff --git a/MAINTAINERS b/MAINTAINERS
index 5d1c3ae9f933..49ee13372e5b 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3823,7 +3823,6 @@ W: http://www.arm.com/products/processors/technologies/biglittleprocessing.php
3823S: Maintained 3823S: Maintained
3824F: drivers/cpufreq/arm_big_little.h 3824F: drivers/cpufreq/arm_big_little.h
3825F: drivers/cpufreq/arm_big_little.c 3825F: drivers/cpufreq/arm_big_little.c
3826F: drivers/cpufreq/arm_big_little_dt.c
3827 3826
3828CPU POWER MONITORING SUBSYSTEM 3827CPU POWER MONITORING SUBSYSTEM
3829M: Thomas Renninger <trenn@suse.com> 3828M: Thomas Renninger <trenn@suse.com>
diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index 0cd8eb76ad59..4e1131ef85ae 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -28,20 +28,13 @@ config ARM_ARMADA_37XX_CPUFREQ
28# big LITTLE core layer and glue drivers 28# big LITTLE core layer and glue drivers
29config ARM_BIG_LITTLE_CPUFREQ 29config ARM_BIG_LITTLE_CPUFREQ
30 tristate "Generic ARM big LITTLE CPUfreq driver" 30 tristate "Generic ARM big LITTLE CPUfreq driver"
31 depends on (ARM_CPU_TOPOLOGY || ARM64) && HAVE_CLK 31 depends on ARM_CPU_TOPOLOGY && HAVE_CLK
32 # if CPU_THERMAL is on and THERMAL=m, ARM_BIT_LITTLE_CPUFREQ cannot be =y 32 # if CPU_THERMAL is on and THERMAL=m, ARM_BIT_LITTLE_CPUFREQ cannot be =y
33 depends on !CPU_THERMAL || THERMAL 33 depends on !CPU_THERMAL || THERMAL
34 select PM_OPP 34 select PM_OPP
35 help 35 help
36 This enables the Generic CPUfreq driver for ARM big.LITTLE platforms. 36 This enables the Generic CPUfreq driver for ARM big.LITTLE platforms.
37 37
38config ARM_DT_BL_CPUFREQ
39 tristate "Generic probing via DT for ARM big LITTLE CPUfreq driver"
40 depends on ARM_BIG_LITTLE_CPUFREQ && OF
41 help
42 This enables probing via DT for Generic CPUfreq driver for ARM
43 big.LITTLE platform. This gets frequency tables from DT.
44
45config ARM_SCPI_CPUFREQ 38config ARM_SCPI_CPUFREQ
46 tristate "SCPI based CPUfreq driver" 39 tristate "SCPI based CPUfreq driver"
47 depends on ARM_SCPI_PROTOCOL && COMMON_CLK_SCPI 40 depends on ARM_SCPI_PROTOCOL && COMMON_CLK_SCPI
diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile
index c1ffeabe4ecf..d5ee4562ed06 100644
--- a/drivers/cpufreq/Makefile
+++ b/drivers/cpufreq/Makefile
@@ -48,9 +48,6 @@ obj-$(CONFIG_X86_SFI_CPUFREQ) += sfi-cpufreq.o
48################################################################################## 48##################################################################################
49# ARM SoC drivers 49# ARM SoC drivers
50obj-$(CONFIG_ARM_BIG_LITTLE_CPUFREQ) += arm_big_little.o 50obj-$(CONFIG_ARM_BIG_LITTLE_CPUFREQ) += arm_big_little.o
51# big LITTLE per platform glues. Keep DT_BL_CPUFREQ as the last entry in all big
52# LITTLE drivers, so that it is probed last.
53obj-$(CONFIG_ARM_DT_BL_CPUFREQ) += arm_big_little_dt.o
54 51
55obj-$(CONFIG_ARM_ARMADA_37XX_CPUFREQ) += armada-37xx-cpufreq.o 52obj-$(CONFIG_ARM_ARMADA_37XX_CPUFREQ) += armada-37xx-cpufreq.o
56obj-$(CONFIG_ARM_BRCMSTB_AVS_CPUFREQ) += brcmstb-avs-cpufreq.o 53obj-$(CONFIG_ARM_BRCMSTB_AVS_CPUFREQ) += brcmstb-avs-cpufreq.o
diff --git a/drivers/cpufreq/arm_big_little_dt.c b/drivers/cpufreq/arm_big_little_dt.c
deleted file mode 100644
index b944f290c8a4..000000000000
--- a/drivers/cpufreq/arm_big_little_dt.c
+++ /dev/null
@@ -1,100 +0,0 @@
1/*
2 * Generic big.LITTLE CPUFreq Interface driver
3 *
4 * It provides necessary ops to arm_big_little cpufreq driver and gets
5 * Frequency information from Device Tree. Freq table in DT must be in KHz.
6 *
7 * Copyright (C) 2013 Linaro.
8 * Viresh Kumar <viresh.kumar@linaro.org>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2 as
12 * published by the Free Software Foundation.
13 *
14 * This program is distributed "as is" WITHOUT ANY WARRANTY of any
15 * kind, whether express or implied; without even the implied warranty
16 * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 */
19
20#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
21
22#include <linux/cpufreq.h>
23#include <linux/device.h>
24#include <linux/export.h>
25#include <linux/module.h>
26#include <linux/of_device.h>
27#include <linux/pm_opp.h>
28#include <linux/platform_device.h>
29#include <linux/slab.h>
30#include <linux/types.h>
31#include "arm_big_little.h"
32
33/* get cpu node with valid operating-points */
34static struct device_node *get_cpu_node_with_valid_op(int cpu)
35{
36 struct device_node *np = of_cpu_device_node_get(cpu);
37
38 if (!of_get_property(np, "operating-points", NULL)) {
39 of_node_put(np);
40 np = NULL;
41 }
42
43 return np;
44}
45
46static int dt_get_transition_latency(struct device *cpu_dev)
47{
48 struct device_node *np;
49 u32 transition_latency = CPUFREQ_ETERNAL;
50
51 np = of_node_get(cpu_dev->of_node);
52 if (!np) {
53 pr_info("Failed to find cpu node. Use CPUFREQ_ETERNAL transition latency\n");
54 return CPUFREQ_ETERNAL;
55 }
56
57 of_property_read_u32(np, "clock-latency", &transition_latency);
58 of_node_put(np);
59
60 pr_debug("%s: clock-latency: %d\n", __func__, transition_latency);
61 return transition_latency;
62}
63
64static const struct cpufreq_arm_bL_ops dt_bL_ops = {
65 .name = "dt-bl",
66 .get_transition_latency = dt_get_transition_latency,
67 .init_opp_table = dev_pm_opp_of_cpumask_add_table,
68 .free_opp_table = dev_pm_opp_of_cpumask_remove_table,
69};
70
71static int generic_bL_probe(struct platform_device *pdev)
72{
73 struct device_node *np;
74
75 np = get_cpu_node_with_valid_op(0);
76 if (!np)
77 return -ENODEV;
78
79 of_node_put(np);
80 return bL_cpufreq_register(&dt_bL_ops);
81}
82
83static int generic_bL_remove(struct platform_device *pdev)
84{
85 bL_cpufreq_unregister(&dt_bL_ops);
86 return 0;
87}
88
89static struct platform_driver generic_bL_platdrv = {
90 .driver = {
91 .name = "arm-bL-cpufreq-dt",
92 },
93 .probe = generic_bL_probe,
94 .remove = generic_bL_remove,
95};
96module_platform_driver(generic_bL_platdrv);
97
98MODULE_AUTHOR("Viresh Kumar <viresh.kumar@linaro.org>");
99MODULE_DESCRIPTION("Generic ARM big LITTLE cpufreq driver via DT");
100MODULE_LICENSE("GPL v2");
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 49c0abf2d48f..9578312e43f2 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -386,16 +386,11 @@ static int intel_pstate_get_cppc_guranteed(int cpu)
386 return cppc_perf.guaranteed_perf; 386 return cppc_perf.guaranteed_perf;
387} 387}
388 388
389#else 389#else /* CONFIG_ACPI_CPPC_LIB */
390static void intel_pstate_set_itmt_prio(int cpu) 390static void intel_pstate_set_itmt_prio(int cpu)
391{ 391{
392} 392}
393 393#endif /* CONFIG_ACPI_CPPC_LIB */
394static int intel_pstate_get_cppc_guranteed(int cpu)
395{
396 return -ENOTSUPP;
397}
398#endif
399 394
400static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy) 395static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
401{ 396{
@@ -477,7 +472,7 @@ static void intel_pstate_exit_perf_limits(struct cpufreq_policy *policy)
477 472
478 acpi_processor_unregister_performance(policy->cpu); 473 acpi_processor_unregister_performance(policy->cpu);
479} 474}
480#else 475#else /* CONFIG_ACPI */
481static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy) 476static inline void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
482{ 477{
483} 478}
@@ -490,7 +485,14 @@ static inline bool intel_pstate_acpi_pm_profile_server(void)
490{ 485{
491 return false; 486 return false;
492} 487}
493#endif 488#endif /* CONFIG_ACPI */
489
490#ifndef CONFIG_ACPI_CPPC_LIB
491static int intel_pstate_get_cppc_guranteed(int cpu)
492{
493 return -ENOTSUPP;
494}
495#endif /* CONFIG_ACPI_CPPC_LIB */
494 496
495static inline void update_turbo_state(void) 497static inline void update_turbo_state(void)
496{ 498{
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 71979605246e..61316fc51548 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -130,11 +130,6 @@ struct menu_device {
130 int interval_ptr; 130 int interval_ptr;
131}; 131};
132 132
133static inline int get_loadavg(unsigned long load)
134{
135 return LOAD_INT(load) * 10 + LOAD_FRAC(load) / 10;
136}
137
138static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters) 133static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters)
139{ 134{
140 int bucket = 0; 135 int bucket = 0;
@@ -168,18 +163,10 @@ static inline int which_bucket(unsigned int duration, unsigned long nr_iowaiters
168 * to be, the higher this multiplier, and thus the higher 163 * to be, the higher this multiplier, and thus the higher
169 * the barrier to go to an expensive C state. 164 * the barrier to go to an expensive C state.
170 */ 165 */
171static inline int performance_multiplier(unsigned long nr_iowaiters, unsigned long load) 166static inline int performance_multiplier(unsigned long nr_iowaiters)
172{ 167{
173 int mult = 1; 168 /* for IO wait tasks (per cpu!) we add 10x each */
174 169 return 1 + 10 * nr_iowaiters;
175 /* for higher loadavg, we are more reluctant */
176
177 mult += 2 * get_loadavg(load);
178
179 /* for IO wait tasks (per cpu!) we add 5x each */
180 mult += 10 * nr_iowaiters;
181
182 return mult;
183} 170}
184 171
185static DEFINE_PER_CPU(struct menu_device, menu_devices); 172static DEFINE_PER_CPU(struct menu_device, menu_devices);
@@ -297,7 +284,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
297 int idx; 284 int idx;
298 unsigned int interactivity_req; 285 unsigned int interactivity_req;
299 unsigned int predicted_us; 286 unsigned int predicted_us;
300 unsigned long nr_iowaiters, cpu_load; 287 unsigned long nr_iowaiters;
301 ktime_t delta_next; 288 ktime_t delta_next;
302 289
303 if (data->needs_update) { 290 if (data->needs_update) {
@@ -308,7 +295,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
308 /* determine the expected residency time, round up */ 295 /* determine the expected residency time, round up */
309 data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length(&delta_next)); 296 data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length(&delta_next));
310 297
311 get_iowait_load(&nr_iowaiters, &cpu_load); 298 nr_iowaiters = nr_iowait_cpu(dev->cpu);
312 data->bucket = which_bucket(data->next_timer_us, nr_iowaiters); 299 data->bucket = which_bucket(data->next_timer_us, nr_iowaiters);
313 300
314 if (unlikely(drv->state_count <= 1 || latency_req == 0) || 301 if (unlikely(drv->state_count <= 1 || latency_req == 0) ||
@@ -352,7 +339,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
352 * Use the performance multiplier and the user-configurable 339 * Use the performance multiplier and the user-configurable
353 * latency_req to determine the maximum exit latency. 340 * latency_req to determine the maximum exit latency.
354 */ 341 */
355 interactivity_req = predicted_us / performance_multiplier(nr_iowaiters, cpu_load); 342 interactivity_req = predicted_us / performance_multiplier(nr_iowaiters);
356 if (latency_req > interactivity_req) 343 if (latency_req > interactivity_req)
357 latency_req = interactivity_req; 344 latency_req = interactivity_req;
358 } 345 }
diff --git a/include/linux/sched/stat.h b/include/linux/sched/stat.h
index 04f1321d14c4..f30954cc059d 100644
--- a/include/linux/sched/stat.h
+++ b/include/linux/sched/stat.h
@@ -20,7 +20,6 @@ extern unsigned long nr_running(void);
20extern bool single_task_running(void); 20extern bool single_task_running(void);
21extern unsigned long nr_iowait(void); 21extern unsigned long nr_iowait(void);
22extern unsigned long nr_iowait_cpu(int cpu); 22extern unsigned long nr_iowait_cpu(int cpu);
23extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load);
24 23
25static inline int sched_info_on(void) 24static inline int sched_info_on(void)
26{ 25{
diff --git a/kernel/hung_task.c b/kernel/hung_task.c
index b9132d1269ef..cb8e3e8ac7b9 100644
--- a/kernel/hung_task.c
+++ b/kernel/hung_task.c
@@ -15,6 +15,7 @@
15#include <linux/lockdep.h> 15#include <linux/lockdep.h>
16#include <linux/export.h> 16#include <linux/export.h>
17#include <linux/sysctl.h> 17#include <linux/sysctl.h>
18#include <linux/suspend.h>
18#include <linux/utsname.h> 19#include <linux/utsname.h>
19#include <linux/sched/signal.h> 20#include <linux/sched/signal.h>
20#include <linux/sched/debug.h> 21#include <linux/sched/debug.h>
@@ -242,6 +243,28 @@ void reset_hung_task_detector(void)
242} 243}
243EXPORT_SYMBOL_GPL(reset_hung_task_detector); 244EXPORT_SYMBOL_GPL(reset_hung_task_detector);
244 245
246static bool hung_detector_suspended;
247
248static int hungtask_pm_notify(struct notifier_block *self,
249 unsigned long action, void *hcpu)
250{
251 switch (action) {
252 case PM_SUSPEND_PREPARE:
253 case PM_HIBERNATION_PREPARE:
254 case PM_RESTORE_PREPARE:
255 hung_detector_suspended = true;
256 break;
257 case PM_POST_SUSPEND:
258 case PM_POST_HIBERNATION:
259 case PM_POST_RESTORE:
260 hung_detector_suspended = false;
261 break;
262 default:
263 break;
264 }
265 return NOTIFY_OK;
266}
267
245/* 268/*
246 * kthread which checks for tasks stuck in D state 269 * kthread which checks for tasks stuck in D state
247 */ 270 */
@@ -261,7 +284,8 @@ static int watchdog(void *dummy)
261 interval = min_t(unsigned long, interval, timeout); 284 interval = min_t(unsigned long, interval, timeout);
262 t = hung_timeout_jiffies(hung_last_checked, interval); 285 t = hung_timeout_jiffies(hung_last_checked, interval);
263 if (t <= 0) { 286 if (t <= 0) {
264 if (!atomic_xchg(&reset_hung_task, 0)) 287 if (!atomic_xchg(&reset_hung_task, 0) &&
288 !hung_detector_suspended)
265 check_hung_uninterruptible_tasks(timeout); 289 check_hung_uninterruptible_tasks(timeout);
266 hung_last_checked = jiffies; 290 hung_last_checked = jiffies;
267 continue; 291 continue;
@@ -275,6 +299,10 @@ static int watchdog(void *dummy)
275static int __init hung_task_init(void) 299static int __init hung_task_init(void)
276{ 300{
277 atomic_notifier_chain_register(&panic_notifier_list, &panic_block); 301 atomic_notifier_chain_register(&panic_notifier_list, &panic_block);
302
303 /* Disable hung task detector on suspend */
304 pm_notifier(hungtask_pm_notify, 0);
305
278 watchdog_task = kthread_run(watchdog, NULL, "khungtaskd"); 306 watchdog_task = kthread_run(watchdog, NULL, "khungtaskd");
279 307
280 return 0; 308 return 0;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index fd2fce8a001b..f12225f26b70 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -2881,6 +2881,18 @@ unsigned long long nr_context_switches(void)
2881} 2881}
2882 2882
2883/* 2883/*
2884 * Consumers of these two interfaces, like for example the cpuidle menu
2885 * governor, are using nonsensical data. Preferring shallow idle state selection
2886 * for a CPU that has IO-wait which might not even end up running the task when
2887 * it does become runnable.
2888 */
2889
2890unsigned long nr_iowait_cpu(int cpu)
2891{
2892 return atomic_read(&cpu_rq(cpu)->nr_iowait);
2893}
2894
2895/*
2884 * IO-wait accounting, and how its mostly bollocks (on SMP). 2896 * IO-wait accounting, and how its mostly bollocks (on SMP).
2885 * 2897 *
2886 * The idea behind IO-wait account is to account the idle time that we could 2898 * The idea behind IO-wait account is to account the idle time that we could
@@ -2915,31 +2927,11 @@ unsigned long nr_iowait(void)
2915 unsigned long i, sum = 0; 2927 unsigned long i, sum = 0;
2916 2928
2917 for_each_possible_cpu(i) 2929 for_each_possible_cpu(i)
2918 sum += atomic_read(&cpu_rq(i)->nr_iowait); 2930 sum += nr_iowait_cpu(i);
2919 2931
2920 return sum; 2932 return sum;
2921} 2933}
2922 2934
2923/*
2924 * Consumers of these two interfaces, like for example the cpuidle menu
2925 * governor, are using nonsensical data. Preferring shallow idle state selection
2926 * for a CPU that has IO-wait which might not even end up running the task when
2927 * it does become runnable.
2928 */
2929
2930unsigned long nr_iowait_cpu(int cpu)
2931{
2932 struct rq *this = cpu_rq(cpu);
2933 return atomic_read(&this->nr_iowait);
2934}
2935
2936void get_iowait_load(unsigned long *nr_waiters, unsigned long *load)
2937{
2938 struct rq *rq = this_rq();
2939 *nr_waiters = atomic_read(&rq->nr_iowait);
2940 *load = rq->load.weight;
2941}
2942
2943#ifdef CONFIG_SMP 2935#ifdef CONFIG_SMP
2944 2936
2945/* 2937/*