summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJohannes Weiner <hannes@cmpxchg.org>2018-11-30 17:09:58 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2018-11-30 17:56:14 -0500
commite0c274472d5d27f277af722e017525e0b33784cd (patch)
treebbc5a80c7a61acf0747c62e78b741043561311d4
parentdbd4af54745fc0c805217693c807a3928b2d408b (diff)
psi: make disabling/enabling easier for vendor kernels
Mel Gorman reports a hackbench regression with psi that would prohibit
shipping the suse kernel with it default-enabled, but he'd still like
users to be able to opt in at little to no cost to others.

With the current combination of CONFIG_PSI and the psi_disabled bool set
from the commandline, this is a challenge. Do the following things to
make it easier:

1. Add a config option CONFIG_PSI_DEFAULT_DISABLED that allows distros
   to enable CONFIG_PSI in their kernel but leave the feature disabled
   unless a user requests it at boot-time.

   To avoid double negatives, rename psi_disabled= to psi=.

2. Make psi_disabled a static branch to eliminate any branch costs
   when the feature is disabled.

In terms of numbers before and after this patch, Mel says:

: The following is a comparison using CONFIG_PSI=n as a baseline against
: your patch and a vanilla kernel
:
:                      4.20.0-rc4            4.20.0-rc4            4.20.0-rc4
:             kconfigdisable-v1r1               vanilla       psidisable-v1r1
: Amean  1     1.3100 (   0.00%)     1.3923 (  -6.28%)     1.3427 (  -2.49%)
: Amean  3     3.8860 (   0.00%)     4.1230 *  -6.10%*     3.8860 (  -0.00%)
: Amean  5     6.8847 (   0.00%)     8.0390 * -16.77%*     6.7727 (   1.63%)
: Amean  7     9.9310 (   0.00%)    10.8367 *  -9.12%*     9.9910 (  -0.60%)
: Amean  12   16.6577 (   0.00%)    18.2363 *  -9.48%*    17.1083 (  -2.71%)
: Amean  18   26.5133 (   0.00%)    27.8833 *  -5.17%*    25.7663 (   2.82%)
: Amean  24   34.3003 (   0.00%)    34.6830 (  -1.12%)    32.0450 (   6.58%)
: Amean  30   40.0063 (   0.00%)    40.5800 (  -1.43%)    41.5087 (  -3.76%)
: Amean  32   40.1407 (   0.00%)    41.2273 (  -2.71%)    39.9417 (   0.50%)
:
: It's showing that the vanilla kernel takes a hit (as the bisection
: indicated it would) and that disabling PSI by default is reasonably
: close in terms of performance for this particular workload on this
: particular machine so;

Link: http://lkml.kernel.org/r/20181127165329.GA29728@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Tested-by: Mel Gorman <mgorman@techsingularity.net>
Reported-by: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt4
-rw-r--r--include/linux/psi.h3
-rw-r--r--init/Kconfig9
-rw-r--r--kernel/sched/psi.c30
-rw-r--r--kernel/sched/stats.h8
5 files changed, 40 insertions, 14 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 675170c36078..5d6ba930d4f4 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3505,6 +3505,10 @@
3505 before loading. 3505 before loading.
3506 See Documentation/blockdev/ramdisk.txt. 3506 See Documentation/blockdev/ramdisk.txt.
3507 3507
3508 psi= [KNL] Enable or disable pressure stall information
3509 tracking.
3510 Format: <bool>
3511
3508 psmouse.proto= [HW,MOUSE] Highest PS2 mouse protocol extension to 3512 psmouse.proto= [HW,MOUSE] Highest PS2 mouse protocol extension to
3509 probe for; one of (bare|imps|exps|lifebook|any). 3513 probe for; one of (bare|imps|exps|lifebook|any).
3510 psmouse.rate= [HW,MOUSE] Set desired mouse report rate, in reports 3514 psmouse.rate= [HW,MOUSE] Set desired mouse report rate, in reports
diff --git a/include/linux/psi.h b/include/linux/psi.h
index 8e0725aac0aa..7006008d5b72 100644
--- a/include/linux/psi.h
+++ b/include/linux/psi.h
@@ -1,6 +1,7 @@
1#ifndef _LINUX_PSI_H 1#ifndef _LINUX_PSI_H
2#define _LINUX_PSI_H 2#define _LINUX_PSI_H
3 3
4#include <linux/jump_label.h>
4#include <linux/psi_types.h> 5#include <linux/psi_types.h>
5#include <linux/sched.h> 6#include <linux/sched.h>
6 7
@@ -9,7 +10,7 @@ struct css_set;
9 10
10#ifdef CONFIG_PSI 11#ifdef CONFIG_PSI
11 12
12extern bool psi_disabled; 13extern struct static_key_false psi_disabled;
13 14
14void psi_init(void); 15void psi_init(void);
15 16
diff --git a/init/Kconfig b/init/Kconfig
index a4112e95724a..cf5b5a0dcbc2 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -509,6 +509,15 @@ config PSI
509 509
510 Say N if unsure. 510 Say N if unsure.
511 511
512config PSI_DEFAULT_DISABLED
513 bool "Require boot parameter to enable pressure stall information tracking"
514 default n
515 depends on PSI
516 help
517 If set, pressure stall information tracking will be disabled
518 per default but can be enabled through passing psi=1
519 on the kernel commandline during boot.
520
512endmenu # "CPU/Task time and stats accounting" 521endmenu # "CPU/Task time and stats accounting"
513 522
514config CPU_ISOLATION 523config CPU_ISOLATION
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c
index 3d7355d7c3e3..fe24de3fbc93 100644
--- a/kernel/sched/psi.c
+++ b/kernel/sched/psi.c
@@ -136,8 +136,18 @@
136 136
137static int psi_bug __read_mostly; 137static int psi_bug __read_mostly;
138 138
139bool psi_disabled __read_mostly; 139DEFINE_STATIC_KEY_FALSE(psi_disabled);
140core_param(psi_disabled, psi_disabled, bool, 0644); 140
141#ifdef CONFIG_PSI_DEFAULT_DISABLED
142bool psi_enable;
143#else
144bool psi_enable = true;
145#endif
146static int __init setup_psi(char *str)
147{
148 return kstrtobool(str, &psi_enable) == 0;
149}
150__setup("psi=", setup_psi);
141 151
142/* Running averages - we need to be higher-res than loadavg */ 152/* Running averages - we need to be higher-res than loadavg */
143#define PSI_FREQ (2*HZ+1) /* 2 sec intervals */ 153#define PSI_FREQ (2*HZ+1) /* 2 sec intervals */
@@ -169,8 +179,10 @@ static void group_init(struct psi_group *group)
169 179
170void __init psi_init(void) 180void __init psi_init(void)
171{ 181{
172 if (psi_disabled) 182 if (!psi_enable) {
183 static_branch_enable(&psi_disabled);
173 return; 184 return;
185 }
174 186
175 psi_period = jiffies_to_nsecs(PSI_FREQ); 187 psi_period = jiffies_to_nsecs(PSI_FREQ);
176 group_init(&psi_system); 188 group_init(&psi_system);
@@ -549,7 +561,7 @@ void psi_memstall_enter(unsigned long *flags)
549 struct rq_flags rf; 561 struct rq_flags rf;
550 struct rq *rq; 562 struct rq *rq;
551 563
552 if (psi_disabled) 564 if (static_branch_likely(&psi_disabled))
553 return; 565 return;
554 566
555 *flags = current->flags & PF_MEMSTALL; 567 *flags = current->flags & PF_MEMSTALL;
@@ -579,7 +591,7 @@ void psi_memstall_leave(unsigned long *flags)
579 struct rq_flags rf; 591 struct rq_flags rf;
580 struct rq *rq; 592 struct rq *rq;
581 593
582 if (psi_disabled) 594 if (static_branch_likely(&psi_disabled))
583 return; 595 return;
584 596
585 if (*flags) 597 if (*flags)
@@ -600,7 +612,7 @@ void psi_memstall_leave(unsigned long *flags)
600#ifdef CONFIG_CGROUPS 612#ifdef CONFIG_CGROUPS
601int psi_cgroup_alloc(struct cgroup *cgroup) 613int psi_cgroup_alloc(struct cgroup *cgroup)
602{ 614{
603 if (psi_disabled) 615 if (static_branch_likely(&psi_disabled))
604 return 0; 616 return 0;
605 617
606 cgroup->psi.pcpu = alloc_percpu(struct psi_group_cpu); 618 cgroup->psi.pcpu = alloc_percpu(struct psi_group_cpu);
@@ -612,7 +624,7 @@ int psi_cgroup_alloc(struct cgroup *cgroup)
612 624
613void psi_cgroup_free(struct cgroup *cgroup) 625void psi_cgroup_free(struct cgroup *cgroup)
614{ 626{
615 if (psi_disabled) 627 if (static_branch_likely(&psi_disabled))
616 return; 628 return;
617 629
618 cancel_delayed_work_sync(&cgroup->psi.clock_work); 630 cancel_delayed_work_sync(&cgroup->psi.clock_work);
@@ -637,7 +649,7 @@ void cgroup_move_task(struct task_struct *task, struct css_set *to)
637 struct rq_flags rf; 649 struct rq_flags rf;
638 struct rq *rq; 650 struct rq *rq;
639 651
640 if (psi_disabled) { 652 if (static_branch_likely(&psi_disabled)) {
641 /* 653 /*
642 * Lame to do this here, but the scheduler cannot be locked 654 * Lame to do this here, but the scheduler cannot be locked
643 * from the outside, so we move cgroups from inside sched/. 655 * from the outside, so we move cgroups from inside sched/.
@@ -673,7 +685,7 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res)
673{ 685{
674 int full; 686 int full;
675 687
676 if (psi_disabled) 688 if (static_branch_likely(&psi_disabled))
677 return -EOPNOTSUPP; 689 return -EOPNOTSUPP;
678 690
679 update_stats(group); 691 update_stats(group);
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h
index 4904c4677000..aa0de240fb41 100644
--- a/kernel/sched/stats.h
+++ b/kernel/sched/stats.h
@@ -66,7 +66,7 @@ static inline void psi_enqueue(struct task_struct *p, bool wakeup)
66{ 66{
67 int clear = 0, set = TSK_RUNNING; 67 int clear = 0, set = TSK_RUNNING;
68 68
69 if (psi_disabled) 69 if (static_branch_likely(&psi_disabled))
70 return; 70 return;
71 71
72 if (!wakeup || p->sched_psi_wake_requeue) { 72 if (!wakeup || p->sched_psi_wake_requeue) {
@@ -86,7 +86,7 @@ static inline void psi_dequeue(struct task_struct *p, bool sleep)
86{ 86{
87 int clear = TSK_RUNNING, set = 0; 87 int clear = TSK_RUNNING, set = 0;
88 88
89 if (psi_disabled) 89 if (static_branch_likely(&psi_disabled))
90 return; 90 return;
91 91
92 if (!sleep) { 92 if (!sleep) {
@@ -102,7 +102,7 @@ static inline void psi_dequeue(struct task_struct *p, bool sleep)
102 102
103static inline void psi_ttwu_dequeue(struct task_struct *p) 103static inline void psi_ttwu_dequeue(struct task_struct *p)
104{ 104{
105 if (psi_disabled) 105 if (static_branch_likely(&psi_disabled))
106 return; 106 return;
107 /* 107 /*
108 * Is the task being migrated during a wakeup? Make sure to 108 * Is the task being migrated during a wakeup? Make sure to
@@ -128,7 +128,7 @@ static inline void psi_ttwu_dequeue(struct task_struct *p)
128 128
129static inline void psi_task_tick(struct rq *rq) 129static inline void psi_task_tick(struct rq *rq)
130{ 130{
131 if (psi_disabled) 131 if (static_branch_likely(&psi_disabled))
132 return; 132 return;
133 133
134 if (unlikely(rq->curr->flags & PF_MEMSTALL)) 134 if (unlikely(rq->curr->flags & PF_MEMSTALL))