diff options
| author | Johannes Weiner <hannes@cmpxchg.org> | 2018-11-30 17:09:58 -0500 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-11-30 17:56:14 -0500 |
| commit | e0c274472d5d27f277af722e017525e0b33784cd (patch) | |
| tree | bbc5a80c7a61acf0747c62e78b741043561311d4 | |
| parent | dbd4af54745fc0c805217693c807a3928b2d408b (diff) | |
psi: make disabling/enabling easier for vendor kernels
Mel Gorman reports a hackbench regression with psi that would prohibit
shipping the suse kernel with it default-enabled, but he'd still like
users to be able to opt in at little to no cost to others.
With the current combination of CONFIG_PSI and the psi_disabled bool set
from the commandline, this is a challenge. Do the following things to
make it easier:
1. Add a config option CONFIG_PSI_DEFAULT_DISABLED that allows distros
to enable CONFIG_PSI in their kernel but leave the feature disabled
unless a user requests it at boot-time.
To avoid double negatives, rename psi_disabled= to psi=.
2. Make psi_disabled a static branch to eliminate any branch costs
when the feature is disabled.
In terms of numbers before and after this patch, Mel says:
: The following is a comparison using CONFIG_PSI=n as a baseline against
: your patch and a vanilla kernel
:
: 4.20.0-rc4 4.20.0-rc4 4.20.0-rc4
: kconfigdisable-v1r1 vanilla psidisable-v1r1
: Amean 1 1.3100 ( 0.00%) 1.3923 ( -6.28%) 1.3427 ( -2.49%)
: Amean 3 3.8860 ( 0.00%) 4.1230 * -6.10%* 3.8860 ( -0.00%)
: Amean 5 6.8847 ( 0.00%) 8.0390 * -16.77%* 6.7727 ( 1.63%)
: Amean 7 9.9310 ( 0.00%) 10.8367 * -9.12%* 9.9910 ( -0.60%)
: Amean 12 16.6577 ( 0.00%) 18.2363 * -9.48%* 17.1083 ( -2.71%)
: Amean 18 26.5133 ( 0.00%) 27.8833 * -5.17%* 25.7663 ( 2.82%)
: Amean 24 34.3003 ( 0.00%) 34.6830 ( -1.12%) 32.0450 ( 6.58%)
: Amean 30 40.0063 ( 0.00%) 40.5800 ( -1.43%) 41.5087 ( -3.76%)
: Amean 32 40.1407 ( 0.00%) 41.2273 ( -2.71%) 39.9417 ( 0.50%)
:
: It's showing that the vanilla kernel takes a hit (as the bisection
: indicated it would) and that disabling PSI by default is reasonably
: close in terms of performance for this particular workload on this
: particular machine so;
Link: http://lkml.kernel.org/r/20181127165329.GA29728@cmpxchg.org
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Tested-by: Mel Gorman <mgorman@techsingularity.net>
Reported-by: Mel Gorman <mgorman@techsingularity.net>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
| -rw-r--r-- | Documentation/admin-guide/kernel-parameters.txt | 4 | ||||
| -rw-r--r-- | include/linux/psi.h | 3 | ||||
| -rw-r--r-- | init/Kconfig | 9 | ||||
| -rw-r--r-- | kernel/sched/psi.c | 30 | ||||
| -rw-r--r-- | kernel/sched/stats.h | 8 |
5 files changed, 40 insertions, 14 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 675170c36078..5d6ba930d4f4 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt | |||
| @@ -3505,6 +3505,10 @@ | |||
| 3505 | before loading. | 3505 | before loading. |
| 3506 | See Documentation/blockdev/ramdisk.txt. | 3506 | See Documentation/blockdev/ramdisk.txt. |
| 3507 | 3507 | ||
| 3508 | psi= [KNL] Enable or disable pressure stall information | ||
| 3509 | tracking. | ||
| 3510 | Format: <bool> | ||
| 3511 | |||
| 3508 | psmouse.proto= [HW,MOUSE] Highest PS2 mouse protocol extension to | 3512 | psmouse.proto= [HW,MOUSE] Highest PS2 mouse protocol extension to |
| 3509 | probe for; one of (bare|imps|exps|lifebook|any). | 3513 | probe for; one of (bare|imps|exps|lifebook|any). |
| 3510 | psmouse.rate= [HW,MOUSE] Set desired mouse report rate, in reports | 3514 | psmouse.rate= [HW,MOUSE] Set desired mouse report rate, in reports |
diff --git a/include/linux/psi.h b/include/linux/psi.h index 8e0725aac0aa..7006008d5b72 100644 --- a/include/linux/psi.h +++ b/include/linux/psi.h | |||
| @@ -1,6 +1,7 @@ | |||
| 1 | #ifndef _LINUX_PSI_H | 1 | #ifndef _LINUX_PSI_H |
| 2 | #define _LINUX_PSI_H | 2 | #define _LINUX_PSI_H |
| 3 | 3 | ||
| 4 | #include <linux/jump_label.h> | ||
| 4 | #include <linux/psi_types.h> | 5 | #include <linux/psi_types.h> |
| 5 | #include <linux/sched.h> | 6 | #include <linux/sched.h> |
| 6 | 7 | ||
| @@ -9,7 +10,7 @@ struct css_set; | |||
| 9 | 10 | ||
| 10 | #ifdef CONFIG_PSI | 11 | #ifdef CONFIG_PSI |
| 11 | 12 | ||
| 12 | extern bool psi_disabled; | 13 | extern struct static_key_false psi_disabled; |
| 13 | 14 | ||
| 14 | void psi_init(void); | 15 | void psi_init(void); |
| 15 | 16 | ||
diff --git a/init/Kconfig b/init/Kconfig index a4112e95724a..cf5b5a0dcbc2 100644 --- a/init/Kconfig +++ b/init/Kconfig | |||
| @@ -509,6 +509,15 @@ config PSI | |||
| 509 | 509 | ||
| 510 | Say N if unsure. | 510 | Say N if unsure. |
| 511 | 511 | ||
| 512 | config PSI_DEFAULT_DISABLED | ||
| 513 | bool "Require boot parameter to enable pressure stall information tracking" | ||
| 514 | default n | ||
| 515 | depends on PSI | ||
| 516 | help | ||
| 517 | If set, pressure stall information tracking will be disabled | ||
| 518 | per default but can be enabled through passing psi=1 | ||
| 519 | on the kernel commandline during boot. | ||
| 520 | |||
| 512 | endmenu # "CPU/Task time and stats accounting" | 521 | endmenu # "CPU/Task time and stats accounting" |
| 513 | 522 | ||
| 514 | config CPU_ISOLATION | 523 | config CPU_ISOLATION |
diff --git a/kernel/sched/psi.c b/kernel/sched/psi.c index 3d7355d7c3e3..fe24de3fbc93 100644 --- a/kernel/sched/psi.c +++ b/kernel/sched/psi.c | |||
| @@ -136,8 +136,18 @@ | |||
| 136 | 136 | ||
| 137 | static int psi_bug __read_mostly; | 137 | static int psi_bug __read_mostly; |
| 138 | 138 | ||
| 139 | bool psi_disabled __read_mostly; | 139 | DEFINE_STATIC_KEY_FALSE(psi_disabled); |
| 140 | core_param(psi_disabled, psi_disabled, bool, 0644); | 140 | |
| 141 | #ifdef CONFIG_PSI_DEFAULT_DISABLED | ||
| 142 | bool psi_enable; | ||
| 143 | #else | ||
| 144 | bool psi_enable = true; | ||
| 145 | #endif | ||
| 146 | static int __init setup_psi(char *str) | ||
| 147 | { | ||
| 148 | return kstrtobool(str, &psi_enable) == 0; | ||
| 149 | } | ||
| 150 | __setup("psi=", setup_psi); | ||
| 141 | 151 | ||
| 142 | /* Running averages - we need to be higher-res than loadavg */ | 152 | /* Running averages - we need to be higher-res than loadavg */ |
| 143 | #define PSI_FREQ (2*HZ+1) /* 2 sec intervals */ | 153 | #define PSI_FREQ (2*HZ+1) /* 2 sec intervals */ |
| @@ -169,8 +179,10 @@ static void group_init(struct psi_group *group) | |||
| 169 | 179 | ||
| 170 | void __init psi_init(void) | 180 | void __init psi_init(void) |
| 171 | { | 181 | { |
| 172 | if (psi_disabled) | 182 | if (!psi_enable) { |
| 183 | static_branch_enable(&psi_disabled); | ||
| 173 | return; | 184 | return; |
| 185 | } | ||
| 174 | 186 | ||
| 175 | psi_period = jiffies_to_nsecs(PSI_FREQ); | 187 | psi_period = jiffies_to_nsecs(PSI_FREQ); |
| 176 | group_init(&psi_system); | 188 | group_init(&psi_system); |
| @@ -549,7 +561,7 @@ void psi_memstall_enter(unsigned long *flags) | |||
| 549 | struct rq_flags rf; | 561 | struct rq_flags rf; |
| 550 | struct rq *rq; | 562 | struct rq *rq; |
| 551 | 563 | ||
| 552 | if (psi_disabled) | 564 | if (static_branch_likely(&psi_disabled)) |
| 553 | return; | 565 | return; |
| 554 | 566 | ||
| 555 | *flags = current->flags & PF_MEMSTALL; | 567 | *flags = current->flags & PF_MEMSTALL; |
| @@ -579,7 +591,7 @@ void psi_memstall_leave(unsigned long *flags) | |||
| 579 | struct rq_flags rf; | 591 | struct rq_flags rf; |
| 580 | struct rq *rq; | 592 | struct rq *rq; |
| 581 | 593 | ||
| 582 | if (psi_disabled) | 594 | if (static_branch_likely(&psi_disabled)) |
| 583 | return; | 595 | return; |
| 584 | 596 | ||
| 585 | if (*flags) | 597 | if (*flags) |
| @@ -600,7 +612,7 @@ void psi_memstall_leave(unsigned long *flags) | |||
| 600 | #ifdef CONFIG_CGROUPS | 612 | #ifdef CONFIG_CGROUPS |
| 601 | int psi_cgroup_alloc(struct cgroup *cgroup) | 613 | int psi_cgroup_alloc(struct cgroup *cgroup) |
| 602 | { | 614 | { |
| 603 | if (psi_disabled) | 615 | if (static_branch_likely(&psi_disabled)) |
| 604 | return 0; | 616 | return 0; |
| 605 | 617 | ||
| 606 | cgroup->psi.pcpu = alloc_percpu(struct psi_group_cpu); | 618 | cgroup->psi.pcpu = alloc_percpu(struct psi_group_cpu); |
| @@ -612,7 +624,7 @@ int psi_cgroup_alloc(struct cgroup *cgroup) | |||
| 612 | 624 | ||
| 613 | void psi_cgroup_free(struct cgroup *cgroup) | 625 | void psi_cgroup_free(struct cgroup *cgroup) |
| 614 | { | 626 | { |
| 615 | if (psi_disabled) | 627 | if (static_branch_likely(&psi_disabled)) |
| 616 | return; | 628 | return; |
| 617 | 629 | ||
| 618 | cancel_delayed_work_sync(&cgroup->psi.clock_work); | 630 | cancel_delayed_work_sync(&cgroup->psi.clock_work); |
| @@ -637,7 +649,7 @@ void cgroup_move_task(struct task_struct *task, struct css_set *to) | |||
| 637 | struct rq_flags rf; | 649 | struct rq_flags rf; |
| 638 | struct rq *rq; | 650 | struct rq *rq; |
| 639 | 651 | ||
| 640 | if (psi_disabled) { | 652 | if (static_branch_likely(&psi_disabled)) { |
| 641 | /* | 653 | /* |
| 642 | * Lame to do this here, but the scheduler cannot be locked | 654 | * Lame to do this here, but the scheduler cannot be locked |
| 643 | * from the outside, so we move cgroups from inside sched/. | 655 | * from the outside, so we move cgroups from inside sched/. |
| @@ -673,7 +685,7 @@ int psi_show(struct seq_file *m, struct psi_group *group, enum psi_res res) | |||
| 673 | { | 685 | { |
| 674 | int full; | 686 | int full; |
| 675 | 687 | ||
| 676 | if (psi_disabled) | 688 | if (static_branch_likely(&psi_disabled)) |
| 677 | return -EOPNOTSUPP; | 689 | return -EOPNOTSUPP; |
| 678 | 690 | ||
| 679 | update_stats(group); | 691 | update_stats(group); |
diff --git a/kernel/sched/stats.h b/kernel/sched/stats.h index 4904c4677000..aa0de240fb41 100644 --- a/kernel/sched/stats.h +++ b/kernel/sched/stats.h | |||
| @@ -66,7 +66,7 @@ static inline void psi_enqueue(struct task_struct *p, bool wakeup) | |||
| 66 | { | 66 | { |
| 67 | int clear = 0, set = TSK_RUNNING; | 67 | int clear = 0, set = TSK_RUNNING; |
| 68 | 68 | ||
| 69 | if (psi_disabled) | 69 | if (static_branch_likely(&psi_disabled)) |
| 70 | return; | 70 | return; |
| 71 | 71 | ||
| 72 | if (!wakeup || p->sched_psi_wake_requeue) { | 72 | if (!wakeup || p->sched_psi_wake_requeue) { |
| @@ -86,7 +86,7 @@ static inline void psi_dequeue(struct task_struct *p, bool sleep) | |||
| 86 | { | 86 | { |
| 87 | int clear = TSK_RUNNING, set = 0; | 87 | int clear = TSK_RUNNING, set = 0; |
| 88 | 88 | ||
| 89 | if (psi_disabled) | 89 | if (static_branch_likely(&psi_disabled)) |
| 90 | return; | 90 | return; |
| 91 | 91 | ||
| 92 | if (!sleep) { | 92 | if (!sleep) { |
| @@ -102,7 +102,7 @@ static inline void psi_dequeue(struct task_struct *p, bool sleep) | |||
| 102 | 102 | ||
| 103 | static inline void psi_ttwu_dequeue(struct task_struct *p) | 103 | static inline void psi_ttwu_dequeue(struct task_struct *p) |
| 104 | { | 104 | { |
| 105 | if (psi_disabled) | 105 | if (static_branch_likely(&psi_disabled)) |
| 106 | return; | 106 | return; |
| 107 | /* | 107 | /* |
| 108 | * Is the task being migrated during a wakeup? Make sure to | 108 | * Is the task being migrated during a wakeup? Make sure to |
| @@ -128,7 +128,7 @@ static inline void psi_ttwu_dequeue(struct task_struct *p) | |||
| 128 | 128 | ||
| 129 | static inline void psi_task_tick(struct rq *rq) | 129 | static inline void psi_task_tick(struct rq *rq) |
| 130 | { | 130 | { |
| 131 | if (psi_disabled) | 131 | if (static_branch_likely(&psi_disabled)) |
| 132 | return; | 132 | return; |
| 133 | 133 | ||
| 134 | if (unlikely(rq->curr->flags & PF_MEMSTALL)) | 134 | if (unlikely(rq->curr->flags & PF_MEMSTALL)) |
