diff options
author | Marcelo Tosatti <mtosatti@redhat.com> | 2019-07-03 19:51:28 -0400 |
---|---|---|
committer | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2019-07-30 11:27:37 -0400 |
commit | 2cffe9f6b96fece065ee8522673c90e92ef2085d (patch) | |
tree | 9d8f97f6710e7abfd7698705ba75bad51401be6f /drivers/cpuidle | |
parent | 7d4daeedd575bbc3c40c87fc6708a8b88c50fe7e (diff) |
cpuidle: add haltpoll governor
The cpuidle_haltpoll governor, in conjunction with the haltpoll cpuidle
driver, allows guest vcpus to poll for a specified amount of time before
halting.
This provides the following benefits over host-side polling:
1) The POLL flag is set while polling is performed, which allows
a remote vCPU to avoid sending an IPI (and the associated
cost of handling the IPI) when performing a wakeup.
2) The VM-exit cost can be avoided.
The downside of guest side polling is that polling is performed
even with other runnable tasks in the host.
Results comparing host-side polling (halt_poll_ns) with guest-side polling
(guest_halt_poll_ns) for a server/client application where a small packet
is ping-ponged:
host --> 31.33
halt_poll_ns=300000 / no guest busy spin --> 33.40 (93.8%)
halt_poll_ns=0 / guest_halt_poll_ns=300000 --> 32.73 (95.7%)
For the SAP HANA benchmarks (where idle_spin is a parameter
of the previous version of the patch, results should be the
same):
hpns == halt_poll_ns

                            idle_spin=0/   idle_spin=800/   idle_spin=0/
                            hpns=200000    hpns=0           hpns=800000
DeleteC06T03 (100 thread)   1.76           1.71 (-3%)       1.78 (+1%)
InsertC16T02 (100 thread)   2.14           2.07 (-3%)       2.18 (+1.8%)
DeleteC00T01 (1 thread)     1.34           1.28 (-4.5%)     1.29 (-3.7%)
UpdateC00T03 (1 thread)     4.72           4.18 (-12%)      4.53 (-5%)
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Diffstat (limited to 'drivers/cpuidle')
-rw-r--r-- | drivers/cpuidle/Kconfig | 11 | ||||
-rw-r--r-- | drivers/cpuidle/governors/Makefile | 1 | ||||
-rw-r--r-- | drivers/cpuidle/governors/haltpoll.c | 150 |
3 files changed, 162 insertions, 0 deletions
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig index cc8efc56be7d..88727b7c0d59 100644 --- a/drivers/cpuidle/Kconfig +++ b/drivers/cpuidle/Kconfig | |||
@@ -33,6 +33,17 @@ config CPU_IDLE_GOV_TEO | |||
33 | Some workloads benefit from using it and it generally should be safe | 33 | Some workloads benefit from using it and it generally should be safe |
34 | to use. Say Y here if you are not happy with the alternatives. | 34 | to use. Say Y here if you are not happy with the alternatives. |
35 | 35 | ||
36 | config CPU_IDLE_GOV_HALTPOLL | ||
37 | bool "Haltpoll governor (for virtualized systems)" | ||
38 | depends on KVM_GUEST | ||
39 | help | ||
40 | This governor implements haltpoll idle state selection, to be | ||
41 | used in conjunction with the haltpoll cpuidle driver, allowing | ||
42 | for polling for a certain amount of time before entering idle | ||
43 | state. | ||
44 | |||
45 | Some virtualized workloads benefit from using it. | ||
46 | |||
36 | config DT_IDLE_STATES | 47 | config DT_IDLE_STATES |
37 | bool | 48 | bool |
38 | 49 | ||
diff --git a/drivers/cpuidle/governors/Makefile b/drivers/cpuidle/governors/Makefile index 42f44cc610dd..63abb5393a4d 100644 --- a/drivers/cpuidle/governors/Makefile +++ b/drivers/cpuidle/governors/Makefile | |||
@@ -6,3 +6,4 @@ | |||
6 | obj-$(CONFIG_CPU_IDLE_GOV_LADDER) += ladder.o | 6 | obj-$(CONFIG_CPU_IDLE_GOV_LADDER) += ladder.o |
7 | obj-$(CONFIG_CPU_IDLE_GOV_MENU) += menu.o | 7 | obj-$(CONFIG_CPU_IDLE_GOV_MENU) += menu.o |
8 | obj-$(CONFIG_CPU_IDLE_GOV_TEO) += teo.o | 8 | obj-$(CONFIG_CPU_IDLE_GOV_TEO) += teo.o |
9 | obj-$(CONFIG_CPU_IDLE_GOV_HALTPOLL) += haltpoll.o | ||
diff --git a/drivers/cpuidle/governors/haltpoll.c b/drivers/cpuidle/governors/haltpoll.c new file mode 100644 index 000000000000..797477bda486 --- /dev/null +++ b/drivers/cpuidle/governors/haltpoll.c | |||
@@ -0,0 +1,150 @@ | |||
1 | // SPDX-License-Identifier: GPL-2.0 | ||
2 | /* | ||
3 | * haltpoll.c - haltpoll idle governor | ||
4 | * | ||
5 | * Copyright 2019 Red Hat, Inc. and/or its affiliates. | ||
6 | * | ||
7 | * This work is licensed under the terms of the GNU GPL, version 2. See | ||
8 | * the COPYING file in the top-level directory. | ||
9 | * | ||
10 | * Authors: Marcelo Tosatti <mtosatti@redhat.com> | ||
11 | */ | ||
12 | |||
13 | #include <linux/kernel.h> | ||
14 | #include <linux/cpuidle.h> | ||
15 | #include <linux/time.h> | ||
16 | #include <linux/ktime.h> | ||
17 | #include <linux/hrtimer.h> | ||
18 | #include <linux/tick.h> | ||
19 | #include <linux/sched.h> | ||
20 | #include <linux/module.h> | ||
21 | #include <linux/kvm_para.h> | ||
22 | |||
23 | static unsigned int guest_halt_poll_ns __read_mostly = 200000; /* upper bound, in ns, for per-cpu poll_limit_ns */ | ||
24 | module_param(guest_halt_poll_ns, uint, 0644); | ||
25 | |||
26 | /* division factor to shrink per-cpu poll_limit_ns (0 resets it to 0) */ | ||
27 | static unsigned int guest_halt_poll_shrink __read_mostly = 2; | ||
28 | module_param(guest_halt_poll_shrink, uint, 0644); | ||
29 | |||
30 | /* multiplication factor to grow per-cpu poll_limit_ns */ | ||
31 | static unsigned int guest_halt_poll_grow __read_mostly = 2; | ||
32 | module_param(guest_halt_poll_grow, uint, 0644); | ||
33 | |||
34 | /* value in ns: floor applied to per-cpu poll_limit_ns each time it grows */ | ||
35 | static unsigned int guest_halt_poll_grow_start __read_mostly = 50000; | ||
36 | module_param(guest_halt_poll_grow_start, uint, 0644); | ||
37 | |||
38 | /* allow shrinking per-cpu poll_limit_ns */ | ||
39 | static bool guest_halt_poll_allow_shrink __read_mostly = true; | ||
40 | module_param(guest_halt_poll_allow_shrink, bool, 0644); | ||
41 | |||
42 | /** | ||
43 | * haltpoll_select - selects the next idle state: returns 0 to poll, 1 to halt | ||
44 | * @drv: cpuidle driver; only its state_count is read here | ||
45 | * @dev: the CPU (its poll_limit_ns, last_state_idx and poll_time_limit are read) | ||
46 | * @stop_tick: set to false whenever 0 is returned; left untouched when 1 is returned | ||
47 | */ | ||
48 | static int haltpoll_select(struct cpuidle_driver *drv, | ||
49 | struct cpuidle_device *dev, | ||
50 | bool *stop_tick) | ||
51 | { | ||
52 | int latency_req = cpuidle_governor_latency_req(dev->cpu); | ||
53 | |||
54 | if (!drv->state_count || latency_req == 0) { /* no states, or a latency request of 0: state 0 */ | ||
55 | *stop_tick = false; | ||
56 | return 0; | ||
57 | } | ||
58 | |||
59 | if (dev->poll_limit_ns == 0) /* poll limit is zero: skip polling */ | ||
60 | return 1; | ||
61 | |||
62 | /* Was the last entered state the poll state (index 0)? */ | ||
63 | if (dev->last_state_idx == 0) { | ||
64 | /* Halt if the poll window ran out (poll_time_limit set) */ | ||
65 | if (dev->poll_time_limit == true) | ||
66 | return 1; | ||
67 | |||
68 | *stop_tick = false; | ||
69 | /* Otherwise the poll ended before its limit: poll again */ | ||
70 | return 0; | ||
71 | } | ||
72 | |||
73 | *stop_tick = false; | ||
74 | /* Last state was not poll (i.e. halt): start with a poll */ | ||
75 | return 0; | ||
76 | } | ||
77 | |||
78 | /* | ||
79 | * Re-tune dev->poll_limit_ns from block_us, the residency (in us) that | ||
80 | * haltpoll_reflect() passes in after a non-poll state was entered. | ||
81 | */ | ||
82 | static void adjust_poll_limit(struct cpuidle_device *dev, unsigned int block_us) | ||
83 | { | ||
84 | u64 block_ns = (u64)block_us * NSEC_PER_USEC; /* widen first: no 32-bit wrap */ | ||
85 | |||
86 | /* Grow poll_limit_ns if poll_limit_ns < block_ns <= guest_halt_poll_ns */ | ||
87 | if (block_ns > dev->poll_limit_ns && block_ns <= guest_halt_poll_ns) { | ||
88 | u64 grown = (u64)dev->poll_limit_ns * guest_halt_poll_grow; /* 64-bit product */ | ||
89 | |||
90 | if (grown < guest_halt_poll_grow_start) | ||
91 | grown = guest_halt_poll_grow_start; | ||
92 | if (grown > guest_halt_poll_ns) | ||
93 | grown = guest_halt_poll_ns; | ||
94 | dev->poll_limit_ns = grown; /* clamped to guest_halt_poll_ns above */ | ||
95 | } else if (block_ns > guest_halt_poll_ns && | ||
96 | guest_halt_poll_allow_shrink) { | ||
97 | unsigned int shrink = guest_halt_poll_shrink; | ||
98 | unsigned int val = dev->poll_limit_ns; | ||
99 | |||
100 | if (shrink == 0) | ||
101 | val = 0; | ||
102 | else | ||
103 | val /= shrink; | ||
104 | dev->poll_limit_ns = val; | ||
105 | } | ||
106 | } | ||
107 | |||
108 | /** | ||
109 | * haltpoll_reflect - record the entered state and re-tune the poll limit | ||
110 | * @dev: the CPU; its last_state_idx is written and last_residency is read | ||
111 | * @index: the index of the state that was actually entered | ||
112 | */ | ||
113 | static void haltpoll_reflect(struct cpuidle_device *dev, int index) | ||
114 | { | ||
115 | dev->last_state_idx = index; | ||
116 | |||
117 | if (index != 0) /* only a non-poll state feeds the poll limit adjustment */ | ||
118 | adjust_poll_limit(dev, dev->last_residency); | ||
119 | } | ||
120 | |||
121 | /** | ||
122 | * haltpoll_enable_device - governor .enable callback: resets the poll limit | ||
123 | * @drv: cpuidle driver (not used here) | ||
124 | * @dev: the CPU whose poll_limit_ns is set to 0 | ||
125 | */ | ||
126 | static int haltpoll_enable_device(struct cpuidle_driver *drv, | ||
127 | struct cpuidle_device *dev) | ||
128 | { | ||
129 | dev->poll_limit_ns = 0; | ||
130 | |||
131 | return 0; /* always reports success */ | ||
132 | } | ||
133 | |||
134 | static struct cpuidle_governor haltpoll_governor = { /* descriptor handed to cpuidle_register_governor() by init_haltpoll() */ | ||
135 | .name = "haltpoll", | ||
136 | .rating = 21, | ||
137 | .enable = haltpoll_enable_device, | ||
138 | .select = haltpoll_select, | ||
139 | .reflect = haltpoll_reflect, | ||
140 | }; | ||
141 | |||
142 | static int __init init_haltpoll(void) /* registers the governor only when kvm_para_available() is true */ | ||
143 | { | ||
144 | if (kvm_para_available()) | ||
145 | return cpuidle_register_governor(&haltpoll_governor); | ||
146 | |||
147 | return 0; /* nothing registered, but still report success */ | ||
148 | } | ||
149 | |||
150 | postcore_initcall(init_haltpoll); | ||