summaryrefslogtreecommitdiffstats
path: root/drivers/cpuidle
diff options
context:
space:
mode:
authorMarcelo Tosatti <mtosatti@redhat.com>2019-07-03 19:51:28 -0400
committerRafael J. Wysocki <rafael.j.wysocki@intel.com>2019-07-30 11:27:37 -0400
commit2cffe9f6b96fece065ee8522673c90e92ef2085d (patch)
tree9d8f97f6710e7abfd7698705ba75bad51401be6f /drivers/cpuidle
parent7d4daeedd575bbc3c40c87fc6708a8b88c50fe7e (diff)
cpuidle: add haltpoll governor
The cpuidle_haltpoll governor, in conjunction with the haltpoll cpuidle driver, allows guest vcpus to poll for a specified amount of time before halting.

This provides the following benefits compared to host-side polling:

1) The POLL flag is set while polling is performed, which allows a remote vCPU to avoid sending an IPI (and the associated cost of handling the IPI) when performing a wakeup.

2) The VM-exit cost can be avoided.

The downside of guest-side polling is that polling is performed even when there are other runnable tasks in the host.

Results comparing host-side (halt_poll_ns) and guest-side (guest_halt_poll_ns) polling for a server/client application in which a small packet is ping-ponged:

host                                        --> 31.33
halt_poll_ns=300000 / no guest busy spin    --> 33.40 (93.8%)
halt_poll_ns=0 / guest_halt_poll_ns=300000  --> 32.73 (95.7%)

For the SAP HANA benchmarks (where idle_spin is a parameter of the previous version of the patch, results should be the same):

hpns == halt_poll_ns

                           idle_spin=0/   idle_spin=800/   idle_spin=0/
                           hpns=200000    hpns=0           hpns=800000
DeleteC06T03 (100 thread)  1.76           1.71 (-3%)       1.78 (+1%)
InsertC16T02 (100 thread)  2.14           2.07 (-3%)       2.18 (+1.8%)
DeleteC00T01 (1 thread)    1.34           1.28 (-4.5%)     1.29 (-3.7%)
UpdateC00T03 (1 thread)    4.72           4.18 (-12%)      4.53 (-5%)

Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Diffstat (limited to 'drivers/cpuidle')
-rw-r--r--drivers/cpuidle/Kconfig11
-rw-r--r--drivers/cpuidle/governors/Makefile1
-rw-r--r--drivers/cpuidle/governors/haltpoll.c150
3 files changed, 162 insertions, 0 deletions
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
index cc8efc56be7d..88727b7c0d59 100644
--- a/drivers/cpuidle/Kconfig
+++ b/drivers/cpuidle/Kconfig
@@ -33,6 +33,17 @@ config CPU_IDLE_GOV_TEO
33 Some workloads benefit from using it and it generally should be safe 33 Some workloads benefit from using it and it generally should be safe
34 to use. Say Y here if you are not happy with the alternatives. 34 to use. Say Y here if you are not happy with the alternatives.
35 35
36config CPU_IDLE_GOV_HALTPOLL
37 bool "Haltpoll governor (for virtualized systems)"
38 depends on KVM_GUEST
39 help
40 This governor implements haltpoll idle state selection, to be
41 used in conjunction with the haltpoll cpuidle driver, allowing
42 for polling for a certain amount of time before entering idle
43 state.
44
45 Some virtualized workloads benefit from using it.
46
36config DT_IDLE_STATES 47config DT_IDLE_STATES
37 bool 48 bool
38 49
diff --git a/drivers/cpuidle/governors/Makefile b/drivers/cpuidle/governors/Makefile
index 42f44cc610dd..63abb5393a4d 100644
--- a/drivers/cpuidle/governors/Makefile
+++ b/drivers/cpuidle/governors/Makefile
@@ -6,3 +6,4 @@
6obj-$(CONFIG_CPU_IDLE_GOV_LADDER) += ladder.o 6obj-$(CONFIG_CPU_IDLE_GOV_LADDER) += ladder.o
7obj-$(CONFIG_CPU_IDLE_GOV_MENU) += menu.o 7obj-$(CONFIG_CPU_IDLE_GOV_MENU) += menu.o
8obj-$(CONFIG_CPU_IDLE_GOV_TEO) += teo.o 8obj-$(CONFIG_CPU_IDLE_GOV_TEO) += teo.o
9obj-$(CONFIG_CPU_IDLE_GOV_HALTPOLL) += haltpoll.o
diff --git a/drivers/cpuidle/governors/haltpoll.c b/drivers/cpuidle/governors/haltpoll.c
new file mode 100644
index 000000000000..797477bda486
--- /dev/null
+++ b/drivers/cpuidle/governors/haltpoll.c
@@ -0,0 +1,150 @@
1// SPDX-License-Identifier: GPL-2.0
2/*
3 * haltpoll.c - haltpoll idle governor
4 *
5 * Copyright 2019 Red Hat, Inc. and/or its affiliates.
6 *
7 * This work is licensed under the terms of the GNU GPL, version 2. See
8 * the COPYING file in the top-level directory.
9 *
10 * Authors: Marcelo Tosatti <mtosatti@redhat.com>
11 */
12
13#include <linux/kernel.h>
14#include <linux/cpuidle.h>
15#include <linux/time.h>
16#include <linux/ktime.h>
17#include <linux/hrtimer.h>
18#include <linux/tick.h>
19#include <linux/sched.h>
20#include <linux/module.h>
21#include <linux/kvm_para.h>
22
23static unsigned int guest_halt_poll_ns __read_mostly = 200000;
24module_param(guest_halt_poll_ns, uint, 0644);
25
26/* division factor to shrink halt_poll_ns */
27static unsigned int guest_halt_poll_shrink __read_mostly = 2;
28module_param(guest_halt_poll_shrink, uint, 0644);
29
30/* multiplication factor to grow per-cpu poll_limit_ns */
31static unsigned int guest_halt_poll_grow __read_mostly = 2;
32module_param(guest_halt_poll_grow, uint, 0644);
33
34/* value in us to start growing per-cpu halt_poll_ns */
35static unsigned int guest_halt_poll_grow_start __read_mostly = 50000;
36module_param(guest_halt_poll_grow_start, uint, 0644);
37
38/* allow shrinking guest halt poll */
39static bool guest_halt_poll_allow_shrink __read_mostly = true;
40module_param(guest_halt_poll_allow_shrink, bool, 0644);
41
42/**
43 * haltpoll_select - selects the next idle state to enter
44 * @drv: cpuidle driver containing state data
45 * @dev: the CPU
46 * @stop_tick: indication on whether or not to stop the tick
47 */
48static int haltpoll_select(struct cpuidle_driver *drv,
49 struct cpuidle_device *dev,
50 bool *stop_tick)
51{
52 int latency_req = cpuidle_governor_latency_req(dev->cpu);
53
54 if (!drv->state_count || latency_req == 0) {
55 *stop_tick = false;
56 return 0;
57 }
58
59 if (dev->poll_limit_ns == 0)
60 return 1;
61
62 /* Last state was poll? */
63 if (dev->last_state_idx == 0) {
64 /* Halt if no event occurred on poll window */
65 if (dev->poll_time_limit == true)
66 return 1;
67
68 *stop_tick = false;
69 /* Otherwise, poll again */
70 return 0;
71 }
72
73 *stop_tick = false;
74 /* Last state was halt: poll */
75 return 0;
76}
77
78static void adjust_poll_limit(struct cpuidle_device *dev, unsigned int block_us)
79{
80 unsigned int val;
81 u64 block_ns = block_us*NSEC_PER_USEC;
82
83 /* Grow cpu_halt_poll_us if
84 * cpu_halt_poll_us < block_ns < guest_halt_poll_us
85 */
86 if (block_ns > dev->poll_limit_ns && block_ns <= guest_halt_poll_ns) {
87 val = dev->poll_limit_ns * guest_halt_poll_grow;
88
89 if (val < guest_halt_poll_grow_start)
90 val = guest_halt_poll_grow_start;
91 if (val > guest_halt_poll_ns)
92 val = guest_halt_poll_ns;
93
94 dev->poll_limit_ns = val;
95 } else if (block_ns > guest_halt_poll_ns &&
96 guest_halt_poll_allow_shrink) {
97 unsigned int shrink = guest_halt_poll_shrink;
98
99 val = dev->poll_limit_ns;
100 if (shrink == 0)
101 val = 0;
102 else
103 val /= shrink;
104 dev->poll_limit_ns = val;
105 }
106}
107
108/**
109 * haltpoll_reflect - update variables and update poll time
110 * @dev: the CPU
111 * @index: the index of actual entered state
112 */
113static void haltpoll_reflect(struct cpuidle_device *dev, int index)
114{
115 dev->last_state_idx = index;
116
117 if (index != 0)
118 adjust_poll_limit(dev, dev->last_residency);
119}
120
121/**
122 * haltpoll_enable_device - scans a CPU's states and does setup
123 * @drv: cpuidle driver
124 * @dev: the CPU
125 */
126static int haltpoll_enable_device(struct cpuidle_driver *drv,
127 struct cpuidle_device *dev)
128{
129 dev->poll_limit_ns = 0;
130
131 return 0;
132}
133
134static struct cpuidle_governor haltpoll_governor = {
135 .name = "haltpoll",
136 .rating = 21,
137 .enable = haltpoll_enable_device,
138 .select = haltpoll_select,
139 .reflect = haltpoll_reflect,
140};
141
142static int __init init_haltpoll(void)
143{
144 if (kvm_para_available())
145 return cpuidle_register_governor(&haltpoll_governor);
146
147 return 0;
148}
149
150postcore_initcall(init_haltpoll);