aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Tim Chen <tim.c.chen@linux.intel.com> 2011-02-11 15:49:04 -0500
committer: Len Brown <len.brown@intel.com> 2011-05-29 00:50:59 -0400
commit: 333c5ae9948194428fe6c5ef5c088304fc98263b (patch)
tree: 83d1cf3a781642e2c366086e0b9e244a7b60fae5
parent: 7467571f4480b273007517b26297c07154c73924 (diff)
idle governor: Avoid lock acquisition to read pm_qos before entering idle
Thanks to the reviews and comments by Rafael, James, Mark and Andi. Here's version 2 of the patch incorporating your comments and also some updates to my previous patch comments.

I noticed that before entering idle state, the menu idle governor will look up the current pm_qos target value according to the list of qos requests received. This lookup currently needs the acquisition of a lock to access the list of qos requests to find the qos target value, slowing down the entrance into idle state due to contention by multiple cpus to access this list. The contention is severe when there are a lot of cpus waking and going into idle. For example, for a simple workload that has 32 pairs of processes ping-ponging messages to each other, where 64 cpu cores are active in the test system, I see the following profile with 37.82% of cpu cycles spent in contention of pm_qos_lock:

-     37.82%   swapper   [kernel.kallsyms]   [k] _raw_spin_lock_irqsave
   - _raw_spin_lock_irqsave
      - 95.65% pm_qos_request
           menu_select
           cpuidle_idle_call
         - cpu_idle
              99.98% start_secondary

A better approach will be to cache the updated pm_qos target value so reading it does not require lock acquisition, as in the patch below. With this patch the contention for pm_qos_lock is removed and I saw a 2.2X increase in throughput for my message passing workload.

cc: stable@kernel.org
Signed-off-by: Tim Chen <tim.c.chen@linux.intel.com>
Acked-by: Andi Kleen <ak@linux.intel.com>
Acked-by: James Bottomley <James.Bottomley@suse.de>
Acked-by: mark gross <markgross@thegnar.org>
Signed-off-by: Len Brown <len.brown@intel.com>
-rw-r--r-- include/linux/pm_qos_params.h | 4
-rw-r--r-- kernel/pm_qos_params.c | 37
2 files changed, 29 insertions, 12 deletions
diff --git a/include/linux/pm_qos_params.h b/include/linux/pm_qos_params.h
index 77cbddb3784c..a7d87f911cab 100644
--- a/include/linux/pm_qos_params.h
+++ b/include/linux/pm_qos_params.h
@@ -16,6 +16,10 @@
16#define PM_QOS_NUM_CLASSES 4 16#define PM_QOS_NUM_CLASSES 4
17#define PM_QOS_DEFAULT_VALUE -1 17#define PM_QOS_DEFAULT_VALUE -1
18 18
19#define PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC)
20#define PM_QOS_NETWORK_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC)
21#define PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE 0
22
19struct pm_qos_request_list { 23struct pm_qos_request_list {
20 struct plist_node list; 24 struct plist_node list;
21 int pm_qos_class; 25 int pm_qos_class;
diff --git a/kernel/pm_qos_params.c b/kernel/pm_qos_params.c
index aeaa7f846821..6a8fad82a3ad 100644
--- a/kernel/pm_qos_params.c
+++ b/kernel/pm_qos_params.c
@@ -53,11 +53,17 @@ enum pm_qos_type {
53 PM_QOS_MIN /* return the smallest value */ 53 PM_QOS_MIN /* return the smallest value */
54}; 54};
55 55
56/*
57 * Note: The lockless read path depends on the CPU accessing
58 * target_value atomically. Atomic access is only guaranteed on all CPU
59 * types linux supports for 32 bit quantities
60 */
56struct pm_qos_object { 61struct pm_qos_object {
57 struct plist_head requests; 62 struct plist_head requests;
58 struct blocking_notifier_head *notifiers; 63 struct blocking_notifier_head *notifiers;
59 struct miscdevice pm_qos_power_miscdev; 64 struct miscdevice pm_qos_power_miscdev;
60 char *name; 65 char *name;
66 s32 target_value; /* Do not change to 64 bit */
61 s32 default_value; 67 s32 default_value;
62 enum pm_qos_type type; 68 enum pm_qos_type type;
63}; 69};
@@ -70,7 +76,8 @@ static struct pm_qos_object cpu_dma_pm_qos = {
70 .requests = PLIST_HEAD_INIT(cpu_dma_pm_qos.requests, pm_qos_lock), 76 .requests = PLIST_HEAD_INIT(cpu_dma_pm_qos.requests, pm_qos_lock),
71 .notifiers = &cpu_dma_lat_notifier, 77 .notifiers = &cpu_dma_lat_notifier,
72 .name = "cpu_dma_latency", 78 .name = "cpu_dma_latency",
73 .default_value = 2000 * USEC_PER_SEC, 79 .target_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE,
80 .default_value = PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE,
74 .type = PM_QOS_MIN, 81 .type = PM_QOS_MIN,
75}; 82};
76 83
@@ -79,7 +86,8 @@ static struct pm_qos_object network_lat_pm_qos = {
79 .requests = PLIST_HEAD_INIT(network_lat_pm_qos.requests, pm_qos_lock), 86 .requests = PLIST_HEAD_INIT(network_lat_pm_qos.requests, pm_qos_lock),
80 .notifiers = &network_lat_notifier, 87 .notifiers = &network_lat_notifier,
81 .name = "network_latency", 88 .name = "network_latency",
82 .default_value = 2000 * USEC_PER_SEC, 89 .target_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE,
90 .default_value = PM_QOS_NETWORK_LAT_DEFAULT_VALUE,
83 .type = PM_QOS_MIN 91 .type = PM_QOS_MIN
84}; 92};
85 93
@@ -89,7 +97,8 @@ static struct pm_qos_object network_throughput_pm_qos = {
89 .requests = PLIST_HEAD_INIT(network_throughput_pm_qos.requests, pm_qos_lock), 97 .requests = PLIST_HEAD_INIT(network_throughput_pm_qos.requests, pm_qos_lock),
90 .notifiers = &network_throughput_notifier, 98 .notifiers = &network_throughput_notifier,
91 .name = "network_throughput", 99 .name = "network_throughput",
92 .default_value = 0, 100 .target_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE,
101 .default_value = PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE,
93 .type = PM_QOS_MAX, 102 .type = PM_QOS_MAX,
94}; 103};
95 104
@@ -132,6 +141,16 @@ static inline int pm_qos_get_value(struct pm_qos_object *o)
132 } 141 }
133} 142}
134 143
144static inline s32 pm_qos_read_value(struct pm_qos_object *o)
145{
146 return o->target_value;
147}
148
149static inline void pm_qos_set_value(struct pm_qos_object *o, s32 value)
150{
151 o->target_value = value;
152}
153
135static void update_target(struct pm_qos_object *o, struct plist_node *node, 154static void update_target(struct pm_qos_object *o, struct plist_node *node,
136 int del, int value) 155 int del, int value)
137{ 156{
@@ -156,6 +175,7 @@ static void update_target(struct pm_qos_object *o, struct plist_node *node,
156 plist_add(node, &o->requests); 175 plist_add(node, &o->requests);
157 } 176 }
158 curr_value = pm_qos_get_value(o); 177 curr_value = pm_qos_get_value(o);
178 pm_qos_set_value(o, curr_value);
159 spin_unlock_irqrestore(&pm_qos_lock, flags); 179 spin_unlock_irqrestore(&pm_qos_lock, flags);
160 180
161 if (prev_value != curr_value) 181 if (prev_value != curr_value)
@@ -190,18 +210,11 @@ static int find_pm_qos_object_by_minor(int minor)
190 * pm_qos_request - returns current system wide qos expectation 210 * pm_qos_request - returns current system wide qos expectation
191 * @pm_qos_class: identification of which qos value is requested 211 * @pm_qos_class: identification of which qos value is requested
192 * 212 *
193 * This function returns the current target value in an atomic manner. 213 * This function returns the current target value.
194 */ 214 */
195int pm_qos_request(int pm_qos_class) 215int pm_qos_request(int pm_qos_class)
196{ 216{
197 unsigned long flags; 217 return pm_qos_read_value(pm_qos_array[pm_qos_class]);
198 int value;
199
200 spin_lock_irqsave(&pm_qos_lock, flags);
201 value = pm_qos_get_value(pm_qos_array[pm_qos_class]);
202 spin_unlock_irqrestore(&pm_qos_lock, flags);
203
204 return value;
205} 218}
206EXPORT_SYMBOL_GPL(pm_qos_request); 219EXPORT_SYMBOL_GPL(pm_qos_request);
207 220