author    Juri Lelli <juri.lelli@gmail.com>    2013-11-07 08:43:47 -0500
committer Ingo Molnar <mingo@kernel.org>       2014-01-13 07:46:46 -0500
commit    6bfd6d72f51c51177676f2b1ba113fe0a85fdae4
tree      8c3c4c49f18ba3218da4274623b50da0a317f2d6
parent    332ac17ef5bfcff4766dfdfd3b4cdf10b8f8f155
sched/deadline: speed up SCHED_DEADLINE pushes with a push-heap
Data from tests confirmed that the original active load balancing logic
scaled with neither the number of CPUs nor the number of tasks (as
sched_rt does).

Here we provide a global data structure that keeps track of the deadlines
of the running tasks in the system. The structure is composed of a bitmask
showing the free CPUs and a max-heap, needed when the system is heavily
loaded.

The implementation and concurrent access scheme are kept simple by design.
Even so, our measurements show that we can compete with sched_rt on large
multi-CPU machines [1].

Only the push path is addressed here; extending this structure to pull
decisions as well is straightforward. However, we are currently evaluating
different data structures (in order to decrease/avoid contention) that
could solve both problems. We are also going to re-run the tests taking
into account the recent changes inside cpupri [2].

 [1] http://retis.sssup.it/~jlelli/papers/Ospert11Lelli.pdf
 [2] http://www.spinics.net/lists/linux-rt-users/msg06778.html

Signed-off-by: Juri Lelli <juri.lelli@gmail.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1383831828-15501-14-git-send-email-juri.lelli@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
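The two-level scheme described above can be sketched in a few lines of
plain C. This is a minimal userspace illustration, not the kernel code
added by the patch: push_set()/push_find(), the fixed NCPUS array size and
the raw unsigned-long bitmask are stand-ins for cpudl_set()/cpudl_find(),
the cpumask API and the per-root-domain spinlock, and only insertion or a
deadline increase is handled.

/*
 * Minimal userspace sketch of the push-heap idea -- NOT the kernel code.
 * A max-heap keyed by each CPU's earliest deadline, plus a bitmask of
 * CPUs that currently run no -deadline task.
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NCPUS		4
#define IDX_INVALID	-1

struct item { uint64_t dl; int cpu; };

static struct item heap[NCPUS];		/* max-heap, ordered by deadline */
static int heap_size;
static int cpu_to_idx[NCPUS];		/* CPU -> position in the heap */
static unsigned long free_cpus;		/* bit i set: CPU i runs no -dl task */

static int parent(int i) { return (i - 1) >> 1; }

static void heap_swap(int a, int b)
{
	struct item t = heap[a];

	heap[a] = heap[b];
	heap[b] = t;
	cpu_to_idx[heap[a].cpu] = a;
	cpu_to_idx[heap[b].cpu] = b;
}

/* Record that @cpu now runs a -dl task whose earliest deadline is @dl. */
static void push_set(int cpu, uint64_t dl)
{
	int i = cpu_to_idx[cpu];

	if (i == IDX_INVALID) {
		i = heap_size++;
		heap[i].cpu = cpu;
		cpu_to_idx[cpu] = i;
		free_cpus &= ~(1UL << cpu);
	}
	heap[i].dl = dl;
	while (i > 0 && heap[parent(i)].dl < heap[i].dl) {
		heap_swap(i, parent(i));
		i = parent(i);
	}
}

/* Where can a task with deadline @dl be pushed? Any free CPU first, else
 * the CPU running the latest deadline, if that deadline is later than @dl. */
static int push_find(uint64_t dl)
{
	if (free_cpus)
		return __builtin_ctzl(free_cpus);
	if (heap_size && dl < heap[0].dl)
		return heap[0].cpu;
	return -1;
}

int main(void)
{
	memset(cpu_to_idx, IDX_INVALID, sizeof(cpu_to_idx));
	free_cpus = (1UL << NCPUS) - 1;

	push_set(0, 4000);
	push_set(1, 1500);
	push_set(2, 1000);
	printf("dl 2000 -> CPU %d (CPU 3 still free)\n", push_find(2000));
	push_set(3, 3000);
	printf("dl 2000 -> CPU %d (heap maximum, dl 4000)\n", push_find(2000));
	printf("dl 5000 -> CPU %d (no later deadline anywhere)\n", push_find(5000));
	return 0;
}

As long as some CPU runs no -deadline task the bitmask answers in O(1);
only under full -dl load do the O(log n) heap updates matter, and the heap
maximum replaces the per-CPU scan that latest_cpu_find() used to perform
on every push decision.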
-rw-r--r--  kernel/sched/Makefile        2
-rw-r--r--  kernel/sched/core.c          3
-rw-r--r--  kernel/sched/cpudeadline.c   216
-rw-r--r--  kernel/sched/cpudeadline.h   33
-rw-r--r--  kernel/sched/deadline.c      53
-rw-r--r--  kernel/sched/sched.h         2
6 files changed, 269 insertions(+), 40 deletions(-)
diff --git a/kernel/sched/Makefile b/kernel/sched/Makefile
index b039035a9376..9a95c8c2af2a 100644
--- a/kernel/sched/Makefile
+++ b/kernel/sched/Makefile
@@ -14,7 +14,7 @@ endif
 obj-y += core.o proc.o clock.o cputime.o
 obj-y += idle_task.o fair.o rt.o deadline.o stop_task.o
 obj-y += wait.o completion.o
-obj-$(CONFIG_SMP) += cpupri.o
+obj-$(CONFIG_SMP) += cpupri.o cpudeadline.o
 obj-$(CONFIG_SCHED_AUTOGROUP) += auto_group.o
 obj-$(CONFIG_SCHEDSTATS) += stats.o
 obj-$(CONFIG_SCHED_DEBUG) += debug.o
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c7c68e6b5c51..e30356d6b31f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -5287,6 +5287,7 @@ static void free_rootdomain(struct rcu_head *rcu)
 	struct root_domain *rd = container_of(rcu, struct root_domain, rcu);
 
 	cpupri_cleanup(&rd->cpupri);
+	cpudl_cleanup(&rd->cpudl);
 	free_cpumask_var(rd->dlo_mask);
 	free_cpumask_var(rd->rto_mask);
 	free_cpumask_var(rd->online);
@@ -5345,6 +5346,8 @@ static int init_rootdomain(struct root_domain *rd)
 		goto free_dlo_mask;
 
 	init_dl_bw(&rd->dl_bw);
+	if (cpudl_init(&rd->cpudl) != 0)
+		goto free_dlo_mask;
 
 	if (cpupri_init(&rd->cpupri) != 0)
 		goto free_rto_mask;
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
new file mode 100644
index 000000000000..3bcade554343
--- /dev/null
+++ b/kernel/sched/cpudeadline.c
@@ -0,0 +1,216 @@
+/*
+ * kernel/sched/cpudl.c
+ *
+ * Global CPU deadline management
+ *
+ * Author: Juri Lelli <j.lelli@sssup.it>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License
+ * as published by the Free Software Foundation; version 2
+ * of the License.
+ */
+
+#include <linux/gfp.h>
+#include <linux/kernel.h>
+#include "cpudeadline.h"
+
+static inline int parent(int i)
+{
+	return (i - 1) >> 1;
+}
+
+static inline int left_child(int i)
+{
+	return (i << 1) + 1;
+}
+
+static inline int right_child(int i)
+{
+	return (i << 1) + 2;
+}
+
+static inline int dl_time_before(u64 a, u64 b)
+{
+	return (s64)(a - b) < 0;
+}
+
+void cpudl_exchange(struct cpudl *cp, int a, int b)
+{
+	int cpu_a = cp->elements[a].cpu, cpu_b = cp->elements[b].cpu;
+
+	swap(cp->elements[a], cp->elements[b]);
+	swap(cp->cpu_to_idx[cpu_a], cp->cpu_to_idx[cpu_b]);
+}
+
+void cpudl_heapify(struct cpudl *cp, int idx)
+{
+	int l, r, largest;
+
+	/* adapted from lib/prio_heap.c */
+	while(1) {
+		l = left_child(idx);
+		r = right_child(idx);
+		largest = idx;
+
+		if ((l < cp->size) && dl_time_before(cp->elements[idx].dl,
+							cp->elements[l].dl))
+			largest = l;
+		if ((r < cp->size) && dl_time_before(cp->elements[largest].dl,
+							cp->elements[r].dl))
+			largest = r;
+		if (largest == idx)
+			break;
+
+		/* Push idx down the heap one level and bump one up */
+		cpudl_exchange(cp, largest, idx);
+		idx = largest;
+	}
+}
+
+void cpudl_change_key(struct cpudl *cp, int idx, u64 new_dl)
+{
+	WARN_ON(idx > num_present_cpus() || idx == IDX_INVALID);
+
+	if (dl_time_before(new_dl, cp->elements[idx].dl)) {
+		cp->elements[idx].dl = new_dl;
+		cpudl_heapify(cp, idx);
+	} else {
+		cp->elements[idx].dl = new_dl;
+		while (idx > 0 && dl_time_before(cp->elements[parent(idx)].dl,
+					cp->elements[idx].dl)) {
+			cpudl_exchange(cp, idx, parent(idx));
+			idx = parent(idx);
+		}
+	}
+}
+
+static inline int cpudl_maximum(struct cpudl *cp)
+{
+	return cp->elements[0].cpu;
+}
+
+/*
+ * cpudl_find - find the best (later-dl) CPU in the system
+ * @cp: the cpudl max-heap context
+ * @p: the task
+ * @later_mask: a mask to fill in with the selected CPUs (or NULL)
+ *
+ * Returns: int - best CPU (heap maximum if suitable)
+ */
+int cpudl_find(struct cpudl *cp, struct task_struct *p,
+	       struct cpumask *later_mask)
+{
+	int best_cpu = -1;
+	const struct sched_dl_entity *dl_se = &p->dl;
+
+	if (later_mask && cpumask_and(later_mask, cp->free_cpus,
+			&p->cpus_allowed) && cpumask_and(later_mask,
+			later_mask, cpu_active_mask)) {
+		best_cpu = cpumask_any(later_mask);
+		goto out;
+	} else if (cpumask_test_cpu(cpudl_maximum(cp), &p->cpus_allowed) &&
+			dl_time_before(dl_se->deadline, cp->elements[0].dl)) {
+		best_cpu = cpudl_maximum(cp);
+		if (later_mask)
+			cpumask_set_cpu(best_cpu, later_mask);
+	}
+
+out:
+	WARN_ON(best_cpu > num_present_cpus() && best_cpu != -1);
+
+	return best_cpu;
+}
+
+/*
+ * cpudl_set - update the cpudl max-heap
+ * @cp: the cpudl max-heap context
+ * @cpu: the target cpu
+ * @dl: the new earliest deadline for this cpu
+ *
+ * Notes: assumes cpu_rq(cpu)->lock is locked
+ *
+ * Returns: (void)
+ */
+void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid)
+{
+	int old_idx, new_cpu;
+	unsigned long flags;
+
+	WARN_ON(cpu > num_present_cpus());
+
+	raw_spin_lock_irqsave(&cp->lock, flags);
+	old_idx = cp->cpu_to_idx[cpu];
+	if (!is_valid) {
+		/* remove item */
+		if (old_idx == IDX_INVALID) {
+			/*
+			 * Nothing to remove if old_idx was invalid.
+			 * This could happen if a rq_offline_dl is
+			 * called for a CPU without -dl tasks running.
+			 */
+			goto out;
+		}
+		new_cpu = cp->elements[cp->size - 1].cpu;
+		cp->elements[old_idx].dl = cp->elements[cp->size - 1].dl;
+		cp->elements[old_idx].cpu = new_cpu;
+		cp->size--;
+		cp->cpu_to_idx[new_cpu] = old_idx;
+		cp->cpu_to_idx[cpu] = IDX_INVALID;
+		while (old_idx > 0 && dl_time_before(
+				cp->elements[parent(old_idx)].dl,
+				cp->elements[old_idx].dl)) {
+			cpudl_exchange(cp, old_idx, parent(old_idx));
+			old_idx = parent(old_idx);
+		}
+		cpumask_set_cpu(cpu, cp->free_cpus);
+		cpudl_heapify(cp, old_idx);
+
+		goto out;
+	}
+
+	if (old_idx == IDX_INVALID) {
+		cp->size++;
+		cp->elements[cp->size - 1].dl = 0;
+		cp->elements[cp->size - 1].cpu = cpu;
+		cp->cpu_to_idx[cpu] = cp->size - 1;
+		cpudl_change_key(cp, cp->size - 1, dl);
+		cpumask_clear_cpu(cpu, cp->free_cpus);
+	} else {
+		cpudl_change_key(cp, old_idx, dl);
+	}
+
+out:
+	raw_spin_unlock_irqrestore(&cp->lock, flags);
+}
+
+/*
+ * cpudl_init - initialize the cpudl structure
+ * @cp: the cpudl max-heap context
+ */
+int cpudl_init(struct cpudl *cp)
+{
+	int i;
+
+	memset(cp, 0, sizeof(*cp));
+	raw_spin_lock_init(&cp->lock);
+	cp->size = 0;
+	for (i = 0; i < NR_CPUS; i++)
+		cp->cpu_to_idx[i] = IDX_INVALID;
+	if (!alloc_cpumask_var(&cp->free_cpus, GFP_KERNEL))
+		return -ENOMEM;
+	cpumask_setall(cp->free_cpus);
+
+	return 0;
+}
+
+/*
+ * cpudl_cleanup - clean up the cpudl structure
+ * @cp: the cpudl max-heap context
+ */
+void cpudl_cleanup(struct cpudl *cp)
+{
+	/*
+	 * nothing to do for the moment
+	 */
+}
diff --git a/kernel/sched/cpudeadline.h b/kernel/sched/cpudeadline.h
new file mode 100644
index 000000000000..a202789a412c
--- /dev/null
+++ b/kernel/sched/cpudeadline.h
@@ -0,0 +1,33 @@
+#ifndef _LINUX_CPUDL_H
+#define _LINUX_CPUDL_H
+
+#include <linux/sched.h>
+
+#define IDX_INVALID -1
+
+struct array_item {
+	u64 dl;
+	int cpu;
+};
+
+struct cpudl {
+	raw_spinlock_t lock;
+	int size;
+	int cpu_to_idx[NR_CPUS];
+	struct array_item elements[NR_CPUS];
+	cpumask_var_t free_cpus;
+};
+
+
+#ifdef CONFIG_SMP
+int cpudl_find(struct cpudl *cp, struct task_struct *p,
+	       struct cpumask *later_mask);
+void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid);
+int cpudl_init(struct cpudl *cp);
+void cpudl_cleanup(struct cpudl *cp);
+#else
+#define cpudl_set(cp, cpu, dl) do { } while (0)
+#define cpudl_init() do { } while (0)
+#endif /* CONFIG_SMP */
+
+#endif /* _LINUX_CPUDL_H */
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 802188fb6338..0c6b1d089cd4 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -16,6 +16,8 @@
  */
 #include "sched.h"
 
+#include <linux/slab.h>
+
 struct dl_bandwidth def_dl_bandwidth;
 
 static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
@@ -640,6 +642,7 @@ static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
 	 */
 		dl_rq->earliest_dl.next = dl_rq->earliest_dl.curr;
 		dl_rq->earliest_dl.curr = deadline;
+		cpudl_set(&rq->rd->cpudl, rq->cpu, deadline, 1);
 	} else if (dl_rq->earliest_dl.next == 0 ||
 		   dl_time_before(deadline, dl_rq->earliest_dl.next)) {
 		/*
@@ -663,6 +666,7 @@ static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
 	if (!dl_rq->dl_nr_running) {
 		dl_rq->earliest_dl.curr = 0;
 		dl_rq->earliest_dl.next = 0;
+		cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);
 	} else {
 		struct rb_node *leftmost = dl_rq->rb_leftmost;
 		struct sched_dl_entity *entry;
@@ -670,6 +674,7 @@ static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
 		entry = rb_entry(leftmost, struct sched_dl_entity, rb_node);
 		dl_rq->earliest_dl.curr = entry->deadline;
 		dl_rq->earliest_dl.next = next_deadline(rq);
+		cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline, 1);
 	}
 }
 
@@ -855,9 +860,6 @@ static void yield_task_dl(struct rq *rq)
 #ifdef CONFIG_SMP
 
 static int find_later_rq(struct task_struct *task);
-static int latest_cpu_find(struct cpumask *span,
-			   struct task_struct *task,
-			   struct cpumask *later_mask);
 
 static int
 select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
@@ -904,7 +906,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
 	 * let's hope p can move out.
 	 */
 	if (rq->curr->nr_cpus_allowed == 1 ||
-	    latest_cpu_find(rq->rd->span, rq->curr, NULL) == -1)
+	    cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1)
 		return;
 
 	/*
@@ -912,7 +914,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
 	 * see if it is pushed or pulled somewhere else.
 	 */
 	if (p->nr_cpus_allowed != 1 &&
-	    latest_cpu_find(rq->rd->span, p, NULL) != -1)
+	    cpudl_find(&rq->rd->cpudl, p, NULL) != -1)
 		return;
 
 	resched_task(rq->curr);
@@ -1085,39 +1087,6 @@ next_node:
 	return NULL;
 }
 
-static int latest_cpu_find(struct cpumask *span,
-			   struct task_struct *task,
-			   struct cpumask *later_mask)
-{
-	const struct sched_dl_entity *dl_se = &task->dl;
-	int cpu, found = -1, best = 0;
-	u64 max_dl = 0;
-
-	for_each_cpu(cpu, span) {
-		struct rq *rq = cpu_rq(cpu);
-		struct dl_rq *dl_rq = &rq->dl;
-
-		if (cpumask_test_cpu(cpu, &task->cpus_allowed) &&
-		    (!dl_rq->dl_nr_running || dl_time_before(dl_se->deadline,
-		     dl_rq->earliest_dl.curr))) {
-			if (later_mask)
-				cpumask_set_cpu(cpu, later_mask);
-			if (!best && !dl_rq->dl_nr_running) {
-				best = 1;
-				found = cpu;
-			} else if (!best &&
-				   dl_time_before(max_dl,
-						  dl_rq->earliest_dl.curr)) {
-				max_dl = dl_rq->earliest_dl.curr;
-				found = cpu;
-			}
-		} else if (later_mask)
-			cpumask_clear_cpu(cpu, later_mask);
-	}
-
-	return found;
-}
-
 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
 
 static int find_later_rq(struct task_struct *task)
@@ -1134,7 +1103,8 @@ static int find_later_rq(struct task_struct *task)
 	if (task->nr_cpus_allowed == 1)
 		return -1;
 
-	best_cpu = latest_cpu_find(task_rq(task)->rd->span, task, later_mask);
+	best_cpu = cpudl_find(&task_rq(task)->rd->cpudl,
+			task, later_mask);
 	if (best_cpu == -1)
 		return -1;
 
@@ -1510,6 +1480,9 @@ static void rq_online_dl(struct rq *rq)
 {
 	if (rq->dl.overloaded)
 		dl_set_overload(rq);
+
+	if (rq->dl.dl_nr_running > 0)
+		cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr, 1);
 }
 
 /* Assumes rq->lock is held */
@@ -1517,6 +1490,8 @@ static void rq_offline_dl(struct rq *rq)
 {
 	if (rq->dl.overloaded)
 		dl_clear_overload(rq);
+
+	cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);
 }
 
 void init_sched_dl_class(void)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index ad4f4fbd002e..2b7421db6c41 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -10,6 +10,7 @@
 #include <linux/slab.h>
 
 #include "cpupri.h"
+#include "cpudeadline.h"
 #include "cpuacct.h"
 
 struct rq;
@@ -503,6 +504,7 @@ struct root_domain {
 	cpumask_var_t dlo_mask;
 	atomic_t dlo_count;
 	struct dl_bw dl_bw;
+	struct cpudl cpudl;
 
 	/*
 	 * The "RT overload" flag: it gets set if a CPU has more than