author | Juri Lelli <juri.lelli@gmail.com> | 2013-11-07 08:43:47 -0500
---|---|---
committer | Ingo Molnar <mingo@kernel.org> | 2014-01-13 07:46:46 -0500
commit | 6bfd6d72f51c51177676f2b1ba113fe0a85fdae4 (patch) |
tree | 8c3c4c49f18ba3218da4274623b50da0a317f2d6 /kernel/sched/deadline.c |
parent | 332ac17ef5bfcff4766dfdfd3b4cdf10b8f8f155 (diff) |
sched/deadline: speed up SCHED_DEADLINE pushes with a push-heap
Data from tests confirmed that the original active load-balancing
logic did not scale in either the number of CPUs or the number of
tasks (whereas sched_rt's does).
Here we provide a global data structure that keeps track of the
deadlines of the running tasks in the system. The structure is
composed of a bitmask showing the free CPUs and a max-heap, ordered
by the CPUs' earliest deadlines, which is needed when the system is
heavily loaded.
The implementation and the concurrent-access scheme are kept simple
by design. Even so, our measurements show that we can compete with
sched_rt on large multi-CPU machines [1].
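
For illustration, the structure at the heart of this change can be sketched as below. This is a rough sketch only: the real definition is added by this commit in kernel/sched/cpudeadline.{h,c}, which the filtered diffstat below does not show, and the field names here are illustrative rather than verbatim.

```c
/*
 * Rough sketch of the global "push-heap" described above; field
 * names are illustrative, not a verbatim copy of the kernel's
 * kernel/sched/cpudeadline.h.
 */
struct cpudl_item {
	u64 dl;			/* earliest deadline running on @cpu */
	int cpu;
};

struct cpudl {
	raw_spinlock_t lock;		/* one lock: simple by design */
	int size;
	cpumask_var_t free_cpus;	/* CPUs running no -deadline task */
	struct cpudl_item *elements;	/* max-heap; root = latest deadline */
};
```

A push then either grabs a free CPU from the bitmask or, in O(1), reads the heap root: the CPU whose earliest deadline is latest, and therefore the best preemption victim.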
Only the push path is addressed here; extending this structure to
pull decisions as well is straightforward. However, we are currently
evaluating different data structures (in order to decrease/avoid
contention) that could solve both problems. We are also going to
re-run the tests considering the recent changes inside cpupri [2].
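
Concretely, the push path queries the heap through cpudl_find() and keeps it current through cpudl_set(), the two calls visible in the diff below. A minimal usage sketch, mirroring find_later_rq() in the diff and adding no API beyond what the patch itself uses:

```c
/*
 * Minimal push-side usage sketch, mirroring find_later_rq() in the
 * diff below; illustrative, not verbatim kernel code.
 */
static int find_push_target(struct task_struct *task,
			    struct cpumask *later_mask)
{
	/* A task pinned to a single CPU can never be pushed away. */
	if (task->nr_cpus_allowed == 1)
		return -1;

	/*
	 * One heap query replaces the O(nr_cpus) walk over the
	 * root-domain span that latest_cpu_find() used to perform.
	 */
	return cpudl_find(&task_rq(task)->rd->cpudl, task, later_mask);
}
```

On the update side, inc_dl_deadline()/dec_dl_deadline() call cpudl_set(&rq->rd->cpudl, rq->cpu, deadline, 1) whenever a CPU's earliest deadline changes, and cpudl_set(..., 0, 0) to mark the CPU free again.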
[1] http://retis.sssup.it/~jlelli/papers/Ospert11Lelli.pdf
[2] http://www.spinics.net/lists/linux-rt-users/msg06778.html
Signed-off-by: Juri Lelli <juri.lelli@gmail.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1383831828-15501-14-git-send-email-juri.lelli@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/sched/deadline.c')
-rw-r--r-- | kernel/sched/deadline.c | 53 |
1 file changed, 14 insertions(+), 39 deletions(-)
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 802188fb6338..0c6b1d089cd4 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -16,6 +16,8 @@
  */
 #include "sched.h"
 
+#include <linux/slab.h>
+
 struct dl_bandwidth def_dl_bandwidth;
 
 static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
@@ -640,6 +642,7 @@ static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
 		 */
 		dl_rq->earliest_dl.next = dl_rq->earliest_dl.curr;
 		dl_rq->earliest_dl.curr = deadline;
+		cpudl_set(&rq->rd->cpudl, rq->cpu, deadline, 1);
 	} else if (dl_rq->earliest_dl.next == 0 ||
 		   dl_time_before(deadline, dl_rq->earliest_dl.next)) {
 		/*
@@ -663,6 +666,7 @@ static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
 	if (!dl_rq->dl_nr_running) {
 		dl_rq->earliest_dl.curr = 0;
 		dl_rq->earliest_dl.next = 0;
+		cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);
 	} else {
 		struct rb_node *leftmost = dl_rq->rb_leftmost;
 		struct sched_dl_entity *entry;
@@ -670,6 +674,7 @@ static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
 		entry = rb_entry(leftmost, struct sched_dl_entity, rb_node);
 		dl_rq->earliest_dl.curr = entry->deadline;
 		dl_rq->earliest_dl.next = next_deadline(rq);
+		cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline, 1);
 	}
 }
 
@@ -855,9 +860,6 @@ static void yield_task_dl(struct rq *rq)
 #ifdef CONFIG_SMP
 
 static int find_later_rq(struct task_struct *task);
-static int latest_cpu_find(struct cpumask *span,
-			   struct task_struct *task,
-			   struct cpumask *later_mask);
 
 static int
 select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
@@ -904,7 +906,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
 	 * let's hope p can move out.
 	 */
 	if (rq->curr->nr_cpus_allowed == 1 ||
-	    latest_cpu_find(rq->rd->span, rq->curr, NULL) == -1)
+	    cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1)
 		return;
 
 	/*
@@ -912,7 +914,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
 	 * see if it is pushed or pulled somewhere else.
 	 */
 	if (p->nr_cpus_allowed != 1 &&
-	    latest_cpu_find(rq->rd->span, p, NULL) != -1)
+	    cpudl_find(&rq->rd->cpudl, p, NULL) != -1)
 		return;
 
 	resched_task(rq->curr);
@@ -1085,39 +1087,6 @@ next_node:
 	return NULL;
 }
 
-static int latest_cpu_find(struct cpumask *span,
-			   struct task_struct *task,
-			   struct cpumask *later_mask)
-{
-	const struct sched_dl_entity *dl_se = &task->dl;
-	int cpu, found = -1, best = 0;
-	u64 max_dl = 0;
-
-	for_each_cpu(cpu, span) {
-		struct rq *rq = cpu_rq(cpu);
-		struct dl_rq *dl_rq = &rq->dl;
-
-		if (cpumask_test_cpu(cpu, &task->cpus_allowed) &&
-		    (!dl_rq->dl_nr_running || dl_time_before(dl_se->deadline,
-		     dl_rq->earliest_dl.curr))) {
-			if (later_mask)
-				cpumask_set_cpu(cpu, later_mask);
-			if (!best && !dl_rq->dl_nr_running) {
-				best = 1;
-				found = cpu;
-			} else if (!best &&
-				   dl_time_before(max_dl,
-						  dl_rq->earliest_dl.curr)) {
-				max_dl = dl_rq->earliest_dl.curr;
-				found = cpu;
-			}
-		} else if (later_mask)
-			cpumask_clear_cpu(cpu, later_mask);
-	}
-
-	return found;
-}
-
 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
 
 static int find_later_rq(struct task_struct *task)
@@ -1134,7 +1103,8 @@ static int find_later_rq(struct task_struct *task)
 	if (task->nr_cpus_allowed == 1)
 		return -1;
 
-	best_cpu = latest_cpu_find(task_rq(task)->rd->span, task, later_mask);
+	best_cpu = cpudl_find(&task_rq(task)->rd->cpudl,
+			task, later_mask);
 	if (best_cpu == -1)
 		return -1;
 
@@ -1510,6 +1480,9 @@ static void rq_online_dl(struct rq *rq)
 {
 	if (rq->dl.overloaded)
 		dl_set_overload(rq);
+
+	if (rq->dl.dl_nr_running > 0)
+		cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr, 1);
 }
 
 /* Assumes rq->lock is held */
@@ -1517,6 +1490,8 @@ static void rq_offline_dl(struct rq *rq)
 {
 	if (rq->dl.overloaded)
 		dl_clear_overload(rq);
+
+	cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);
 }
 
 void init_sched_dl_class(void)
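
The cpudl_find() implementation itself lives in kernel/sched/cpudeadline.c and is therefore outside this filtered diff. Conceptually it works roughly as sketched below, under the assumptions already noted: the struct layout is illustrative, and cpudl_maximum() is an assumed helper returning the CPU stored at the heap root.

```c
/*
 * Conceptual sketch of the heap-backed lookup; illustrative, not the
 * code from kernel/sched/cpudeadline.c.
 */
static int cpudl_find_sketch(struct cpudl *cp, struct task_struct *p,
			     struct cpumask *later_mask)
{
	int max_cpu = cpudl_maximum(cp);	/* assumed helper: heap root CPU */

	/* Best case: an allowed CPU that runs no -deadline task at all. */
	if (later_mask &&
	    cpumask_and(later_mask, cp->free_cpus, &p->cpus_allowed))
		return cpumask_any(later_mask);

	/*
	 * Otherwise the heap root gives, in O(1), the CPU whose earliest
	 * deadline is latest; p can preempt there only if its own
	 * deadline is earlier.
	 */
	if (cpumask_test_cpu(max_cpu, &p->cpus_allowed) &&
	    dl_time_before(p->dl.deadline, cp->elements[0].dl))
		return max_cpu;

	return -1;
}
```

Either way, the former O(nr_cpus) scan becomes an O(1) query, with cpudl_set() paying O(log nr_cpus) per deadline update to keep the heap ordered.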