path: root/kernel/sched/deadline.c
author    Juri Lelli <juri.lelli@gmail.com>    2013-11-07 08:43:47 -0500
committer Ingo Molnar <mingo@kernel.org>       2014-01-13 07:46:46 -0500
commit    6bfd6d72f51c51177676f2b1ba113fe0a85fdae4 (patch)
tree      8c3c4c49f18ba3218da4274623b50da0a317f2d6 /kernel/sched/deadline.c
parent    332ac17ef5bfcff4766dfdfd3b4cdf10b8f8f155 (diff)
sched/deadline: speed up SCHED_DEADLINE pushes with a push-heap
Data from tests confirmed that the original active load balancing logic did not scale with either the number of CPUs or the number of tasks (as sched_rt does).

Here we provide a global data structure to keep track of the deadlines of the running tasks in the system. The structure is composed of a bitmask showing the free CPUs and a max-heap, which is needed when the system is heavily loaded.

The implementation and concurrent access scheme are kept simple by design. However, our measurements show that we can compete with sched_rt on large multi-CPU machines [1].

Only the push path is addressed here; extending this structure to pull decisions as well is straightforward. However, we are currently evaluating different data structures (in order to decrease/avoid contention) that could possibly solve both problems. We are also going to re-run the tests taking into account recent changes inside cpupri [2].

[1] http://retis.sssup.it/~jlelli/papers/Ospert11Lelli.pdf
[2] http://www.spinics.net/lists/linux-rt-users/msg06778.html

Signed-off-by: Juri Lelli <juri.lelli@gmail.com>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1383831828-15501-14-git-send-email-juri.lelli@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
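The helpers this patch switches to, cpudl_set() and cpudl_find(), implement the structure described above (their implementation lives in kernel/sched/cpudeadline.c, outside this diffstat-limited view): a mask of CPUs with no -deadline work plus a max-heap ordered by each busy CPU's earliest deadline, so a push can pick a free CPU, or else the CPU whose earliest deadline is the latest, without scanning the whole root-domain span. What follows is a minimal userspace sketch of that idea only, not the kernel code: every *_sketch name is invented for illustration, locking and task-affinity filtering are omitted, and deadlines are compared with a plain < instead of the wraparound-safe dl_time_before().

/*
 * Minimal userspace sketch of the push-heap described above: illustration
 * only, not the kernel code. A bitmask tracks CPUs with no -deadline task; a
 * max-heap keyed on each busy CPU's earliest deadline picks push targets. All
 * *_sketch names are invented; locking, affinity and dl_time_before() omitted.
 */
#include <stdbool.h>
#include <stdint.h>

#define NR_CPUS_SKETCH 8

struct cpudl_sketch {
        int size;                       /* number of busy CPUs in the heap */
        int cpu[NR_CPUS_SKETCH];        /* heap of CPU ids, max-ordered on deadline */
        uint64_t dl[NR_CPUS_SKETCH];    /* dl[i] is the deadline stored at heap slot i */
        int pos[NR_CPUS_SKETCH];        /* heap slot of each CPU, -1 if the CPU is free */
        uint64_t free_mask;             /* bit c set: CPU c runs no -deadline task */
};

static void cpudl_init_sketch(struct cpudl_sketch *cp)
{
        cp->size = 0;
        cp->free_mask = (1ULL << NR_CPUS_SKETCH) - 1;   /* all CPUs start free */
        for (int c = 0; c < NR_CPUS_SKETCH; c++)
                cp->pos[c] = -1;
}

static void heap_swap(struct cpudl_sketch *cp, int a, int b)
{
        int ca = cp->cpu[a], cb = cp->cpu[b];
        uint64_t da = cp->dl[a];
        cp->cpu[a] = cb; cp->dl[a] = cp->dl[b]; cp->pos[cb] = a;
        cp->cpu[b] = ca; cp->dl[b] = da;        cp->pos[ca] = b;
}

static void heap_up(struct cpudl_sketch *cp, int i)
{
        while (i > 0 && cp->dl[(i - 1) / 2] < cp->dl[i]) {
                heap_swap(cp, i, (i - 1) / 2);
                i = (i - 1) / 2;
        }
}

static void heap_down(struct cpudl_sketch *cp, int i)
{
        for (;;) {
                int l = 2 * i + 1, r = 2 * i + 2, largest = i;
                if (l < cp->size && cp->dl[l] > cp->dl[largest])
                        largest = l;
                if (r < cp->size && cp->dl[r] > cp->dl[largest])
                        largest = r;
                if (largest == i)
                        return;
                heap_swap(cp, i, largest);
                i = largest;
        }
}

/* Role of cpudl_set(): record (is_valid) or clear a CPU's earliest deadline. */
static void cpudl_set_sketch(struct cpudl_sketch *cp, int cpu, uint64_t dl, bool is_valid)
{
        int i = cp->pos[cpu];
        if (!is_valid) {                        /* CPU no longer runs a -deadline task */
                if (i >= 0) {
                        heap_swap(cp, i, cp->size - 1);
                        cp->size--;
                        cp->pos[cpu] = -1;
                        if (i < cp->size) {     /* re-heapify the slot we refilled */
                                heap_down(cp, i);
                                heap_up(cp, i);
                        }
                }
                cp->free_mask |= 1ULL << cpu;
                return;
        }
        cp->free_mask &= ~(1ULL << cpu);
        if (i < 0) {                            /* CPU becomes busy: heap insert */
                i = cp->size++;
                cp->cpu[i] = cpu;
                cp->dl[i] = dl;
                cp->pos[cpu] = i;
                heap_up(cp, i);
        } else {                                /* deadline changed: fix heap in place */
                cp->dl[i] = dl;
                heap_down(cp, i);
                heap_up(cp, cp->pos[cpu]);
        }
}

/* Role of cpudl_find(): best CPU to push a task with deadline task_dl to, or -1. */
static int cpudl_find_sketch(struct cpudl_sketch *cp, uint64_t task_dl)
{
        if (cp->free_mask)                      /* a free CPU always wins */
                return __builtin_ctzll(cp->free_mask);
        if (cp->size && task_dl < cp->dl[0])    /* heap root = latest earliest-deadline */
                return cp->cpu[0];
        return -1;
}

Read against the hunks below: the inc_dl_deadline()/dec_dl_deadline() and rq_online_dl()/rq_offline_dl() changes play the role of cpudl_set_sketch(), keeping the heap in sync with each runqueue's earliest deadline, while check_preempt_equal_dl() and find_later_rq() now rely on the cpudl_find() lookup instead of the removed latest_cpu_find() scan over rd->span.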
Diffstat (limited to 'kernel/sched/deadline.c')
-rw-r--r--  kernel/sched/deadline.c  53
1 file changed, 14 insertions(+), 39 deletions(-)
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 802188fb6338..0c6b1d089cd4 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -16,6 +16,8 @@
  */
 #include "sched.h"
 
+#include <linux/slab.h>
+
 struct dl_bandwidth def_dl_bandwidth;
 
 static inline struct task_struct *dl_task_of(struct sched_dl_entity *dl_se)
@@ -640,6 +642,7 @@ static void inc_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
                  */
                 dl_rq->earliest_dl.next = dl_rq->earliest_dl.curr;
                 dl_rq->earliest_dl.curr = deadline;
+                cpudl_set(&rq->rd->cpudl, rq->cpu, deadline, 1);
         } else if (dl_rq->earliest_dl.next == 0 ||
                    dl_time_before(deadline, dl_rq->earliest_dl.next)) {
                 /*
@@ -663,6 +666,7 @@ static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
         if (!dl_rq->dl_nr_running) {
                 dl_rq->earliest_dl.curr = 0;
                 dl_rq->earliest_dl.next = 0;
+                cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);
         } else {
                 struct rb_node *leftmost = dl_rq->rb_leftmost;
                 struct sched_dl_entity *entry;
@@ -670,6 +674,7 @@ static void dec_dl_deadline(struct dl_rq *dl_rq, u64 deadline)
                 entry = rb_entry(leftmost, struct sched_dl_entity, rb_node);
                 dl_rq->earliest_dl.curr = entry->deadline;
                 dl_rq->earliest_dl.next = next_deadline(rq);
+                cpudl_set(&rq->rd->cpudl, rq->cpu, entry->deadline, 1);
         }
 }
 
@@ -855,9 +860,6 @@ static void yield_task_dl(struct rq *rq)
 #ifdef CONFIG_SMP
 
 static int find_later_rq(struct task_struct *task);
-static int latest_cpu_find(struct cpumask *span,
-                           struct task_struct *task,
-                           struct cpumask *later_mask);
 
 static int
 select_task_rq_dl(struct task_struct *p, int cpu, int sd_flag, int flags)
@@ -904,7 +906,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
          * let's hope p can move out.
          */
         if (rq->curr->nr_cpus_allowed == 1 ||
-            latest_cpu_find(rq->rd->span, rq->curr, NULL) == -1)
+            cpudl_find(&rq->rd->cpudl, rq->curr, NULL) == -1)
                 return;
 
         /*
@@ -912,7 +914,7 @@ static void check_preempt_equal_dl(struct rq *rq, struct task_struct *p)
          * see if it is pushed or pulled somewhere else.
          */
         if (p->nr_cpus_allowed != 1 &&
-            latest_cpu_find(rq->rd->span, p, NULL) != -1)
+            cpudl_find(&rq->rd->cpudl, p, NULL) != -1)
                 return;
 
         resched_task(rq->curr);
@@ -1085,39 +1087,6 @@ next_node:
         return NULL;
 }
 
-static int latest_cpu_find(struct cpumask *span,
-                           struct task_struct *task,
-                           struct cpumask *later_mask)
-{
-        const struct sched_dl_entity *dl_se = &task->dl;
-        int cpu, found = -1, best = 0;
-        u64 max_dl = 0;
-
-        for_each_cpu(cpu, span) {
-                struct rq *rq = cpu_rq(cpu);
-                struct dl_rq *dl_rq = &rq->dl;
-
-                if (cpumask_test_cpu(cpu, &task->cpus_allowed) &&
-                    (!dl_rq->dl_nr_running || dl_time_before(dl_se->deadline,
-                     dl_rq->earliest_dl.curr))) {
-                        if (later_mask)
-                                cpumask_set_cpu(cpu, later_mask);
-                        if (!best && !dl_rq->dl_nr_running) {
-                                best = 1;
-                                found = cpu;
-                        } else if (!best &&
-                                   dl_time_before(max_dl,
-                                                  dl_rq->earliest_dl.curr)) {
-                                max_dl = dl_rq->earliest_dl.curr;
-                                found = cpu;
-                        }
-                } else if (later_mask)
-                        cpumask_clear_cpu(cpu, later_mask);
-        }
-
-        return found;
-}
-
 static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl);
 
 static int find_later_rq(struct task_struct *task)
@@ -1134,7 +1103,8 @@ static int find_later_rq(struct task_struct *task)
         if (task->nr_cpus_allowed == 1)
                 return -1;
 
-        best_cpu = latest_cpu_find(task_rq(task)->rd->span, task, later_mask);
+        best_cpu = cpudl_find(&task_rq(task)->rd->cpudl,
+                        task, later_mask);
         if (best_cpu == -1)
                 return -1;
 
@@ -1510,6 +1480,9 @@ static void rq_online_dl(struct rq *rq)
 {
         if (rq->dl.overloaded)
                 dl_set_overload(rq);
+
+        if (rq->dl.dl_nr_running > 0)
+                cpudl_set(&rq->rd->cpudl, rq->cpu, rq->dl.earliest_dl.curr, 1);
 }
 
 /* Assumes rq->lock is held */
@@ -1517,6 +1490,8 @@ static void rq_offline_dl(struct rq *rq)
 {
         if (rq->dl.overloaded)
                 dl_clear_overload(rq);
+
+        cpudl_set(&rq->rd->cpudl, rq->cpu, 0, 0);
 }
 
 void init_sched_dl_class(void)