path: root/kernel/sched/sched.h
author		Juri Lelli <juri.lelli@gmail.com>	2013-11-07 08:43:38 -0500
committer	Ingo Molnar <mingo@kernel.org>	2014-01-13 07:41:07 -0500
commit		1baca4ce16b8cc7d4f50be1f7914799af30a2861 (patch)
tree		10fcce2b53389aeb5a6386fcb318dabeaa78db9b /kernel/sched/sched.h
parent		aab03e05e8f7e26f51dee792beddcb5cca9215a5 (diff)
sched/deadline: Add SCHED_DEADLINE SMP-related data structures & logic
Introduces the data structures relevant for implementing dynamic
migration of -deadline tasks, the logic for checking whether runqueues
are overloaded with -deadline tasks, and the logic for choosing where a
task should migrate when that is the case.

It also adds dynamic migration to SCHED_DEADLINE, so that tasks can be
moved among CPUs when necessary. A task can likewise be bound to a
(set of) CPU(s), restricting its ability to migrate, or forbidding
migration altogether.

The very same approach used in sched_rt is utilised:

 - -deadline tasks are kept in CPU-specific runqueues,
 - -deadline tasks are migrated among runqueues to achieve the
   following:
    * on an M-CPU system, the M earliest-deadline ready tasks are
      always running;
    * the affinity/cpusets settings of all -deadline tasks are
      always respected.

Therefore, this very special form of "load balancing" is done with an
active method, i.e., the scheduler pushes or pulls tasks between
runqueues when they are woken up and/or (de)scheduled. In other words,
every time a preemption occurs, the descheduled task might be sent to
some other CPU (depending on its deadline) to continue executing
(push). Conversely, every time a CPU becomes idle, it might pull the
second-earliest-deadline ready task from some other CPU.

To enforce this, a pull operation is always attempted before taking any
scheduling decision (pre_schedule()), as well as a push operation after
each scheduling decision (post_schedule()). In addition, when a task
arrives or wakes up, the best CPU on which to resume it is selected
taking into account its affinity mask, the system topology and its
deadline. E.g., from the scheduling point of view, the best CPU on
which to wake up (and also push) a task is the one running the task
with the latest deadline among the M executing ones.

To facilitate these decisions, each runqueue "caches" the deadlines of
its currently running task and of its earliest ready task. Queued but
not running tasks are also parked in a separate rb-tree to speed up
pushes.

Signed-off-by: Juri Lelli <juri.lelli@gmail.com>
Signed-off-by: Dario Faggioli <raistlin@linux.it>
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1383831828-15501-5-git-send-email-juri.lelli@gmail.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
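As an aside for readers, the push decision described above can be
modelled in a few lines of self-contained C. This is an illustrative
sketch, not code from the patch: struct earliest_dl_cache and
worth_pushing() are hypothetical names invented here, while
dl_time_before() mirrors the kernel's wrap-safe EDF comparison.

#include <stdint.h>
#include <stdbool.h>

/* Userspace model of the per-rq deadline cache this patch adds. */
struct earliest_dl_cache {
	uint64_t curr;	/* deadline of the task currently running */
	uint64_t next;	/* deadline of the earliest ready, not running task */
};

/* Wrap-safe EDF order: true if deadline a is earlier than deadline b. */
static bool dl_time_before(uint64_t a, uint64_t b)
{
	return (int64_t)(a - b) < 0;
}

/*
 * A ready-but-not-running task with deadline dl is worth pushing to a
 * CPU whose cached earliest_dl.curr is later than dl: the pushed task
 * would preempt there, keeping the M earliest-deadline tasks running.
 */
static bool worth_pushing(uint64_t dl, const struct earliest_dl_cache *target)
{
	return dl_time_before(dl, target->curr);
}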
Diffstat (limited to 'kernel/sched/sched.h')
-rw-r--r--	kernel/sched/sched.h	34
1 file changed, 34 insertions(+), 0 deletions(-)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 83eb5390f753..93ea62754f11 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -385,6 +385,31 @@ struct dl_rq {
 	struct rb_node *rb_leftmost;
 
 	unsigned long dl_nr_running;
+
+#ifdef CONFIG_SMP
+	/*
+	 * Deadline values of the currently executing and the
+	 * earliest ready task on this rq. Caching these facilitates
+	 * the decision whether or not a ready but not running task
+	 * should migrate somewhere else.
+	 */
+	struct {
+		u64 curr;
+		u64 next;
+	} earliest_dl;
+
+	unsigned long dl_nr_migratory;
+	unsigned long dl_nr_total;
+	int overloaded;
+
+	/*
+	 * Tasks on this rq that can be pushed away. They are kept in
+	 * an rb-tree, ordered by tasks' deadlines, with caching
+	 * of the leftmost (earliest deadline) element.
+	 */
+	struct rb_root pushable_dl_tasks_root;
+	struct rb_node *pushable_dl_tasks_leftmost;
+#endif
 };
 
 #ifdef CONFIG_SMP
@@ -405,6 +430,13 @@ struct root_domain {
 	cpumask_var_t online;
 
 	/*
+	 * The bit corresponding to a CPU gets set here if such CPU has more
+	 * than one runnable -deadline task (as it is below for RT tasks).
+	 */
+	cpumask_var_t dlo_mask;
+	atomic_t dlo_count;
+
+	/*
 	 * The "RT overload" flag: it gets set if a CPU has more than
 	 * one runnable RT task.
 	 */
@@ -1095,6 +1127,8 @@ static inline void idle_balance(int cpu, struct rq *rq)
 extern void sysrq_sched_debug_show(void);
 extern void sched_init_granularity(void);
 extern void update_max_interval(void);
+
+extern void init_sched_dl_class(void);
 extern void init_sched_rt_class(void);
 extern void init_sched_fair_class(void);
 
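For context, the sched.h hunks above only declare the new fields; the
deadline.c side of this patch keeps them coherent. The following is a
reconstruction, for illustration only, of how the dlo_mask/dlo_count
pair is meant to be updated, following the sched_rt overload pattern
the root_domain comment refers to; it is a sketch, not verbatim patch
code.

static inline void dl_set_overload(struct rq *rq)
{
	if (!rq->online)
		return;

	cpumask_set_cpu(rq->cpu, rq->rd->dlo_mask);
	/*
	 * The mask update must be visible before the count is bumped,
	 * so a concurrent pull never sees a count without a set bit.
	 */
	smp_wmb();
	atomic_inc(&rq->rd->dlo_count);
}

static inline void dl_clear_overload(struct rq *rq)
{
	if (!rq->online)
		return;

	atomic_dec(&rq->rd->dlo_count);
	cpumask_clear_cpu(rq->cpu, rq->rd->dlo_mask);
}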