Diffstat (limited to 'kernel/sched/litmus.c')
-rw-r--r--	kernel/sched/litmus.c	350
1 file changed, 350 insertions(+), 0 deletions(-)
diff --git a/kernel/sched/litmus.c b/kernel/sched/litmus.c
new file mode 100644
index 000000000000..924358babde2
--- /dev/null
+++ b/kernel/sched/litmus.c
@@ -0,0 +1,350 @@
/* This file is included from kernel/sched.c */

#include "sched.h"

#include <litmus/trace.h>
#include <litmus/sched_trace.h>

#include <litmus/litmus.h>
#include <litmus/budget.h>
#include <litmus/sched_plugin.h>
#include <litmus/preempt.h>

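/* Charge elapsed execution time to both the per-job counter consulted
 * by budget enforcement and to Linux's regular per-task
 * sum_exec_runtime, so standard tools still see correct runtimes. The
 * clamp of delta to zero presumably guards against rq->clock snapshots
 * that predate exec_start.
 */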
static void update_time_litmus(struct rq *rq, struct task_struct *p)
{
	u64 delta = rq->clock - p->se.exec_start;

	if (unlikely((s64)delta < 0))
		delta = 0;
	/* per-job counter */
	p->rt_param.job_params.exec_time += delta;
	/* per-task counter */
	p->se.sum_exec_runtime += delta;
	if (delta) {
		TRACE_TASK(p, "charged %llu exec time (total:%llu, rem:%llu)\n",
			   delta, p->rt_param.job_params.exec_time,
			   budget_remaining(p));
	}
	/* sched_clock() */
	p->se.exec_start = rq->clock;
	cpuacct_charge(p, delta);
}

static void double_rq_lock(struct rq *rq1, struct rq *rq2);
static void double_rq_unlock(struct rq *rq1, struct rq *rq2);

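/* litmus_schedule() asks the active plugin for the next task. A global
 * plugin may hand back a task that still belongs to another CPU's
 * runqueue; in that case this function performs the migration itself.
 * A rough sketch of the protocol (details below):
 *
 *	next = litmus->schedule(prev);
 *	if (next is on another runqueue) {
 *		drop our rq lock;
 *		spin until next's stack (and CPU) are free;
 *		double_rq_lock(rq, other_rq);
 *		re-check plugin invariants;
 *		set_task_cpu(next, this cpu);
 *	}
 */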
static struct task_struct *
litmus_schedule(struct rq *rq, struct task_struct *prev)
{
	struct task_struct *next;

#ifdef CONFIG_SMP
	struct rq *other_rq;
	long was_running;
	lt_t _maybe_deadlock = 0;
#endif

	/* let the plugin schedule */
	next = litmus->schedule(prev);

	sched_state_plugin_check();

#ifdef CONFIG_SMP
	/* check if a global plugin pulled a task from a different RQ */
	if (next && task_rq(next) != rq) {
		/* we need to migrate the task */
		other_rq = task_rq(next);
		TRACE_TASK(next, "migrate from %d\n", other_rq->cpu);

		/* while we drop the lock, the prev task could change its
		 * state
		 */
		BUG_ON(prev != current);
		was_running = is_current_running();
		mb();
		raw_spin_unlock(&rq->lock);

		/* Don't race with a concurrent switch. This could deadlock in
		 * the case of cross or circular migrations. It's the job of
		 * the plugin to make sure that doesn't happen.
		 */
		TRACE_TASK(next, "stack_in_use=%d\n",
			   next->rt_param.stack_in_use);
		if (next->rt_param.stack_in_use != NO_CPU) {
			TRACE_TASK(next, "waiting to deschedule\n");
			_maybe_deadlock = litmus_clock();
		}
		while (next->rt_param.stack_in_use != NO_CPU) {
			cpu_relax();
			mb();
			if (next->rt_param.stack_in_use == NO_CPU)
				TRACE_TASK(next, "descheduled. Proceeding.\n");

			if (lt_before(_maybe_deadlock + 1000000000L,
				      litmus_clock())) {
				/* We've been spinning for 1s.
				 * Something can't be right!
				 * Let's abandon the task and bail out; at
				 * least we will have debug info instead of
				 * a hard deadlock.
				 */
#ifdef CONFIG_BUG_ON_MIGRATION_DEADLOCK
				BUG();
#else
				TRACE_TASK(next, "stack too long in use. "
					   "Deadlock?\n");
				next = NULL;

				/* bail out */
				raw_spin_lock(&rq->lock);
				return next;
#endif
			}
		}
#ifdef __ARCH_WANT_UNLOCKED_CTXSW
		if (next->on_cpu)
			TRACE_TASK(next, "waiting for !oncpu\n");
		while (next->on_cpu) {
			cpu_relax();
			mb();
		}
#endif
		double_rq_lock(rq, other_rq);
		mb();
		if (is_realtime(current) &&
		    is_current_running() != was_running) {
			TRACE_TASK(prev,
				   "state changed while we dropped"
				   " the lock: is_running=%d, was_running=%d\n",
				   is_current_running(), was_running);
			if (is_current_running() && !was_running) {
				/* prev task became unblocked; we need to
				 * simulate the normal sequence of events to
				 * the scheduler plugin.
				 */
				litmus->task_block(prev);
				litmus->task_wake_up(prev);
			}
		}

		set_task_cpu(next, smp_processor_id());

		/* DEBUG: now that we have the lock we need to make sure a
		 * couple of things still hold:
		 * - it is still a real-time task
		 * - it is still runnable (could have been stopped)
		 * If either is violated, then the active plugin is doing
		 * something wrong.
		 */
		if (!is_realtime(next) || !tsk_rt(next)->present) {
			/* BAD BAD BAD */
			TRACE_TASK(next, "BAD: migration invariant FAILED: "
				   "rt=%d present=%d\n",
				   is_realtime(next),
				   tsk_rt(next)->present);
			/* drop the task */
			next = NULL;
		}
		/* release the other CPU's runqueue, but keep ours */
		raw_spin_unlock(&other_rq->lock);
	}
#endif

	if (next) {
#ifdef CONFIG_SMP
		next->rt_param.stack_in_use = rq->cpu;
#else
		next->rt_param.stack_in_use = 0;
#endif
		update_rq_clock(rq);
		next->se.exec_start = rq->clock;
	}

	update_enforcement_timer(next);
	return next;
}

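/* The enqueue/dequeue hooks only forward genuine wake-up and sleep
 * events to the active plugin; plugins maintain their own ready
 * queues, so there is no per-class runqueue list to manipulate here.
 */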
static void enqueue_task_litmus(struct rq *rq, struct task_struct *p,
				int flags)
{
	if (flags & ENQUEUE_WAKEUP) {
		sched_trace_task_resume(p);
		tsk_rt(p)->present = 1;
		/* LITMUS^RT plugins need to update the state
		 * _before_ making it available in global structures.
		 * Linux gets away with being lazy about the task state
		 * update. We can't do that, hence we update the task
		 * state already here.
		 *
		 * WARNING: this needs to be re-evaluated when porting
		 * to newer kernel versions.
		 */
		p->state = TASK_RUNNING;
		litmus->task_wake_up(p);

		rq->litmus.nr_running++;
	} else {
		TRACE_TASK(p, "ignoring an enqueue, not a wake up.\n");
		p->se.exec_start = rq->clock;
	}
}

static void dequeue_task_litmus(struct rq *rq, struct task_struct *p,
				int flags)
{
	if (flags & DEQUEUE_SLEEP) {
		litmus->task_block(p);
		tsk_rt(p)->present = 0;
		sched_trace_task_block(p);

		rq->litmus.nr_running--;
	} else
		TRACE_TASK(p, "ignoring a dequeue, not going to sleep.\n");
}

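/* The TS_SYSCALL_* markers below bracket this hook with timestamps
 * (presumably Feather-Trace overhead measurements) so the cost of the
 * sched_yield() path can be recorded.
 */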
static void yield_task_litmus(struct rq *rq)
{
	TS_SYSCALL_IN_START;
	TS_SYSCALL_IN_END;

	BUG_ON(rq->curr != current);
	/* sched_yield() is called to trigger delayed preemptions.
	 * Thus, mark the current task as needing to be rescheduled.
	 * This will cause the scheduler plugin to be invoked, which can
	 * then determine if a preemption is still required.
	 */
	clear_exit_np(current);
	litmus_reschedule_local();

	TS_SYSCALL_OUT_START;
}

/* Plugins are responsible for this.
 */
static void check_preempt_curr_litmus(struct rq *rq, struct task_struct *p,
				      int flags)
{
}

static void put_prev_task_litmus(struct rq *rq, struct task_struct *p)
{
}

/* pick_next_task_litmus() - wrapper around litmus_schedule()
 *
 * Returns the next task to be scheduled.
 */
static struct task_struct *pick_next_task_litmus(struct rq *rq,
						 struct task_struct *prev)
{
	struct task_struct *next;

	if (is_realtime(prev))
		update_time_litmus(rq, prev);

	TS_PLUGIN_SCHED_START;
	next = litmus_schedule(rq, prev);
	TS_PLUGIN_SCHED_END;

	/* This is a bit backwards: the other classes call put_prev_task()
	 * _after_ they've determined that the class has some queued tasks.
	 * We can't determine this easily because each plugin manages its own
	 * ready queues, and because in the case of globally shared queues,
	 * we really don't know whether we'll have something ready even if
	 * we test here. So we do it in reverse: first ask the plugin to
	 * provide a task, and if we find one, call put_prev_task() on the
	 * previously scheduled task.
	 */
	if (next)
		put_prev_task(rq, prev);

	return next;
}

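/* Quantum-based budget enforcement: on each scheduler tick, charge the
 * elapsed time and, if the current task has an enforced budget that is
 * exhausted, request a local reschedule so the plugin can react.
 * Precise, timer-based enforcement is presumably handled by
 * update_enforcement_timer() in litmus_schedule() above.
 */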
static void task_tick_litmus(struct rq *rq, struct task_struct *p, int queued)
{
	if (is_realtime(p) && !queued) {
		update_time_litmus(rq, p);
		/* budget check for QUANTUM_ENFORCEMENT tasks */
		if (budget_enforced(p) && budget_exhausted(p))
			litmus_reschedule_local();
	}
}

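/* The next two hooks are intentionally empty: LITMUS^RT plugins track
 * task priorities internally, so there is presumably nothing to do
 * here when Linux-level priorities change.
 */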
static void switched_to_litmus(struct rq *rq, struct task_struct *p)
{
}

static void prio_changed_litmus(struct rq *rq, struct task_struct *p,
				int oldprio)
{
}

unsigned int get_rr_interval_litmus(struct rq *rq, struct task_struct *p)
{
	/* return infinity */
	return 0;
}

/* This is called when a task becomes a real-time task, either due to a
 * SCHED_* class transition or due to PI mutex inheritance. We don't
 * handle Linux PI mutex inheritance yet (and probably never will). Use
 * LITMUS^RT-provided synchronization primitives instead.
 */
static void set_curr_task_litmus(struct rq *rq)
{
	rq->curr->se.exec_start = rq->clock;
}

#ifdef CONFIG_SMP
/* execve() tries to rebalance the task within its scheduling domain.
 * We don't care about the scheduling domain; this can get called from
 * exec, fork, and wakeup.
 */
static int
select_task_rq_litmus(struct task_struct *p, int cpu, int sd_flag, int flags)
{
	/* Preemption is already disabled.
	 * We don't want to change the CPU here.
	 */
	return task_cpu(p);
}
#endif

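/* Called when the core scheduler wants up-to-date execution-time
 * statistics for the current task (e.g., for a clock_gettime() query
 * on a task CPU clock).
 */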
static void update_curr_litmus(struct rq *rq)
{
	struct task_struct *p = rq->curr;

	if (!is_realtime(p))
		return;

	update_time_litmus(rq, p);
}

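/* Class linkage: with .next pointing at dl_sched_class, the LITMUS^RT
 * class sits above SCHED_DEADLINE in the picking order; the stop class
 * above it is presumably linked to litmus_sched_class elsewhere in the
 * patch.
 */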
const struct sched_class litmus_sched_class = {
	/* Since commit 34f971f6 the stop/migrate worker threads have a
	 * class of their own, which is the highest-priority class. We
	 * don't support CPU hotplug or CPU throttling. This allows
	 * LITMUS^RT tasks to use up to 1.0 CPU capacity.
	 */
	.next			= &dl_sched_class,
	.enqueue_task		= enqueue_task_litmus,
	.dequeue_task		= dequeue_task_litmus,
	.yield_task		= yield_task_litmus,

	.check_preempt_curr	= check_preempt_curr_litmus,

	.pick_next_task		= pick_next_task_litmus,
	.put_prev_task		= put_prev_task_litmus,

#ifdef CONFIG_SMP
	.select_task_rq		= select_task_rq_litmus,
#endif

	.set_curr_task		= set_curr_task_litmus,
	.task_tick		= task_tick_litmus,

	.get_rr_interval	= get_rr_interval_litmus,

	.prio_changed		= prio_changed_litmus,
	.switched_to		= switched_to_litmus,

	.update_curr		= update_curr_litmus,
};