Diffstat (limited to 'kernel/sched.c')
-rw-r--r--	kernel/sched.c	152
1 file changed, 138 insertions(+), 14 deletions(-)
diff --git a/kernel/sched.c b/kernel/sched.c
index f6cf5cbc64b..a1bf2646d12 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -87,6 +87,11 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/sched.h>
 
+#include <litmus/sched_trace.h>
+#include <litmus/trace.h>
+
+static void litmus_tick(struct rq*, struct task_struct*);
+
 /*
  * Convert user-nice values [ -20 ... 0 ... 19 ]
  * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
@@ -414,6 +419,12 @@ struct rt_rq {
 #endif
 };
 
+/* Litmus related fields in a runqueue */
+struct litmus_rq {
+	unsigned long nr_running;
+	struct task_struct *prev;
+};
+
 #ifdef CONFIG_SMP
 
 /*
@@ -479,6 +490,7 @@ struct rq {
 
 	struct cfs_rq cfs;
 	struct rt_rq rt;
+	struct litmus_rq litmus;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* list of leaf cfs_rq on this cpu: */
@@ -1054,6 +1066,7 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
 	raw_spin_lock(&rq->lock);
 	update_rq_clock(rq);
 	rq->curr->sched_class->task_tick(rq, rq->curr, 1);
+	litmus_tick(rq, rq->curr);
 	raw_spin_unlock(&rq->lock);
 
 	return HRTIMER_NORESTART;
@@ -1750,7 +1763,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
 
 static const struct sched_class rt_sched_class;
 
-#define sched_class_highest	(&stop_sched_class)
+#define sched_class_highest	(&litmus_sched_class)
 #define for_each_class(class) \
    for (class = sched_class_highest; class; class = class->next)
 
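With sched_class_highest redirected from the stop class to litmus_sched_class, every for_each_class() walk now queries LITMUS^RT before the stock scheduling classes. A minimal sketch of the resulting class walk (how litmus_sched_class chains into the existing classes is an assumption about ../litmus/sched_litmus.c, which is not part of this diff):

/* Sketch only -- mirrors pick_next_task()'s loop further below. */
static struct task_struct *pick_highest_class_task(struct rq *rq)
{
	const struct sched_class *class;
	struct task_struct *p;

	for_each_class(class) {		/* now starts at &litmus_sched_class */
		p = class->pick_next_task(rq);
		if (p)
			return p;	/* LITMUS^RT gets first pick */
	}
	return NULL;	/* not reached: the idle class always returns a task */
}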
@@ -2044,6 +2057,7 @@ static int irqtime_account_si_update(void)
 #include "sched_rt.c"
 #include "sched_autogroup.c"
 #include "sched_stoptask.c"
+#include "../litmus/sched_litmus.c"
 #ifdef CONFIG_SCHED_DEBUG
 # include "sched_debug.c"
 #endif
@@ -2166,6 +2180,10 @@ static void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 	 * A queue event has occurred, and we're going to schedule. In
 	 * this case, we can save a useless back to back clock update.
 	 */
+	/* LITMUS^RT:
+	 * The "disable-clock-update" approach was buggy in Linux 2.6.36.
+	 * The issue has been solved in 2.6.37.
+	 */
 	if (rq->curr->on_rq && test_tsk_need_resched(rq->curr))
 		rq->skip_clock_update = 1;
 }
@@ -2592,8 +2610,12 @@ void scheduler_ipi(void)
 	struct rq *rq = this_rq();
 	struct task_struct *list = xchg(&rq->wake_list, NULL);
 
-	if (!list)
+	if (!list) {
+		/* If we don't call irq_enter(), we need to trigger the IRQ
+		 * tracing manually. */
+		ft_irq_fired();
 		return;
+	}
 
 	/*
 	 * Not all reschedule IPI handlers call irq_enter/irq_exit, since
@@ -2656,7 +2678,12 @@ static void ttwu_queue(struct task_struct *p, int cpu)
 	struct rq *rq = cpu_rq(cpu);
 
 #if defined(CONFIG_SMP)
-	if (sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) {
+	/*
+	 * LITMUS^RT: whether to send an IPI to the remote CPU
+	 * is plugin specific.
+	 */
+	if (!is_realtime(p) &&
+	    sched_feat(TTWU_QUEUE) && cpu != smp_processor_id()) {
 		sched_clock_cpu(cpu); /* sync clocks x-cpu */
 		ttwu_queue_remote(p, cpu);
 		return;
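The is_realtime() check keeps LITMUS^RT tasks off the TTWU_QUEUE fast path, so their wake-ups are processed locally under the rq lock and the active plugin decides on its own whether a remote CPU needs a preemption IPI. As a rough guide to reading these hunks, the predicate amounts to a policy test (the actual definition lives in the LITMUS^RT headers; the form shown here is an assumption):

/* Assumed sketch of the predicate used throughout this patch. */
#define is_realtime(t)	((t)->policy == SCHED_LITMUS)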
@@ -2689,6 +2716,9 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	unsigned long flags;
 	int cpu, success = 0;
 
+	if (is_realtime(p))
+		TRACE_TASK(p, "try_to_wake_up() state:%d\n", p->state);
+
 	smp_wmb();
 	raw_spin_lock_irqsave(&p->pi_lock, flags);
 	if (!(p->state & state))
@@ -2725,6 +2755,12 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 	 */
 	smp_rmb();
 
+	/* LITMUS^RT: once the task can be safely referenced by this
+	 * CPU, don't mess with the Linux load balancing machinery.
+	 */
+	if (is_realtime(p))
+		goto litmus_out_activate;
+
 	p->sched_contributes_to_load = !!task_contributes_to_load(p);
 	p->state = TASK_WAKING;
 
@@ -2736,12 +2772,16 @@ try_to_wake_up(struct task_struct *p, unsigned int state, int wake_flags)
 		wake_flags |= WF_MIGRATED;
 		set_task_cpu(p, cpu);
 	}
+
+litmus_out_activate:
 #endif /* CONFIG_SMP */
 
 	ttwu_queue(p, cpu);
 stat:
 	ttwu_stat(p, cpu, wake_flags);
 out:
+	if (is_realtime(p))
+		TRACE_TASK(p, "try_to_wake_up() done state:%d\n", p->state);
 	raw_spin_unlock_irqrestore(&p->pi_lock, flags);
 
 	return success;
@@ -2852,7 +2892,8 @@ void sched_fork(struct task_struct *p)
 	 * Revert to default priority/policy on fork if requested.
 	 */
 	if (unlikely(p->sched_reset_on_fork)) {
-		if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) {
+		if (p->policy == SCHED_FIFO || p->policy == SCHED_RR ||
+		    p->policy == SCHED_LITMUS) {
 			p->policy = SCHED_NORMAL;
 			p->normal_prio = p->static_prio;
 		}
@@ -3063,6 +3104,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	 */
 	prev_state = prev->state;
 	finish_arch_switch(prev);
+	litmus->finish_switch(prev);
+	prev->rt_param.stack_in_use = NO_CPU;
 #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
 	local_irq_disable();
 #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
@@ -3092,6 +3135,15 @@ static inline void pre_schedule(struct rq *rq, struct task_struct *prev)
 {
 	if (prev->sched_class->pre_schedule)
 		prev->sched_class->pre_schedule(rq, prev);
+
+	/* LITMUS^RT: not a very clean hack; we need to save the prev task
+	 * as our scheduling decision relies on it (as we drop the rq lock,
+	 * something in prev can change...); there is no way to escape
+	 * this hack apart from modifying pick_next_task(rq, _prev_) or
+	 * falling back on the previous solution of decoupling
+	 * scheduling decisions.
+	 */
+	rq->litmus.prev = prev;
 }
 
 /* rq->lock is NOT held, but preemption is disabled */
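The cached rq->litmus.prev exists because the rq lock can be dropped before the LITMUS^RT class makes its pick, so prev as seen by pick_next_task(rq) alone is no longer trustworthy. A hypothetical consumer on the LITMUS^RT side might look like this (sketch only; the real logic is in ../litmus/sched_litmus.c, and litmus->schedule() is assumed to be the active plugin's scheduling callback):

/* Hypothetical sketch -- not the actual sched_litmus.c. */
static struct task_struct *litmus_pick_next_sketch(struct rq *rq)
{
	struct task_struct *prev = rq->litmus.prev;

	rq->litmus.prev = NULL;		/* consume the cached pointer once */
	return litmus->schedule(prev);	/* active plugin picks the next task */
}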
@@ -3128,16 +3180,26 @@ static inline void post_schedule(struct rq *rq)
 asmlinkage void schedule_tail(struct task_struct *prev)
 	__releases(rq->lock)
 {
-	struct rq *rq = this_rq();
+	struct rq *rq;
 
+	preempt_disable();
+
+	rq = this_rq();
 	finish_task_switch(rq, prev);
 
+	sched_trace_task_switch_to(current);
+
 	/*
 	 * FIXME: do we need to worry about rq being invalidated by the
 	 * task_switch?
 	 */
 	post_schedule(rq);
 
+	if (sched_state_validate_switch())
+		litmus_reschedule_local();
+
+	preempt_enable();
+
 #ifdef __ARCH_WANT_UNLOCKED_CTXSW
 	/* In this case, finish_task_switch does not reenable preemption */
 	preempt_enable();
@@ -4108,18 +4170,26 @@ void scheduler_tick(void)
 
 	sched_clock_tick();
 
+	TS_TICK_START(current);
+
 	raw_spin_lock(&rq->lock);
 	update_rq_clock(rq);
 	update_cpu_load_active(rq);
 	curr->sched_class->task_tick(rq, curr, 0);
+
+	/* litmus_tick may force current to resched */
+	litmus_tick(rq, curr);
+
 	raw_spin_unlock(&rq->lock);
 
 	perf_event_task_tick();
 
 #ifdef CONFIG_SMP
 	rq->idle_at_tick = idle_cpu(cpu);
-	trigger_load_balance(rq, cpu);
+	if (!is_realtime(current))
+		trigger_load_balance(rq, cpu);
 #endif
+	TS_TICK_END(current);
 }
 
 notrace unsigned long get_parent_ip(unsigned long addr)
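TS_TICK_START/TS_TICK_END, like the TS_SCHED*, TS_SCHED2* and TS_CXS* pairs in the hunks below, are Feather-Trace probes from <litmus/trace.h>: each pair timestamps the entry and exit of a code section so that scheduling overheads can be measured per task. A purely illustrative stand-in (the helper name and event IDs here are hypothetical, not the real trace.h):

/* Hypothetical stand-in for the Feather-Trace timestamp probes. */
enum { EV_TICK_START, EV_TICK_END };

static inline void ft_record_timestamp(int event, struct task_struct *t)
{
	/* The real implementation logs a cycle-accurate timestamp
	 * for (event, t) into a wait-free trace buffer. */
}

#define TS_TICK_START(t)	ft_record_timestamp(EV_TICK_START, (t))
#define TS_TICK_END(t)		ft_record_timestamp(EV_TICK_END, (t))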
@@ -4239,12 +4309,20 @@ pick_next_task(struct rq *rq)
 	/*
 	 * Optimization: we know that if all tasks are in
 	 * the fair class we can call that function directly:
-	 */
-	if (likely(rq->nr_running == rq->cfs.nr_running)) {
+
+	 * NOT IN LITMUS^RT!
+
+	 * This breaks many assumptions in the plugins.
+	 * Do not uncomment without thinking long and hard
+	 * about how this affects global plugins such as GSN-EDF.
+
+	if (rq->nr_running == rq->cfs.nr_running) {
+		TRACE("taking shortcut in pick_next_task()\n");
 		p = fair_sched_class.pick_next_task(rq);
 		if (likely(p))
 			return p;
 	}
+	*/
 
 	for_each_class(class) {
 		p = class->pick_next_task(rq);
@@ -4267,11 +4345,19 @@ static void __sched __schedule(void)
 
 need_resched:
 	preempt_disable();
+	sched_state_entered_schedule();
 	cpu = smp_processor_id();
 	rq = cpu_rq(cpu);
 	rcu_note_context_switch(cpu);
 	prev = rq->curr;
 
+	/* LITMUS^RT: quickly re-evaluate the scheduling decision
+	 * if the previous one is no longer valid after CTX.
+	 */
+litmus_need_resched_nonpreemptible:
+	TS_SCHED_START;
+	sched_trace_task_switch_away(prev);
+
 	schedule_debug(prev);
 
 	if (sched_feat(HRTICK))
@@ -4318,7 +4404,10 @@ need_resched:
 		rq->curr = next;
 		++*switch_count;
 
+		TS_SCHED_END(next);
+		TS_CXS_START(next);
 		context_switch(rq, prev, next); /* unlocks the rq */
+		TS_CXS_END(current);
 		/*
 		 * The context switch have flipped the stack from under us
 		 * and restored the local variables which were saved when
@@ -4327,14 +4416,29 @@ need_resched:
 		 */
 		cpu = smp_processor_id();
 		rq = cpu_rq(cpu);
-	} else
+	} else {
+		TS_SCHED_END(prev);
 		raw_spin_unlock_irq(&rq->lock);
+	}
+
+	TS_SCHED2_START(prev);
+	sched_trace_task_switch_to(current);
 
 	post_schedule(rq);
 
+	if (sched_state_validate_switch()) {
+		TS_SCHED2_END(prev);
+		goto litmus_need_resched_nonpreemptible;
+	}
+
 	preempt_enable_no_resched();
+
+	TS_SCHED2_END(prev);
+
 	if (need_resched())
 		goto need_resched;
+
+	srp_ceiling_block();
 }
 
 static inline void sched_submit_work(struct task_struct *tsk)
@@ -5056,7 +5160,9 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
 	p->normal_prio = normal_prio(p);
 	/* we are holding p->pi_lock already */
 	p->prio = rt_mutex_getprio(p);
-	if (rt_prio(p->prio))
+	if (p->policy == SCHED_LITMUS)
+		p->sched_class = &litmus_sched_class;
+	else if (rt_prio(p->prio))
 		p->sched_class = &rt_sched_class;
 	else
 		p->sched_class = &fair_sched_class;
@@ -5104,7 +5210,7 @@ recheck:
 
 		if (policy != SCHED_FIFO && policy != SCHED_RR &&
 				policy != SCHED_NORMAL && policy != SCHED_BATCH &&
-				policy != SCHED_IDLE)
+				policy != SCHED_IDLE && policy != SCHED_LITMUS)
 			return -EINVAL;
 	}
 
@@ -5119,6 +5225,8 @@ recheck:
 		return -EINVAL;
 	if (rt_policy(policy) != (param->sched_priority != 0))
 		return -EINVAL;
+	if (policy == SCHED_LITMUS && policy == p->policy)
+		return -EINVAL;
 
 	/*
 	 * Allow unprivileged RT tasks to decrease priority:
@@ -5162,6 +5270,12 @@ recheck:
 			return retval;
 	}
 
+	if (policy == SCHED_LITMUS) {
+		retval = litmus_admit_task(p);
+		if (retval)
+			return retval;
+	}
+
 	/*
 	 * make sure no PI-waiters arrive (or leave) while we are
 	 * changing the priority of the task:
@@ -5220,10 +5334,19 @@ recheck:
 
 	p->sched_reset_on_fork = reset_on_fork;
 
+	if (p->policy == SCHED_LITMUS)
+		litmus_exit_task(p);
+
 	oldprio = p->prio;
 	prev_class = p->sched_class;
 	__setscheduler(rq, p, policy, param->sched_priority);
 
+	if (policy == SCHED_LITMUS) {
+		p->rt_param.stack_in_use = running ? rq->cpu : NO_CPU;
+		p->rt_param.present = running;
+		litmus->task_new(p, on_rq, running);
+	}
+
 	if (running)
 		p->sched_class->set_curr_task(rq);
 	if (on_rq)
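With these hooks in place, a task enters LITMUS^RT through the ordinary sched_setscheduler() path: litmus_admit_task() vets the transition, litmus_exit_task() tears down a previous LITMUS^RT state, and litmus->task_new() hands the task to the active plugin. A hypothetical userspace sketch of the switch (liblitmus normally wraps this, the task's real-time parameters must already be configured or admission fails, and the SCHED_LITMUS value is an assumption taken from the LITMUS^RT headers):

#include <sched.h>
#include <stdio.h>

#ifndef SCHED_LITMUS
#define SCHED_LITMUS 6	/* assumed value; provided by the LITMUS^RT headers */
#endif

/* Sketch: switch the calling thread to SCHED_LITMUS. */
static int become_litmus_task(void)
{
	struct sched_param param = { .sched_priority = 0 };

	if (sched_setscheduler(0 /* self */, SCHED_LITMUS, &param)) {
		perror("sched_setscheduler(SCHED_LITMUS)");
		return -1;
	}
	return 0;
}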
@@ -5391,10 +5514,11 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 	rcu_read_lock();
 
 	p = find_process_by_pid(pid);
-	if (!p) {
+	/* Don't set affinity if the task was not found or is a LITMUS^RT task */
+	if (!p || is_realtime(p)) {
 		rcu_read_unlock();
 		put_online_cpus();
-		return -ESRCH;
+		return p ? -EPERM : -ESRCH;
 	}
 
 	/* Prevent p going away */