path: root/kernel
author     Jeremy Erickson <jerickso@cs.unc.edu>  2012-08-30 21:01:47 -0400
committer  Jeremy Erickson <jerickso@cs.unc.edu>  2012-08-30 21:01:47 -0400
commit     b1e1fea67bca3796d5f9133a92c300ec4fa93a4f (patch)
tree       5cc1336e1fe1d6f93b1067e73e43381dd20db690 /kernel
parent     f6f94e2ab1b33f0082ac22d71f66385a60d8157f (diff)
Bjoern's Dissertation Code with Priority Donation (wip-splitting-omlp-jerickso)
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/exit.c             |   4
-rw-r--r--  kernel/fork.c             |   7
-rw-r--r--  kernel/hrtimer.c          |  95
-rw-r--r--  kernel/printk.c           |  14
-rw-r--r--  kernel/sched.c            | 127
-rw-r--r--  kernel/sched_fair.c       |   2
-rw-r--r--  kernel/sched_rt.c         |   2
-rw-r--r--  kernel/time/tick-sched.c  |  47
8 files changed, 282 insertions, 16 deletions
diff --git a/kernel/exit.c b/kernel/exit.c
index 03120229db28..b9d3bc6c21ec 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -56,6 +56,8 @@
 #include <asm/pgtable.h>
 #include <asm/mmu_context.h>
 
+extern void exit_od_table(struct task_struct *t);
+
 static void exit_mm(struct task_struct * tsk);
 
 static void __unhash_process(struct task_struct *p, bool group_dead)
@@ -960,6 +962,8 @@ NORET_TYPE void do_exit(long code)
 	if (unlikely(tsk->audit_context))
 		audit_free(tsk);
 
+	exit_od_table(tsk);
+
 	tsk->exit_code = code;
 	taskstats_exit(tsk, group_dead);
 
diff --git a/kernel/fork.c b/kernel/fork.c
index c445f8cc408d..ab7f29d906c7 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -75,6 +75,9 @@
 
 #include <trace/events/sched.h>
 
+#include <litmus/litmus.h>
+#include <litmus/sched_plugin.h>
+
 /*
  * Protected counters by write_lock_irq(&tasklist_lock)
  */
@@ -183,6 +186,7 @@ void __put_task_struct(struct task_struct *tsk)
 	WARN_ON(atomic_read(&tsk->usage));
 	WARN_ON(tsk == current);
 
+	exit_litmus(tsk);
 	exit_creds(tsk);
 	delayacct_tsk_free(tsk);
 	put_signal_struct(tsk->signal);
@@ -266,6 +270,9 @@ static struct task_struct *dup_task_struct(struct task_struct *orig)
 
 	tsk->stack = ti;
 
+	/* Don't let the new task be a real-time task. */
+	litmus_fork(tsk);
+
 	err = prop_local_init_single(&tsk->dirties);
 	if (err)
 		goto out;
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 72206cf5c6cf..cb49883b64e5 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -46,6 +46,8 @@
 #include <linux/sched.h>
 #include <linux/timer.h>
 
+#include <litmus/litmus.h>
+
 #include <asm/uaccess.h>
 
 #include <trace/events/timer.h>
@@ -1042,6 +1044,98 @@ hrtimer_start(struct hrtimer *timer, ktime_t tim, const enum hrtimer_mode mode)
 }
 EXPORT_SYMBOL_GPL(hrtimer_start);
 
+#ifdef CONFIG_ARCH_HAS_SEND_PULL_TIMERS
+
+/**
+ * hrtimer_start_on_info_init - Initialize hrtimer_start_on_info
+ */
+void hrtimer_start_on_info_init(struct hrtimer_start_on_info *info)
+{
+	memset(info, 0, sizeof(struct hrtimer_start_on_info));
+	atomic_set(&info->state, HRTIMER_START_ON_INACTIVE);
+}
+
+/**
+ * hrtimer_pull - PULL_TIMERS_VECTOR callback on remote cpu
+ */
+void hrtimer_pull(void)
+{
+	struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
+	struct hrtimer_start_on_info *info;
+	struct list_head *pos, *safe, list;
+
+	raw_spin_lock(&base->lock);
+	list_replace_init(&base->to_pull, &list);
+	raw_spin_unlock(&base->lock);
+
+	list_for_each_safe(pos, safe, &list) {
+		info = list_entry(pos, struct hrtimer_start_on_info, list);
+		TRACE("pulled timer 0x%x\n", info->timer);
+		list_del(pos);
+		hrtimer_start(info->timer, info->time, info->mode);
+	}
+}
+
+/**
+ * hrtimer_start_on - trigger timer arming on remote cpu
+ * @cpu:	remote cpu
+ * @info:	save timer information for enqueuing on remote cpu
+ * @timer:	timer to be pulled
+ * @time:	expire time
+ * @mode:	timer mode
+ */
+int hrtimer_start_on(int cpu, struct hrtimer_start_on_info* info,
+		struct hrtimer *timer, ktime_t time,
+		const enum hrtimer_mode mode)
+{
+	unsigned long flags;
+	struct hrtimer_cpu_base* base;
+	int in_use = 0, was_empty;
+
+	/* serialize access to info through the timer base */
+	lock_hrtimer_base(timer, &flags);
+
+	in_use = (atomic_read(&info->state) != HRTIMER_START_ON_INACTIVE);
+	if (!in_use) {
+		INIT_LIST_HEAD(&info->list);
+		info->timer = timer;
+		info->time  = time;
+		info->mode  = mode;
+		/* mark as in use */
+		atomic_set(&info->state, HRTIMER_START_ON_QUEUED);
+	}
+
+	unlock_hrtimer_base(timer, &flags);
+
+	if (!in_use) {
+		/* initiate pull */
+		preempt_disable();
+		if (cpu == smp_processor_id()) {
+			/* start timer locally; we may get called
+			 * with rq->lock held, do not wake up anything
+			 */
+			TRACE("hrtimer_start_on: starting on local CPU\n");
+			__hrtimer_start_range_ns(info->timer, info->time,
+						 0, info->mode, 0);
+		} else {
+			TRACE("hrtimer_start_on: pulling to remote CPU\n");
+			base = &per_cpu(hrtimer_bases, cpu);
+			raw_spin_lock_irqsave(&base->lock, flags);
+			was_empty = list_empty(&base->to_pull);
+			list_add(&info->list, &base->to_pull);
+			raw_spin_unlock_irqrestore(&base->lock, flags);
+			if (was_empty)
+				/* only send an IPI if no one else
+				 * has done so already
+				 */
+				smp_send_pull_timers(cpu);
+		}
+		preempt_enable();
+	}
+	return in_use;
+}
+
+#endif
 
 /**
  * hrtimer_try_to_cancel - try to deactivate a timer
@@ -1634,6 +1728,7 @@ static void __cpuinit init_hrtimers_cpu(int cpu)
 		cpu_base->clock_base[i].cpu_base = cpu_base;
 
 	hrtimer_init_hres(cpu_base);
+	INIT_LIST_HEAD(&cpu_base->to_pull);
 }
 
 #ifdef CONFIG_HOTPLUG_CPU
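
Usage note (not part of the patch): the hrtimer_start_on() API above is meant to be driven from a scheduler plugin that needs a release timer armed on a specific remote CPU. Below is a minimal caller-side sketch, assuming only the functions and constants shown in the hunk above; the surrounding names (my_plugin_init, arm_release_timer_on, rel_info, release_cpu) are illustrative, not from this commit:

    /* Hypothetical caller-side sketch (names are illustrative only). */
    static struct hrtimer_start_on_info rel_info;

    static void my_plugin_init(void)
    {
        /* one-time setup: marks the info block HRTIMER_START_ON_INACTIVE */
        hrtimer_start_on_info_init(&rel_info);
    }

    static void arm_release_timer_on(int release_cpu, struct hrtimer *timer, u64 when_ns)
    {
        /* hand the timer to the remote CPU; a non-zero return means the
         * info block is still queued from an earlier request, so nothing
         * was armed this time */
        if (hrtimer_start_on(release_cpu, &rel_info, timer,
                             ns_to_ktime(when_ns), HRTIMER_MODE_ABS))
            TRACE("release timer request already in flight\n");
    }
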
diff --git a/kernel/printk.c b/kernel/printk.c
index 8fe465ac008a..9dc8ea140426 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -74,6 +74,13 @@ int console_printk[4] = {
 };
 
 /*
+ * divert printk() messages when there is a LITMUS^RT debug listener
+ */
+#include <litmus/litmus.h>
+int trace_override = 0;
+int trace_recurse = 0;
+
+/*
  * Low level drivers may need that to know if they can schedule in
  * their unblank() callback or not. So let's export it.
  */
@@ -735,6 +742,9 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 	/* Emit the output into the temporary buffer */
 	printed_len += vscnprintf(printk_buf + printed_len,
 				  sizeof(printk_buf) - printed_len, fmt, args);
+	/* if LITMUS^RT tracer is active divert printk() msgs */
+	if (trace_override && !trace_recurse)
+		TRACE("%s", printk_buf);
 
 
 	p = printk_buf;
@@ -804,7 +814,7 @@ asmlinkage int vprintk(const char *fmt, va_list args)
 	 * Try to acquire and then immediately release the
 	 * console semaphore. The release will do all the
 	 * actual magic (print out buffers, wake up klogd,
-	 * etc). 
+	 * etc).
 	 *
 	 * The acquire_console_semaphore_for_printk() function
 	 * will release 'logbuf_lock' regardless of whether it
@@ -1067,7 +1077,7 @@ int printk_needs_cpu(int cpu)
 
 void wake_up_klogd(void)
 {
-	if (waitqueue_active(&log_wait))
+	if (!trace_override && waitqueue_active(&log_wait))
 		__raw_get_cpu_var(printk_pending) = 1;
 }
 
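
Usage note (not part of the patch): trace_override diverts printk() output into the LITMUS^RT TRACE() buffer while a debug listener is attached, and trace_recurse keeps TRACE() itself from recursively re-entering this path. A plausible sketch of how a debug-log device could toggle the flag; the open/release handlers below are an assumption for illustration, not code from this commit:

    /* Hypothetical char-device hooks for the LITMUS^RT debug log. */
    static int litmus_log_open(struct inode *inode, struct file *filp)
    {
        trace_override++;   /* divert printk() into the TRACE() buffer */
        return 0;
    }

    static int litmus_log_release(struct inode *inode, struct file *filp)
    {
        trace_override--;   /* resume normal printk()/klogd handling */
        return 0;
    }
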
diff --git a/kernel/sched.c b/kernel/sched.c
index dc85ceb90832..1f5327f8c012 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -79,6 +79,11 @@
 #include "sched_cpupri.h"
 #include "workqueue_sched.h"
 
+#include <litmus/sched_trace.h>
+#include <litmus/trace.h>
+
+static void litmus_tick(struct rq*, struct task_struct*);
+
 #define CREATE_TRACE_POINTS
 #include <trace/events/sched.h>
 
@@ -405,6 +410,12 @@ struct rt_rq {
 #endif
 };
 
+/* Litmus related fields in a runqueue */
+struct litmus_rq {
+	unsigned long nr_running;
+	struct task_struct *prev;
+};
+
 #ifdef CONFIG_SMP
 
 /*
@@ -471,6 +482,7 @@ struct rq {
 
 	struct cfs_rq cfs;
 	struct rt_rq rt;
+	struct litmus_rq litmus;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
 	/* list of leaf cfs_rq on this cpu: */
@@ -566,8 +578,14 @@ void check_preempt_curr(struct rq *rq, struct task_struct *p, int flags)
 	 * A queue event has occurred, and we're going to schedule.  In
 	 * this case, we can save a useless back to back clock update.
 	 */
+	/* LITMUS^RT: skipping the clock update is buggy in Linux 2.6.36;
+	 * the scheduler can "forget" to re-enable the runqueue clock in
+	 * some cases. LITMUS^RT amplifies the effects of this problem.
+	 * Hence, we turn the optimization off to avoid stalling clocks. */
+	/*
 	if (test_tsk_need_resched(p))
 		rq->skip_clock_update = 1;
+	*/
 }
 
 static inline int cpu_of(struct rq *rq)
@@ -1042,6 +1060,7 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
 	raw_spin_lock(&rq->lock);
 	update_rq_clock(rq);
 	rq->curr->sched_class->task_tick(rq, rq->curr, 1);
+	litmus_tick(rq, rq->curr);
 	raw_spin_unlock(&rq->lock);
 
 	return HRTIMER_NORESTART;
@@ -1840,7 +1859,7 @@ static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
 
 static const struct sched_class rt_sched_class;
 
-#define sched_class_highest (&rt_sched_class)
+#define sched_class_highest (&litmus_sched_class)
 #define for_each_class(class) \
    for (class = sched_class_highest; class; class = class->next)
 
@@ -1920,6 +1939,7 @@ static void deactivate_task(struct rq *rq, struct task_struct *p, int flags)
 #include "sched_idletask.c"
 #include "sched_fair.c"
 #include "sched_rt.c"
+#include "../litmus/sched_litmus.c"
 #ifdef CONFIG_SCHED_DEBUG
 # include "sched_debug.c"
 #endif
@@ -2352,6 +2372,9 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
 	unsigned long en_flags = ENQUEUE_WAKEUP;
 	struct rq *rq;
 
+	if (is_realtime(p))
+		TRACE_TASK(p, "try_to_wake_up() state:%d\n", p->state);
+
 	this_cpu = get_cpu();
 
 	smp_wmb();
@@ -2366,7 +2389,7 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
 	orig_cpu = cpu;
 
 #ifdef CONFIG_SMP
-	if (unlikely(task_running(rq, p)))
+	if (unlikely(task_running(rq, p)) || is_realtime(p))
 		goto out_activate;
 
 	/*
@@ -2428,6 +2451,8 @@ out_activate:
 out_running:
 	ttwu_post_activation(p, rq, wake_flags, success);
 out:
+	if (is_realtime(p))
+		TRACE_TASK(p, "try_to_wake_up() done state:%d\n", p->state);
 	task_rq_unlock(rq, &flags);
 	put_cpu();
 
@@ -2532,7 +2557,8 @@ void sched_fork(struct task_struct *p, int clone_flags)
 	 * Revert to default priority/policy on fork if requested.
 	 */
 	if (unlikely(p->sched_reset_on_fork)) {
-		if (p->policy == SCHED_FIFO || p->policy == SCHED_RR) {
+		if (p->policy == SCHED_FIFO || p->policy == SCHED_RR ||
+		    p->policy == SCHED_LITMUS) {
 			p->policy = SCHED_NORMAL;
 			p->normal_prio = p->static_prio;
 		}
@@ -2748,6 +2774,8 @@ static void finish_task_switch(struct rq *rq, struct task_struct *prev)
 	 */
 	prev_state = prev->state;
 	finish_arch_switch(prev);
+	litmus->finish_switch(prev);
+	prev->rt_param.stack_in_use = NO_CPU;
 #ifdef __ARCH_WANT_INTERRUPTS_ON_CTXSW
 	local_irq_disable();
 #endif /* __ARCH_WANT_INTERRUPTS_ON_CTXSW */
@@ -2777,6 +2805,15 @@ static inline void pre_schedule(struct rq *rq, struct task_struct *prev)
 {
 	if (prev->sched_class->pre_schedule)
 		prev->sched_class->pre_schedule(rq, prev);
+
+	/* LITMUS^RT: a not very clean hack. We need to save the prev task
+	 * as our scheduling decisions rely on it (as we drop the rq lock,
+	 * something in prev can change...); there is no way to escape
+	 * this hack apart from modifying pick_next_task(rq, _prev_) or
+	 * falling back on the previous solution of decoupling
+	 * scheduling decisions.
+	 */
+	rq->litmus.prev = prev;
 }
 
 /* rq->lock is NOT held, but preemption is disabled */
@@ -3578,18 +3615,26 @@ void scheduler_tick(void)
 
 	sched_clock_tick();
 
+	TS_TICK_START(current);
+
 	raw_spin_lock(&rq->lock);
 	update_rq_clock(rq);
 	update_cpu_load_active(rq);
 	curr->sched_class->task_tick(rq, curr, 0);
+
+	/* litmus_tick may force current to resched */
+	litmus_tick(rq, curr);
+
 	raw_spin_unlock(&rq->lock);
 
 	perf_event_task_tick(curr);
 
 #ifdef CONFIG_SMP
 	rq->idle_at_tick = idle_cpu(cpu);
-	trigger_load_balance(rq, cpu);
+	if (!is_realtime(current))
+		trigger_load_balance(rq, cpu);
 #endif
+	TS_TICK_END(current);
 }
 
 notrace unsigned long get_parent_ip(unsigned long addr)
@@ -3716,12 +3761,20 @@ pick_next_task(struct rq *rq)
 	/*
 	 * Optimization: we know that if all tasks are in
 	 * the fair class we can call that function directly:
-	 */
-	if (likely(rq->nr_running == rq->cfs.nr_running)) {
+
+	 * NOT IN LITMUS^RT!
+
+	 * This breaks many assumptions in the plugins.
+	 * Do not uncomment without thinking long and hard
+	 * about how this affects global plugins such as GSN-EDF.
+
+	if (rq->nr_running == rq->cfs.nr_running) {
+		TRACE("taking shortcut in pick_next_task()\n");
 		p = fair_sched_class.pick_next_task(rq);
 		if (likely(p))
 			return p;
 	}
+	*/
 
 	class = sched_class_highest;
 	for ( ; ; ) {
@@ -3748,6 +3801,7 @@ asmlinkage void __sched schedule(void)
 
 need_resched:
 	preempt_disable();
+	sched_state_entered_schedule();
 	cpu = smp_processor_id();
 	rq = cpu_rq(cpu);
 	rcu_note_context_switch(cpu);
@@ -3755,6 +3809,8 @@ need_resched:
 
 	release_kernel_lock(prev);
 need_resched_nonpreemptible:
+	TS_SCHED_START;
+	sched_trace_task_switch_away(prev);
 
 	schedule_debug(prev);
 
@@ -3803,7 +3859,10 @@ need_resched_nonpreemptible:
 	rq->curr = next;
 	++*switch_count;
 
+	TS_SCHED_END(next);
+	TS_CXS_START(next);
 	context_switch(rq, prev, next); /* unlocks the rq */
+	TS_CXS_END(current);
 	/*
 	 * The context switch have flipped the stack from under us
 	 * and restored the local variables which were saved when
@@ -3812,17 +3871,23 @@ need_resched_nonpreemptible:
 	 */
 	cpu = smp_processor_id();
 	rq = cpu_rq(cpu);
-	} else
+	} else {
+		TS_SCHED_END(prev);
 		raw_spin_unlock_irq(&rq->lock);
+	}
+
+	sched_trace_task_switch_to(current);
 
 	post_schedule(rq);
 
-	if (unlikely(reacquire_kernel_lock(prev)))
+	if (sched_state_validate_switch() || unlikely(reacquire_kernel_lock(prev)))
 		goto need_resched_nonpreemptible;
 
 	preempt_enable_no_resched();
 	if (need_resched())
 		goto need_resched;
+
+	srp_ceiling_block();
 }
 EXPORT_SYMBOL(schedule);
 
@@ -4108,6 +4173,17 @@ void complete_all(struct completion *x)
 }
 EXPORT_SYMBOL(complete_all);
 
+void complete_n(struct completion *x, int n)
+{
+	unsigned long flags;
+
+	spin_lock_irqsave(&x->wait.lock, flags);
+	x->done += n;
+	__wake_up_common(&x->wait, TASK_NORMAL, n, 0, NULL);
+	spin_unlock_irqrestore(&x->wait.lock, flags);
+}
+EXPORT_SYMBOL(complete_n);
+
 static inline long __sched
 do_wait_for_common(struct completion *x, long timeout, int state)
 {
@@ -4550,7 +4626,9 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio)
 	p->normal_prio = normal_prio(p);
 	/* we are holding p->pi_lock already */
 	p->prio = rt_mutex_getprio(p);
-	if (rt_prio(p->prio))
+	if (p->policy == SCHED_LITMUS)
+		p->sched_class = &litmus_sched_class;
+	else if (rt_prio(p->prio))
 		p->sched_class = &rt_sched_class;
 	else
 		p->sched_class = &fair_sched_class;
@@ -4595,7 +4673,7 @@ recheck:
 
 	if (policy != SCHED_FIFO && policy != SCHED_RR &&
 			policy != SCHED_NORMAL && policy != SCHED_BATCH &&
-			policy != SCHED_IDLE)
+			policy != SCHED_IDLE && policy != SCHED_LITMUS)
 		return -EINVAL;
 	}
 
@@ -4610,6 +4688,8 @@ recheck:
 		return -EINVAL;
 	if (rt_policy(policy) != (param->sched_priority != 0))
 		return -EINVAL;
+	if (policy == SCHED_LITMUS && policy == p->policy)
+		return -EINVAL;
 
 	/*
 	 * Allow unprivileged RT tasks to decrease priority:
@@ -4650,6 +4730,12 @@ recheck:
 		return retval;
 	}
 
+	if (policy == SCHED_LITMUS) {
+		retval = litmus_admit_task(p);
+		if (retval)
+			return retval;
+	}
+
 	/*
 	 * make sure no PI-waiters arrive (or leave) while we are
 	 * changing the priority of the task:
@@ -4692,10 +4778,19 @@ recheck:
 
 	p->sched_reset_on_fork = reset_on_fork;
 
+	if (p->policy == SCHED_LITMUS)
+		litmus_exit_task(p);
+
 	oldprio = p->prio;
 	prev_class = p->sched_class;
 	__setscheduler(rq, p, policy, param->sched_priority);
 
+	if (policy == SCHED_LITMUS) {
+		p->rt_param.stack_in_use = running ? rq->cpu : NO_CPU;
+		p->rt_param.present = running;
+		litmus->task_new(p, on_rq, running);
+	}
+
 	if (running)
 		p->sched_class->set_curr_task(rq);
 	if (on_rq) {
@@ -4755,6 +4850,13 @@ do_sched_setscheduler(pid_t pid, int policy, struct sched_param __user *param)
 	if (copy_from_user(&lparam, param, sizeof(struct sched_param)))
 		return -EFAULT;
 
+#ifdef CONFIG_LITMUS_LOCKING
+	/* Hack to allow the plugin to call into the scheduler
+	 * prior to a setscheduler() call. */
+	if (is_realtime(current))
+		litmus->pre_setsched(current, policy);
+#endif
+
 	rcu_read_lock();
 	retval = -ESRCH;
 	p = find_process_by_pid(pid);
@@ -4865,10 +4967,11 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
 	rcu_read_lock();
 
 	p = find_process_by_pid(pid);
-	if (!p) {
+	/* Don't set affinity if task not found and for LITMUS tasks */
+	if (!p || is_realtime(p)) {
 		rcu_read_unlock();
 		put_online_cpus();
-		return -ESRCH;
+		return p ? -EPERM : -ESRCH;
 	}
 
 	/* Prevent p going away */
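
Usage note (not part of the patch): complete_n() extends the completion API so that n waiters can be released under a single wait-queue lock acquisition instead of calling complete() in a loop. A minimal sketch of the intended pattern, using only standard completion primitives plus complete_n() from the hunk above; the names release_point, wait_at_release_point, and release_all are illustrative only:

    /* Hypothetical synchronized-release point: tasks block on the completion,
     * and the releasing context wakes all of them in one shot. */
    static DECLARE_COMPLETION(release_point);

    static void wait_at_release_point(void)
    {
        wait_for_completion(&release_point);    /* blocks until released */
    }

    static void release_all(int nr_waiting)
    {
        complete_n(&release_point, nr_waiting); /* done += n, wake n waiters */
    }
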
diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
index db3f674ca49d..e0e8d5ca3c98 100644
--- a/kernel/sched_fair.c
+++ b/kernel/sched_fair.c
@@ -1654,7 +1654,7 @@ static void check_preempt_wakeup(struct rq *rq, struct task_struct *p, int wake_
 	struct cfs_rq *cfs_rq = task_cfs_rq(curr);
 	int scale = cfs_rq->nr_running >= sched_nr_latency;
 
-	if (unlikely(rt_prio(p->prio)))
+	if (unlikely(rt_prio(p->prio)) || p->policy == SCHED_LITMUS)
 		goto preempt;
 
 	if (unlikely(p->sched_class != &fair_sched_class))
diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
index d10c80ebb67a..e40e7fe43170 100644
--- a/kernel/sched_rt.c
+++ b/kernel/sched_rt.c
@@ -1013,7 +1013,7 @@ static void check_preempt_equal_prio(struct rq *rq, struct task_struct *p)
  */
 static void check_preempt_curr_rt(struct rq *rq, struct task_struct *p, int flags)
 {
-	if (p->prio < rq->curr->prio) {
+	if (p->prio < rq->curr->prio || p->policy == SCHED_LITMUS) {
 		resched_task(rq->curr);
 		return;
 	}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 3e216e01bbd1..bb2d8b7850a3 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -768,12 +768,53 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
 }
 
 /**
+ * tick_set_quanta_type - get the quanta type as a boot option
+ * Default is standard setup with ticks staggered over first
+ * half of tick period.
+ */
+int quanta_type = LINUX_DEFAULT_TICKS;
+static int __init tick_set_quanta_type(char *str)
+{
+	if (strcmp("aligned", str) == 0) {
+		quanta_type = LITMUS_ALIGNED_TICKS;
+		printk(KERN_INFO "LITMUS^RT: setting aligned quanta\n");
+	}
+	else if (strcmp("staggered", str) == 0) {
+		quanta_type = LITMUS_STAGGERED_TICKS;
+		printk(KERN_INFO "LITMUS^RT: setting staggered quanta\n");
+	}
+	return 1;
+}
+__setup("quanta=", tick_set_quanta_type);
+
+u64 cpu_stagger_offset(int cpu)
+{
+	u64 offset = 0;
+	switch (quanta_type) {
+	case LITMUS_ALIGNED_TICKS:
+		offset = 0;
+		break;
+	case LITMUS_STAGGERED_TICKS:
+		offset = ktime_to_ns(tick_period);
+		do_div(offset, num_possible_cpus());
+		offset *= cpu;
+		break;
+	default:
+		offset = ktime_to_ns(tick_period) >> 1;
+		do_div(offset, num_possible_cpus());
+		offset *= cpu;
+	}
+	return offset;
+}
+
+/**
  * tick_setup_sched_timer - setup the tick emulation timer
  */
 void tick_setup_sched_timer(void)
 {
 	struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
 	ktime_t now = ktime_get();
+	u64 offset;
 
 	/*
 	 * Emulate tick processing via per-CPU hrtimers:
@@ -784,6 +825,12 @@ void tick_setup_sched_timer(void)
 	/* Get the next period (per cpu) */
 	hrtimer_set_expires(&ts->sched_timer, tick_init_jiffy_update());
 
+	/* Offset must be set correctly to achieve desired quanta type. */
+	offset = cpu_stagger_offset(smp_processor_id());
+
+	/* Add the correct offset to expiration time */
+	hrtimer_add_expires_ns(&ts->sched_timer, offset);
+
 	for (;;) {
 		hrtimer_forward(&ts->sched_timer, now, tick_period);
 		hrtimer_start_expires(&ts->sched_timer,
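
Worked example (not part of the patch): with tick_period = 1 ms and num_possible_cpus() = 4, cpu_stagger_offset() yields per-CPU offsets of 0, 250, 500, and 750 us for quanta=staggered, 0 on every CPU for quanta=aligned, and 0, 125, 250, and 375 us (ticks spread over the first half of the period) for the Linux default. A stand-alone sketch of the same arithmetic, for illustration only:

    #include <stdio.h>
    #include <stdint.h>

    /* User-space replica of the cpu_stagger_offset() arithmetic above,
     * assuming a 1 ms tick and 4 possible CPUs. */
    int main(void)
    {
        const uint64_t tick_ns = 1000000ULL;  /* tick_period = 1 ms */
        const uint64_t cpus = 4;              /* num_possible_cpus() */

        for (uint64_t cpu = 0; cpu < cpus; cpu++) {
            uint64_t staggered = tick_ns / cpus * cpu;        /* quanta=staggered */
            uint64_t dflt = (tick_ns >> 1) / cpus * cpu;      /* Linux default */
            printf("cpu %llu: staggered %llu ns, default %llu ns\n",
                   (unsigned long long)cpu,
                   (unsigned long long)staggered,
                   (unsigned long long)dflt);
        }
        return 0;
    }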