author		Andrea Bastoni <bastoni@cs.unc.edu>	2010-09-22 17:18:07 -0400
committer	Andrea Bastoni <bastoni@cs.unc.edu>	2010-09-22 17:18:07 -0400
commit		44c034799bb7a4e8090b5460e36c3993d1924006
tree		4a1299358bf7effd2745c0e14bc49e52c67ac197
parent		f49ec60b4d6394cf199837bbe4c10258e5a877c7
parent		870eaa144c59ee2665b70dcbb9d8649e99e3c0c4
Merge branch 'wip-edf-wm' into wip-semi-part
This version of litmus2010 contains the implementation of three
semi-partitioned scheduling algorithms: EDF-fm, EDF-WM, and NPS-F.

Conflicts:
	include/litmus/rt_param.h
	litmus/Makefile
-rw-r--r--	include/litmus/rt_domain.h	16
-rw-r--r--	include/litmus/rt_param.h	66
-rw-r--r--	litmus/Makefile	3
-rw-r--r--	litmus/litmus.c	7
-rw-r--r--	litmus/rt_domain.c	40
-rw-r--r--	litmus/sched_edf_fm.c	6
-rw-r--r--	litmus/sched_edf_wm.c	664
7 files changed, 780 insertions, 22 deletions
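The new edf_wm_params/edf_wm_slice fields introduced in rt_param.h below are the
user-visible interface to EDF-WM: a sliced task describes, per slice, the CPU, the
budget, and the offset and deadline relative to each job release. What follows is a
minimal sketch (not part of this patch) of how a user-space tool might fill in those
fields for a task split across two CPUs; the helper name is hypothetical, the other
rt_task members used (period, cpu, budget_policy) are the usual LITMUS^RT parameters,
and PRECISE_ENFORCEMENT is the policy that the litmus.c hunk starts accepting.

	#include <litmus/rt_param.h>

	/* Hypothetical helper: describe a 3 ms job split into a 2 ms slice on
	 * CPU 0 followed by a 1 ms slice on CPU 1, within a 10 ms period.
	 * All times are in nanoseconds (lt_t). */
	static void setup_sliced_task(struct rt_task *tp)
	{
		tp->exec_cost     = 3000000;	/* 3 ms total budget */
		tp->period        = 10000000;	/* 10 ms period */
		tp->cpu           = 0;		/* partition must match slice 0 */
		tp->budget_policy = PRECISE_ENFORCEMENT;

		tp->semi_part.wm.count = 2;	/* 0 = not sliced; 1 is illegal */

		/* slice 0: 2 ms on CPU 0, eligible at job release, due 5 ms in */
		tp->semi_part.wm.slices[0] = (struct edf_wm_slice) {
			.cpu = 0, .offset = 0, .deadline = 5000000, .budget = 2000000,
		};
		/* slice 1: 1 ms on CPU 1, eligible at 5 ms, due at the job deadline */
		tp->semi_part.wm.slices[1] = (struct edf_wm_slice) {
			.cpu = 1, .offset = 5000000, .deadline = 10000000, .budget = 1000000,
		};
	}

These values pass the admission checks in wm_check_params() of the new plugin: the
slice budgets sum to exec_cost, slice 0 sits on the task's partition, consecutive
slices use different CPUs, each budget is at least MIN_EDF_WM_SLICE_SIZE and fits
between its offset and deadline, and slice 1 is not released before slice 0's deadline.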
diff --git a/include/litmus/rt_domain.h b/include/litmus/rt_domain.h
index 59e6b54e9281..ac249292e866 100644
--- a/include/litmus/rt_domain.h
+++ b/include/litmus/rt_domain.h
@@ -143,12 +143,26 @@ static inline struct task_struct* take_ready(rt_domain_t* rt)
 static inline void add_release(rt_domain_t* rt, struct task_struct *task)
 {
 	unsigned long flags;
-	/* first we need the write lock for rt_ready_queue */
 	raw_spin_lock_irqsave(&rt->tobe_lock, flags);
 	__add_release(rt, task);
 	raw_spin_unlock_irqrestore(&rt->tobe_lock, flags);
 }
 
+#ifdef CONFIG_RELEASE_MASTER
+void __add_release_on(rt_domain_t* rt, struct task_struct *task,
+		      int target_cpu);
+
+static inline void add_release_on(rt_domain_t* rt,
+				  struct task_struct *task,
+				  int target_cpu)
+{
+	unsigned long flags;
+	raw_spin_lock_irqsave(&rt->tobe_lock, flags);
+	__add_release_on(rt, task, target_cpu);
+	raw_spin_unlock_irqrestore(&rt->tobe_lock, flags);
+}
+#endif
+
 static inline int __jobs_pending(rt_domain_t* rt)
 {
 	return !bheap_empty(&rt->ready_queue);
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index be8f5528ff87..6f43deacb3e1 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -1,3 +1,5 @@
+#include <linux/threads.h>
+
 /*
  * Definition of the scheduler plugin interface.
  *
@@ -34,7 +36,7 @@ typedef enum {
 } budget_policy_t;
 
 
-/* Parameters for EDF-Fm scheduling algorithm.
+/* The parameters for EDF-Fm scheduling algorithm.
  * Each task may be fixed or migratory. Migratory tasks may
  * migrate on 2 (contiguous) CPU only. NR_CPUS_EDF_FM = 2.
  */
@@ -58,7 +60,7 @@ struct edffm_params {
 	lt_t fraction[2][NR_CPUS_EDF_FM];
 };
 
-/* NPS-F semi-part. plugin.
+/* Parameters for NPS-F semi-partitioned scheduling algorithm.
  * Each (cpu, budget) entry defines the share ('budget' in ns, a % of
  * the slot_length) of the notional processor on the CPU 'cpu'.
  * This structure is used by the library - syscall interface in order
@@ -69,6 +71,36 @@ struct npsf_budgets {
 	lt_t budget;
 };
 
+/* The parameters for the EDF-WM semi-partitioned scheduler.
+ * Each task may be split across multiple cpus. Each per-cpu allocation
+ * is called a 'slice'.
+ */
+#define MAX_EDF_WM_SLICES 24
+#define MIN_EDF_WM_SLICE_SIZE 50000 /* .05 millisecond = 50us */
+
+struct edf_wm_slice {
+	/* on which CPU is this slice allocated */
+	unsigned int cpu;
+	/* relative deadline from job release (not from slice release!) */
+	lt_t deadline;
+	/* budget of this slice; must be precisely enforced */
+	lt_t budget;
+	/* offset of this slice relative to the job release */
+	lt_t offset;
+};
+
+/* If a job is not sliced across multiple CPUs, then
+ * count is set to zero and none of the slices is used.
+ * This implies that count == 1 is illegal.
+ */
+struct edf_wm_params {
+	/* enumeration of all slices */
+	struct edf_wm_slice slices[MAX_EDF_WM_SLICES];
+
+	/* how many slices are defined? */
+	unsigned int count;
+};
+
 struct rt_task {
 	lt_t exec_cost;
 	lt_t period;
@@ -81,11 +113,16 @@ struct rt_task {
 	union {
 		/* EDF-Fm; defined in sched_edf_fm.c */
 		struct edffm_params fm;
-		/* NPS-F: id for the server (notional processor) that holds
+
+		/* NPS-F; defined in sched_npsf.c
+		 * id for the server (notional processor) that holds
 		 * this task; the same npfs_id can be assigned to "the same"
 		 * server split on different cpus
-		 * */
+		 */
 		int npsf_id;
+
+		/* EDF-WM; defined in sched_edf_wm.c */
+		struct edf_wm_params wm;
 	} semi_part;
 };
 
@@ -234,12 +271,23 @@ struct rt_param {
 
 	/* runtime info for the semi-part plugins */
 	union {
+		/* EDF-Fm runtime information
+		 * number of jobs handled by this cpu
+		 * (to determine next cpu for a migrating task)
+		 */
+		unsigned int cpu_job_no[NR_CPUS_EDF_FM];
+
+		/* EDF-WM runtime information */
 		struct {
-			/* EDF-fm: number of jobs handled by this cpu
-			 * (to determine next cpu for a migrating task)
-			 */
-			unsigned int cpu_job_no[NR_CPUS_EDF_FM];
-		} fm;
+			/* at which exec time did the current slice start? */
+			lt_t exec_time;
+			/* when did the job suspend? */
+			lt_t suspend_time;
+			/* cached job parameters */
+			lt_t job_release, job_deadline;
+			/* pointer to the current slice */
+			struct edf_wm_slice* slice;
+		} wm;
 	} semi_part;
 };
 
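A note on how the cached values above are used: compute_slice_params() in the new
sched_edf_wm.c sets job_params.release = job_release + slice->offset and
job_params.deadline = job_release + slice->deadline, so the generic EDF queues see
per-slice parameters while the real job release and deadline stay cached in
semi_part.wm. With the two-slice layout sketched earlier, a job released at 100 ms
has its second slice enqueued with release 105 ms and deadline 110 ms;
complete_sliced_job() restores the cached values before prepare_for_next_period()
computes the parameters of the next job.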
diff --git a/litmus/Makefile b/litmus/Makefile
index 89c22198a53a..014c6c6ec590 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -15,7 +15,8 @@ obj-y = sched_plugin.o litmus.o \
 	  sched_gsn_edf.o \
 	  sched_psn_edf.o \
 	  sched_npsf.o \
-	  sched_edf_fm.o
+	  sched_edf_fm.o \
+	  sched_edf_wm.o
 
 obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
 obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
diff --git a/litmus/litmus.c b/litmus/litmus.c
index b1c627db694a..2f780222d8e8 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -115,10 +115,13 @@ asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
 		goto out_unlock;
 	}
 	if (tp.budget_policy != NO_ENFORCEMENT &&
-	    tp.budget_policy != QUANTUM_ENFORCEMENT)
+	    tp.budget_policy != QUANTUM_ENFORCEMENT &&
+	    tp.budget_policy != PRECISE_ENFORCEMENT)
 	{
 		printk(KERN_INFO "litmus: real-time task %d rejected "
-		       "because unsupported budget enforcement policy specified\n", pid);
+		       "because unsupported budget enforcement policy "
+		       "specified (%d)\n",
+		       pid, tp.budget_policy);
 		goto out_unlock;
 	}
 
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
index 8a3ff706c208..d27af48e0298 100644
--- a/litmus/rt_domain.c
+++ b/litmus/rt_domain.c
@@ -169,7 +169,12 @@ static void reinit_release_heap(struct task_struct* t)
  * - tobe_lock taken
  * - IRQ disabled
  */
-static void arm_release_timer(rt_domain_t *_rt)
+#ifdef CONFIG_RELEASE_MASTER
+#define arm_release_timer(t) arm_release_timer_on((t), NO_CPU)
+static void arm_release_timer_on(rt_domain_t *_rt , int target_cpu)
+#else
+static void arm_release_timer(rt_domain_t *_rt)
+#endif
 {
 	rt_domain_t *rt = _rt;
 	struct list_head list;
@@ -224,17 +229,21 @@ static void arm_release_timer(rt_domain_t *_rt)
 		 * PINNED mode is ok on both local and remote CPU
 		 */
 #ifdef CONFIG_RELEASE_MASTER
-		if (rt->release_master == NO_CPU)
+		if (rt->release_master == NO_CPU &&
+		    target_cpu == NO_CPU)
 #endif
 			__hrtimer_start_range_ns(&rh->timer,
 					ns_to_ktime(rh->release_time),
 					0, HRTIMER_MODE_ABS_PINNED, 0);
 #ifdef CONFIG_RELEASE_MASTER
 		else
-			hrtimer_start_on(rt->release_master,
-					&rh->info, &rh->timer,
-					ns_to_ktime(rh->release_time),
-					HRTIMER_MODE_ABS_PINNED);
+			hrtimer_start_on(
+				/* target_cpu overrides release master */
+				(target_cpu != NO_CPU ?
+				 target_cpu : rt->release_master),
+				&rh->info, &rh->timer,
+				ns_to_ktime(rh->release_time),
+				HRTIMER_MODE_ABS_PINNED);
 #endif
 	} else
 		TRACE_TASK(t, "0x%p is not my timer\n", &rh->timer);
@@ -299,6 +308,25 @@ void __merge_ready(rt_domain_t* rt, struct bheap* tasks)
 		rt->check_resched(rt);
 }
 
+
+#ifdef CONFIG_RELEASE_MASTER
+void __add_release_on(rt_domain_t* rt, struct task_struct *task,
+		      int target_cpu)
+{
+	TRACE_TASK(task, "add_release_on(), rel=%llu, target=%d\n",
+		   get_release(task), target_cpu);
+	list_add(&tsk_rt(task)->list, &rt->tobe_released);
+	task->rt_param.domain = rt;
+
+	/* start release timer */
+	TS_SCHED2_START(task);
+
+	arm_release_timer_on(rt, target_cpu);
+
+	TS_SCHED2_END(task);
+}
+#endif
+
 /* add_release - add a real-time task to the rt release queue.
  * @task: the sleeping task
  */
diff --git a/litmus/sched_edf_fm.c b/litmus/sched_edf_fm.c
index e14f8588c7a4..38b5a3f97101 100644
--- a/litmus/sched_edf_fm.c
+++ b/litmus/sched_edf_fm.c
@@ -60,8 +60,8 @@ DEFINE_PER_CPU(edffm_domain_t, edffm_domains);
 /* Get job number for current cpu */
 #define cur_cpu_job_no(t) \
 	((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ? \
-	 tsk_rt(t)->semi_part.fm.cpu_job_no[0] : \
-	 tsk_rt(t)->semi_part.fm.cpu_job_no[1])
+	 tsk_rt(t)->semi_part.cpu_job_no[0] : \
+	 tsk_rt(t)->semi_part.cpu_job_no[1])
 /* What is the current cpu position in the array? */
 #define edffm_cpu_pos(cpu,t) \
 	((cpu == edffm_params(t).cpus[0]) ? \
@@ -259,7 +259,7 @@ static void update_job_counter(struct task_struct *t)
 
 	/* Which CPU counter should be incremented? */
 	cpu_pos = edffm_cpu_pos(t->rt_param.task_params.cpu, t);
-	t->rt_param.semi_part.fm.cpu_job_no[cpu_pos]++;
+	t->rt_param.semi_part.cpu_job_no[cpu_pos]++;
 
 	TRACE_TASK(t, "job_no = %d, cpu_job_no(pos %d) = %d, cpu %d\n",
 		   t->rt_param.job_params.job_no, cpu_pos, cur_cpu_job_no(t),
diff --git a/litmus/sched_edf_wm.c b/litmus/sched_edf_wm.c
new file mode 100644
index 000000000000..44e05948980c
--- /dev/null
+++ b/litmus/sched_edf_wm.c
@@ -0,0 +1,664 @@
+/* EDF-WM: based on PSN-EDF.
+ */
+
+#include <linux/percpu.h>
+#include <linux/sched.h>
+#include <linux/list.h>
+#include <linux/spinlock.h>
+
+#include <linux/module.h>
+
+#include <litmus/litmus.h>
+#include <litmus/jobs.h>
+#include <litmus/sched_plugin.h>
+#include <litmus/edf_common.h>
+
+typedef struct {
+	rt_domain_t		domain;
+	int			cpu;
+	struct task_struct*	scheduled; /* only RT tasks */
+
+	/* The enforcement timer is used to accurately police
+	 * slice budgets. */
+	struct hrtimer		enforcement_timer;
+	int			timer_armed;
+/*
+ * scheduling lock slock
+ * protects the domain and serializes scheduling decisions
+ */
+#define slock domain.ready_lock
+
+} wm_domain_t;
+
+DEFINE_PER_CPU(wm_domain_t, wm_domains);
+
+#define TRACE_DOM(dom, fmt, args...) \
+	TRACE("(wm_domains[%d]) " fmt, (dom)->cpu, ##args)
+
+
+#define local_domain         (&__get_cpu_var(wm_domains))
+#define remote_domain(cpu)   (&per_cpu(wm_domains, cpu))
+#define domain_of_task(task) (remote_domain(get_partition(task)))
+#define domain_from_timer(t) (container_of((t), wm_domain_t, enforcement_timer))
+
+static int is_sliced_task(struct task_struct* t)
+{
+	return tsk_rt(t)->task_params.semi_part.wm.count;
+}
+
+static struct edf_wm_slice* get_last_slice(struct task_struct* t)
+{
+	int idx = tsk_rt(t)->task_params.semi_part.wm.count - 1;
+	return tsk_rt(t)->task_params.semi_part.wm.slices + idx;
+}
+
+static void compute_slice_params(struct task_struct* t)
+{
+	struct rt_param* p = tsk_rt(t);
+	/* Here we do a little trick to make the generic EDF code
+	 * play well with job slices. We overwrite the job-level
+	 * release and deadline fields with the slice-specific values
+	 * so that we can enqueue this task in an EDF rt_domain_t
+	 * without issue. The actual values are cached in the semi_part.wm
+	 * structure. */
+	p->job_params.deadline = p->semi_part.wm.job_release +
+		p->semi_part.wm.slice->deadline;
+	p->job_params.release = p->semi_part.wm.job_release +
+		p->semi_part.wm.slice->offset;
+
+	/* Similarly, we play a trick on the cpu field. */
+	p->task_params.cpu = p->semi_part.wm.slice->cpu;
+
+	/* update the per-slice budget reference */
+	p->semi_part.wm.exec_time = p->job_params.exec_time;
+}
+
+static void complete_sliced_job(struct task_struct* t)
+{
+	struct rt_param* p = tsk_rt(t);
+
+	/* We need to undo our trickery to the
+	 * job parameters (see above). */
+	p->job_params.release = p->semi_part.wm.job_release;
+	p->job_params.deadline = p->semi_part.wm.job_deadline;
+
+	/* Ok, now let generic code do the actual work. */
+	prepare_for_next_period(t);
+
+	/* And finally cache the updated parameters. */
+	p->semi_part.wm.job_release = p->job_params.release;
+	p->semi_part.wm.job_deadline = p->job_params.deadline;
+}
+
+static void advance_next_slice(struct task_struct* t, int completion_signaled)
+{
+	int idx;
+	struct rt_param* p = tsk_rt(t);
+
+	/* make sure this is actually a sliced job */
+	BUG_ON(!is_sliced_task(t));
+	BUG_ON(is_queued(t));
+
+	/* determine index of current slice */
+	idx = p->semi_part.wm.slice -
+		p->task_params.semi_part.wm.slices;
+
+	if (completion_signaled)
+		idx = 0;
+	else
+		/* increment and wrap around, if necessary */
+		idx = (idx + 1) % p->task_params.semi_part.wm.count;
+
+	/* point to next slice */
+	p->semi_part.wm.slice =
+		p->task_params.semi_part.wm.slices + idx;
+
+	/* Check if we need to update essential job parameters. */
+	if (!idx) {
+		/* job completion */
+		sched_trace_task_completion(t, !completion_signaled);
+		TRACE_TASK(t, "completed sliced job"
+			   "(signaled:%d)\n", completion_signaled);
+		complete_sliced_job(t);
+	}
+
+	/* Update job parameters for new slice. */
+	compute_slice_params(t);
+}
+
+static lt_t slice_exec_time(struct task_struct* t)
+{
+	struct rt_param* p = tsk_rt(t);
+
+	/* Compute how much execution time has been consumed
+	 * since last slice advancement. */
+	return p->job_params.exec_time - p->semi_part.wm.exec_time;
+}
+
+static lt_t slice_budget(struct task_struct* t)
+{
+	return tsk_rt(t)->semi_part.wm.slice->budget;
+}
+
+static int slice_budget_exhausted(struct task_struct* t)
+{
+	return slice_exec_time(t) >= slice_budget(t);
+}
+
+/* assumes positive remainder; overflows otherwise */
+static lt_t slice_budget_remaining(struct task_struct* t)
+{
+	return slice_budget(t) - slice_exec_time(t);
+}
+
+/* assumes time_passed does not advance past the last slice */
+static void fast_forward_slices(struct task_struct* t, lt_t time_passed)
+{
+	while (time_passed &&
+	       time_passed >= slice_budget_remaining(t)) {
+		/* slice completely exhausted */
+		time_passed -= slice_budget_remaining(t);
+		tsk_rt(t)->job_params.exec_time +=
+			slice_budget_remaining(t);
+		BUG_ON(!slice_budget_exhausted(t));
+		BUG_ON(slice_budget_remaining(t) != 0);
+		advance_next_slice(t, 0);
+	}
+	/* add remainder to exec cost */
+	tsk_rt(t)->job_params.exec_time += time_passed;
+}
+
+/* we assume the lock is being held */
+static void preempt(wm_domain_t *dom)
+{
+	TRACE_DOM(dom, "will be preempted.\n");
+	/* We pass NULL as the task since non-preemptive sections are not
+	 * supported in this plugin, so per-task checks are not needed. */
+	preempt_if_preemptable(NULL, dom->cpu);
+}
+
+static enum hrtimer_restart on_enforcement_timeout(struct hrtimer *timer)
+{
+	wm_domain_t *dom = domain_from_timer(timer);
+	unsigned long flags;
+
+	raw_spin_lock_irqsave(&dom->slock, flags);
+	if (likely(dom->timer_armed)) {
+		TRACE_DOM(dom, "enforcement timer fired.\n");
+		dom->timer_armed = 0;
+		preempt(dom);
+	} else
+		TRACE_DOM(dom, "timer fired but not armed???\n");
+	raw_spin_unlock_irqrestore(&dom->slock, flags);
+	return HRTIMER_NORESTART;
+}
+
+static void wm_domain_init(wm_domain_t* dom,
+			   check_resched_needed_t check,
+			   release_jobs_t release,
+			   int cpu)
+{
+	edf_domain_init(&dom->domain, check, release);
+	dom->cpu = cpu;
+	dom->scheduled = NULL;
+	dom->timer_armed = 0;
+	hrtimer_init(&dom->enforcement_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
+	dom->enforcement_timer.function = on_enforcement_timeout;
+}
+
+static void wm_requeue_remote(struct task_struct *t)
+{
+	wm_domain_t *dom = domain_of_task(t);
+
+	set_rt_flags(t, RT_F_RUNNING);
+	if (is_released(t, litmus_clock()))
+		/* acquires necessary lock */
+		add_ready(&dom->domain, t);
+	else
+		/* force timer on remote CPU */
+		add_release_on(&dom->domain, t, get_partition(t));
+}
+
+static void wm_requeue_local(struct task_struct* t, rt_domain_t *edf)
+{
+	if (t->state != TASK_RUNNING)
+		TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
+
+	set_rt_flags(t, RT_F_RUNNING);
+	if (is_released(t, litmus_clock()))
+		__add_ready(edf, t);
+	else
+		add_release(edf, t); /* it has got to wait */
+}
+
+static int wm_check_resched(rt_domain_t *edf)
+{
+	wm_domain_t *dom = container_of(edf, wm_domain_t, domain);
+
+	/* because this is a callback from rt_domain_t we already hold
+	 * the necessary lock for the ready queue
+	 */
+	if (edf_preemption_needed(edf, dom->scheduled)) {
+		preempt(dom);
+		return 1;
+	} else
+		return 0;
+}
+
+static void regular_job_completion(struct task_struct* t, int forced)
+{
+	sched_trace_task_completion(t, forced);
+	TRACE_TASK(t, "job_completion().\n");
+
+	set_rt_flags(t, RT_F_SLEEP);
+	prepare_for_next_period(t);
+}
+
+static void wm_job_or_slice_completion(struct task_struct* t,
+					int completion_signaled)
+{
+	if (is_sliced_task(t))
+		advance_next_slice(t, completion_signaled);
+	else
+		regular_job_completion(t, !completion_signaled);
+}
+
+static int wm_budget_exhausted(struct task_struct* t)
+{
+	if (is_sliced_task(t))
+		return slice_budget_exhausted(t);
+	else
+		return budget_exhausted(t);
+}
+
+static void wm_tick(struct task_struct *t)
+{
+	wm_domain_t *dom = local_domain;
+
+	/* Check for inconsistency. We don't need the lock for this since
+	 * ->scheduled is only changed in schedule, which obviously is not
+	 * executing in parallel on this CPU
+	 */
+	BUG_ON(is_realtime(t) && t != dom->scheduled);
+
+	if (is_realtime(t) && budget_enforced(t) && wm_budget_exhausted(t)) {
+		set_tsk_need_resched(t);
+		TRACE_DOM(dom, "budget of %d exhausted in tick\n",
+			  t->pid);
+	}
+}
+
+static struct task_struct* wm_schedule(struct task_struct * prev)
+{
+	wm_domain_t *dom = local_domain;
+	rt_domain_t *edf = &dom->domain;
+	struct task_struct *next, *migrate = NULL;
+
+	int out_of_time, sleep, preempt,
+	    exists, blocks, resched;
+
+	raw_spin_lock(&dom->slock);
+
+	/* Sanity checking:
+	 * When a task exits (dead) dom->scheduled may be null
+	 * and prev _is_ realtime. */
+	BUG_ON(dom->scheduled && dom->scheduled != prev);
+	BUG_ON(dom->scheduled && !is_realtime(prev));
+
+	/* (0) Determine state */
+	exists = dom->scheduled != NULL;
+	blocks = exists && !is_running(dom->scheduled);
+	out_of_time = exists
+		&& budget_enforced(dom->scheduled)
+		&& wm_budget_exhausted(dom->scheduled);
+	sleep = exists && get_rt_flags(dom->scheduled) == RT_F_SLEEP;
+	preempt = edf_preemption_needed(edf, prev);
+
+	/* If we need to preempt do so.
+	 * The following checks set resched to 1 in case of special
+	 * circumstances.
+	 */
+	resched = preempt;
+
+	/* If a task blocks we have no choice but to reschedule.
+	 */
+	if (blocks)
+		resched = 1;
+
+	/* Any task that is preemptable and either exhausts its execution
+	 * budget or wants to sleep completes. We may have to reschedule after
+	 * this.
+	 */
+	if ((out_of_time || sleep) && !blocks) {
+		wm_job_or_slice_completion(dom->scheduled, sleep);
+		resched = 1;
+	}
+
+	/* The final scheduling decision. Do we need to switch for some reason?
+	 * Switch if we are in RT mode and have no task or if we need to
+	 * resched.
+	 */
+	next = NULL;
+	if (resched || !exists) {
+		if (dom->scheduled && !blocks) {
+			if (get_partition(dom->scheduled) == dom->cpu)
+				/* local task */
+				wm_requeue_local(dom->scheduled, edf);
+			else
+				/* not local anymore; wait until we drop the
+				 * ready queue lock */
+				migrate = dom->scheduled;
+		}
+		next = __take_ready(edf);
+	} else
+		/* Only override Linux scheduler if we have a real-time task
+		 * scheduled that needs to continue. */
+		if (exists)
+			next = prev;
+
+	if (next) {
+		TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
+		set_rt_flags(next, RT_F_RUNNING);
+	} else if (exists) {
+		TRACE("becoming idle at %llu\n", litmus_clock());
+	}
+
+	dom->scheduled = next;
+	raw_spin_unlock(&dom->slock);
+
+	/* check if we need to push the previous task onto another queue */
+	if (migrate) {
+		TRACE_TASK(migrate, "schedule-initiated migration to %d\n",
+			   get_partition(migrate));
+		wm_requeue_remote(migrate);
+	}
+
+	return next;
+}
+
+
+/* Prepare a task for running in RT mode
+ */
+static void wm_task_new(struct task_struct * t, int on_rq, int running)
+{
+	wm_domain_t* dom = domain_of_task(t);
+	rt_domain_t* edf = &dom->domain;
+	unsigned long flags;
+
+	TRACE_TASK(t, "edf-wm: task new, cpu = %d\n",
+		   t->rt_param.task_params.cpu);
+
+	/* setup job parameters */
+	release_at(t, litmus_clock());
+
+	/* The task should be running in the queue, otherwise signal
+	 * code will try to wake it up with fatal consequences.
+	 */
+	raw_spin_lock_irqsave(&dom->slock, flags);
+
+	if (is_sliced_task(t)) {
+		/* make sure parameters are initialized consistently */
+		tsk_rt(t)->semi_part.wm.exec_time = 0;
+		tsk_rt(t)->semi_part.wm.job_release = get_release(t);
+		tsk_rt(t)->semi_part.wm.job_deadline = get_deadline(t);
+		tsk_rt(t)->semi_part.wm.slice = tsk_rt(t)->task_params.semi_part.wm.slices;
+		tsk_rt(t)->job_params.exec_time = 0;
+	}
+
+	if (running) {
+		/* there shouldn't be anything else running at the time */
+		BUG_ON(dom->scheduled);
+		dom->scheduled = t;
+	} else {
+		wm_requeue_local(t, edf);
+		/* maybe we have to reschedule */
+		preempt(dom);
+	}
+	raw_spin_unlock_irqrestore(&dom->slock, flags);
+}
+
+static void wm_release_at(struct task_struct *t, lt_t start)
+{
+	struct rt_param* p = tsk_rt(t);
+
+	if (is_sliced_task(t)) {
+		/* simulate wrapping to the first slice */
+		p->semi_part.wm.job_deadline = start;
+		p->semi_part.wm.slice = get_last_slice(t);
+		/* FIXME: creates bogus completion event... */
+		advance_next_slice(t, 0);
+		set_rt_flags(t, RT_F_RUNNING);
+	} else
+		/* generic code handles it */
+		release_at(t, start);
+}
+
+static lt_t wm_earliest_release(struct task_struct *t, lt_t now)
+{
+	lt_t deadline;
+	if (is_sliced_task(t))
+		deadline = tsk_rt(t)->semi_part.wm.job_deadline;
+	else
+		deadline = get_deadline(t);
+	if (lt_before(deadline, now))
+		return now;
+	else
+		return deadline;
+}
+
+static void wm_task_wake_up(struct task_struct *t)
+{
+	unsigned long flags;
+	wm_domain_t* dom = domain_of_task(t);
+	rt_domain_t* edf = &dom->domain;
+	struct rt_param* p = tsk_rt(t);
+	lt_t now, sleep_time;
+	int migrate = 0;
+
+	raw_spin_lock_irqsave(&dom->slock, flags);
+	BUG_ON(is_queued(t));
+
+	now = litmus_clock();
+
+	sleep_time = now - p->semi_part.wm.suspend_time;
+
+	TRACE_TASK(t, "wake_up at %llu after %llu\n", now, sleep_time);
+
+	/* account sleep time as execution time */
+	if (get_exec_time(t) + sleep_time >= get_exec_cost(t)) {
+		/* new sporadic release */
+		wm_release_at(t, wm_earliest_release(t, now));
+		sched_trace_task_release(t);
+	} else if (is_sliced_task(t)) {
+		/* figure out which slice we should be executing on */
+		fast_forward_slices(t, sleep_time);
+	} else {
+		/* simply add to the execution time */
+		tsk_rt(t)->job_params.exec_time += sleep_time;
+	}
+
+
+	/* Only add to ready queue if it is not the currently-scheduled
+	 * task. This could be the case if a task was woken up concurrently
+	 * on a remote CPU before the executing CPU got around to actually
+	 * de-scheduling the task, i.e., wake_up() raced with schedule()
+	 * and won.
+	 */
+	if (dom->scheduled != t) {
+		if (get_partition(t) == dom->cpu)
+			wm_requeue_local(t, edf);
+		else
+			/* post-pone migration until after unlocking */
+			migrate = 1;
+	}
+
+	raw_spin_unlock_irqrestore(&dom->slock, flags);
+
+	if (migrate) {
+		TRACE_TASK(t, "wake_up-initiated migration to %d\n",
+			   get_partition(t));
+		wm_requeue_remote(t);
+	}
+
+	TRACE_TASK(t, "wake up done\n");
+}
+
+static void wm_task_block(struct task_struct *t)
+{
+	wm_domain_t* dom = domain_of_task(t);
+	unsigned long flags;
+	lt_t now = litmus_clock();
+
+	TRACE_TASK(t, "block at %llu, state=%d\n", now, t->state);
+
+	tsk_rt(t)->semi_part.wm.suspend_time = now;
+
+	raw_spin_lock_irqsave(&dom->slock, flags);
+	if (is_queued(t)) {
+		TRACE_TASK(t, "still queued; migration invariant failed?\n");
+		remove(&dom->domain, t);
+	}
+	raw_spin_unlock_irqrestore(&dom->slock, flags);
+
+	BUG_ON(!is_realtime(t));
+}
+
+static void wm_task_exit(struct task_struct * t)
+{
+	unsigned long flags;
+	wm_domain_t* dom = domain_of_task(t);
+	rt_domain_t* edf = &dom->domain;
+
+	raw_spin_lock_irqsave(&dom->slock, flags);
+	if (is_queued(t)) {
+		/* dequeue */
+		remove(edf, t);
+	}
+	if (dom->scheduled == t)
+		dom->scheduled = NULL;
+
+	TRACE_TASK(t, "RIP, now reschedule\n");
+
+	preempt(dom);
+	raw_spin_unlock_irqrestore(&dom->slock, flags);
+}
+
+static long wm_check_params(struct task_struct *t)
+{
+	struct rt_param* p = tsk_rt(t);
+	struct edf_wm_params* wm = &p->task_params.semi_part.wm;
+	int i;
+	lt_t tmp;
+
+	if (!is_sliced_task(t)) {
+		/* regular task; nothing to check */
+		TRACE_TASK(t, "accepted regular (non-sliced) task with "
+			   "%d slices\n",
+			   wm->count);
+		return 0;
+	}
+
+	/* (1) Either not sliced, or more than 1 slice. */
+	if (wm->count == 1 || wm->count > MAX_EDF_WM_SLICES) {
+		TRACE_TASK(t, "bad number of slices (%u) \n",
+			   wm->count);
+		return -EINVAL;
+	}
+
+	/* (2) The partition has to agree with the first slice. */
+	if (get_partition(t) != wm->slices[0].cpu) {
+		TRACE_TASK(t, "partition and first slice CPU differ "
+			   "(%d != %d)\n", get_partition(t), wm->slices[0].cpu);
+		return -EINVAL;
+	}
+
+	/* (3) The total budget must agree. */
+	for (i = 0, tmp = 0; i < wm->count; i++)
+		tmp += wm->slices[i].budget;
+	if (get_exec_cost(t) != tmp) {
+		TRACE_TASK(t, "total budget and sum of slice budgets differ\n");
+		return -EINVAL;
+	}
+
+	/* (4) The release of each slice must not precede the previous
+	 * deadline. */
+	for (i = 0; i < wm->count - 1; i++)
+		if (wm->slices[i].deadline > wm->slices[i + 1].offset) {
+			TRACE_TASK(t, "slice %d overlaps with slice %d\n",
+				   i, i + 1);
+			return -EINVAL;
+		}
+
+	/* (5) The budget of each slice must fit within [offset, deadline] */
+	for (i = 0; i < wm->count; i++)
+		if (lt_before(wm->slices[i].deadline, wm->slices[i].offset) ||
+		    wm->slices[i].deadline - wm->slices[i].offset <
+		    wm->slices[i].budget) {
+			TRACE_TASK(t, "slice %d is overloaded\n", i);
+			return -EINVAL;
+		}
+
+	/* (6) The budget of each slice must exceed the minimum budget size. */
+	for (i = 0; i < wm->count; i++)
+		if (wm->slices[i].budget < MIN_EDF_WM_SLICE_SIZE) {
+			TRACE_TASK(t, "slice %d is too short\n", i);
+			return -EINVAL;
+		}
+
+	/* (7) The CPU of each slice must be different from the previous CPU. */
+	for (i = 0; i < wm->count - 1; i++)
+		if (wm->slices[i].cpu == wm->slices[i + 1].cpu) {
+			TRACE_TASK(t, "slice %d does not migrate\n", i);
+			return -EINVAL;
+		}
+
+	/* (8) The CPU of each slice must be online. */
+	for (i = 0; i < wm->count; i++)
+		if (!cpu_online(wm->slices[i].cpu)) {
+			TRACE_TASK(t, "slice %d is allocated on offline CPU\n",
+				   i);
+			return -EINVAL;
+		}
+
+	TRACE_TASK(t, "accepted sliced task with %d slices\n",
+		   wm->count);
+
+	return 0;
+}
+
+static long wm_admit_task(struct task_struct* t)
+{
+	return task_cpu(t) == get_partition(t) ? wm_check_params(t) : -EINVAL;
+}
+
+/* Plugin object */
+static struct sched_plugin edf_wm_plugin __cacheline_aligned_in_smp = {
+	.plugin_name		= "EDF-WM",
+	.tick			= wm_tick,
+	.task_new		= wm_task_new,
+	.complete_job		= complete_job,
+	.task_exit		= wm_task_exit,
+	.schedule		= wm_schedule,
+	.release_at		= wm_release_at,
+	.task_wake_up		= wm_task_wake_up,
+	.task_block		= wm_task_block,
+	.admit_task		= wm_admit_task
+};
+
+
+static int __init init_edf_wm(void)
+{
+	int i;
+
+	/* FIXME: breaks with CPU hotplug
+	 */
+	for (i = 0; i < num_online_cpus(); i++) {
+		wm_domain_init(remote_domain(i),
+			       wm_check_resched,
+			       NULL, i);
+	}
+	return register_sched_plugin(&edf_wm_plugin);
+}
+
+module_init(init_edf_wm);
+
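A worked example of the suspension accounting in wm_task_wake_up() above, using the
two-slice task sketched earlier (2 ms on CPU 0, then 1 ms on CPU 1): sleep time is
charged as execution time, so a job that has consumed 1.5 ms and then sleeps for
0.8 ms stays under its 3 ms budget; fast_forward_slices() charges the 0.5 ms left in
slice 0, advances to slice 1 on CPU 1, and charges the remaining 0.3 ms there,
leaving 0.7 ms of slice budget. Had the sleep pushed the total to 3 ms or beyond,
the wake-up path would instead trigger a new sporadic release via wm_release_at().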