aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAndrea Bastoni <bastoni@cs.unc.edu>2009-12-17 21:39:14 -0500
committerAndrea Bastoni <bastoni@cs.unc.edu>2009-12-17 21:39:14 -0500
commitd029e051b75f25bae435b9b37fd5d86893523aa9 (patch)
treefddb1e3b0fb7d85e8210edc5d3982024d138f102
parent54e6e1699c919e9fce53f9c2bd49174387735e39 (diff)
Add GSN-EDF plugin
- insert arm_release_timer() in add_relese() path
-rw-r--r--arch/x86/kernel/smp.c9
-rw-r--r--litmus/Makefile3
-rw-r--r--litmus/rt_domain.c46
-rw-r--r--litmus/sched_gsn_edf.c816
4 files changed, 856 insertions, 18 deletions
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
index ec1de97600e7..337ce0c44f92 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
@@ -22,6 +22,9 @@
22#include <linux/interrupt.h> 22#include <linux/interrupt.h>
23#include <linux/cpu.h> 23#include <linux/cpu.h>
24 24
25#include <litmus/litmus.h>
26#include <litmus/trace.h>
27
25#include <asm/mtrr.h> 28#include <asm/mtrr.h>
26#include <asm/tlbflush.h> 29#include <asm/tlbflush.h>
27#include <asm/mmu_context.h> 30#include <asm/mmu_context.h>
@@ -117,6 +120,7 @@ static void native_smp_send_reschedule(int cpu)
117 WARN_ON(1); 120 WARN_ON(1);
118 return; 121 return;
119 } 122 }
123 TS_SEND_RESCHED_START(cpu);
120 apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR); 124 apic->send_IPI_mask(cpumask_of(cpu), RESCHEDULE_VECTOR);
121} 125}
122 126
@@ -197,7 +201,12 @@ static void native_smp_send_stop(void)
197void smp_reschedule_interrupt(struct pt_regs *regs) 201void smp_reschedule_interrupt(struct pt_regs *regs)
198{ 202{
199 ack_APIC_irq(); 203 ack_APIC_irq();
204 /* LITMUS^RT needs this interrupt to proper reschedule
205 * on this cpu
206 */
207 set_tsk_need_resched(current);
200 inc_irq_stat(irq_resched_count); 208 inc_irq_stat(irq_resched_count);
209 TS_SEND_RESCHED_END;
201 /* 210 /*
202 * KVM uses this interrupt to force a cpu out of guest mode 211 * KVM uses this interrupt to force a cpu out of guest mode
203 */ 212 */
diff --git a/litmus/Makefile b/litmus/Makefile
index 0720c95918af..b15e65ee7f37 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -10,7 +10,8 @@ obj-y = sched_plugin.o litmus.o \
10 fdso.o \ 10 fdso.o \
11 srp.o \ 11 srp.o \
12 fmlp.o \ 12 fmlp.o \
13 heap.o 13 heap.o \
14 sched_gsn_edf.o
14 15
15obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o 16obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
16obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o 17obj-$(CONFIG_SCHED_TASK_TRACE) += sched_task_trace.o
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
index 4fa834018efa..78e76421aeba 100644
--- a/litmus/rt_domain.c
+++ b/litmus/rt_domain.c
@@ -159,23 +159,23 @@ static void reinit_release_heap(struct task_struct* t)
159 /* initialize */ 159 /* initialize */
160 heap_init(&rh->heap); 160 heap_init(&rh->heap);
161} 161}
162 162/* arm_release_timer() - start local release timer or trigger
163static void arm_release_timer(unsigned long _rt) 163 * remote timer (pull timer)
164 *
165 * Called by add_release() with:
166 * - tobe_lock taken
167 * - IRQ disabled
168 */
169static void arm_release_timer(rt_domain_t *_rt)
164{ 170{
165 rt_domain_t *rt = (rt_domain_t*) _rt; 171 rt_domain_t *rt = _rt;
166 unsigned long flags;
167 struct list_head list; 172 struct list_head list;
168 struct list_head *pos, *safe; 173 struct list_head *pos, *safe;
169 struct task_struct* t; 174 struct task_struct* t;
170 struct release_heap* rh; 175 struct release_heap* rh;
171 176
172 /* We only have to defend against the ISR since norq callbacks
173 * are serialized.
174 */
175 TRACE("arm_release_timer() at %llu\n", litmus_clock()); 177 TRACE("arm_release_timer() at %llu\n", litmus_clock());
176 spin_lock_irqsave(&rt->tobe_lock, flags);
177 list_replace_init(&rt->tobe_released, &list); 178 list_replace_init(&rt->tobe_released, &list);
178 spin_unlock_irqrestore(&rt->tobe_lock, flags);
179 179
180 list_for_each_safe(pos, safe, &list) { 180 list_for_each_safe(pos, safe, &list) {
181 /* pick task of work list */ 181 /* pick task of work list */
@@ -184,24 +184,29 @@ static void arm_release_timer(unsigned long _rt)
184 list_del(pos); 184 list_del(pos);
185 185
186 /* put into release heap while holding release_lock */ 186 /* put into release heap while holding release_lock */
187 spin_lock_irqsave(&rt->release_lock, flags); 187 spin_lock(&rt->release_lock);
188 TRACE_TASK(t, "I have the release_lock 0x%p\n", &rt->release_lock); 188 TRACE_TASK(t, "I have the release_lock 0x%p\n", &rt->release_lock);
189
189 rh = get_release_heap(rt, t, 0); 190 rh = get_release_heap(rt, t, 0);
190 if (!rh) { 191 if (!rh) {
191 /* need to use our own, but drop lock first */ 192 /* need to use our own, but drop lock first */
192 spin_unlock(&rt->release_lock); 193 spin_unlock(&rt->release_lock);
193 TRACE_TASK(t, "Dropped release_lock 0x%p\n", 194 TRACE_TASK(t, "Dropped release_lock 0x%p\n",
194 &rt->release_lock); 195 &rt->release_lock);
196
195 reinit_release_heap(t); 197 reinit_release_heap(t);
196 TRACE_TASK(t, "release_heap ready\n"); 198 TRACE_TASK(t, "release_heap ready\n");
199
197 spin_lock(&rt->release_lock); 200 spin_lock(&rt->release_lock);
198 TRACE_TASK(t, "Re-acquired release_lock 0x%p\n", 201 TRACE_TASK(t, "Re-acquired release_lock 0x%p\n",
199 &rt->release_lock); 202 &rt->release_lock);
203
200 rh = get_release_heap(rt, t, 1); 204 rh = get_release_heap(rt, t, 1);
201 } 205 }
202 heap_insert(rt->order, &rh->heap, tsk_rt(t)->heap_node); 206 heap_insert(rt->order, &rh->heap, tsk_rt(t)->heap_node);
203 TRACE_TASK(t, "arm_release_timer(): added to release heap\n"); 207 TRACE_TASK(t, "arm_release_timer(): added to release heap\n");
204 spin_unlock_irqrestore(&rt->release_lock, flags); 208
209 spin_unlock(&rt->release_lock);
205 TRACE_TASK(t, "Returned the release_lock 0x%p\n", &rt->release_lock); 210 TRACE_TASK(t, "Returned the release_lock 0x%p\n", &rt->release_lock);
206 211
207 /* To avoid arming the timer multiple times, we only let the 212 /* To avoid arming the timer multiple times, we only let the
@@ -210,9 +215,16 @@ static void arm_release_timer(unsigned long _rt)
210 */ 215 */
211 if (rh == tsk_rt(t)->rel_heap) { 216 if (rh == tsk_rt(t)->rel_heap) {
212 TRACE_TASK(t, "arming timer 0x%p\n", &rh->timer); 217 TRACE_TASK(t, "arming timer 0x%p\n", &rh->timer);
213 hrtimer_start(&rh->timer, 218 /* we cannot arm the timer using hrtimer_start()
214 ns_to_ktime(rh->release_time), 219 * as it may deadlock on rq->lock
215 HRTIMER_MODE_ABS); 220 */
221 /* FIXME now only one cpu without pulling
222 * later more cpus; hrtimer_pull should call
223 * __hrtimer_start... always with PINNED mode
224 */
225 __hrtimer_start_range_ns(&rh->timer,
226 ns_to_ktime(rh->release_time),
227 0, HRTIMER_MODE_ABS_PINNED, 0);
216 } else 228 } else
217 TRACE_TASK(t, "0x%p is not my timer\n", &rh->timer); 229 TRACE_TASK(t, "0x%p is not my timer\n", &rh->timer);
218 } 230 }
@@ -280,8 +292,8 @@ void __add_release(rt_domain_t* rt, struct task_struct *task)
280 TRACE_TASK(task, "add_release(), rel=%llu\n", get_release(task)); 292 TRACE_TASK(task, "add_release(), rel=%llu\n", get_release(task));
281 list_add(&tsk_rt(task)->list, &rt->tobe_released); 293 list_add(&tsk_rt(task)->list, &rt->tobe_released);
282 task->rt_param.domain = rt; 294 task->rt_param.domain = rt;
283 /* XXX arm_release_timer() used to be activated here 295
284 * such that it would be called with the runqueue lock dropped. 296 /* start release timer */
285 */ 297 arm_release_timer(rt);
286} 298}
287 299
diff --git a/litmus/sched_gsn_edf.c b/litmus/sched_gsn_edf.c
new file mode 100644
index 000000000000..69990805e16a
--- /dev/null
+++ b/litmus/sched_gsn_edf.c
@@ -0,0 +1,816 @@
1/*
2 * litmus/sched_gsn_edf.c
3 *
4 * Implementation of the GSN-EDF scheduling algorithm.
5 *
6 * This version uses the simple approach and serializes all scheduling
7 * decisions by the use of a queue lock. This is probably not the
8 * best way to do it, but it should suffice for now.
9 */
10
11#include <linux/spinlock.h>
12#include <linux/percpu.h>
13#include <linux/sched.h>
14
15#include <litmus/litmus.h>
16#include <litmus/jobs.h>
17#include <litmus/sched_plugin.h>
18#include <litmus/edf_common.h>
19#include <litmus/sched_trace.h>
20
21#include <litmus/heap.h>
22
23#include <linux/module.h>
24
25/* Overview of GSN-EDF operations.
26 *
27 * For a detailed explanation of GSN-EDF have a look at the FMLP paper. This
28 * description only covers how the individual operations are implemented in
29 * LITMUS.
30 *
31 * link_task_to_cpu(T, cpu) - Low-level operation to update the linkage
32 * structure (NOT the actually scheduled
33 * task). If there is another linked task To
34 * already it will set To->linked_on = NO_CPU
35 * (thereby removing its association with this
36 * CPU). However, it will not requeue the
37 * previously linked task (if any). It will set
38 * T's state to RT_F_RUNNING and check whether
39 * it is already running somewhere else. If T
40 * is scheduled somewhere else it will link
41 * it to that CPU instead (and pull the linked
42 * task to cpu). T may be NULL.
43 *
44 * unlink(T) - Unlink removes T from all scheduler data
45 * structures. If it is linked to some CPU it
46 * will link NULL to that CPU. If it is
47 * currently queued in the gsnedf queue it will
48 * be removed from the rt_domain. It is safe to
49 * call unlink(T) if T is not linked. T may not
50 * be NULL.
51 *
52 * requeue(T) - Requeue will insert T into the appropriate
53 * queue. If the system is in real-time mode and
54 * the T is released already, it will go into the
55 * ready queue. If the system is not in
56 * real-time mode is T, then T will go into the
57 * release queue. If T's release time is in the
58 * future, it will go into the release
59 * queue. That means that T's release time/job
60 * no/etc. has to be updated before requeu(T) is
61 * called. It is not safe to call requeue(T)
62 * when T is already queued. T may not be NULL.
63 *
64 * gsnedf_job_arrival(T) - This is the catch all function when T enters
65 * the system after either a suspension or at a
66 * job release. It will queue T (which means it
67 * is not safe to call gsnedf_job_arrival(T) if
68 * T is already queued) and then check whether a
69 * preemption is necessary. If a preemption is
70 * necessary it will update the linkage
71 * accordingly and cause scheduled to be called
72 * (either with an IPI or need_resched). It is
73 * safe to call gsnedf_job_arrival(T) if T's
74 * next job has not been actually released yet
75 * (releast time in the future). T will be put
76 * on the release queue in that case.
77 *
78 * job_completion(T) - Take care of everything that needs to be done
79 * to prepare T for its next release and place
80 * it in the right queue with
81 * gsnedf_job_arrival().
82 *
83 *
84 * When we now that T is linked to CPU then link_task_to_cpu(NULL, CPU) is
85 * equivalent to unlink(T). Note that if you unlink a task from a CPU none of
86 * the functions will automatically propagate pending task from the ready queue
87 * to a linked task. This is the job of the calling function ( by means of
88 * __take_ready).
89 */
90
91
92/* cpu_entry_t - maintain the linked and scheduled state
93 */
94typedef struct {
95 int cpu;
96 struct task_struct* linked; /* only RT tasks */
97 struct task_struct* scheduled; /* only RT tasks */
98 atomic_t will_schedule; /* prevent unneeded IPIs */
99 struct heap_node* hn;
100} cpu_entry_t;
101DEFINE_PER_CPU(cpu_entry_t, gsnedf_cpu_entries);
102
103cpu_entry_t* gsnedf_cpus[NR_CPUS];
104
105#define set_will_schedule() \
106 (atomic_set(&__get_cpu_var(gsnedf_cpu_entries).will_schedule, 1))
107#define clear_will_schedule() \
108 (atomic_set(&__get_cpu_var(gsnedf_cpu_entries).will_schedule, 0))
109#define test_will_schedule(cpu) \
110 (atomic_read(&per_cpu(gsnedf_cpu_entries, cpu).will_schedule))
111
112
113/* the cpus queue themselves according to priority in here */
114static struct heap_node gsnedf_heap_node[NR_CPUS];
115static struct heap gsnedf_cpu_heap;
116
117static rt_domain_t gsnedf;
118#define gsnedf_lock (gsnedf.ready_lock)
119
120
121static int cpu_lower_prio(struct heap_node *_a, struct heap_node *_b)
122{
123 cpu_entry_t *a, *b;
124 a = _a->value;
125 b = _b->value;
126 /* Note that a and b are inverted: we want the lowest-priority CPU at
127 * the top of the heap.
128 */
129 return edf_higher_prio(b->linked, a->linked);
130}
131
132/* update_cpu_position - Move the cpu entry to the correct place to maintain
133 * order in the cpu queue. Caller must hold gsnedf lock.
134 */
135static void update_cpu_position(cpu_entry_t *entry)
136{
137 if (likely(heap_node_in_heap(entry->hn)))
138 heap_delete(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn);
139 heap_insert(cpu_lower_prio, &gsnedf_cpu_heap, entry->hn);
140}
141
142/* caller must hold gsnedf lock */
143static cpu_entry_t* lowest_prio_cpu(void)
144{
145 struct heap_node* hn;
146 hn = heap_peek(cpu_lower_prio, &gsnedf_cpu_heap);
147 return hn->value;
148}
149
150
151/* link_task_to_cpu - Update the link of a CPU.
152 * Handles the case where the to-be-linked task is already
153 * scheduled on a different CPU.
154 */
155static noinline void link_task_to_cpu(struct task_struct* linked,
156 cpu_entry_t *entry)
157{
158 cpu_entry_t *sched;
159 struct task_struct* tmp;
160 int on_cpu;
161
162 BUG_ON(linked && !is_realtime(linked));
163
164 /* Currently linked task is set to be unlinked. */
165 if (entry->linked) {
166 entry->linked->rt_param.linked_on = NO_CPU;
167 }
168
169 /* Link new task to CPU. */
170 if (linked) {
171 set_rt_flags(linked, RT_F_RUNNING);
172 /* handle task is already scheduled somewhere! */
173 on_cpu = linked->rt_param.scheduled_on;
174 if (on_cpu != NO_CPU) {
175 sched = &per_cpu(gsnedf_cpu_entries, on_cpu);
176 /* this should only happen if not linked already */
177 BUG_ON(sched->linked == linked);
178
179 /* If we are already scheduled on the CPU to which we
180 * wanted to link, we don't need to do the swap --
181 * we just link ourselves to the CPU and depend on
182 * the caller to get things right.
183 */
184 if (entry != sched) {
185 TRACE_TASK(linked,
186 "already scheduled on %d, updating link.\n",
187 sched->cpu);
188 tmp = sched->linked;
189 linked->rt_param.linked_on = sched->cpu;
190 sched->linked = linked;
191 update_cpu_position(sched);
192 linked = tmp;
193 }
194 }
195 if (linked) /* might be NULL due to swap */
196 linked->rt_param.linked_on = entry->cpu;
197 }
198 entry->linked = linked;
199 if (linked)
200 TRACE_TASK(linked, "linked to %d.\n", entry->cpu);
201 else
202 TRACE("NULL linked to %d.\n", entry->cpu);
203 update_cpu_position(entry);
204}
205
206/* unlink - Make sure a task is not linked any longer to an entry
207 * where it was linked before. Must hold gsnedf_lock.
208 */
209static noinline void unlink(struct task_struct* t)
210{
211 cpu_entry_t *entry;
212
213 if (unlikely(!t)) {
214 TRACE_BUG_ON(!t);
215 return;
216 }
217
218 if (t->rt_param.linked_on != NO_CPU) {
219 /* unlink */
220 entry = &per_cpu(gsnedf_cpu_entries, t->rt_param.linked_on);
221 t->rt_param.linked_on = NO_CPU;
222 link_task_to_cpu(NULL, entry);
223 } else if (is_queued(t)) {
224 /* This is an interesting situation: t is scheduled,
225 * but was just recently unlinked. It cannot be
226 * linked anywhere else (because then it would have
227 * been relinked to this CPU), thus it must be in some
228 * queue. We must remove it from the list in this
229 * case.
230 */
231 remove(&gsnedf, t);
232 }
233}
234
235
236/* preempt - force a CPU to reschedule
237 */
238static noinline void preempt(cpu_entry_t *entry)
239{
240 /* We cannot make the is_np() decision here if it is a remote CPU
241 * because requesting exit_np() requires that we currently use the
242 * address space of the task. Thus, in the remote case we just send
243 * the IPI and let schedule() handle the problem.
244 */
245
246 if (smp_processor_id() == entry->cpu) {
247 if (entry->scheduled && is_np(entry->scheduled))
248 request_exit_np(entry->scheduled);
249 else
250 set_tsk_need_resched(current);
251 } else
252 /* in case that it is a remote CPU we have to defer the
253 * the decision to the remote CPU
254 * FIXME: We could save a few IPI's here if we leave the flag
255 * set when we are waiting for a np_exit().
256 */
257 if (!test_will_schedule(entry->cpu))
258 smp_send_reschedule(entry->cpu);
259}
260
261/* requeue - Put an unlinked task into gsn-edf domain.
262 * Caller must hold gsnedf_lock.
263 */
264static noinline void requeue(struct task_struct* task)
265{
266 BUG_ON(!task);
267 /* sanity check before insertion */
268 BUG_ON(is_queued(task));
269
270 if (is_released(task, litmus_clock()))
271 __add_ready(&gsnedf, task);
272 else {
273 /* it has got to wait */
274 add_release(&gsnedf, task);
275 }
276}
277
278/* check for any necessary preemptions */
279static void check_for_preemptions(void)
280{
281 struct task_struct *task;
282 cpu_entry_t* last;
283
284 for(last = lowest_prio_cpu();
285 edf_preemption_needed(&gsnedf, last->linked);
286 last = lowest_prio_cpu()) {
287 /* preemption necessary */
288 task = __take_ready(&gsnedf);
289 TRACE("check_for_preemptions: attempting to link task %d to %d\n",
290 task->pid, last->cpu);
291 if (last->linked)
292 requeue(last->linked);
293 link_task_to_cpu(task, last);
294 preempt(last);
295 }
296}
297
298/* gsnedf_job_arrival: task is either resumed or released */
299static noinline void gsnedf_job_arrival(struct task_struct* task)
300{
301 BUG_ON(!task);
302
303 requeue(task);
304 check_for_preemptions();
305}
306
307static void gsnedf_release_jobs(rt_domain_t* rt, struct heap* tasks)
308{
309 unsigned long flags;
310
311 spin_lock_irqsave(&gsnedf_lock, flags);
312
313 __merge_ready(rt, tasks);
314 check_for_preemptions();
315
316 spin_unlock_irqrestore(&gsnedf_lock, flags);
317}
318
319/* caller holds gsnedf_lock */
320static noinline void job_completion(struct task_struct *t, int forced)
321{
322 BUG_ON(!t);
323
324 sched_trace_task_completion(t, forced);
325
326 TRACE_TASK(t, "job_completion().\n");
327
328 /* set flags */
329 set_rt_flags(t, RT_F_SLEEP);
330 /* prepare for next period */
331 prepare_for_next_period(t);
332 if (is_released(t, litmus_clock()))
333 sched_trace_task_release(t);
334 /* unlink */
335 unlink(t);
336 /* requeue
337 * But don't requeue a blocking task. */
338 if (is_running(t))
339 gsnedf_job_arrival(t);
340}
341
342/* gsnedf_tick - this function is called for every local timer
343 * interrupt.
344 *
345 * checks whether the current task has expired and checks
346 * whether we need to preempt it if it has not expired
347 */
348static void gsnedf_tick(struct task_struct* t)
349{
350 if (is_realtime(t) && budget_exhausted(t)) {
351 if (!is_np(t)) {
352 /* np tasks will be preempted when they become
353 * preemptable again
354 */
355 set_tsk_need_resched(t);
356 set_will_schedule();
357 TRACE("gsnedf_scheduler_tick: "
358 "%d is preemptable "
359 " => FORCE_RESCHED\n", t->pid);
360 } else {
361 TRACE("gsnedf_scheduler_tick: "
362 "%d is non-preemptable, "
363 "preemption delayed.\n", t->pid);
364 request_exit_np(t);
365 }
366 }
367}
368
369/* Getting schedule() right is a bit tricky. schedule() may not make any
370 * assumptions on the state of the current task since it may be called for a
371 * number of reasons. The reasons include a scheduler_tick() determined that it
372 * was necessary, because sys_exit_np() was called, because some Linux
373 * subsystem determined so, or even (in the worst case) because there is a bug
374 * hidden somewhere. Thus, we must take extreme care to determine what the
375 * current state is.
376 *
377 * The CPU could currently be scheduling a task (or not), be linked (or not).
378 *
379 * The following assertions for the scheduled task could hold:
380 *
381 * - !is_running(scheduled) // the job blocks
382 * - scheduled->timeslice == 0 // the job completed (forcefully)
383 * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall)
384 * - linked != scheduled // we need to reschedule (for any reason)
385 * - is_np(scheduled) // rescheduling must be delayed,
386 * sys_exit_np must be requested
387 *
388 * Any of these can occur together.
389 */
390static struct task_struct* gsnedf_schedule(struct task_struct * prev)
391{
392 cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
393 int out_of_time, sleep, preempt, np, exists, blocks;
394 struct task_struct* next = NULL;
395
396 spin_lock(&gsnedf_lock);
397 clear_will_schedule();
398
399 /* sanity checking */
400 BUG_ON(entry->scheduled && entry->scheduled != prev);
401 BUG_ON(entry->scheduled && !is_realtime(prev));
402 BUG_ON(is_realtime(prev) && !entry->scheduled);
403
404 /* (0) Determine state */
405 exists = entry->scheduled != NULL;
406 blocks = exists && !is_running(entry->scheduled);
407 out_of_time = exists && budget_exhausted(entry->scheduled);
408 np = exists && is_np(entry->scheduled);
409 sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP;
410 preempt = entry->scheduled != entry->linked;
411
412 TRACE_TASK(prev, "invoked gsnedf_schedule.\n");
413
414 if (exists)
415 TRACE_TASK(prev,
416 "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d "
417 "state:%d sig:%d\n",
418 blocks, out_of_time, np, sleep, preempt,
419 prev->state, signal_pending(prev));
420 if (entry->linked && preempt)
421 TRACE_TASK(prev, "will be preempted by %s/%d\n",
422 entry->linked->comm, entry->linked->pid);
423
424
425 /* If a task blocks we have no choice but to reschedule.
426 */
427 if (blocks)
428 unlink(entry->scheduled);
429
430 /* Request a sys_exit_np() call if we would like to preempt but cannot.
431 * We need to make sure to update the link structure anyway in case
432 * that we are still linked. Multiple calls to request_exit_np() don't
433 * hurt.
434 */
435 if (np && (out_of_time || preempt || sleep)) {
436 unlink(entry->scheduled);
437 request_exit_np(entry->scheduled);
438 }
439
440 /* Any task that is preemptable and either exhausts its execution
441 * budget or wants to sleep completes. We may have to reschedule after
442 * this. Don't do a job completion if we block (can't have timers running
443 * for blocked jobs). Preemption go first for the same reason.
444 */
445 if (!np && (out_of_time || sleep) && !blocks && !preempt)
446 job_completion(entry->scheduled, !sleep);
447
448 /* Link pending task if we became unlinked.
449 */
450 if (!entry->linked)
451 link_task_to_cpu(__take_ready(&gsnedf), entry);
452
453 /* The final scheduling decision. Do we need to switch for some reason?
454 * If linked is different from scheduled, then select linked as next.
455 */
456 if ((!np || blocks) &&
457 entry->linked != entry->scheduled) {
458 /* Schedule a linked job? */
459 if (entry->linked) {
460 entry->linked->rt_param.scheduled_on = entry->cpu;
461 next = entry->linked;
462 }
463 if (entry->scheduled) {
464 /* not gonna be scheduled soon */
465 entry->scheduled->rt_param.scheduled_on = NO_CPU;
466 TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n");
467 }
468 } else
469 /* Only override Linux scheduler if we have a real-time task
470 * scheduled that needs to continue.
471 */
472 if (exists)
473 next = prev;
474
475 spin_unlock(&gsnedf_lock);
476
477 TRACE("gsnedf_lock released, next=0x%p\n", next);
478
479
480 if (next)
481 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
482 else if (exists && !next)
483 TRACE("becomes idle at %llu.\n", litmus_clock());
484
485
486 return next;
487}
488
489
490/* _finish_switch - we just finished the switch away from prev
491 */
492static void gsnedf_finish_switch(struct task_struct *prev)
493{
494 cpu_entry_t* entry = &__get_cpu_var(gsnedf_cpu_entries);
495
496 entry->scheduled = is_realtime(current) ? current : NULL;
497 TRACE_TASK(prev, "switched away from\n");
498}
499
500
501/* Prepare a task for running in RT mode
502 */
503static void gsnedf_task_new(struct task_struct * t, int on_rq, int running)
504{
505 unsigned long flags;
506 cpu_entry_t* entry;
507
508 TRACE("gsn edf: task new %d\n", t->pid);
509
510 spin_lock_irqsave(&gsnedf_lock, flags);
511
512 /* setup job params */
513 release_at(t, litmus_clock());
514
515 if (running) {
516 entry = &per_cpu(gsnedf_cpu_entries, task_cpu(t));
517 BUG_ON(entry->scheduled);
518 entry->scheduled = t;
519 tsk_rt(t)->scheduled_on = task_cpu(t);
520 } else {
521 t->rt_param.scheduled_on = NO_CPU;
522 }
523 t->rt_param.linked_on = NO_CPU;
524
525 gsnedf_job_arrival(t);
526 spin_unlock_irqrestore(&gsnedf_lock, flags);
527}
528
529static void gsnedf_task_wake_up(struct task_struct *task)
530{
531 unsigned long flags;
532 lt_t now;
533
534 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
535
536 spin_lock_irqsave(&gsnedf_lock, flags);
537 /* We need to take suspensions because of semaphores into
538 * account! If a job resumes after being suspended due to acquiring
539 * a semaphore, it should never be treated as a new job release.
540 */
541 if (get_rt_flags(task) == RT_F_EXIT_SEM) {
542 set_rt_flags(task, RT_F_RUNNING);
543 } else {
544 now = litmus_clock();
545 if (is_tardy(task, now)) {
546 /* new sporadic release */
547 release_at(task, now);
548 sched_trace_task_release(task);
549 }
550 else {
551 if (task->rt.time_slice) {
552 /* came back in time before deadline
553 */
554 set_rt_flags(task, RT_F_RUNNING);
555 }
556 }
557 }
558 gsnedf_job_arrival(task);
559 spin_unlock_irqrestore(&gsnedf_lock, flags);
560}
561
562static void gsnedf_task_block(struct task_struct *t)
563{
564 unsigned long flags;
565
566 TRACE_TASK(t, "block at %llu\n", litmus_clock());
567
568 /* unlink if necessary */
569 spin_lock_irqsave(&gsnedf_lock, flags);
570 unlink(t);
571 spin_unlock_irqrestore(&gsnedf_lock, flags);
572
573 BUG_ON(!is_realtime(t));
574}
575
576
577static void gsnedf_task_exit(struct task_struct * t)
578{
579 unsigned long flags;
580
581 /* unlink if necessary */
582 spin_lock_irqsave(&gsnedf_lock, flags);
583 unlink(t);
584 if (tsk_rt(t)->scheduled_on != NO_CPU) {
585 gsnedf_cpus[tsk_rt(t)->scheduled_on]->scheduled = NULL;
586 tsk_rt(t)->scheduled_on = NO_CPU;
587 }
588 spin_unlock_irqrestore(&gsnedf_lock, flags);
589
590 BUG_ON(!is_realtime(t));
591 TRACE_TASK(t, "RIP\n");
592}
593
594#ifdef CONFIG_FMLP
595
596/* Update the queue position of a task that got it's priority boosted via
597 * priority inheritance. */
598static void update_queue_position(struct task_struct *holder)
599{
600 /* We don't know whether holder is in the ready queue. It should, but
601 * on a budget overrun it may already be in a release queue. Hence,
602 * calling unlink() is not possible since it assumes that the task is
603 * not in a release queue. However, we can safely check whether
604 * sem->holder is currently in a queue or scheduled after locking both
605 * the release and the ready queue lock. */
606
607 /* Assumption: caller holds gsnedf_lock */
608
609 int check_preempt = 0;
610
611 if (tsk_rt(holder)->linked_on != NO_CPU) {
612 TRACE_TASK(holder, "%s: linked on %d\n",
613 __FUNCTION__, tsk_rt(holder)->linked_on);
614 /* Holder is scheduled; need to re-order CPUs.
615 * We can't use heap_decrease() here since
616 * the cpu_heap is ordered in reverse direction, so
617 * it is actually an increase. */
618 heap_delete(cpu_lower_prio, &gsnedf_cpu_heap,
619 gsnedf_cpus[tsk_rt(holder)->linked_on]->hn);
620 heap_insert(cpu_lower_prio, &gsnedf_cpu_heap,
621 gsnedf_cpus[tsk_rt(holder)->linked_on]->hn);
622 } else {
623 /* holder may be queued: first stop queue changes */
624 spin_lock(&gsnedf.release_lock);
625 if (is_queued(holder)) {
626 TRACE_TASK(holder, "%s: is queued\n",
627 __FUNCTION__);
628 /* We need to update the position
629 * of holder in some heap. Note that this
630 * may be a release heap. */
631 check_preempt =
632 !heap_decrease(edf_ready_order,
633 tsk_rt(holder)->heap_node);
634 } else {
635 /* Nothing to do: if it is not queued and not linked
636 * then it is currently being moved by other code
637 * (e.g., a timer interrupt handler) that will use the
638 * correct priority when enqueuing the task. */
639 TRACE_TASK(holder, "%s: is NOT queued => Done.\n",
640 __FUNCTION__);
641 }
642 spin_unlock(&gsnedf.release_lock);
643
644 /* If holder was enqueued in a release heap, then the following
645 * preemption check is pointless, but we can't easily detect
646 * that case. If you want to fix this, then consider that
647 * simply adding a state flag requires O(n) time to update when
648 * releasing n tasks, which conflicts with the goal to have
649 * O(log n) merges. */
650 if (check_preempt) {
651 /* heap_decrease() hit the top level of the heap: make
652 * sure preemption checks get the right task, not the
653 * potentially stale cache. */
654 heap_uncache_min(edf_ready_order,
655 &gsnedf.ready_queue);
656 check_for_preemptions();
657 }
658 }
659}
660
661static long gsnedf_pi_block(struct pi_semaphore *sem,
662 struct task_struct *new_waiter)
663{
664 /* This callback has to handle the situation where a new waiter is
665 * added to the wait queue of the semaphore.
666 *
667 * We must check if has a higher priority than the currently
668 * highest-priority task, and then potentially reschedule.
669 */
670
671 BUG_ON(!new_waiter);
672
673 if (edf_higher_prio(new_waiter, sem->hp.task)) {
674 TRACE_TASK(new_waiter, " boosts priority via %p\n", sem);
675 /* called with IRQs disabled */
676 spin_lock(&gsnedf_lock);
677 /* store new highest-priority task */
678 sem->hp.task = new_waiter;
679 if (sem->holder) {
680 TRACE_TASK(sem->holder,
681 " holds %p and will inherit from %s/%d\n",
682 sem,
683 new_waiter->comm, new_waiter->pid);
684 /* let holder inherit */
685 sem->holder->rt_param.inh_task = new_waiter;
686 update_queue_position(sem->holder);
687 }
688 spin_unlock(&gsnedf_lock);
689 }
690
691 return 0;
692}
693
694static long gsnedf_inherit_priority(struct pi_semaphore *sem,
695 struct task_struct *new_owner)
696{
697 /* We don't need to acquire the gsnedf_lock since at the time of this
698 * call new_owner isn't actually scheduled yet (it's still sleeping)
699 * and since the calling function already holds sem->wait.lock, which
700 * prevents concurrent sem->hp.task changes.
701 */
702
703 if (sem->hp.task && sem->hp.task != new_owner) {
704 new_owner->rt_param.inh_task = sem->hp.task;
705 TRACE_TASK(new_owner, "inherited priority from %s/%d\n",
706 sem->hp.task->comm, sem->hp.task->pid);
707 } else
708 TRACE_TASK(new_owner,
709 "cannot inherit priority, "
710 "no higher priority job waits.\n");
711 return 0;
712}
713
714/* This function is called on a semaphore release, and assumes that
715 * the current task is also the semaphore holder.
716 */
717static long gsnedf_return_priority(struct pi_semaphore *sem)
718{
719 struct task_struct* t = current;
720 int ret = 0;
721
722 /* Find new highest-priority semaphore task
723 * if holder task is the current hp.task.
724 *
725 * Calling function holds sem->wait.lock.
726 */
727 if (t == sem->hp.task)
728 edf_set_hp_task(sem);
729
730 TRACE_CUR("gsnedf_return_priority for lock %p\n", sem);
731
732 if (t->rt_param.inh_task) {
733 /* interrupts already disabled by PI code */
734 spin_lock(&gsnedf_lock);
735
736 /* Reset inh_task to NULL. */
737 t->rt_param.inh_task = NULL;
738
739 /* Check if rescheduling is necessary */
740 unlink(t);
741 gsnedf_job_arrival(t);
742 spin_unlock(&gsnedf_lock);
743 }
744
745 return ret;
746}
747
748#endif
749
750static long gsnedf_admit_task(struct task_struct* tsk)
751{
752 return 0;
753}
754
755static long gsnedf_activate_plugin(void)
756{
757 int cpu;
758 cpu_entry_t *entry;
759
760 heap_init(&gsnedf_cpu_heap);
761
762 for_each_online_cpu(cpu) {
763 entry = &per_cpu(gsnedf_cpu_entries, cpu);
764 heap_node_init(&entry->hn, entry);
765 atomic_set(&entry->will_schedule, 0);
766 entry->linked = NULL;
767 entry->scheduled = NULL;
768 TRACE("GSN-EDF: Initializing CPU #%d.\n", cpu);
769 update_cpu_position(entry);
770 }
771 return 0;
772}
773
774/* Plugin object */
775static struct sched_plugin gsn_edf_plugin __cacheline_aligned_in_smp = {
776 .plugin_name = "GSN-EDF",
777 .finish_switch = gsnedf_finish_switch,
778 .tick = gsnedf_tick,
779 .task_new = gsnedf_task_new,
780 .complete_job = complete_job,
781 .task_exit = gsnedf_task_exit,
782 .schedule = gsnedf_schedule,
783 .task_wake_up = gsnedf_task_wake_up,
784 .task_block = gsnedf_task_block,
785#ifdef CONFIG_FMLP
786 .fmlp_active = 1,
787 .pi_block = gsnedf_pi_block,
788 .inherit_priority = gsnedf_inherit_priority,
789 .return_priority = gsnedf_return_priority,
790#endif
791 .admit_task = gsnedf_admit_task,
792 .activate_plugin = gsnedf_activate_plugin,
793};
794
795
796static int __init init_gsn_edf(void)
797{
798 int cpu;
799 cpu_entry_t *entry;
800
801 heap_init(&gsnedf_cpu_heap);
802 /* initialize CPU state */
803 for (cpu = 0; cpu < NR_CPUS; cpu++) {
804 entry = &per_cpu(gsnedf_cpu_entries, cpu);
805 gsnedf_cpus[cpu] = entry;
806 atomic_set(&entry->will_schedule, 0);
807 entry->cpu = cpu;
808 entry->hn = &gsnedf_heap_node[cpu];
809 heap_node_init(&entry->hn, entry);
810 }
811 edf_domain_init(&gsnedf, NULL, gsnedf_release_jobs);
812 return register_sched_plugin(&gsn_edf_plugin);
813}
814
815
816module_init(init_gsn_edf);