author    Bjoern Brandenburg <bbb@mpi-sws.org>  2015-08-09 07:18:55 -0400
committer Bjoern Brandenburg <bbb@mpi-sws.org>  2015-08-09 07:20:35 -0400
commit    2a45e01a8827379c709d228a5c9b5f21011d4277 (patch)
tree      8bfd7342ecd5ec6cb7063e95847ef7efa36d7a0b /litmus
parent    02da1bac9739050917862c82bdc75c3a0eb43179 (diff)
Add P-FP scheduler plugin
Diffstat (limited to 'litmus')
-rw-r--r--  litmus/Makefile     |    4
-rw-r--r--  litmus/fp_common.c  |   17
-rw-r--r--  litmus/sched_pfp.c  | 2036
3 files changed, 2051 insertions, 6 deletions
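For orientation, here is a minimal stand-alone sketch of the partitioned fixed-priority idea that sched_pfp.c implements: each CPU owns a private, priority-ordered ready queue, and every scheduling decision consults only the local queue. All names below are illustrative; they are not part of the patch or of the LITMUS^RT API.

/* Minimal model of partitioned fixed-priority scheduling (illustrative only). */
#define MODEL_NR_CPUS 4

struct model_task {
	unsigned int prio;        /* lower value = higher priority */
	int cpu;                  /* static partition assignment */
	struct model_task *next;  /* ready-queue link */
};

/* one ready queue per CPU, kept sorted by priority */
static struct model_task *ready[MODEL_NR_CPUS];

/* insert into the local queue, keeping it sorted (O(n) for clarity) */
static void model_requeue(struct model_task *t)
{
	struct model_task **p = &ready[t->cpu];
	while (*p && (*p)->prio <= t->prio)
		p = &(*p)->next;
	t->next = *p;
	*p = t;
}

/* the scheduling decision never looks at other CPUs' queues */
static struct model_task *model_pick_next(int cpu)
{
	struct model_task *t = ready[cpu];
	if (t)
		ready[cpu] = t->next;
	return t;
}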
diff --git a/litmus/Makefile b/litmus/Makefile
index 895cf3a2d599..fb12398c4b92 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -20,7 +20,9 @@ obj-y = sched_plugin.o litmus.o \
 	ctrldev.o \
 	uncachedev.o \
 	sched_gsn_edf.o \
-	sched_psn_edf.o
+	sched_psn_edf.o \
+	sched_pfp.o
+
 
 
 obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
diff --git a/litmus/fp_common.c b/litmus/fp_common.c
index 964a4729deff..ff0f30a9f536 100644
--- a/litmus/fp_common.c
+++ b/litmus/fp_common.c
@@ -32,7 +32,6 @@ int fp_higher_prio(struct task_struct* first,
 		return 0;
 	}
 
-
 	/* check for NULL tasks */
 	if (!first || !second)
 		return first && !second;
@@ -50,6 +49,15 @@ int fp_higher_prio(struct task_struct* first,
 	if (unlikely(second->rt_param.inh_task))
 		second_task = second->rt_param.inh_task;
 
+	/* Comparisons of a task to itself are only possible with
+	 * priority inheritance, e.g., when an svc_preempt interrupt fires
+	 * just before scheduling (and anything that could follow in the
+	 * ready queue). Always favour the original job, as it will simply
+	 * suspend itself to resolve the situation.
+	 */
+	if (first_task == second_task)
+		return first_task == first;
+
 	/* Check for priority boosting. Tie-break by start of boosting.
 	 */
 	if (unlikely(is_priority_boosted(first_task))) {
@@ -65,11 +73,10 @@ int fp_higher_prio(struct task_struct* first,
 		/* second_task is boosted, first is not*/
 		return 0;
 
-#endif
-
-	/* Comparisons to itself are not expected; priority inheritance
-	 * should also not cause this to happen. */
+#else
+	/* No locks, no priority inheritance, no comparisons to itself */
 	BUG_ON(first_task == second_task);
+#endif
 
 	if (get_priority(first_task) < get_priority(second_task))
 		return 1;
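The new self-comparison tie-break can be illustrated with a small stand-alone sketch (hypothetical types and names, not LITMUS^RT code): after both sides are mapped to their effective, possibly inherited task, a collision is resolved in favour of the original job, which will simply suspend itself.

struct job {
	unsigned int prio;      /* lower value = higher priority */
	struct job *inh_from;   /* job whose priority is inherited, or NULL */
};

static int sketch_higher_prio(struct job *first, struct job *second)
{
	struct job *ft = first->inh_from  ? first->inh_from  : first;
	struct job *st = second->inh_from ? second->inh_from : second;

	if (ft == st)
		/* effective priorities collide: favour the original job */
		return ft == first;

	return ft->prio < st->prio;
}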
diff --git a/litmus/sched_pfp.c b/litmus/sched_pfp.c
new file mode 100644
index 000000000000..f38e9bc175b5
--- /dev/null
+++ b/litmus/sched_pfp.c
@@ -0,0 +1,2036 @@
1/*
2 * litmus/sched_pfp.c
3 *
4 * Implementation of partitioned fixed-priority scheduling.
5 * Based on PSN-EDF.
6 */
7
8#include <linux/percpu.h>
9#include <linux/sched.h>
10#include <linux/list.h>
11#include <linux/spinlock.h>
12#include <linux/module.h>
13
14#include <litmus/litmus.h>
15#include <litmus/wait.h>
16#include <litmus/jobs.h>
17#include <litmus/preempt.h>
18#include <litmus/fp_common.h>
19#include <litmus/sched_plugin.h>
20#include <litmus/sched_trace.h>
21#include <litmus/trace.h>
22#include <litmus/budget.h>
23
24/* to set up domain/cpu mappings */
25#include <litmus/litmus_proc.h>
26#include <linux/uaccess.h>
27
28
29typedef struct {
30 rt_domain_t domain;
31 struct fp_prio_queue ready_queue;
32 int cpu;
33 struct task_struct* scheduled; /* only RT tasks */
34/*
35 * scheduling lock slock
36 * protects the domain and serializes scheduling decisions
37 */
38#define slock domain.ready_lock
39
40} pfp_domain_t;
41
42DEFINE_PER_CPU(pfp_domain_t, pfp_domains);
43
44pfp_domain_t* pfp_doms[NR_CPUS];
45
46#define local_pfp (this_cpu_ptr(&pfp_domains))
47#define remote_dom(cpu) (&per_cpu(pfp_domains, cpu).domain)
48#define remote_pfp(cpu) (&per_cpu(pfp_domains, cpu))
49#define task_dom(task) remote_dom(get_partition(task))
50#define task_pfp(task) remote_pfp(get_partition(task))
51
52
53#ifdef CONFIG_LITMUS_LOCKING
54DEFINE_PER_CPU(uint64_t,fmlp_timestamp);
55#endif
56
57/* we assume the lock is being held */
58static void preempt(pfp_domain_t *pfp)
59{
60 preempt_if_preemptable(pfp->scheduled, pfp->cpu);
61}
62
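/* Ready-queue index layout produced by priority_index():
 *   index 0               - priority-boosted jobs (lock holders)
 *   index get_priority(t) - regular jobs, using the inherited task's
 *                           priority if inh_task is set
 * Boosted jobs therefore always order ahead of any regular fixed priority.
 */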
63static unsigned int priority_index(struct task_struct* t)
64{
65#ifdef CONFIG_LITMUS_LOCKING
66 if (unlikely(t->rt_param.inh_task))
67 /* use effective priority */
68 t = t->rt_param.inh_task;
69
70 if (is_priority_boosted(t)) {
71 /* zero is reserved for priority-boosted tasks */
72 return 0;
73 } else
74#endif
75 return get_priority(t);
76}
77
78static void pfp_release_jobs(rt_domain_t* rt, struct bheap* tasks)
79{
80 pfp_domain_t *pfp = container_of(rt, pfp_domain_t, domain);
81 unsigned long flags;
82 struct task_struct* t;
83 struct bheap_node* hn;
84
85 raw_spin_lock_irqsave(&pfp->slock, flags);
86
87 while (!bheap_empty(tasks)) {
88 hn = bheap_take(fp_ready_order, tasks);
89 t = bheap2task(hn);
90 TRACE_TASK(t, "released (part:%d prio:%d)\n",
91 get_partition(t), get_priority(t));
92 fp_prio_add(&pfp->ready_queue, t, priority_index(t));
93 }
94
95 /* do we need to preempt? */
96 if (fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled)) {
97 TRACE_CUR("preempted by new release\n");
98 preempt(pfp);
99 }
100
101 raw_spin_unlock_irqrestore(&pfp->slock, flags);
102}
103
104static void pfp_preempt_check(pfp_domain_t *pfp)
105{
106 if (fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled))
107 preempt(pfp);
108}
109
110static void pfp_domain_init(pfp_domain_t* pfp,
111 int cpu)
112{
113 fp_domain_init(&pfp->domain, NULL, pfp_release_jobs);
114 pfp->cpu = cpu;
115 pfp->scheduled = NULL;
116 fp_prio_queue_init(&pfp->ready_queue);
117}
118
119static void requeue(struct task_struct* t, pfp_domain_t *pfp)
120{
121 tsk_rt(t)->completed = 0;
122 if (is_released(t, litmus_clock()))
123 fp_prio_add(&pfp->ready_queue, t, priority_index(t));
124 else
125 add_release(&pfp->domain, t); /* it has got to wait */
126}
127
128static void job_completion(struct task_struct* t, int forced)
129{
130 sched_trace_task_completion(t, forced);
131 TRACE_TASK(t, "job_completion(forced=%d).\n", forced);
132
133 tsk_rt(t)->completed = 0;
134 prepare_for_next_period(t);
135 if (is_released(t, litmus_clock()))
136 sched_trace_task_release(t);
137}
138
139static struct task_struct* pfp_schedule(struct task_struct * prev)
140{
141 pfp_domain_t* pfp = local_pfp;
142 struct task_struct* next;
143
144 int out_of_time, sleep, preempt, np, exists, blocks, resched, migrate;
145
146 raw_spin_lock(&pfp->slock);
147
148 /* sanity checking
 149	 * unlike under G-EDF, when a task exits (dies),
 150	 * pfp->scheduled may be NULL while prev _is_ still a real-time task
151 */
152 BUG_ON(pfp->scheduled && pfp->scheduled != prev);
153 BUG_ON(pfp->scheduled && !is_realtime(prev));
154
155 /* (0) Determine state */
156 exists = pfp->scheduled != NULL;
157 blocks = exists && !is_current_running();
158 out_of_time = exists && budget_enforced(pfp->scheduled)
159 && budget_exhausted(pfp->scheduled);
160 np = exists && is_np(pfp->scheduled);
161 sleep = exists && is_completed(pfp->scheduled);
162 migrate = exists && get_partition(pfp->scheduled) != pfp->cpu;
163 preempt = !blocks && (migrate || fp_preemption_needed(&pfp->ready_queue, prev));
164
165 /* If we need to preempt do so.
166 * The following checks set resched to 1 in case of special
167 * circumstances.
168 */
169 resched = preempt;
170
171 /* If a task blocks we have no choice but to reschedule.
172 */
173 if (blocks)
174 resched = 1;
175
176 /* Request a sys_exit_np() call if we would like to preempt but cannot.
177 * Multiple calls to request_exit_np() don't hurt.
178 */
179 if (np && (out_of_time || preempt || sleep))
180 request_exit_np(pfp->scheduled);
181
182 /* Any task that is preemptable and either exhausts its execution
183 * budget or wants to sleep completes. We may have to reschedule after
184 * this.
185 */
186 if (!np && (out_of_time || sleep)) {
187 job_completion(pfp->scheduled, !sleep);
188 resched = 1;
189 }
190
191 /* The final scheduling decision. Do we need to switch for some reason?
192 * Switch if we are in RT mode and have no task or if we need to
193 * resched.
194 */
195 next = NULL;
196 if ((!np || blocks) && (resched || !exists)) {
197 /* When preempting a task that does not block, then
198 * re-insert it into either the ready queue or the
199 * release queue (if it completed). requeue() picks
200 * the appropriate queue.
201 */
202 if (pfp->scheduled && !blocks && !migrate)
203 requeue(pfp->scheduled, pfp);
204 next = fp_prio_take(&pfp->ready_queue);
205 if (next == prev) {
206 struct task_struct *t = fp_prio_peek(&pfp->ready_queue);
207 TRACE_TASK(next, "next==prev sleep=%d oot=%d np=%d preempt=%d migrate=%d "
208 "boost=%d empty=%d prio-idx=%u prio=%u\n",
209 sleep, out_of_time, np, preempt, migrate,
210 is_priority_boosted(next),
211 t == NULL,
212 priority_index(next),
213 get_priority(next));
214 if (t)
215 TRACE_TASK(t, "waiter boost=%d prio-idx=%u prio=%u\n",
216 is_priority_boosted(t),
217 priority_index(t),
218 get_priority(t));
219 }
220 /* If preempt is set, we should not see the same task again. */
221 BUG_ON(preempt && next == prev);
222 /* Similarly, if preempt is set, then next may not be NULL,
223 * unless it's a migration. */
224 BUG_ON(preempt && !migrate && next == NULL);
225 } else
226 /* Only override Linux scheduler if we have a real-time task
227 * scheduled that needs to continue.
228 */
229 if (exists)
230 next = prev;
231
232 if (next) {
233 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
234 } else if (exists) {
235 TRACE("becoming idle at %llu\n", litmus_clock());
236 }
237
238 pfp->scheduled = next;
239 sched_state_task_picked();
240 raw_spin_unlock(&pfp->slock);
241
242 return next;
243}
244
245#ifdef CONFIG_LITMUS_LOCKING
246
247/* prev is no longer scheduled --- see if it needs to migrate */
248static void pfp_finish_switch(struct task_struct *prev)
249{
250 pfp_domain_t *to;
251
252 if (is_realtime(prev) &&
253 prev->state == TASK_RUNNING &&
254 get_partition(prev) != smp_processor_id()) {
255 TRACE_TASK(prev, "needs to migrate from P%d to P%d\n",
256 smp_processor_id(), get_partition(prev));
257
258 to = task_pfp(prev);
259
260 raw_spin_lock(&to->slock);
261
262 TRACE_TASK(prev, "adding to queue on P%d\n", to->cpu);
263 requeue(prev, to);
264 if (fp_preemption_needed(&to->ready_queue, to->scheduled))
265 preempt(to);
266
267 raw_spin_unlock(&to->slock);
268
269 }
270}
271
272#endif
273
274/* Prepare a task for running in RT mode
275 */
276static void pfp_task_new(struct task_struct * t, int on_rq, int is_scheduled)
277{
278 pfp_domain_t* pfp = task_pfp(t);
279 unsigned long flags;
280
281 TRACE_TASK(t, "P-FP: task new, cpu = %d\n",
282 t->rt_param.task_params.cpu);
283
284 /* setup job parameters */
285 release_at(t, litmus_clock());
286
287 raw_spin_lock_irqsave(&pfp->slock, flags);
288 if (is_scheduled) {
289 /* there shouldn't be anything else running at the time */
290 BUG_ON(pfp->scheduled);
291 pfp->scheduled = t;
292 } else if (on_rq) {
293 requeue(t, pfp);
294 /* maybe we have to reschedule */
295 pfp_preempt_check(pfp);
296 }
297 raw_spin_unlock_irqrestore(&pfp->slock, flags);
298}
299
300static void pfp_task_wake_up(struct task_struct *task)
301{
302 unsigned long flags;
303 pfp_domain_t* pfp = task_pfp(task);
304 lt_t now;
305
306 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
307 raw_spin_lock_irqsave(&pfp->slock, flags);
308
309#ifdef CONFIG_LITMUS_LOCKING
 310	/* Should only be queued when processing a fake wake-up due to a
311 * migration-related state change. */
312 if (unlikely(is_queued(task))) {
313 TRACE_TASK(task, "WARNING: waking task still queued. Is this right?\n");
314 goto out_unlock;
315 }
316#else
317 BUG_ON(is_queued(task));
318#endif
319 now = litmus_clock();
320 if (is_sporadic(task) && is_tardy(task, now)
321#ifdef CONFIG_LITMUS_LOCKING
322 /* We need to take suspensions because of semaphores into
323 * account! If a job resumes after being suspended due to acquiring
324 * a semaphore, it should never be treated as a new job release.
325 */
326 && !is_priority_boosted(task)
327#endif
328 ) {
329 /* new sporadic release */
330 release_at(task, now);
331 sched_trace_task_release(task);
332 }
333
334 /* Only add to ready queue if it is not the currently-scheduled
335 * task. This could be the case if a task was woken up concurrently
336 * on a remote CPU before the executing CPU got around to actually
337 * de-scheduling the task, i.e., wake_up() raced with schedule()
338 * and won. Also, don't requeue if it is still queued, which can
 339	 * happen under the DPCP due to wake-ups racing with migrations.
340 */
341 if (pfp->scheduled != task) {
342 requeue(task, pfp);
343 pfp_preempt_check(pfp);
344 }
345
346#ifdef CONFIG_LITMUS_LOCKING
347out_unlock:
348#endif
349 raw_spin_unlock_irqrestore(&pfp->slock, flags);
350 TRACE_TASK(task, "wake up done\n");
351}
352
353static void pfp_task_block(struct task_struct *t)
354{
355 /* only running tasks can block, thus t is in no queue */
356 TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state);
357
358 BUG_ON(!is_realtime(t));
359
360 /* If this task blocked normally, it shouldn't be queued. The exception is
361 * if this is a simulated block()/wakeup() pair from the pull-migration code path.
362 * This should only happen if the DPCP is being used.
363 */
364#ifdef CONFIG_LITMUS_LOCKING
365 if (unlikely(is_queued(t)))
366 TRACE_TASK(t, "WARNING: blocking task still queued. Is this right?\n");
367#else
368 BUG_ON(is_queued(t));
369#endif
370}
371
372static void pfp_task_exit(struct task_struct * t)
373{
374 unsigned long flags;
375 pfp_domain_t* pfp = task_pfp(t);
376 rt_domain_t* dom;
377
378 raw_spin_lock_irqsave(&pfp->slock, flags);
379 if (is_queued(t)) {
380 BUG(); /* This currently doesn't work. */
381 /* dequeue */
382 dom = task_dom(t);
383 remove(dom, t);
384 }
385 if (pfp->scheduled == t) {
386 pfp->scheduled = NULL;
387 preempt(pfp);
388 }
389 TRACE_TASK(t, "RIP, now reschedule\n");
390
391 raw_spin_unlock_irqrestore(&pfp->slock, flags);
392}
393
394#ifdef CONFIG_LITMUS_LOCKING
395
396#include <litmus/fdso.h>
397#include <litmus/srp.h>
398
399static void fp_dequeue(pfp_domain_t* pfp, struct task_struct* t)
400{
401 BUG_ON(pfp->scheduled == t && is_queued(t));
402 if (is_queued(t))
403 fp_prio_remove(&pfp->ready_queue, t, priority_index(t));
404}
405
406static void fp_set_prio_inh(pfp_domain_t* pfp, struct task_struct* t,
407 struct task_struct* prio_inh)
408{
409 int requeue;
410
411 if (!t || t->rt_param.inh_task == prio_inh) {
412 /* no update required */
413 if (t)
414 TRACE_TASK(t, "no prio-inh update required\n");
415 return;
416 }
417
418 requeue = is_queued(t);
419 TRACE_TASK(t, "prio-inh: is_queued:%d\n", requeue);
420
421 if (requeue)
422 /* first remove */
423 fp_dequeue(pfp, t);
424
425 t->rt_param.inh_task = prio_inh;
426
427 if (requeue)
428 /* add again to the right queue */
429 fp_prio_add(&pfp->ready_queue, t, priority_index(t));
430}
431
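/* Example of the priority-point arithmetic below: a task with regular
 * priority p acts as a DPCP agent at effective priority
 * p - LITMUS_MAX_PRIORITY, which is negative and thus beats every regular
 * priority (lower value = higher priority). prio_point() shifts an
 * effective priority back into the non-negative range, i.e.,
 * prio_point(effective_agent_priority(p)) == p, so it can serve as an
 * unsigned tie-breaking priority point.
 */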
432static int effective_agent_priority(int prio)
433{
434 /* make sure agents have higher priority */
435 return prio - LITMUS_MAX_PRIORITY;
436}
437
438static lt_t prio_point(int eprio)
439{
440 /* make sure we have non-negative prio points */
441 return eprio + LITMUS_MAX_PRIORITY;
442}
443
444static void boost_priority(struct task_struct* t, lt_t priority_point)
445{
446 unsigned long flags;
447 pfp_domain_t* pfp = task_pfp(t);
448
449 raw_spin_lock_irqsave(&pfp->slock, flags);
450
451
452 TRACE_TASK(t, "priority boosted at %llu\n", litmus_clock());
453
454 tsk_rt(t)->priority_boosted = 1;
455 /* tie-break by protocol-specific priority point */
456 tsk_rt(t)->boost_start_time = priority_point;
457
458 /* Priority boosting currently only takes effect for already-scheduled
459 * tasks. This is sufficient since priority boosting only kicks in as
460 * part of lock acquisitions. */
461 BUG_ON(pfp->scheduled != t);
462
463 raw_spin_unlock_irqrestore(&pfp->slock, flags);
464}
465
466static void unboost_priority(struct task_struct* t)
467{
468 unsigned long flags;
469 pfp_domain_t* pfp = task_pfp(t);
470
471 raw_spin_lock_irqsave(&pfp->slock, flags);
472
473 /* Assumption: this only happens when the job is scheduled.
474 * Exception: If t transitioned to non-real-time mode, we no longer
 475	 * care about it. */
476 BUG_ON(pfp->scheduled != t && is_realtime(t));
477
478 TRACE_TASK(t, "priority restored at %llu\n", litmus_clock());
479
480 tsk_rt(t)->priority_boosted = 0;
481 tsk_rt(t)->boost_start_time = 0;
482
483 /* check if this changes anything */
484 if (fp_preemption_needed(&pfp->ready_queue, pfp->scheduled))
485 preempt(pfp);
486
487 raw_spin_unlock_irqrestore(&pfp->slock, flags);
488}
489
490/* ******************** SRP support ************************ */
491
492static unsigned int pfp_get_srp_prio(struct task_struct* t)
493{
494 return get_priority(t);
495}
496
497/* ******************** FMLP support ********************** */
498
499struct fmlp_semaphore {
500 struct litmus_lock litmus_lock;
501
502 /* current resource holder */
503 struct task_struct *owner;
504
505 /* FIFO queue of waiting tasks */
506 wait_queue_head_t wait;
507};
508
509static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock)
510{
511 return container_of(lock, struct fmlp_semaphore, litmus_lock);
512}
513
514static inline lt_t
515fmlp_clock(void)
516{
517 return (lt_t) this_cpu_inc_return(fmlp_timestamp);
518}
519
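/* FMLP request rule implemented below: boost the caller's priority
 * (tie-broken by a per-CPU timestamp from fmlp_clock()) before it can
 * suspend, enqueue FIFO if the resource is held, and rely on exactly
 * one wake-up per unlock so that ->owner need not be re-checked on resume.
 */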
520int pfp_fmlp_lock(struct litmus_lock* l)
521{
522 struct task_struct* t = current;
523 struct fmlp_semaphore *sem = fmlp_from_lock(l);
524 wait_queue_t wait;
525 unsigned long flags;
526 lt_t time_of_request;
527
528 if (!is_realtime(t))
529 return -EPERM;
530
531 /* prevent nested lock acquisition --- not supported by FMLP */
532 if (tsk_rt(t)->num_locks_held ||
533 tsk_rt(t)->num_local_locks_held)
534 return -EBUSY;
535
536 spin_lock_irqsave(&sem->wait.lock, flags);
537
538 /* tie-break by this point in time */
539 time_of_request = fmlp_clock();
540
541 /* Priority-boost ourself *before* we suspend so that
542 * our priority is boosted when we resume. */
543 boost_priority(t, time_of_request);
544
545 if (sem->owner) {
546 /* resource is not free => must suspend and wait */
547
548 init_waitqueue_entry(&wait, t);
549
550 /* FIXME: interruptible would be nice some day */
551 set_task_state(t, TASK_UNINTERRUPTIBLE);
552
553 __add_wait_queue_tail_exclusive(&sem->wait, &wait);
554
555 TS_LOCK_SUSPEND;
556
557 /* release lock before sleeping */
558 spin_unlock_irqrestore(&sem->wait.lock, flags);
559
560 /* We depend on the FIFO order. Thus, we don't need to recheck
561 * when we wake up; we are guaranteed to have the lock since
562 * there is only one wake up per release.
563 */
564
565 schedule();
566
567 TS_LOCK_RESUME;
568
569 /* Since we hold the lock, no other task will change
570 * ->owner. We can thus check it without acquiring the spin
571 * lock. */
572 BUG_ON(sem->owner != t);
573 } else {
574 /* it's ours now */
575 sem->owner = t;
576
577 spin_unlock_irqrestore(&sem->wait.lock, flags);
578 }
579
580 tsk_rt(t)->num_locks_held++;
581
582 return 0;
583}
584
585int pfp_fmlp_unlock(struct litmus_lock* l)
586{
587 struct task_struct *t = current, *next = NULL;
588 struct fmlp_semaphore *sem = fmlp_from_lock(l);
589 unsigned long flags;
590 int err = 0;
591
592 preempt_disable();
593
594 spin_lock_irqsave(&sem->wait.lock, flags);
595
596 if (sem->owner != t) {
597 err = -EINVAL;
598 goto out;
599 }
600
601 tsk_rt(t)->num_locks_held--;
602
603 /* we lose the benefit of priority boosting */
604
605 unboost_priority(t);
606
607 /* check if there are jobs waiting for this resource */
608 next = __waitqueue_remove_first(&sem->wait);
609 sem->owner = next;
610
611out:
612 spin_unlock_irqrestore(&sem->wait.lock, flags);
613
614 /* Wake up next. The waiting job is already priority-boosted. */
615 if(next) {
616 wake_up_process(next);
617 }
618
619 preempt_enable();
620
621 return err;
622}
623
624int pfp_fmlp_close(struct litmus_lock* l)
625{
626 struct task_struct *t = current;
627 struct fmlp_semaphore *sem = fmlp_from_lock(l);
628 unsigned long flags;
629
630 int owner;
631
632 spin_lock_irqsave(&sem->wait.lock, flags);
633
634 owner = sem->owner == t;
635
636 spin_unlock_irqrestore(&sem->wait.lock, flags);
637
638 if (owner)
639 pfp_fmlp_unlock(l);
640
641 return 0;
642}
643
644void pfp_fmlp_free(struct litmus_lock* lock)
645{
646 kfree(fmlp_from_lock(lock));
647}
648
649static struct litmus_lock_ops pfp_fmlp_lock_ops = {
650 .close = pfp_fmlp_close,
651 .lock = pfp_fmlp_lock,
652 .unlock = pfp_fmlp_unlock,
653 .deallocate = pfp_fmlp_free,
654};
655
656static struct litmus_lock* pfp_new_fmlp(void)
657{
658 struct fmlp_semaphore* sem;
659
660 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
661 if (!sem)
662 return NULL;
663
664 sem->owner = NULL;
665 init_waitqueue_head(&sem->wait);
666 sem->litmus_lock.ops = &pfp_fmlp_lock_ops;
667
668 return &sem->litmus_lock;
669}
670
671/* ******************** MPCP support ********************** */
672
673struct mpcp_semaphore {
674 struct litmus_lock litmus_lock;
675
676 /* current resource holder */
677 struct task_struct *owner;
678
679 /* priority queue of waiting tasks */
680 wait_queue_head_t wait;
681
682 /* priority ceiling per cpu */
683 unsigned int prio_ceiling[NR_CPUS];
684
685 /* should jobs spin "virtually" for this resource? */
686 int vspin;
687};
688
689#define OMEGA_CEILING UINT_MAX
690
691/* Since jobs spin "virtually" while waiting to acquire a lock,
 692 * they first must acquire a local per-CPU resource.
693 */
694static DEFINE_PER_CPU(wait_queue_head_t, mpcpvs_vspin_wait);
695static DEFINE_PER_CPU(struct task_struct*, mpcpvs_vspin);
696
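/* Local "virtual spinning" protocol used by pfp_mpcp_lock():
 *  1. mpcp_vspin_enter(): become the CPU's unique vspin owner, or block
 *     in mpcpvs_vspin_wait (priority-ordered) until the current owner
 *     finishes its global request.
 *  2. The vspin owner is then priority-boosted to the semaphore's local
 *     ceiling and suspends on the global MPCP queue if the resource is held.
 *  3. mpcp_vspin_exit(): after the global critical section, hand the
 *     local vspin token to the highest-priority local waiter.
 */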
697/* called with preemptions off <=> no local modifications */
698static void mpcp_vspin_enter(void)
699{
700 struct task_struct* t = current;
701
702 while (1) {
703 if (this_cpu_read(mpcpvs_vspin) == NULL) {
704 /* good, we get to issue our request */
705 this_cpu_write(mpcpvs_vspin, t);
706 break;
707 } else {
708 /* some job is spinning => enqueue in request queue */
709 prio_wait_queue_t wait;
710 wait_queue_head_t* vspin = this_cpu_ptr(&mpcpvs_vspin_wait);
711 unsigned long flags;
712
713 /* ordered by regular priority */
714 init_prio_waitqueue_entry(&wait, t, prio_point(get_priority(t)));
715
716 spin_lock_irqsave(&vspin->lock, flags);
717
718 set_task_state(t, TASK_UNINTERRUPTIBLE);
719
720 __add_wait_queue_prio_exclusive(vspin, &wait);
721
722 spin_unlock_irqrestore(&vspin->lock, flags);
723
724 TS_LOCK_SUSPEND;
725
726 preempt_enable_no_resched();
727
728 schedule();
729
730 preempt_disable();
731
732 TS_LOCK_RESUME;
733 /* Recheck if we got it --- some higher-priority process might
734 * have swooped in. */
735 }
736 }
737 /* ok, now it is ours */
738}
739
740/* called with preemptions off */
741static void mpcp_vspin_exit(void)
742{
743 struct task_struct* t = current, *next;
744 unsigned long flags;
745 wait_queue_head_t* vspin = this_cpu_ptr(&mpcpvs_vspin_wait);
746
747 BUG_ON(this_cpu_read(mpcpvs_vspin) != t);
748
749 /* no spinning job */
750 this_cpu_write(mpcpvs_vspin, NULL);
751
752 /* see if anyone is waiting for us to stop "spinning" */
753 spin_lock_irqsave(&vspin->lock, flags);
754 next = __waitqueue_remove_first(vspin);
755
756 if (next)
757 wake_up_process(next);
758
759 spin_unlock_irqrestore(&vspin->lock, flags);
760}
761
762static inline struct mpcp_semaphore* mpcp_from_lock(struct litmus_lock* lock)
763{
764 return container_of(lock, struct mpcp_semaphore, litmus_lock);
765}
766
767int pfp_mpcp_lock(struct litmus_lock* l)
768{
769 struct task_struct* t = current;
770 struct mpcp_semaphore *sem = mpcp_from_lock(l);
771 prio_wait_queue_t wait;
772 unsigned long flags;
773
774 if (!is_realtime(t))
775 return -EPERM;
776
777 /* prevent nested lock acquisition */
778 if (tsk_rt(t)->num_locks_held ||
779 tsk_rt(t)->num_local_locks_held)
780 return -EBUSY;
781
782 preempt_disable();
783
784 if (sem->vspin)
785 mpcp_vspin_enter();
786
787 /* Priority-boost ourself *before* we suspend so that
788 * our priority is boosted when we resume. Use the priority
789 * ceiling for the local partition. */
790 boost_priority(t, sem->prio_ceiling[get_partition(t)]);
791
792 spin_lock_irqsave(&sem->wait.lock, flags);
793
794 preempt_enable_no_resched();
795
796 if (sem->owner) {
797 /* resource is not free => must suspend and wait */
798
799 /* ordered by regular priority */
800 init_prio_waitqueue_entry(&wait, t, prio_point(get_priority(t)));
801
802 /* FIXME: interruptible would be nice some day */
803 set_task_state(t, TASK_UNINTERRUPTIBLE);
804
805 __add_wait_queue_prio_exclusive(&sem->wait, &wait);
806
807 TS_LOCK_SUSPEND;
808
809 /* release lock before sleeping */
810 spin_unlock_irqrestore(&sem->wait.lock, flags);
811
812 /* We depend on the FIFO order. Thus, we don't need to recheck
813 * when we wake up; we are guaranteed to have the lock since
814 * there is only one wake up per release.
815 */
816
817 schedule();
818
819 TS_LOCK_RESUME;
820
821 /* Since we hold the lock, no other task will change
822 * ->owner. We can thus check it without acquiring the spin
823 * lock. */
824 BUG_ON(sem->owner != t);
825 } else {
826 /* it's ours now */
827 sem->owner = t;
828
829 spin_unlock_irqrestore(&sem->wait.lock, flags);
830 }
831
832 tsk_rt(t)->num_locks_held++;
833
834 return 0;
835}
836
837int pfp_mpcp_unlock(struct litmus_lock* l)
838{
839 struct task_struct *t = current, *next = NULL;
840 struct mpcp_semaphore *sem = mpcp_from_lock(l);
841 unsigned long flags;
842 int err = 0;
843
844 preempt_disable();
845
846 spin_lock_irqsave(&sem->wait.lock, flags);
847
848 if (sem->owner != t) {
849 err = -EINVAL;
850 goto out;
851 }
852
853 tsk_rt(t)->num_locks_held--;
854
855 /* we lose the benefit of priority boosting */
856 unboost_priority(t);
857
858 /* check if there are jobs waiting for this resource */
859 next = __waitqueue_remove_first(&sem->wait);
860 sem->owner = next;
861
862out:
863 spin_unlock_irqrestore(&sem->wait.lock, flags);
864
865 /* Wake up next. The waiting job is already priority-boosted. */
866 if(next) {
867 wake_up_process(next);
868 }
869
870 if (sem->vspin && err == 0) {
871 mpcp_vspin_exit();
872 }
873
874 preempt_enable();
875
876 return err;
877}
878
879int pfp_mpcp_open(struct litmus_lock* l, void* config)
880{
881 struct task_struct *t = current;
882 int cpu, local_cpu;
883 struct mpcp_semaphore *sem = mpcp_from_lock(l);
884 unsigned long flags;
885
886 if (!is_realtime(t))
887 /* we need to know the real-time priority */
888 return -EPERM;
889
890 local_cpu = get_partition(t);
891
892 spin_lock_irqsave(&sem->wait.lock, flags);
893 for (cpu = 0; cpu < NR_CPUS; cpu++) {
894 if (cpu != local_cpu) {
895 sem->prio_ceiling[cpu] = min(sem->prio_ceiling[cpu],
896 get_priority(t));
897 TRACE_CUR("priority ceiling for sem %p is now %d on cpu %d\n",
898 sem, sem->prio_ceiling[cpu], cpu);
899 }
900 }
901 spin_unlock_irqrestore(&sem->wait.lock, flags);
902
903 return 0;
904}
905
906int pfp_mpcp_close(struct litmus_lock* l)
907{
908 struct task_struct *t = current;
909 struct mpcp_semaphore *sem = mpcp_from_lock(l);
910 unsigned long flags;
911
912 int owner;
913
914 spin_lock_irqsave(&sem->wait.lock, flags);
915
916 owner = sem->owner == t;
917
918 spin_unlock_irqrestore(&sem->wait.lock, flags);
919
920 if (owner)
921 pfp_mpcp_unlock(l);
922
923 return 0;
924}
925
926void pfp_mpcp_free(struct litmus_lock* lock)
927{
928 kfree(mpcp_from_lock(lock));
929}
930
931static struct litmus_lock_ops pfp_mpcp_lock_ops = {
932 .close = pfp_mpcp_close,
933 .lock = pfp_mpcp_lock,
934 .open = pfp_mpcp_open,
935 .unlock = pfp_mpcp_unlock,
936 .deallocate = pfp_mpcp_free,
937};
938
939static struct litmus_lock* pfp_new_mpcp(int vspin)
940{
941 struct mpcp_semaphore* sem;
942 int cpu;
943
944 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
945 if (!sem)
946 return NULL;
947
948 sem->owner = NULL;
949 init_waitqueue_head(&sem->wait);
950 sem->litmus_lock.ops = &pfp_mpcp_lock_ops;
951
952 for (cpu = 0; cpu < NR_CPUS; cpu++)
953 sem->prio_ceiling[cpu] = OMEGA_CEILING;
954
955 /* mark as virtual spinning */
956 sem->vspin = vspin;
957
958 return &sem->litmus_lock;
959}
960
961
962/* ******************** PCP support ********************** */
963
964
965struct pcp_semaphore {
966 struct litmus_lock litmus_lock;
967
968 struct list_head ceiling;
969
970 /* current resource holder */
971 struct task_struct *owner;
972
973 /* priority ceiling --- can be negative due to DPCP support */
974 int prio_ceiling;
975
976 /* on which processor is this PCP semaphore allocated? */
977 int on_cpu;
978};
979
980static inline struct pcp_semaphore* pcp_from_lock(struct litmus_lock* lock)
981{
982 return container_of(lock, struct pcp_semaphore, litmus_lock);
983}
984
985
986struct pcp_state {
987 struct list_head system_ceiling;
988
989 /* highest-priority waiting task */
990 struct task_struct* hp_waiter;
991
992 /* list of jobs waiting to get past the system ceiling */
993 wait_queue_head_t ceiling_blocked;
994};
995
996static void pcp_init_state(struct pcp_state* s)
997{
998 INIT_LIST_HEAD(&s->system_ceiling);
999 s->hp_waiter = NULL;
1000 init_waitqueue_head(&s->ceiling_blocked);
1001}
1002
1003static DEFINE_PER_CPU(struct pcp_state, pcp_state);
1004
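/* Classic PCP per partition: the per-CPU "system ceiling" is the list of
 * currently held PCP semaphores, ordered by priority ceiling. A job may
 * raise the ceiling (acquire a semaphore) only if its priority is higher
 * than the current ceiling or it owns the ceiling semaphore itself
 * (pcp_exceeds_ceiling()); otherwise it blocks and the ceiling owner
 * inherits the highest blocked priority (pcp_priority_inheritance()).
 */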
1005/* assumes preemptions are off */
1006static struct pcp_semaphore* pcp_get_ceiling(void)
1007{
1008 struct list_head* top = &(this_cpu_ptr(&pcp_state)->system_ceiling);
1009 return list_first_entry_or_null(top, struct pcp_semaphore, ceiling);
1010}
1011
1012/* assumes preempt off */
1013static void pcp_add_ceiling(struct pcp_semaphore* sem)
1014{
1015 struct list_head *pos;
1016 struct list_head *in_use = &(this_cpu_ptr(&pcp_state)->system_ceiling);
1017 struct pcp_semaphore* held;
1018
1019 BUG_ON(sem->on_cpu != smp_processor_id());
1020 BUG_ON(in_list(&sem->ceiling));
1021
1022 list_for_each(pos, in_use) {
1023 held = list_entry(pos, struct pcp_semaphore, ceiling);
1024 if (held->prio_ceiling >= sem->prio_ceiling) {
1025 __list_add(&sem->ceiling, pos->prev, pos);
1026 return;
1027 }
1028 }
1029
1030 /* we hit the end of the list */
1031
1032 list_add_tail(&sem->ceiling, in_use);
1033}
1034
1035/* assumes preempt off */
1036static int pcp_exceeds_ceiling(struct pcp_semaphore* ceiling,
1037 struct task_struct* task,
1038 int effective_prio)
1039{
1040 return ceiling == NULL ||
1041 ceiling->prio_ceiling > effective_prio ||
1042 ceiling->owner == task;
1043}
1044
1045/* assumes preempt off */
1046static void pcp_priority_inheritance(void)
1047{
1048 unsigned long flags;
1049 pfp_domain_t* pfp = local_pfp;
1050
1051 struct pcp_semaphore* ceiling = pcp_get_ceiling();
1052 struct task_struct *blocker, *blocked;
1053
1054 blocker = ceiling ? ceiling->owner : NULL;
1055 blocked = this_cpu_ptr(&pcp_state)->hp_waiter;
1056
1057 raw_spin_lock_irqsave(&pfp->slock, flags);
1058
1059 /* Current is no longer inheriting anything by default. This should be
1060 * the currently scheduled job, and hence not currently queued.
1061 * Special case: if current stopped being a real-time task, it will no longer
1062 * be registered as pfp->scheduled. */
1063 BUG_ON(current != pfp->scheduled && is_realtime(current));
1064
1065 fp_set_prio_inh(pfp, current, NULL);
1066 fp_set_prio_inh(pfp, blocked, NULL);
1067 fp_set_prio_inh(pfp, blocker, NULL);
1068
1069 /* Let blocking job inherit priority of blocked job, if required. */
1070 if (blocker && blocked &&
1071 fp_higher_prio(blocked, blocker)) {
1072 TRACE_TASK(blocker, "PCP inherits from %s/%d (prio %u -> %u) \n",
1073 blocked->comm, blocked->pid,
1074 get_priority(blocker), get_priority(blocked));
1075 fp_set_prio_inh(pfp, blocker, blocked);
1076 }
1077
1078 /* Check if anything changed. If the blocked job is current, then it is
1079 * just blocking and hence is going to call the scheduler anyway. */
1080 if (blocked != current &&
1081 fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled))
1082 preempt(pfp);
1083
1084 raw_spin_unlock_irqrestore(&pfp->slock, flags);
1085}
1086
1087/* called with preemptions off */
1088static void pcp_raise_ceiling(struct pcp_semaphore* sem,
1089 int effective_prio)
1090{
1091 struct task_struct* t = current;
1092 struct pcp_semaphore* ceiling;
1093 prio_wait_queue_t wait;
1094 unsigned int waiting_higher_prio;
1095
1096 while(1) {
1097 ceiling = pcp_get_ceiling();
1098 if (pcp_exceeds_ceiling(ceiling, t, effective_prio))
1099 break;
1100
1101 TRACE_CUR("PCP ceiling-blocked, wanted sem %p, but %s/%d has the ceiling \n",
1102 sem, ceiling->owner->comm, ceiling->owner->pid);
1103
1104 /* we need to wait until the ceiling is lowered */
1105
1106 /* enqueue in priority order */
1107 init_prio_waitqueue_entry(&wait, t, effective_prio);
1108 set_task_state(t, TASK_UNINTERRUPTIBLE);
1109 waiting_higher_prio = add_wait_queue_prio_exclusive(
1110 &(this_cpu_ptr(&pcp_state)->ceiling_blocked), &wait);
1111
1112 if (waiting_higher_prio == 0) {
1113 TRACE_CUR("PCP new highest-prio waiter => prio inheritance\n");
1114
1115 /* we are the new highest-priority waiting job
1116 * => update inheritance */
1117 this_cpu_ptr(&pcp_state)->hp_waiter = t;
1118 pcp_priority_inheritance();
1119 }
1120
1121 TS_LOCK_SUSPEND;
1122
1123 preempt_enable_no_resched();
1124 schedule();
1125 preempt_disable();
1126
1127 /* pcp_resume_unblocked() removed us from wait queue */
1128
1129 TS_LOCK_RESUME;
1130 }
1131
1132 TRACE_CUR("PCP got the ceiling and sem %p\n", sem);
1133
1134 /* We are good to go. The semaphore should be available. */
1135 BUG_ON(sem->owner != NULL);
1136
1137 sem->owner = t;
1138
1139 pcp_add_ceiling(sem);
1140}
1141
1142static void pcp_resume_unblocked(void)
1143{
1144 wait_queue_head_t *blocked = &(this_cpu_ptr(&pcp_state)->ceiling_blocked);
1145 unsigned long flags;
1146 prio_wait_queue_t* q;
1147 struct task_struct* t = NULL;
1148
1149 struct pcp_semaphore* ceiling = pcp_get_ceiling();
1150
1151 spin_lock_irqsave(&blocked->lock, flags);
1152
1153 while (waitqueue_active(blocked)) {
1154 /* check first == highest-priority waiting job */
1155 q = list_entry(blocked->task_list.next,
1156 prio_wait_queue_t, wq.task_list);
1157 t = (struct task_struct*) q->wq.private;
1158
1159 /* can it proceed now? => let it go */
1160 if (pcp_exceeds_ceiling(ceiling, t, q->priority)) {
1161 __remove_wait_queue(blocked, &q->wq);
1162 wake_up_process(t);
1163 } else {
1164 /* We are done. Update highest-priority waiter. */
1165 this_cpu_ptr(&pcp_state)->hp_waiter = t;
1166 goto out;
1167 }
1168 }
1169 /* If we get here, then there are no more waiting
1170 * jobs. */
1171 this_cpu_ptr(&pcp_state)->hp_waiter = NULL;
1172out:
1173 spin_unlock_irqrestore(&blocked->lock, flags);
1174}
1175
1176/* assumes preempt off */
1177static void pcp_lower_ceiling(struct pcp_semaphore* sem)
1178{
1179 BUG_ON(!in_list(&sem->ceiling));
1180 BUG_ON(sem->owner != current);
1181 BUG_ON(sem->on_cpu != smp_processor_id());
1182
1183 /* remove from ceiling list */
1184 list_del(&sem->ceiling);
1185
1186 /* release */
1187 sem->owner = NULL;
1188
1189 TRACE_CUR("PCP released sem %p\n", sem);
1190
1191 /* Wake up all ceiling-blocked jobs that now pass the ceiling. */
1192 pcp_resume_unblocked();
1193
1194 pcp_priority_inheritance();
1195}
1196
1197static void pcp_update_prio_ceiling(struct pcp_semaphore* sem,
1198 int effective_prio)
1199{
1200 /* This needs to be synchronized on something.
1201 * Might as well use waitqueue lock for the processor.
1202 * We assume this happens only before the task set starts execution,
1203 * (i.e., during initialization), but it may happen on multiple processors
1204 * at the same time.
1205 */
1206 unsigned long flags;
1207
1208 struct pcp_state* s = &per_cpu(pcp_state, sem->on_cpu);
1209
1210 spin_lock_irqsave(&s->ceiling_blocked.lock, flags);
1211
1212 sem->prio_ceiling = min(sem->prio_ceiling, effective_prio);
1213
1214 spin_unlock_irqrestore(&s->ceiling_blocked.lock, flags);
1215}
1216
1217static void pcp_init_semaphore(struct pcp_semaphore* sem, int cpu)
1218{
1219 sem->owner = NULL;
1220 INIT_LIST_HEAD(&sem->ceiling);
1221 sem->prio_ceiling = INT_MAX;
1222 sem->on_cpu = cpu;
1223}
1224
1225int pfp_pcp_lock(struct litmus_lock* l)
1226{
1227 struct task_struct* t = current;
1228 struct pcp_semaphore *sem = pcp_from_lock(l);
1229
1230 /* The regular PCP uses the regular task priorities, not agent
1231 * priorities. */
1232 int eprio = get_priority(t);
1233 int from = get_partition(t);
1234 int to = sem->on_cpu;
1235
1236 if (!is_realtime(t) || from != to)
1237 return -EPERM;
1238
1239 /* prevent nested lock acquisition in global critical section */
1240 if (tsk_rt(t)->num_locks_held)
1241 return -EBUSY;
1242
1243 preempt_disable();
1244
1245 pcp_raise_ceiling(sem, eprio);
1246
1247 preempt_enable();
1248
1249 tsk_rt(t)->num_local_locks_held++;
1250
1251 return 0;
1252}
1253
1254int pfp_pcp_unlock(struct litmus_lock* l)
1255{
1256 struct task_struct *t = current;
1257 struct pcp_semaphore *sem = pcp_from_lock(l);
1258
1259 int err = 0;
1260
1261 preempt_disable();
1262
1263 if (sem->owner != t) {
1264 err = -EINVAL;
1265 goto out;
1266 }
1267
1268 /* The current owner should be executing on the correct CPU.
1269 *
1270 * If the owner transitioned out of RT mode or is exiting, then
 1271	 * it might have already been migrated away by the best-effort
1272 * scheduler and we just have to deal with it. */
1273 if (unlikely(!is_realtime(t) && sem->on_cpu != smp_processor_id())) {
1274 TRACE_TASK(t, "PCP unlock cpu=%d, sem->on_cpu=%d\n",
1275 smp_processor_id(), sem->on_cpu);
1276 preempt_enable();
1277 err = litmus_be_migrate_to(sem->on_cpu);
1278 preempt_disable();
1279 TRACE_TASK(t, "post-migrate: cpu=%d, sem->on_cpu=%d err=%d\n",
1280 smp_processor_id(), sem->on_cpu, err);
1281 }
1282 BUG_ON(sem->on_cpu != smp_processor_id());
1283 err = 0;
1284
1285 tsk_rt(t)->num_local_locks_held--;
1286
1287 /* give it back */
1288 pcp_lower_ceiling(sem);
1289
1290out:
1291 preempt_enable();
1292
1293 return err;
1294}
1295
1296int pfp_pcp_open(struct litmus_lock* l, void* __user config)
1297{
1298 struct task_struct *t = current;
1299 struct pcp_semaphore *sem = pcp_from_lock(l);
1300
1301 int cpu, eprio;
1302
1303 if (!is_realtime(t))
1304 /* we need to know the real-time priority */
1305 return -EPERM;
1306
1307 if (!config)
1308 cpu = get_partition(t);
1309 else if (get_user(cpu, (int*) config))
1310 return -EFAULT;
1311
1312 /* make sure the resource location matches */
1313 if (cpu != sem->on_cpu)
1314 return -EINVAL;
1315
1316 /* The regular PCP uses regular task priorites, not agent
1317 * priorities. */
1318 eprio = get_priority(t);
1319
1320 pcp_update_prio_ceiling(sem, eprio);
1321
1322 return 0;
1323}
1324
1325int pfp_pcp_close(struct litmus_lock* l)
1326{
1327 struct task_struct *t = current;
1328 struct pcp_semaphore *sem = pcp_from_lock(l);
1329
1330 int owner = 0;
1331
1332 preempt_disable();
1333
1334 if (sem->on_cpu == smp_processor_id())
1335 owner = sem->owner == t;
1336
1337 preempt_enable();
1338
1339 if (owner)
1340 pfp_pcp_unlock(l);
1341
1342 return 0;
1343}
1344
1345void pfp_pcp_free(struct litmus_lock* lock)
1346{
1347 kfree(pcp_from_lock(lock));
1348}
1349
1350
1351static struct litmus_lock_ops pfp_pcp_lock_ops = {
1352 .close = pfp_pcp_close,
1353 .lock = pfp_pcp_lock,
1354 .open = pfp_pcp_open,
1355 .unlock = pfp_pcp_unlock,
1356 .deallocate = pfp_pcp_free,
1357};
1358
1359
1360static struct litmus_lock* pfp_new_pcp(int on_cpu)
1361{
1362 struct pcp_semaphore* sem;
1363
1364 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1365 if (!sem)
1366 return NULL;
1367
1368 sem->litmus_lock.ops = &pfp_pcp_lock_ops;
1369 pcp_init_semaphore(sem, on_cpu);
1370
1371 return &sem->litmus_lock;
1372}
1373
1374/* ******************** DPCP support ********************** */
1375
1376struct dpcp_semaphore {
1377 struct litmus_lock litmus_lock;
1378 struct pcp_semaphore pcp;
1379 int owner_cpu;
1380};
1381
1382static inline struct dpcp_semaphore* dpcp_from_lock(struct litmus_lock* lock)
1383{
1384 return container_of(lock, struct dpcp_semaphore, litmus_lock);
1385}
1386
1387/* called with preemptions disabled */
1388static void pfp_migrate_to(int target_cpu)
1389{
1390 struct task_struct* t = current;
1391 pfp_domain_t *from;
1392
1393 if (get_partition(t) == target_cpu)
1394 return;
1395
1396 if (!is_realtime(t))
1397 {
1398 TRACE_TASK(t, "not migrating, not a RT task (anymore?)\n");
1399 return;
1400 }
1401
1402 /* make sure target_cpu makes sense */
1403 BUG_ON(target_cpu >= NR_CPUS || !cpu_online(target_cpu));
1404
1405 local_irq_disable();
1406
1407 from = task_pfp(t);
1408 raw_spin_lock(&from->slock);
1409
1410 /* Scheduled task should not be in any ready or release queue. Check
1411 * this while holding the lock to avoid RT mode transitions.*/
1412 BUG_ON(is_realtime(t) && is_queued(t));
1413
1414 /* switch partitions */
1415 tsk_rt(t)->task_params.cpu = target_cpu;
1416
1417 raw_spin_unlock(&from->slock);
1418
1419 /* Don't trace scheduler costs as part of
1420 * locking overhead. Scheduling costs are accounted for
1421 * explicitly. */
1422 TS_LOCK_SUSPEND;
1423
1424 local_irq_enable();
1425 preempt_enable_no_resched();
1426
1427 /* deschedule to be migrated */
1428 schedule();
1429
1430 /* we are now on the target processor */
1431 preempt_disable();
1432
1433 /* start recording costs again */
1434 TS_LOCK_RESUME;
1435
1436 BUG_ON(smp_processor_id() != target_cpu && is_realtime(t));
1437}
1438
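/* DPCP request flow implemented below:
 *  lock:   boost to the task's own priority, migrate to the resource's
 *          synchronization processor (sem->pcp.on_cpu), then run the
 *          request as a local PCP agent with priority
 *          effective_agent_priority(get_priority(t)).
 *  unlock: lower the local PCP ceiling, drop the boost, and migrate
 *          back to the task's home partition (owner_cpu).
 */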
1439int pfp_dpcp_lock(struct litmus_lock* l)
1440{
1441 struct task_struct* t = current;
1442 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1443 int eprio = effective_agent_priority(get_priority(t));
1444 int from = get_partition(t);
1445 int to = sem->pcp.on_cpu;
1446
1447 if (!is_realtime(t))
1448 return -EPERM;
1449
 1450	/* prevent nested lock acquisition */
1451 if (tsk_rt(t)->num_locks_held ||
1452 tsk_rt(t)->num_local_locks_held)
1453 return -EBUSY;
1454
1455 preempt_disable();
1456
1457 /* Priority-boost ourself *before* we suspend so that
1458 * our priority is boosted when we resume. */
1459
1460 boost_priority(t, get_priority(t));
1461
1462 pfp_migrate_to(to);
1463
1464 pcp_raise_ceiling(&sem->pcp, eprio);
1465
1466 /* yep, we got it => execute request */
1467 sem->owner_cpu = from;
1468
1469 preempt_enable();
1470
1471 tsk_rt(t)->num_locks_held++;
1472
1473 return 0;
1474}
1475
1476int pfp_dpcp_unlock(struct litmus_lock* l)
1477{
1478 struct task_struct *t = current;
1479 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1480 int err = 0;
1481 int home;
1482
1483 preempt_disable();
1484
1485 if (sem->pcp.owner != t) {
1486 err = -EINVAL;
1487 goto out;
1488 }
1489
1490 /* The current owner should be executing on the correct CPU.
1491 *
1492 * If the owner transitioned out of RT mode or is exiting, then
 1493	 * it might have already been migrated away by the best-effort
1494 * scheduler and we just have to deal with it. */
1495 if (unlikely(!is_realtime(t) && sem->pcp.on_cpu != smp_processor_id())) {
1496 TRACE_TASK(t, "DPCP unlock cpu=%d, sem->pcp.on_cpu=%d\n", smp_processor_id(), sem->pcp.on_cpu);
1497 preempt_enable();
1498 err = litmus_be_migrate_to(sem->pcp.on_cpu);
1499 preempt_disable();
1500 TRACE_TASK(t, "post-migrate: cpu=%d, sem->pcp.on_cpu=%d err=%d\n", smp_processor_id(), sem->pcp.on_cpu, err);
1501 }
1502 BUG_ON(sem->pcp.on_cpu != smp_processor_id());
1503 err = 0;
1504
1505 tsk_rt(t)->num_locks_held--;
1506
1507 home = sem->owner_cpu;
1508
1509 /* give it back */
1510 pcp_lower_ceiling(&sem->pcp);
1511
1512 /* we lose the benefit of priority boosting */
1513 unboost_priority(t);
1514
1515 pfp_migrate_to(home);
1516
1517out:
1518 preempt_enable();
1519
1520 return err;
1521}
1522
1523int pfp_dpcp_open(struct litmus_lock* l, void* __user config)
1524{
1525 struct task_struct *t = current;
1526 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1527 int cpu, eprio;
1528
1529 if (!is_realtime(t))
1530 /* we need to know the real-time priority */
1531 return -EPERM;
1532
1533 if (get_user(cpu, (int*) config))
1534 return -EFAULT;
1535
1536 /* make sure the resource location matches */
1537 if (cpu != sem->pcp.on_cpu)
1538 return -EINVAL;
1539
1540 eprio = effective_agent_priority(get_priority(t));
1541
1542 pcp_update_prio_ceiling(&sem->pcp, eprio);
1543
1544 return 0;
1545}
1546
1547int pfp_dpcp_close(struct litmus_lock* l)
1548{
1549 struct task_struct *t = current;
1550 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1551 int owner = 0;
1552
1553 preempt_disable();
1554
1555 if (sem->pcp.on_cpu == smp_processor_id())
1556 owner = sem->pcp.owner == t;
1557
1558 preempt_enable();
1559
1560 if (owner)
1561 pfp_dpcp_unlock(l);
1562
1563 return 0;
1564}
1565
1566void pfp_dpcp_free(struct litmus_lock* lock)
1567{
1568 kfree(dpcp_from_lock(lock));
1569}
1570
1571static struct litmus_lock_ops pfp_dpcp_lock_ops = {
1572 .close = pfp_dpcp_close,
1573 .lock = pfp_dpcp_lock,
1574 .open = pfp_dpcp_open,
1575 .unlock = pfp_dpcp_unlock,
1576 .deallocate = pfp_dpcp_free,
1577};
1578
1579static struct litmus_lock* pfp_new_dpcp(int on_cpu)
1580{
1581 struct dpcp_semaphore* sem;
1582
1583 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1584 if (!sem)
1585 return NULL;
1586
1587 sem->litmus_lock.ops = &pfp_dpcp_lock_ops;
1588 sem->owner_cpu = NO_CPU;
1589 pcp_init_semaphore(&sem->pcp, on_cpu);
1590
1591 return &sem->litmus_lock;
1592}
1593
1594
1595/* ******************** DFLP support ********************** */
1596
1597struct dflp_semaphore {
1598 struct litmus_lock litmus_lock;
1599
1600 /* current resource holder */
1601 struct task_struct *owner;
1602 int owner_cpu;
1603
1604 /* FIFO queue of waiting tasks */
1605 wait_queue_head_t wait;
1606
1607 /* where is the resource assigned to */
1608 int on_cpu;
1609};
1610
1611static inline struct dflp_semaphore* dflp_from_lock(struct litmus_lock* lock)
1612{
1613 return container_of(lock, struct dflp_semaphore, litmus_lock);
1614}
1615
1616int pfp_dflp_lock(struct litmus_lock* l)
1617{
1618 struct task_struct* t = current;
1619 struct dflp_semaphore *sem = dflp_from_lock(l);
1620 int from = get_partition(t);
1621 int to = sem->on_cpu;
1622 unsigned long flags;
1623 wait_queue_t wait;
1624 lt_t time_of_request;
1625
1626 if (!is_realtime(t))
1627 return -EPERM;
1628
 1629	/* prevent nested lock acquisition */
1630 if (tsk_rt(t)->num_locks_held ||
1631 tsk_rt(t)->num_local_locks_held)
1632 return -EBUSY;
1633
1634 preempt_disable();
1635
1636 /* tie-break by this point in time */
1637 time_of_request = litmus_clock();
1638
1639 /* Priority-boost ourself *before* we suspend so that
1640 * our priority is boosted when we resume. */
1641 boost_priority(t, time_of_request);
1642
1643 pfp_migrate_to(to);
1644
1645 /* Now on the right CPU, preemptions still disabled. */
1646
1647 spin_lock_irqsave(&sem->wait.lock, flags);
1648
1649 if (sem->owner) {
1650 /* resource is not free => must suspend and wait */
1651
1652 init_waitqueue_entry(&wait, t);
1653
1654 /* FIXME: interruptible would be nice some day */
1655 set_task_state(t, TASK_UNINTERRUPTIBLE);
1656
1657 __add_wait_queue_tail_exclusive(&sem->wait, &wait);
1658
1659 TS_LOCK_SUSPEND;
1660
1661 /* release lock before sleeping */
1662 spin_unlock_irqrestore(&sem->wait.lock, flags);
1663
1664 /* We depend on the FIFO order. Thus, we don't need to recheck
1665 * when we wake up; we are guaranteed to have the lock since
1666 * there is only one wake up per release.
1667 */
1668
1669 preempt_enable_no_resched();
1670
1671 schedule();
1672
1673 preempt_disable();
1674
1675 TS_LOCK_RESUME;
1676
1677 /* Since we hold the lock, no other task will change
1678 * ->owner. We can thus check it without acquiring the spin
1679 * lock. */
1680 BUG_ON(sem->owner != t);
1681 } else {
1682 /* it's ours now */
1683 sem->owner = t;
1684
1685 spin_unlock_irqrestore(&sem->wait.lock, flags);
1686 }
1687
1688 sem->owner_cpu = from;
1689
1690 preempt_enable();
1691
1692 tsk_rt(t)->num_locks_held++;
1693
1694 return 0;
1695}
1696
1697int pfp_dflp_unlock(struct litmus_lock* l)
1698{
1699 struct task_struct *t = current, *next;
1700 struct dflp_semaphore *sem = dflp_from_lock(l);
1701 int err = 0;
1702 int home;
1703 unsigned long flags;
1704
1705 preempt_disable();
1706
1707 spin_lock_irqsave(&sem->wait.lock, flags);
1708
1709 if (sem->owner != t) {
1710 err = -EINVAL;
1711 spin_unlock_irqrestore(&sem->wait.lock, flags);
1712 goto out;
1713 }
1714
1715 /* check if there are jobs waiting for this resource */
1716 next = __waitqueue_remove_first(&sem->wait);
1717 if (next) {
 1718		/* next becomes the resource holder */
1719 sem->owner = next;
1720
1721 /* Wake up next. The waiting job is already priority-boosted. */
1722 wake_up_process(next);
1723 } else
1724 /* resource becomes available */
1725 sem->owner = NULL;
1726
1727 tsk_rt(t)->num_locks_held--;
1728
1729 home = sem->owner_cpu;
1730
1731 spin_unlock_irqrestore(&sem->wait.lock, flags);
1732
1733 /* we lose the benefit of priority boosting */
1734 unboost_priority(t);
1735
1736 pfp_migrate_to(home);
1737
1738out:
1739 preempt_enable();
1740
1741 return err;
1742}
1743
1744int pfp_dflp_open(struct litmus_lock* l, void* __user config)
1745{
1746 struct dflp_semaphore *sem = dflp_from_lock(l);
1747 int cpu;
1748
1749 if (get_user(cpu, (int*) config))
1750 return -EFAULT;
1751
1752 /* make sure the resource location matches */
1753 if (cpu != sem->on_cpu)
1754 return -EINVAL;
1755
1756 return 0;
1757}
1758
1759int pfp_dflp_close(struct litmus_lock* l)
1760{
1761 struct task_struct *t = current;
1762 struct dflp_semaphore *sem = dflp_from_lock(l);
1763 int owner = 0;
1764
1765 preempt_disable();
1766
1767 if (sem->on_cpu == smp_processor_id())
1768 owner = sem->owner == t;
1769
1770 preempt_enable();
1771
1772 if (owner)
1773 pfp_dflp_unlock(l);
1774
1775 return 0;
1776}
1777
1778void pfp_dflp_free(struct litmus_lock* lock)
1779{
1780 kfree(dflp_from_lock(lock));
1781}
1782
1783static struct litmus_lock_ops pfp_dflp_lock_ops = {
1784 .close = pfp_dflp_close,
1785 .lock = pfp_dflp_lock,
1786 .open = pfp_dflp_open,
1787 .unlock = pfp_dflp_unlock,
1788 .deallocate = pfp_dflp_free,
1789};
1790
1791static struct litmus_lock* pfp_new_dflp(int on_cpu)
1792{
1793 struct dflp_semaphore* sem;
1794
1795 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1796 if (!sem)
1797 return NULL;
1798
1799 sem->litmus_lock.ops = &pfp_dflp_lock_ops;
1800 sem->owner_cpu = NO_CPU;
1801 sem->owner = NULL;
1802 sem->on_cpu = on_cpu;
1803 init_waitqueue_head(&sem->wait);
1804
1805 return &sem->litmus_lock;
1806}
1807
1808
1809/* **** lock constructor **** */
1810
1811
1812static long pfp_allocate_lock(struct litmus_lock **lock, int type,
1813 void* __user config)
1814{
1815 int err = -ENXIO, cpu;
1816 struct srp_semaphore* srp;
1817
 1818	/* P-FP supports the SRP and PCP for local resources and the FMLP,
 1819	 * MPCP, DPCP, and DFLP for global resources. */
1820 switch (type) {
1821 case FMLP_SEM:
 1822		/* Flexible Multiprocessor Locking Protocol */
1823 *lock = pfp_new_fmlp();
1824 if (*lock)
1825 err = 0;
1826 else
1827 err = -ENOMEM;
1828 break;
1829
1830 case MPCP_SEM:
 1831		/* Multiprocessor Priority Ceiling Protocol */
1832 *lock = pfp_new_mpcp(0);
1833 if (*lock)
1834 err = 0;
1835 else
1836 err = -ENOMEM;
1837 break;
1838
1839 case MPCP_VS_SEM:
 1840		/* Multiprocessor Priority Ceiling Protocol with virtual spinning */
1841 *lock = pfp_new_mpcp(1);
1842 if (*lock)
1843 err = 0;
1844 else
1845 err = -ENOMEM;
1846 break;
1847
1848 case DPCP_SEM:
1849 /* Distributed Priority Ceiling Protocol */
1850 if (get_user(cpu, (int*) config))
1851 return -EFAULT;
1852
1853 TRACE("DPCP_SEM: provided cpu=%d\n", cpu);
1854
1855 if (cpu >= NR_CPUS || !cpu_online(cpu))
1856 return -EINVAL;
1857
1858 *lock = pfp_new_dpcp(cpu);
1859 if (*lock)
1860 err = 0;
1861 else
1862 err = -ENOMEM;
1863 break;
1864
1865 case DFLP_SEM:
1866 /* Distributed FIFO Locking Protocol */
1867 if (get_user(cpu, (int*) config))
1868 return -EFAULT;
1869
 1870		TRACE("DFLP_SEM: provided cpu=%d\n", cpu);
1871
1872 if (cpu >= NR_CPUS || !cpu_online(cpu))
1873 return -EINVAL;
1874
1875 *lock = pfp_new_dflp(cpu);
1876 if (*lock)
1877 err = 0;
1878 else
1879 err = -ENOMEM;
1880 break;
1881
1882 case SRP_SEM:
1883 /* Baker's Stack Resource Policy */
1884 srp = allocate_srp_semaphore();
1885 if (srp) {
1886 *lock = &srp->litmus_lock;
1887 err = 0;
1888 } else
1889 err = -ENOMEM;
1890 break;
1891
1892 case PCP_SEM:
1893 /* Priority Ceiling Protocol */
1894 if (!config)
1895 cpu = get_partition(current);
1896 else if (get_user(cpu, (int*) config))
1897 return -EFAULT;
1898
1899 if (cpu >= NR_CPUS || !cpu_online(cpu))
1900 return -EINVAL;
1901
1902 *lock = pfp_new_pcp(cpu);
1903 if (*lock)
1904 err = 0;
1905 else
1906 err = -ENOMEM;
1907 break;
1908 };
1909
1910 return err;
1911}
1912
1913#endif
1914
1915static long pfp_admit_task(struct task_struct* tsk)
1916{
1917 if (task_cpu(tsk) == tsk->rt_param.task_params.cpu &&
1918#ifdef CONFIG_RELEASE_MASTER
1919 /* don't allow tasks on release master CPU */
1920 task_cpu(tsk) != remote_dom(task_cpu(tsk))->release_master &&
1921#endif
1922 litmus_is_valid_fixed_prio(get_priority(tsk)))
1923 return 0;
1924 else
1925 return -EINVAL;
1926}
1927
1928static struct domain_proc_info pfp_domain_proc_info;
1929static long pfp_get_domain_proc_info(struct domain_proc_info **ret)
1930{
1931 *ret = &pfp_domain_proc_info;
1932 return 0;
1933}
1934
1935static void pfp_setup_domain_proc(void)
1936{
1937 int i, cpu;
1938 int release_master =
1939#ifdef CONFIG_RELEASE_MASTER
1940 atomic_read(&release_master_cpu);
1941#else
1942 NO_CPU;
1943#endif
1944 int num_rt_cpus = num_online_cpus() - (release_master != NO_CPU);
1945 struct cd_mapping *cpu_map, *domain_map;
1946
 1947	memset(&pfp_domain_proc_info, 0, sizeof(pfp_domain_proc_info));
1948 init_domain_proc_info(&pfp_domain_proc_info, num_rt_cpus, num_rt_cpus);
1949 pfp_domain_proc_info.num_cpus = num_rt_cpus;
1950 pfp_domain_proc_info.num_domains = num_rt_cpus;
1951 for (cpu = 0, i = 0; cpu < num_online_cpus(); ++cpu) {
1952 if (cpu == release_master)
1953 continue;
1954 cpu_map = &pfp_domain_proc_info.cpu_to_domains[i];
1955 domain_map = &pfp_domain_proc_info.domain_to_cpus[i];
1956
1957 cpu_map->id = cpu;
1958 domain_map->id = i; /* enumerate w/o counting the release master */
1959 cpumask_set_cpu(i, cpu_map->mask);
1960 cpumask_set_cpu(cpu, domain_map->mask);
1961 ++i;
1962 }
1963}
1964
1965static long pfp_activate_plugin(void)
1966{
1967#if defined(CONFIG_RELEASE_MASTER) || defined(CONFIG_LITMUS_LOCKING)
1968 int cpu;
1969#endif
1970
1971#ifdef CONFIG_RELEASE_MASTER
1972 for_each_online_cpu(cpu) {
1973 remote_dom(cpu)->release_master = atomic_read(&release_master_cpu);
1974 }
1975#endif
1976
1977#ifdef CONFIG_LITMUS_LOCKING
1978 get_srp_prio = pfp_get_srp_prio;
1979
1980 for_each_online_cpu(cpu) {
1981 init_waitqueue_head(&per_cpu(mpcpvs_vspin_wait, cpu));
1982 per_cpu(mpcpvs_vspin, cpu) = NULL;
1983
1984 pcp_init_state(&per_cpu(pcp_state, cpu));
1985 pfp_doms[cpu] = remote_pfp(cpu);
1986 per_cpu(fmlp_timestamp,cpu) = 0;
1987 }
1988
1989#endif
1990
1991 pfp_setup_domain_proc();
1992
1993 return 0;
1994}
1995
1996static long pfp_deactivate_plugin(void)
1997{
1998 destroy_domain_proc_info(&pfp_domain_proc_info);
1999 return 0;
2000}
2001
2002/* Plugin object */
2003static struct sched_plugin pfp_plugin __cacheline_aligned_in_smp = {
2004 .plugin_name = "P-FP",
2005 .task_new = pfp_task_new,
2006 .complete_job = complete_job,
2007 .task_exit = pfp_task_exit,
2008 .schedule = pfp_schedule,
2009 .task_wake_up = pfp_task_wake_up,
2010 .task_block = pfp_task_block,
2011 .admit_task = pfp_admit_task,
2012 .activate_plugin = pfp_activate_plugin,
2013 .deactivate_plugin = pfp_deactivate_plugin,
2014 .get_domain_proc_info = pfp_get_domain_proc_info,
2015#ifdef CONFIG_LITMUS_LOCKING
2016 .allocate_lock = pfp_allocate_lock,
2017 .finish_switch = pfp_finish_switch,
2018#endif
2019};
2020
2021
2022static int __init init_pfp(void)
2023{
2024 int i;
2025
2026 /* We do not really want to support cpu hotplug, do we? ;)
2027 * However, if we are so crazy to do so,
 2028	 * we cannot use num_online_cpus()
2029 */
2030 for (i = 0; i < num_online_cpus(); i++) {
2031 pfp_domain_init(remote_pfp(i), i);
2032 }
2033 return register_sched_plugin(&pfp_plugin);
2034}
2035
2036module_init(init_pfp);
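To see the plugin from the task side, a minimal userspace sketch follows. It assumes the liblitmus API of the same era (init_litmus(), init_rt_task_param(), set_rt_task_param(), task_mode(), sleep_next_period(), ms2ns(), and the struct rt_task fields shown), and it assumes the plugin has already been activated, e.g. via the liblitmus setsched tool or /proc/litmus/active_plugin; treat it as a sketch under those assumptions rather than as part of this patch.

/* Userspace sketch (assumes the liblitmus API; not part of the kernel patch):
 * pin to the chosen partition, declare a fixed priority, and become a
 * LITMUS^RT real-time task so that pfp_admit_task() accepts it.
 */
#define _GNU_SOURCE
#include <sched.h>
#include <unistd.h>
#include <litmus.h>

int main(void)
{
	struct rt_task param;
	cpu_set_t mask;
	int cpu = 1;          /* target partition */
	int jobs;

	/* P-FP admits a task only if it already runs on its partition */
	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);
	sched_setaffinity(0, sizeof(mask), &mask);

	init_litmus();

	init_rt_task_param(&param);
	param.exec_cost = ms2ns(10);
	param.period    = ms2ns(100);
	param.cpu       = cpu;
	param.priority  = 10;  /* fixed priority, lower value = higher */
	set_rt_task_param(getpid(), &param);

	task_mode(LITMUS_RT_TASK);
	for (jobs = 0; jobs < 100; jobs++) {
		/* ... job body ... */
		sleep_next_period();
	}
	task_mode(BACKGROUND_TASK);
	return 0;
}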