author	Bjoern Brandenburg <bbb@mpi-sws.org>	2013-02-12 13:17:08 -0500
committer	Namhoon Kim <namhoonk@cs.unc.edu>	2014-10-21 10:08:28 -0400
commit	7cf2307d2c200a960c9e54839ba2134730adda52 (patch)
tree	7ae250d721ce37059c105b7401aab1da7037dc20
parent	8fcdf62f4db13de12ae638c8e7e3535858fb8d95 (diff)
Add P-FP scheduler plugin
-rw-r--r--	litmus/Makefile	   4
-rw-r--r--	litmus/fp_common.c	  17
-rw-r--r--	litmus/sched_pfp.c	2013
3 files changed, 2028 insertions, 6 deletions
diff --git a/litmus/Makefile b/litmus/Makefile
index c01ce3e7a101..2d2e0a584d04 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -20,7 +20,9 @@ obj-y = sched_plugin.o litmus.o \
 	ctrldev.o \
 	uncachedev.o \
 	sched_gsn_edf.o \
-	sched_psn_edf.o
+	sched_psn_edf.o \
+	sched_pfp.o
+
 
 
 obj-$(CONFIG_SCHED_CPU_AFFINITY) += affinity.o
diff --git a/litmus/fp_common.c b/litmus/fp_common.c
index 964a4729deff..ff0f30a9f536 100644
--- a/litmus/fp_common.c
+++ b/litmus/fp_common.c
@@ -32,7 +32,6 @@ int fp_higher_prio(struct task_struct* first,
 		return 0;
 	}
 
-
 	/* check for NULL tasks */
 	if (!first || !second)
 		return first && !second;
@@ -50,6 +49,15 @@ int fp_higher_prio(struct task_struct* first,
 	if (unlikely(second->rt_param.inh_task))
 		second_task = second->rt_param.inh_task;
 
+	/* Comparisons to itself are only possible with priority
+	 * inheritance when an svc_preempt interrupt arrives just
+	 * before scheduling (and everything that could follow in the
+	 * ready queue). Always favour the original job, as that one
+	 * will just suspend itself to resolve this.
+	 */
+	if (first_task == second_task)
+		return first_task == first;
+
 	/* Check for priority boosting. Tie-break by start of boosting.
 	 */
 	if (unlikely(is_priority_boosted(first_task))) {
@@ -65,11 +73,10 @@ int fp_higher_prio(struct task_struct* first,
 		/* second_task is boosted, first is not*/
 		return 0;
 
-#endif
-
-	/* Comparisons to itself are not expected; priority inheritance
-	 * should also not cause this to happen. */
+#else
+	/* No locks, no priority inheritance, no comparisons to itself */
 	BUG_ON(first_task == second_task);
+#endif
 
 	if (get_priority(first_task) < get_priority(second_task))
 		return 1;
diff --git a/litmus/sched_pfp.c b/litmus/sched_pfp.c
new file mode 100644
index 000000000000..af7de76e8105
--- /dev/null
+++ b/litmus/sched_pfp.c
@@ -0,0 +1,2013 @@
1/*
2 * litmus/sched_pfp.c
3 *
4 * Implementation of partitioned fixed-priority scheduling.
5 * Based on PSN-EDF.
6 */
7
8#include <linux/percpu.h>
9#include <linux/sched.h>
10#include <linux/list.h>
11#include <linux/spinlock.h>
12#include <linux/module.h>
13
14#include <litmus/litmus.h>
15#include <litmus/wait.h>
16#include <litmus/jobs.h>
17#include <litmus/preempt.h>
18#include <litmus/fp_common.h>
19#include <litmus/sched_plugin.h>
20#include <litmus/sched_trace.h>
21#include <litmus/trace.h>
22#include <litmus/budget.h>
23
24/* to set up domain/cpu mappings */
25#include <litmus/litmus_proc.h>
26#include <linux/uaccess.h>
27
28
29typedef struct {
30 rt_domain_t domain;
31 struct fp_prio_queue ready_queue;
32 int cpu;
33 struct task_struct* scheduled; /* only RT tasks */
34/*
35 * scheduling lock slock
36 * protects the domain and serializes scheduling decisions
37 */
38#define slock domain.ready_lock
39
40} pfp_domain_t;
41
42DEFINE_PER_CPU(pfp_domain_t, pfp_domains);
43
44pfp_domain_t* pfp_doms[NR_CPUS];
45
46#define local_pfp (&__get_cpu_var(pfp_domains))
47#define remote_dom(cpu) (&per_cpu(pfp_domains, cpu).domain)
48#define remote_pfp(cpu) (&per_cpu(pfp_domains, cpu))
49#define task_dom(task) remote_dom(get_partition(task))
50#define task_pfp(task) remote_pfp(get_partition(task))
51
52
53#ifdef CONFIG_LITMUS_LOCKING
54DEFINE_PER_CPU(uint64_t,fmlp_timestamp);
55#endif
56
57/* we assume the lock is being held */
58static void preempt(pfp_domain_t *pfp)
59{
60 preempt_if_preemptable(pfp->scheduled, pfp->cpu);
61}
62
63static unsigned int priority_index(struct task_struct* t)
64{
65#ifdef CONFIG_LITMUS_LOCKING
66 if (unlikely(t->rt_param.inh_task))
67 /* use effective priority */
68 t = t->rt_param.inh_task;
69
70 if (is_priority_boosted(t)) {
71 /* zero is reserved for priority-boosted tasks */
72 return 0;
73 } else
74#endif
75 return get_priority(t);
76}
77
78static void pfp_release_jobs(rt_domain_t* rt, struct bheap* tasks)
79{
80 pfp_domain_t *pfp = container_of(rt, pfp_domain_t, domain);
81 unsigned long flags;
82 struct task_struct* t;
83 struct bheap_node* hn;
84
85 raw_spin_lock_irqsave(&pfp->slock, flags);
86
87 while (!bheap_empty(tasks)) {
88 hn = bheap_take(fp_ready_order, tasks);
89 t = bheap2task(hn);
90 TRACE_TASK(t, "released (part:%d prio:%d)\n",
91 get_partition(t), get_priority(t));
92 fp_prio_add(&pfp->ready_queue, t, priority_index(t));
93 }
94
95 /* do we need to preempt? */
96 if (fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled)) {
97 TRACE_CUR("preempted by new release\n");
98 preempt(pfp);
99 }
100
101 raw_spin_unlock_irqrestore(&pfp->slock, flags);
102}
103
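/* Check whether the head of the local ready queue should preempt the
 * currently scheduled task; the caller must hold pfp->slock. */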
104static void pfp_preempt_check(pfp_domain_t *pfp)
105{
106 if (fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled))
107 preempt(pfp);
108}
109
110static void pfp_domain_init(pfp_domain_t* pfp,
111 int cpu)
112{
113 fp_domain_init(&pfp->domain, NULL, pfp_release_jobs);
114 pfp->cpu = cpu;
115 pfp->scheduled = NULL;
116 fp_prio_queue_init(&pfp->ready_queue);
117}
118
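/* Re-insert a runnable task into its partition: the ready queue if its
 * next job has already been released, the release queue otherwise. */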
119static void requeue(struct task_struct* t, pfp_domain_t *pfp)
120{
121 BUG_ON(!is_running(t));
122
123 tsk_rt(t)->completed = 0;
124 if (is_released(t, litmus_clock()))
125 fp_prio_add(&pfp->ready_queue, t, priority_index(t));
126 else
127 add_release(&pfp->domain, t); /* it has got to wait */
128}
129
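/* Bookkeeping for a completed (or budget-forced) job: set up the
 * parameters of the next job and trace its release if it is already due. */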
130static void job_completion(struct task_struct* t, int forced)
131{
132 sched_trace_task_completion(t,forced);
133 TRACE_TASK(t, "job_completion().\n");
134
135 tsk_rt(t)->completed = 0;
136 prepare_for_next_period(t);
137 if (is_released(t, litmus_clock()))
138 sched_trace_task_release(t);
139}
140
141static struct task_struct* pfp_schedule(struct task_struct * prev)
142{
143 pfp_domain_t* pfp = local_pfp;
144 struct task_struct* next;
145
146 int out_of_time, sleep, preempt, np, exists, blocks, resched, migrate;
147
148 raw_spin_lock(&pfp->slock);
149
150 /* sanity checking
151	 * unlike under G-EDF, when a task exits (is dead)
152	 * pfp->scheduled may be NULL and prev _is_ realtime
153 */
154 BUG_ON(pfp->scheduled && pfp->scheduled != prev);
155 BUG_ON(pfp->scheduled && !is_realtime(prev));
156
157 /* (0) Determine state */
158 exists = pfp->scheduled != NULL;
159 blocks = exists && !is_running(pfp->scheduled);
160 out_of_time = exists &&
161 budget_enforced(pfp->scheduled) &&
162 budget_exhausted(pfp->scheduled);
163 np = exists && is_np(pfp->scheduled);
164 sleep = exists && is_completed(pfp->scheduled);
165 migrate = exists && get_partition(pfp->scheduled) != pfp->cpu;
166 preempt = !blocks && (migrate || fp_preemption_needed(&pfp->ready_queue, prev));
167
168 /* If we need to preempt do so.
169 * The following checks set resched to 1 in case of special
170 * circumstances.
171 */
172 resched = preempt;
173
174 /* If a task blocks we have no choice but to reschedule.
175 */
176 if (blocks)
177 resched = 1;
178
179 /* Request a sys_exit_np() call if we would like to preempt but cannot.
180 * Multiple calls to request_exit_np() don't hurt.
181 */
182 if (np && (out_of_time || preempt || sleep))
183 request_exit_np(pfp->scheduled);
184
185 /* Any task that is preemptable and either exhausts its execution
186 * budget or wants to sleep completes. We may have to reschedule after
187 * this.
188 */
189 if (!np && (out_of_time || sleep) && !blocks && !migrate) {
190 job_completion(pfp->scheduled, !sleep);
191 resched = 1;
192 }
193
194 /* The final scheduling decision. Do we need to switch for some reason?
195 * Switch if we are in RT mode and have no task or if we need to
196 * resched.
197 */
198 next = NULL;
199 if ((!np || blocks) && (resched || !exists)) {
200 /* When preempting a task that does not block, then
201 * re-insert it into either the ready queue or the
202 * release queue (if it completed). requeue() picks
203 * the appropriate queue.
204 */
205 if (pfp->scheduled && !blocks && !migrate)
206 requeue(pfp->scheduled, pfp);
207 next = fp_prio_take(&pfp->ready_queue);
208 if (next == prev) {
209 struct task_struct *t = fp_prio_peek(&pfp->ready_queue);
210 TRACE_TASK(next, "next==prev sleep=%d oot=%d np=%d preempt=%d migrate=%d "
211 "boost=%d empty=%d prio-idx=%u prio=%u\n",
212 sleep, out_of_time, np, preempt, migrate,
213 is_priority_boosted(next),
214 t == NULL,
215 priority_index(next),
216 get_priority(next));
217 if (t)
218 TRACE_TASK(t, "waiter boost=%d prio-idx=%u prio=%u\n",
219 is_priority_boosted(t),
220 priority_index(t),
221 get_priority(t));
222 }
223 /* If preempt is set, we should not see the same task again. */
224 BUG_ON(preempt && next == prev);
225 /* Similarly, if preempt is set, then next may not be NULL,
226 * unless it's a migration. */
227 BUG_ON(preempt && !migrate && next == NULL);
228 } else
229 /* Only override Linux scheduler if we have a real-time task
230 * scheduled that needs to continue.
231 */
232 if (exists)
233 next = prev;
234
235 if (next) {
236 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
237 } else {
238 TRACE("becoming idle at %llu\n", litmus_clock());
239 }
240
241 pfp->scheduled = next;
242 sched_state_task_picked();
243 raw_spin_unlock(&pfp->slock);
244
245 return next;
246}
247
248#ifdef CONFIG_LITMUS_LOCKING
249
250/* prev is no longer scheduled --- see if it needs to migrate */
251static void pfp_finish_switch(struct task_struct *prev)
252{
253 pfp_domain_t *to;
254
255 if (is_realtime(prev) &&
256 is_running(prev) &&
257 get_partition(prev) != smp_processor_id()) {
258 TRACE_TASK(prev, "needs to migrate from P%d to P%d\n",
259 smp_processor_id(), get_partition(prev));
260
261 to = task_pfp(prev);
262
263 raw_spin_lock(&to->slock);
264
265 TRACE_TASK(prev, "adding to queue on P%d\n", to->cpu);
266 requeue(prev, to);
267 if (fp_preemption_needed(&to->ready_queue, to->scheduled))
268 preempt(to);
269
270 raw_spin_unlock(&to->slock);
271
272 }
273}
274
275#endif
276
277/* Prepare a task for running in RT mode
278 */
279static void pfp_task_new(struct task_struct * t, int on_rq, int is_scheduled)
280{
281 pfp_domain_t* pfp = task_pfp(t);
282 unsigned long flags;
283
284 TRACE_TASK(t, "P-FP: task new, cpu = %d\n",
285 t->rt_param.task_params.cpu);
286
287 /* setup job parameters */
288 release_at(t, litmus_clock());
289
290 raw_spin_lock_irqsave(&pfp->slock, flags);
291 if (is_scheduled) {
292 /* there shouldn't be anything else running at the time */
293 BUG_ON(pfp->scheduled);
294 pfp->scheduled = t;
295 } else if (is_running(t)) {
296 requeue(t, pfp);
297 /* maybe we have to reschedule */
298 pfp_preempt_check(pfp);
299 }
300 raw_spin_unlock_irqrestore(&pfp->slock, flags);
301}
302
303static void pfp_task_wake_up(struct task_struct *task)
304{
305 unsigned long flags;
306 pfp_domain_t* pfp = task_pfp(task);
307 lt_t now;
308
309 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
310 raw_spin_lock_irqsave(&pfp->slock, flags);
311
312#ifdef CONFIG_LITMUS_LOCKING
313 /* Should only be queued when processing a fake-wake up due to a
314 * migration-related state change. */
315 if (unlikely(is_queued(task))) {
316 TRACE_TASK(task, "WARNING: waking task still queued. Is this right?\n");
317 goto out_unlock;
318 }
319#else
320 BUG_ON(is_queued(task));
321#endif
322 now = litmus_clock();
323 if (is_sporadic(task) && is_tardy(task, now)
324#ifdef CONFIG_LITMUS_LOCKING
325 /* We need to take suspensions because of semaphores into
326 * account! If a job resumes after being suspended due to acquiring
327 * a semaphore, it should never be treated as a new job release.
328 */
329 && !is_priority_boosted(task)
330#endif
331 ) {
332 /* new sporadic release */
333 release_at(task, now);
334 sched_trace_task_release(task);
335 }
336
337 /* Only add to ready queue if it is not the currently-scheduled
338 * task. This could be the case if a task was woken up concurrently
339 * on a remote CPU before the executing CPU got around to actually
340 * de-scheduling the task, i.e., wake_up() raced with schedule()
341 * and won. Also, don't requeue if it is still queued, which can
342 * happen under the DPCP due wake-ups racing with migrations.
343 */
344 if (pfp->scheduled != task) {
345 requeue(task, pfp);
346 pfp_preempt_check(pfp);
347 }
348
349#ifdef CONFIG_LITMUS_LOCKING
350out_unlock:
351#endif
352 raw_spin_unlock_irqrestore(&pfp->slock, flags);
353 TRACE_TASK(task, "wake up done\n");
354}
355
356static void pfp_task_block(struct task_struct *t)
357{
358 /* only running tasks can block, thus t is in no queue */
359 TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state);
360
361 BUG_ON(!is_realtime(t));
362
363 /* If this task blocked normally, it shouldn't be queued. The exception is
364 * if this is a simulated block()/wakeup() pair from the pull-migration code path.
365 * This should only happen if the DPCP is being used.
366 */
367#ifdef CONFIG_LITMUS_LOCKING
368 if (unlikely(is_queued(t)))
369 TRACE_TASK(t, "WARNING: blocking task still queued. Is this right?\n");
370#else
371 BUG_ON(is_queued(t));
372#endif
373}
374
375static void pfp_task_exit(struct task_struct * t)
376{
377 unsigned long flags;
378 pfp_domain_t* pfp = task_pfp(t);
379 rt_domain_t* dom;
380
381 raw_spin_lock_irqsave(&pfp->slock, flags);
382 if (is_queued(t)) {
383 BUG(); /* This currently doesn't work. */
384 /* dequeue */
385 dom = task_dom(t);
386 remove(dom, t);
387 }
388 if (pfp->scheduled == t) {
389 pfp->scheduled = NULL;
390 preempt(pfp);
391 }
392 TRACE_TASK(t, "RIP, now reschedule\n");
393
394 raw_spin_unlock_irqrestore(&pfp->slock, flags);
395}
396
397#ifdef CONFIG_LITMUS_LOCKING
398
399#include <litmus/fdso.h>
400#include <litmus/srp.h>
401
402static void fp_dequeue(pfp_domain_t* pfp, struct task_struct* t)
403{
404 BUG_ON(pfp->scheduled == t && is_queued(t));
405 if (is_queued(t))
406 fp_prio_remove(&pfp->ready_queue, t, priority_index(t));
407}
408
409static void fp_set_prio_inh(pfp_domain_t* pfp, struct task_struct* t,
410 struct task_struct* prio_inh)
411{
412 int requeue;
413
414 if (!t || t->rt_param.inh_task == prio_inh) {
415 /* no update required */
416 if (t)
417 TRACE_TASK(t, "no prio-inh update required\n");
418 return;
419 }
420
421 requeue = is_queued(t);
422 TRACE_TASK(t, "prio-inh: is_queued:%d\n", requeue);
423
424 if (requeue)
425 /* first remove */
426 fp_dequeue(pfp, t);
427
428 t->rt_param.inh_task = prio_inh;
429
430 if (requeue)
431 /* add again to the right queue */
432 fp_prio_add(&pfp->ready_queue, t, priority_index(t));
433}
434
435static int effective_agent_priority(int prio)
436{
437 /* make sure agents have higher priority */
438 return prio - LITMUS_MAX_PRIORITY;
439}
440
441static lt_t prio_point(int eprio)
442{
443 /* make sure we have non-negative prio points */
444 return eprio + LITMUS_MAX_PRIORITY;
445}
446
447static void boost_priority(struct task_struct* t, lt_t priority_point)
448{
449 unsigned long flags;
450 pfp_domain_t* pfp = task_pfp(t);
451
452 raw_spin_lock_irqsave(&pfp->slock, flags);
453
454
455 TRACE_TASK(t, "priority boosted at %llu\n", litmus_clock());
456
457 tsk_rt(t)->priority_boosted = 1;
458 /* tie-break by protocol-specific priority point */
459 tsk_rt(t)->boost_start_time = priority_point;
460
461 /* Priority boosting currently only takes effect for already-scheduled
462 * tasks. This is sufficient since priority boosting only kicks in as
463 * part of lock acquisitions. */
464 BUG_ON(pfp->scheduled != t);
465
466 raw_spin_unlock_irqrestore(&pfp->slock, flags);
467}
468
469static void unboost_priority(struct task_struct* t)
470{
471 unsigned long flags;
472 pfp_domain_t* pfp = task_pfp(t);
473
474 raw_spin_lock_irqsave(&pfp->slock, flags);
475
476 /* Assumption: this only happens when the job is scheduled.
477 * Exception: If t transitioned to non-real-time mode, we no longer
478	 * care about it. */
479 BUG_ON(pfp->scheduled != t && is_realtime(t));
480
481 TRACE_TASK(t, "priority restored at %llu\n", litmus_clock());
482
483 tsk_rt(t)->priority_boosted = 0;
484 tsk_rt(t)->boost_start_time = 0;
485
486 /* check if this changes anything */
487 if (fp_preemption_needed(&pfp->ready_queue, pfp->scheduled))
488 preempt(pfp);
489
490 raw_spin_unlock_irqrestore(&pfp->slock, flags);
491}
492
493/* ******************** SRP support ************************ */
494
495static unsigned int pfp_get_srp_prio(struct task_struct* t)
496{
497 return get_priority(t);
498}
499
500/* ******************** FMLP support ********************** */
501
502struct fmlp_semaphore {
503 struct litmus_lock litmus_lock;
504
505 /* current resource holder */
506 struct task_struct *owner;
507
508 /* FIFO queue of waiting tasks */
509 wait_queue_head_t wait;
510};
511
512static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock)
513{
514 return container_of(lock, struct fmlp_semaphore, litmus_lock);
515}
516
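/* Per-CPU timestamp counter; each FMLP request draws an increasing value
 * that is used to tie-break priority boosting in FIFO order. */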
517static inline lt_t
518fmlp_clock(void)
519{
520 return (lt_t) __get_cpu_var(fmlp_timestamp)++;
521}
522
523int pfp_fmlp_lock(struct litmus_lock* l)
524{
525 struct task_struct* t = current;
526 struct fmlp_semaphore *sem = fmlp_from_lock(l);
527 wait_queue_t wait;
528 unsigned long flags;
529 lt_t time_of_request;
530
531 if (!is_realtime(t))
532 return -EPERM;
533
534 /* prevent nested lock acquisition --- not supported by FMLP */
535 if (tsk_rt(t)->num_locks_held ||
536 tsk_rt(t)->num_local_locks_held)
537 return -EBUSY;
538
539 spin_lock_irqsave(&sem->wait.lock, flags);
540
541 /* tie-break by this point in time */
542 time_of_request = fmlp_clock();
543
544 /* Priority-boost ourself *before* we suspend so that
545 * our priority is boosted when we resume. */
546 boost_priority(t, time_of_request);
547
548 if (sem->owner) {
549 /* resource is not free => must suspend and wait */
550
551 init_waitqueue_entry(&wait, t);
552
553 /* FIXME: interruptible would be nice some day */
554 set_task_state(t, TASK_UNINTERRUPTIBLE);
555
556 __add_wait_queue_tail_exclusive(&sem->wait, &wait);
557
558 TS_LOCK_SUSPEND;
559
560 /* release lock before sleeping */
561 spin_unlock_irqrestore(&sem->wait.lock, flags);
562
563 /* We depend on the FIFO order. Thus, we don't need to recheck
564 * when we wake up; we are guaranteed to have the lock since
565 * there is only one wake up per release.
566 */
567
568 schedule();
569
570 TS_LOCK_RESUME;
571
572 /* Since we hold the lock, no other task will change
573 * ->owner. We can thus check it without acquiring the spin
574 * lock. */
575 BUG_ON(sem->owner != t);
576 } else {
577 /* it's ours now */
578 sem->owner = t;
579
580 spin_unlock_irqrestore(&sem->wait.lock, flags);
581 }
582
583 tsk_rt(t)->num_locks_held++;
584
585 return 0;
586}
587
588int pfp_fmlp_unlock(struct litmus_lock* l)
589{
590 struct task_struct *t = current, *next = NULL;
591 struct fmlp_semaphore *sem = fmlp_from_lock(l);
592 unsigned long flags;
593 int err = 0;
594
595 preempt_disable();
596
597 spin_lock_irqsave(&sem->wait.lock, flags);
598
599 if (sem->owner != t) {
600 err = -EINVAL;
601 goto out;
602 }
603
604 tsk_rt(t)->num_locks_held--;
605
606 /* we lose the benefit of priority boosting */
607
608 unboost_priority(t);
609
610 /* check if there are jobs waiting for this resource */
611 next = __waitqueue_remove_first(&sem->wait);
612 sem->owner = next;
613
614out:
615 spin_unlock_irqrestore(&sem->wait.lock, flags);
616
617 /* Wake up next. The waiting job is already priority-boosted. */
618 if(next) {
619 wake_up_process(next);
620 }
621
622 preempt_enable();
623
624 return err;
625}
626
627int pfp_fmlp_close(struct litmus_lock* l)
628{
629 struct task_struct *t = current;
630 struct fmlp_semaphore *sem = fmlp_from_lock(l);
631 unsigned long flags;
632
633 int owner;
634
635 spin_lock_irqsave(&sem->wait.lock, flags);
636
637 owner = sem->owner == t;
638
639 spin_unlock_irqrestore(&sem->wait.lock, flags);
640
641 if (owner)
642 pfp_fmlp_unlock(l);
643
644 return 0;
645}
646
647void pfp_fmlp_free(struct litmus_lock* lock)
648{
649 kfree(fmlp_from_lock(lock));
650}
651
652static struct litmus_lock_ops pfp_fmlp_lock_ops = {
653 .close = pfp_fmlp_close,
654 .lock = pfp_fmlp_lock,
655 .unlock = pfp_fmlp_unlock,
656 .deallocate = pfp_fmlp_free,
657};
658
659static struct litmus_lock* pfp_new_fmlp(void)
660{
661 struct fmlp_semaphore* sem;
662
663 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
664 if (!sem)
665 return NULL;
666
667 sem->owner = NULL;
668 init_waitqueue_head(&sem->wait);
669 sem->litmus_lock.ops = &pfp_fmlp_lock_ops;
670
671 return &sem->litmus_lock;
672}
673
674/* ******************** MPCP support ********************** */
675
676struct mpcp_semaphore {
677 struct litmus_lock litmus_lock;
678
679 /* current resource holder */
680 struct task_struct *owner;
681
682 /* priority queue of waiting tasks */
683 wait_queue_head_t wait;
684
685 /* priority ceiling per cpu */
686 unsigned int prio_ceiling[NR_CPUS];
687
688 /* should jobs spin "virtually" for this resource? */
689 int vspin;
690};
691
692#define OMEGA_CEILING UINT_MAX
693
694/* Since jobs spin "virtually" while waiting to acquire a lock,
695	 * they first must acquire a local per-cpu resource.
696 */
697static DEFINE_PER_CPU(wait_queue_head_t, mpcpvs_vspin_wait);
698static DEFINE_PER_CPU(struct task_struct*, mpcpvs_vspin);
699
700/* called with preemptions off <=> no local modifications */
701static void mpcp_vspin_enter(void)
702{
703 struct task_struct* t = current;
704
705 while (1) {
706 if (__get_cpu_var(mpcpvs_vspin) == NULL) {
707 /* good, we get to issue our request */
708 __get_cpu_var(mpcpvs_vspin) = t;
709 break;
710 } else {
711 /* some job is spinning => enqueue in request queue */
712 prio_wait_queue_t wait;
713 wait_queue_head_t* vspin = &__get_cpu_var(mpcpvs_vspin_wait);
714 unsigned long flags;
715
716 /* ordered by regular priority */
717 init_prio_waitqueue_entry(&wait, t, prio_point(get_priority(t)));
718
719 spin_lock_irqsave(&vspin->lock, flags);
720
721 set_task_state(t, TASK_UNINTERRUPTIBLE);
722
723 __add_wait_queue_prio_exclusive(vspin, &wait);
724
725 spin_unlock_irqrestore(&vspin->lock, flags);
726
727 TS_LOCK_SUSPEND;
728
729 preempt_enable_no_resched();
730
731 schedule();
732
733 preempt_disable();
734
735 TS_LOCK_RESUME;
736 /* Recheck if we got it --- some higher-priority process might
737 * have swooped in. */
738 }
739 }
740 /* ok, now it is ours */
741}
742
743/* called with preemptions off */
744static void mpcp_vspin_exit(void)
745{
746 struct task_struct* t = current, *next;
747 unsigned long flags;
748 wait_queue_head_t* vspin = &__get_cpu_var(mpcpvs_vspin_wait);
749
750 BUG_ON(__get_cpu_var(mpcpvs_vspin) != t);
751
752 /* no spinning job */
753 __get_cpu_var(mpcpvs_vspin) = NULL;
754
755 /* see if anyone is waiting for us to stop "spinning" */
756 spin_lock_irqsave(&vspin->lock, flags);
757 next = __waitqueue_remove_first(vspin);
758
759 if (next)
760 wake_up_process(next);
761
762 spin_unlock_irqrestore(&vspin->lock, flags);
763}
764
765static inline struct mpcp_semaphore* mpcp_from_lock(struct litmus_lock* lock)
766{
767 return container_of(lock, struct mpcp_semaphore, litmus_lock);
768}
769
770int pfp_mpcp_lock(struct litmus_lock* l)
771{
772 struct task_struct* t = current;
773 struct mpcp_semaphore *sem = mpcp_from_lock(l);
774 prio_wait_queue_t wait;
775 unsigned long flags;
776
777 if (!is_realtime(t))
778 return -EPERM;
779
780 /* prevent nested lock acquisition */
781 if (tsk_rt(t)->num_locks_held ||
782 tsk_rt(t)->num_local_locks_held)
783 return -EBUSY;
784
785 preempt_disable();
786
787 if (sem->vspin)
788 mpcp_vspin_enter();
789
790 /* Priority-boost ourself *before* we suspend so that
791 * our priority is boosted when we resume. Use the priority
792 * ceiling for the local partition. */
793 boost_priority(t, sem->prio_ceiling[get_partition(t)]);
794
795 spin_lock_irqsave(&sem->wait.lock, flags);
796
797 preempt_enable_no_resched();
798
799 if (sem->owner) {
800 /* resource is not free => must suspend and wait */
801
802 /* ordered by regular priority */
803 init_prio_waitqueue_entry(&wait, t, prio_point(get_priority(t)));
804
805 /* FIXME: interruptible would be nice some day */
806 set_task_state(t, TASK_UNINTERRUPTIBLE);
807
808 __add_wait_queue_prio_exclusive(&sem->wait, &wait);
809
810 TS_LOCK_SUSPEND;
811
812 /* release lock before sleeping */
813 spin_unlock_irqrestore(&sem->wait.lock, flags);
814
815 /* We depend on the FIFO order. Thus, we don't need to recheck
816 * when we wake up; we are guaranteed to have the lock since
817 * there is only one wake up per release.
818 */
819
820 schedule();
821
822 TS_LOCK_RESUME;
823
824 /* Since we hold the lock, no other task will change
825 * ->owner. We can thus check it without acquiring the spin
826 * lock. */
827 BUG_ON(sem->owner != t);
828 } else {
829 /* it's ours now */
830 sem->owner = t;
831
832 spin_unlock_irqrestore(&sem->wait.lock, flags);
833 }
834
835 tsk_rt(t)->num_locks_held++;
836
837 return 0;
838}
839
840int pfp_mpcp_unlock(struct litmus_lock* l)
841{
842 struct task_struct *t = current, *next = NULL;
843 struct mpcp_semaphore *sem = mpcp_from_lock(l);
844 unsigned long flags;
845 int err = 0;
846
847 preempt_disable();
848
849 spin_lock_irqsave(&sem->wait.lock, flags);
850
851 if (sem->owner != t) {
852 err = -EINVAL;
853 goto out;
854 }
855
856 tsk_rt(t)->num_locks_held--;
857
858 /* we lose the benefit of priority boosting */
859 unboost_priority(t);
860
861 /* check if there are jobs waiting for this resource */
862 next = __waitqueue_remove_first(&sem->wait);
863 sem->owner = next;
864
865out:
866 spin_unlock_irqrestore(&sem->wait.lock, flags);
867
868 /* Wake up next. The waiting job is already priority-boosted. */
869 if(next) {
870 wake_up_process(next);
871 }
872
873 if (sem->vspin && err == 0) {
874 mpcp_vspin_exit();
875 }
876
877 preempt_enable();
878
879 return err;
880}
881
882int pfp_mpcp_open(struct litmus_lock* l, void* config)
883{
884 struct task_struct *t = current;
885 int cpu, local_cpu;
886 struct mpcp_semaphore *sem = mpcp_from_lock(l);
887 unsigned long flags;
888
889 if (!is_realtime(t))
890 /* we need to know the real-time priority */
891 return -EPERM;
892
893 local_cpu = get_partition(t);
894
895 spin_lock_irqsave(&sem->wait.lock, flags);
896 for (cpu = 0; cpu < NR_CPUS; cpu++) {
897 if (cpu != local_cpu) {
898 sem->prio_ceiling[cpu] = min(sem->prio_ceiling[cpu],
899 get_priority(t));
900 TRACE_CUR("priority ceiling for sem %p is now %d on cpu %d\n",
901 sem, sem->prio_ceiling[cpu], cpu);
902 }
903 }
904 spin_unlock_irqrestore(&sem->wait.lock, flags);
905
906 return 0;
907}
908
909int pfp_mpcp_close(struct litmus_lock* l)
910{
911 struct task_struct *t = current;
912 struct mpcp_semaphore *sem = mpcp_from_lock(l);
913 unsigned long flags;
914
915 int owner;
916
917 spin_lock_irqsave(&sem->wait.lock, flags);
918
919 owner = sem->owner == t;
920
921 spin_unlock_irqrestore(&sem->wait.lock, flags);
922
923 if (owner)
924 pfp_mpcp_unlock(l);
925
926 return 0;
927}
928
929void pfp_mpcp_free(struct litmus_lock* lock)
930{
931 kfree(mpcp_from_lock(lock));
932}
933
934static struct litmus_lock_ops pfp_mpcp_lock_ops = {
935 .close = pfp_mpcp_close,
936 .lock = pfp_mpcp_lock,
937 .open = pfp_mpcp_open,
938 .unlock = pfp_mpcp_unlock,
939 .deallocate = pfp_mpcp_free,
940};
941
942static struct litmus_lock* pfp_new_mpcp(int vspin)
943{
944 struct mpcp_semaphore* sem;
945 int cpu;
946
947 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
948 if (!sem)
949 return NULL;
950
951 sem->owner = NULL;
952 init_waitqueue_head(&sem->wait);
953 sem->litmus_lock.ops = &pfp_mpcp_lock_ops;
954
955 for (cpu = 0; cpu < NR_CPUS; cpu++)
956 sem->prio_ceiling[cpu] = OMEGA_CEILING;
957
958 /* mark as virtual spinning */
959 sem->vspin = vspin;
960
961 return &sem->litmus_lock;
962}
963
964
965/* ******************** PCP support ********************** */
966
967
968struct pcp_semaphore {
969 struct litmus_lock litmus_lock;
970
971 struct list_head ceiling;
972
973 /* current resource holder */
974 struct task_struct *owner;
975
976 /* priority ceiling --- can be negative due to DPCP support */
977 int prio_ceiling;
978
979 /* on which processor is this PCP semaphore allocated? */
980 int on_cpu;
981};
982
983static inline struct pcp_semaphore* pcp_from_lock(struct litmus_lock* lock)
984{
985 return container_of(lock, struct pcp_semaphore, litmus_lock);
986}
987
988
989struct pcp_state {
990 struct list_head system_ceiling;
991
992 /* highest-priority waiting task */
993 struct task_struct* hp_waiter;
994
995 /* list of jobs waiting to get past the system ceiling */
996 wait_queue_head_t ceiling_blocked;
997};
998
999static void pcp_init_state(struct pcp_state* s)
1000{
1001 INIT_LIST_HEAD(&s->system_ceiling);
1002 s->hp_waiter = NULL;
1003 init_waitqueue_head(&s->ceiling_blocked);
1004}
1005
1006static DEFINE_PER_CPU(struct pcp_state, pcp_state);
1007
1008/* assumes preemptions are off */
1009static struct pcp_semaphore* pcp_get_ceiling(void)
1010{
1011 struct list_head* top = &__get_cpu_var(pcp_state).system_ceiling;
1012 return list_first_entry_or_null(top, struct pcp_semaphore, ceiling);
1013}
1014
1015/* assumes preempt off */
1016static void pcp_add_ceiling(struct pcp_semaphore* sem)
1017{
1018 struct list_head *pos;
1019 struct list_head *in_use = &__get_cpu_var(pcp_state).system_ceiling;
1020 struct pcp_semaphore* held;
1021
1022 BUG_ON(sem->on_cpu != smp_processor_id());
1023 BUG_ON(in_list(&sem->ceiling));
1024
1025 list_for_each(pos, in_use) {
1026 held = list_entry(pos, struct pcp_semaphore, ceiling);
1027 if (held->prio_ceiling >= sem->prio_ceiling) {
1028 __list_add(&sem->ceiling, pos->prev, pos);
1029 return;
1030 }
1031 }
1032
1033 /* we hit the end of the list */
1034
1035 list_add_tail(&sem->ceiling, in_use);
1036}
1037
1038/* assumes preempt off */
1039static int pcp_exceeds_ceiling(struct pcp_semaphore* ceiling,
1040 struct task_struct* task,
1041 int effective_prio)
1042{
1043 return ceiling == NULL ||
1044 ceiling->prio_ceiling > effective_prio ||
1045 ceiling->owner == task;
1046}
1047
1048/* assumes preempt off */
1049static void pcp_priority_inheritance(void)
1050{
1051 unsigned long flags;
1052 pfp_domain_t* pfp = local_pfp;
1053
1054 struct pcp_semaphore* ceiling = pcp_get_ceiling();
1055 struct task_struct *blocker, *blocked;
1056
1057 blocker = ceiling ? ceiling->owner : NULL;
1058 blocked = __get_cpu_var(pcp_state).hp_waiter;
1059
1060 raw_spin_lock_irqsave(&pfp->slock, flags);
1061
1062 /* Current is no longer inheriting anything by default. This should be
1063 * the currently scheduled job, and hence not currently queued.
1064 * Special case: if current stopped being a real-time task, it will no longer
1065 * be registered as pfp->scheduled. */
1066 BUG_ON(current != pfp->scheduled && is_realtime(current));
1067
1068 fp_set_prio_inh(pfp, current, NULL);
1069 fp_set_prio_inh(pfp, blocked, NULL);
1070 fp_set_prio_inh(pfp, blocker, NULL);
1071
1072 /* Let blocking job inherit priority of blocked job, if required. */
1073 if (blocker && blocked &&
1074 fp_higher_prio(blocked, blocker)) {
1075 TRACE_TASK(blocker, "PCP inherits from %s/%d (prio %u -> %u) \n",
1076 blocked->comm, blocked->pid,
1077 get_priority(blocker), get_priority(blocked));
1078 fp_set_prio_inh(pfp, blocker, blocked);
1079 }
1080
1081 /* Check if anything changed. If the blocked job is current, then it is
1082 * just blocking and hence is going to call the scheduler anyway. */
1083 if (blocked != current &&
1084 fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled))
1085 preempt(pfp);
1086
1087 raw_spin_unlock_irqrestore(&pfp->slock, flags);
1088}
1089
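/* Acquire sem under the PCP: suspend until the request exceeds the current
 * system ceiling, then take ownership and add sem to the ceiling list. */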
1090/* called with preemptions off */
1091static void pcp_raise_ceiling(struct pcp_semaphore* sem,
1092 int effective_prio)
1093{
1094 struct task_struct* t = current;
1095 struct pcp_semaphore* ceiling;
1096 prio_wait_queue_t wait;
1097 unsigned int waiting_higher_prio;
1098
1099 while(1) {
1100 ceiling = pcp_get_ceiling();
1101 if (pcp_exceeds_ceiling(ceiling, t, effective_prio))
1102 break;
1103
1104 TRACE_CUR("PCP ceiling-blocked, wanted sem %p, but %s/%d has the ceiling \n",
1105 sem, ceiling->owner->comm, ceiling->owner->pid);
1106
1107 /* we need to wait until the ceiling is lowered */
1108
1109 /* enqueue in priority order */
1110 init_prio_waitqueue_entry(&wait, t, effective_prio);
1111 set_task_state(t, TASK_UNINTERRUPTIBLE);
1112 waiting_higher_prio = add_wait_queue_prio_exclusive(
1113 &__get_cpu_var(pcp_state).ceiling_blocked, &wait);
1114
1115 if (waiting_higher_prio == 0) {
1116 TRACE_CUR("PCP new highest-prio waiter => prio inheritance\n");
1117
1118 /* we are the new highest-priority waiting job
1119 * => update inheritance */
1120 __get_cpu_var(pcp_state).hp_waiter = t;
1121 pcp_priority_inheritance();
1122 }
1123
1124 TS_LOCK_SUSPEND;
1125
1126 preempt_enable_no_resched();
1127 schedule();
1128 preempt_disable();
1129
1130 /* pcp_resume_unblocked() removed us from wait queue */
1131
1132 TS_LOCK_RESUME;
1133 }
1134
1135 TRACE_CUR("PCP got the ceiling and sem %p\n", sem);
1136
1137 /* We are good to go. The semaphore should be available. */
1138 BUG_ON(sem->owner != NULL);
1139
1140 sem->owner = t;
1141
1142 pcp_add_ceiling(sem);
1143}
1144
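/* Wake up, in priority order, all ceiling-blocked jobs that now pass the
 * system ceiling; the first one that does not becomes the recorded
 * highest-priority waiter. */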
1145static void pcp_resume_unblocked(void)
1146{
1147 wait_queue_head_t *blocked = &__get_cpu_var(pcp_state).ceiling_blocked;
1148 unsigned long flags;
1149 prio_wait_queue_t* q;
1150 struct task_struct* t = NULL;
1151
1152 struct pcp_semaphore* ceiling = pcp_get_ceiling();
1153
1154 spin_lock_irqsave(&blocked->lock, flags);
1155
1156 while (waitqueue_active(blocked)) {
1157 /* check first == highest-priority waiting job */
1158 q = list_entry(blocked->task_list.next,
1159 prio_wait_queue_t, wq.task_list);
1160 t = (struct task_struct*) q->wq.private;
1161
1162 /* can it proceed now? => let it go */
1163 if (pcp_exceeds_ceiling(ceiling, t, q->priority)) {
1164 __remove_wait_queue(blocked, &q->wq);
1165 wake_up_process(t);
1166 } else {
1167 /* We are done. Update highest-priority waiter. */
1168 __get_cpu_var(pcp_state).hp_waiter = t;
1169 goto out;
1170 }
1171 }
1172 /* If we get here, then there are no more waiting
1173 * jobs. */
1174 __get_cpu_var(pcp_state).hp_waiter = NULL;
1175out:
1176 spin_unlock_irqrestore(&blocked->lock, flags);
1177}
1178
1179/* assumes preempt off */
1180static void pcp_lower_ceiling(struct pcp_semaphore* sem)
1181{
1182 BUG_ON(!in_list(&sem->ceiling));
1183 BUG_ON(sem->owner != current);
1184 BUG_ON(sem->on_cpu != smp_processor_id());
1185
1186 /* remove from ceiling list */
1187 list_del(&sem->ceiling);
1188
1189 /* release */
1190 sem->owner = NULL;
1191
1192 TRACE_CUR("PCP released sem %p\n", sem);
1193
1194 /* Wake up all ceiling-blocked jobs that now pass the ceiling. */
1195 pcp_resume_unblocked();
1196
1197 pcp_priority_inheritance();
1198}
1199
1200static void pcp_update_prio_ceiling(struct pcp_semaphore* sem,
1201 int effective_prio)
1202{
1203 /* This needs to be synchronized on something.
1204 * Might as well use waitqueue lock for the processor.
1205 * We assume this happens only before the task set starts execution,
1206 * (i.e., during initialization), but it may happen on multiple processors
1207 * at the same time.
1208 */
1209 unsigned long flags;
1210
1211 struct pcp_state* s = &per_cpu(pcp_state, sem->on_cpu);
1212
1213 spin_lock_irqsave(&s->ceiling_blocked.lock, flags);
1214
1215 sem->prio_ceiling = min(sem->prio_ceiling, effective_prio);
1216
1217 spin_unlock_irqrestore(&s->ceiling_blocked.lock, flags);
1218}
1219
1220static void pcp_init_semaphore(struct pcp_semaphore* sem, int cpu)
1221{
1222 sem->owner = NULL;
1223 INIT_LIST_HEAD(&sem->ceiling);
1224 sem->prio_ceiling = INT_MAX;
1225 sem->on_cpu = cpu;
1226}
1227
1228int pfp_pcp_lock(struct litmus_lock* l)
1229{
1230 struct task_struct* t = current;
1231 struct pcp_semaphore *sem = pcp_from_lock(l);
1232
1233 /* The regular PCP uses the regular task priorities, not agent
1234 * priorities. */
1235 int eprio = get_priority(t);
1236 int from = get_partition(t);
1237 int to = sem->on_cpu;
1238
1239 if (!is_realtime(t) || from != to)
1240 return -EPERM;
1241
1242 /* prevent nested lock acquisition in global critical section */
1243 if (tsk_rt(t)->num_locks_held)
1244 return -EBUSY;
1245
1246 preempt_disable();
1247
1248 pcp_raise_ceiling(sem, eprio);
1249
1250 preempt_enable();
1251
1252 tsk_rt(t)->num_local_locks_held++;
1253
1254 return 0;
1255}
1256
1257int pfp_pcp_unlock(struct litmus_lock* l)
1258{
1259 struct task_struct *t = current;
1260 struct pcp_semaphore *sem = pcp_from_lock(l);
1261
1262 int err = 0;
1263
1264 preempt_disable();
1265
1266 if (sem->owner != t) {
1267 err = -EINVAL;
1268 goto out;
1269 }
1270
1271 /* The current owner should be executing on the correct CPU.
1272 *
1273 * FIXME: if the owner transitioned out of RT mode or is exiting, then
1274	 * it might have already been migrated away by the best-effort
1275 * scheduler and we just have to deal with it. This is currently not
1276 * supported. */
1277 BUG_ON(sem->on_cpu != smp_processor_id());
1278
1279 tsk_rt(t)->num_local_locks_held--;
1280
1281 /* give it back */
1282 pcp_lower_ceiling(sem);
1283
1284out:
1285 preempt_enable();
1286
1287 return err;
1288}
1289
1290int pfp_pcp_open(struct litmus_lock* l, void* __user config)
1291{
1292 struct task_struct *t = current;
1293 struct pcp_semaphore *sem = pcp_from_lock(l);
1294
1295 int cpu, eprio;
1296
1297 if (!is_realtime(t))
1298 /* we need to know the real-time priority */
1299 return -EPERM;
1300
1301 if (!config)
1302 cpu = get_partition(t);
1303 else if (get_user(cpu, (int*) config))
1304 return -EFAULT;
1305
1306 /* make sure the resource location matches */
1307 if (cpu != sem->on_cpu)
1308 return -EINVAL;
1309
1310	/* The regular PCP uses regular task priorities, not agent
1311 * priorities. */
1312 eprio = get_priority(t);
1313
1314 pcp_update_prio_ceiling(sem, eprio);
1315
1316 return 0;
1317}
1318
1319int pfp_pcp_close(struct litmus_lock* l)
1320{
1321 struct task_struct *t = current;
1322 struct pcp_semaphore *sem = pcp_from_lock(l);
1323
1324 int owner = 0;
1325
1326 preempt_disable();
1327
1328 if (sem->on_cpu == smp_processor_id())
1329 owner = sem->owner == t;
1330
1331 preempt_enable();
1332
1333 if (owner)
1334 pfp_pcp_unlock(l);
1335
1336 return 0;
1337}
1338
1339void pfp_pcp_free(struct litmus_lock* lock)
1340{
1341 kfree(pcp_from_lock(lock));
1342}
1343
1344
1345static struct litmus_lock_ops pfp_pcp_lock_ops = {
1346 .close = pfp_pcp_close,
1347 .lock = pfp_pcp_lock,
1348 .open = pfp_pcp_open,
1349 .unlock = pfp_pcp_unlock,
1350 .deallocate = pfp_pcp_free,
1351};
1352
1353
1354static struct litmus_lock* pfp_new_pcp(int on_cpu)
1355{
1356 struct pcp_semaphore* sem;
1357
1358 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1359 if (!sem)
1360 return NULL;
1361
1362 sem->litmus_lock.ops = &pfp_pcp_lock_ops;
1363 pcp_init_semaphore(sem, on_cpu);
1364
1365 return &sem->litmus_lock;
1366}
1367
1368/* ******************** DPCP support ********************** */
1369
1370struct dpcp_semaphore {
1371 struct litmus_lock litmus_lock;
1372 struct pcp_semaphore pcp;
1373 int owner_cpu;
1374};
1375
1376static inline struct dpcp_semaphore* dpcp_from_lock(struct litmus_lock* lock)
1377{
1378 return container_of(lock, struct dpcp_semaphore, litmus_lock);
1379}
1380
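/* Move the calling task to target_cpu's partition and deschedule so that
 * it resumes execution on the target processor. */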
1381/* called with preemptions disabled */
1382static void pfp_migrate_to(int target_cpu)
1383{
1384 struct task_struct* t = current;
1385 pfp_domain_t *from;
1386
1387 if (get_partition(t) == target_cpu)
1388 return;
1389
1390 /* make sure target_cpu makes sense */
1391 BUG_ON(!cpu_online(target_cpu));
1392
1393 local_irq_disable();
1394
1395 from = task_pfp(t);
1396 raw_spin_lock(&from->slock);
1397
1398 /* Scheduled task should not be in any ready or release queue. Check
1399 * this while holding the lock to avoid RT mode transitions.*/
1400 BUG_ON(is_realtime(t) && is_queued(t));
1401
1402 /* switch partitions */
1403 tsk_rt(t)->task_params.cpu = target_cpu;
1404
1405 raw_spin_unlock(&from->slock);
1406
1407 /* Don't trace scheduler costs as part of
1408 * locking overhead. Scheduling costs are accounted for
1409 * explicitly. */
1410 TS_LOCK_SUSPEND;
1411
1412 local_irq_enable();
1413 preempt_enable_no_resched();
1414
1415 /* deschedule to be migrated */
1416 schedule();
1417
1418 /* we are now on the target processor */
1419 preempt_disable();
1420
1421 /* start recording costs again */
1422 TS_LOCK_RESUME;
1423
1424 BUG_ON(smp_processor_id() != target_cpu && is_realtime(t));
1425}
1426
1427int pfp_dpcp_lock(struct litmus_lock* l)
1428{
1429 struct task_struct* t = current;
1430 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1431 int eprio = effective_agent_priority(get_priority(t));
1432 int from = get_partition(t);
1433 int to = sem->pcp.on_cpu;
1434
1435 if (!is_realtime(t))
1436 return -EPERM;
1437
1438	/* prevent nested lock acquisition */
1439 if (tsk_rt(t)->num_locks_held ||
1440 tsk_rt(t)->num_local_locks_held)
1441 return -EBUSY;
1442
1443 preempt_disable();
1444
1445 /* Priority-boost ourself *before* we suspend so that
1446 * our priority is boosted when we resume. */
1447
1448 boost_priority(t, get_priority(t));
1449
1450 pfp_migrate_to(to);
1451
1452 pcp_raise_ceiling(&sem->pcp, eprio);
1453
1454 /* yep, we got it => execute request */
1455 sem->owner_cpu = from;
1456
1457 preempt_enable();
1458
1459 tsk_rt(t)->num_locks_held++;
1460
1461 return 0;
1462}
1463
1464int pfp_dpcp_unlock(struct litmus_lock* l)
1465{
1466 struct task_struct *t = current;
1467 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1468 int err = 0;
1469 int home;
1470
1471 preempt_disable();
1472
1473 if (sem->pcp.owner != t) {
1474 err = -EINVAL;
1475 goto out;
1476 }
1477
1478 /* The current owner should be executing on the correct CPU.
1479 *
1480 * FIXME: if the owner transitioned out of RT mode or is exiting, then
1481	 * it might have already been migrated away by the best-effort
1482 * scheduler and we just have to deal with it. This is currently not
1483 * supported. */
1484 BUG_ON(sem->pcp.on_cpu != smp_processor_id());
1485
1486 tsk_rt(t)->num_locks_held--;
1487
1488 home = sem->owner_cpu;
1489
1490 /* give it back */
1491 pcp_lower_ceiling(&sem->pcp);
1492
1493 /* we lose the benefit of priority boosting */
1494 unboost_priority(t);
1495
1496 pfp_migrate_to(home);
1497
1498out:
1499 preempt_enable();
1500
1501 return err;
1502}
1503
1504int pfp_dpcp_open(struct litmus_lock* l, void* __user config)
1505{
1506 struct task_struct *t = current;
1507 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1508 int cpu, eprio;
1509
1510 if (!is_realtime(t))
1511 /* we need to know the real-time priority */
1512 return -EPERM;
1513
1514 if (get_user(cpu, (int*) config))
1515 return -EFAULT;
1516
1517 /* make sure the resource location matches */
1518 if (cpu != sem->pcp.on_cpu)
1519 return -EINVAL;
1520
1521 eprio = effective_agent_priority(get_priority(t));
1522
1523 pcp_update_prio_ceiling(&sem->pcp, eprio);
1524
1525 return 0;
1526}
1527
1528int pfp_dpcp_close(struct litmus_lock* l)
1529{
1530 struct task_struct *t = current;
1531 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1532 int owner = 0;
1533
1534 preempt_disable();
1535
1536 if (sem->pcp.on_cpu == smp_processor_id())
1537 owner = sem->pcp.owner == t;
1538
1539 preempt_enable();
1540
1541 if (owner)
1542 pfp_dpcp_unlock(l);
1543
1544 return 0;
1545}
1546
1547void pfp_dpcp_free(struct litmus_lock* lock)
1548{
1549 kfree(dpcp_from_lock(lock));
1550}
1551
1552static struct litmus_lock_ops pfp_dpcp_lock_ops = {
1553 .close = pfp_dpcp_close,
1554 .lock = pfp_dpcp_lock,
1555 .open = pfp_dpcp_open,
1556 .unlock = pfp_dpcp_unlock,
1557 .deallocate = pfp_dpcp_free,
1558};
1559
1560static struct litmus_lock* pfp_new_dpcp(int on_cpu)
1561{
1562 struct dpcp_semaphore* sem;
1563
1564 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1565 if (!sem)
1566 return NULL;
1567
1568 sem->litmus_lock.ops = &pfp_dpcp_lock_ops;
1569 sem->owner_cpu = NO_CPU;
1570 pcp_init_semaphore(&sem->pcp, on_cpu);
1571
1572 return &sem->litmus_lock;
1573}
1574
1575
1576/* ******************** DFLP support ********************** */
1577
1578struct dflp_semaphore {
1579 struct litmus_lock litmus_lock;
1580
1581 /* current resource holder */
1582 struct task_struct *owner;
1583 int owner_cpu;
1584
1585 /* FIFO queue of waiting tasks */
1586 wait_queue_head_t wait;
1587
1588 /* where is the resource assigned to */
1589 int on_cpu;
1590};
1591
1592static inline struct dflp_semaphore* dflp_from_lock(struct litmus_lock* lock)
1593{
1594 return container_of(lock, struct dflp_semaphore, litmus_lock);
1595}
1596
1597int pfp_dflp_lock(struct litmus_lock* l)
1598{
1599 struct task_struct* t = current;
1600 struct dflp_semaphore *sem = dflp_from_lock(l);
1601 int from = get_partition(t);
1602 int to = sem->on_cpu;
1603 unsigned long flags;
1604 wait_queue_t wait;
1605 lt_t time_of_request;
1606
1607 if (!is_realtime(t))
1608 return -EPERM;
1609
1610	/* prevent nested lock acquisition */
1611 if (tsk_rt(t)->num_locks_held ||
1612 tsk_rt(t)->num_local_locks_held)
1613 return -EBUSY;
1614
1615 preempt_disable();
1616
1617 /* tie-break by this point in time */
1618 time_of_request = litmus_clock();
1619
1620 /* Priority-boost ourself *before* we suspend so that
1621 * our priority is boosted when we resume. */
1622 boost_priority(t, time_of_request);
1623
1624 pfp_migrate_to(to);
1625
1626 /* Now on the right CPU, preemptions still disabled. */
1627
1628 spin_lock_irqsave(&sem->wait.lock, flags);
1629
1630 if (sem->owner) {
1631 /* resource is not free => must suspend and wait */
1632
1633 init_waitqueue_entry(&wait, t);
1634
1635 /* FIXME: interruptible would be nice some day */
1636 set_task_state(t, TASK_UNINTERRUPTIBLE);
1637
1638 __add_wait_queue_tail_exclusive(&sem->wait, &wait);
1639
1640 TS_LOCK_SUSPEND;
1641
1642 /* release lock before sleeping */
1643 spin_unlock_irqrestore(&sem->wait.lock, flags);
1644
1645 /* We depend on the FIFO order. Thus, we don't need to recheck
1646 * when we wake up; we are guaranteed to have the lock since
1647 * there is only one wake up per release.
1648 */
1649
1650 preempt_enable_no_resched();
1651
1652 schedule();
1653
1654 preempt_disable();
1655
1656 TS_LOCK_RESUME;
1657
1658 /* Since we hold the lock, no other task will change
1659 * ->owner. We can thus check it without acquiring the spin
1660 * lock. */
1661 BUG_ON(sem->owner != t);
1662 } else {
1663 /* it's ours now */
1664 sem->owner = t;
1665
1666 spin_unlock_irqrestore(&sem->wait.lock, flags);
1667 }
1668
1669 sem->owner_cpu = from;
1670
1671 preempt_enable();
1672
1673 tsk_rt(t)->num_locks_held++;
1674
1675 return 0;
1676}
1677
1678int pfp_dflp_unlock(struct litmus_lock* l)
1679{
1680 struct task_struct *t = current, *next;
1681 struct dflp_semaphore *sem = dflp_from_lock(l);
1682 int err = 0;
1683 int home;
1684 unsigned long flags;
1685
1686 preempt_disable();
1687
1688 spin_lock_irqsave(&sem->wait.lock, flags);
1689
1690 if (sem->owner != t) {
1691 err = -EINVAL;
1692 spin_unlock_irqrestore(&sem->wait.lock, flags);
1693 goto out;
1694 }
1695
1696 /* check if there are jobs waiting for this resource */
1697 next = __waitqueue_remove_first(&sem->wait);
1698 if (next) {
1699		/* next becomes the resource holder */
1700 sem->owner = next;
1701
1702 /* Wake up next. The waiting job is already priority-boosted. */
1703 wake_up_process(next);
1704 } else
1705 /* resource becomes available */
1706 sem->owner = NULL;
1707
1708 tsk_rt(t)->num_locks_held--;
1709
1710 home = sem->owner_cpu;
1711
1712 spin_unlock_irqrestore(&sem->wait.lock, flags);
1713
1714 /* we lose the benefit of priority boosting */
1715 unboost_priority(t);
1716
1717 pfp_migrate_to(home);
1718
1719out:
1720 preempt_enable();
1721
1722 return err;
1723}
1724
1725int pfp_dflp_open(struct litmus_lock* l, void* __user config)
1726{
1727 struct dflp_semaphore *sem = dflp_from_lock(l);
1728 int cpu;
1729
1730 if (get_user(cpu, (int*) config))
1731 return -EFAULT;
1732
1733 /* make sure the resource location matches */
1734 if (cpu != sem->on_cpu)
1735 return -EINVAL;
1736
1737 return 0;
1738}
1739
1740int pfp_dflp_close(struct litmus_lock* l)
1741{
1742 struct task_struct *t = current;
1743 struct dflp_semaphore *sem = dflp_from_lock(l);
1744 int owner = 0;
1745
1746 preempt_disable();
1747
1748 if (sem->on_cpu == smp_processor_id())
1749 owner = sem->owner == t;
1750
1751 preempt_enable();
1752
1753 if (owner)
1754 pfp_dflp_unlock(l);
1755
1756 return 0;
1757}
1758
1759void pfp_dflp_free(struct litmus_lock* lock)
1760{
1761 kfree(dflp_from_lock(lock));
1762}
1763
1764static struct litmus_lock_ops pfp_dflp_lock_ops = {
1765 .close = pfp_dflp_close,
1766 .lock = pfp_dflp_lock,
1767 .open = pfp_dflp_open,
1768 .unlock = pfp_dflp_unlock,
1769 .deallocate = pfp_dflp_free,
1770};
1771
1772static struct litmus_lock* pfp_new_dflp(int on_cpu)
1773{
1774 struct dflp_semaphore* sem;
1775
1776 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1777 if (!sem)
1778 return NULL;
1779
1780 sem->litmus_lock.ops = &pfp_dflp_lock_ops;
1781 sem->owner_cpu = NO_CPU;
1782 sem->owner = NULL;
1783 sem->on_cpu = on_cpu;
1784 init_waitqueue_head(&sem->wait);
1785
1786 return &sem->litmus_lock;
1787}
1788
1789
1790/* **** lock constructor **** */
1791
1792
1793static long pfp_allocate_lock(struct litmus_lock **lock, int type,
1794 void* __user config)
1795{
1796 int err = -ENXIO, cpu;
1797 struct srp_semaphore* srp;
1798
1799 /* P-FP currently supports the SRP for local resources and the FMLP
1800 * for global resources. */
1801 switch (type) {
1802 case FMLP_SEM:
1803 /* FIFO Mutex Locking Protocol */
1804 *lock = pfp_new_fmlp();
1805 if (*lock)
1806 err = 0;
1807 else
1808 err = -ENOMEM;
1809 break;
1810
1811 case MPCP_SEM:
1812 /* Multiprocesor Priority Ceiling Protocol */
1813 *lock = pfp_new_mpcp(0);
1814 if (*lock)
1815 err = 0;
1816 else
1817 err = -ENOMEM;
1818 break;
1819
1820 case MPCP_VS_SEM:
1821	/* Multiprocessor Priority Ceiling Protocol with virtual spinning */
1822 *lock = pfp_new_mpcp(1);
1823 if (*lock)
1824 err = 0;
1825 else
1826 err = -ENOMEM;
1827 break;
1828
1829 case DPCP_SEM:
1830 /* Distributed Priority Ceiling Protocol */
1831 if (get_user(cpu, (int*) config))
1832 return -EFAULT;
1833
1834 if (!cpu_online(cpu))
1835 return -EINVAL;
1836
1837 *lock = pfp_new_dpcp(cpu);
1838 if (*lock)
1839 err = 0;
1840 else
1841 err = -ENOMEM;
1842 break;
1843
1844 case DFLP_SEM:
1845 /* Distributed FIFO Locking Protocol */
1846 if (get_user(cpu, (int*) config))
1847 return -EFAULT;
1848
1849 if (!cpu_online(cpu))
1850 return -EINVAL;
1851
1852 *lock = pfp_new_dflp(cpu);
1853 if (*lock)
1854 err = 0;
1855 else
1856 err = -ENOMEM;
1857 break;
1858
1859 case SRP_SEM:
1860 /* Baker's Stack Resource Policy */
1861 srp = allocate_srp_semaphore();
1862 if (srp) {
1863 *lock = &srp->litmus_lock;
1864 err = 0;
1865 } else
1866 err = -ENOMEM;
1867 break;
1868
1869 case PCP_SEM:
1870 /* Priority Ceiling Protocol */
1871 if (!config)
1872 cpu = get_partition(current);
1873 else if (get_user(cpu, (int*) config))
1874 return -EFAULT;
1875
1876 if (!cpu_online(cpu))
1877 return -EINVAL;
1878
1879 *lock = pfp_new_pcp(cpu);
1880 if (*lock)
1881 err = 0;
1882 else
1883 err = -ENOMEM;
1884 break;
1885 };
1886
1887 return err;
1888}
1889
1890#endif
1891
1892static long pfp_admit_task(struct task_struct* tsk)
1893{
1894 if (task_cpu(tsk) == tsk->rt_param.task_params.cpu &&
1895#ifdef CONFIG_RELEASE_MASTER
1896 /* don't allow tasks on release master CPU */
1897 task_cpu(tsk) != remote_dom(task_cpu(tsk))->release_master &&
1898#endif
1899 litmus_is_valid_fixed_prio(get_priority(tsk)))
1900 return 0;
1901 else
1902 return -EINVAL;
1903}
1904
1905static struct domain_proc_info pfp_domain_proc_info;
1906static long pfp_get_domain_proc_info(struct domain_proc_info **ret)
1907{
1908 *ret = &pfp_domain_proc_info;
1909 return 0;
1910}
1911
1912static void pfp_setup_domain_proc(void)
1913{
1914 int i, cpu;
1915 int release_master =
1916#ifdef CONFIG_RELEASE_MASTER
1917 atomic_read(&release_master_cpu);
1918#else
1919 NO_CPU;
1920#endif
1921 int num_rt_cpus = num_online_cpus() - (release_master != NO_CPU);
1922 struct cd_mapping *cpu_map, *domain_map;
1923
1924	memset(&pfp_domain_proc_info, 0, sizeof(pfp_domain_proc_info));
1925 init_domain_proc_info(&pfp_domain_proc_info, num_rt_cpus, num_rt_cpus);
1926 pfp_domain_proc_info.num_cpus = num_rt_cpus;
1927 pfp_domain_proc_info.num_domains = num_rt_cpus;
1928 for (cpu = 0, i = 0; cpu < num_online_cpus(); ++cpu) {
1929 if (cpu == release_master)
1930 continue;
1931 cpu_map = &pfp_domain_proc_info.cpu_to_domains[i];
1932 domain_map = &pfp_domain_proc_info.domain_to_cpus[i];
1933
1934 cpu_map->id = cpu;
1935 domain_map->id = i; /* enumerate w/o counting the release master */
1936 cpumask_set_cpu(i, cpu_map->mask);
1937 cpumask_set_cpu(cpu, domain_map->mask);
1938 ++i;
1939 }
1940}
1941
1942static long pfp_activate_plugin(void)
1943{
1944#if defined(CONFIG_RELEASE_MASTER) || defined(CONFIG_LITMUS_LOCKING)
1945 int cpu;
1946#endif
1947
1948#ifdef CONFIG_RELEASE_MASTER
1949 for_each_online_cpu(cpu) {
1950 remote_dom(cpu)->release_master = atomic_read(&release_master_cpu);
1951 }
1952#endif
1953
1954#ifdef CONFIG_LITMUS_LOCKING
1955 get_srp_prio = pfp_get_srp_prio;
1956
1957 for_each_online_cpu(cpu) {
1958 init_waitqueue_head(&per_cpu(mpcpvs_vspin_wait, cpu));
1959 per_cpu(mpcpvs_vspin, cpu) = NULL;
1960
1961 pcp_init_state(&per_cpu(pcp_state, cpu));
1962 pfp_doms[cpu] = remote_pfp(cpu);
1963 per_cpu(fmlp_timestamp,cpu) = 0;
1964 }
1965
1966#endif
1967
1968 pfp_setup_domain_proc();
1969
1970 return 0;
1971}
1972
1973static long pfp_deactivate_plugin(void)
1974{
1975 destroy_domain_proc_info(&pfp_domain_proc_info);
1976 return 0;
1977}
1978
1979/* Plugin object */
1980static struct sched_plugin pfp_plugin __cacheline_aligned_in_smp = {
1981 .plugin_name = "P-FP",
1982 .task_new = pfp_task_new,
1983 .complete_job = complete_job,
1984 .task_exit = pfp_task_exit,
1985 .schedule = pfp_schedule,
1986 .task_wake_up = pfp_task_wake_up,
1987 .task_block = pfp_task_block,
1988 .admit_task = pfp_admit_task,
1989 .activate_plugin = pfp_activate_plugin,
1990 .deactivate_plugin = pfp_deactivate_plugin,
1991 .get_domain_proc_info = pfp_get_domain_proc_info,
1992#ifdef CONFIG_LITMUS_LOCKING
1993 .allocate_lock = pfp_allocate_lock,
1994 .finish_switch = pfp_finish_switch,
1995#endif
1996};
1997
1998
1999static int __init init_pfp(void)
2000{
2001 int i;
2002
2003 /* We do not really want to support cpu hotplug, do we? ;)
2004 * However, if we are so crazy to do so,
2005	 * we cannot use num_online_cpus()
2006 */
2007 for (i = 0; i < num_online_cpus(); i++) {
2008 pfp_domain_init(remote_pfp(i), i);
2009 }
2010 return register_sched_plugin(&pfp_plugin);
2011}
2012
2013module_init(init_pfp);