author     Bjoern Brandenburg <bbb@mpi-sws.org>   2015-08-09 07:18:55 -0400
committer  Bjoern Brandenburg <bbb@mpi-sws.org>   2017-05-26 17:12:41 -0400
commit     bda81b58bc0e67358d05eb096cc19a2cc22d228a (patch)
tree       3708726335317dcaa0b31e1ef31866c3a639fac2 /litmus
parent     b410e1d8a4699e4a1c1edc0fc7d442032e1af7da (diff)
Add P-FP scheduler plugin
P-FP: fix wrong memset()
P-FP: use sched_trace_last_suspension_as_completion()
P-FP: use inferred_sporadic_job_release_at()
P-FP: include np.h
P-FP: improve debug tracing
Diffstat (limited to 'litmus')
-rw-r--r--   litmus/Makefile        4
-rw-r--r--   litmus/fp_common.c    17
-rw-r--r--   litmus/sched_pfp.c    2047
3 files changed, 2062 insertions, 6 deletions
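
For orientation, here is a minimal userspace sketch of how a task would typically be admitted to this plugin. It is an editorial illustration, not part of the commit, and every liblitmus name in it (init_litmus, init_rt_task_param, set_rt_task_param, task_mode, sleep_next_period, be_migrate_to_cpu, and the rt_task fields exec_cost, period, cpu, priority) is an assumption drawn from the liblitmus userspace library rather than from this patch.

    /* Editorial sketch (userspace, not part of this patch). All liblitmus
     * names below are assumptions; adjust to the library version in use. */
    #include <litmus.h>
    #include <unistd.h>

    int main(void)
    {
        struct rt_task param;

        init_litmus();

        init_rt_task_param(&param);
        param.exec_cost = 10000000ULL;   /* 10 ms WCET, in nanoseconds */
        param.period    = 100000000ULL;  /* 100 ms period */
        param.cpu       = 1;             /* partition, checked by pfp_admit_task() */
        param.priority  = 10;            /* fixed priority, validated via litmus_is_valid_fixed_prio() */

        /* pfp_admit_task() requires task_cpu() == param.cpu, so migrate first
         * (helper name assumed) */
        be_migrate_to_cpu(param.cpu);

        set_rt_task_param(getpid(), &param);
        task_mode(LITMUS_RT_TASK);       /* admission: pfp_admit_task(), then pfp_task_new() */

        /* ... one job of work ... */
        sleep_next_period();             /* job completion via complete_job() */

        task_mode(BACKGROUND_TASK);      /* leave RT mode; eventually pfp_task_exit() */
        return 0;
    }
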
diff --git a/litmus/Makefile b/litmus/Makefile
index c7bf0af79764..5594d4e86cf1 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -20,7 +20,9 @@ obj-y = sched_plugin.o litmus.o \
 	ctrldev.o \
 	uncachedev.o \
 	sched_gsn_edf.o \
-	sched_psn_edf.o
+	sched_psn_edf.o \
+	sched_pfp.o
+
 
 
 obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o
diff --git a/litmus/fp_common.c b/litmus/fp_common.c
index 242542a510d3..595c7b8e561d 100644
--- a/litmus/fp_common.c
+++ b/litmus/fp_common.c
@@ -33,7 +33,6 @@ int fp_higher_prio(struct task_struct* first,
 		return 0;
 	}
 
-
 	/* check for NULL tasks */
 	if (!first || !second)
 		return first && !second;
@@ -51,6 +50,15 @@ int fp_higher_prio(struct task_struct* first,
 	if (unlikely(second->rt_param.inh_task))
 		second_task = second->rt_param.inh_task;
 
+	/* Comparisons to itself are only possible with
+	 * priority inheritance when the svc_preempt interrupt fires just
+	 * before scheduling (and everything that could follow in the
+	 * ready queue). Always favour the original job, as that one will just
+	 * suspend itself to resolve this.
+	 */
+	if (first_task == second_task)
+		return first_task == first;
+
 	/* Check for priority boosting. Tie-break by start of boosting.
 	 */
 	if (unlikely(is_priority_boosted(first_task))) {
@@ -66,11 +74,10 @@ int fp_higher_prio(struct task_struct* first,
 		/* second_task is boosted, first is not*/
 		return 0;
 
-#endif
-
-	/* Comparisons to itself are not expected; priority inheritance
-	 * should also not cause this to happen. */
+#else
+	/* No locks, no priority inheritance, no comparisons to itself */
 	BUG_ON(first_task == second_task);
+#endif
 
 	if (get_priority(first_task) < get_priority(second_task))
 		return 1;
diff --git a/litmus/sched_pfp.c b/litmus/sched_pfp.c
new file mode 100644
index 000000000000..eba5c46ac797
--- /dev/null
+++ b/litmus/sched_pfp.c
@@ -0,0 +1,2047 @@
1/*
2 * litmus/sched_pfp.c
3 *
4 * Implementation of partitioned fixed-priority scheduling.
5 * Based on PSN-EDF.
6 */
7
8#include <linux/percpu.h>
9#include <linux/sched.h>
10#include <linux/list.h>
11#include <linux/spinlock.h>
12#include <linux/module.h>
13
14#include <litmus/litmus.h>
15#include <litmus/wait.h>
16#include <litmus/jobs.h>
17#include <litmus/preempt.h>
18#include <litmus/fp_common.h>
19#include <litmus/sched_plugin.h>
20#include <litmus/sched_trace.h>
21#include <litmus/trace.h>
22#include <litmus/budget.h>
23#include <litmus/np.h>
24
25/* to set up domain/cpu mappings */
26#include <litmus/litmus_proc.h>
27#include <linux/uaccess.h>
28
29
30typedef struct {
31 rt_domain_t domain;
32 struct fp_prio_queue ready_queue;
33 int cpu;
34 struct task_struct* scheduled; /* only RT tasks */
35/*
36 * scheduling lock slock
37 * protects the domain and serializes scheduling decisions
38 */
39#define slock domain.ready_lock
40
41} pfp_domain_t;
42
43DEFINE_PER_CPU(pfp_domain_t, pfp_domains);
44
45pfp_domain_t* pfp_doms[NR_CPUS];
46
47#define local_pfp (this_cpu_ptr(&pfp_domains))
48#define remote_dom(cpu) (&per_cpu(pfp_domains, cpu).domain)
49#define remote_pfp(cpu) (&per_cpu(pfp_domains, cpu))
50#define task_dom(task) remote_dom(get_partition(task))
51#define task_pfp(task) remote_pfp(get_partition(task))
52
53
54#ifdef CONFIG_LITMUS_LOCKING
55DEFINE_PER_CPU(uint64_t,fmlp_timestamp);
56#endif
57
58/* we assume the lock is being held */
59static void preempt(pfp_domain_t *pfp)
60{
61 preempt_if_preemptable(pfp->scheduled, pfp->cpu);
62}
63
64static unsigned int priority_index(struct task_struct* t)
65{
66#ifdef CONFIG_LITMUS_LOCKING
67 if (unlikely(t->rt_param.inh_task))
68 /* use effective priority */
69 t = t->rt_param.inh_task;
70
71 if (is_priority_boosted(t)) {
72 /* zero is reserved for priority-boosted tasks */
73 return 0;
74 } else
75#endif
76 return get_priority(t);
77}
78
79static void pfp_release_jobs(rt_domain_t* rt, struct bheap* tasks)
80{
81 pfp_domain_t *pfp = container_of(rt, pfp_domain_t, domain);
82 unsigned long flags;
83 struct task_struct* t;
84 struct bheap_node* hn;
85
86 raw_spin_lock_irqsave(&pfp->slock, flags);
87
88 while (!bheap_empty(tasks)) {
89 hn = bheap_take(fp_ready_order, tasks);
90 t = bheap2task(hn);
91 TRACE_TASK(t, "released (part:%d prio:%d)\n",
92 get_partition(t), get_priority(t));
93 fp_prio_add(&pfp->ready_queue, t, priority_index(t));
94 }
95
96 /* do we need to preempt? */
97 if (fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled)) {
98 TRACE_CUR("preempted by new release\n");
99 preempt(pfp);
100 }
101
102 raw_spin_unlock_irqrestore(&pfp->slock, flags);
103}
104
105static void pfp_preempt_check(pfp_domain_t *pfp)
106{
107 if (fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled))
108 preempt(pfp);
109}
110
111static void pfp_domain_init(pfp_domain_t* pfp,
112 int cpu)
113{
114 fp_domain_init(&pfp->domain, NULL, pfp_release_jobs);
115 pfp->cpu = cpu;
116 pfp->scheduled = NULL;
117 fp_prio_queue_init(&pfp->ready_queue);
118}
119
120static void requeue(struct task_struct* t, pfp_domain_t *pfp)
121{
122 tsk_rt(t)->completed = 0;
123 if (is_released(t, litmus_clock())) {
124 TRACE_TASK(t, "add to ready\n");
125 fp_prio_add(&pfp->ready_queue, t, priority_index(t));
126 } else
127 add_release(&pfp->domain, t); /* it has got to wait */
128}
129
130static void job_completion(struct task_struct* t, int forced)
131{
132 sched_trace_task_completion(t, forced);
133 TRACE_TASK(t, "job_completion(forced=%d).\n", forced);
134
135 tsk_rt(t)->completed = 0;
136 prepare_for_next_period(t);
137 if (is_released(t, litmus_clock()))
138 sched_trace_task_release(t);
139}
140
141static struct task_struct* pfp_schedule(struct task_struct * prev)
142{
143 pfp_domain_t* pfp = local_pfp;
144 struct task_struct* next;
145
146 int out_of_time, sleep, preempt, np, exists, blocks, resched, migrate;
147
148 raw_spin_lock(&pfp->slock);
149
150 /* sanity checking
151 * unlike under G-EDF, when a task exits (is dead),
152 * pfp->scheduled may be NULL while prev _is_ real-time
153 */
154 BUG_ON(pfp->scheduled && pfp->scheduled != prev);
155 BUG_ON(pfp->scheduled && !is_realtime(prev));
156
157 /* (0) Determine state */
158 exists = pfp->scheduled != NULL;
159 blocks = exists && !is_current_running();
160 out_of_time = exists && budget_enforced(pfp->scheduled)
161 && budget_exhausted(pfp->scheduled);
162 np = exists && is_np(pfp->scheduled);
163 sleep = exists && is_completed(pfp->scheduled);
164 migrate = exists && get_partition(pfp->scheduled) != pfp->cpu;
165 preempt = !blocks && (migrate || fp_preemption_needed(&pfp->ready_queue, prev));
166
167 /* If we need to preempt do so.
168 * The following checks set resched to 1 in case of special
169 * circumstances.
170 */
171 resched = preempt;
172
173 /* If a task blocks we have no choice but to reschedule.
174 */
175 if (blocks)
176 resched = 1;
177
178 /* Request a sys_exit_np() call if we would like to preempt but cannot.
179 * Multiple calls to request_exit_np() don't hurt.
180 */
181 if (np && (out_of_time || preempt || sleep))
182 request_exit_np(pfp->scheduled);
183
184 /* Any task that is preemptable and either exhausts its execution
185 * budget or wants to sleep completes. We may have to reschedule after
186 * this.
187 */
188 if (!np && (out_of_time || sleep)) {
189 job_completion(pfp->scheduled, !sleep);
190 resched = 1;
191 }
192
193 if (exists)
194 TRACE_TASK(pfp->scheduled, "state:%d blocks:%d oot:%d np:%d sleep:%d "
195 "mig:%d preempt:%d resched:%d on_rq:%d on_cpu:%d\n",
196 pfp->scheduled->state,
197 blocks, out_of_time, np, sleep, migrate, preempt, resched,
198 pfp->scheduled->on_rq, pfp->scheduled->on_cpu);
199
200 /* The final scheduling decision. Do we need to switch for some reason?
201 * Switch if we are in RT mode and have no task or if we need to
202 * resched.
203 */
204 next = NULL;
205 if ((!np || blocks) && (resched || !exists)) {
206 /* When preempting a task that does not block, then
207 * re-insert it into either the ready queue or the
208 * release queue (if it completed). requeue() picks
209 * the appropriate queue.
210 */
211 if (pfp->scheduled && !blocks && !migrate)
212 requeue(pfp->scheduled, pfp);
213 next = fp_prio_take(&pfp->ready_queue);
214 if (next == prev) {
215 struct task_struct *t = fp_prio_peek(&pfp->ready_queue);
216 TRACE_TASK(next, "next==prev sleep=%d oot=%d np=%d preempt=%d migrate=%d "
217 "boost=%d empty=%d prio-idx=%u prio=%u\n",
218 sleep, out_of_time, np, preempt, migrate,
219 is_priority_boosted(next),
220 t == NULL,
221 priority_index(next),
222 get_priority(next));
223 if (t)
224 TRACE_TASK(t, "waiter boost=%d prio-idx=%u prio=%u\n",
225 is_priority_boosted(t),
226 priority_index(t),
227 get_priority(t));
228 }
229 /* If preempt is set, we should not see the same task again. */
230 BUG_ON(preempt && next == prev);
231 /* Similarly, if preempt is set, then next may not be NULL,
232 * unless it's a migration. */
233 BUG_ON(preempt && !migrate && next == NULL);
234 } else
235 /* Only override Linux scheduler if we have a real-time task
236 * scheduled that needs to continue.
237 */
238 if (exists)
239 next = prev;
240
241 if (next) {
242 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
243 } else if (exists) {
244 TRACE("becoming idle at %llu\n", litmus_clock());
245 }
246
247 pfp->scheduled = next;
248 sched_state_task_picked();
249 raw_spin_unlock(&pfp->slock);
250
251 return next;
252}
253
254#ifdef CONFIG_LITMUS_LOCKING
255
256/* prev is no longer scheduled --- see if it needs to migrate */
257static void pfp_finish_switch(struct task_struct *prev)
258{
259 pfp_domain_t *to;
260
261 if (is_realtime(prev))
262 TRACE_TASK(prev, "state:%d on_rq:%d on_cpu:%d\n",
263 prev->state, prev->on_rq, prev->on_cpu);
264
265 if (is_realtime(prev) &&
266 prev->state == TASK_RUNNING &&
267 get_partition(prev) != smp_processor_id()) {
268 TRACE_TASK(prev, "needs to migrate from P%d to P%d\n",
269 smp_processor_id(), get_partition(prev));
270
271 to = task_pfp(prev);
272
273 raw_spin_lock(&to->slock);
274
275 TRACE_TASK(prev, "adding to queue on P%d\n", to->cpu);
276 requeue(prev, to);
277 if (fp_preemption_needed(&to->ready_queue, to->scheduled))
278 preempt(to);
279
280 raw_spin_unlock(&to->slock);
281
282 }
283}
284
285#endif
286
287/* Prepare a task for running in RT mode
288 */
289static void pfp_task_new(struct task_struct * t, int on_rq, int is_scheduled)
290{
291 pfp_domain_t* pfp = task_pfp(t);
292 unsigned long flags;
293
294 TRACE_TASK(t, "P-FP: task new, cpu = %d\n",
295 t->rt_param.task_params.cpu);
296
297 /* setup job parameters */
298 release_at(t, litmus_clock());
299
300 raw_spin_lock_irqsave(&pfp->slock, flags);
301 if (is_scheduled) {
302 /* there shouldn't be anything else running at the time */
303 BUG_ON(pfp->scheduled);
304 pfp->scheduled = t;
305 } else if (on_rq) {
306 requeue(t, pfp);
307 /* maybe we have to reschedule */
308 pfp_preempt_check(pfp);
309 }
310 raw_spin_unlock_irqrestore(&pfp->slock, flags);
311}
312
313static void pfp_task_wake_up(struct task_struct *task)
314{
315 unsigned long flags;
316 pfp_domain_t* pfp = task_pfp(task);
317 lt_t now;
318
319 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
320 raw_spin_lock_irqsave(&pfp->slock, flags);
321
322#ifdef CONFIG_LITMUS_LOCKING
323 /* Should only be queued when processing a fake wake-up due to a
324 * migration-related state change. */
325 if (unlikely(is_queued(task))) {
326 TRACE_TASK(task, "WARNING: waking task still queued. Is this right?\n");
327 goto out_unlock;
328 }
329#else
330 BUG_ON(is_queued(task));
331#endif
332 now = litmus_clock();
333 if (is_sporadic(task) && is_tardy(task, now)
334#ifdef CONFIG_LITMUS_LOCKING
335 /* We need to take suspensions because of semaphores into
336 * account! If a job resumes after being suspended due to acquiring
337 * a semaphore, it should never be treated as a new job release.
338 */
339 && !is_priority_boosted(task)
340#endif
341 ) {
342 inferred_sporadic_job_release_at(task, now);
343 }
344
345 /* Only add to ready queue if it is not the currently-scheduled
346 * task. This could be the case if a task was woken up concurrently
347 * on a remote CPU before the executing CPU got around to actually
348 * de-scheduling the task, i.e., wake_up() raced with schedule()
349 * and won. Also, don't requeue if it is still queued, which can
350 * happen under the DPCP due to wake-ups racing with migrations.
351 */
352 if (pfp->scheduled != task) {
353 requeue(task, pfp);
354 pfp_preempt_check(pfp);
355 }
356
357#ifdef CONFIG_LITMUS_LOCKING
358out_unlock:
359#endif
360 raw_spin_unlock_irqrestore(&pfp->slock, flags);
361 TRACE_TASK(task, "wake up done\n");
362}
363
364static void pfp_task_block(struct task_struct *t)
365{
366 /* only running tasks can block, thus t is in no queue */
367 TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state);
368
369 BUG_ON(!is_realtime(t));
370
371 /* If this task blocked normally, it shouldn't be queued. The exception is
372 * if this is a simulated block()/wakeup() pair from the pull-migration code path.
373 * This should only happen if the DPCP is being used.
374 */
375#ifdef CONFIG_LITMUS_LOCKING
376 if (unlikely(is_queued(t)))
377 TRACE_TASK(t, "WARNING: blocking task still queued. Is this right?\n");
378#else
379 BUG_ON(is_queued(t));
380#endif
381}
382
383static void pfp_task_exit(struct task_struct * t)
384{
385 unsigned long flags;
386 pfp_domain_t* pfp = task_pfp(t);
387 rt_domain_t* dom;
388
389 raw_spin_lock_irqsave(&pfp->slock, flags);
390 if (is_queued(t)) {
391 BUG(); /* This currently doesn't work. */
392 /* dequeue */
393 dom = task_dom(t);
394 remove(dom, t);
395 }
396 if (pfp->scheduled == t) {
397 pfp->scheduled = NULL;
398 preempt(pfp);
399 }
400 TRACE_TASK(t, "RIP, now reschedule\n");
401
402 raw_spin_unlock_irqrestore(&pfp->slock, flags);
403}
404
405#ifdef CONFIG_LITMUS_LOCKING
406
407#include <litmus/fdso.h>
408#include <litmus/srp.h>
409
410static void fp_dequeue(pfp_domain_t* pfp, struct task_struct* t)
411{
412 BUG_ON(pfp->scheduled == t && is_queued(t));
413 if (is_queued(t))
414 fp_prio_remove(&pfp->ready_queue, t, priority_index(t));
415}
416
417static void fp_set_prio_inh(pfp_domain_t* pfp, struct task_struct* t,
418 struct task_struct* prio_inh)
419{
420 int requeue;
421
422 if (!t || t->rt_param.inh_task == prio_inh) {
423 /* no update required */
424 if (t)
425 TRACE_TASK(t, "no prio-inh update required\n");
426 return;
427 }
428
429 requeue = is_queued(t);
430 TRACE_TASK(t, "prio-inh: is_queued:%d\n", requeue);
431
432 if (requeue)
433 /* first remove */
434 fp_dequeue(pfp, t);
435
436 t->rt_param.inh_task = prio_inh;
437
438 if (requeue)
439 /* add again to the right queue */
440 fp_prio_add(&pfp->ready_queue, t, priority_index(t));
441}
442
443static int effective_agent_priority(int prio)
444{
445 /* make sure agents have higher priority */
446 return prio - LITMUS_MAX_PRIORITY;
447}
448
449static lt_t prio_point(int eprio)
450{
451 /* make sure we have non-negative prio points */
452 return eprio + LITMUS_MAX_PRIORITY;
453}
454
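/* Editorial note, not part of this patch: a worked example of the two
 * helpers above. Assuming LITMUS_MAX_PRIORITY == 512 (the actual value is
 * defined elsewhere in the tree; treat 512 as an assumption), a DPCP agent
 * acting for a task with base priority 7 gets
 *
 *	effective_agent_priority(7) = 7 - 512 = -505
 *
 * so agent requests always outrank regular task priorities, and
 *
 *	prio_point(-505) = -505 + 512 = 7
 *
 * maps that value back to a non-negative number usable as a
 * priority-queue key / boost tie-break.
 */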
455static void boost_priority(struct task_struct* t, lt_t priority_point)
456{
457 unsigned long flags;
458 pfp_domain_t* pfp = task_pfp(t);
459
460 raw_spin_lock_irqsave(&pfp->slock, flags);
461
462
463 TRACE_TASK(t, "priority boosted at %llu\n", litmus_clock());
464
465 tsk_rt(t)->priority_boosted = 1;
466 /* tie-break by protocol-specific priority point */
467 tsk_rt(t)->boost_start_time = priority_point;
468
469 /* Priority boosting currently only takes effect for already-scheduled
470 * tasks. This is sufficient since priority boosting only kicks in as
471 * part of lock acquisitions. */
472 BUG_ON(pfp->scheduled != t);
473
474 raw_spin_unlock_irqrestore(&pfp->slock, flags);
475}
476
477static void unboost_priority(struct task_struct* t)
478{
479 unsigned long flags;
480 pfp_domain_t* pfp = task_pfp(t);
481
482 raw_spin_lock_irqsave(&pfp->slock, flags);
483
484 /* Assumption: this only happens when the job is scheduled.
485 * Exception: If t transitioned to non-real-time mode, we no longer
486 * care about it. */
487 BUG_ON(pfp->scheduled != t && is_realtime(t));
488
489 TRACE_TASK(t, "priority restored at %llu\n", litmus_clock());
490
491 tsk_rt(t)->priority_boosted = 0;
492 tsk_rt(t)->boost_start_time = 0;
493
494 /* check if this changes anything */
495 if (fp_preemption_needed(&pfp->ready_queue, pfp->scheduled))
496 preempt(pfp);
497
498 raw_spin_unlock_irqrestore(&pfp->slock, flags);
499}
500
501/* ******************** SRP support ************************ */
502
503static unsigned int pfp_get_srp_prio(struct task_struct* t)
504{
505 return get_priority(t);
506}
507
508/* ******************** FMLP support ********************** */
509
510struct fmlp_semaphore {
511 struct litmus_lock litmus_lock;
512
513 /* current resource holder */
514 struct task_struct *owner;
515
516 /* FIFO queue of waiting tasks */
517 wait_queue_head_t wait;
518};
519
520static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock)
521{
522 return container_of(lock, struct fmlp_semaphore, litmus_lock);
523}
524
525static inline lt_t
526fmlp_clock(void)
527{
528 return (lt_t) this_cpu_inc_return(fmlp_timestamp);
529}
530
531int pfp_fmlp_lock(struct litmus_lock* l)
532{
533 struct task_struct* t = current;
534 struct fmlp_semaphore *sem = fmlp_from_lock(l);
535 wait_queue_t wait;
536 unsigned long flags;
537 lt_t time_of_request;
538
539 if (!is_realtime(t))
540 return -EPERM;
541
542 /* prevent nested lock acquisition --- not supported by FMLP */
543 if (tsk_rt(t)->num_locks_held ||
544 tsk_rt(t)->num_local_locks_held)
545 return -EBUSY;
546
547 spin_lock_irqsave(&sem->wait.lock, flags);
548
549 /* tie-break by this point in time */
550 time_of_request = fmlp_clock();
551
552 /* Priority-boost ourself *before* we suspend so that
553 * our priority is boosted when we resume. */
554 boost_priority(t, time_of_request);
555
556 if (sem->owner) {
557 /* resource is not free => must suspend and wait */
558
559 init_waitqueue_entry(&wait, t);
560
561 /* FIXME: interruptible would be nice some day */
562 set_task_state(t, TASK_UNINTERRUPTIBLE);
563
564 __add_wait_queue_tail_exclusive(&sem->wait, &wait);
565
566 TS_LOCK_SUSPEND;
567
568 /* release lock before sleeping */
569 spin_unlock_irqrestore(&sem->wait.lock, flags);
570
571 /* We depend on the FIFO order. Thus, we don't need to recheck
572 * when we wake up; we are guaranteed to have the lock since
573 * there is only one wake up per release.
574 */
575
576 schedule();
577
578 TS_LOCK_RESUME;
579
580 /* Since we hold the lock, no other task will change
581 * ->owner. We can thus check it without acquiring the spin
582 * lock. */
583 BUG_ON(sem->owner != t);
584 } else {
585 /* it's ours now */
586 sem->owner = t;
587
588 spin_unlock_irqrestore(&sem->wait.lock, flags);
589 }
590
591 tsk_rt(t)->num_locks_held++;
592
593 return 0;
594}
595
596int pfp_fmlp_unlock(struct litmus_lock* l)
597{
598 struct task_struct *t = current, *next = NULL;
599 struct fmlp_semaphore *sem = fmlp_from_lock(l);
600 unsigned long flags;
601 int err = 0;
602
603 preempt_disable();
604
605 spin_lock_irqsave(&sem->wait.lock, flags);
606
607 if (sem->owner != t) {
608 err = -EINVAL;
609 goto out;
610 }
611
612 tsk_rt(t)->num_locks_held--;
613
614 /* we lose the benefit of priority boosting */
615
616 unboost_priority(t);
617
618 /* check if there are jobs waiting for this resource */
619 next = __waitqueue_remove_first(&sem->wait);
620 sem->owner = next;
621
622out:
623 spin_unlock_irqrestore(&sem->wait.lock, flags);
624
625 /* Wake up next. The waiting job is already priority-boosted. */
626 if(next) {
627 wake_up_process(next);
628 }
629
630 preempt_enable();
631
632 return err;
633}
634
635int pfp_fmlp_close(struct litmus_lock* l)
636{
637 struct task_struct *t = current;
638 struct fmlp_semaphore *sem = fmlp_from_lock(l);
639 unsigned long flags;
640
641 int owner;
642
643 spin_lock_irqsave(&sem->wait.lock, flags);
644
645 owner = sem->owner == t;
646
647 spin_unlock_irqrestore(&sem->wait.lock, flags);
648
649 if (owner)
650 pfp_fmlp_unlock(l);
651
652 return 0;
653}
654
655void pfp_fmlp_free(struct litmus_lock* lock)
656{
657 kfree(fmlp_from_lock(lock));
658}
659
660static struct litmus_lock_ops pfp_fmlp_lock_ops = {
661 .close = pfp_fmlp_close,
662 .lock = pfp_fmlp_lock,
663 .unlock = pfp_fmlp_unlock,
664 .deallocate = pfp_fmlp_free,
665};
666
667static struct litmus_lock* pfp_new_fmlp(void)
668{
669 struct fmlp_semaphore* sem;
670
671 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
672 if (!sem)
673 return NULL;
674
675 sem->owner = NULL;
676 init_waitqueue_head(&sem->wait);
677 sem->litmus_lock.ops = &pfp_fmlp_lock_ops;
678
679 return &sem->litmus_lock;
680}
681
682/* ******************** MPCP support ********************** */
683
684struct mpcp_semaphore {
685 struct litmus_lock litmus_lock;
686
687 /* current resource holder */
688 struct task_struct *owner;
689
690 /* priority queue of waiting tasks */
691 wait_queue_head_t wait;
692
693 /* priority ceiling per cpu */
694 unsigned int prio_ceiling[NR_CPUS];
695
696 /* should jobs spin "virtually" for this resource? */
697 int vspin;
698};
699
700#define OMEGA_CEILING UINT_MAX
701
702/* Since jobs spin "virtually" while waiting to acquire a lock,
703 * they first must acquire a local per-cpu resource.
704 */
705static DEFINE_PER_CPU(wait_queue_head_t, mpcpvs_vspin_wait);
706static DEFINE_PER_CPU(struct task_struct*, mpcpvs_vspin);
707
708/* called with preemptions off <=> no local modifications */
709static void mpcp_vspin_enter(void)
710{
711 struct task_struct* t = current;
712
713 while (1) {
714 if (this_cpu_read(mpcpvs_vspin) == NULL) {
715 /* good, we get to issue our request */
716 this_cpu_write(mpcpvs_vspin, t);
717 break;
718 } else {
719 /* some job is spinning => enqueue in request queue */
720 prio_wait_queue_t wait;
721 wait_queue_head_t* vspin = this_cpu_ptr(&mpcpvs_vspin_wait);
722 unsigned long flags;
723
724 /* ordered by regular priority */
725 init_prio_waitqueue_entry(&wait, t, prio_point(get_priority(t)));
726
727 spin_lock_irqsave(&vspin->lock, flags);
728
729 set_task_state(t, TASK_UNINTERRUPTIBLE);
730
731 __add_wait_queue_prio_exclusive(vspin, &wait);
732
733 spin_unlock_irqrestore(&vspin->lock, flags);
734
735 TS_LOCK_SUSPEND;
736
737 preempt_enable_no_resched();
738
739 schedule();
740
741 preempt_disable();
742
743 TS_LOCK_RESUME;
744 /* Recheck if we got it --- some higher-priority process might
745 * have swooped in. */
746 }
747 }
748 /* ok, now it is ours */
749}
750
751/* called with preemptions off */
752static void mpcp_vspin_exit(void)
753{
754 struct task_struct* t = current, *next;
755 unsigned long flags;
756 wait_queue_head_t* vspin = this_cpu_ptr(&mpcpvs_vspin_wait);
757
758 BUG_ON(this_cpu_read(mpcpvs_vspin) != t);
759
760 /* no spinning job */
761 this_cpu_write(mpcpvs_vspin, NULL);
762
763 /* see if anyone is waiting for us to stop "spinning" */
764 spin_lock_irqsave(&vspin->lock, flags);
765 next = __waitqueue_remove_first(vspin);
766
767 if (next)
768 wake_up_process(next);
769
770 spin_unlock_irqrestore(&vspin->lock, flags);
771}
772
773static inline struct mpcp_semaphore* mpcp_from_lock(struct litmus_lock* lock)
774{
775 return container_of(lock, struct mpcp_semaphore, litmus_lock);
776}
777
778int pfp_mpcp_lock(struct litmus_lock* l)
779{
780 struct task_struct* t = current;
781 struct mpcp_semaphore *sem = mpcp_from_lock(l);
782 prio_wait_queue_t wait;
783 unsigned long flags;
784
785 if (!is_realtime(t))
786 return -EPERM;
787
788 /* prevent nested lock acquisition */
789 if (tsk_rt(t)->num_locks_held ||
790 tsk_rt(t)->num_local_locks_held)
791 return -EBUSY;
792
793 preempt_disable();
794
795 if (sem->vspin)
796 mpcp_vspin_enter();
797
798 /* Priority-boost ourself *before* we suspend so that
799 * our priority is boosted when we resume. Use the priority
800 * ceiling for the local partition. */
801 boost_priority(t, sem->prio_ceiling[get_partition(t)]);
802
803 spin_lock_irqsave(&sem->wait.lock, flags);
804
805 preempt_enable_no_resched();
806
807 if (sem->owner) {
808 /* resource is not free => must suspend and wait */
809
810 /* ordered by regular priority */
811 init_prio_waitqueue_entry(&wait, t, prio_point(get_priority(t)));
812
813 /* FIXME: interruptible would be nice some day */
814 set_task_state(t, TASK_UNINTERRUPTIBLE);
815
816 __add_wait_queue_prio_exclusive(&sem->wait, &wait);
817
818 TS_LOCK_SUSPEND;
819
820 /* release lock before sleeping */
821 spin_unlock_irqrestore(&sem->wait.lock, flags);
822
823 /* We depend on the FIFO order. Thus, we don't need to recheck
824 * when we wake up; we are guaranteed to have the lock since
825 * there is only one wake up per release.
826 */
827
828 schedule();
829
830 TS_LOCK_RESUME;
831
832 /* Since we hold the lock, no other task will change
833 * ->owner. We can thus check it without acquiring the spin
834 * lock. */
835 BUG_ON(sem->owner != t);
836 } else {
837 /* it's ours now */
838 sem->owner = t;
839
840 spin_unlock_irqrestore(&sem->wait.lock, flags);
841 }
842
843 tsk_rt(t)->num_locks_held++;
844
845 return 0;
846}
847
848int pfp_mpcp_unlock(struct litmus_lock* l)
849{
850 struct task_struct *t = current, *next = NULL;
851 struct mpcp_semaphore *sem = mpcp_from_lock(l);
852 unsigned long flags;
853 int err = 0;
854
855 preempt_disable();
856
857 spin_lock_irqsave(&sem->wait.lock, flags);
858
859 if (sem->owner != t) {
860 err = -EINVAL;
861 goto out;
862 }
863
864 tsk_rt(t)->num_locks_held--;
865
866 /* we lose the benefit of priority boosting */
867 unboost_priority(t);
868
869 /* check if there are jobs waiting for this resource */
870 next = __waitqueue_remove_first(&sem->wait);
871 sem->owner = next;
872
873out:
874 spin_unlock_irqrestore(&sem->wait.lock, flags);
875
876 /* Wake up next. The waiting job is already priority-boosted. */
877 if(next) {
878 wake_up_process(next);
879 }
880
881 if (sem->vspin && err == 0) {
882 mpcp_vspin_exit();
883 }
884
885 preempt_enable();
886
887 return err;
888}
889
890int pfp_mpcp_open(struct litmus_lock* l, void* config)
891{
892 struct task_struct *t = current;
893 int cpu, local_cpu;
894 struct mpcp_semaphore *sem = mpcp_from_lock(l);
895 unsigned long flags;
896
897 if (!is_realtime(t))
898 /* we need to know the real-time priority */
899 return -EPERM;
900
901 local_cpu = get_partition(t);
902
903 spin_lock_irqsave(&sem->wait.lock, flags);
904 for (cpu = 0; cpu < NR_CPUS; cpu++) {
905 if (cpu != local_cpu) {
906 sem->prio_ceiling[cpu] = min(sem->prio_ceiling[cpu],
907 get_priority(t));
908 TRACE_CUR("priority ceiling for sem %p is now %d on cpu %d\n",
909 sem, sem->prio_ceiling[cpu], cpu);
910 }
911 }
912 spin_unlock_irqrestore(&sem->wait.lock, flags);
913
914 return 0;
915}
916
917int pfp_mpcp_close(struct litmus_lock* l)
918{
919 struct task_struct *t = current;
920 struct mpcp_semaphore *sem = mpcp_from_lock(l);
921 unsigned long flags;
922
923 int owner;
924
925 spin_lock_irqsave(&sem->wait.lock, flags);
926
927 owner = sem->owner == t;
928
929 spin_unlock_irqrestore(&sem->wait.lock, flags);
930
931 if (owner)
932 pfp_mpcp_unlock(l);
933
934 return 0;
935}
936
937void pfp_mpcp_free(struct litmus_lock* lock)
938{
939 kfree(mpcp_from_lock(lock));
940}
941
942static struct litmus_lock_ops pfp_mpcp_lock_ops = {
943 .close = pfp_mpcp_close,
944 .lock = pfp_mpcp_lock,
945 .open = pfp_mpcp_open,
946 .unlock = pfp_mpcp_unlock,
947 .deallocate = pfp_mpcp_free,
948};
949
950static struct litmus_lock* pfp_new_mpcp(int vspin)
951{
952 struct mpcp_semaphore* sem;
953 int cpu;
954
955 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
956 if (!sem)
957 return NULL;
958
959 sem->owner = NULL;
960 init_waitqueue_head(&sem->wait);
961 sem->litmus_lock.ops = &pfp_mpcp_lock_ops;
962
963 for (cpu = 0; cpu < NR_CPUS; cpu++)
964 sem->prio_ceiling[cpu] = OMEGA_CEILING;
965
966 /* mark as virtual spinning */
967 sem->vspin = vspin;
968
969 return &sem->litmus_lock;
970}
971
972
973/* ******************** PCP support ********************** */
974
975
976struct pcp_semaphore {
977 struct litmus_lock litmus_lock;
978
979 struct list_head ceiling;
980
981 /* current resource holder */
982 struct task_struct *owner;
983
984 /* priority ceiling --- can be negative due to DPCP support */
985 int prio_ceiling;
986
987 /* on which processor is this PCP semaphore allocated? */
988 int on_cpu;
989};
990
991static inline struct pcp_semaphore* pcp_from_lock(struct litmus_lock* lock)
992{
993 return container_of(lock, struct pcp_semaphore, litmus_lock);
994}
995
996
997struct pcp_state {
998 struct list_head system_ceiling;
999
1000 /* highest-priority waiting task */
1001 struct task_struct* hp_waiter;
1002
1003 /* list of jobs waiting to get past the system ceiling */
1004 wait_queue_head_t ceiling_blocked;
1005};
1006
1007static void pcp_init_state(struct pcp_state* s)
1008{
1009 INIT_LIST_HEAD(&s->system_ceiling);
1010 s->hp_waiter = NULL;
1011 init_waitqueue_head(&s->ceiling_blocked);
1012}
1013
1014static DEFINE_PER_CPU(struct pcp_state, pcp_state);
1015
1016/* assumes preemptions are off */
1017static struct pcp_semaphore* pcp_get_ceiling(void)
1018{
1019 struct list_head* top = &(this_cpu_ptr(&pcp_state)->system_ceiling);
1020 return list_first_entry_or_null(top, struct pcp_semaphore, ceiling);
1021}
1022
1023/* assumes preempt off */
1024static void pcp_add_ceiling(struct pcp_semaphore* sem)
1025{
1026 struct list_head *pos;
1027 struct list_head *in_use = &(this_cpu_ptr(&pcp_state)->system_ceiling);
1028 struct pcp_semaphore* held;
1029
1030 BUG_ON(sem->on_cpu != smp_processor_id());
1031 BUG_ON(in_list(&sem->ceiling));
1032
1033 list_for_each(pos, in_use) {
1034 held = list_entry(pos, struct pcp_semaphore, ceiling);
1035 if (held->prio_ceiling >= sem->prio_ceiling) {
1036 __list_add(&sem->ceiling, pos->prev, pos);
1037 return;
1038 }
1039 }
1040
1041 /* we hit the end of the list */
1042
1043 list_add_tail(&sem->ceiling, in_use);
1044}
1045
1046/* assumes preempt off */
1047static int pcp_exceeds_ceiling(struct pcp_semaphore* ceiling,
1048 struct task_struct* task,
1049 int effective_prio)
1050{
1051 return ceiling == NULL ||
1052 ceiling->prio_ceiling > effective_prio ||
1053 ceiling->owner == task;
1054}
1055
1056/* assumes preempt off */
1057static void pcp_priority_inheritance(void)
1058{
1059 unsigned long flags;
1060 pfp_domain_t* pfp = local_pfp;
1061
1062 struct pcp_semaphore* ceiling = pcp_get_ceiling();
1063 struct task_struct *blocker, *blocked;
1064
1065 blocker = ceiling ? ceiling->owner : NULL;
1066 blocked = this_cpu_ptr(&pcp_state)->hp_waiter;
1067
1068 raw_spin_lock_irqsave(&pfp->slock, flags);
1069
1070 /* Current is no longer inheriting anything by default. This should be
1071 * the currently scheduled job, and hence not currently queued.
1072 * Special case: if current stopped being a real-time task, it will no longer
1073 * be registered as pfp->scheduled. */
1074 BUG_ON(current != pfp->scheduled && is_realtime(current));
1075
1076 fp_set_prio_inh(pfp, current, NULL);
1077 fp_set_prio_inh(pfp, blocked, NULL);
1078 fp_set_prio_inh(pfp, blocker, NULL);
1079
1080 /* Let blocking job inherit priority of blocked job, if required. */
1081 if (blocker && blocked &&
1082 fp_higher_prio(blocked, blocker)) {
1083 TRACE_TASK(blocker, "PCP inherits from %s/%d (prio %u -> %u) \n",
1084 blocked->comm, blocked->pid,
1085 get_priority(blocker), get_priority(blocked));
1086 fp_set_prio_inh(pfp, blocker, blocked);
1087 }
1088
1089 /* Check if anything changed. If the blocked job is current, then it is
1090 * just blocking and hence is going to call the scheduler anyway. */
1091 if (blocked != current &&
1092 fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled))
1093 preempt(pfp);
1094
1095 raw_spin_unlock_irqrestore(&pfp->slock, flags);
1096}
1097
1098/* called with preemptions off */
1099static void pcp_raise_ceiling(struct pcp_semaphore* sem,
1100 int effective_prio)
1101{
1102 struct task_struct* t = current;
1103 struct pcp_semaphore* ceiling;
1104 prio_wait_queue_t wait;
1105 unsigned int waiting_higher_prio;
1106
1107 while(1) {
1108 ceiling = pcp_get_ceiling();
1109 if (pcp_exceeds_ceiling(ceiling, t, effective_prio))
1110 break;
1111
1112 TRACE_CUR("PCP ceiling-blocked, wanted sem %p, but %s/%d has the ceiling \n",
1113 sem, ceiling->owner->comm, ceiling->owner->pid);
1114
1115 /* we need to wait until the ceiling is lowered */
1116
1117 /* enqueue in priority order */
1118 init_prio_waitqueue_entry(&wait, t, effective_prio);
1119 set_task_state(t, TASK_UNINTERRUPTIBLE);
1120 waiting_higher_prio = add_wait_queue_prio_exclusive(
1121 &(this_cpu_ptr(&pcp_state)->ceiling_blocked), &wait);
1122
1123 if (waiting_higher_prio == 0) {
1124 TRACE_CUR("PCP new highest-prio waiter => prio inheritance\n");
1125
1126 /* we are the new highest-priority waiting job
1127 * => update inheritance */
1128 this_cpu_ptr(&pcp_state)->hp_waiter = t;
1129 pcp_priority_inheritance();
1130 }
1131
1132 TS_LOCK_SUSPEND;
1133
1134 preempt_enable_no_resched();
1135 schedule();
1136 preempt_disable();
1137
1138 /* pcp_resume_unblocked() removed us from wait queue */
1139
1140 TS_LOCK_RESUME;
1141 }
1142
1143 TRACE_CUR("PCP got the ceiling and sem %p\n", sem);
1144
1145 /* We are good to go. The semaphore should be available. */
1146 BUG_ON(sem->owner != NULL);
1147
1148 sem->owner = t;
1149
1150 pcp_add_ceiling(sem);
1151}
1152
1153static void pcp_resume_unblocked(void)
1154{
1155 wait_queue_head_t *blocked = &(this_cpu_ptr(&pcp_state)->ceiling_blocked);
1156 unsigned long flags;
1157 prio_wait_queue_t* q;
1158 struct task_struct* t = NULL;
1159
1160 struct pcp_semaphore* ceiling = pcp_get_ceiling();
1161
1162 spin_lock_irqsave(&blocked->lock, flags);
1163
1164 while (waitqueue_active(blocked)) {
1165 /* check first == highest-priority waiting job */
1166 q = list_entry(blocked->task_list.next,
1167 prio_wait_queue_t, wq.task_list);
1168 t = (struct task_struct*) q->wq.private;
1169
1170 /* can it proceed now? => let it go */
1171 if (pcp_exceeds_ceiling(ceiling, t, q->priority)) {
1172 __remove_wait_queue(blocked, &q->wq);
1173 wake_up_process(t);
1174 } else {
1175 /* We are done. Update highest-priority waiter. */
1176 this_cpu_ptr(&pcp_state)->hp_waiter = t;
1177 goto out;
1178 }
1179 }
1180 /* If we get here, then there are no more waiting
1181 * jobs. */
1182 this_cpu_ptr(&pcp_state)->hp_waiter = NULL;
1183out:
1184 spin_unlock_irqrestore(&blocked->lock, flags);
1185}
1186
1187/* assumes preempt off */
1188static void pcp_lower_ceiling(struct pcp_semaphore* sem)
1189{
1190 BUG_ON(!in_list(&sem->ceiling));
1191 BUG_ON(sem->owner != current);
1192 BUG_ON(sem->on_cpu != smp_processor_id());
1193
1194 /* remove from ceiling list */
1195 list_del(&sem->ceiling);
1196
1197 /* release */
1198 sem->owner = NULL;
1199
1200 TRACE_CUR("PCP released sem %p\n", sem);
1201
1202 /* Wake up all ceiling-blocked jobs that now pass the ceiling. */
1203 pcp_resume_unblocked();
1204
1205 pcp_priority_inheritance();
1206}
1207
1208static void pcp_update_prio_ceiling(struct pcp_semaphore* sem,
1209 int effective_prio)
1210{
1211 /* This needs to be synchronized on something.
1212 * Might as well use waitqueue lock for the processor.
1214 * We assume this happens only before the task set starts execution
1214 * (i.e., during initialization), but it may happen on multiple processors
1215 * at the same time.
1216 */
1217 unsigned long flags;
1218
1219 struct pcp_state* s = &per_cpu(pcp_state, sem->on_cpu);
1220
1221 spin_lock_irqsave(&s->ceiling_blocked.lock, flags);
1222
1223 sem->prio_ceiling = min(sem->prio_ceiling, effective_prio);
1224
1225 spin_unlock_irqrestore(&s->ceiling_blocked.lock, flags);
1226}
1227
1228static void pcp_init_semaphore(struct pcp_semaphore* sem, int cpu)
1229{
1230 sem->owner = NULL;
1231 INIT_LIST_HEAD(&sem->ceiling);
1232 sem->prio_ceiling = INT_MAX;
1233 sem->on_cpu = cpu;
1234}
1235
1236int pfp_pcp_lock(struct litmus_lock* l)
1237{
1238 struct task_struct* t = current;
1239 struct pcp_semaphore *sem = pcp_from_lock(l);
1240
1241 /* The regular PCP uses the regular task priorities, not agent
1242 * priorities. */
1243 int eprio = get_priority(t);
1244 int from = get_partition(t);
1245 int to = sem->on_cpu;
1246
1247 if (!is_realtime(t) || from != to)
1248 return -EPERM;
1249
1250 /* prevent nested lock acquisition in global critical section */
1251 if (tsk_rt(t)->num_locks_held)
1252 return -EBUSY;
1253
1254 preempt_disable();
1255
1256 pcp_raise_ceiling(sem, eprio);
1257
1258 preempt_enable();
1259
1260 tsk_rt(t)->num_local_locks_held++;
1261
1262 return 0;
1263}
1264
1265int pfp_pcp_unlock(struct litmus_lock* l)
1266{
1267 struct task_struct *t = current;
1268 struct pcp_semaphore *sem = pcp_from_lock(l);
1269
1270 int err = 0;
1271
1272 preempt_disable();
1273
1274 if (sem->owner != t) {
1275 err = -EINVAL;
1276 goto out;
1277 }
1278
1279 /* The current owner should be executing on the correct CPU.
1280 *
1281 * If the owner transitioned out of RT mode or is exiting, then
1282 * it might have already been migrated away by the best-effort
1283 * scheduler and we just have to deal with it. */
1284 if (unlikely(!is_realtime(t) && sem->on_cpu != smp_processor_id())) {
1285 TRACE_TASK(t, "PCP unlock cpu=%d, sem->on_cpu=%d\n",
1286 smp_processor_id(), sem->on_cpu);
1287 preempt_enable();
1288 err = litmus_be_migrate_to(sem->on_cpu);
1289 preempt_disable();
1290 TRACE_TASK(t, "post-migrate: cpu=%d, sem->on_cpu=%d err=%d\n",
1291 smp_processor_id(), sem->on_cpu, err);
1292 }
1293 BUG_ON(sem->on_cpu != smp_processor_id());
1294 err = 0;
1295
1296 tsk_rt(t)->num_local_locks_held--;
1297
1298 /* give it back */
1299 pcp_lower_ceiling(sem);
1300
1301out:
1302 preempt_enable();
1303
1304 return err;
1305}
1306
1307int pfp_pcp_open(struct litmus_lock* l, void* __user config)
1308{
1309 struct task_struct *t = current;
1310 struct pcp_semaphore *sem = pcp_from_lock(l);
1311
1312 int cpu, eprio;
1313
1314 if (!is_realtime(t))
1315 /* we need to know the real-time priority */
1316 return -EPERM;
1317
1318 if (!config)
1319 cpu = get_partition(t);
1320 else if (get_user(cpu, (int*) config))
1321 return -EFAULT;
1322
1323 /* make sure the resource location matches */
1324 if (cpu != sem->on_cpu)
1325 return -EINVAL;
1326
1327 /* The regular PCP uses regular task priorities, not agent
1328 * priorities. */
1329 eprio = get_priority(t);
1330
1331 pcp_update_prio_ceiling(sem, eprio);
1332
1333 return 0;
1334}
1335
1336int pfp_pcp_close(struct litmus_lock* l)
1337{
1338 struct task_struct *t = current;
1339 struct pcp_semaphore *sem = pcp_from_lock(l);
1340
1341 int owner = 0;
1342
1343 preempt_disable();
1344
1345 if (sem->on_cpu == smp_processor_id())
1346 owner = sem->owner == t;
1347
1348 preempt_enable();
1349
1350 if (owner)
1351 pfp_pcp_unlock(l);
1352
1353 return 0;
1354}
1355
1356void pfp_pcp_free(struct litmus_lock* lock)
1357{
1358 kfree(pcp_from_lock(lock));
1359}
1360
1361
1362static struct litmus_lock_ops pfp_pcp_lock_ops = {
1363 .close = pfp_pcp_close,
1364 .lock = pfp_pcp_lock,
1365 .open = pfp_pcp_open,
1366 .unlock = pfp_pcp_unlock,
1367 .deallocate = pfp_pcp_free,
1368};
1369
1370
1371static struct litmus_lock* pfp_new_pcp(int on_cpu)
1372{
1373 struct pcp_semaphore* sem;
1374
1375 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1376 if (!sem)
1377 return NULL;
1378
1379 sem->litmus_lock.ops = &pfp_pcp_lock_ops;
1380 pcp_init_semaphore(sem, on_cpu);
1381
1382 return &sem->litmus_lock;
1383}
1384
1385/* ******************** DPCP support ********************** */
1386
1387struct dpcp_semaphore {
1388 struct litmus_lock litmus_lock;
1389 struct pcp_semaphore pcp;
1390 int owner_cpu;
1391};
1392
1393static inline struct dpcp_semaphore* dpcp_from_lock(struct litmus_lock* lock)
1394{
1395 return container_of(lock, struct dpcp_semaphore, litmus_lock);
1396}
1397
1398/* called with preemptions disabled */
1399static void pfp_migrate_to(int target_cpu)
1400{
1401 struct task_struct* t = current;
1402 pfp_domain_t *from;
1403
1404 if (get_partition(t) == target_cpu)
1405 return;
1406
1407 if (!is_realtime(t))
1408 {
1409 TRACE_TASK(t, "not migrating, not a RT task (anymore?)\n");
1410 return;
1411 }
1412
1413 /* make sure target_cpu makes sense */
1414 BUG_ON(target_cpu >= NR_CPUS || !cpu_online(target_cpu));
1415
1416 local_irq_disable();
1417
1418 from = task_pfp(t);
1419 raw_spin_lock(&from->slock);
1420
1421 /* Scheduled task should not be in any ready or release queue. Check
1422 * this while holding the lock to avoid RT mode transitions.*/
1423 BUG_ON(is_realtime(t) && is_queued(t));
1424
1425 /* switch partitions */
1426 tsk_rt(t)->task_params.cpu = target_cpu;
1427
1428 raw_spin_unlock(&from->slock);
1429
1430 /* Don't trace scheduler costs as part of
1431 * locking overhead. Scheduling costs are accounted for
1432 * explicitly. */
1433 TS_LOCK_SUSPEND;
1434
1435 local_irq_enable();
1436 preempt_enable_no_resched();
1437
1438 /* deschedule to be migrated */
1439 schedule();
1440
1441 /* we are now on the target processor */
1442 preempt_disable();
1443
1444 /* start recording costs again */
1445 TS_LOCK_RESUME;
1446
1447 BUG_ON(smp_processor_id() != target_cpu && is_realtime(t));
1448}
1449
1450int pfp_dpcp_lock(struct litmus_lock* l)
1451{
1452 struct task_struct* t = current;
1453 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1454 int eprio = effective_agent_priority(get_priority(t));
1455 int from = get_partition(t);
1456 int to = sem->pcp.on_cpu;
1457
1458 if (!is_realtime(t))
1459 return -EPERM;
1460
1461 /* prevent nested lock acquisition */
1462 if (tsk_rt(t)->num_locks_held ||
1463 tsk_rt(t)->num_local_locks_held)
1464 return -EBUSY;
1465
1466 preempt_disable();
1467
1468 /* Priority-boost ourself *before* we suspend so that
1469 * our priority is boosted when we resume. */
1470
1471 boost_priority(t, get_priority(t));
1472
1473 pfp_migrate_to(to);
1474
1475 pcp_raise_ceiling(&sem->pcp, eprio);
1476
1477 /* yep, we got it => execute request */
1478 sem->owner_cpu = from;
1479
1480 preempt_enable();
1481
1482 tsk_rt(t)->num_locks_held++;
1483
1484 return 0;
1485}
1486
1487int pfp_dpcp_unlock(struct litmus_lock* l)
1488{
1489 struct task_struct *t = current;
1490 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1491 int err = 0;
1492 int home;
1493
1494 preempt_disable();
1495
1496 if (sem->pcp.owner != t) {
1497 err = -EINVAL;
1498 goto out;
1499 }
1500
1501 /* The current owner should be executing on the correct CPU.
1502 *
1503 * If the owner transitioned out of RT mode or is exiting, then
1504 * it might have already been migrated away by the best-effort
1505 * scheduler and we just have to deal with it. */
1506 if (unlikely(!is_realtime(t) && sem->pcp.on_cpu != smp_processor_id())) {
1507 TRACE_TASK(t, "DPCP unlock cpu=%d, sem->pcp.on_cpu=%d\n", smp_processor_id(), sem->pcp.on_cpu);
1508 preempt_enable();
1509 err = litmus_be_migrate_to(sem->pcp.on_cpu);
1510 preempt_disable();
1511 TRACE_TASK(t, "post-migrate: cpu=%d, sem->pcp.on_cpu=%d err=%d\n", smp_processor_id(), sem->pcp.on_cpu, err);
1512 }
1513 BUG_ON(sem->pcp.on_cpu != smp_processor_id());
1514 err = 0;
1515
1516 tsk_rt(t)->num_locks_held--;
1517
1518 home = sem->owner_cpu;
1519
1520 /* give it back */
1521 pcp_lower_ceiling(&sem->pcp);
1522
1523 /* we lose the benefit of priority boosting */
1524 unboost_priority(t);
1525
1526 pfp_migrate_to(home);
1527
1528out:
1529 preempt_enable();
1530
1531 return err;
1532}
1533
1534int pfp_dpcp_open(struct litmus_lock* l, void* __user config)
1535{
1536 struct task_struct *t = current;
1537 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1538 int cpu, eprio;
1539
1540 if (!is_realtime(t))
1541 /* we need to know the real-time priority */
1542 return -EPERM;
1543
1544 if (get_user(cpu, (int*) config))
1545 return -EFAULT;
1546
1547 /* make sure the resource location matches */
1548 if (cpu != sem->pcp.on_cpu)
1549 return -EINVAL;
1550
1551 eprio = effective_agent_priority(get_priority(t));
1552
1553 pcp_update_prio_ceiling(&sem->pcp, eprio);
1554
1555 return 0;
1556}
1557
1558int pfp_dpcp_close(struct litmus_lock* l)
1559{
1560 struct task_struct *t = current;
1561 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1562 int owner = 0;
1563
1564 preempt_disable();
1565
1566 if (sem->pcp.on_cpu == smp_processor_id())
1567 owner = sem->pcp.owner == t;
1568
1569 preempt_enable();
1570
1571 if (owner)
1572 pfp_dpcp_unlock(l);
1573
1574 return 0;
1575}
1576
1577void pfp_dpcp_free(struct litmus_lock* lock)
1578{
1579 kfree(dpcp_from_lock(lock));
1580}
1581
1582static struct litmus_lock_ops pfp_dpcp_lock_ops = {
1583 .close = pfp_dpcp_close,
1584 .lock = pfp_dpcp_lock,
1585 .open = pfp_dpcp_open,
1586 .unlock = pfp_dpcp_unlock,
1587 .deallocate = pfp_dpcp_free,
1588};
1589
1590static struct litmus_lock* pfp_new_dpcp(int on_cpu)
1591{
1592 struct dpcp_semaphore* sem;
1593
1594 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1595 if (!sem)
1596 return NULL;
1597
1598 sem->litmus_lock.ops = &pfp_dpcp_lock_ops;
1599 sem->owner_cpu = NO_CPU;
1600 pcp_init_semaphore(&sem->pcp, on_cpu);
1601
1602 return &sem->litmus_lock;
1603}
1604
1605
1606/* ******************** DFLP support ********************** */
1607
1608struct dflp_semaphore {
1609 struct litmus_lock litmus_lock;
1610
1611 /* current resource holder */
1612 struct task_struct *owner;
1613 int owner_cpu;
1614
1615 /* FIFO queue of waiting tasks */
1616 wait_queue_head_t wait;
1617
1618 /* where is the resource assigned to */
1619 int on_cpu;
1620};
1621
1622static inline struct dflp_semaphore* dflp_from_lock(struct litmus_lock* lock)
1623{
1624 return container_of(lock, struct dflp_semaphore, litmus_lock);
1625}
1626
1627int pfp_dflp_lock(struct litmus_lock* l)
1628{
1629 struct task_struct* t = current;
1630 struct dflp_semaphore *sem = dflp_from_lock(l);
1631 int from = get_partition(t);
1632 int to = sem->on_cpu;
1633 unsigned long flags;
1634 wait_queue_t wait;
1635 lt_t time_of_request;
1636
1637 if (!is_realtime(t))
1638 return -EPERM;
1639
1640 /* prevent nested lock acquisition */
1641 if (tsk_rt(t)->num_locks_held ||
1642 tsk_rt(t)->num_local_locks_held)
1643 return -EBUSY;
1644
1645 preempt_disable();
1646
1647 /* tie-break by this point in time */
1648 time_of_request = litmus_clock();
1649
1650 /* Priority-boost ourself *before* we suspend so that
1651 * our priority is boosted when we resume. */
1652 boost_priority(t, time_of_request);
1653
1654 pfp_migrate_to(to);
1655
1656 /* Now on the right CPU, preemptions still disabled. */
1657
1658 spin_lock_irqsave(&sem->wait.lock, flags);
1659
1660 if (sem->owner) {
1661 /* resource is not free => must suspend and wait */
1662
1663 init_waitqueue_entry(&wait, t);
1664
1665 /* FIXME: interruptible would be nice some day */
1666 set_task_state(t, TASK_UNINTERRUPTIBLE);
1667
1668 __add_wait_queue_tail_exclusive(&sem->wait, &wait);
1669
1670 TS_LOCK_SUSPEND;
1671
1672 /* release lock before sleeping */
1673 spin_unlock_irqrestore(&sem->wait.lock, flags);
1674
1675 /* We depend on the FIFO order. Thus, we don't need to recheck
1676 * when we wake up; we are guaranteed to have the lock since
1677 * there is only one wake up per release.
1678 */
1679
1680 preempt_enable_no_resched();
1681
1682 schedule();
1683
1684 preempt_disable();
1685
1686 TS_LOCK_RESUME;
1687
1688 /* Since we hold the lock, no other task will change
1689 * ->owner. We can thus check it without acquiring the spin
1690 * lock. */
1691 BUG_ON(sem->owner != t);
1692 } else {
1693 /* it's ours now */
1694 sem->owner = t;
1695
1696 spin_unlock_irqrestore(&sem->wait.lock, flags);
1697 }
1698
1699 sem->owner_cpu = from;
1700
1701 preempt_enable();
1702
1703 tsk_rt(t)->num_locks_held++;
1704
1705 return 0;
1706}
1707
1708int pfp_dflp_unlock(struct litmus_lock* l)
1709{
1710 struct task_struct *t = current, *next;
1711 struct dflp_semaphore *sem = dflp_from_lock(l);
1712 int err = 0;
1713 int home;
1714 unsigned long flags;
1715
1716 preempt_disable();
1717
1718 spin_lock_irqsave(&sem->wait.lock, flags);
1719
1720 if (sem->owner != t) {
1721 err = -EINVAL;
1722 spin_unlock_irqrestore(&sem->wait.lock, flags);
1723 goto out;
1724 }
1725
1726 /* check if there are jobs waiting for this resource */
1727 next = __waitqueue_remove_first(&sem->wait);
1728 if (next) {
1729 /* next becomes the resource holder */
1730 sem->owner = next;
1731
1732 /* Wake up next. The waiting job is already priority-boosted. */
1733 wake_up_process(next);
1734 } else
1735 /* resource becomes available */
1736 sem->owner = NULL;
1737
1738 tsk_rt(t)->num_locks_held--;
1739
1740 home = sem->owner_cpu;
1741
1742 spin_unlock_irqrestore(&sem->wait.lock, flags);
1743
1744 /* we lose the benefit of priority boosting */
1745 unboost_priority(t);
1746
1747 pfp_migrate_to(home);
1748
1749out:
1750 preempt_enable();
1751
1752 return err;
1753}
1754
1755int pfp_dflp_open(struct litmus_lock* l, void* __user config)
1756{
1757 struct dflp_semaphore *sem = dflp_from_lock(l);
1758 int cpu;
1759
1760 if (get_user(cpu, (int*) config))
1761 return -EFAULT;
1762
1763 /* make sure the resource location matches */
1764 if (cpu != sem->on_cpu)
1765 return -EINVAL;
1766
1767 return 0;
1768}
1769
1770int pfp_dflp_close(struct litmus_lock* l)
1771{
1772 struct task_struct *t = current;
1773 struct dflp_semaphore *sem = dflp_from_lock(l);
1774 int owner = 0;
1775
1776 preempt_disable();
1777
1778 if (sem->on_cpu == smp_processor_id())
1779 owner = sem->owner == t;
1780
1781 preempt_enable();
1782
1783 if (owner)
1784 pfp_dflp_unlock(l);
1785
1786 return 0;
1787}
1788
1789void pfp_dflp_free(struct litmus_lock* lock)
1790{
1791 kfree(dflp_from_lock(lock));
1792}
1793
1794static struct litmus_lock_ops pfp_dflp_lock_ops = {
1795 .close = pfp_dflp_close,
1796 .lock = pfp_dflp_lock,
1797 .open = pfp_dflp_open,
1798 .unlock = pfp_dflp_unlock,
1799 .deallocate = pfp_dflp_free,
1800};
1801
1802static struct litmus_lock* pfp_new_dflp(int on_cpu)
1803{
1804 struct dflp_semaphore* sem;
1805
1806 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1807 if (!sem)
1808 return NULL;
1809
1810 sem->litmus_lock.ops = &pfp_dflp_lock_ops;
1811 sem->owner_cpu = NO_CPU;
1812 sem->owner = NULL;
1813 sem->on_cpu = on_cpu;
1814 init_waitqueue_head(&sem->wait);
1815
1816 return &sem->litmus_lock;
1817}
1818
1819
1820/* **** lock constructor **** */
1821
1822
1823static long pfp_allocate_lock(struct litmus_lock **lock, int type,
1824 void* __user config)
1825{
1826 int err = -ENXIO, cpu;
1827 struct srp_semaphore* srp;
1828
1829 /* P-FP supports the SRP and PCP for local resources and the FMLP,
1830 * MPCP, DPCP, and DFLP for global resources. */
1831 switch (type) {
1832 case FMLP_SEM:
1833 /* FIFO Mutex Locking Protocol */
1834 *lock = pfp_new_fmlp();
1835 if (*lock)
1836 err = 0;
1837 else
1838 err = -ENOMEM;
1839 break;
1840
1841 case MPCP_SEM:
1842 /* Multiprocessor Priority Ceiling Protocol */
1843 *lock = pfp_new_mpcp(0);
1844 if (*lock)
1845 err = 0;
1846 else
1847 err = -ENOMEM;
1848 break;
1849
1850 case MPCP_VS_SEM:
1851 /* Multiprocessor Priority Ceiling Protocol with virtual spinning */
1852 *lock = pfp_new_mpcp(1);
1853 if (*lock)
1854 err = 0;
1855 else
1856 err = -ENOMEM;
1857 break;
1858
1859 case DPCP_SEM:
1860 /* Distributed Priority Ceiling Protocol */
1861 if (get_user(cpu, (int*) config))
1862 return -EFAULT;
1863
1864 TRACE("DPCP_SEM: provided cpu=%d\n", cpu);
1865
1866 if (cpu >= NR_CPUS || !cpu_online(cpu))
1867 return -EINVAL;
1868
1869 *lock = pfp_new_dpcp(cpu);
1870 if (*lock)
1871 err = 0;
1872 else
1873 err = -ENOMEM;
1874 break;
1875
1876 case DFLP_SEM:
1877 /* Distributed FIFO Locking Protocol */
1878 if (get_user(cpu, (int*) config))
1879 return -EFAULT;
1880
1881 TRACE("DPCP_SEM: provided cpu=%d\n", cpu);
1882
1883 if (cpu >= NR_CPUS || !cpu_online(cpu))
1884 return -EINVAL;
1885
1886 *lock = pfp_new_dflp(cpu);
1887 if (*lock)
1888 err = 0;
1889 else
1890 err = -ENOMEM;
1891 break;
1892
1893 case SRP_SEM:
1894 /* Baker's Stack Resource Policy */
1895 srp = allocate_srp_semaphore();
1896 if (srp) {
1897 *lock = &srp->litmus_lock;
1898 err = 0;
1899 } else
1900 err = -ENOMEM;
1901 break;
1902
1903 case PCP_SEM:
1904 /* Priority Ceiling Protocol */
1905 if (!config)
1906 cpu = get_partition(current);
1907 else if (get_user(cpu, (int*) config))
1908 return -EFAULT;
1909
1910 if (cpu >= NR_CPUS || !cpu_online(cpu))
1911 return -EINVAL;
1912
1913 *lock = pfp_new_pcp(cpu);
1914 if (*lock)
1915 err = 0;
1916 else
1917 err = -ENOMEM;
1918 break;
1919 };
1920
1921 return err;
1922}
1923
1924#endif
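/* Editorial note, not part of this patch: how userspace would typically
 * reach pfp_allocate_lock() above via liblitmus. The names litmus_open_lock(),
 * litmus_lock(), and litmus_unlock(), and the argument order shown, are
 * assumptions based on liblitmus, not on this commit; the config pointer is
 * what get_user() reads for DPCP_SEM/DFLP_SEM/PCP_SEM.
 *
 *	int od, cpu = 2;
 *
 *	// resource id 0, namespace file "./locks", config = &cpu
 *	od = litmus_open_lock(DPCP_SEM, 0, "./locks", &cpu);
 *
 *	litmus_lock(od);    // -> pfp_dpcp_lock(): boost, migrate to cpu 2, raise ceiling
 *	                    // critical section executes on the resource's CPU
 *	litmus_unlock(od);  // -> pfp_dpcp_unlock(): lower ceiling, unboost, migrate home
 */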
1925
1926static long pfp_admit_task(struct task_struct* tsk)
1927{
1928 if (task_cpu(tsk) == tsk->rt_param.task_params.cpu &&
1929#ifdef CONFIG_RELEASE_MASTER
1930 /* don't allow tasks on release master CPU */
1931 task_cpu(tsk) != remote_dom(task_cpu(tsk))->release_master &&
1932#endif
1933 litmus_is_valid_fixed_prio(get_priority(tsk)))
1934 return 0;
1935 else
1936 return -EINVAL;
1937}
1938
1939static struct domain_proc_info pfp_domain_proc_info;
1940static long pfp_get_domain_proc_info(struct domain_proc_info **ret)
1941{
1942 *ret = &pfp_domain_proc_info;
1943 return 0;
1944}
1945
1946static void pfp_setup_domain_proc(void)
1947{
1948 int i, cpu;
1949 int release_master =
1950#ifdef CONFIG_RELEASE_MASTER
1951 atomic_read(&release_master_cpu);
1952#else
1953 NO_CPU;
1954#endif
1955 int num_rt_cpus = num_online_cpus() - (release_master != NO_CPU);
1956 struct cd_mapping *cpu_map, *domain_map;
1957
1958 memset(&pfp_domain_proc_info, 0, sizeof(pfp_domain_proc_info));
1959 init_domain_proc_info(&pfp_domain_proc_info, num_rt_cpus, num_rt_cpus);
1960 pfp_domain_proc_info.num_cpus = num_rt_cpus;
1961 pfp_domain_proc_info.num_domains = num_rt_cpus;
1962 for (cpu = 0, i = 0; cpu < num_online_cpus(); ++cpu) {
1963 if (cpu == release_master)
1964 continue;
1965 cpu_map = &pfp_domain_proc_info.cpu_to_domains[i];
1966 domain_map = &pfp_domain_proc_info.domain_to_cpus[i];
1967
1968 cpu_map->id = cpu;
1969 domain_map->id = i; /* enumerate w/o counting the release master */
1970 cpumask_set_cpu(i, cpu_map->mask);
1971 cpumask_set_cpu(cpu, domain_map->mask);
1972 ++i;
1973 }
1974}
1975
1976static long pfp_activate_plugin(void)
1977{
1978#if defined(CONFIG_RELEASE_MASTER) || defined(CONFIG_LITMUS_LOCKING)
1979 int cpu;
1980#endif
1981
1982#ifdef CONFIG_RELEASE_MASTER
1983 for_each_online_cpu(cpu) {
1984 remote_dom(cpu)->release_master = atomic_read(&release_master_cpu);
1985 }
1986#endif
1987
1988#ifdef CONFIG_LITMUS_LOCKING
1989 get_srp_prio = pfp_get_srp_prio;
1990
1991 for_each_online_cpu(cpu) {
1992 init_waitqueue_head(&per_cpu(mpcpvs_vspin_wait, cpu));
1993 per_cpu(mpcpvs_vspin, cpu) = NULL;
1994
1995 pcp_init_state(&per_cpu(pcp_state, cpu));
1996 pfp_doms[cpu] = remote_pfp(cpu);
1997 per_cpu(fmlp_timestamp,cpu) = 0;
1998 }
1999
2000#endif
2001
2002 pfp_setup_domain_proc();
2003
2004 return 0;
2005}
2006
2007static long pfp_deactivate_plugin(void)
2008{
2009 destroy_domain_proc_info(&pfp_domain_proc_info);
2010 return 0;
2011}
2012
2013/* Plugin object */
2014static struct sched_plugin pfp_plugin __cacheline_aligned_in_smp = {
2015 .plugin_name = "P-FP",
2016 .task_new = pfp_task_new,
2017 .complete_job = complete_job,
2018 .task_exit = pfp_task_exit,
2019 .schedule = pfp_schedule,
2020 .task_wake_up = pfp_task_wake_up,
2021 .task_block = pfp_task_block,
2022 .admit_task = pfp_admit_task,
2023 .activate_plugin = pfp_activate_plugin,
2024 .deactivate_plugin = pfp_deactivate_plugin,
2025 .get_domain_proc_info = pfp_get_domain_proc_info,
2026#ifdef CONFIG_LITMUS_LOCKING
2027 .allocate_lock = pfp_allocate_lock,
2028 .finish_switch = pfp_finish_switch,
2029#endif
2030};
2031
2032
2033static int __init init_pfp(void)
2034{
2035 int i;
2036
2037 /* We do not really want to support cpu hotplug, do we? ;)
2038 * However, if we are so crazy to do so,
2039 * we cannot use num_online_cpus()
2040 */
2041 for (i = 0; i < num_online_cpus(); i++) {
2042 pfp_domain_init(remote_pfp(i), i);
2043 }
2044 return register_sched_plugin(&pfp_plugin);
2045}
2046
2047module_init(init_pfp);