From 2a45e01a8827379c709d228a5c9b5f21011d4277 Mon Sep 17 00:00:00 2001 From: Bjoern Brandenburg <bbb@mpi-sws.org> Date: Sun, 9 Aug 2015 13:18:55 +0200 Subject: Add P-FP scheduler plugin --- litmus/Makefile | 4 +- litmus/fp_common.c | 17 +- litmus/sched_pfp.c | 2036 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 2051 insertions(+), 6 deletions(-) create mode 100644 litmus/sched_pfp.c diff --git a/litmus/Makefile b/litmus/Makefile index 895cf3a2d599..fb12398c4b92 100644 --- a/litmus/Makefile +++ b/litmus/Makefile @@ -20,7 +20,9 @@ obj-y = sched_plugin.o litmus.o \ ctrldev.o \ uncachedev.o \ sched_gsn_edf.o \ - sched_psn_edf.o + sched_psn_edf.o \ + sched_pfp.o + obj-$(CONFIG_FEATHER_TRACE) += ft_event.o ftdev.o diff --git a/litmus/fp_common.c b/litmus/fp_common.c index 964a4729deff..ff0f30a9f536 100644 --- a/litmus/fp_common.c +++ b/litmus/fp_common.c @@ -32,7 +32,6 @@ int fp_higher_prio(struct task_struct* first, return 0; } - /* check for NULL tasks */ if (!first || !second) return first && !second; @@ -50,6 +49,15 @@ int fp_higher_prio(struct task_struct* first, if (unlikely(second->rt_param.inh_task)) second_task = second->rt_param.inh_task; + /* Comparisons to itself are only possible with + * priority inheritance when svc_preempt interrupt just + * before scheduling (and everything that could follow in the + * ready queue). Always favour the original job, as that one will just + * suspend itself to resolve this. + */ + if(first_task == second_task) + return first_task == first; + /* Check for priority boosting. Tie-break by start of boosting. */ if (unlikely(is_priority_boosted(first_task))) { @@ -65,11 +73,10 @@ int fp_higher_prio(struct task_struct* first, /* second_task is boosted, first is not*/ return 0; -#endif - - /* Comparisons to itself are not expected; priority inheritance - * should also not cause this to happen. */ +#else + /* No locks, no priority inheritance, no comparisons to itself */ BUG_ON(first_task == second_task); +#endif if (get_priority(first_task) < get_priority(second_task)) return 1; diff --git a/litmus/sched_pfp.c b/litmus/sched_pfp.c new file mode 100644 index 000000000000..f38e9bc175b5 --- /dev/null +++ b/litmus/sched_pfp.c @@ -0,0 +1,2036 @@ +/* + * litmus/sched_pfp.c + * + * Implementation of partitioned fixed-priority scheduling. + * Based on PSN-EDF. 
+ */ + +#include <linux/percpu.h> +#include <linux/sched.h> +#include <linux/list.h> +#include <linux/spinlock.h> +#include <linux/module.h> + +#include <litmus/litmus.h> +#include <litmus/wait.h> +#include <litmus/jobs.h> +#include <litmus/preempt.h> +#include <litmus/fp_common.h> +#include <litmus/sched_plugin.h> +#include <litmus/sched_trace.h> +#include <litmus/trace.h> +#include <litmus/budget.h> + +/* to set up domain/cpu mappings */ +#include <litmus/litmus_proc.h> +#include <linux/uaccess.h> + + +typedef struct { + rt_domain_t domain; + struct fp_prio_queue ready_queue; + int cpu; + struct task_struct* scheduled; /* only RT tasks */ +/* + * scheduling lock slock + * protects the domain and serializes scheduling decisions + */ +#define slock domain.ready_lock + +} pfp_domain_t; + +DEFINE_PER_CPU(pfp_domain_t, pfp_domains); + +pfp_domain_t* pfp_doms[NR_CPUS]; + +#define local_pfp (this_cpu_ptr(&pfp_domains)) +#define remote_dom(cpu) (&per_cpu(pfp_domains, cpu).domain) +#define remote_pfp(cpu) (&per_cpu(pfp_domains, cpu)) +#define task_dom(task) remote_dom(get_partition(task)) +#define task_pfp(task) remote_pfp(get_partition(task)) + + +#ifdef CONFIG_LITMUS_LOCKING +DEFINE_PER_CPU(uint64_t,fmlp_timestamp); +#endif + +/* we assume the lock is being held */ +static void preempt(pfp_domain_t *pfp) +{ + preempt_if_preemptable(pfp->scheduled, pfp->cpu); +} + +static unsigned int priority_index(struct task_struct* t) +{ +#ifdef CONFIG_LITMUS_LOCKING + if (unlikely(t->rt_param.inh_task)) + /* use effective priority */ + t = t->rt_param.inh_task; + + if (is_priority_boosted(t)) { + /* zero is reserved for priority-boosted tasks */ + return 0; + } else +#endif + return get_priority(t); +} + +static void pfp_release_jobs(rt_domain_t* rt, struct bheap* tasks) +{ + pfp_domain_t *pfp = container_of(rt, pfp_domain_t, domain); + unsigned long flags; + struct task_struct* t; + struct bheap_node* hn; + + raw_spin_lock_irqsave(&pfp->slock, flags); + + while (!bheap_empty(tasks)) { + hn = bheap_take(fp_ready_order, tasks); + t = bheap2task(hn); + TRACE_TASK(t, "released (part:%d prio:%d)\n", + get_partition(t), get_priority(t)); + fp_prio_add(&pfp->ready_queue, t, priority_index(t)); + } + + /* do we need to preempt? 
*/ + if (fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled)) { + TRACE_CUR("preempted by new release\n"); + preempt(pfp); + } + + raw_spin_unlock_irqrestore(&pfp->slock, flags); +} + +static void pfp_preempt_check(pfp_domain_t *pfp) +{ + if (fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled)) + preempt(pfp); +} + +static void pfp_domain_init(pfp_domain_t* pfp, + int cpu) +{ + fp_domain_init(&pfp->domain, NULL, pfp_release_jobs); + pfp->cpu = cpu; + pfp->scheduled = NULL; + fp_prio_queue_init(&pfp->ready_queue); +} + +static void requeue(struct task_struct* t, pfp_domain_t *pfp) +{ + tsk_rt(t)->completed = 0; + if (is_released(t, litmus_clock())) + fp_prio_add(&pfp->ready_queue, t, priority_index(t)); + else + add_release(&pfp->domain, t); /* it has got to wait */ +} + +static void job_completion(struct task_struct* t, int forced) +{ + sched_trace_task_completion(t, forced); + TRACE_TASK(t, "job_completion(forced=%d).\n", forced); + + tsk_rt(t)->completed = 0; + prepare_for_next_period(t); + if (is_released(t, litmus_clock())) + sched_trace_task_release(t); +} + +static struct task_struct* pfp_schedule(struct task_struct * prev) +{ + pfp_domain_t* pfp = local_pfp; + struct task_struct* next; + + int out_of_time, sleep, preempt, np, exists, blocks, resched, migrate; + + raw_spin_lock(&pfp->slock); + + /* sanity checking + * differently from gedf, when a task exits (dead) + * pfp->schedule may be null and prev _is_ realtime + */ + BUG_ON(pfp->scheduled && pfp->scheduled != prev); + BUG_ON(pfp->scheduled && !is_realtime(prev)); + + /* (0) Determine state */ + exists = pfp->scheduled != NULL; + blocks = exists && !is_current_running(); + out_of_time = exists && budget_enforced(pfp->scheduled) + && budget_exhausted(pfp->scheduled); + np = exists && is_np(pfp->scheduled); + sleep = exists && is_completed(pfp->scheduled); + migrate = exists && get_partition(pfp->scheduled) != pfp->cpu; + preempt = !blocks && (migrate || fp_preemption_needed(&pfp->ready_queue, prev)); + + /* If we need to preempt do so. + * The following checks set resched to 1 in case of special + * circumstances. + */ + resched = preempt; + + /* If a task blocks we have no choice but to reschedule. + */ + if (blocks) + resched = 1; + + /* Request a sys_exit_np() call if we would like to preempt but cannot. + * Multiple calls to request_exit_np() don't hurt. + */ + if (np && (out_of_time || preempt || sleep)) + request_exit_np(pfp->scheduled); + + /* Any task that is preemptable and either exhausts its execution + * budget or wants to sleep completes. We may have to reschedule after + * this. + */ + if (!np && (out_of_time || sleep)) { + job_completion(pfp->scheduled, !sleep); + resched = 1; + } + + /* The final scheduling decision. Do we need to switch for some reason? + * Switch if we are in RT mode and have no task or if we need to + * resched. + */ + next = NULL; + if ((!np || blocks) && (resched || !exists)) { + /* When preempting a task that does not block, then + * re-insert it into either the ready queue or the + * release queue (if it completed). requeue() picks + * the appropriate queue. 
+ */ + if (pfp->scheduled && !blocks && !migrate) + requeue(pfp->scheduled, pfp); + next = fp_prio_take(&pfp->ready_queue); + if (next == prev) { + struct task_struct *t = fp_prio_peek(&pfp->ready_queue); + TRACE_TASK(next, "next==prev sleep=%d oot=%d np=%d preempt=%d migrate=%d " + "boost=%d empty=%d prio-idx=%u prio=%u\n", + sleep, out_of_time, np, preempt, migrate, + is_priority_boosted(next), + t == NULL, + priority_index(next), + get_priority(next)); + if (t) + TRACE_TASK(t, "waiter boost=%d prio-idx=%u prio=%u\n", + is_priority_boosted(t), + priority_index(t), + get_priority(t)); + } + /* If preempt is set, we should not see the same task again. */ + BUG_ON(preempt && next == prev); + /* Similarly, if preempt is set, then next may not be NULL, + * unless it's a migration. */ + BUG_ON(preempt && !migrate && next == NULL); + } else + /* Only override Linux scheduler if we have a real-time task + * scheduled that needs to continue. + */ + if (exists) + next = prev; + + if (next) { + TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); + } else if (exists) { + TRACE("becoming idle at %llu\n", litmus_clock()); + } + + pfp->scheduled = next; + sched_state_task_picked(); + raw_spin_unlock(&pfp->slock); + + return next; +} + +#ifdef CONFIG_LITMUS_LOCKING + +/* prev is no longer scheduled --- see if it needs to migrate */ +static void pfp_finish_switch(struct task_struct *prev) +{ + pfp_domain_t *to; + + if (is_realtime(prev) && + prev->state == TASK_RUNNING && + get_partition(prev) != smp_processor_id()) { + TRACE_TASK(prev, "needs to migrate from P%d to P%d\n", + smp_processor_id(), get_partition(prev)); + + to = task_pfp(prev); + + raw_spin_lock(&to->slock); + + TRACE_TASK(prev, "adding to queue on P%d\n", to->cpu); + requeue(prev, to); + if (fp_preemption_needed(&to->ready_queue, to->scheduled)) + preempt(to); + + raw_spin_unlock(&to->slock); + + } +} + +#endif + +/* Prepare a task for running in RT mode + */ +static void pfp_task_new(struct task_struct * t, int on_rq, int is_scheduled) +{ + pfp_domain_t* pfp = task_pfp(t); + unsigned long flags; + + TRACE_TASK(t, "P-FP: task new, cpu = %d\n", + t->rt_param.task_params.cpu); + + /* setup job parameters */ + release_at(t, litmus_clock()); + + raw_spin_lock_irqsave(&pfp->slock, flags); + if (is_scheduled) { + /* there shouldn't be anything else running at the time */ + BUG_ON(pfp->scheduled); + pfp->scheduled = t; + } else if (on_rq) { + requeue(t, pfp); + /* maybe we have to reschedule */ + pfp_preempt_check(pfp); + } + raw_spin_unlock_irqrestore(&pfp->slock, flags); +} + +static void pfp_task_wake_up(struct task_struct *task) +{ + unsigned long flags; + pfp_domain_t* pfp = task_pfp(task); + lt_t now; + + TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); + raw_spin_lock_irqsave(&pfp->slock, flags); + +#ifdef CONFIG_LITMUS_LOCKING + /* Should only be queued when processing a fake-wake up due to a + * migration-related state change. */ + if (unlikely(is_queued(task))) { + TRACE_TASK(task, "WARNING: waking task still queued. Is this right?\n"); + goto out_unlock; + } +#else + BUG_ON(is_queued(task)); +#endif + now = litmus_clock(); + if (is_sporadic(task) && is_tardy(task, now) +#ifdef CONFIG_LITMUS_LOCKING + /* We need to take suspensions because of semaphores into + * account! If a job resumes after being suspended due to acquiring + * a semaphore, it should never be treated as a new job release. 
+ */ + && !is_priority_boosted(task) +#endif + ) { + /* new sporadic release */ + release_at(task, now); + sched_trace_task_release(task); + } + + /* Only add to ready queue if it is not the currently-scheduled + * task. This could be the case if a task was woken up concurrently + * on a remote CPU before the executing CPU got around to actually + * de-scheduling the task, i.e., wake_up() raced with schedule() + * and won. Also, don't requeue if it is still queued, which can + * happen under the DPCP due wake-ups racing with migrations. + */ + if (pfp->scheduled != task) { + requeue(task, pfp); + pfp_preempt_check(pfp); + } + +#ifdef CONFIG_LITMUS_LOCKING +out_unlock: +#endif + raw_spin_unlock_irqrestore(&pfp->slock, flags); + TRACE_TASK(task, "wake up done\n"); +} + +static void pfp_task_block(struct task_struct *t) +{ + /* only running tasks can block, thus t is in no queue */ + TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state); + + BUG_ON(!is_realtime(t)); + + /* If this task blocked normally, it shouldn't be queued. The exception is + * if this is a simulated block()/wakeup() pair from the pull-migration code path. + * This should only happen if the DPCP is being used. + */ +#ifdef CONFIG_LITMUS_LOCKING + if (unlikely(is_queued(t))) + TRACE_TASK(t, "WARNING: blocking task still queued. Is this right?\n"); +#else + BUG_ON(is_queued(t)); +#endif +} + +static void pfp_task_exit(struct task_struct * t) +{ + unsigned long flags; + pfp_domain_t* pfp = task_pfp(t); + rt_domain_t* dom; + + raw_spin_lock_irqsave(&pfp->slock, flags); + if (is_queued(t)) { + BUG(); /* This currently doesn't work. */ + /* dequeue */ + dom = task_dom(t); + remove(dom, t); + } + if (pfp->scheduled == t) { + pfp->scheduled = NULL; + preempt(pfp); + } + TRACE_TASK(t, "RIP, now reschedule\n"); + + raw_spin_unlock_irqrestore(&pfp->slock, flags); +} + +#ifdef CONFIG_LITMUS_LOCKING + +#include <litmus/fdso.h> +#include <litmus/srp.h> + +static void fp_dequeue(pfp_domain_t* pfp, struct task_struct* t) +{ + BUG_ON(pfp->scheduled == t && is_queued(t)); + if (is_queued(t)) + fp_prio_remove(&pfp->ready_queue, t, priority_index(t)); +} + +static void fp_set_prio_inh(pfp_domain_t* pfp, struct task_struct* t, + struct task_struct* prio_inh) +{ + int requeue; + + if (!t || t->rt_param.inh_task == prio_inh) { + /* no update required */ + if (t) + TRACE_TASK(t, "no prio-inh update required\n"); + return; + } + + requeue = is_queued(t); + TRACE_TASK(t, "prio-inh: is_queued:%d\n", requeue); + + if (requeue) + /* first remove */ + fp_dequeue(pfp, t); + + t->rt_param.inh_task = prio_inh; + + if (requeue) + /* add again to the right queue */ + fp_prio_add(&pfp->ready_queue, t, priority_index(t)); +} + +static int effective_agent_priority(int prio) +{ + /* make sure agents have higher priority */ + return prio - LITMUS_MAX_PRIORITY; +} + +static lt_t prio_point(int eprio) +{ + /* make sure we have non-negative prio points */ + return eprio + LITMUS_MAX_PRIORITY; +} + +static void boost_priority(struct task_struct* t, lt_t priority_point) +{ + unsigned long flags; + pfp_domain_t* pfp = task_pfp(t); + + raw_spin_lock_irqsave(&pfp->slock, flags); + + + TRACE_TASK(t, "priority boosted at %llu\n", litmus_clock()); + + tsk_rt(t)->priority_boosted = 1; + /* tie-break by protocol-specific priority point */ + tsk_rt(t)->boost_start_time = priority_point; + + /* Priority boosting currently only takes effect for already-scheduled + * tasks. This is sufficient since priority boosting only kicks in as + * part of lock acquisitions. 
*/ + BUG_ON(pfp->scheduled != t); + + raw_spin_unlock_irqrestore(&pfp->slock, flags); +} + +static void unboost_priority(struct task_struct* t) +{ + unsigned long flags; + pfp_domain_t* pfp = task_pfp(t); + + raw_spin_lock_irqsave(&pfp->slock, flags); + + /* Assumption: this only happens when the job is scheduled. + * Exception: If t transitioned to non-real-time mode, we no longer + * care abou tit. */ + BUG_ON(pfp->scheduled != t && is_realtime(t)); + + TRACE_TASK(t, "priority restored at %llu\n", litmus_clock()); + + tsk_rt(t)->priority_boosted = 0; + tsk_rt(t)->boost_start_time = 0; + + /* check if this changes anything */ + if (fp_preemption_needed(&pfp->ready_queue, pfp->scheduled)) + preempt(pfp); + + raw_spin_unlock_irqrestore(&pfp->slock, flags); +} + +/* ******************** SRP support ************************ */ + +static unsigned int pfp_get_srp_prio(struct task_struct* t) +{ + return get_priority(t); +} + +/* ******************** FMLP support ********************** */ + +struct fmlp_semaphore { + struct litmus_lock litmus_lock; + + /* current resource holder */ + struct task_struct *owner; + + /* FIFO queue of waiting tasks */ + wait_queue_head_t wait; +}; + +static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock) +{ + return container_of(lock, struct fmlp_semaphore, litmus_lock); +} + +static inline lt_t +fmlp_clock(void) +{ + return (lt_t) this_cpu_inc_return(fmlp_timestamp); +} + +int pfp_fmlp_lock(struct litmus_lock* l) +{ + struct task_struct* t = current; + struct fmlp_semaphore *sem = fmlp_from_lock(l); + wait_queue_t wait; + unsigned long flags; + lt_t time_of_request; + + if (!is_realtime(t)) + return -EPERM; + + /* prevent nested lock acquisition --- not supported by FMLP */ + if (tsk_rt(t)->num_locks_held || + tsk_rt(t)->num_local_locks_held) + return -EBUSY; + + spin_lock_irqsave(&sem->wait.lock, flags); + + /* tie-break by this point in time */ + time_of_request = fmlp_clock(); + + /* Priority-boost ourself *before* we suspend so that + * our priority is boosted when we resume. */ + boost_priority(t, time_of_request); + + if (sem->owner) { + /* resource is not free => must suspend and wait */ + + init_waitqueue_entry(&wait, t); + + /* FIXME: interruptible would be nice some day */ + set_task_state(t, TASK_UNINTERRUPTIBLE); + + __add_wait_queue_tail_exclusive(&sem->wait, &wait); + + TS_LOCK_SUSPEND; + + /* release lock before sleeping */ + spin_unlock_irqrestore(&sem->wait.lock, flags); + + /* We depend on the FIFO order. Thus, we don't need to recheck + * when we wake up; we are guaranteed to have the lock since + * there is only one wake up per release. + */ + + schedule(); + + TS_LOCK_RESUME; + + /* Since we hold the lock, no other task will change + * ->owner. We can thus check it without acquiring the spin + * lock. 
*/ + BUG_ON(sem->owner != t); + } else { + /* it's ours now */ + sem->owner = t; + + spin_unlock_irqrestore(&sem->wait.lock, flags); + } + + tsk_rt(t)->num_locks_held++; + + return 0; +} + +int pfp_fmlp_unlock(struct litmus_lock* l) +{ + struct task_struct *t = current, *next = NULL; + struct fmlp_semaphore *sem = fmlp_from_lock(l); + unsigned long flags; + int err = 0; + + preempt_disable(); + + spin_lock_irqsave(&sem->wait.lock, flags); + + if (sem->owner != t) { + err = -EINVAL; + goto out; + } + + tsk_rt(t)->num_locks_held--; + + /* we lose the benefit of priority boosting */ + + unboost_priority(t); + + /* check if there are jobs waiting for this resource */ + next = __waitqueue_remove_first(&sem->wait); + sem->owner = next; + +out: + spin_unlock_irqrestore(&sem->wait.lock, flags); + + /* Wake up next. The waiting job is already priority-boosted. */ + if(next) { + wake_up_process(next); + } + + preempt_enable(); + + return err; +} + +int pfp_fmlp_close(struct litmus_lock* l) +{ + struct task_struct *t = current; + struct fmlp_semaphore *sem = fmlp_from_lock(l); + unsigned long flags; + + int owner; + + spin_lock_irqsave(&sem->wait.lock, flags); + + owner = sem->owner == t; + + spin_unlock_irqrestore(&sem->wait.lock, flags); + + if (owner) + pfp_fmlp_unlock(l); + + return 0; +} + +void pfp_fmlp_free(struct litmus_lock* lock) +{ + kfree(fmlp_from_lock(lock)); +} + +static struct litmus_lock_ops pfp_fmlp_lock_ops = { + .close = pfp_fmlp_close, + .lock = pfp_fmlp_lock, + .unlock = pfp_fmlp_unlock, + .deallocate = pfp_fmlp_free, +}; + +static struct litmus_lock* pfp_new_fmlp(void) +{ + struct fmlp_semaphore* sem; + + sem = kmalloc(sizeof(*sem), GFP_KERNEL); + if (!sem) + return NULL; + + sem->owner = NULL; + init_waitqueue_head(&sem->wait); + sem->litmus_lock.ops = &pfp_fmlp_lock_ops; + + return &sem->litmus_lock; +} + +/* ******************** MPCP support ********************** */ + +struct mpcp_semaphore { + struct litmus_lock litmus_lock; + + /* current resource holder */ + struct task_struct *owner; + + /* priority queue of waiting tasks */ + wait_queue_head_t wait; + + /* priority ceiling per cpu */ + unsigned int prio_ceiling[NR_CPUS]; + + /* should jobs spin "virtually" for this resource? */ + int vspin; +}; + +#define OMEGA_CEILING UINT_MAX + +/* Since jobs spin "virtually" while waiting to acquire a lock, + * they first must aquire a local per-cpu resource. + */ +static DEFINE_PER_CPU(wait_queue_head_t, mpcpvs_vspin_wait); +static DEFINE_PER_CPU(struct task_struct*, mpcpvs_vspin); + +/* called with preemptions off <=> no local modifications */ +static void mpcp_vspin_enter(void) +{ + struct task_struct* t = current; + + while (1) { + if (this_cpu_read(mpcpvs_vspin) == NULL) { + /* good, we get to issue our request */ + this_cpu_write(mpcpvs_vspin, t); + break; + } else { + /* some job is spinning => enqueue in request queue */ + prio_wait_queue_t wait; + wait_queue_head_t* vspin = this_cpu_ptr(&mpcpvs_vspin_wait); + unsigned long flags; + + /* ordered by regular priority */ + init_prio_waitqueue_entry(&wait, t, prio_point(get_priority(t))); + + spin_lock_irqsave(&vspin->lock, flags); + + set_task_state(t, TASK_UNINTERRUPTIBLE); + + __add_wait_queue_prio_exclusive(vspin, &wait); + + spin_unlock_irqrestore(&vspin->lock, flags); + + TS_LOCK_SUSPEND; + + preempt_enable_no_resched(); + + schedule(); + + preempt_disable(); + + TS_LOCK_RESUME; + /* Recheck if we got it --- some higher-priority process might + * have swooped in. 
*/ + } + } + /* ok, now it is ours */ +} + +/* called with preemptions off */ +static void mpcp_vspin_exit(void) +{ + struct task_struct* t = current, *next; + unsigned long flags; + wait_queue_head_t* vspin = this_cpu_ptr(&mpcpvs_vspin_wait); + + BUG_ON(this_cpu_read(mpcpvs_vspin) != t); + + /* no spinning job */ + this_cpu_write(mpcpvs_vspin, NULL); + + /* see if anyone is waiting for us to stop "spinning" */ + spin_lock_irqsave(&vspin->lock, flags); + next = __waitqueue_remove_first(vspin); + + if (next) + wake_up_process(next); + + spin_unlock_irqrestore(&vspin->lock, flags); +} + +static inline struct mpcp_semaphore* mpcp_from_lock(struct litmus_lock* lock) +{ + return container_of(lock, struct mpcp_semaphore, litmus_lock); +} + +int pfp_mpcp_lock(struct litmus_lock* l) +{ + struct task_struct* t = current; + struct mpcp_semaphore *sem = mpcp_from_lock(l); + prio_wait_queue_t wait; + unsigned long flags; + + if (!is_realtime(t)) + return -EPERM; + + /* prevent nested lock acquisition */ + if (tsk_rt(t)->num_locks_held || + tsk_rt(t)->num_local_locks_held) + return -EBUSY; + + preempt_disable(); + + if (sem->vspin) + mpcp_vspin_enter(); + + /* Priority-boost ourself *before* we suspend so that + * our priority is boosted when we resume. Use the priority + * ceiling for the local partition. */ + boost_priority(t, sem->prio_ceiling[get_partition(t)]); + + spin_lock_irqsave(&sem->wait.lock, flags); + + preempt_enable_no_resched(); + + if (sem->owner) { + /* resource is not free => must suspend and wait */ + + /* ordered by regular priority */ + init_prio_waitqueue_entry(&wait, t, prio_point(get_priority(t))); + + /* FIXME: interruptible would be nice some day */ + set_task_state(t, TASK_UNINTERRUPTIBLE); + + __add_wait_queue_prio_exclusive(&sem->wait, &wait); + + TS_LOCK_SUSPEND; + + /* release lock before sleeping */ + spin_unlock_irqrestore(&sem->wait.lock, flags); + + /* We depend on the FIFO order. Thus, we don't need to recheck + * when we wake up; we are guaranteed to have the lock since + * there is only one wake up per release. + */ + + schedule(); + + TS_LOCK_RESUME; + + /* Since we hold the lock, no other task will change + * ->owner. We can thus check it without acquiring the spin + * lock. */ + BUG_ON(sem->owner != t); + } else { + /* it's ours now */ + sem->owner = t; + + spin_unlock_irqrestore(&sem->wait.lock, flags); + } + + tsk_rt(t)->num_locks_held++; + + return 0; +} + +int pfp_mpcp_unlock(struct litmus_lock* l) +{ + struct task_struct *t = current, *next = NULL; + struct mpcp_semaphore *sem = mpcp_from_lock(l); + unsigned long flags; + int err = 0; + + preempt_disable(); + + spin_lock_irqsave(&sem->wait.lock, flags); + + if (sem->owner != t) { + err = -EINVAL; + goto out; + } + + tsk_rt(t)->num_locks_held--; + + /* we lose the benefit of priority boosting */ + unboost_priority(t); + + /* check if there are jobs waiting for this resource */ + next = __waitqueue_remove_first(&sem->wait); + sem->owner = next; + +out: + spin_unlock_irqrestore(&sem->wait.lock, flags); + + /* Wake up next. The waiting job is already priority-boosted. 
*/ + if(next) { + wake_up_process(next); + } + + if (sem->vspin && err == 0) { + mpcp_vspin_exit(); + } + + preempt_enable(); + + return err; +} + +int pfp_mpcp_open(struct litmus_lock* l, void* config) +{ + struct task_struct *t = current; + int cpu, local_cpu; + struct mpcp_semaphore *sem = mpcp_from_lock(l); + unsigned long flags; + + if (!is_realtime(t)) + /* we need to know the real-time priority */ + return -EPERM; + + local_cpu = get_partition(t); + + spin_lock_irqsave(&sem->wait.lock, flags); + for (cpu = 0; cpu < NR_CPUS; cpu++) { + if (cpu != local_cpu) { + sem->prio_ceiling[cpu] = min(sem->prio_ceiling[cpu], + get_priority(t)); + TRACE_CUR("priority ceiling for sem %p is now %d on cpu %d\n", + sem, sem->prio_ceiling[cpu], cpu); + } + } + spin_unlock_irqrestore(&sem->wait.lock, flags); + + return 0; +} + +int pfp_mpcp_close(struct litmus_lock* l) +{ + struct task_struct *t = current; + struct mpcp_semaphore *sem = mpcp_from_lock(l); + unsigned long flags; + + int owner; + + spin_lock_irqsave(&sem->wait.lock, flags); + + owner = sem->owner == t; + + spin_unlock_irqrestore(&sem->wait.lock, flags); + + if (owner) + pfp_mpcp_unlock(l); + + return 0; +} + +void pfp_mpcp_free(struct litmus_lock* lock) +{ + kfree(mpcp_from_lock(lock)); +} + +static struct litmus_lock_ops pfp_mpcp_lock_ops = { + .close = pfp_mpcp_close, + .lock = pfp_mpcp_lock, + .open = pfp_mpcp_open, + .unlock = pfp_mpcp_unlock, + .deallocate = pfp_mpcp_free, +}; + +static struct litmus_lock* pfp_new_mpcp(int vspin) +{ + struct mpcp_semaphore* sem; + int cpu; + + sem = kmalloc(sizeof(*sem), GFP_KERNEL); + if (!sem) + return NULL; + + sem->owner = NULL; + init_waitqueue_head(&sem->wait); + sem->litmus_lock.ops = &pfp_mpcp_lock_ops; + + for (cpu = 0; cpu < NR_CPUS; cpu++) + sem->prio_ceiling[cpu] = OMEGA_CEILING; + + /* mark as virtual spinning */ + sem->vspin = vspin; + + return &sem->litmus_lock; +} + + +/* ******************** PCP support ********************** */ + + +struct pcp_semaphore { + struct litmus_lock litmus_lock; + + struct list_head ceiling; + + /* current resource holder */ + struct task_struct *owner; + + /* priority ceiling --- can be negative due to DPCP support */ + int prio_ceiling; + + /* on which processor is this PCP semaphore allocated? 
*/ + int on_cpu; +}; + +static inline struct pcp_semaphore* pcp_from_lock(struct litmus_lock* lock) +{ + return container_of(lock, struct pcp_semaphore, litmus_lock); +} + + +struct pcp_state { + struct list_head system_ceiling; + + /* highest-priority waiting task */ + struct task_struct* hp_waiter; + + /* list of jobs waiting to get past the system ceiling */ + wait_queue_head_t ceiling_blocked; +}; + +static void pcp_init_state(struct pcp_state* s) +{ + INIT_LIST_HEAD(&s->system_ceiling); + s->hp_waiter = NULL; + init_waitqueue_head(&s->ceiling_blocked); +} + +static DEFINE_PER_CPU(struct pcp_state, pcp_state); + +/* assumes preemptions are off */ +static struct pcp_semaphore* pcp_get_ceiling(void) +{ + struct list_head* top = &(this_cpu_ptr(&pcp_state)->system_ceiling); + return list_first_entry_or_null(top, struct pcp_semaphore, ceiling); +} + +/* assumes preempt off */ +static void pcp_add_ceiling(struct pcp_semaphore* sem) +{ + struct list_head *pos; + struct list_head *in_use = &(this_cpu_ptr(&pcp_state)->system_ceiling); + struct pcp_semaphore* held; + + BUG_ON(sem->on_cpu != smp_processor_id()); + BUG_ON(in_list(&sem->ceiling)); + + list_for_each(pos, in_use) { + held = list_entry(pos, struct pcp_semaphore, ceiling); + if (held->prio_ceiling >= sem->prio_ceiling) { + __list_add(&sem->ceiling, pos->prev, pos); + return; + } + } + + /* we hit the end of the list */ + + list_add_tail(&sem->ceiling, in_use); +} + +/* assumes preempt off */ +static int pcp_exceeds_ceiling(struct pcp_semaphore* ceiling, + struct task_struct* task, + int effective_prio) +{ + return ceiling == NULL || + ceiling->prio_ceiling > effective_prio || + ceiling->owner == task; +} + +/* assumes preempt off */ +static void pcp_priority_inheritance(void) +{ + unsigned long flags; + pfp_domain_t* pfp = local_pfp; + + struct pcp_semaphore* ceiling = pcp_get_ceiling(); + struct task_struct *blocker, *blocked; + + blocker = ceiling ? ceiling->owner : NULL; + blocked = this_cpu_ptr(&pcp_state)->hp_waiter; + + raw_spin_lock_irqsave(&pfp->slock, flags); + + /* Current is no longer inheriting anything by default. This should be + * the currently scheduled job, and hence not currently queued. + * Special case: if current stopped being a real-time task, it will no longer + * be registered as pfp->scheduled. */ + BUG_ON(current != pfp->scheduled && is_realtime(current)); + + fp_set_prio_inh(pfp, current, NULL); + fp_set_prio_inh(pfp, blocked, NULL); + fp_set_prio_inh(pfp, blocker, NULL); + + /* Let blocking job inherit priority of blocked job, if required. */ + if (blocker && blocked && + fp_higher_prio(blocked, blocker)) { + TRACE_TASK(blocker, "PCP inherits from %s/%d (prio %u -> %u) \n", + blocked->comm, blocked->pid, + get_priority(blocker), get_priority(blocked)); + fp_set_prio_inh(pfp, blocker, blocked); + } + + /* Check if anything changed. If the blocked job is current, then it is + * just blocking and hence is going to call the scheduler anyway. 
*/ + if (blocked != current && + fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled)) + preempt(pfp); + + raw_spin_unlock_irqrestore(&pfp->slock, flags); +} + +/* called with preemptions off */ +static void pcp_raise_ceiling(struct pcp_semaphore* sem, + int effective_prio) +{ + struct task_struct* t = current; + struct pcp_semaphore* ceiling; + prio_wait_queue_t wait; + unsigned int waiting_higher_prio; + + while(1) { + ceiling = pcp_get_ceiling(); + if (pcp_exceeds_ceiling(ceiling, t, effective_prio)) + break; + + TRACE_CUR("PCP ceiling-blocked, wanted sem %p, but %s/%d has the ceiling \n", + sem, ceiling->owner->comm, ceiling->owner->pid); + + /* we need to wait until the ceiling is lowered */ + + /* enqueue in priority order */ + init_prio_waitqueue_entry(&wait, t, effective_prio); + set_task_state(t, TASK_UNINTERRUPTIBLE); + waiting_higher_prio = add_wait_queue_prio_exclusive( + &(this_cpu_ptr(&pcp_state)->ceiling_blocked), &wait); + + if (waiting_higher_prio == 0) { + TRACE_CUR("PCP new highest-prio waiter => prio inheritance\n"); + + /* we are the new highest-priority waiting job + * => update inheritance */ + this_cpu_ptr(&pcp_state)->hp_waiter = t; + pcp_priority_inheritance(); + } + + TS_LOCK_SUSPEND; + + preempt_enable_no_resched(); + schedule(); + preempt_disable(); + + /* pcp_resume_unblocked() removed us from wait queue */ + + TS_LOCK_RESUME; + } + + TRACE_CUR("PCP got the ceiling and sem %p\n", sem); + + /* We are good to go. The semaphore should be available. */ + BUG_ON(sem->owner != NULL); + + sem->owner = t; + + pcp_add_ceiling(sem); +} + +static void pcp_resume_unblocked(void) +{ + wait_queue_head_t *blocked = &(this_cpu_ptr(&pcp_state)->ceiling_blocked); + unsigned long flags; + prio_wait_queue_t* q; + struct task_struct* t = NULL; + + struct pcp_semaphore* ceiling = pcp_get_ceiling(); + + spin_lock_irqsave(&blocked->lock, flags); + + while (waitqueue_active(blocked)) { + /* check first == highest-priority waiting job */ + q = list_entry(blocked->task_list.next, + prio_wait_queue_t, wq.task_list); + t = (struct task_struct*) q->wq.private; + + /* can it proceed now? => let it go */ + if (pcp_exceeds_ceiling(ceiling, t, q->priority)) { + __remove_wait_queue(blocked, &q->wq); + wake_up_process(t); + } else { + /* We are done. Update highest-priority waiter. */ + this_cpu_ptr(&pcp_state)->hp_waiter = t; + goto out; + } + } + /* If we get here, then there are no more waiting + * jobs. */ + this_cpu_ptr(&pcp_state)->hp_waiter = NULL; +out: + spin_unlock_irqrestore(&blocked->lock, flags); +} + +/* assumes preempt off */ +static void pcp_lower_ceiling(struct pcp_semaphore* sem) +{ + BUG_ON(!in_list(&sem->ceiling)); + BUG_ON(sem->owner != current); + BUG_ON(sem->on_cpu != smp_processor_id()); + + /* remove from ceiling list */ + list_del(&sem->ceiling); + + /* release */ + sem->owner = NULL; + + TRACE_CUR("PCP released sem %p\n", sem); + + /* Wake up all ceiling-blocked jobs that now pass the ceiling. */ + pcp_resume_unblocked(); + + pcp_priority_inheritance(); +} + +static void pcp_update_prio_ceiling(struct pcp_semaphore* sem, + int effective_prio) +{ + /* This needs to be synchronized on something. + * Might as well use waitqueue lock for the processor. + * We assume this happens only before the task set starts execution, + * (i.e., during initialization), but it may happen on multiple processors + * at the same time. 
+ */ + unsigned long flags; + + struct pcp_state* s = &per_cpu(pcp_state, sem->on_cpu); + + spin_lock_irqsave(&s->ceiling_blocked.lock, flags); + + sem->prio_ceiling = min(sem->prio_ceiling, effective_prio); + + spin_unlock_irqrestore(&s->ceiling_blocked.lock, flags); +} + +static void pcp_init_semaphore(struct pcp_semaphore* sem, int cpu) +{ + sem->owner = NULL; + INIT_LIST_HEAD(&sem->ceiling); + sem->prio_ceiling = INT_MAX; + sem->on_cpu = cpu; +} + +int pfp_pcp_lock(struct litmus_lock* l) +{ + struct task_struct* t = current; + struct pcp_semaphore *sem = pcp_from_lock(l); + + /* The regular PCP uses the regular task priorities, not agent + * priorities. */ + int eprio = get_priority(t); + int from = get_partition(t); + int to = sem->on_cpu; + + if (!is_realtime(t) || from != to) + return -EPERM; + + /* prevent nested lock acquisition in global critical section */ + if (tsk_rt(t)->num_locks_held) + return -EBUSY; + + preempt_disable(); + + pcp_raise_ceiling(sem, eprio); + + preempt_enable(); + + tsk_rt(t)->num_local_locks_held++; + + return 0; +} + +int pfp_pcp_unlock(struct litmus_lock* l) +{ + struct task_struct *t = current; + struct pcp_semaphore *sem = pcp_from_lock(l); + + int err = 0; + + preempt_disable(); + + if (sem->owner != t) { + err = -EINVAL; + goto out; + } + + /* The current owner should be executing on the correct CPU. + * + * If the owner transitioned out of RT mode or is exiting, then + * we it might have already been migrated away by the best-effort + * scheduler and we just have to deal with it. */ + if (unlikely(!is_realtime(t) && sem->on_cpu != smp_processor_id())) { + TRACE_TASK(t, "PCP unlock cpu=%d, sem->on_cpu=%d\n", + smp_processor_id(), sem->on_cpu); + preempt_enable(); + err = litmus_be_migrate_to(sem->on_cpu); + preempt_disable(); + TRACE_TASK(t, "post-migrate: cpu=%d, sem->on_cpu=%d err=%d\n", + smp_processor_id(), sem->on_cpu, err); + } + BUG_ON(sem->on_cpu != smp_processor_id()); + err = 0; + + tsk_rt(t)->num_local_locks_held--; + + /* give it back */ + pcp_lower_ceiling(sem); + +out: + preempt_enable(); + + return err; +} + +int pfp_pcp_open(struct litmus_lock* l, void* __user config) +{ + struct task_struct *t = current; + struct pcp_semaphore *sem = pcp_from_lock(l); + + int cpu, eprio; + + if (!is_realtime(t)) + /* we need to know the real-time priority */ + return -EPERM; + + if (!config) + cpu = get_partition(t); + else if (get_user(cpu, (int*) config)) + return -EFAULT; + + /* make sure the resource location matches */ + if (cpu != sem->on_cpu) + return -EINVAL; + + /* The regular PCP uses regular task priorites, not agent + * priorities. 
*/ + eprio = get_priority(t); + + pcp_update_prio_ceiling(sem, eprio); + + return 0; +} + +int pfp_pcp_close(struct litmus_lock* l) +{ + struct task_struct *t = current; + struct pcp_semaphore *sem = pcp_from_lock(l); + + int owner = 0; + + preempt_disable(); + + if (sem->on_cpu == smp_processor_id()) + owner = sem->owner == t; + + preempt_enable(); + + if (owner) + pfp_pcp_unlock(l); + + return 0; +} + +void pfp_pcp_free(struct litmus_lock* lock) +{ + kfree(pcp_from_lock(lock)); +} + + +static struct litmus_lock_ops pfp_pcp_lock_ops = { + .close = pfp_pcp_close, + .lock = pfp_pcp_lock, + .open = pfp_pcp_open, + .unlock = pfp_pcp_unlock, + .deallocate = pfp_pcp_free, +}; + + +static struct litmus_lock* pfp_new_pcp(int on_cpu) +{ + struct pcp_semaphore* sem; + + sem = kmalloc(sizeof(*sem), GFP_KERNEL); + if (!sem) + return NULL; + + sem->litmus_lock.ops = &pfp_pcp_lock_ops; + pcp_init_semaphore(sem, on_cpu); + + return &sem->litmus_lock; +} + +/* ******************** DPCP support ********************** */ + +struct dpcp_semaphore { + struct litmus_lock litmus_lock; + struct pcp_semaphore pcp; + int owner_cpu; +}; + +static inline struct dpcp_semaphore* dpcp_from_lock(struct litmus_lock* lock) +{ + return container_of(lock, struct dpcp_semaphore, litmus_lock); +} + +/* called with preemptions disabled */ +static void pfp_migrate_to(int target_cpu) +{ + struct task_struct* t = current; + pfp_domain_t *from; + + if (get_partition(t) == target_cpu) + return; + + if (!is_realtime(t)) + { + TRACE_TASK(t, "not migrating, not a RT task (anymore?)\n"); + return; + } + + /* make sure target_cpu makes sense */ + BUG_ON(target_cpu >= NR_CPUS || !cpu_online(target_cpu)); + + local_irq_disable(); + + from = task_pfp(t); + raw_spin_lock(&from->slock); + + /* Scheduled task should not be in any ready or release queue. Check + * this while holding the lock to avoid RT mode transitions.*/ + BUG_ON(is_realtime(t) && is_queued(t)); + + /* switch partitions */ + tsk_rt(t)->task_params.cpu = target_cpu; + + raw_spin_unlock(&from->slock); + + /* Don't trace scheduler costs as part of + * locking overhead. Scheduling costs are accounted for + * explicitly. */ + TS_LOCK_SUSPEND; + + local_irq_enable(); + preempt_enable_no_resched(); + + /* deschedule to be migrated */ + schedule(); + + /* we are now on the target processor */ + preempt_disable(); + + /* start recording costs again */ + TS_LOCK_RESUME; + + BUG_ON(smp_processor_id() != target_cpu && is_realtime(t)); +} + +int pfp_dpcp_lock(struct litmus_lock* l) +{ + struct task_struct* t = current; + struct dpcp_semaphore *sem = dpcp_from_lock(l); + int eprio = effective_agent_priority(get_priority(t)); + int from = get_partition(t); + int to = sem->pcp.on_cpu; + + if (!is_realtime(t)) + return -EPERM; + + /* prevent nested lock accquisition */ + if (tsk_rt(t)->num_locks_held || + tsk_rt(t)->num_local_locks_held) + return -EBUSY; + + preempt_disable(); + + /* Priority-boost ourself *before* we suspend so that + * our priority is boosted when we resume. 
*/ + + boost_priority(t, get_priority(t)); + + pfp_migrate_to(to); + + pcp_raise_ceiling(&sem->pcp, eprio); + + /* yep, we got it => execute request */ + sem->owner_cpu = from; + + preempt_enable(); + + tsk_rt(t)->num_locks_held++; + + return 0; +} + +int pfp_dpcp_unlock(struct litmus_lock* l) +{ + struct task_struct *t = current; + struct dpcp_semaphore *sem = dpcp_from_lock(l); + int err = 0; + int home; + + preempt_disable(); + + if (sem->pcp.owner != t) { + err = -EINVAL; + goto out; + } + + /* The current owner should be executing on the correct CPU. + * + * If the owner transitioned out of RT mode or is exiting, then + * we it might have already been migrated away by the best-effort + * scheduler and we just have to deal with it. */ + if (unlikely(!is_realtime(t) && sem->pcp.on_cpu != smp_processor_id())) { + TRACE_TASK(t, "DPCP unlock cpu=%d, sem->pcp.on_cpu=%d\n", smp_processor_id(), sem->pcp.on_cpu); + preempt_enable(); + err = litmus_be_migrate_to(sem->pcp.on_cpu); + preempt_disable(); + TRACE_TASK(t, "post-migrate: cpu=%d, sem->pcp.on_cpu=%d err=%d\n", smp_processor_id(), sem->pcp.on_cpu, err); + } + BUG_ON(sem->pcp.on_cpu != smp_processor_id()); + err = 0; + + tsk_rt(t)->num_locks_held--; + + home = sem->owner_cpu; + + /* give it back */ + pcp_lower_ceiling(&sem->pcp); + + /* we lose the benefit of priority boosting */ + unboost_priority(t); + + pfp_migrate_to(home); + +out: + preempt_enable(); + + return err; +} + +int pfp_dpcp_open(struct litmus_lock* l, void* __user config) +{ + struct task_struct *t = current; + struct dpcp_semaphore *sem = dpcp_from_lock(l); + int cpu, eprio; + + if (!is_realtime(t)) + /* we need to know the real-time priority */ + return -EPERM; + + if (get_user(cpu, (int*) config)) + return -EFAULT; + + /* make sure the resource location matches */ + if (cpu != sem->pcp.on_cpu) + return -EINVAL; + + eprio = effective_agent_priority(get_priority(t)); + + pcp_update_prio_ceiling(&sem->pcp, eprio); + + return 0; +} + +int pfp_dpcp_close(struct litmus_lock* l) +{ + struct task_struct *t = current; + struct dpcp_semaphore *sem = dpcp_from_lock(l); + int owner = 0; + + preempt_disable(); + + if (sem->pcp.on_cpu == smp_processor_id()) + owner = sem->pcp.owner == t; + + preempt_enable(); + + if (owner) + pfp_dpcp_unlock(l); + + return 0; +} + +void pfp_dpcp_free(struct litmus_lock* lock) +{ + kfree(dpcp_from_lock(lock)); +} + +static struct litmus_lock_ops pfp_dpcp_lock_ops = { + .close = pfp_dpcp_close, + .lock = pfp_dpcp_lock, + .open = pfp_dpcp_open, + .unlock = pfp_dpcp_unlock, + .deallocate = pfp_dpcp_free, +}; + +static struct litmus_lock* pfp_new_dpcp(int on_cpu) +{ + struct dpcp_semaphore* sem; + + sem = kmalloc(sizeof(*sem), GFP_KERNEL); + if (!sem) + return NULL; + + sem->litmus_lock.ops = &pfp_dpcp_lock_ops; + sem->owner_cpu = NO_CPU; + pcp_init_semaphore(&sem->pcp, on_cpu); + + return &sem->litmus_lock; +} + + +/* ******************** DFLP support ********************** */ + +struct dflp_semaphore { + struct litmus_lock litmus_lock; + + /* current resource holder */ + struct task_struct *owner; + int owner_cpu; + + /* FIFO queue of waiting tasks */ + wait_queue_head_t wait; + + /* where is the resource assigned to */ + int on_cpu; +}; + +static inline struct dflp_semaphore* dflp_from_lock(struct litmus_lock* lock) +{ + return container_of(lock, struct dflp_semaphore, litmus_lock); +} + +int pfp_dflp_lock(struct litmus_lock* l) +{ + struct task_struct* t = current; + struct dflp_semaphore *sem = dflp_from_lock(l); + int from = get_partition(t); + int 
to = sem->on_cpu; + unsigned long flags; + wait_queue_t wait; + lt_t time_of_request; + + if (!is_realtime(t)) + return -EPERM; + + /* prevent nested lock accquisition */ + if (tsk_rt(t)->num_locks_held || + tsk_rt(t)->num_local_locks_held) + return -EBUSY; + + preempt_disable(); + + /* tie-break by this point in time */ + time_of_request = litmus_clock(); + + /* Priority-boost ourself *before* we suspend so that + * our priority is boosted when we resume. */ + boost_priority(t, time_of_request); + + pfp_migrate_to(to); + + /* Now on the right CPU, preemptions still disabled. */ + + spin_lock_irqsave(&sem->wait.lock, flags); + + if (sem->owner) { + /* resource is not free => must suspend and wait */ + + init_waitqueue_entry(&wait, t); + + /* FIXME: interruptible would be nice some day */ + set_task_state(t, TASK_UNINTERRUPTIBLE); + + __add_wait_queue_tail_exclusive(&sem->wait, &wait); + + TS_LOCK_SUSPEND; + + /* release lock before sleeping */ + spin_unlock_irqrestore(&sem->wait.lock, flags); + + /* We depend on the FIFO order. Thus, we don't need to recheck + * when we wake up; we are guaranteed to have the lock since + * there is only one wake up per release. + */ + + preempt_enable_no_resched(); + + schedule(); + + preempt_disable(); + + TS_LOCK_RESUME; + + /* Since we hold the lock, no other task will change + * ->owner. We can thus check it without acquiring the spin + * lock. */ + BUG_ON(sem->owner != t); + } else { + /* it's ours now */ + sem->owner = t; + + spin_unlock_irqrestore(&sem->wait.lock, flags); + } + + sem->owner_cpu = from; + + preempt_enable(); + + tsk_rt(t)->num_locks_held++; + + return 0; +} + +int pfp_dflp_unlock(struct litmus_lock* l) +{ + struct task_struct *t = current, *next; + struct dflp_semaphore *sem = dflp_from_lock(l); + int err = 0; + int home; + unsigned long flags; + + preempt_disable(); + + spin_lock_irqsave(&sem->wait.lock, flags); + + if (sem->owner != t) { + err = -EINVAL; + spin_unlock_irqrestore(&sem->wait.lock, flags); + goto out; + } + + /* check if there are jobs waiting for this resource */ + next = __waitqueue_remove_first(&sem->wait); + if (next) { + /* next becomes the resouce holder */ + sem->owner = next; + + /* Wake up next. The waiting job is already priority-boosted. 
*/
+		wake_up_process(next);
+	} else
+		/* resource becomes available */
+		sem->owner = NULL;
+
+	tsk_rt(t)->num_locks_held--;
+
+	home = sem->owner_cpu;
+
+	spin_unlock_irqrestore(&sem->wait.lock, flags);
+
+	/* we lose the benefit of priority boosting */
+	unboost_priority(t);
+
+	pfp_migrate_to(home);
+
+out:
+	preempt_enable();
+
+	return err;
+}
+
+int pfp_dflp_open(struct litmus_lock* l, void* __user config)
+{
+	struct dflp_semaphore *sem = dflp_from_lock(l);
+	int cpu;
+
+	if (get_user(cpu, (int*) config))
+		return -EFAULT;
+
+	/* make sure the resource location matches */
+	if (cpu != sem->on_cpu)
+		return -EINVAL;
+
+	return 0;
+}
+
+int pfp_dflp_close(struct litmus_lock* l)
+{
+	struct task_struct *t = current;
+	struct dflp_semaphore *sem = dflp_from_lock(l);
+	int owner = 0;
+
+	preempt_disable();
+
+	if (sem->on_cpu == smp_processor_id())
+		owner = sem->owner == t;
+
+	preempt_enable();
+
+	if (owner)
+		pfp_dflp_unlock(l);
+
+	return 0;
+}
+
+void pfp_dflp_free(struct litmus_lock* lock)
+{
+	kfree(dflp_from_lock(lock));
+}
+
+static struct litmus_lock_ops pfp_dflp_lock_ops = {
+	.close = pfp_dflp_close,
+	.lock = pfp_dflp_lock,
+	.open = pfp_dflp_open,
+	.unlock = pfp_dflp_unlock,
+	.deallocate = pfp_dflp_free,
+};
+
+static struct litmus_lock* pfp_new_dflp(int on_cpu)
+{
+	struct dflp_semaphore* sem;
+
+	sem = kmalloc(sizeof(*sem), GFP_KERNEL);
+	if (!sem)
+		return NULL;
+
+	sem->litmus_lock.ops = &pfp_dflp_lock_ops;
+	sem->owner_cpu = NO_CPU;
+	sem->owner = NULL;
+	sem->on_cpu = on_cpu;
+	init_waitqueue_head(&sem->wait);
+
+	return &sem->litmus_lock;
+}
+
+
+/* **** lock constructor **** */
+
+
+static long pfp_allocate_lock(struct litmus_lock **lock, int type,
+				 void* __user config)
+{
+	int err = -ENXIO, cpu;
+	struct srp_semaphore* srp;
+
+	/* P-FP currently supports the SRP and PCP for local resources and the
+	 * FMLP, MPCP, DPCP, and DFLP for global resources.
*/
+	switch (type) {
+	case FMLP_SEM:
+		/* FIFO Mutex Locking Protocol */
+		*lock = pfp_new_fmlp();
+		if (*lock)
+			err = 0;
+		else
+			err = -ENOMEM;
+		break;
+
+	case MPCP_SEM:
+		/* Multiprocessor Priority Ceiling Protocol */
+		*lock = pfp_new_mpcp(0);
+		if (*lock)
+			err = 0;
+		else
+			err = -ENOMEM;
+		break;
+
+	case MPCP_VS_SEM:
+		/* Multiprocessor Priority Ceiling Protocol with virtual spinning */
+		*lock = pfp_new_mpcp(1);
+		if (*lock)
+			err = 0;
+		else
+			err = -ENOMEM;
+		break;
+
+	case DPCP_SEM:
+		/* Distributed Priority Ceiling Protocol */
+		if (get_user(cpu, (int*) config))
+			return -EFAULT;
+
+		TRACE("DPCP_SEM: provided cpu=%d\n", cpu);
+
+		if (cpu >= NR_CPUS || !cpu_online(cpu))
+			return -EINVAL;
+
+		*lock = pfp_new_dpcp(cpu);
+		if (*lock)
+			err = 0;
+		else
+			err = -ENOMEM;
+		break;
+
+	case DFLP_SEM:
+		/* Distributed FIFO Locking Protocol */
+		if (get_user(cpu, (int*) config))
+			return -EFAULT;
+
+		TRACE("DFLP_SEM: provided cpu=%d\n", cpu);
+
+		if (cpu >= NR_CPUS || !cpu_online(cpu))
+			return -EINVAL;
+
+		*lock = pfp_new_dflp(cpu);
+		if (*lock)
+			err = 0;
+		else
+			err = -ENOMEM;
+		break;
+
+	case SRP_SEM:
+		/* Baker's Stack Resource Policy */
+		srp = allocate_srp_semaphore();
+		if (srp) {
+			*lock = &srp->litmus_lock;
+			err = 0;
+		} else
+			err = -ENOMEM;
+		break;
+
+	case PCP_SEM:
+		/* Priority Ceiling Protocol */
+		if (!config)
+			cpu = get_partition(current);
+		else if (get_user(cpu, (int*) config))
+			return -EFAULT;
+
+		if (cpu >= NR_CPUS || !cpu_online(cpu))
+			return -EINVAL;
+
+		*lock = pfp_new_pcp(cpu);
+		if (*lock)
+			err = 0;
+		else
+			err = -ENOMEM;
+		break;
+	};
+
+	return err;
+}
+
+#endif
+
+static long pfp_admit_task(struct task_struct* tsk)
+{
+	if (task_cpu(tsk) == tsk->rt_param.task_params.cpu &&
+#ifdef CONFIG_RELEASE_MASTER
+	    /* don't allow tasks on release master CPU */
+	    task_cpu(tsk) != remote_dom(task_cpu(tsk))->release_master &&
+#endif
+	    litmus_is_valid_fixed_prio(get_priority(tsk)))
+		return 0;
+	else
+		return -EINVAL;
+}
+
+static struct domain_proc_info pfp_domain_proc_info;
+static long pfp_get_domain_proc_info(struct domain_proc_info **ret)
+{
+	*ret = &pfp_domain_proc_info;
+	return 0;
+}
+
+static void pfp_setup_domain_proc(void)
+{
+	int i, cpu;
+	int release_master =
+#ifdef CONFIG_RELEASE_MASTER
+		atomic_read(&release_master_cpu);
+#else
+		NO_CPU;
+#endif
+	int num_rt_cpus = num_online_cpus() - (release_master != NO_CPU);
+	struct cd_mapping *cpu_map, *domain_map;
+
+	memset(&pfp_domain_proc_info, 0, sizeof(pfp_domain_proc_info));
+	init_domain_proc_info(&pfp_domain_proc_info, num_rt_cpus, num_rt_cpus);
+	pfp_domain_proc_info.num_cpus = num_rt_cpus;
+	pfp_domain_proc_info.num_domains = num_rt_cpus;
+	for (cpu = 0, i = 0; cpu < num_online_cpus(); ++cpu) {
+		if (cpu == release_master)
+			continue;
+		cpu_map = &pfp_domain_proc_info.cpu_to_domains[i];
+		domain_map = &pfp_domain_proc_info.domain_to_cpus[i];
+
+		cpu_map->id = cpu;
+		domain_map->id = i; /* enumerate w/o counting the release master */
+		cpumask_set_cpu(i, cpu_map->mask);
+		cpumask_set_cpu(cpu, domain_map->mask);
+		++i;
+	}
+}
+
+static long pfp_activate_plugin(void)
+{
+#if defined(CONFIG_RELEASE_MASTER) || defined(CONFIG_LITMUS_LOCKING)
+	int cpu;
+#endif
+
+#ifdef CONFIG_RELEASE_MASTER
+	for_each_online_cpu(cpu) {
+		remote_dom(cpu)->release_master = atomic_read(&release_master_cpu);
+	}
+#endif
+
+#ifdef CONFIG_LITMUS_LOCKING
+	get_srp_prio = pfp_get_srp_prio;
+
+	for_each_online_cpu(cpu) {
+		init_waitqueue_head(&per_cpu(mpcpvs_vspin_wait, cpu));
+		per_cpu(mpcpvs_vspin, cpu) = NULL;
+
+		
pcp_init_state(&per_cpu(pcp_state, cpu)); + pfp_doms[cpu] = remote_pfp(cpu); + per_cpu(fmlp_timestamp,cpu) = 0; + } + +#endif + + pfp_setup_domain_proc(); + + return 0; +} + +static long pfp_deactivate_plugin(void) +{ + destroy_domain_proc_info(&pfp_domain_proc_info); + return 0; +} + +/* Plugin object */ +static struct sched_plugin pfp_plugin __cacheline_aligned_in_smp = { + .plugin_name = "P-FP", + .task_new = pfp_task_new, + .complete_job = complete_job, + .task_exit = pfp_task_exit, + .schedule = pfp_schedule, + .task_wake_up = pfp_task_wake_up, + .task_block = pfp_task_block, + .admit_task = pfp_admit_task, + .activate_plugin = pfp_activate_plugin, + .deactivate_plugin = pfp_deactivate_plugin, + .get_domain_proc_info = pfp_get_domain_proc_info, +#ifdef CONFIG_LITMUS_LOCKING + .allocate_lock = pfp_allocate_lock, + .finish_switch = pfp_finish_switch, +#endif +}; + + +static int __init init_pfp(void) +{ + int i; + + /* We do not really want to support cpu hotplug, do we? ;) + * However, if we are so crazy to do so, + * we cannot use num_online_cpu() + */ + for (i = 0; i < num_online_cpus(); i++) { + pfp_domain_init(remote_pfp(i), i); + } + return register_sched_plugin(&pfp_plugin); +} + +module_init(init_pfp); -- cgit v1.2.2
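
As a standalone illustration of the fixed-priority rules the plugin relies on, the following C sketch mirrors the decision structure of fp_higher_prio() in litmus/fp_common.c: priority-boosted lock holders are favoured over all non-boosted jobs and are tie-broken by the time at which boosting began, inherited priorities take effect before the comparison, and among ordinary jobs the numerically smaller priority value wins. This is a simplified sketch, not LITMUS^RT code; the names demo_task, effective_prio, and demo_higher_prio are hypothetical.

/* Standalone sketch of the P-FP priority comparison rules; not LITMUS^RT code.
 * All types and names here are illustrative only. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct demo_task {
	unsigned int prio;          /* lower value = higher fixed priority */
	bool boosted;               /* priority-boosted while holding a lock? */
	uint64_t boost_start;       /* tie-break among boosted jobs */
	struct demo_task *inh_from; /* task whose priority is inherited, if any */
};

/* Effective priority: use the inherited priority if one is set. */
static unsigned int effective_prio(const struct demo_task *t)
{
	return t->inh_from ? t->inh_from->prio : t->prio;
}

/* Returns true if 'a' should be scheduled in preference to 'b'. */
static bool demo_higher_prio(const struct demo_task *a, const struct demo_task *b)
{
	/* Boosted jobs precede all non-boosted jobs; among boosted jobs, the
	 * one whose boosting started earlier wins. */
	if (a->boosted && b->boosted)
		return a->boost_start < b->boost_start;
	if (a->boosted != b->boosted)
		return a->boosted;
	/* Otherwise compare effective fixed priorities (smaller index wins). */
	return effective_prio(a) < effective_prio(b);
}

int main(void)
{
	struct demo_task hi = { .prio = 1 };
	struct demo_task lo = { .prio = 5, .boosted = true, .boost_start = 100 };

	/* The boosted low-priority lock holder is favoured over the
	 * higher-priority task, which is the effect boost_priority() has. */
	printf("boosted lo beats hi: %d\n", demo_higher_prio(&lo, &hi));
	return 0;
}

The convention that a smaller value means a higher priority, and that boosted jobs effectively occupy a reserved top slot, matches get_priority() and the index 0 reserved by priority_index() in the patch above.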