author		Sven Dziadek <s9svdzia@stud.uni-saarland.de>	2012-04-16 15:00:33 -0400
committer	Bjoern Brandenburg <bbb@mpi-sws.org>		2012-07-23 05:57:59 -0400
commit		16c1fb2d4ac691e941456a084284020c63fce93a (patch)
tree		0f7cf24be334788c9f9bb8242966df83f6266dc6
parent		4bc55d3b64fdf0af17f4777013a74fbef7f40ced (diff)
P-FP: port the P-FP plugin used in B. Brandenburg's dissertation (branch bbb-diss) to the current version of LITMUS^RT

This is needed for ongoing projects. I took the code unchanged, but removed some leftovers of the OMLP, which is not implemented.
-rw-r--r--	include/litmus/fdso.h		8
-rw-r--r--	include/litmus/fp_common.h	105
-rw-r--r--	include/litmus/litmus.h		1
-rw-r--r--	include/litmus/rt_param.h	3
-rw-r--r--	include/litmus/wait.h		57
-rw-r--r--	litmus/Makefile			4
-rw-r--r--	litmus/fdso.c			3
-rw-r--r--	litmus/fp_common.c		119
-rw-r--r--	litmus/locking.c		32
-rw-r--r--	litmus/sched_pfp.c		1543
10 files changed, 1872 insertions, 3 deletions
diff --git a/include/litmus/fdso.h b/include/litmus/fdso.h
index caf2a1e6918c..2b0537ce7260 100644
--- a/include/litmus/fdso.h
+++ b/include/litmus/fdso.h
@@ -12,7 +12,7 @@
 #include <linux/fs.h>
 #include <linux/slab.h>
 
-#define MAX_OBJECT_DESCRIPTORS 32
+#define MAX_OBJECT_DESCRIPTORS 85
 
 typedef enum  {
 	MIN_OBJ_TYPE	= 0,
@@ -20,7 +20,11 @@ typedef enum {
 	FMLP_SEM	= 0,
 	SRP_SEM		= 1,
 
-	MAX_OBJ_TYPE	= 1
+	MPCP_SEM	= 2,
+	MPCP_VS_SEM	= 3,
+	DPCP_SEM	= 4,
+
+	MAX_OBJ_TYPE	= 4
 } obj_type_t;
 
 struct inode_obj_id {
diff --git a/include/litmus/fp_common.h b/include/litmus/fp_common.h
new file mode 100644
index 000000000000..dd1f7bf1e347
--- /dev/null
+++ b/include/litmus/fp_common.h
@@ -0,0 +1,105 @@
1/* Fixed-priority scheduler support.
2 */
3
4#ifndef __FP_COMMON_H__
5#define __FP_COMMON_H__
6
7#include <litmus/rt_domain.h>
8
9#include <asm/bitops.h>
10
11
12void fp_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
13 release_jobs_t release);
14
15int fp_higher_prio(struct task_struct* first,
16 struct task_struct* second);
17
18int fp_ready_order(struct bheap_node* a, struct bheap_node* b);
19
20#define FP_PRIO_BIT_WORDS (LITMUS_MAX_PRIORITY / BITS_PER_LONG)
21
22#if (LITMUS_MAX_PRIORITY % BITS_PER_LONG)
23#error LITMUS_MAX_PRIORITY must be a multiple of BITS_PER_LONG
24#endif
25
26/* bitmask-indexed priority queue */
27struct fp_prio_queue {
28 unsigned long bitmask[FP_PRIO_BIT_WORDS];
29 struct bheap queue[LITMUS_MAX_PRIORITY];
30};
31
32void fp_prio_queue_init(struct fp_prio_queue* q);
33
34static inline void fpq_set(struct fp_prio_queue* q, unsigned int index)
35{
36 unsigned long *word = q->bitmask + (index / BITS_PER_LONG);
37 __set_bit(index % BITS_PER_LONG, word);
38}
39
40static inline void fpq_clear(struct fp_prio_queue* q, unsigned int index)
41{
42 unsigned long *word = q->bitmask + (index / BITS_PER_LONG);
43 __clear_bit(index % BITS_PER_LONG, word);
44}
45
46static inline unsigned int fpq_find(struct fp_prio_queue* q)
47{
48 int i;
49
50 /* loop optimizer should unroll this */
51 for (i = 0; i < FP_PRIO_BIT_WORDS; i++)
52 if (q->bitmask[i])
53 return __ffs(q->bitmask[i]) + i * BITS_PER_LONG;
54
55 return LITMUS_MAX_PRIORITY; /* nothing found */
56}
57
58static inline void fp_prio_add(struct fp_prio_queue* q, struct task_struct* t, unsigned int index)
59{
60
61 BUG_ON(bheap_node_in_heap(tsk_rt(t)->heap_node));
62
63 fpq_set(q, index);
64 bheap_insert(fp_ready_order, &q->queue[index], tsk_rt(t)->heap_node);
65}
66
67static inline void fp_prio_remove(struct fp_prio_queue* q, struct task_struct* t, unsigned int index)
68{
69 BUG_ON(!is_queued(t));
70
71 bheap_delete(fp_ready_order, &q->queue[index], tsk_rt(t)->heap_node);
72 if (likely(bheap_empty(&q->queue[index])))
73 fpq_clear(q, index);
74}
75
76static inline struct task_struct* fp_prio_peek(struct fp_prio_queue* q)
77{
78 unsigned int idx = fpq_find(q);
79 struct bheap_node* hn;
80
81 if (idx < LITMUS_MAX_PRIORITY) {
82 hn = bheap_peek(fp_ready_order, &q->queue[idx]);
83 return bheap2task(hn);
84 } else
85 return NULL;
86}
87
88static inline struct task_struct* fp_prio_take(struct fp_prio_queue* q)
89{
90 unsigned int idx = fpq_find(q);
91 struct bheap_node* hn;
92
93 if (idx < LITMUS_MAX_PRIORITY) {
94 hn = bheap_take(fp_ready_order, &q->queue[idx]);
95 if (likely(bheap_empty(&q->queue[idx])))
96 fpq_clear(q, idx);
97 return bheap2task(hn);
98 } else
99 return NULL;
100}
101
102int fp_preemption_needed(struct fp_prio_queue* q, struct task_struct *t);
103
104
105#endif
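A minimal user-space sketch (not part of the patch) of the lookup scheme behind struct fp_prio_queue above: one bit per priority bucket, with the highest-priority (lowest-index) non-empty bucket located by a find-first-set scan, mirroring fpq_set()/fpq_find(). Here __builtin_ctzl() stands in for the kernel's __ffs().

#include <stdio.h>

#define MAX_PRIO   512                        /* mirrors LITMUS_MAX_PRIORITY */
#define WORD_BITS  (8 * sizeof(unsigned long))
#define WORDS      (MAX_PRIO / WORD_BITS)

static unsigned long bitmask[WORDS];

static void set_prio(unsigned int idx)
{
	bitmask[idx / WORD_BITS] |= 1UL << (idx % WORD_BITS);
}

static unsigned int find_highest(void)
{
	unsigned int i;

	for (i = 0; i < WORDS; i++)
		if (bitmask[i])
			/* __builtin_ctzl() plays the role of the kernel's __ffs() */
			return __builtin_ctzl(bitmask[i]) + i * WORD_BITS;
	return MAX_PRIO; /* nothing queued */
}

int main(void)
{
	set_prio(130);
	set_prio(7);
	printf("%u\n", find_highest()); /* prints 7: lower index = higher priority */
	return 0;
}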
diff --git a/include/litmus/litmus.h b/include/litmus/litmus.h
index 160119abfe30..aa56eeef7e7d 100644
--- a/include/litmus/litmus.h
+++ b/include/litmus/litmus.h
@@ -52,6 +52,7 @@ void litmus_exit_task(struct task_struct *tsk);
 #define get_rt_period(t)	(tsk_rt(t)->task_params.period)
 #define get_rt_phase(t)		(tsk_rt(t)->task_params.phase)
 #define get_partition(t)	(tsk_rt(t)->task_params.cpu)
+#define get_priority(t)		(tsk_rt(t)->task_params.priority)
 #define get_deadline(t)		(tsk_rt(t)->job_params.deadline)
 #define get_release(t)		(tsk_rt(t)->job_params.release)
 #define get_class(t)		(tsk_rt(t)->task_params.cls)
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index d6d799174160..1ce96af51287 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -33,11 +33,14 @@ typedef enum {
 	PRECISE_ENFORCEMENT  /* budgets are enforced with hrtimers */
 } budget_policy_t;
 
+#define LITMUS_MAX_PRIORITY 512
+
 struct rt_task {
 	lt_t		exec_cost;
 	lt_t		period;
 	lt_t		phase;
 	unsigned int	cpu;
+	unsigned int	priority;
 	task_class_t	cls;
 	budget_policy_t	budget_policy; /* ignored by pfair */
 };
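For illustration only (not part of the patch), a hypothetical initializer for the extended struct rt_task. It assumes lt_t values are in nanoseconds and that RT_CLASS_HARD is one of the existing task_class_t values; note that P-FP treats a numerically smaller priority value as a higher priority, reserves index 0 for priority-boosted jobs, and rejects priority 0 at admission time.

struct rt_task params = {
	.exec_cost     = 10000000ULL,   /* 10 ms, assuming lt_t is in ns */
	.period        = 100000000ULL,  /* 100 ms */
	.phase         = 0,
	.cpu           = 1,             /* partition this task is assigned to */
	.priority      = 5,             /* effectively 1 .. LITMUS_MAX_PRIORITY - 1 */
	.cls           = RT_CLASS_HARD, /* assumed existing task class */
	.budget_policy = PRECISE_ENFORCEMENT,
};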
diff --git a/include/litmus/wait.h b/include/litmus/wait.h
new file mode 100644
index 000000000000..ce1347c355f8
--- /dev/null
+++ b/include/litmus/wait.h
@@ -0,0 +1,57 @@
1#ifndef _LITMUS_WAIT_H_
2#define _LITMUS_WAIT_H_
3
4struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq);
5
6/* wrap regular wait_queue_t head */
7struct __prio_wait_queue {
8 wait_queue_t wq;
9
10 /* some priority point */
11 lt_t priority;
12 /* break ties in priority by lower tie_breaker */
13 unsigned int tie_breaker;
14};
15
16typedef struct __prio_wait_queue prio_wait_queue_t;
17
18static inline void init_prio_waitqueue_entry(prio_wait_queue_t *pwq,
19 struct task_struct* t,
20 lt_t priority)
21{
22 init_waitqueue_entry(&pwq->wq, t);
23 pwq->priority = priority;
24 pwq->tie_breaker = 0;
25}
26
27static inline void init_prio_waitqueue_entry_tie(prio_wait_queue_t *pwq,
28 struct task_struct* t,
29 lt_t priority,
30 unsigned int tie_breaker)
31{
32 init_waitqueue_entry(&pwq->wq, t);
33 pwq->priority = priority;
34 pwq->tie_breaker = tie_breaker;
35}
36
37unsigned int __add_wait_queue_prio_exclusive(
38 wait_queue_head_t* head,
39 prio_wait_queue_t *new);
40
41static inline unsigned int add_wait_queue_prio_exclusive(
42 wait_queue_head_t* head,
43 prio_wait_queue_t *new)
44{
45 unsigned long flags;
46 unsigned int passed;
47
48 spin_lock_irqsave(&head->lock, flags);
49 passed = __add_wait_queue_prio_exclusive(head, new);
50
51 spin_unlock_irqrestore(&head->lock, flags);
52
53 return passed;
54}
55
56
57#endif
diff --git a/litmus/Makefile b/litmus/Makefile
index 4650d332fb11..d26ca7076b62 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -11,6 +11,7 @@ obj-y     = sched_plugin.o litmus.o \
 	    sync.o \
 	    rt_domain.o \
 	    edf_common.o \
+	    fp_common.o \
 	    fdso.o \
 	    locking.o \
 	    srp.o \
@@ -18,7 +19,8 @@ obj-y     = sched_plugin.o litmus.o \
 	    binheap.o \
 	    ctrldev.o \
 	    sched_gsn_edf.o \
-	    sched_psn_edf.o
+	    sched_psn_edf.o \
+	    sched_pfp.o
 
 obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
 obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
diff --git a/litmus/fdso.c b/litmus/fdso.c
index aa7b384264e3..04c0b55e41d3 100644
--- a/litmus/fdso.c
+++ b/litmus/fdso.c
@@ -23,6 +23,9 @@ extern struct fdso_ops generic_lock_ops;
 static const struct fdso_ops* fdso_ops[] = {
 	&generic_lock_ops, /* FMLP_SEM */
 	&generic_lock_ops, /* SRP_SEM */
+	&generic_lock_ops, /* MPCP_SEM */
+	&generic_lock_ops, /* MPCP_VS_SEM */
+	&generic_lock_ops, /* DPCP_SEM */
 };
 
 static int fdso_create(void** obj_ref, obj_type_t type, void* __user config)
diff --git a/litmus/fp_common.c b/litmus/fp_common.c
new file mode 100644
index 000000000000..31fc2db20adf
--- /dev/null
+++ b/litmus/fp_common.c
@@ -0,0 +1,119 @@
1/*
2 * litmus/fp_common.c
3 *
4 * Common functions for fixed-priority scheduler.
5 */
6
7#include <linux/percpu.h>
8#include <linux/sched.h>
9#include <linux/list.h>
10
11#include <litmus/litmus.h>
12#include <litmus/sched_plugin.h>
13#include <litmus/sched_trace.h>
14
15#include <litmus/fp_common.h>
16
17/* fp_higher_prio - returns true if first has a higher static priority
18 * than second. Priority ties are broken by PID.
19 *
20 * both first and second may be NULL
21 */
22int fp_higher_prio(struct task_struct* first,
23 struct task_struct* second)
24{
25 struct task_struct *first_task = first;
26 struct task_struct *second_task = second;
27
28 /* There is no point in comparing a task to itself. */
29 if (unlikely(first && first == second)) {
30 TRACE_TASK(first,
31 "WARNING: pointless FP priority comparison.\n");
32 return 0;
33 }
34
35
36 /* check for NULL tasks */
37 if (!first || !second)
38 return first && !second;
39
40#ifdef CONFIG_LITMUS_LOCKING
41
42 /* Check for inherited priorities. Change task
43 * used for comparison in such a case.
44 */
45 if (unlikely(first->rt_param.inh_task))
46 first_task = first->rt_param.inh_task;
47 if (unlikely(second->rt_param.inh_task))
48 second_task = second->rt_param.inh_task;
49
50 /* Check for priority boosting. Tie-break by start of boosting.
51 */
52 if (unlikely(is_priority_boosted(first_task))) {
53 /* first_task is boosted, how about second_task? */
54 if (!is_priority_boosted(second_task) ||
55 lt_before(get_boost_start(first_task),
56 get_boost_start(second_task)))
57 return 1;
58 else
59 return 0;
60 } else if (unlikely(is_priority_boosted(second_task)))
61 /* second_task is boosted, first is not*/
62 return 0;
63
64#endif
65
66
67 return !is_realtime(second_task) ||
68
69 get_priority(first_task) < get_priority(second_task) ||
70
71 /* Break by PID.
72 */
73 (get_priority(first_task) == get_priority(second_task) &&
74 (first_task->pid < second_task->pid ||
75
76 /* If the PIDs are the same then the task with the inherited
77 * priority wins.
78 */
79 (first_task->pid == second_task->pid &&
80 !second->rt_param.inh_task)));
81}
82
83int fp_ready_order(struct bheap_node* a, struct bheap_node* b)
84{
85 return fp_higher_prio(bheap2task(a), bheap2task(b));
86}
87
88void fp_domain_init(rt_domain_t* rt, check_resched_needed_t resched,
89 release_jobs_t release)
90{
91 rt_domain_init(rt, fp_ready_order, resched, release);
92}
93
94/* need_to_preempt - check whether the task t needs to be preempted
95 */
96int fp_preemption_needed(struct fp_prio_queue *q, struct task_struct *t)
97{
98 struct task_struct *pending;
99
100 pending = fp_prio_peek(q);
101
102 if (!pending)
103 return 0;
104 if (!t)
105 return 1;
106
107 /* make sure to get non-rt stuff out of the way */
108 return !is_realtime(t) || fp_higher_prio(pending, t);
109}
110
111void fp_prio_queue_init(struct fp_prio_queue* q)
112{
113 int i;
114
115 for (i = 0; i < FP_PRIO_BIT_WORDS; i++)
116 q->bitmask[i] = 0;
117 for (i = 0; i < LITMUS_MAX_PRIORITY; i++)
118 bheap_init(&q->queue[i]);
119}
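As a worked illustration (not part of the patch) of the ordering that fp_higher_prio() implements in its base case: a numerically smaller priority value wins, and equal priorities fall back to the lower PID. A minimal user-space model, ignoring inheritance and priority boosting:

#include <stdio.h>

struct fake_task {
	unsigned int priority;	/* smaller value = higher fixed priority */
	int pid;
};

static int higher_prio(const struct fake_task *a, const struct fake_task *b)
{
	return a->priority < b->priority ||
	       (a->priority == b->priority && a->pid < b->pid);
}

int main(void)
{
	struct fake_task t1 = { .priority = 5,  .pid = 101 };
	struct fake_task t2 = { .priority = 17, .pid = 42  };
	struct fake_task t3 = { .priority = 5,  .pid = 77  };

	printf("%d %d\n",
	       higher_prio(&t1, &t2),  /* 1: priority 5 beats priority 17 */
	       higher_prio(&t3, &t1)); /* 1: equal priority, lower PID wins */
	return 0;
}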
diff --git a/litmus/locking.c b/litmus/locking.c
index 0c1aa6aa40b7..ca5a073a989e 100644
--- a/litmus/locking.c
+++ b/litmus/locking.c
@@ -4,6 +4,7 @@
 
 #include <litmus/sched_plugin.h>
 #include <litmus/trace.h>
+#include <litmus/wait.h>
 
 static int create_generic_lock(void** obj_ref, obj_type_t type, void* __user arg);
 static int open_generic_lock(struct od_table_entry* entry, void* __user arg);
@@ -121,6 +122,37 @@ struct task_struct* __waitqueue_remove_first(wait_queue_head_t *wq)
 	return(t);
 }
 
+unsigned int __add_wait_queue_prio_exclusive(
+	wait_queue_head_t* head,
+	prio_wait_queue_t *new)
+{
+	struct list_head *pos;
+	unsigned int passed = 0;
+
+	new->wq.flags |= WQ_FLAG_EXCLUSIVE;
+
+	/* find a spot where the new entry is less than the next */
+	list_for_each(pos, &head->task_list) {
+		prio_wait_queue_t* queued = list_entry(pos, prio_wait_queue_t,
+						       wq.task_list);
+
+		if (unlikely(lt_before(new->priority, queued->priority) ||
+			     (new->priority == queued->priority &&
+			      new->tie_breaker < queued->tie_breaker))) {
+			/* pos is not less than new, thus insert here */
+			__list_add(&new->wq.task_list, pos->prev, pos);
+			goto out;
+		}
+		passed++;
+	}
+
+	/* if we get to this point, either the list is empty or every
+	 * queued element is less than new.
+	 * Let's add new to the end. */
+	list_add_tail(&new->wq.task_list, &head->task_list);
+out:
+	return passed;
+}
 
 #else
 
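The ordering rule of __add_wait_queue_prio_exclusive() above can be illustrated with a small user-space model (not part of the patch): entries stay sorted by priority point, ties are broken by tie_breaker, and the return value counts how many already-queued entries remain ahead of the new one.

#include <stdio.h>

struct entry {
	unsigned long long priority;	/* models the lt_t priority point */
	unsigned int tie_breaker;
};

/* returns the number of queued entries that keep their place ahead of 'new' */
static unsigned int insert_in_prio_order(struct entry *q, unsigned int *len,
					 struct entry new)
{
	unsigned int i, passed = 0;

	/* skip every queued entry that 'new' does not precede */
	while (passed < *len &&
	       !(new.priority < q[passed].priority ||
		 (new.priority == q[passed].priority &&
		  new.tie_breaker < q[passed].tie_breaker)))
		passed++;

	/* shift the remaining entries back and insert */
	for (i = *len; i > passed; i--)
		q[i] = q[i - 1];
	q[passed] = new;
	(*len)++;
	return passed;
}

int main(void)
{
	struct entry q[8];
	unsigned int len = 0;

	insert_in_prio_order(q, &len, (struct entry){ .priority = 10 });
	insert_in_prio_order(q, &len, (struct entry){ .priority = 30 });
	/* a priority-20 waiter passes the 30 but not the 10 => prints 1 */
	printf("%u\n", insert_in_prio_order(q, &len,
					    (struct entry){ .priority = 20 }));
	return 0;
}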
diff --git a/litmus/sched_pfp.c b/litmus/sched_pfp.c
new file mode 100644
index 000000000000..d5dd78d941c6
--- /dev/null
+++ b/litmus/sched_pfp.c
@@ -0,0 +1,1543 @@
1/*
2 * litmus/sched_pfp.c
3 *
4 * Implementation of partitioned fixed-priority scheduling.
5 * Based on PSN-EDF.
6 */
7
8#include <linux/percpu.h>
9#include <linux/sched.h>
10#include <linux/list.h>
11#include <linux/spinlock.h>
12#include <linux/module.h>
13
14#include <litmus/litmus.h>
15#include <litmus/wait.h>
16#include <litmus/jobs.h>
17#include <litmus/preempt.h>
18#include <litmus/fp_common.h>
19#include <litmus/sched_plugin.h>
20#include <litmus/sched_trace.h>
21#include <litmus/trace.h>
22#include <litmus/budget.h>
23
24#include <linux/uaccess.h>
25
26
27typedef struct {
28 rt_domain_t domain;
29 struct fp_prio_queue ready_queue;
30 int cpu;
31 struct task_struct* scheduled; /* only RT tasks */
32/*
33 * scheduling lock slock
34 * protects the domain and serializes scheduling decisions
35 */
36#define slock domain.ready_lock
37
38} pfp_domain_t;
39
40DEFINE_PER_CPU(pfp_domain_t, pfp_domains);
41
42pfp_domain_t* pfp_doms[NR_CPUS];
43
44#define local_pfp (&__get_cpu_var(pfp_domains))
45#define remote_dom(cpu) (&per_cpu(pfp_domains, cpu).domain)
46#define remote_pfp(cpu) (&per_cpu(pfp_domains, cpu))
47#define task_dom(task) remote_dom(get_partition(task))
48#define task_pfp(task) remote_pfp(get_partition(task))
49
50/* we assume the lock is being held */
51static void preempt(pfp_domain_t *pfp)
52{
53 preempt_if_preemptable(pfp->scheduled, pfp->cpu);
54}
55
56static unsigned int priority_index(struct task_struct* t)
57{
58#ifdef CONFIG_LITMUS_LOCKING
59 if (unlikely(t->rt_param.inh_task))
60 /* use effective priority */
61 t = t->rt_param.inh_task;
62
63 if (is_priority_boosted(t)) {
64 /* zero is reserved for priority-boosted tasks */
65 return 0;
66 } else
67#endif
68 return get_priority(t);
69}
70
71
72static void pfp_release_jobs(rt_domain_t* rt, struct bheap* tasks)
73{
74 pfp_domain_t *pfp = container_of(rt, pfp_domain_t, domain);
75 unsigned long flags;
76 struct task_struct* t;
77 struct bheap_node* hn;
78
79 raw_spin_lock_irqsave(&pfp->slock, flags);
80
81 while (!bheap_empty(tasks)) {
82 hn = bheap_take(fp_ready_order, tasks);
83 t = bheap2task(hn);
84 TRACE_TASK(t, "released (part:%d prio:%d)\n",
85 get_partition(t), get_priority(t));
86 fp_prio_add(&pfp->ready_queue, t, priority_index(t));
87 }
88
89 /* do we need to preempt? */
90 if (fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled)) {
91 TRACE_CUR("preempted by new release\n");
92 preempt(pfp);
93 }
94
95 raw_spin_unlock_irqrestore(&pfp->slock, flags);
96}
97
98static void pfp_domain_init(pfp_domain_t* pfp,
99 int cpu)
100{
101 fp_domain_init(&pfp->domain, NULL, pfp_release_jobs);
102 pfp->cpu = cpu;
103 pfp->scheduled = NULL;
104 fp_prio_queue_init(&pfp->ready_queue);
105}
106
107static void requeue(struct task_struct* t, pfp_domain_t *pfp)
108{
109 if (t->state != TASK_RUNNING)
110 TRACE_TASK(t, "requeue: !TASK_RUNNING\n");
111
112 set_rt_flags(t, RT_F_RUNNING);
113 if (is_released(t, litmus_clock()))
114 fp_prio_add(&pfp->ready_queue, t, priority_index(t));
115 else
116 add_release(&pfp->domain, t); /* it has got to wait */
117}
118
119static void job_completion(struct task_struct* t, int forced)
120{
121 sched_trace_task_completion(t,forced);
122 TRACE_TASK(t, "job_completion().\n");
123
124 set_rt_flags(t, RT_F_SLEEP);
125 prepare_for_next_period(t);
126}
127
128static void pfp_tick(struct task_struct *t)
129{
130 pfp_domain_t *pfp = local_pfp;
131
132 /* Check for inconsistency. We don't need the lock for this since
133 * ->scheduled is only changed in schedule, which obviously is not
134 * executing in parallel on this CPU
135 */
136 BUG_ON(is_realtime(t) && t != pfp->scheduled);
137
138 if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) {
139 if (!is_np(t)) {
140 litmus_reschedule_local();
141 TRACE("pfp_scheduler_tick: "
142 "%d is preemptable "
143 " => FORCE_RESCHED\n", t->pid);
144 } else if (is_user_np(t)) {
145 TRACE("pfp_scheduler_tick: "
146 "%d is non-preemptable, "
147 "preemption delayed.\n", t->pid);
148 request_exit_np(t);
149 }
150 }
151}
152
153static struct task_struct* pfp_schedule(struct task_struct * prev)
154{
155 pfp_domain_t* pfp = local_pfp;
156 struct task_struct* next;
157
158 int out_of_time, sleep, preempt, np, exists, blocks, resched, migrate;
159
160 raw_spin_lock(&pfp->slock);
161
162 /* sanity checking
163 * differently from gedf, when a task exits (dead)
164 * pfp->scheduled may be null and prev _is_ realtime
165 */
166 BUG_ON(pfp->scheduled && pfp->scheduled != prev);
167 BUG_ON(pfp->scheduled && !is_realtime(prev));
168
169 /* (0) Determine state */
170 exists = pfp->scheduled != NULL;
171 blocks = exists && !is_running(pfp->scheduled);
172 out_of_time = exists &&
173 budget_enforced(pfp->scheduled) &&
174 budget_exhausted(pfp->scheduled);
175 np = exists && is_np(pfp->scheduled);
176 sleep = exists && get_rt_flags(pfp->scheduled) == RT_F_SLEEP;
177 migrate = exists && get_partition(pfp->scheduled) != pfp->cpu;
178 preempt = migrate || fp_preemption_needed(&pfp->ready_queue, prev);
179
180 /* If we need to preempt do so.
181 * The following checks set resched to 1 in case of special
182 * circumstances.
183 */
184 resched = preempt;
185
186 /* If a task blocks we have no choice but to reschedule.
187 */
188 if (blocks)
189 resched = 1;
190
191 /* Request a sys_exit_np() call if we would like to preempt but cannot.
192 * Multiple calls to request_exit_np() don't hurt.
193 */
194 if (np && (out_of_time || preempt || sleep))
195 request_exit_np(pfp->scheduled);
196
197 /* Any task that is preemptable and either exhausts its execution
198 * budget or wants to sleep completes. We may have to reschedule after
199 * this.
200 */
201 if (!np && (out_of_time || sleep) && !blocks && !migrate) {
202 job_completion(pfp->scheduled, !sleep);
203 resched = 1;
204 }
205
206 /* The final scheduling decision. Do we need to switch for some reason?
207 * Switch if we are in RT mode and have no task or if we need to
208 * resched.
209 */
210 next = NULL;
211 if ((!np || blocks) && (resched || !exists)) {
212 /* When preempting a task that does not block, then
213 * re-insert it into either the ready queue or the
214 * release queue (if it completed). requeue() picks
215 * the appropriate queue.
216 */
217 if (pfp->scheduled && !blocks && !migrate)
218 requeue(pfp->scheduled, pfp);
219 next = fp_prio_take(&pfp->ready_queue);
220 } else
221 /* Only override Linux scheduler if we have a real-time task
222 * scheduled that needs to continue.
223 */
224 if (exists)
225 next = prev;
226
227 if (next) {
228 TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
229 set_rt_flags(next, RT_F_RUNNING);
230 } else {
231 TRACE("becoming idle at %llu\n", litmus_clock());
232 }
233
234 pfp->scheduled = next;
235 sched_state_task_picked();
236 raw_spin_unlock(&pfp->slock);
237
238 return next;
239}
240
241#ifdef CONFIG_LITMUS_LOCKING
242
243/* prev is no longer scheduled --- see if it needs to migrate */
244static void pfp_finish_switch(struct task_struct *prev)
245{
246 pfp_domain_t *to;
247
248 if (is_realtime(prev) &&
249 is_running(prev) &&
250 get_partition(prev) != smp_processor_id()) {
251 TRACE_TASK(prev, "needs to migrate from P%d to P%d\n",
252 smp_processor_id(), get_partition(prev));
253
254 to = task_pfp(prev);
255
256 raw_spin_lock(&to->slock);
257
258 TRACE_TASK(prev, "adding to queue on P%d\n", to->cpu);
259 requeue(prev, to);
260 if (fp_preemption_needed(&to->ready_queue, to->scheduled))
261 preempt(to);
262
263 raw_spin_unlock(&to->slock);
264
265 }
266}
267
268#endif
269
270/* Prepare a task for running in RT mode
271 */
272static void pfp_task_new(struct task_struct * t, int on_rq, int running)
273{
274 pfp_domain_t* pfp = task_pfp(t);
275 unsigned long flags;
276
277 TRACE_TASK(t, "P-FP: task new, cpu = %d\n",
278 t->rt_param.task_params.cpu);
279
280 /* setup job parameters */
281 release_at(t, litmus_clock());
282
283 /* The task should be running in the queue, otherwise signal
284 * code will try to wake it up with fatal consequences.
285 */
286 raw_spin_lock_irqsave(&pfp->slock, flags);
287 if (running) {
288 /* there shouldn't be anything else running at the time */
289 BUG_ON(pfp->scheduled);
290 pfp->scheduled = t;
291 } else {
292 requeue(t, pfp);
293 /* maybe we have to reschedule */
294 preempt(pfp);
295 }
296 raw_spin_unlock_irqrestore(&pfp->slock, flags);
297}
298
299static void pfp_task_wake_up(struct task_struct *task)
300{
301 unsigned long flags;
302 pfp_domain_t* pfp = task_pfp(task);
303 lt_t now;
304
305 TRACE_TASK(task, "wake_up at %llu\n", litmus_clock());
306 raw_spin_lock_irqsave(&pfp->slock, flags);
307
308#ifdef CONFIG_LITMUS_LOCKING
309 /* Should only be queued when processing a fake-wake up due to a
310 * migration-related state change. */
311 if (unlikely(is_queued(task))) {
312 TRACE_TASK(task, "WARNING: waking task still queued. Is this right?\n");
313 goto out_unlock;
314 }
315#else
316 BUG_ON(is_queued(task));
317#endif
318 now = litmus_clock();
319 if (is_tardy(task, now)
320#ifdef CONFIG_LITMUS_LOCKING
321 /* We need to take suspensions because of semaphores into
322 * account! If a job resumes after being suspended due to acquiring
323 * a semaphore, it should never be treated as a new job release.
324 */
325 && !is_priority_boosted(task)
326#endif
327 ) {
328 /* new sporadic release */
329 release_at(task, now);
330 sched_trace_task_release(task);
331 }
332
333 /* Only add to ready queue if it is not the currently-scheduled
334 * task. This could be the case if a task was woken up concurrently
335 * on a remote CPU before the executing CPU got around to actually
336 * de-scheduling the task, i.e., wake_up() raced with schedule()
337 * and won. Also, don't requeue if it is still queued, which can
338 * happen under the DPCP due to wake-ups racing with migrations.
339 */
340 if (pfp->scheduled != task)
341 requeue(task, pfp);
342
343out_unlock:
344 raw_spin_unlock_irqrestore(&pfp->slock, flags);
345 TRACE_TASK(task, "wake up done\n");
346}
347
348static void pfp_task_block(struct task_struct *t)
349{
350 /* only running tasks can block, thus t is in no queue */
351 TRACE_TASK(t, "block at %llu, state=%d\n", litmus_clock(), t->state);
352
353 BUG_ON(!is_realtime(t));
354
355 /* If this task blocked normally, it shouldn't be queued. The exception is
356 * if this is a simulated block()/wakeup() pair from the pull-migration code path.
357 * This should only happen if the DPCP is being used.
358 */
359#ifdef CONFIG_LITMUS_LOCKING
360 if (unlikely(is_queued(t)))
361 TRACE_TASK(t, "WARNING: blocking task still queued. Is this right?\n");
362#else
363 BUG_ON(is_queued(t));
364#endif
365}
366
367static void pfp_task_exit(struct task_struct * t)
368{
369 unsigned long flags;
370 pfp_domain_t* pfp = task_pfp(t);
371 rt_domain_t* dom;
372
373 raw_spin_lock_irqsave(&pfp->slock, flags);
374 if (is_queued(t)) {
375 BUG(); /* This currently doesn't work. */
376 /* dequeue */
377 dom = task_dom(t);
378 remove(dom, t);
379 }
380 if (pfp->scheduled == t) {
381 pfp->scheduled = NULL;
382 preempt(pfp);
383 }
384 TRACE_TASK(t, "RIP, now reschedule\n");
385
386 raw_spin_unlock_irqrestore(&pfp->slock, flags);
387}
388
389#ifdef CONFIG_LITMUS_LOCKING
390
391#include <litmus/fdso.h>
392#include <litmus/srp.h>
393
394static void fp_dequeue(pfp_domain_t* pfp, struct task_struct* t)
395{
396 BUG_ON(pfp->scheduled == t && is_queued(t));
397 if (is_queued(t))
398 fp_prio_remove(&pfp->ready_queue, t, priority_index(t));
399}
400
401static void fp_set_prio_inh(pfp_domain_t* pfp, struct task_struct* t,
402 struct task_struct* prio_inh)
403{
404 int requeue;
405
406 if (!t || t->rt_param.inh_task == prio_inh) {
407 /* no update required */
408 if (t)
409 TRACE_TASK(t, "no prio-inh update required\n");
410 return;
411 }
412
413 requeue = is_queued(t);
414 TRACE_TASK(t, "prio-inh: is_queued:%d\n", requeue);
415
416 if (requeue)
417 /* first remove */
418 fp_dequeue(pfp, t);
419
420 t->rt_param.inh_task = prio_inh;
421
422 if (requeue)
423 /* add again to the right queue */
424 fp_prio_add(&pfp->ready_queue, t, priority_index(t));
425}
426
427static int effective_agent_priority(int prio)
428{
429 /* make sure agents have higher priority */
430 return prio - LITMUS_MAX_PRIORITY;
431}
432
433static lt_t prio_point(int eprio)
434{
435 /* make sure we have non-negative prio points */
436 return eprio + LITMUS_MAX_PRIORITY;
437}
438
439static int prio_from_point(lt_t prio_point)
440{
441 return ((int) prio_point) - LITMUS_MAX_PRIORITY;
442}
443
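/* Worked example (not part of the original patch): with LITMUS_MAX_PRIORITY
 * = 512, a task of priority 7 gets effective_agent_priority(7) = 7 - 512 =
 * -505, which is numerically smaller than, and therefore ranks above, every
 * regular priority 1..511 ("make sure agents have higher priority");
 * prio_point(-505) = 7 maps it back to a non-negative value that fits the
 * lt_t priority field of a prio_wait_queue_t, and prio_from_point() undoes
 * that shift when the signed priority is needed again for ceiling checks.
 */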
444static void boost_priority(struct task_struct* t, lt_t priority_point)
445{
446 unsigned long flags;
447 pfp_domain_t* pfp = task_pfp(t);
448
449 raw_spin_lock_irqsave(&pfp->slock, flags);
450
451
452 TRACE_TASK(t, "priority boosted at %llu\n", litmus_clock());
453
454 tsk_rt(t)->priority_boosted = 1;
455 /* tie-break by protocol-specific priority point */
456 tsk_rt(t)->boost_start_time = priority_point;
457
458 if (pfp->scheduled != t) {
459 /* holder may be queued: first stop queue changes */
460 raw_spin_lock(&pfp->domain.release_lock);
461 if (is_queued(t) &&
462 /* If it is queued, then we need to re-order. */
463 bheap_decrease(fp_ready_order, tsk_rt(t)->heap_node) &&
464 /* If we bubbled to the top, then we need to check for preemptions. */
465 fp_preemption_needed(&pfp->ready_queue, pfp->scheduled))
466 preempt(pfp);
467 raw_spin_unlock(&pfp->domain.release_lock);
468 } /* else: nothing to do since the job is not queued while scheduled */
469
470 raw_spin_unlock_irqrestore(&pfp->slock, flags);
471}
472
473static void unboost_priority(struct task_struct* t)
474{
475 unsigned long flags;
476 pfp_domain_t* pfp = task_pfp(t);
477 lt_t now;
478
479 raw_spin_lock_irqsave(&pfp->slock, flags);
480 now = litmus_clock();
481
482 /* assumption: this only happens when the job is scheduled */
483 BUG_ON(pfp->scheduled != t);
484
485 TRACE_TASK(t, "priority restored at %llu\n", now);
486
487 /* priority boosted jobs must be scheduled */
488 BUG_ON(pfp->scheduled != t);
489
490 tsk_rt(t)->priority_boosted = 0;
491 tsk_rt(t)->boost_start_time = 0;
492
493 /* check if this changes anything */
494 if (fp_preemption_needed(&pfp->ready_queue, pfp->scheduled))
495 preempt(pfp);
496
497 raw_spin_unlock_irqrestore(&pfp->slock, flags);
498}
499
500/* ******************** SRP support ************************ */
501
502static unsigned int pfp_get_srp_prio(struct task_struct* t)
503{
504 return get_priority(t);
505}
506
507/* ******************** FMLP support ********************** */
508
509struct fmlp_semaphore {
510 struct litmus_lock litmus_lock;
511
512 /* current resource holder */
513 struct task_struct *owner;
514
515 /* FIFO queue of waiting tasks */
516 wait_queue_head_t wait;
517};
518
519static inline struct fmlp_semaphore* fmlp_from_lock(struct litmus_lock* lock)
520{
521 return container_of(lock, struct fmlp_semaphore, litmus_lock);
522}
523int pfp_fmlp_lock(struct litmus_lock* l)
524{
525 struct task_struct* t = current;
526 struct fmlp_semaphore *sem = fmlp_from_lock(l);
527 wait_queue_t wait;
528 unsigned long flags;
529 lt_t time_of_request;
530
531 if (!is_realtime(t))
532 return -EPERM;
533
534 spin_lock_irqsave(&sem->wait.lock, flags);
535
536 /* tie-break by this point in time */
537 time_of_request = litmus_clock();
538
539 /* Priority-boost ourself *before* we suspend so that
540 * our priority is boosted when we resume. */
541 boost_priority(t, time_of_request);
542
543 if (sem->owner) {
544 /* resource is not free => must suspend and wait */
545
546 init_waitqueue_entry(&wait, t);
547
548 /* FIXME: interruptible would be nice some day */
549 set_task_state(t, TASK_UNINTERRUPTIBLE);
550
551 __add_wait_queue_tail_exclusive(&sem->wait, &wait);
552
553 TS_LOCK_SUSPEND;
554
555 /* release lock before sleeping */
556 spin_unlock_irqrestore(&sem->wait.lock, flags);
557
558 /* We depend on the FIFO order. Thus, we don't need to recheck
559 * when we wake up; we are guaranteed to have the lock since
560 * there is only one wake up per release.
561 */
562
563 schedule();
564
565 TS_LOCK_RESUME;
566
567 /* Since we hold the lock, no other task will change
568 * ->owner. We can thus check it without acquiring the spin
569 * lock. */
570 BUG_ON(sem->owner != t);
571 } else {
572 /* it's ours now */
573 sem->owner = t;
574
575 spin_unlock_irqrestore(&sem->wait.lock, flags);
576 }
577
578 return 0;
579}
580
581int pfp_fmlp_unlock(struct litmus_lock* l)
582{
583 struct task_struct *t = current, *next;
584 struct fmlp_semaphore *sem = fmlp_from_lock(l);
585 unsigned long flags;
586 int err = 0;
587
588 spin_lock_irqsave(&sem->wait.lock, flags);
589
590 if (sem->owner != t) {
591 err = -EINVAL;
592 goto out;
593 }
594
595 /* we lose the benefit of priority boosting */
596
597 unboost_priority(t);
598
599 /* check if there are jobs waiting for this resource */
600 next = __waitqueue_remove_first(&sem->wait);
601 if (next) {
602 /* next becomes the resource holder */
603 sem->owner = next;
604
605 /* Wake up next. The waiting job is already priority-boosted. */
606 wake_up_process(next);
607 } else
608 /* resource becomes available */
609 sem->owner = NULL;
610
611out:
612 spin_unlock_irqrestore(&sem->wait.lock, flags);
613 return err;
614}
615
616int pfp_fmlp_close(struct litmus_lock* l)
617{
618 struct task_struct *t = current;
619 struct fmlp_semaphore *sem = fmlp_from_lock(l);
620 unsigned long flags;
621
622 int owner;
623
624 spin_lock_irqsave(&sem->wait.lock, flags);
625
626 owner = sem->owner == t;
627
628 spin_unlock_irqrestore(&sem->wait.lock, flags);
629
630 if (owner)
631 pfp_fmlp_unlock(l);
632
633 return 0;
634}
635
636void pfp_fmlp_free(struct litmus_lock* lock)
637{
638 kfree(fmlp_from_lock(lock));
639}
640
641static struct litmus_lock_ops pfp_fmlp_lock_ops = {
642 .close = pfp_fmlp_close,
643 .lock = pfp_fmlp_lock,
644 .unlock = pfp_fmlp_unlock,
645 .deallocate = pfp_fmlp_free,
646};
647
648static struct litmus_lock* pfp_new_fmlp(void)
649{
650 struct fmlp_semaphore* sem;
651
652 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
653 if (!sem)
654 return NULL;
655
656 sem->owner = NULL;
657 init_waitqueue_head(&sem->wait);
658 sem->litmus_lock.ops = &pfp_fmlp_lock_ops;
659
660 return &sem->litmus_lock;
661}
662
663/* ******************** MPCP support ********************** */
664
665struct mpcp_semaphore {
666 struct litmus_lock litmus_lock;
667
668 /* current resource holder */
669 struct task_struct *owner;
670
671 /* priority queue of waiting tasks */
672 wait_queue_head_t wait;
673
674 /* priority ceiling per cpu */
675 unsigned int prio_ceiling[NR_CPUS];
676
677 /* should jobs spin "virtually" for this resource? */
678 int vspin;
679};
680
681#define OMEGA_CEILING UINT_MAX
682
683/* Since jobs spin "virtually" while waiting to acquire a lock,
684 * they first must aquire a local per-cpu resource.
685 */
686static DEFINE_PER_CPU(wait_queue_head_t, mpcpvs_vspin_wait);
687static DEFINE_PER_CPU(struct task_struct*, mpcpvs_vspin);
688
689/* called with preemptions off <=> no local modifications */
690static void mpcp_vspin_enter(void)
691{
692 struct task_struct* t = current;
693
694 while (1) {
695 if (__get_cpu_var(mpcpvs_vspin) == NULL) {
696 /* good, we get to issue our request */
697 __get_cpu_var(mpcpvs_vspin) = t;
698 break;
699 } else {
700 /* some job is spinning => enqueue in request queue */
701 prio_wait_queue_t wait;
702 wait_queue_head_t* vspin = &__get_cpu_var(mpcpvs_vspin_wait);
703 unsigned long flags;
704
705 /* ordered by regular priority */
706 init_prio_waitqueue_entry(&wait, t, prio_point(get_priority(t)));
707
708 spin_lock_irqsave(&vspin->lock, flags);
709
710 set_task_state(t, TASK_UNINTERRUPTIBLE);
711
712 __add_wait_queue_prio_exclusive(vspin, &wait);
713
714 spin_unlock_irqrestore(&vspin->lock, flags);
715
716 TS_LOCK_SUSPEND;
717
718 preempt_enable_no_resched();
719
720 schedule();
721
722 preempt_disable();
723
724 TS_LOCK_RESUME;
725 /* Recheck if we got it --- some higher-priority process might
726 * have swooped in. */
727 }
728 }
729 /* ok, now it is ours */
730}
731
732/* called with preemptions off */
733static void mpcp_vspin_exit(void)
734{
735 struct task_struct* t = current, *next;
736 unsigned long flags;
737 wait_queue_head_t* vspin = &__get_cpu_var(mpcpvs_vspin_wait);
738
739 BUG_ON(__get_cpu_var(mpcpvs_vspin) != t);
740
741 /* no spinning job */
742 __get_cpu_var(mpcpvs_vspin) = NULL;
743
744 /* see if anyone is waiting for us to stop "spinning" */
745 spin_lock_irqsave(&vspin->lock, flags);
746 next = __waitqueue_remove_first(vspin);
747
748 if (next)
749 wake_up_process(next);
750
751 spin_unlock_irqrestore(&vspin->lock, flags);
752}
753
754static inline struct mpcp_semaphore* mpcp_from_lock(struct litmus_lock* lock)
755{
756 return container_of(lock, struct mpcp_semaphore, litmus_lock);
757}
758
759int pfp_mpcp_lock(struct litmus_lock* l)
760{
761 struct task_struct* t = current;
762 struct mpcp_semaphore *sem = mpcp_from_lock(l);
763 prio_wait_queue_t wait;
764 unsigned long flags;
765
766 if (!is_realtime(t))
767 return -EPERM;
768
769 preempt_disable();
770
771 if (sem->vspin)
772 mpcp_vspin_enter();
773
774 /* Priority-boost ourself *before* we suspend so that
775 * our priority is boosted when we resume. Use the priority
776 * ceiling for the local partition. */
777 boost_priority(t, sem->prio_ceiling[get_partition(t)]);
778
779 spin_lock_irqsave(&sem->wait.lock, flags);
780
781 preempt_enable_no_resched();
782
783 if (sem->owner) {
784 /* resource is not free => must suspend and wait */
785
786 /* ordered by regular priority */
787 init_prio_waitqueue_entry(&wait, t, prio_point(get_priority(t)));
788
789 /* FIXME: interruptible would be nice some day */
790 set_task_state(t, TASK_UNINTERRUPTIBLE);
791
792 __add_wait_queue_prio_exclusive(&sem->wait, &wait);
793
794 TS_LOCK_SUSPEND;
795
796 /* release lock before sleeping */
797 spin_unlock_irqrestore(&sem->wait.lock, flags);
798
799 /* We depend on the FIFO order. Thus, we don't need to recheck
800 * when we wake up; we are guaranteed to have the lock since
801 * there is only one wake up per release.
802 */
803
804 schedule();
805
806 TS_LOCK_RESUME;
807
808 /* Since we hold the lock, no other task will change
809 * ->owner. We can thus check it without acquiring the spin
810 * lock. */
811 BUG_ON(sem->owner != t);
812 } else {
813 /* it's ours now */
814 sem->owner = t;
815
816 spin_unlock_irqrestore(&sem->wait.lock, flags);
817 }
818
819 return 0;
820}
821
822int pfp_mpcp_unlock(struct litmus_lock* l)
823{
824 struct task_struct *t = current, *next;
825 struct mpcp_semaphore *sem = mpcp_from_lock(l);
826 unsigned long flags;
827 int err = 0;
828
829 spin_lock_irqsave(&sem->wait.lock, flags);
830
831 if (sem->owner != t) {
832 err = -EINVAL;
833 goto out;
834 }
835
836 /* we lose the benefit of priority boosting */
837
838 unboost_priority(t);
839
840 /* check if there are jobs waiting for this resource */
841 next = __waitqueue_remove_first(&sem->wait);
842 if (next) {
843 /* next becomes the resource holder */
844 sem->owner = next;
845
846 /* Wake up next. The waiting job is already priority-boosted. */
847 wake_up_process(next);
848 } else
849 /* resource becomes available */
850 sem->owner = NULL;
851
852out:
853 spin_unlock_irqrestore(&sem->wait.lock, flags);
854
855 if (sem->vspin && err == 0) {
856 preempt_disable();
857 mpcp_vspin_exit();
858 preempt_enable();
859 }
860
861 return err;
862}
863
864int pfp_mpcp_open(struct litmus_lock* l, void* config)
865{
866 struct task_struct *t = current;
867 struct mpcp_semaphore *sem = mpcp_from_lock(l);
868 int cpu, local_cpu;
869 unsigned long flags;
870
871 if (!is_realtime(t))
872 /* we need to know the real-time priority */
873 return -EPERM;
874
875 local_cpu = get_partition(t);
876
877 spin_lock_irqsave(&sem->wait.lock, flags);
878
879 for (cpu = 0; cpu < NR_CPUS; cpu++)
880 if (cpu != local_cpu)
881 {
882 sem->prio_ceiling[cpu] = min(sem->prio_ceiling[cpu],
883 get_priority(t));
884 TRACE_CUR("priority ceiling for sem %p is now %d on cpu %d\n",
885 sem, sem->prio_ceiling[cpu], cpu);
886 }
887
888 spin_unlock_irqrestore(&sem->wait.lock, flags);
889
890 return 0;
891}
892
893int pfp_mpcp_close(struct litmus_lock* l)
894{
895 struct task_struct *t = current;
896 struct mpcp_semaphore *sem = mpcp_from_lock(l);
897 unsigned long flags;
898
899 int owner;
900
901 spin_lock_irqsave(&sem->wait.lock, flags);
902
903 owner = sem->owner == t;
904
905 spin_unlock_irqrestore(&sem->wait.lock, flags);
906
907 if (owner)
908 pfp_mpcp_unlock(l);
909
910 return 0;
911}
912
913void pfp_mpcp_free(struct litmus_lock* lock)
914{
915 kfree(mpcp_from_lock(lock));
916}
917
918static struct litmus_lock_ops pfp_mpcp_lock_ops = {
919 .close = pfp_mpcp_close,
920 .lock = pfp_mpcp_lock,
921 .open = pfp_mpcp_open,
922 .unlock = pfp_mpcp_unlock,
923 .deallocate = pfp_mpcp_free,
924};
925
926static struct litmus_lock* pfp_new_mpcp(int vspin)
927{
928 struct mpcp_semaphore* sem;
929 int cpu;
930
931 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
932 if (!sem)
933 return NULL;
934
935 sem->owner = NULL;
936 init_waitqueue_head(&sem->wait);
937 sem->litmus_lock.ops = &pfp_mpcp_lock_ops;
938
939 for (cpu = 0; cpu < NR_CPUS; cpu++)
940 sem->prio_ceiling[cpu] = OMEGA_CEILING;
941
942 /* mark as virtual spinning */
943 sem->vspin = vspin;
944
945 return &sem->litmus_lock;
946}
947
948
949/* ******************** PCP support ********************** */
950
951
952struct pcp_semaphore {
953 struct list_head ceiling;
954
955 /* current resource holder */
956 struct task_struct *owner;
957
958 /* priority ceiling --- can be negative due to DPCP support */
959 int prio_ceiling;
960
961 /* on which processor is this PCP semaphore allocated? */
962 int on_cpu;
963};
964
965struct pcp_state {
966 struct list_head system_ceiling;
967
968 /* highest-priority waiting task */
969 struct task_struct* hp_waiter;
970
971 /* list of jobs waiting to get past the system ceiling */
972 wait_queue_head_t ceiling_blocked;
973};
974
975static void pcp_init_state(struct pcp_state* s)
976{
977 INIT_LIST_HEAD(&s->system_ceiling);
978 s->hp_waiter = NULL;
979 init_waitqueue_head(&s->ceiling_blocked);
980}
981
982static DEFINE_PER_CPU(struct pcp_state, pcp_state);
983
984/* assumes preemptions are off */
985static struct pcp_semaphore* pcp_get_ceiling(void)
986{
987 struct list_head* top = __get_cpu_var(pcp_state).system_ceiling.next;
988
989 if (top)
990 return list_entry(top, struct pcp_semaphore, ceiling);
991 else
992 return NULL;
993}
994
995/* assumes preempt off */
996static void pcp_add_ceiling(struct pcp_semaphore* sem)
997{
998 struct list_head *pos;
999 struct list_head *in_use = &__get_cpu_var(pcp_state).system_ceiling;
1000 struct pcp_semaphore* held;
1001
1002 BUG_ON(sem->on_cpu != smp_processor_id());
1003 BUG_ON(in_list(&sem->ceiling));
1004
1005 list_for_each(pos, in_use) {
1006 held = list_entry(pos, struct pcp_semaphore, ceiling);
1007 if (held->prio_ceiling >= sem->prio_ceiling) {
1008 __list_add(&sem->ceiling, pos->prev, pos);
1009 return;
1010 }
1011 }
1012
1013 /* we hit the end of the list */
1014
1015 list_add_tail(&sem->ceiling, in_use);
1016}
1017
1018/* assumes preempt off */
1019static int pcp_exceeds_ceiling(struct pcp_semaphore* ceiling,
1020 struct task_struct* task,
1021 int effective_prio)
1022{
1023 return ceiling == NULL ||
1024 ceiling->prio_ceiling > effective_prio ||
1025 ceiling->owner == task;
1026}
1027
1028/* assumes preempt off */
1029static void pcp_priority_inheritance(void)
1030{
1031 unsigned long flags;
1032 pfp_domain_t* pfp = local_pfp;
1033
1034 struct pcp_semaphore* ceiling = pcp_get_ceiling();
1035 struct task_struct *blocker, *blocked;
1036
1037 blocker = ceiling ? ceiling->owner : NULL;
1038 blocked = __get_cpu_var(pcp_state).hp_waiter;
1039
1040 raw_spin_lock_irqsave(&pfp->slock, flags);
1041
1042 /* Current is no longer inheriting anything by default. This should be
1043 * the currently scheduled job, and hence not currently queued. */
1044 BUG_ON(current != pfp->scheduled);
1045
1046 fp_set_prio_inh(pfp, current, NULL);
1047 fp_set_prio_inh(pfp, blocked, NULL);
1048 fp_set_prio_inh(pfp, blocker, NULL);
1049
1050
1051 /* Let blocking job inherit priority of blocked job, if required. */
1052 if (blocker && blocked &&
1053 fp_higher_prio(blocked, blocker)) {
1054 TRACE_TASK(blocker, "PCP inherits from %s/%d (prio %u -> %u) \n",
1055 blocked->comm, blocked->pid,
1056 get_priority(blocker), get_priority(blocked));
1057 fp_set_prio_inh(pfp, blocker, blocked);
1058 }
1059
1060 /* check if anything changed */
1061 if (fp_higher_prio(fp_prio_peek(&pfp->ready_queue), pfp->scheduled))
1062 preempt(pfp);
1063
1064 raw_spin_unlock_irqrestore(&pfp->slock, flags);
1065}
1066
1067/* called with preemptions off */
1068static void pcp_raise_ceiling(struct pcp_semaphore* sem,
1069 int effective_prio)
1070{
1071 struct task_struct* t = current;
1072 struct pcp_semaphore* ceiling;
1073 prio_wait_queue_t wait;
1074 unsigned int waiting_higher_prio;
1075
1076 do {
1077 ceiling = pcp_get_ceiling();
1078 if (pcp_exceeds_ceiling(ceiling, t, effective_prio))
1079 break;
1080
1081 TRACE_CUR("PCP ceiling-blocked, wanted sem %p, but %s/%d has the ceiling \n",
1082 sem, ceiling->owner->comm, ceiling->owner->pid);
1083
1084 /* we need to wait until the ceiling is lowered */
1085
1086 /* enqueue in priority order */
1087 init_prio_waitqueue_entry(&wait, t, prio_point(effective_prio));
1088 set_task_state(t, TASK_UNINTERRUPTIBLE);
1089 waiting_higher_prio = add_wait_queue_prio_exclusive(
1090 &__get_cpu_var(pcp_state).ceiling_blocked, &wait);
1091
1092 if (waiting_higher_prio == 0) {
1093 TRACE_CUR("PCP new highest-prio waiter => prio inheritance\n");
1094
1095 /* we are the new highest-priority waiting job
1096 * => update inheritance */
1097 __get_cpu_var(pcp_state).hp_waiter = t;
1098 pcp_priority_inheritance();
1099 }
1100
1101 TS_LOCK_SUSPEND;
1102
1103 preempt_enable_no_resched();
1104 schedule();
1105 preempt_disable();
1106
1107 /* pcp_resume_unblocked() removed us from wait queue */
1108
1109 TS_LOCK_RESUME;
1110 } while(1);
1111
1112 TRACE_CUR("PCP got the ceiling and sem %p\n", sem);
1113
1114 /* We are good to go. The semaphore should be available. */
1115 BUG_ON(sem->owner != NULL);
1116
1117 sem->owner = t;
1118
1119 pcp_add_ceiling(sem);
1120}
1121
1122static void pcp_resume_unblocked(void)
1123{
1124 wait_queue_head_t *blocked = &__get_cpu_var(pcp_state).ceiling_blocked;
1125 unsigned long flags;
1126 prio_wait_queue_t* q;
1127 struct task_struct* t = NULL;
1128
1129 struct pcp_semaphore* ceiling = pcp_get_ceiling();
1130
1131 spin_lock_irqsave(&blocked->lock, flags);
1132
1133 while (waitqueue_active(blocked)) {
1134 /* check first == highest-priority waiting job */
1135 q = list_entry(blocked->task_list.next,
1136 prio_wait_queue_t, wq.task_list);
1137 t = (struct task_struct*) q->wq.private;
1138
1139 /* can it proceed now? => let it go */
1140 if (pcp_exceeds_ceiling(ceiling, t,
1141 prio_from_point(q->priority))) {
1142 __remove_wait_queue(blocked, &q->wq);
1143 wake_up_process(t);
1144 } else {
1145 /* We are done. Update highest-priority waiter. */
1146 __get_cpu_var(pcp_state).hp_waiter = t;
1147 goto out;
1148 }
1149 }
1150 /* If we get here, then there are no more waiting
1151 * jobs. */
1152 __get_cpu_var(pcp_state).hp_waiter = NULL;
1153out:
1154 spin_unlock_irqrestore(&blocked->lock, flags);
1155}
1156
1157/* assumes preempt off */
1158static void pcp_lower_ceiling(struct pcp_semaphore* sem)
1159{
1160 BUG_ON(!in_list(&sem->ceiling));
1161 BUG_ON(sem->owner != current);
1162 BUG_ON(sem->on_cpu != smp_processor_id());
1163
1164 /* remove from ceiling list */
1165 list_del(&sem->ceiling);
1166
1167 /* release */
1168 sem->owner = NULL;
1169
1170 TRACE_CUR("PCP released sem %p\n", sem);
1171
1172 /* Wake up all ceiling-blocked jobs that now pass the ceiling. */
1173 pcp_resume_unblocked();
1174
1175 pcp_priority_inheritance();
1176}
1177
1178static void pcp_update_prio_ceiling(struct pcp_semaphore* sem,
1179 int effective_prio)
1180{
1181 /* This needs to be synchronized on something.
1182 * Might as well use waitqueue lock for the processor.
1183 * We assume this happens only before the task set starts execution,
1184 * (i.e., during initialization), but it may happen on multiple processors
1185 * at the same time.
1186 */
1187 unsigned long flags;
1188
1189 struct pcp_state* s = &per_cpu(pcp_state, sem->on_cpu);
1190
1191 spin_lock_irqsave(&s->ceiling_blocked.lock, flags);
1192
1193 sem->prio_ceiling = min(sem->prio_ceiling, effective_prio);
1194
1195 spin_unlock_irqrestore(&s->ceiling_blocked.lock, flags);
1196}
1197
1198static void pcp_init_semaphore(struct pcp_semaphore* sem, int cpu)
1199{
1200 sem->owner = NULL;
1201 INIT_LIST_HEAD(&sem->ceiling);
1202 sem->prio_ceiling = INT_MAX;
1203 sem->on_cpu = cpu;
1204}
1205
1206
1207/* ******************** DPCP support ********************** */
1208
1209struct dpcp_semaphore {
1210 struct litmus_lock litmus_lock;
1211 struct pcp_semaphore pcp;
1212 int owner_cpu;
1213};
1214
1215static inline struct dpcp_semaphore* dpcp_from_lock(struct litmus_lock* lock)
1216{
1217 return container_of(lock, struct dpcp_semaphore, litmus_lock);
1218}
1219
1220/* called with preemptions disabled */
1221static void pfp_migrate_to(int target_cpu)
1222{
1223 struct task_struct* t = current;
1224 pfp_domain_t *from;
1225
1226 if (get_partition(t) == target_cpu)
1227 return;
1228
1229 /* make sure target_cpu makes sense */
1230 BUG_ON(!cpu_online(target_cpu));
1231
1232 local_irq_disable();
1233
1234 /* scheduled task should not be in any ready or release queue */
1235 BUG_ON(is_queued(t));
1236
1237 /* lock the pfp domain of the source partition */
1238 from = task_pfp(t);
1239
1240 raw_spin_lock(&from->slock);
1241
1242 /* switch partitions */
1243 tsk_rt(t)->task_params.cpu = target_cpu;
1244
1245 raw_spin_unlock(&from->slock);
1246
1247 /* Don't trace scheduler costs as part of
1248 * locking overhead. Scheduling costs are accounted for
1249 * explicitly. */
1250 TS_LOCK_SUSPEND;
1251
1252 local_irq_enable();
1253 preempt_enable_no_resched();
1254
1255 /* deschedule to be migrated */
1256 schedule();
1257
1258 /* we are now on the target processor */
1259 preempt_disable();
1260
1261 /* start recording costs again */
1262 TS_LOCK_RESUME;
1263
1264 BUG_ON(smp_processor_id() != target_cpu);
1265}
1266
1267int pfp_dpcp_lock(struct litmus_lock* l)
1268{
1269 struct task_struct* t = current;
1270 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1271 int eprio = effective_agent_priority(get_priority(t));
1272 int from = get_partition(t);
1273 int to = sem->pcp.on_cpu;
1274
1275 if (!is_realtime(t))
1276 return -EPERM;
1277
1278 preempt_disable();
1279
1280 /* Priority-boost ourself *before* we suspend so that
1281 * our priority is boosted when we resume. */
1282
1283 boost_priority(t, get_priority(t));
1284
1285 pfp_migrate_to(to);
1286
1287 pcp_raise_ceiling(&sem->pcp, eprio);
1288
1289 /* yep, we got it => execute request */
1290 sem->owner_cpu = from;
1291
1292 preempt_enable();
1293
1294 return 0;
1295}
1296
1297int pfp_dpcp_unlock(struct litmus_lock* l)
1298{
1299 struct task_struct *t = current;
1300 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1301 int err = 0;
1302 int home;
1303
1304 preempt_disable();
1305
1306 if (sem->pcp.on_cpu != smp_processor_id() || sem->pcp.owner != t) {
1307 err = -EINVAL;
1308 goto out;
1309 }
1310
1311 home = sem->owner_cpu;
1312
1313 /* give it back */
1314 pcp_lower_ceiling(&sem->pcp);
1315
1316 /* we lose the benefit of priority boosting */
1317 unboost_priority(t);
1318
1319 pfp_migrate_to(home);
1320
1321out:
1322 preempt_enable();
1323
1324 return err;
1325}
1326
1327int pfp_dpcp_open(struct litmus_lock* l, void* __user config)
1328{
1329 struct task_struct *t = current;
1330 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1331 int cpu, eprio;
1332
1333 if (!is_realtime(t))
1334 /* we need to know the real-time priority */
1335 return -EPERM;
1336
1337 if (get_user(cpu, (int*) config))
1338 return -EFAULT;
1339
1340 /* make sure the resource location matches */
1341 if (cpu != sem->pcp.on_cpu)
1342 return -EINVAL;
1343
1344 eprio = effective_agent_priority(get_priority(t));
1345
1346 pcp_update_prio_ceiling(&sem->pcp, eprio);
1347
1348 return 0;
1349}
1350
1351int pfp_dpcp_close(struct litmus_lock* l)
1352{
1353 struct task_struct *t = current;
1354 struct dpcp_semaphore *sem = dpcp_from_lock(l);
1355 int owner = 0;
1356
1357 preempt_disable();
1358
1359 if (sem->pcp.on_cpu == smp_processor_id())
1360 owner = sem->pcp.owner == t;
1361
1362 preempt_enable();
1363
1364 if (owner)
1365 pfp_dpcp_unlock(l);
1366
1367 return 0;
1368}
1369
1370void pfp_dpcp_free(struct litmus_lock* lock)
1371{
1372 kfree(dpcp_from_lock(lock));
1373}
1374
1375static struct litmus_lock_ops pfp_dpcp_lock_ops = {
1376 .close = pfp_dpcp_close,
1377 .lock = pfp_dpcp_lock,
1378 .open = pfp_dpcp_open,
1379 .unlock = pfp_dpcp_unlock,
1380 .deallocate = pfp_dpcp_free,
1381};
1382
1383static struct litmus_lock* pfp_new_dpcp(int on_cpu)
1384{
1385 struct dpcp_semaphore* sem;
1386
1387 sem = kmalloc(sizeof(*sem), GFP_KERNEL);
1388 if (!sem)
1389 return NULL;
1390
1391 sem->litmus_lock.ops = &pfp_dpcp_lock_ops;
1392 sem->owner_cpu = NO_CPU;
1393 pcp_init_semaphore(&sem->pcp, on_cpu);
1394
1395 return &sem->litmus_lock;
1396}
1397
1398
1399/* **** lock constructor **** */
1400
1401
1402static long pfp_allocate_lock(struct litmus_lock **lock, int type,
1403 void* __user config)
1404{
1405 int err = -ENXIO, cpu;
1406 struct srp_semaphore* srp;
1407
1408 /* P-FP currently supports the SRP for local resources and the FMLP,
1409 * MPCP, MPCP-VS, and DPCP for global resources. */
1410 switch (type) {
1411 case FMLP_SEM:
1412 /* FIFO Mutex Locking Protocol */
1413 *lock = pfp_new_fmlp();
1414 if (*lock)
1415 err = 0;
1416 else
1417 err = -ENOMEM;
1418 break;
1419
1420 case MPCP_SEM:
1421 /* Multiprocessor Priority Ceiling Protocol */
1422 *lock = pfp_new_mpcp(0);
1423 if (*lock)
1424 err = 0;
1425 else
1426 err = -ENOMEM;
1427 break;
1428
1429 case MPCP_VS_SEM:
1430 /* Multiprocessor Priority Ceiling Protocol with virtual spinning */
1431 *lock = pfp_new_mpcp(1);
1432 if (*lock)
1433 err = 0;
1434 else
1435 err = -ENOMEM;
1436 break;
1437
1438 case DPCP_SEM:
1439 /* Distributed Priority Ceiling Protocol */
1440 if (get_user(cpu, (int*) config))
1441 return -EFAULT;
1442
1443 if (!cpu_online(cpu))
1444 return -EINVAL;
1445
1446 *lock = pfp_new_dpcp(cpu);
1447 if (*lock)
1448 err = 0;
1449 else
1450 err = -ENOMEM;
1451 break;
1452
1453 case SRP_SEM:
1454 /* Baker's Stack Resource Policy */
1455 srp = allocate_srp_semaphore();
1456 if (srp) {
1457 *lock = &srp->litmus_lock;
1458 err = 0;
1459 } else
1460 err = -ENOMEM;
1461 break;
1462 };
1463
1464 return err;
1465}
1466
1467#endif
1468
1469static long pfp_admit_task(struct task_struct* tsk)
1470{
1471 if (task_cpu(tsk) == tsk->rt_param.task_params.cpu &&
1472#ifdef CONFIG_RELEASE_MASTER
1473 /* don't allow tasks on release master CPU */
1474 task_cpu(tsk) != remote_dom(task_cpu(tsk))->release_master &&
1475#endif
1476 get_priority(tsk) > 0)
1477 return 0;
1478 else
1479 return -EINVAL;
1480}
1481
1482static long pfp_activate_plugin(void)
1483{
1484#ifdef CONFIG_RELEASE_MASTER
1485 int cpu;
1486
1487 for_each_online_cpu(cpu) {
1488 remote_dom(cpu)->release_master = atomic_read(&release_master_cpu);
1489 }
1490#endif
1491
1492#ifdef CONFIG_LITMUS_LOCKING
1493 get_srp_prio = pfp_get_srp_prio;
1494
1495 for_each_online_cpu(cpu) {
1496 init_waitqueue_head(&per_cpu(mpcpvs_vspin_wait, cpu));
1497 per_cpu(mpcpvs_vspin, cpu) = NULL;
1498
1499 pcp_init_state(&per_cpu(pcp_state, cpu));
1500 pfp_doms[cpu] = remote_pfp(cpu);
1501 }
1502
1503#endif
1504
1505 return 0;
1506}
1507
1508
1509/* Plugin object */
1510static struct sched_plugin pfp_plugin __cacheline_aligned_in_smp = {
1511 .plugin_name = "P-FP",
1512 .tick = pfp_tick,
1513 .task_new = pfp_task_new,
1514 .complete_job = complete_job,
1515 .task_exit = pfp_task_exit,
1516 .schedule = pfp_schedule,
1517 .task_wake_up = pfp_task_wake_up,
1518 .task_block = pfp_task_block,
1519 .admit_task = pfp_admit_task,
1520 .activate_plugin = pfp_activate_plugin,
1521#ifdef CONFIG_LITMUS_LOCKING
1522 .allocate_lock = pfp_allocate_lock,
1523 .finish_switch = pfp_finish_switch,
1524#endif
1525};
1526
1527
1528static int __init init_pfp(void)
1529{
1530 int i;
1531
1532 /* We do not really want to support cpu hotplug, do we? ;)
1533 * However, if we are so crazy to do so,
1534 * we cannot use num_online_cpu()
1535 */
1536 for (i = 0; i < num_online_cpus(); i++) {
1537 pfp_domain_init(remote_pfp(i), i);
1538 }
1539 return register_sched_plugin(&pfp_plugin);
1540}
1541
1542module_init(init_pfp);
1543
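Since pfp_admit_task() above only admits a task that already executes on its designated partition (task_cpu(tsk) == task_params.cpu) and has a non-zero priority, a user-space task would typically pin itself before switching to real-time mode. A hypothetical sketch (not part of the patch; the actual transition to real-time mode goes through the usual LITMUS^RT system calls, which are not shown):

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	cpu_set_t set;
	int partition = 1;	/* must match rt_task.cpu */

	CPU_ZERO(&set);
	CPU_SET(partition, &set);
	if (sched_setaffinity(0, sizeof(set), &set)) {
		perror("sched_setaffinity");
		return 1;
	}
	/* ... fill in struct rt_task (including .priority > 0) and enter
	 * real-time mode via the LITMUS^RT system calls (not shown) ... */
	return 0;
}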