author:    Andrea Bastoni <bastoni@cs.unc.edu>  2010-09-22 17:18:07 -0400
committer: Andrea Bastoni <bastoni@cs.unc.edu>  2010-09-22 17:18:07 -0400
commit:    44c034799bb7a4e8090b5460e36c3993d1924006
tree:      4a1299358bf7effd2745c0e14bc49e52c67ac197
parent:    f49ec60b4d6394cf199837bbe4c10258e5a877c7
parent:    870eaa144c59ee2665b70dcbb9d8649e99e3c0c4
Merge branch 'wip-edf-wm' into wip-semi-part
This version of litmus2010 contains the implementation of three
semi-partitioned scheduling algorithms: EDF-fm, EDF-WM, and NPS-F.
Conflicts:
	include/litmus/rt_param.h
	litmus/Makefile
-rw-r--r--  include/litmus/rt_domain.h |  16
-rw-r--r--  include/litmus/rt_param.h  |  66
-rw-r--r--  litmus/Makefile            |   3
-rw-r--r--  litmus/litmus.c            |   7
-rw-r--r--  litmus/rt_domain.c         |  40
-rw-r--r--  litmus/sched_edf_fm.c      |   6
-rw-r--r--  litmus/sched_edf_wm.c      | 664

7 files changed, 780 insertions(+), 22 deletions(-)
diff --git a/include/litmus/rt_domain.h b/include/litmus/rt_domain.h
index 59e6b54e9281..ac249292e866 100644
--- a/include/litmus/rt_domain.h
+++ b/include/litmus/rt_domain.h
@@ -143,12 +143,26 @@ static inline struct task_struct* take_ready(rt_domain_t* rt)
 static inline void add_release(rt_domain_t* rt, struct task_struct *task)
 {
 	unsigned long flags;
-	/* first we need the write lock for rt_ready_queue */
 	raw_spin_lock_irqsave(&rt->tobe_lock, flags);
 	__add_release(rt, task);
 	raw_spin_unlock_irqrestore(&rt->tobe_lock, flags);
 }
 
+#ifdef CONFIG_RELEASE_MASTER
+void __add_release_on(rt_domain_t* rt, struct task_struct *task,
+		      int target_cpu);
+
+static inline void add_release_on(rt_domain_t* rt,
+				  struct task_struct *task,
+				  int target_cpu)
+{
+	unsigned long flags;
+	raw_spin_lock_irqsave(&rt->tobe_lock, flags);
+	__add_release_on(rt, task, target_cpu);
+	raw_spin_unlock_irqrestore(&rt->tobe_lock, flags);
+}
+#endif
+
 static inline int __jobs_pending(rt_domain_t* rt)
 {
 	return !bheap_empty(&rt->ready_queue);
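
As a usage sketch (the helper name requeue_on is hypothetical; it mirrors wm_requeue_remote() in litmus/sched_edf_wm.c, added later in this patch), a plugin migrating a job to another partition would use the new helper like this:

/* Requeue t in a remote domain: if t's next release is still in the
 * future, arm the release timer on target_cpu instead of on the
 * domain's release master. */
static void requeue_on(rt_domain_t *dom, struct task_struct *t, int target_cpu)
{
	if (is_released(t, litmus_clock()))
		add_ready(dom, t);	/* already released: ready queue */
	else
		add_release_on(dom, t, target_cpu);
}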
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h
index be8f5528ff87..6f43deacb3e1 100644
--- a/include/litmus/rt_param.h
+++ b/include/litmus/rt_param.h
@@ -1,3 +1,5 @@
+#include <linux/threads.h>
+
 /*
  * Definition of the scheduler plugin interface.
  *
@@ -34,7 +36,7 @@ typedef enum {
 } budget_policy_t;
 
 
-/* Parameters for EDF-Fm scheduling algorithm.
+/* The parameters for EDF-Fm scheduling algorithm.
  * Each task may be fixed or migratory. Migratory tasks may
  * migrate on 2 (contiguous) CPU only. NR_CPUS_EDF_FM = 2.
  */
@@ -58,7 +60,7 @@ struct edffm_params {
 	lt_t fraction[2][NR_CPUS_EDF_FM];
 };
 
-/* NPS-F semi-part. plugin.
+/* Parameters for NPS-F semi-partitioned scheduling algorithm.
  * Each (cpu, budget) entry defines the share ('budget' in ns, a % of
  * the slot_length) of the notional processor on the CPU 'cpu'.
  * This structure is used by the library - syscall interface in order
@@ -69,6 +71,36 @@ struct npsf_budgets {
 	lt_t budget;
 };
 
+/* The parameters for the EDF-WM semi-partitioned scheduler.
+ * Each task may be split across multiple cpus. Each per-cpu allocation
+ * is called a 'slice'.
+ */
+#define MAX_EDF_WM_SLICES 24
+#define MIN_EDF_WM_SLICE_SIZE 50000 /* .05 millisecond = 50us */
+
+struct edf_wm_slice {
+	/* on which CPU is this slice allocated */
+	unsigned int cpu;
+	/* relative deadline from job release (not from slice release!) */
+	lt_t deadline;
+	/* budget of this slice; must be precisely enforced */
+	lt_t budget;
+	/* offset of this slice relative to the job release */
+	lt_t offset;
+};
+
+/* If a job is not sliced across multiple CPUs, then
+ * count is set to zero and none of the slices is used.
+ * This implies that count == 1 is illegal.
+ */
+struct edf_wm_params {
+	/* enumeration of all slices */
+	struct edf_wm_slice slices[MAX_EDF_WM_SLICES];
+
+	/* how many slices are defined? */
+	unsigned int count;
+};
+
 struct rt_task {
 	lt_t exec_cost;
 	lt_t period;
@@ -81,11 +113,16 @@ struct rt_task {
 	union {
 		/* EDF-Fm; defined in sched_edf_fm.c */
 		struct edffm_params fm;
-		/* NPS-F: id for the server (notional processor) that holds
+
+		/* NPS-F; defined in sched_npsf.c
+		 * id for the server (notional processor) that holds
 		 * this task; the same npfs_id can be assigned to "the same"
 		 * server split on different cpus
-		 * */
+		 */
 		int npsf_id;
+
+		/* EDF-WM; defined in sched_edf_wm.c */
+		struct edf_wm_params wm;
 	} semi_part;
 };
 
@@ -234,12 +271,23 @@ struct rt_param {
 
 	/* runtime info for the semi-part plugins */
 	union {
+		/* EDF-Fm runtime information
+		 * number of jobs handled by this cpu
+		 * (to determine next cpu for a migrating task)
+		 */
+		unsigned int cpu_job_no[NR_CPUS_EDF_FM];
+
+		/* EDF-WM runtime information */
 		struct {
-			/* EDF-fm: number of jobs handled by this cpu
-			 * (to determine next cpu for a migrating task)
-			 */
-			unsigned int cpu_job_no[NR_CPUS_EDF_FM];
-		} fm;
+			/* at which exec time did the current slice start? */
+			lt_t exec_time;
+			/* when did the job suspend? */
+			lt_t suspend_time;
+			/* cached job parameters */
+			lt_t job_release, job_deadline;
+			/* pointer to the current slice */
+			struct edf_wm_slice* slice;
+		} wm;
 	} semi_part;
 };
 
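
To see how these fields fit together, here is a user-space sketch that configures one task split across two CPUs. All values are illustrative, the helper name setup_split_task is hypothetical, and it assumes the liblitmus headers that re-export this file. The slice layout satisfies the admission checks performed by wm_check_params() in litmus/sched_edf_wm.c (added below): the budgets sum to exec_cost, each budget fits its [offset, deadline] window and exceeds MIN_EDF_WM_SLICE_SIZE, consecutive slices use different CPUs, and the task's partition matches slices[0].cpu.

#include <string.h>
#include <litmus/rt_param.h>	/* struct rt_task, struct edf_wm_slice */

/* Split a 2ms/10ms task into two 1ms slices on CPUs 0 and 1
 * (all times in nanoseconds). */
static void setup_split_task(struct rt_task *tp)
{
	memset(tp, 0, sizeof(*tp));
	tp->exec_cost     = 2000000;	/* 2ms total budget */
	tp->period        = 10000000;	/* 10ms */
	tp->cpu           = 0;		/* must equal slices[0].cpu */
	tp->budget_policy = PRECISE_ENFORCEMENT; /* slices are policed */

	tp->semi_part.wm.count = 2;	/* 0 = not sliced; 1 is illegal */

	/* slice 0: window [0, 5ms) of each job, on CPU 0 */
	tp->semi_part.wm.slices[0].cpu      = 0;
	tp->semi_part.wm.slices[0].offset   = 0;
	tp->semi_part.wm.slices[0].deadline = 5000000;
	tp->semi_part.wm.slices[0].budget   = 1000000;

	/* slice 1: window [5ms, 10ms), migrated to CPU 1 */
	tp->semi_part.wm.slices[1].cpu      = 1;
	tp->semi_part.wm.slices[1].offset   = 5000000;
	tp->semi_part.wm.slices[1].deadline = 10000000;
	tp->semi_part.wm.slices[1].budget   = 1000000;
}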
diff --git a/litmus/Makefile b/litmus/Makefile
index 89c22198a53a..014c6c6ec590 100644
--- a/litmus/Makefile
+++ b/litmus/Makefile
@@ -15,7 +15,8 @@ obj-y = sched_plugin.o litmus.o \
 	sched_gsn_edf.o \
 	sched_psn_edf.o \
 	sched_npsf.o \
-	sched_edf_fm.o
+	sched_edf_fm.o \
+	sched_edf_wm.o
 
 obj-$(CONFIG_PLUGIN_CEDF) += sched_cedf.o
 obj-$(CONFIG_PLUGIN_PFAIR) += sched_pfair.o
diff --git a/litmus/litmus.c b/litmus/litmus.c
index b1c627db694a..2f780222d8e8 100644
--- a/litmus/litmus.c
+++ b/litmus/litmus.c
@@ -115,10 +115,13 @@ asmlinkage long sys_set_rt_task_param(pid_t pid, struct rt_task __user * param)
 		goto out_unlock;
 	}
 	if (tp.budget_policy != NO_ENFORCEMENT &&
-	    tp.budget_policy != QUANTUM_ENFORCEMENT)
+	    tp.budget_policy != QUANTUM_ENFORCEMENT &&
+	    tp.budget_policy != PRECISE_ENFORCEMENT)
 	{
 		printk(KERN_INFO "litmus: real-time task %d rejected "
-		       "because unsupported budget enforcement policy specified\n", pid);
+		       "because unsupported budget enforcement policy "
+		       "specified (%d)\n",
+		       pid, tp.budget_policy);
 		goto out_unlock;
 	}
 
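
In summary, the budget_policy values accepted after this patch are the following (a sketch restating the check above; the constants come from budget_policy_t in rt_param.h):

switch (tp.budget_policy) {
case NO_ENFORCEMENT:		/* overruns are not policed */
case QUANTUM_ENFORCEMENT:	/* policed at scheduler-tick granularity */
case PRECISE_ENFORCEMENT:	/* newly accepted: policed by hrtimer,
				 * which EDF-WM's slice budgets require */
	/* parameters accepted */
	break;
default:
	/* rejected with the printk above */
	break;
}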
diff --git a/litmus/rt_domain.c b/litmus/rt_domain.c
index 8a3ff706c208..d27af48e0298 100644
--- a/litmus/rt_domain.c
+++ b/litmus/rt_domain.c
@@ -169,7 +169,12 @@ static void reinit_release_heap(struct task_struct* t)
  * - tobe_lock taken
  * - IRQ disabled
  */
-static void arm_release_timer(rt_domain_t *_rt)
+#ifdef CONFIG_RELEASE_MASTER
+#define arm_release_timer(t) arm_release_timer_on((t), NO_CPU)
+static void arm_release_timer_on(rt_domain_t *_rt, int target_cpu)
+#else
+static void arm_release_timer(rt_domain_t *_rt)
+#endif
 {
 	rt_domain_t *rt = _rt;
 	struct list_head list;
@@ -224,17 +229,21 @@ static void arm_release_timer(rt_domain_t *_rt)
 			 * PINNED mode is ok on both local and remote CPU
 			 */
 #ifdef CONFIG_RELEASE_MASTER
-			if (rt->release_master == NO_CPU)
+			if (rt->release_master == NO_CPU &&
+			    target_cpu == NO_CPU)
 #endif
 				__hrtimer_start_range_ns(&rh->timer,
 						ns_to_ktime(rh->release_time),
 						0, HRTIMER_MODE_ABS_PINNED, 0);
 #ifdef CONFIG_RELEASE_MASTER
 			else
-				hrtimer_start_on(rt->release_master,
-						&rh->info, &rh->timer,
-						ns_to_ktime(rh->release_time),
-						HRTIMER_MODE_ABS_PINNED);
+				hrtimer_start_on(
+					/* target_cpu overrides release master */
+					(target_cpu != NO_CPU ?
+					 target_cpu : rt->release_master),
+					&rh->info, &rh->timer,
+					ns_to_ktime(rh->release_time),
+					HRTIMER_MODE_ABS_PINNED);
 #endif
 		} else
 			TRACE_TASK(t, "0x%p is not my timer\n", &rh->timer);
@@ -299,6 +308,25 @@ void __merge_ready(rt_domain_t* rt, struct bheap* tasks)
 		rt->check_resched(rt);
 }
 
+
+#ifdef CONFIG_RELEASE_MASTER
+void __add_release_on(rt_domain_t* rt, struct task_struct *task,
+		      int target_cpu)
+{
+	TRACE_TASK(task, "add_release_on(), rel=%llu, target=%d\n",
+		   get_release(task), target_cpu);
+	list_add(&tsk_rt(task)->list, &rt->tobe_released);
+	task->rt_param.domain = rt;
+
+	/* start release timer */
+	TS_SCHED2_START(task);
+
+	arm_release_timer_on(rt, target_cpu);
+
+	TS_SCHED2_END(task);
+}
+#endif
+
 /* add_release - add a real-time task to the rt release queue.
  * @task: the sleeping task
  */
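
The placement of the release timer, combining target_cpu with the domain's release master, can be summarized as follows (a reading aid, not code from the patch):

/*
 * target_cpu | rt->release_master | release timer armed on
 * -----------+--------------------+--------------------------------------
 * NO_CPU     | NO_CPU             | local CPU (__hrtimer_start_range_ns)
 * NO_CPU     | CPU m              | CPU m (hrtimer_start_on)
 * CPU k      | anything           | CPU k (target_cpu takes precedence)
 */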
diff --git a/litmus/sched_edf_fm.c b/litmus/sched_edf_fm.c
index e14f8588c7a4..38b5a3f97101 100644
--- a/litmus/sched_edf_fm.c
+++ b/litmus/sched_edf_fm.c
@@ -60,8 +60,8 @@ DEFINE_PER_CPU(edffm_domain_t, edffm_domains);
 /* Get job number for current cpu */
 #define cur_cpu_job_no(t) \
 	((tsk_rt(t)->task_params.cpu == edffm_params(t).cpus[0]) ? \
-	 tsk_rt(t)->semi_part.fm.cpu_job_no[0] : \
-	 tsk_rt(t)->semi_part.fm.cpu_job_no[1])
+	 tsk_rt(t)->semi_part.cpu_job_no[0] : \
+	 tsk_rt(t)->semi_part.cpu_job_no[1])
 /* What is the current cpu position in the array? */
 #define edffm_cpu_pos(cpu,t) \
 	((cpu == edffm_params(t).cpus[0]) ? \
@@ -259,7 +259,7 @@ static void update_job_counter(struct task_struct *t)
 
 	/* Which CPU counter should be incremented? */
 	cpu_pos = edffm_cpu_pos(t->rt_param.task_params.cpu, t);
-	t->rt_param.semi_part.fm.cpu_job_no[cpu_pos]++;
+	t->rt_param.semi_part.cpu_job_no[cpu_pos]++;
 
 	TRACE_TASK(t, "job_no = %d, cpu_job_no(pos %d) = %d, cpu %d\n",
 		   t->rt_param.job_params.job_no, cpu_pos, cur_cpu_job_no(t),
diff --git a/litmus/sched_edf_wm.c b/litmus/sched_edf_wm.c
new file mode 100644
index 000000000000..44e05948980c
--- /dev/null
+++ b/litmus/sched_edf_wm.c
@@ -0,0 +1,664 @@

/* EDF-WM: based on PSN-EDF.
 */

#include <linux/percpu.h>
#include <linux/sched.h>
#include <linux/list.h>
#include <linux/spinlock.h>

#include <linux/module.h>

#include <litmus/litmus.h>
#include <litmus/jobs.h>
#include <litmus/sched_plugin.h>
#include <litmus/edf_common.h>

typedef struct {
	rt_domain_t domain;
	int cpu;
	struct task_struct* scheduled; /* only RT tasks */

	/* The enforcement timer is used to accurately police
	 * slice budgets. */
	struct hrtimer enforcement_timer;
	int timer_armed;

	/*
	 * scheduling lock slock
	 * protects the domain and serializes scheduling decisions
	 */
#define slock domain.ready_lock

} wm_domain_t;

DEFINE_PER_CPU(wm_domain_t, wm_domains);

#define TRACE_DOM(dom, fmt, args...) \
	TRACE("(wm_domains[%d]) " fmt, (dom)->cpu, ##args)


#define local_domain (&__get_cpu_var(wm_domains))
#define remote_domain(cpu) (&per_cpu(wm_domains, cpu))
#define domain_of_task(task) (remote_domain(get_partition(task)))
#define domain_from_timer(t) (container_of((t), wm_domain_t, enforcement_timer))

static int is_sliced_task(struct task_struct* t)
{
	return tsk_rt(t)->task_params.semi_part.wm.count;
}

static struct edf_wm_slice* get_last_slice(struct task_struct* t)
{
	int idx = tsk_rt(t)->task_params.semi_part.wm.count - 1;
	return tsk_rt(t)->task_params.semi_part.wm.slices + idx;
}

static void compute_slice_params(struct task_struct* t)
{
	struct rt_param* p = tsk_rt(t);
	/* Here we do a little trick to make the generic EDF code
	 * play well with job slices. We overwrite the job-level
	 * release and deadline fields with the slice-specific values
	 * so that we can enqueue this task in an EDF rt_domain_t
	 * without issue. The actual values are cached in the semi_part.wm
	 * structure. */
	p->job_params.deadline = p->semi_part.wm.job_release +
		p->semi_part.wm.slice->deadline;
	p->job_params.release = p->semi_part.wm.job_release +
		p->semi_part.wm.slice->offset;

	/* Similarly, we play a trick on the cpu field. */
	p->task_params.cpu = p->semi_part.wm.slice->cpu;

	/* update the per-slice budget reference */
	p->semi_part.wm.exec_time = p->job_params.exec_time;
}

static void complete_sliced_job(struct task_struct* t)
{
	struct rt_param* p = tsk_rt(t);

	/* We need to undo our trickery to the
	 * job parameters (see above). */
	p->job_params.release = p->semi_part.wm.job_release;
	p->job_params.deadline = p->semi_part.wm.job_deadline;

	/* Ok, now let generic code do the actual work. */
	prepare_for_next_period(t);

	/* And finally cache the updated parameters. */
	p->semi_part.wm.job_release = p->job_params.release;
	p->semi_part.wm.job_deadline = p->job_params.deadline;
}

static void advance_next_slice(struct task_struct* t, int completion_signaled)
{
	int idx;
	struct rt_param* p = tsk_rt(t);

	/* make sure this is actually a sliced job */
	BUG_ON(!is_sliced_task(t));
	BUG_ON(is_queued(t));

	/* determine index of current slice */
	idx = p->semi_part.wm.slice -
		p->task_params.semi_part.wm.slices;

	if (completion_signaled)
		idx = 0;
	else
		/* increment and wrap around, if necessary */
		idx = (idx + 1) % p->task_params.semi_part.wm.count;

	/* point to next slice */
	p->semi_part.wm.slice =
		p->task_params.semi_part.wm.slices + idx;

	/* Check if we need to update essential job parameters. */
	if (!idx) {
		/* job completion */
		sched_trace_task_completion(t, !completion_signaled);
		TRACE_TASK(t, "completed sliced job "
			   "(signaled:%d)\n", completion_signaled);
		complete_sliced_job(t);
	}

	/* Update job parameters for new slice. */
	compute_slice_params(t);
}

static lt_t slice_exec_time(struct task_struct* t)
{
	struct rt_param* p = tsk_rt(t);

	/* Compute how much execution time has been consumed
	 * since last slice advancement. */
	return p->job_params.exec_time - p->semi_part.wm.exec_time;
}

static lt_t slice_budget(struct task_struct* t)
{
	return tsk_rt(t)->semi_part.wm.slice->budget;
}

static int slice_budget_exhausted(struct task_struct* t)
{
	return slice_exec_time(t) >= slice_budget(t);
}

/* assumes positive remainder; overflows otherwise */
static lt_t slice_budget_remaining(struct task_struct* t)
{
	return slice_budget(t) - slice_exec_time(t);
}

/* assumes time_passed does not advance past the last slice */
static void fast_forward_slices(struct task_struct* t, lt_t time_passed)
{
	while (time_passed &&
	       time_passed >= slice_budget_remaining(t)) {
		/* slice completely exhausted */
		time_passed -= slice_budget_remaining(t);
		tsk_rt(t)->job_params.exec_time +=
			slice_budget_remaining(t);
		BUG_ON(!slice_budget_exhausted(t));
		BUG_ON(slice_budget_remaining(t) != 0);
		advance_next_slice(t, 0);
	}
	/* add remainder to exec cost */
	tsk_rt(t)->job_params.exec_time += time_passed;
}

/* we assume the lock is being held */
static void preempt(wm_domain_t *dom)
{
	TRACE_DOM(dom, "will be preempted.\n");
	/* We pass NULL as the task since non-preemptive sections are not
	 * supported in this plugin, so per-task checks are not needed. */
	preempt_if_preemptable(NULL, dom->cpu);
}

static enum hrtimer_restart on_enforcement_timeout(struct hrtimer *timer)
{
	wm_domain_t *dom = domain_from_timer(timer);
	unsigned long flags;

	raw_spin_lock_irqsave(&dom->slock, flags);
	if (likely(dom->timer_armed)) {
		TRACE_DOM(dom, "enforcement timer fired.\n");
		dom->timer_armed = 0;
		preempt(dom);
	} else
		TRACE_DOM(dom, "timer fired but not armed???\n");
	raw_spin_unlock_irqrestore(&dom->slock, flags);
	return HRTIMER_NORESTART;
}

static void wm_domain_init(wm_domain_t* dom,
			   check_resched_needed_t check,
			   release_jobs_t release,
			   int cpu)
{
	edf_domain_init(&dom->domain, check, release);
	dom->cpu = cpu;
	dom->scheduled = NULL;
	dom->timer_armed = 0;
	hrtimer_init(&dom->enforcement_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	dom->enforcement_timer.function = on_enforcement_timeout;
}

static void wm_requeue_remote(struct task_struct *t)
{
	wm_domain_t *dom = domain_of_task(t);

	set_rt_flags(t, RT_F_RUNNING);
	if (is_released(t, litmus_clock()))
		/* acquires necessary lock */
		add_ready(&dom->domain, t);
	else
		/* force timer on remote CPU */
		add_release_on(&dom->domain, t, get_partition(t));
}

static void wm_requeue_local(struct task_struct* t, rt_domain_t *edf)
{
	if (t->state != TASK_RUNNING)
		TRACE_TASK(t, "requeue: !TASK_RUNNING\n");

	set_rt_flags(t, RT_F_RUNNING);
	if (is_released(t, litmus_clock()))
		__add_ready(edf, t);
	else
		add_release(edf, t); /* it has got to wait */
}

static int wm_check_resched(rt_domain_t *edf)
{
	wm_domain_t *dom = container_of(edf, wm_domain_t, domain);

	/* because this is a callback from rt_domain_t we already hold
	 * the necessary lock for the ready queue
	 */
	if (edf_preemption_needed(edf, dom->scheduled)) {
		preempt(dom);
		return 1;
	} else
		return 0;
}

static void regular_job_completion(struct task_struct* t, int forced)
{
	sched_trace_task_completion(t, forced);
	TRACE_TASK(t, "job_completion().\n");

	set_rt_flags(t, RT_F_SLEEP);
	prepare_for_next_period(t);
}

static void wm_job_or_slice_completion(struct task_struct* t,
				       int completion_signaled)
{
	if (is_sliced_task(t))
		advance_next_slice(t, completion_signaled);
	else
		regular_job_completion(t, !completion_signaled);
}

static int wm_budget_exhausted(struct task_struct* t)
{
	if (is_sliced_task(t))
		return slice_budget_exhausted(t);
	else
		return budget_exhausted(t);
}

static void wm_tick(struct task_struct *t)
{
	wm_domain_t *dom = local_domain;

	/* Check for inconsistency. We don't need the lock for this since
	 * ->scheduled is only changed in schedule, which obviously is not
	 * executing in parallel on this CPU
	 */
	BUG_ON(is_realtime(t) && t != dom->scheduled);

	if (is_realtime(t) && budget_enforced(t) && wm_budget_exhausted(t)) {
		set_tsk_need_resched(t);
		TRACE_DOM(dom, "budget of %d exhausted in tick\n",
			  t->pid);
	}
}

static struct task_struct* wm_schedule(struct task_struct * prev)
{
	wm_domain_t *dom = local_domain;
	rt_domain_t *edf = &dom->domain;
	struct task_struct *next, *migrate = NULL;

	int out_of_time, sleep, preempt,
	    exists, blocks, resched;

	raw_spin_lock(&dom->slock);

	/* Sanity checking:
	 * When a task exits (dead) dom->scheduled may be null
	 * and prev _is_ realtime. */
	BUG_ON(dom->scheduled && dom->scheduled != prev);
	BUG_ON(dom->scheduled && !is_realtime(prev));

	/* (0) Determine state */
	exists = dom->scheduled != NULL;
	blocks = exists && !is_running(dom->scheduled);
	out_of_time = exists
		&& budget_enforced(dom->scheduled)
		&& wm_budget_exhausted(dom->scheduled);
	sleep = exists && get_rt_flags(dom->scheduled) == RT_F_SLEEP;
	preempt = edf_preemption_needed(edf, prev);

	/* If we need to preempt do so.
	 * The following checks set resched to 1 in case of special
	 * circumstances.
	 */
	resched = preempt;

	/* If a task blocks we have no choice but to reschedule.
	 */
	if (blocks)
		resched = 1;

	/* Any task that is preemptable and either exhausts its execution
	 * budget or wants to sleep completes. We may have to reschedule after
	 * this.
	 */
	if ((out_of_time || sleep) && !blocks) {
		wm_job_or_slice_completion(dom->scheduled, sleep);
		resched = 1;
	}

	/* The final scheduling decision. Do we need to switch for some reason?
	 * Switch if we are in RT mode and have no task or if we need to
	 * resched.
	 */
	next = NULL;
	if (resched || !exists) {
		if (dom->scheduled && !blocks) {
			if (get_partition(dom->scheduled) == dom->cpu)
				/* local task */
				wm_requeue_local(dom->scheduled, edf);
			else
				/* not local anymore; wait until we drop the
				 * ready queue lock */
				migrate = dom->scheduled;
		}
		next = __take_ready(edf);
	} else
		/* Only override Linux scheduler if we have a real-time task
		 * scheduled that needs to continue. */
		if (exists)
			next = prev;

	if (next) {
		TRACE_TASK(next, "scheduled at %llu\n", litmus_clock());
		set_rt_flags(next, RT_F_RUNNING);
	} else if (exists) {
		TRACE("becoming idle at %llu\n", litmus_clock());
	}

	dom->scheduled = next;
	raw_spin_unlock(&dom->slock);

	/* check if we need to push the previous task onto another queue */
	if (migrate) {
		TRACE_TASK(migrate, "schedule-initiated migration to %d\n",
			   get_partition(migrate));
		wm_requeue_remote(migrate);
	}

	return next;
}


/* Prepare a task for running in RT mode
 */
static void wm_task_new(struct task_struct * t, int on_rq, int running)
{
	wm_domain_t* dom = domain_of_task(t);
	rt_domain_t* edf = &dom->domain;
	unsigned long flags;

	TRACE_TASK(t, "edf-wm: task new, cpu = %d\n",
		   t->rt_param.task_params.cpu);

	/* setup job parameters */
	release_at(t, litmus_clock());

	/* The task should be running in the queue, otherwise signal
	 * code will try to wake it up with fatal consequences.
	 */
	raw_spin_lock_irqsave(&dom->slock, flags);

	if (is_sliced_task(t)) {
		/* make sure parameters are initialized consistently */
		tsk_rt(t)->semi_part.wm.exec_time = 0;
		tsk_rt(t)->semi_part.wm.job_release = get_release(t);
		tsk_rt(t)->semi_part.wm.job_deadline = get_deadline(t);
		tsk_rt(t)->semi_part.wm.slice = tsk_rt(t)->task_params.semi_part.wm.slices;
		tsk_rt(t)->job_params.exec_time = 0;
	}

	if (running) {
		/* there shouldn't be anything else running at the time */
		BUG_ON(dom->scheduled);
		dom->scheduled = t;
	} else {
		wm_requeue_local(t, edf);
		/* maybe we have to reschedule */
		preempt(dom);
	}
	raw_spin_unlock_irqrestore(&dom->slock, flags);
}

static void wm_release_at(struct task_struct *t, lt_t start)
{
	struct rt_param* p = tsk_rt(t);

	if (is_sliced_task(t)) {
		/* simulate wrapping to the first slice */
		p->semi_part.wm.job_deadline = start;
		p->semi_part.wm.slice = get_last_slice(t);
		/* FIXME: creates bogus completion event... */
		advance_next_slice(t, 0);
		set_rt_flags(t, RT_F_RUNNING);
	} else
		/* generic code handles it */
		release_at(t, start);
}

static lt_t wm_earliest_release(struct task_struct *t, lt_t now)
{
	lt_t deadline;
	if (is_sliced_task(t))
		deadline = tsk_rt(t)->semi_part.wm.job_deadline;
	else
		deadline = get_deadline(t);
	if (lt_before(deadline, now))
		return now;
	else
		return deadline;
}

static void wm_task_wake_up(struct task_struct *t)
{
	unsigned long flags;
	wm_domain_t* dom = domain_of_task(t);
	rt_domain_t* edf = &dom->domain;
	struct rt_param* p = tsk_rt(t);
	lt_t now, sleep_time;
	int migrate = 0;

	raw_spin_lock_irqsave(&dom->slock, flags);
	BUG_ON(is_queued(t));

	now = litmus_clock();

	sleep_time = now - p->semi_part.wm.suspend_time;

	TRACE_TASK(t, "wake_up at %llu after %llu\n", now, sleep_time);

	/* account sleep time as execution time */
	if (get_exec_time(t) + sleep_time >= get_exec_cost(t)) {
		/* new sporadic release */
		wm_release_at(t, wm_earliest_release(t, now));
		sched_trace_task_release(t);
	} else if (is_sliced_task(t)) {
		/* figure out which slice we should be executing on */
		fast_forward_slices(t, sleep_time);
	} else {
		/* simply add to the execution time */
		tsk_rt(t)->job_params.exec_time += sleep_time;
	}


	/* Only add to ready queue if it is not the currently-scheduled
	 * task. This could be the case if a task was woken up concurrently
	 * on a remote CPU before the executing CPU got around to actually
	 * de-scheduling the task, i.e., wake_up() raced with schedule()
	 * and won.
	 */
	if (dom->scheduled != t) {
		if (get_partition(t) == dom->cpu)
			wm_requeue_local(t, edf);
		else
			/* post-pone migration until after unlocking */
			migrate = 1;
	}

	raw_spin_unlock_irqrestore(&dom->slock, flags);

	if (migrate) {
		TRACE_TASK(t, "wake_up-initiated migration to %d\n",
			   get_partition(t));
		wm_requeue_remote(t);
	}

	TRACE_TASK(t, "wake up done\n");
}

static void wm_task_block(struct task_struct *t)
{
	wm_domain_t* dom = domain_of_task(t);
	unsigned long flags;
	lt_t now = litmus_clock();

	TRACE_TASK(t, "block at %llu, state=%d\n", now, t->state);

	tsk_rt(t)->semi_part.wm.suspend_time = now;

	raw_spin_lock_irqsave(&dom->slock, flags);
	if (is_queued(t)) {
		TRACE_TASK(t, "still queued; migration invariant failed?\n");
		remove(&dom->domain, t);
	}
	raw_spin_unlock_irqrestore(&dom->slock, flags);

	BUG_ON(!is_realtime(t));
}

static void wm_task_exit(struct task_struct * t)
{
	unsigned long flags;
	wm_domain_t* dom = domain_of_task(t);
	rt_domain_t* edf = &dom->domain;

	raw_spin_lock_irqsave(&dom->slock, flags);
	if (is_queued(t)) {
		/* dequeue */
		remove(edf, t);
	}
	if (dom->scheduled == t)
		dom->scheduled = NULL;

	TRACE_TASK(t, "RIP, now reschedule\n");

	preempt(dom);
	raw_spin_unlock_irqrestore(&dom->slock, flags);
}

static long wm_check_params(struct task_struct *t)
{
	struct rt_param* p = tsk_rt(t);
	struct edf_wm_params* wm = &p->task_params.semi_part.wm;
	int i;
	lt_t tmp;

	if (!is_sliced_task(t)) {
		/* regular task; nothing to check */
		TRACE_TASK(t, "accepted regular (non-sliced) task with "
			   "%d slices\n",
			   wm->count);
		return 0;
	}

	/* (1) Either not sliced, or more than 1 slice. */
	if (wm->count == 1 || wm->count > MAX_EDF_WM_SLICES) {
		TRACE_TASK(t, "bad number of slices (%u)\n",
			   wm->count);
		return -EINVAL;
	}

	/* (2) The partition has to agree with the first slice. */
	if (get_partition(t) != wm->slices[0].cpu) {
		TRACE_TASK(t, "partition and first slice CPU differ "
			   "(%d != %d)\n", get_partition(t), wm->slices[0].cpu);
		return -EINVAL;
	}

	/* (3) The total budget must agree. */
	for (i = 0, tmp = 0; i < wm->count; i++)
		tmp += wm->slices[i].budget;
	if (get_exec_cost(t) != tmp) {
		TRACE_TASK(t, "total budget and sum of slice budgets differ\n");
		return -EINVAL;
	}

	/* (4) The release of each slice must not precede the previous
	 * deadline. */
	for (i = 0; i < wm->count - 1; i++)
		if (wm->slices[i].deadline > wm->slices[i + 1].offset) {
			TRACE_TASK(t, "slice %d overlaps with slice %d\n",
				   i, i + 1);
			return -EINVAL;
		}

	/* (5) The budget of each slice must fit within [offset, deadline]. */
	for (i = 0; i < wm->count; i++)
		if (lt_before(wm->slices[i].deadline, wm->slices[i].offset) ||
		    wm->slices[i].deadline - wm->slices[i].offset <
		    wm->slices[i].budget) {
			TRACE_TASK(t, "slice %d is overloaded\n", i);
			return -EINVAL;
		}

	/* (6) The budget of each slice must exceed the minimum budget size. */
	for (i = 0; i < wm->count; i++)
		if (wm->slices[i].budget < MIN_EDF_WM_SLICE_SIZE) {
			TRACE_TASK(t, "slice %d is too short\n", i);
			return -EINVAL;
		}

	/* (7) The CPU of each slice must be different from the previous CPU. */
	for (i = 0; i < wm->count - 1; i++)
		if (wm->slices[i].cpu == wm->slices[i + 1].cpu) {
			TRACE_TASK(t, "slice %d does not migrate\n", i);
			return -EINVAL;
		}

	/* (8) The CPU of each slice must be online. */
	for (i = 0; i < wm->count; i++)
		if (!cpu_online(wm->slices[i].cpu)) {
			TRACE_TASK(t, "slice %d is allocated on offline CPU\n",
				   i);
			return -EINVAL;
		}

	TRACE_TASK(t, "accepted sliced task with %d slices\n",
		   wm->count);

	return 0;
}

static long wm_admit_task(struct task_struct* t)
{
	return task_cpu(t) == get_partition(t) ? wm_check_params(t) : -EINVAL;
}

/* Plugin object */
static struct sched_plugin edf_wm_plugin __cacheline_aligned_in_smp = {
	.plugin_name = "EDF-WM",
	.tick = wm_tick,
	.task_new = wm_task_new,
	.complete_job = complete_job,
	.task_exit = wm_task_exit,
	.schedule = wm_schedule,
	.release_at = wm_release_at,
	.task_wake_up = wm_task_wake_up,
	.task_block = wm_task_block,
	.admit_task = wm_admit_task
};


static int __init init_edf_wm(void)
{
	int i;

	/* FIXME: breaks with CPU hotplug
	 */
	for (i = 0; i < num_online_cpus(); i++) {
		wm_domain_init(remote_domain(i),
			       wm_check_resched,
			       NULL, i);
	}
	return register_sched_plugin(&edf_wm_plugin);
}

module_init(init_edf_wm);
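
A worked example of the wake-up accounting implemented above, with illustrative numbers that are not from the patch:

/* A sliced task has two 1ms slices and a 2ms total budget. It suspends
 * 0.4ms into slice 0 and wakes up sleep_time = 1.2ms later. EDF-WM
 * charges suspension time as execution time (see wm_task_wake_up);
 * since 0.4ms + 1.2ms < 2ms, no new job is released and
 * fast_forward_slices(t, 1200000) runs instead:
 *
 *   slice 0: remaining = 1ms - 0.4ms = 0.6ms <= 1.2ms
 *            -> exec_time += 0.6ms, advance_next_slice(), 0.6ms left over
 *   slice 1: remaining = 1ms > 0.6ms
 *            -> loop exits, exec_time += 0.6ms
 *
 * The task resumes 0.6ms into slice 1 and is queued on slice 1's CPU,
 * because compute_slice_params() rewrote task_params.cpu. */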