Diffstat (limited to 'litmus/sched_cedf.c')
-rw-r--r-- | litmus/sched_cedf.c | 1526
1 file changed, 1526 insertions(+), 0 deletions(-)
diff --git a/litmus/sched_cedf.c b/litmus/sched_cedf.c
new file mode 100644
index 000000000000..4f5bb26b339b
--- /dev/null
+++ b/litmus/sched_cedf.c
@@ -0,0 +1,1526 @@
1 | /* | ||
2 | * litmus/sched_cedf.c | ||
3 | * | ||
4 | * Implementation of the C-EDF scheduling algorithm. | ||
5 | * | ||
6 | * This implementation is based on G-EDF: | ||
7 | * - CPUs are clustered around L2 or L3 caches. | ||
8 | * - Cluster topology is detected automatically (this is arch-dependent | ||
9 | * and currently works only on x86 --- and only with modern | ||
10 | * CPUs that export cpuid4 information). | ||
11 | * - The plugin _does not_ attempt to put tasks in the right cluster, i.e., | ||
12 | * the programmer needs to be aware of the topology to place tasks | ||
13 | * in the desired cluster. | ||
14 | * - The default configuration is a single global cluster (GLOBAL_CLUSTER); | ||
15 | * supported configurations are: L1 (private cache: like P-EDF), L2, L3, | ||
16 | * ALL (all online CPUs are placed in a single cluster). | ||
17 | * | ||
18 | * For details on functions, take a look at sched_gsn_edf.c | ||
19 | * | ||
20 | * Currently, we do not support changes in the number of online CPUs. | ||
21 | * If num_online_cpus() changes at run time, the plugin is broken. | ||
22 | * | ||
23 | * This version uses the simple approach and serializes all scheduling | ||
24 | * decisions by the use of a queue lock. This is probably not the | ||
25 | * best way to do it, but it should suffice for now. | ||
26 | */ | ||
27 | |||
28 | #include <linux/spinlock.h> | ||
29 | #include <linux/percpu.h> | ||
30 | #include <linux/sched.h> | ||
31 | #include <linux/slab.h> | ||
32 | |||
33 | #include <linux/module.h> | ||
34 | |||
35 | #include <litmus/litmus.h> | ||
36 | #include <litmus/wait.h> | ||
37 | #include <litmus/jobs.h> | ||
38 | #include <litmus/preempt.h> | ||
39 | #include <litmus/sched_plugin.h> | ||
40 | #include <litmus/edf_common.h> | ||
41 | #include <litmus/sched_trace.h> | ||
42 | #include <litmus/trace.h> | ||
43 | |||
44 | #include <litmus/clustered.h> | ||
45 | |||
46 | #include <litmus/bheap.h> | ||
47 | |||
48 | /* to configure the cluster size */ | ||
49 | #include <litmus/litmus_proc.h> | ||
50 | #include <linux/uaccess.h> | ||
51 | |||
52 | /* Reference configuration variable. Determines which cache level is used to | ||
53 | * group CPUs into clusters. GLOBAL_CLUSTER, which is the default, means that | ||
54 | * all CPUs form a single cluster (just like GSN-EDF). | ||
55 | */ | ||
56 | static enum cache_level cluster_config = GLOBAL_CLUSTER; | ||
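/*
 * cluster_config is normally changed at run time through the "cluster" proc
 * file that init_cedf() creates below (via create_cluster_file()).  With the
 * usual LITMUS^RT proc layout this looks roughly like (exact path may vary):
 *
 *     echo L3 > /proc/litmus/plugins/C-EDF/cluster
 *
 * The new setting only takes effect the next time the plugin is activated,
 * since cedf_activate_plugin() is what reads cluster_config.
 */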
57 | |||
58 | struct clusterdomain; | ||
59 | |||
60 | /* cpu_entry_t - maintain the linked and scheduled state | ||
61 | * | ||
62 | * A CPU entry also contains a pointer to the cedf_domain_t cluster | ||
63 | * that owns it (struct clusterdomain*) | ||
64 | */ | ||
65 | typedef struct { | ||
66 | int cpu; | ||
67 | struct clusterdomain* cluster; /* owning cluster */ | ||
68 | struct task_struct* linked; /* only RT tasks */ | ||
69 | struct task_struct* scheduled; /* only RT tasks */ | ||
70 | atomic_t will_schedule; /* prevent unneeded IPIs */ | ||
71 | struct bheap_node* hn; | ||
72 | #ifdef CONFIG_LITMUS_LOCKING | ||
73 | struct bheap_node* pending_hn; | ||
74 | struct task_struct* pending; | ||
75 | #endif | ||
76 | } cpu_entry_t; | ||
77 | |||
78 | /* one cpu_entry_t per CPU */ | ||
79 | DEFINE_PER_CPU(cpu_entry_t, cedf_cpu_entries); | ||
80 | |||
81 | |||
82 | static struct bheap_node cpu_nodes[NR_CPUS]; | ||
83 | #ifdef CONFIG_LITMUS_LOCKING | ||
84 | static struct bheap_node pending_nodes[NR_CPUS]; | ||
85 | #endif | ||
86 | |||
87 | /* | ||
88 | * In C-EDF there is a cedf domain _per_ cluster | ||
89 | * The number of clusters is dynamically determined accordingly to the | ||
90 | * total cpu number and the cluster size | ||
91 | */ | ||
92 | typedef struct clusterdomain { | ||
93 | /* rt_domain for this cluster */ | ||
94 | rt_domain_t domain; | ||
95 | /* map of this cluster cpus */ | ||
96 | cpumask_var_t cpu_map; | ||
97 | unsigned int num_cpus; | ||
98 | /* the cpus queue themselves according to priority in here */ | ||
99 | struct bheap cpu_heap; | ||
100 | #ifdef CONFIG_LITMUS_LOCKING | ||
101 | struct bheap pending_jobs; | ||
102 | struct bheap pending_cpus; | ||
103 | #endif | ||
104 | /* lock for this cluster */ | ||
105 | #define cluster_lock domain.ready_lock | ||
106 | } cedf_domain_t; | ||
107 | |||
108 | /* a cedf_domain per cluster; allocation is done at init/activation time */ | ||
109 | cedf_domain_t *cedf; | ||
110 | |||
111 | #define remote_cpu(cpu) (&per_cpu(cedf_cpu_entries, cpu)) | ||
112 | #define remote_cluster(cpu) ((cedf_domain_t *) per_cpu(cedf_cpu_entries, cpu).cluster) | ||
113 | #define task_cpu_cluster(task) remote_cluster(get_partition(task)) | ||
114 | |||
115 | /* Uncomment WANT_ALL_SCHED_EVENTS if you want to see all scheduling | ||
116 | * decisions in the TRACE() log; uncomment VERBOSE_INIT for verbose | ||
117 | * information during the initialization of the plugin (e.g., topology) | ||
118 | #define WANT_ALL_SCHED_EVENTS | ||
119 | */ | ||
120 | #define VERBOSE_INIT | ||
121 | |||
122 | static int cpu_lower_prio(struct bheap_node *_a, struct bheap_node *_b) | ||
123 | { | ||
124 | cpu_entry_t *a, *b; | ||
125 | a = _a->value; | ||
126 | b = _b->value; | ||
127 | /* Note that a and b are inverted: we want the lowest-priority CPU at | ||
128 | * the top of the heap. | ||
129 | */ | ||
130 | return edf_higher_prio(b->linked, a->linked); | ||
131 | } | ||
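/*
 * For example, if entry a is linked to a job with deadline 10 and entry b to
 * a job with deadline 5, then edf_higher_prio(b->linked, a->linked) is true
 * and a rises in cpu_heap: the top of the heap is the CPU that is cheapest
 * to preempt.  An idle CPU (linked == NULL) loses against any real-time job
 * and therefore ends up at the top.
 */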
132 | |||
133 | /* update_cpu_position - Move the cpu entry to the correct place to maintain | ||
134 | * order in the cpu queue. Caller must hold cedf lock. | ||
135 | */ | ||
136 | static void update_cpu_position(cpu_entry_t *entry) | ||
137 | { | ||
138 | cedf_domain_t *cluster = entry->cluster; | ||
139 | |||
140 | if (likely(bheap_node_in_heap(entry->hn))) | ||
141 | bheap_delete(cpu_lower_prio, | ||
142 | &cluster->cpu_heap, | ||
143 | entry->hn); | ||
144 | |||
145 | bheap_insert(cpu_lower_prio, &cluster->cpu_heap, entry->hn); | ||
146 | } | ||
147 | |||
148 | /* caller must hold cedf lock */ | ||
149 | static cpu_entry_t* lowest_prio_cpu(cedf_domain_t *cluster) | ||
150 | { | ||
151 | struct bheap_node* hn; | ||
152 | hn = bheap_peek(cpu_lower_prio, &cluster->cpu_heap); | ||
153 | return hn->value; | ||
154 | } | ||
155 | |||
156 | |||
157 | /* link_task_to_cpu - Update the link of a CPU. | ||
158 | * Handles the case where the to-be-linked task is already | ||
159 | * scheduled on a different CPU. | ||
160 | */ | ||
161 | static noinline void link_task_to_cpu(struct task_struct* linked, | ||
162 | cpu_entry_t *entry) | ||
163 | { | ||
164 | cpu_entry_t *sched; | ||
165 | struct task_struct* tmp; | ||
166 | int on_cpu; | ||
167 | |||
168 | BUG_ON(linked && !is_realtime(linked)); | ||
169 | |||
170 | /* Currently linked task is set to be unlinked. */ | ||
171 | if (entry->linked) { | ||
172 | entry->linked->rt_param.linked_on = NO_CPU; | ||
173 | } | ||
174 | |||
175 | /* Link new task to CPU. */ | ||
176 | if (linked) { | ||
177 | /* handle the case where the task is already scheduled somewhere! */ | ||
178 | on_cpu = linked->rt_param.scheduled_on; | ||
179 | if (on_cpu != NO_CPU) { | ||
180 | sched = &per_cpu(cedf_cpu_entries, on_cpu); | ||
181 | /* this should only happen if not linked already */ | ||
182 | BUG_ON(sched->linked == linked); | ||
183 | |||
184 | /* If we are already scheduled on the CPU to which we | ||
185 | * wanted to link, we don't need to do the swap -- | ||
186 | * we just link ourselves to the CPU and depend on | ||
187 | * the caller to get things right. | ||
188 | */ | ||
189 | if (entry != sched) { | ||
190 | TRACE_TASK(linked, | ||
191 | "already scheduled on %d, updating link.\n", | ||
192 | sched->cpu); | ||
193 | tmp = sched->linked; | ||
194 | linked->rt_param.linked_on = sched->cpu; | ||
195 | sched->linked = linked; | ||
196 | update_cpu_position(sched); | ||
197 | linked = tmp; | ||
198 | } | ||
199 | } | ||
200 | if (linked) /* might be NULL due to swap */ | ||
201 | linked->rt_param.linked_on = entry->cpu; | ||
202 | } | ||
203 | entry->linked = linked; | ||
204 | #ifdef WANT_ALL_SCHED_EVENTS | ||
205 | if (linked) | ||
206 | TRACE_TASK(linked, "linked to %d.\n", entry->cpu); | ||
207 | else | ||
208 | TRACE("NULL linked to %d.\n", entry->cpu); | ||
209 | #endif | ||
210 | update_cpu_position(entry); | ||
211 | } | ||
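/*
 * Example of the swap above: suppose T should be linked to CPU 5 but is
 * still scheduled on CPU 2.  Then T is linked to CPU 2 instead (avoiding an
 * unnecessary migration), and whatever job was previously linked to CPU 2
 * is linked to CPU 5 in its place.
 */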
212 | |||
213 | /* unlink - Make sure a task is not linked any longer to an entry | ||
214 | * where it was linked before. Must hold cedf_lock. | ||
215 | */ | ||
216 | static noinline void unlink(struct task_struct* t) | ||
217 | { | ||
218 | cpu_entry_t *entry; | ||
219 | |||
220 | if (t->rt_param.linked_on != NO_CPU) { | ||
221 | /* unlink */ | ||
222 | entry = &per_cpu(cedf_cpu_entries, t->rt_param.linked_on); | ||
223 | t->rt_param.linked_on = NO_CPU; | ||
224 | link_task_to_cpu(NULL, entry); | ||
225 | } else if (is_queued(t)) { | ||
226 | /* This is an interesting situation: t is scheduled, | ||
227 | * but was just recently unlinked. It cannot be | ||
228 | * linked anywhere else (because then it would have | ||
229 | * been relinked to this CPU), thus it must be in some | ||
230 | * queue. We must remove it from the list in this | ||
231 | * case. | ||
232 | * | ||
233 | * In the C-EDF case it should be somewhere in the queue of | ||
234 | * its domain, so we can obtain the domain using | ||
235 | * task_cpu_cluster(). | ||
236 | */ | ||
237 | remove(&(task_cpu_cluster(t))->domain, t); | ||
238 | } | ||
239 | } | ||
240 | |||
241 | |||
242 | /* preempt - force a CPU to reschedule | ||
243 | */ | ||
244 | static void preempt(cpu_entry_t *entry) | ||
245 | { | ||
246 | preempt_if_preemptable(entry->scheduled, entry->cpu); | ||
247 | } | ||
248 | |||
249 | #ifdef CONFIG_LITMUS_LOCKING | ||
250 | static int update_pending_job(cedf_domain_t* cluster, struct task_struct* t); | ||
251 | static void priodon_become_eligible(void); | ||
252 | static void priodon_complete_request(void); | ||
253 | |||
254 | static inline int in_pending_heap(struct task_struct* t) | ||
255 | { | ||
256 | return bheap_node_in_heap(tsk_rt(t)->pending_node); | ||
257 | } | ||
258 | |||
259 | /* has this task already been processed for pending */ | ||
260 | static inline int is_pending(struct task_struct* t) | ||
261 | { | ||
262 | return tsk_rt(t)->pending_on != NO_CPU || | ||
263 | in_pending_heap(t); | ||
264 | } | ||
265 | |||
266 | #endif | ||
267 | |||
268 | /* requeue - Put an unlinked task into its cedf domain. | ||
269 | * Caller must hold cedf_lock. | ||
270 | */ | ||
271 | static noinline void requeue(struct task_struct* task) | ||
272 | { | ||
273 | cedf_domain_t *cluster = task_cpu_cluster(task); | ||
274 | BUG_ON(!task); | ||
275 | /* sanity check before insertion */ | ||
276 | BUG_ON(is_queued(task)); | ||
277 | |||
278 | if (is_released(task, litmus_clock())) { | ||
279 | #ifdef CONFIG_LITMUS_LOCKING | ||
280 | if (!is_pending(task)) | ||
281 | update_pending_job(cluster, task); | ||
282 | #endif | ||
283 | __add_ready(&cluster->domain, task); | ||
284 | } else { | ||
285 | /* it has got to wait */ | ||
286 | add_release(&cluster->domain, task); | ||
287 | } | ||
288 | } | ||
289 | |||
290 | /* check for any necessary preemptions */ | ||
291 | static void check_for_preemptions(cedf_domain_t *cluster) | ||
292 | { | ||
293 | struct task_struct *task; | ||
294 | cpu_entry_t* last; | ||
295 | |||
296 | for(last = lowest_prio_cpu(cluster); | ||
297 | edf_preemption_needed(&cluster->domain, last->linked); | ||
298 | last = lowest_prio_cpu(cluster)) { | ||
299 | /* preemption necessary */ | ||
300 | |||
301 | #ifdef CONFIG_LITMUS_LOCKING | ||
302 | task = __peek_ready(&cluster->domain); | ||
303 | if (update_pending_job(cluster, task)) { | ||
304 | /* Something changed; re-evaluate priorities to | ||
305 | * see if we still need to preempt. | ||
306 | */ | ||
307 | TRACE_TASK(task, "hitting continue\n"); | ||
308 | continue; | ||
309 | } | ||
310 | #endif | ||
311 | task = __take_ready(&cluster->domain); | ||
312 | TRACE_TASK(task, "attempting to link task to P%d\n", | ||
313 | last->cpu); | ||
314 | if (last->linked) | ||
315 | requeue(last->linked); | ||
316 | link_task_to_cpu(task, last); | ||
317 | preempt(last); | ||
318 | } | ||
319 | } | ||
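/*
 * Each loop iteration above links the highest-priority ready job to the
 * currently lowest-priority CPU in the cluster and asks that CPU to
 * reschedule; the loop stops once the head of the ready queue no longer has
 * higher priority than the lowest-priority linked job.  Under
 * CONFIG_LITMUS_LOCKING an iteration may instead only update the pending-job
 * bookkeeping and retry, because priority donation may have changed the
 * relevant priorities.
 */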
320 | |||
321 | #ifdef CONFIG_LITMUS_LOCKING | ||
322 | |||
323 | static int pending_lower_prio(struct bheap_node *_a, struct bheap_node *_b) | ||
324 | { | ||
325 | cpu_entry_t *a, *b; | ||
326 | a = _a->value; | ||
327 | b = _b->value; | ||
328 | /* Note that a and b are inverted: we want the lowest-priority CPU at | ||
329 | * the top of the heap. | ||
330 | */ | ||
331 | return edf_higher_base_prio(b->pending, a->pending); | ||
332 | } | ||
333 | |||
334 | /* update_pending_position - Move the cpu entry to the correct place to | ||
335 | * maintain order in the pending-cpu queue. Caller must hold cedf lock. | ||
336 | */ | ||
337 | static void update_pending_position(cpu_entry_t *entry) | ||
338 | { | ||
339 | cedf_domain_t *cluster = entry->cluster; | ||
340 | |||
341 | if (likely(bheap_node_in_heap(entry->pending_hn))) | ||
342 | bheap_delete(pending_lower_prio, | ||
343 | &cluster->pending_cpus, | ||
344 | entry->pending_hn); | ||
345 | |||
346 | bheap_insert(pending_lower_prio, &cluster->pending_cpus, entry->pending_hn); | ||
347 | } | ||
348 | |||
349 | /* caller must hold cedf lock */ | ||
350 | static cpu_entry_t* lowest_pending_cpu(cedf_domain_t *cluster) | ||
351 | { | ||
352 | struct bheap_node* hn; | ||
353 | hn = bheap_peek(pending_lower_prio, &cluster->pending_cpus); | ||
354 | return hn->value; | ||
355 | } | ||
356 | |||
357 | static void priority_raised(struct task_struct* t) | ||
358 | { | ||
359 | cedf_domain_t *cluster = task_cpu_cluster(t); | ||
360 | int linked_on; | ||
361 | |||
362 | linked_on = tsk_rt(t)->linked_on; | ||
363 | |||
364 | /* If it is scheduled, then we need to reorder the CPU heap. */ | ||
365 | if (linked_on != NO_CPU) { | ||
366 | TRACE_TASK(t, "%s: linked on %d\n", | ||
367 | __FUNCTION__, linked_on); | ||
368 | /* Holder is scheduled; need to re-order CPUs. | ||
369 | * We can't use heap_decrease() here since | ||
370 | * the cpu_heap is ordered in reverse direction, so | ||
371 | * it is actually an increase. */ | ||
372 | bheap_delete(cpu_lower_prio, &cluster->cpu_heap, | ||
373 | remote_cpu(linked_on)->hn); | ||
374 | bheap_insert(cpu_lower_prio, &cluster->cpu_heap, | ||
375 | remote_cpu(linked_on)->hn); | ||
376 | } else { | ||
377 | /* holder may be queued: first stop queue changes */ | ||
378 | raw_spin_lock(&cluster->domain.release_lock); | ||
379 | if (is_queued(t)) { | ||
380 | TRACE_TASK(t, "%s: is queued\n", | ||
381 | __FUNCTION__); | ||
382 | bheap_decrease(edf_ready_order, | ||
383 | tsk_rt(t)->heap_node); | ||
384 | } else { | ||
385 | /* Nothing to do: if it is not queued and not linked | ||
386 | * then it is either sleeping or currently being moved | ||
387 | * by other code (e.g., a timer interrupt handler) that | ||
388 | * will use the correct priority when enqueuing the | ||
389 | * task. */ | ||
390 | TRACE_TASK(t, "%s: is NOT queued => Done.\n", | ||
391 | __FUNCTION__); | ||
392 | } | ||
393 | raw_spin_unlock(&cluster->domain.release_lock); | ||
394 | } | ||
395 | } | ||
396 | |||
397 | static void priority_lowered(struct task_struct* t) | ||
398 | { | ||
399 | /* assumption: t is not in a release heap */ | ||
400 | if (is_queued(t) || tsk_rt(t)->linked_on != NO_CPU) { | ||
401 | unlink(t); | ||
402 | requeue(t); | ||
403 | } | ||
404 | } | ||
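/*
 * Note the asymmetry between the two helpers above: priority_raised() can
 * reorder the affected heaps in place (a priority raise corresponds to a
 * bheap key decrease), whereas priority_lowered() has no efficient in-place
 * analogue and therefore simply unlinks and requeues the task so that it is
 * reinserted with its new, lower priority.
 */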
405 | |||
406 | static void donate_priority(struct task_struct* recipient, struct task_struct* donor) | ||
407 | { | ||
408 | cedf_domain_t *cluster = task_cpu_cluster(donor); | ||
409 | |||
410 | BUG_ON(task_cpu_cluster(recipient) != task_cpu_cluster(donor)); | ||
411 | BUG_ON(tsk_rt(donor)->is_donor); | ||
412 | BUG_ON(tsk_rt(recipient)->is_donor); | ||
413 | BUG_ON(tsk_rt(donor)->inh_task); | ||
414 | BUG_ON(tsk_rt(recipient)->inh_task); | ||
415 | |||
416 | TRACE_TASK(donor, "priodon: becomes priority donor for %s/%d\n", | ||
417 | recipient->comm, recipient->pid); | ||
418 | |||
419 | /* swap priorities */ | ||
420 | tsk_rt(recipient)->inh_task = donor; | ||
421 | tsk_rt(donor)->inh_task = recipient; | ||
422 | tsk_rt(donor)->is_donor = 1; | ||
423 | |||
424 | priority_lowered(donor); | ||
425 | priority_raised(recipient); | ||
426 | |||
427 | bheap_uncache_min(edf_ready_order, | ||
428 | &cluster->domain.ready_queue); | ||
429 | } | ||
430 | |||
431 | /* assumption: new_donor has a higher priority than old_donor */ | ||
432 | static void switch_donor(struct task_struct* recipient, | ||
433 | struct task_struct* old_donor, | ||
434 | struct task_struct* new_donor) | ||
435 | { | ||
436 | TRACE_TASK(new_donor, "becomes donor for %s/%d instead of %s/%d\n", | ||
437 | recipient->comm, recipient->pid, old_donor->comm, old_donor->pid); | ||
438 | |||
439 | BUG_ON(tsk_rt(recipient)->inh_task != old_donor); | ||
440 | BUG_ON(tsk_rt(old_donor)->inh_task != recipient); | ||
441 | BUG_ON(tsk_rt(new_donor)->inh_task != NULL); | ||
442 | BUG_ON(tsk_rt(new_donor)->is_donor); | ||
443 | |||
444 | tsk_rt(old_donor)->inh_task = NULL; | ||
445 | tsk_rt(old_donor)->is_donor = 0; | ||
446 | |||
447 | tsk_rt(recipient)->inh_task = new_donor; | ||
448 | tsk_rt(new_donor)->inh_task = recipient; | ||
449 | tsk_rt(new_donor)->is_donor = 1; | ||
450 | |||
451 | priority_raised(recipient); | ||
452 | priority_raised(old_donor); | ||
453 | priority_lowered(new_donor); | ||
454 | } | ||
455 | |||
456 | static void undonate_priority(struct task_struct* recipient, struct task_struct* donor) | ||
457 | { | ||
458 | cedf_domain_t *cluster = task_cpu_cluster(donor); | ||
459 | |||
460 | BUG_ON(tsk_rt(recipient)->inh_task != donor); | ||
461 | BUG_ON(tsk_rt(donor)->inh_task != recipient); | ||
462 | |||
463 | TRACE_TASK(donor, "priodon: is no longer priority donor of %s/%d\n", | ||
464 | recipient->comm, recipient->pid); | ||
465 | |||
466 | tsk_rt(recipient)->inh_task = NULL; | ||
467 | tsk_rt(donor)->inh_task = NULL; | ||
468 | tsk_rt(donor)->is_donor = 0; | ||
469 | |||
470 | priority_lowered(recipient); | ||
471 | priority_raised(donor); | ||
472 | |||
473 | bheap_uncache_min(edf_ready_order, | ||
474 | &cluster->domain.ready_queue); | ||
475 | } | ||
476 | |||
477 | static inline void add_to_pending(cedf_domain_t* cluster, struct task_struct* t) | ||
478 | { | ||
479 | TRACE_TASK(t, "priodon: adding to pending heap wait:%u donor:%u req:%u pend:%d\n", | ||
480 | tsk_rt(t)->waiting_eligible, | ||
481 | tsk_rt(t)->is_donor, tsk_rt(t)->request_incomplete, | ||
482 | tsk_rt(t)->pending_on); | ||
483 | bheap_insert(edf_pending_order, | ||
484 | &cluster->pending_jobs, | ||
485 | tsk_rt(t)->pending_node); | ||
486 | } | ||
487 | |||
488 | static inline struct task_struct* take_pending(cedf_domain_t* cluster) | ||
489 | { | ||
490 | struct bheap_node* node; | ||
491 | node = bheap_take(edf_pending_order, &cluster->pending_jobs); | ||
492 | return node ? (struct task_struct*) node->value : NULL; | ||
493 | } | ||
494 | |||
495 | static inline struct task_struct* peek_pending(cedf_domain_t* cluster) | ||
496 | { | ||
497 | struct bheap_node* node; | ||
498 | node = bheap_peek(edf_pending_order, &cluster->pending_jobs); | ||
499 | return node ? (struct task_struct*) node->value : NULL; | ||
500 | } | ||
501 | |||
502 | static inline int fake_resume(struct task_struct* t) | ||
503 | { | ||
504 | TRACE_TASK(t, "priodon: fake resume wait:%u donor:%u\n", | ||
505 | tsk_rt(t)->waiting_eligible, tsk_rt(t)->is_donor); | ||
506 | /* Fake suspended. Let's resume it. */ | ||
507 | if (tsk_rt(t)->waiting_eligible) { | ||
508 | tsk_rt(t)->waiting_eligible = 0; | ||
509 | if (tsk_rt(t)->scheduled_on == NO_CPU) { | ||
510 | /* it was removed from the queue */ | ||
511 | requeue(t); | ||
512 | return 1; | ||
513 | } | ||
514 | } | ||
515 | return 0; | ||
516 | } | ||
517 | |||
518 | |||
519 | /* Lazily update set of highest-priority pending jobs. | ||
520 | * Returns 1 if priority recheck is required. | ||
521 | */ | ||
522 | static int update_pending_job(cedf_domain_t* cluster, | ||
523 | struct task_struct* to_be_linked) | ||
524 | { | ||
525 | cpu_entry_t* entry; | ||
526 | struct task_struct* lowest_hp; /* lowest-priority high-priority task */ | ||
527 | struct task_struct* highest_lp; /* highest-priority low-priority task */ | ||
528 | int reeval = 0; | ||
529 | |||
530 | entry = lowest_pending_cpu(cluster); | ||
531 | lowest_hp = entry->pending; | ||
532 | |||
533 | if (to_be_linked && !is_pending(to_be_linked)) | ||
534 | /* not yet accounted for, stick in heap */ | ||
535 | add_to_pending(cluster, to_be_linked); | ||
536 | |||
537 | highest_lp = peek_pending(cluster); | ||
538 | if (edf_higher_base_prio(highest_lp, lowest_hp)) { | ||
539 | /* yes, it should become one of the c highest-priority pending jobs */ | ||
540 | |||
541 | TRACE_TASK(highest_lp, | ||
542 | "priodon: became one of the %u highest-prio tasks (P%d, req:%u) X\n", | ||
543 | cluster->num_cpus, | ||
544 | entry->cpu, | ||
545 | tsk_rt(highest_lp)->request_incomplete); | ||
546 | |||
547 | /* get it out of the heap */ | ||
548 | highest_lp = take_pending(cluster); | ||
549 | |||
550 | BUG_ON(highest_lp == lowest_hp); | ||
551 | |||
552 | /* it should never be a priority donor at this point */ | ||
553 | BUG_ON(tsk_rt(highest_lp)->is_donor); | ||
554 | |||
555 | entry->pending = highest_lp; | ||
556 | update_pending_position(entry); | ||
557 | tsk_rt(highest_lp)->pending_on = entry->cpu; | ||
558 | |||
559 | /* things that could happen: | ||
560 | * | ||
561 | * 1) lowest_hp has no donor, but is in a request => highest_lp becomes donor | ||
562 | * 2) lowest_hp is donor => highest_lp becomes new donor, old donor is resumed if suspended | ||
563 | * 3) lowest_hp is not in a request, and highest_lp is waiting => highest_lp is resumed | ||
564 | * 4) lowest_hp is not in a request, and highest_lp is not waiting => nothing to do | ||
565 | * 5) highest_lp has a priority donor => resume its donor | ||
566 | */ | ||
567 | |||
568 | /* do we need to put it back? */ | ||
569 | if (lowest_hp) { | ||
570 | TRACE_TASK(lowest_hp, | ||
571 | "priodon: no longer among %u highest-prio tasks req:%u\n", | ||
572 | cluster->num_cpus, | ||
573 | tsk_rt(lowest_hp)->request_incomplete); | ||
574 | tsk_rt(lowest_hp)->pending_on = NO_CPU; | ||
575 | add_to_pending(cluster, lowest_hp); | ||
576 | |||
577 | |||
578 | if (tsk_rt(lowest_hp)->request_incomplete) { | ||
579 | /* case 1) */ | ||
580 | donate_priority(lowest_hp, highest_lp); | ||
581 | reeval = 1; | ||
582 | } else if (tsk_rt(lowest_hp)->inh_task) { | ||
583 | /* case 2) */ | ||
584 | switch_donor(tsk_rt(lowest_hp)->inh_task, | ||
585 | lowest_hp, highest_lp); | ||
586 | fake_resume(lowest_hp); | ||
587 | reeval = 1; | ||
588 | } | ||
589 | } | ||
590 | |||
591 | |||
592 | if (!tsk_rt(highest_lp)->is_donor) { | ||
593 | if (tsk_rt(highest_lp)->waiting_eligible) { | ||
594 | /* case 3) */ | ||
595 | reeval = fake_resume(highest_lp); | ||
596 | BUG_ON(tsk_rt(highest_lp)->inh_task); | ||
597 | } else if (tsk_rt(highest_lp)->inh_task) { | ||
598 | /* case 5 */ | ||
599 | struct task_struct* donor = tsk_rt(highest_lp)->inh_task; | ||
600 | undonate_priority(highest_lp, donor); | ||
601 | reeval = fake_resume(donor); | ||
602 | } | ||
603 | } | ||
604 | } | ||
605 | |||
606 | return reeval; | ||
607 | } | ||
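/*
 * Illustration of the case analysis above: suppose the cluster tracks
 * c = 2 pending jobs A and B, and job N (earlier deadline than B) becomes
 * ready.  B drops out of the top-c set and N takes over its pending slot.
 * If B is in the middle of a request (case 1), N becomes B's priority donor
 * so that B still effectively completes its request with one of the c
 * highest priorities.  If B was itself a donor (case 2), N takes over that
 * donation and B is fake-resumed if necessary.  Independently, if N was
 * fake-suspended while waiting to become eligible, it is now resumed
 * (case 3); if it had its own donor, that donation is undone and the donor
 * resumed (case 5); otherwise nothing further happens (case 4).
 */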
608 | |||
609 | /* job has exited => no longer pending */ | ||
610 | |||
611 | static void job_pending_exit(struct task_struct* t) | ||
612 | { | ||
613 | cedf_domain_t *cluster; | ||
614 | cpu_entry_t* entry; | ||
615 | |||
616 | TRACE_TASK(t, "priodon: is no longer pending (pending_on:%d, queued:%d)\n", | ||
617 | tsk_rt(t)->pending_on, in_pending_heap(t)); | ||
618 | |||
619 | cluster = task_cpu_cluster(t); | ||
620 | |||
621 | if (tsk_rt(t)->pending_on != NO_CPU) { | ||
622 | entry = &per_cpu(cedf_cpu_entries, tsk_rt(t)->pending_on); | ||
623 | tsk_rt(t)->pending_on = NO_CPU; | ||
624 | entry->pending = NULL; | ||
625 | update_pending_position(entry); | ||
626 | |||
627 | /* let's see if anything changed */ | ||
628 | update_pending_job(cluster, NULL); | ||
629 | } else if (in_pending_heap(t)) { | ||
630 | bheap_delete(edf_pending_order, &cluster->pending_jobs, | ||
631 | tsk_rt(t)->pending_node); | ||
632 | } | ||
633 | } | ||
634 | |||
635 | #endif | ||
636 | |||
637 | |||
638 | /* cedf_job_arrival: task is either resumed or released */ | ||
639 | static noinline void cedf_job_arrival(struct task_struct* task) | ||
640 | { | ||
641 | cedf_domain_t *cluster = task_cpu_cluster(task); | ||
642 | BUG_ON(!task); | ||
643 | |||
644 | requeue(task); | ||
645 | check_for_preemptions(cluster); | ||
646 | } | ||
647 | |||
648 | |||
649 | static void cedf_release_jobs(rt_domain_t* rt, struct bheap* tasks) | ||
650 | { | ||
651 | cedf_domain_t* cluster = container_of(rt, cedf_domain_t, domain); | ||
652 | unsigned long flags; | ||
653 | |||
654 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); | ||
655 | |||
656 | __merge_ready(&cluster->domain, tasks); | ||
657 | check_for_preemptions(cluster); | ||
658 | |||
659 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
660 | } | ||
661 | |||
662 | /* caller holds cedf_lock */ | ||
663 | static noinline void job_completion(struct task_struct *t, int forced) | ||
664 | { | ||
665 | BUG_ON(!t); | ||
666 | |||
667 | sched_trace_task_completion(t, forced); | ||
668 | |||
669 | TRACE_TASK(t, "job_completion().\n"); | ||
670 | |||
671 | #ifdef CONFIG_LITMUS_LOCKING | ||
672 | job_pending_exit(t); | ||
673 | #endif | ||
674 | |||
675 | /* prepare for next period */ | ||
676 | prepare_for_next_period(t); | ||
677 | if (is_released(t, litmus_clock())) | ||
678 | sched_trace_task_release(t); | ||
679 | /* unlink */ | ||
680 | unlink(t); | ||
681 | /* requeue | ||
682 | * But don't requeue a blocking task. */ | ||
683 | set_rt_flags(t, RT_F_RUNNING); | ||
684 | if (is_running(t)) | ||
685 | cedf_job_arrival(t); | ||
686 | } | ||
687 | |||
688 | /* cedf_tick - this function is called for every local timer | ||
689 | * interrupt. | ||
690 | * | ||
691 | * It checks whether the current task has exhausted its budget and, | ||
692 | * if so, requests a reschedule (delayed if the task is non-preemptable). | ||
693 | */ | ||
694 | static void cedf_tick(struct task_struct* t) | ||
695 | { | ||
696 | if (is_realtime(t) && budget_enforced(t) && budget_exhausted(t)) { | ||
697 | if (!is_np(t)) { | ||
698 | /* np tasks will be preempted when they become | ||
699 | * preemptable again | ||
700 | */ | ||
701 | litmus_reschedule_local(); | ||
702 | TRACE("cedf_scheduler_tick: " | ||
703 | "%d is preemptable " | ||
704 | " => FORCE_RESCHED\n", t->pid); | ||
705 | } else if (is_user_np(t)) { | ||
706 | TRACE("cedf_scheduler_tick: " | ||
707 | "%d is non-preemptable, " | ||
708 | "preemption delayed.\n", t->pid); | ||
709 | request_exit_np(t); | ||
710 | } | ||
711 | } | ||
712 | } | ||
713 | |||
714 | /* Getting schedule() right is a bit tricky. schedule() may not make any | ||
715 | * assumptions on the state of the current task since it may be called for a | ||
716 | * number of reasons. The reasons include a scheduler_tick() determined that it | ||
717 | * was necessary, because sys_exit_np() was called, because some Linux | ||
718 | * subsystem determined so, or even (in the worst case) because there is a bug | ||
719 | * hidden somewhere. Thus, we must take extreme care to determine what the | ||
720 | * current state is. | ||
721 | * | ||
722 | * The CPU could currently be scheduling a task (or not), be linked (or not). | ||
723 | * | ||
724 | * The following assertions for the scheduled task could hold: | ||
725 | * | ||
726 | * - !is_running(scheduled) // the job blocks | ||
727 | * - scheduled->timeslice == 0 // the job completed (forcefully) | ||
728 | * - get_rt_flag() == RT_F_SLEEP // the job completed (by syscall) | ||
729 | * - linked != scheduled // we need to reschedule (for any reason) | ||
730 | * - is_np(scheduled) // rescheduling must be delayed, | ||
731 | * sys_exit_np must be requested | ||
732 | * | ||
733 | * Any of these can occur together. | ||
734 | */ | ||
735 | static struct task_struct* cedf_schedule(struct task_struct * prev) | ||
736 | { | ||
737 | cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries); | ||
738 | cedf_domain_t *cluster = entry->cluster; | ||
739 | int out_of_time, sleep, preempt, np, exists, blocks; | ||
740 | struct task_struct* next = NULL; | ||
741 | |||
742 | #ifdef CONFIG_LITMUS_LOCKING | ||
743 | int priodon; | ||
744 | #else | ||
745 | #define priodon 0 | ||
746 | #endif | ||
747 | |||
748 | #ifdef CONFIG_RELEASE_MASTER | ||
749 | /* Bail out early if we are the release master. | ||
750 | * The release master never schedules any real-time tasks. | ||
751 | */ | ||
752 | if (cluster->domain.release_master == entry->cpu) { | ||
753 | sched_state_task_picked(); | ||
754 | return NULL; | ||
755 | } | ||
756 | #endif | ||
757 | |||
758 | raw_spin_lock(&cluster->cluster_lock); | ||
759 | |||
760 | /* sanity checking */ | ||
761 | BUG_ON(entry->scheduled && entry->scheduled != prev); | ||
762 | BUG_ON(entry->scheduled && !is_realtime(prev)); | ||
763 | BUG_ON(is_realtime(prev) && !entry->scheduled); | ||
764 | |||
765 | /* (0) Determine state */ | ||
766 | exists = entry->scheduled != NULL; | ||
767 | blocks = exists && !is_running(entry->scheduled); | ||
768 | out_of_time = exists && | ||
769 | budget_enforced(entry->scheduled) && | ||
770 | budget_exhausted(entry->scheduled); | ||
771 | np = exists && is_np(entry->scheduled); | ||
772 | sleep = exists && get_rt_flags(entry->scheduled) == RT_F_SLEEP; | ||
773 | preempt = entry->scheduled != entry->linked; | ||
774 | |||
775 | #ifdef CONFIG_LITMUS_LOCKING | ||
776 | priodon = exists && (tsk_rt(entry->scheduled)->waiting_eligible || | ||
777 | /* can't allow job to exit until request is over */ | ||
778 | (tsk_rt(entry->scheduled)->is_donor && sleep)); | ||
779 | |||
780 | /* these should never happen together (at least we don't handle it atm) */ | ||
781 | BUG_ON(priodon && blocks); | ||
782 | #endif | ||
783 | |||
784 | #ifdef WANT_ALL_SCHED_EVENTS | ||
785 | TRACE_TASK(prev, "invoked cedf_schedule.\n"); | ||
786 | #endif | ||
787 | |||
788 | if (exists) | ||
789 | TRACE_TASK(prev, | ||
790 | "blocks:%d out_of_time:%d np:%d sleep:%d preempt:%d " | ||
791 | "state:%d sig:%d priodon:%d\n", | ||
792 | blocks, out_of_time, np, sleep, preempt, | ||
793 | prev->state, signal_pending(prev), priodon); | ||
794 | if (entry->linked && preempt) | ||
795 | TRACE_TASK(prev, "will be preempted by %s/%d\n", | ||
796 | entry->linked->comm, entry->linked->pid); | ||
797 | |||
798 | |||
799 | /* If a task blocks we have no choice but to reschedule. | ||
800 | */ | ||
801 | if (blocks || priodon) | ||
802 | unlink(entry->scheduled); | ||
803 | |||
804 | /* Request a sys_exit_np() call if we would like to preempt but cannot. | ||
805 | * Do not unlink since entry->scheduled is currently in the ready queue. | ||
806 | * We don't process out_of_time and sleep until the job is preemptable again. | ||
807 | */ | ||
808 | if (np && (out_of_time || preempt || sleep)) { | ||
809 | request_exit_np(entry->scheduled); | ||
810 | } | ||
811 | |||
812 | /* Any task that is preemptable and either exhausts its execution | ||
813 | * budget or wants to sleep completes. We may have to reschedule after | ||
814 | * this. Don't do a job completion if we block (can't have timers running | ||
815 | * for blocked jobs). Preemptions go first for the same reason. | ||
816 | */ | ||
817 | if (!np && (out_of_time || sleep) && !blocks && !preempt | ||
818 | && !priodon) | ||
819 | /* note: priority donation prevents job completion */ | ||
820 | job_completion(entry->scheduled, !sleep); | ||
821 | |||
822 | /* Link pending task if we became unlinked. | ||
823 | */ | ||
824 | |||
825 | if (!entry->linked) { | ||
826 | #ifdef CONFIG_LITMUS_LOCKING | ||
827 | struct task_struct *pulled; | ||
828 | int reeval; | ||
829 | do { | ||
830 | pulled = __take_ready(&cluster->domain); | ||
831 | reeval = 0; | ||
832 | if (pulled && !is_pending(pulled)) { | ||
833 | /* Pulled an un-processed task from the ready queue. */ | ||
834 | TRACE_TASK(pulled, "pulled unprocessed\n"); | ||
835 | reeval = update_pending_job(cluster, pulled); | ||
836 | if (reeval) | ||
837 | /* priority may have changed --- try again */ | ||
838 | requeue(pulled); | ||
839 | } | ||
840 | } while (reeval); | ||
841 | link_task_to_cpu(pulled, entry); | ||
842 | #else | ||
843 | link_task_to_cpu(__take_ready(&cluster->domain), entry); | ||
844 | #endif | ||
845 | } | ||
846 | |||
847 | /* The final scheduling decision. Do we need to switch for some reason? | ||
848 | * If linked is different from scheduled, then select linked as next. | ||
849 | */ | ||
850 | if ((!np || blocks || priodon) && | ||
851 | entry->linked != entry->scheduled) { | ||
852 | /* Schedule a linked job? */ | ||
853 | if (entry->linked) { | ||
854 | entry->linked->rt_param.scheduled_on = entry->cpu; | ||
855 | next = entry->linked; | ||
856 | } | ||
857 | if (entry->scheduled) { | ||
858 | /* not gonna be scheduled soon */ | ||
859 | entry->scheduled->rt_param.scheduled_on = NO_CPU; | ||
860 | TRACE_TASK(entry->scheduled, "scheduled_on = NO_CPU\n"); | ||
861 | } | ||
862 | } else | ||
863 | /* Only override Linux scheduler if we have a real-time task | ||
864 | * scheduled that needs to continue. | ||
865 | */ | ||
866 | if (exists) | ||
867 | next = prev; | ||
868 | |||
869 | sched_state_task_picked(); | ||
870 | raw_spin_unlock(&cluster->cluster_lock); | ||
871 | |||
872 | #ifdef WANT_ALL_SCHED_EVENTS | ||
873 | TRACE("cedf_lock released, next=0x%p\n", next); | ||
874 | |||
875 | if (next) | ||
876 | TRACE_TASK(next, "scheduled at %llu\n", litmus_clock()); | ||
877 | else if (exists && !next) | ||
878 | TRACE("becomes idle at %llu.\n", litmus_clock()); | ||
879 | #endif | ||
880 | |||
881 | |||
882 | return next; | ||
883 | } | ||
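/*
 * Note on priodon above: a job that is fake-suspended (waiting_eligible) or
 * that tries to complete while still acting as a priority donor is forced
 * off its CPU via unlink() even though it remains runnable from Linux's
 * point of view.  It is put back into play later by fake_resume(), either
 * from update_pending_job() or from priodon_complete_request().
 */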
884 | |||
885 | |||
886 | /* _finish_switch - we just finished the switch away from prev | ||
887 | */ | ||
888 | static void cedf_finish_switch(struct task_struct *prev) | ||
889 | { | ||
890 | cpu_entry_t* entry = &__get_cpu_var(cedf_cpu_entries); | ||
891 | |||
892 | entry->scheduled = is_realtime(current) ? current : NULL; | ||
893 | #ifdef WANT_ALL_SCHED_EVENTS | ||
894 | TRACE_TASK(prev, "switched away from\n"); | ||
895 | #endif | ||
896 | } | ||
897 | |||
898 | |||
899 | /* Prepare a task for running in RT mode | ||
900 | */ | ||
901 | static void cedf_task_new(struct task_struct * t, int on_rq, int running) | ||
902 | { | ||
903 | unsigned long flags; | ||
904 | cpu_entry_t* entry; | ||
905 | cedf_domain_t* cluster; | ||
906 | |||
907 | TRACE("gsn edf: task new %d\n", t->pid); | ||
908 | |||
909 | /* the cluster doesn't change even if t is running */ | ||
910 | cluster = task_cpu_cluster(t); | ||
911 | |||
912 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); | ||
913 | |||
914 | /* setup job params */ | ||
915 | release_at(t, litmus_clock()); | ||
916 | |||
917 | #ifdef CONFIG_LITMUS_LOCKING | ||
918 | tsk_rt(t)->pending_node = bheap_node_alloc(GFP_ATOMIC | __GFP_NOFAIL); | ||
919 | bheap_node_init(&tsk_rt(t)->pending_node, t); | ||
920 | tsk_rt(t)->pending_on = NO_CPU; | ||
921 | add_to_pending(cluster, t); | ||
922 | #endif | ||
923 | |||
924 | if (running) { | ||
925 | entry = &per_cpu(cedf_cpu_entries, task_cpu(t)); | ||
926 | BUG_ON(entry->scheduled); | ||
927 | |||
928 | #ifdef CONFIG_RELEASE_MASTER | ||
929 | if (entry->cpu != cluster->domain.release_master) { | ||
930 | #endif | ||
931 | entry->scheduled = t; | ||
932 | tsk_rt(t)->scheduled_on = task_cpu(t); | ||
933 | #ifdef CONFIG_RELEASE_MASTER | ||
934 | } else { | ||
935 | /* do not schedule on release master */ | ||
936 | preempt(entry); /* force resched */ | ||
937 | tsk_rt(t)->scheduled_on = NO_CPU; | ||
938 | } | ||
939 | #endif | ||
940 | } else { | ||
941 | t->rt_param.scheduled_on = NO_CPU; | ||
942 | } | ||
943 | t->rt_param.linked_on = NO_CPU; | ||
944 | |||
945 | cedf_job_arrival(t); | ||
946 | raw_spin_unlock_irqrestore(&(cluster->cluster_lock), flags); | ||
947 | } | ||
948 | |||
949 | static void cedf_task_wake_up(struct task_struct *task) | ||
950 | { | ||
951 | unsigned long flags; | ||
952 | lt_t now; | ||
953 | cedf_domain_t *cluster; | ||
954 | |||
955 | TRACE_TASK(task, "wake_up at %llu\n", litmus_clock()); | ||
956 | |||
957 | cluster = task_cpu_cluster(task); | ||
958 | |||
959 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); | ||
960 | /* We need to take suspensions because of semaphores into | ||
961 | * account! If a job resumes after being suspended due to acquiring | ||
962 | * a semaphore, it should never be treated as a new job release. | ||
963 | */ | ||
964 | if (get_rt_flags(task) == RT_F_EXIT_SEM) { | ||
965 | set_rt_flags(task, RT_F_RUNNING); | ||
966 | } else { | ||
967 | now = litmus_clock(); | ||
968 | if (is_tardy(task, now)) { | ||
969 | /* new sporadic release */ | ||
970 | release_at(task, now); | ||
971 | sched_trace_task_release(task); | ||
972 | } | ||
973 | else { | ||
974 | if (task->rt.time_slice) { | ||
975 | /* came back in time before deadline | ||
976 | */ | ||
977 | set_rt_flags(task, RT_F_RUNNING); | ||
978 | } | ||
979 | } | ||
980 | } | ||
981 | cedf_job_arrival(task); | ||
982 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
983 | } | ||
984 | |||
985 | static void cedf_task_block(struct task_struct *t) | ||
986 | { | ||
987 | unsigned long flags; | ||
988 | cedf_domain_t *cluster; | ||
989 | |||
990 | TRACE_TASK(t, "block at %llu\n", litmus_clock()); | ||
991 | |||
992 | cluster = task_cpu_cluster(t); | ||
993 | |||
994 | /* unlink if necessary */ | ||
995 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); | ||
996 | unlink(t); | ||
997 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
998 | |||
999 | BUG_ON(!is_realtime(t)); | ||
1000 | } | ||
1001 | |||
1002 | #ifdef CONFIG_LITMUS_LOCKING | ||
1003 | static void cedf_pre_setsched(struct task_struct *t, int policy) | ||
1004 | { | ||
1005 | |||
1006 | unsigned long flags; | ||
1007 | cedf_domain_t *cluster = task_cpu_cluster(t); | ||
1008 | |||
1009 | int delay_donor_exit = 0; | ||
1010 | |||
1011 | while (1) { | ||
1012 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); | ||
1013 | |||
1014 | TRACE_CUR("cedf_pre_setsched wait:%u pend:%d donor:%u req:%u\n", | ||
1015 | tsk_rt(t)->waiting_eligible, | ||
1016 | tsk_rt(t)->pending_on, tsk_rt(t)->is_donor, | ||
1017 | tsk_rt(t)->request_incomplete); | ||
1018 | |||
1019 | delay_donor_exit = tsk_rt(current)->is_donor; | ||
1020 | |||
1021 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
1022 | |||
1023 | if (!delay_donor_exit) | ||
1024 | break; | ||
1025 | |||
1026 | TRACE_CUR("donor exit delay\n"); | ||
1027 | set_current_state(TASK_INTERRUPTIBLE); | ||
1028 | schedule_timeout(HZ); | ||
1029 | } | ||
1030 | } | ||
1031 | #endif | ||
1032 | |||
1033 | static void cedf_task_exit(struct task_struct * t) | ||
1034 | { | ||
1035 | unsigned long flags; | ||
1036 | cedf_domain_t *cluster = task_cpu_cluster(t); | ||
1037 | |||
1038 | /* unlink if necessary */ | ||
1039 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); | ||
1040 | |||
1041 | unlink(t); | ||
1042 | |||
1043 | #ifdef CONFIG_LITMUS_LOCKING | ||
1044 | /* make sure it's not pending anymore */ | ||
1045 | job_pending_exit(t); | ||
1046 | bheap_node_free(tsk_rt(t)->pending_node); | ||
1047 | #endif | ||
1048 | |||
1049 | if (tsk_rt(t)->scheduled_on != NO_CPU) { | ||
1050 | cpu_entry_t *cpu; | ||
1051 | cpu = &per_cpu(cedf_cpu_entries, tsk_rt(t)->scheduled_on); | ||
1052 | cpu->scheduled = NULL; | ||
1053 | tsk_rt(t)->scheduled_on = NO_CPU; | ||
1054 | } | ||
1055 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
1056 | |||
1057 | |||
1058 | BUG_ON(!is_realtime(t)); | ||
1059 | TRACE_TASK(t, "RIP\n"); | ||
1060 | } | ||
1061 | |||
1062 | #ifdef CONFIG_LITMUS_LOCKING | ||
1063 | |||
1064 | #include <litmus/fdso.h> | ||
1065 | #include <litmus/locking.h> | ||
1066 | |||
1067 | /* NOTE: we use fake suspensions because we must wake the task from within the | ||
1068 | * scheduler */ | ||
1069 | |||
1070 | /* suspend until the current task becomes eligible to issue a lock request */ | ||
1071 | static void priodon_become_eligible(void) | ||
1072 | { | ||
1073 | struct task_struct* t = current; | ||
1074 | unsigned long flags; | ||
1075 | cedf_domain_t *cluster; | ||
1076 | |||
1077 | cluster = task_cpu_cluster(t); | ||
1078 | |||
1079 | do { | ||
1080 | TRACE_CUR("priodon: checking whether request may be issued\n"); | ||
1081 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); | ||
1082 | |||
1083 | if (tsk_rt(t)->pending_on == NO_CPU || | ||
1084 | tsk_rt(t)->is_donor) { | ||
1085 | /* nope, gotta wait */ | ||
1086 | tsk_rt(t)->waiting_eligible = 1; | ||
1087 | TRACE_CUR("priodon: not eligible pend:%u donor:%u\n", | ||
1088 | tsk_rt(t)->pending_on, tsk_rt(t)->is_donor); | ||
1089 | } else { | ||
1090 | /* alright! we are good to go! */ | ||
1091 | tsk_rt(t)->request_incomplete = 1; | ||
1092 | TRACE_CUR("priodon: request issued\n"); | ||
1093 | } | ||
1094 | |||
1095 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
1096 | |||
1097 | if (tsk_rt(t)->waiting_eligible) { | ||
1098 | TRACE_CUR("priodon: fake suspending\n"); | ||
1099 | TS_LOCK_SUSPEND; | ||
1100 | schedule(); | ||
1101 | TS_LOCK_RESUME; | ||
1102 | } | ||
1103 | |||
1104 | } while (!tsk_rt(t)->request_incomplete); | ||
1105 | } | ||
1106 | |||
1107 | /* current task has completed its request */ | ||
1108 | static void priodon_complete_request(void) | ||
1109 | { | ||
1110 | struct task_struct* t = current; | ||
1111 | struct task_struct* donor; | ||
1112 | unsigned long flags; | ||
1113 | cedf_domain_t *cluster; | ||
1114 | |||
1115 | cluster = task_cpu_cluster(t); | ||
1116 | |||
1117 | preempt_disable(); | ||
1118 | |||
1119 | raw_spin_lock_irqsave(&cluster->cluster_lock, flags); | ||
1120 | |||
1121 | TRACE_CUR("priodon: completing request\n"); | ||
1122 | |||
1123 | if (tsk_rt(t)->inh_task) { | ||
1124 | /* we have a donor job --- see if we need to wake it */ | ||
1125 | donor = tsk_rt(t)->inh_task; | ||
1126 | undonate_priority(t, donor); | ||
1127 | |||
1128 | if (fake_resume(donor)) | ||
1129 | check_for_preemptions(cluster); | ||
1130 | } | ||
1131 | |||
1132 | tsk_rt(t)->request_incomplete = 0; | ||
1133 | |||
1134 | raw_spin_unlock_irqrestore(&cluster->cluster_lock, flags); | ||
1135 | |||
1136 | preempt_enable(); | ||
1137 | } | ||
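/*
 * The two helpers above bracket every lock request; see cedf_omlp_lock()
 * and cedf_omlp_unlock() below.  Roughly:
 *
 *     priodon_become_eligible();    // may fake-suspend until eligible
 *     ... acquire, use, and release the resource ...
 *     priodon_complete_request();   // wakes a waiting donor, if any
 */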
1138 | |||
1139 | /* struct for semaphore with priority inheritance */ | ||
1140 | struct omlp_semaphore { | ||
1141 | struct litmus_lock litmus_lock; | ||
1142 | |||
1143 | /* current resource holder */ | ||
1144 | struct task_struct *owner; | ||
1145 | |||
1146 | /* FIFO queue of waiting tasks */ | ||
1147 | wait_queue_head_t fifo_wait; | ||
1148 | }; | ||
1149 | |||
1150 | static inline struct omlp_semaphore* omlp_from_lock(struct litmus_lock* lock) | ||
1151 | { | ||
1152 | return container_of(lock, struct omlp_semaphore, litmus_lock); | ||
1153 | } | ||
1154 | |||
1155 | static int cedf_omlp_lock(struct litmus_lock* l) | ||
1156 | { | ||
1157 | struct task_struct* t = current; | ||
1158 | struct omlp_semaphore *sem = omlp_from_lock(l); | ||
1159 | wait_queue_t wait; | ||
1160 | unsigned long flags; | ||
1161 | |||
1162 | if (!is_realtime(t)) | ||
1163 | return -EPERM; | ||
1164 | |||
1165 | priodon_become_eligible(); | ||
1166 | |||
1167 | spin_lock_irqsave(&sem->fifo_wait.lock, flags); | ||
1168 | |||
1169 | if (sem->owner) { | ||
1170 | /* resource is not free => must suspend and wait */ | ||
1171 | |||
1172 | init_waitqueue_entry(&wait, t); | ||
1173 | |||
1174 | set_task_state(t, TASK_UNINTERRUPTIBLE); | ||
1175 | |||
1176 | __add_wait_queue_tail_exclusive(&sem->fifo_wait, &wait); | ||
1177 | |||
1178 | TS_LOCK_SUSPEND; | ||
1179 | |||
1180 | spin_unlock_irqrestore(&sem->fifo_wait.lock, flags); | ||
1181 | |||
1182 | schedule(); | ||
1183 | |||
1184 | TS_LOCK_RESUME; | ||
1185 | |||
1186 | BUG_ON(sem->owner != t); | ||
1187 | } else { | ||
1188 | /* it's ours now */ | ||
1189 | sem->owner = t; | ||
1190 | |||
1191 | spin_unlock_irqrestore(&sem->fifo_wait.lock, flags); | ||
1192 | } | ||
1193 | |||
1194 | return 0; | ||
1195 | } | ||
1196 | |||
1197 | static int cedf_omlp_unlock(struct litmus_lock* l) | ||
1198 | { | ||
1199 | struct task_struct *t = current, *next; | ||
1200 | struct omlp_semaphore *sem = omlp_from_lock(l); | ||
1201 | unsigned long flags; | ||
1202 | int err = 0; | ||
1203 | |||
1204 | spin_lock_irqsave(&sem->fifo_wait.lock, flags); | ||
1205 | |||
1206 | if (sem->owner != t) { | ||
1207 | err = -EINVAL; | ||
1208 | spin_unlock_irqrestore(&sem->fifo_wait.lock, flags); | ||
1209 | goto out; | ||
1210 | } | ||
1211 | |||
1212 | /* check if there are jobs waiting for this resource */ | ||
1213 | next = __waitqueue_remove_first(&sem->fifo_wait); | ||
1214 | if (next) { | ||
1215 | /* next becomes the resource holder */ | ||
1216 | sem->owner = next; | ||
1217 | TRACE_CUR("lock ownership passed to %s/%d\n", next->comm, next->pid); | ||
1218 | |||
1219 | /* wake up next */ | ||
1220 | wake_up_process(next); | ||
1221 | } else | ||
1222 | /* becomes available */ | ||
1223 | sem->owner = NULL; | ||
1224 | |||
1225 | spin_unlock_irqrestore(&sem->fifo_wait.lock, flags); | ||
1226 | |||
1227 | priodon_complete_request(); | ||
1228 | |||
1229 | out: | ||
1230 | return err; | ||
1231 | } | ||
1232 | |||
1233 | static int cedf_omlp_close(struct litmus_lock* l) | ||
1234 | { | ||
1235 | struct task_struct *t = current; | ||
1236 | struct omlp_semaphore *sem = omlp_from_lock(l); | ||
1237 | unsigned long flags; | ||
1238 | |||
1239 | int owner; | ||
1240 | |||
1241 | spin_lock_irqsave(&sem->fifo_wait.lock, flags); | ||
1242 | |||
1243 | owner = sem->owner == t; | ||
1244 | |||
1245 | spin_unlock_irqrestore(&sem->fifo_wait.lock, flags); | ||
1246 | |||
1247 | if (owner) | ||
1248 | cedf_omlp_unlock(l); | ||
1249 | |||
1250 | return 0; | ||
1251 | } | ||
1252 | |||
1253 | static void cedf_omlp_free(struct litmus_lock* lock) | ||
1254 | { | ||
1255 | kfree(omlp_from_lock(lock)); | ||
1256 | } | ||
1257 | |||
1258 | static struct litmus_lock_ops cedf_omlp_lock_ops = { | ||
1259 | .close = cedf_omlp_close, | ||
1260 | .lock = cedf_omlp_lock, | ||
1261 | .unlock = cedf_omlp_unlock, | ||
1262 | .deallocate = cedf_omlp_free, | ||
1263 | }; | ||
1264 | |||
1265 | static struct litmus_lock* cedf_new_omlp(void) | ||
1266 | { | ||
1267 | struct omlp_semaphore* sem; | ||
1268 | |||
1269 | sem = kmalloc(sizeof(*sem), GFP_KERNEL); | ||
1270 | if (!sem) | ||
1271 | return NULL; | ||
1272 | |||
1273 | sem->owner = NULL; | ||
1274 | init_waitqueue_head(&sem->fifo_wait); | ||
1275 | sem->litmus_lock.ops = &cedf_omlp_lock_ops; | ||
1276 | |||
1277 | return &sem->litmus_lock; | ||
1278 | } | ||
1279 | |||
1280 | static long cedf_allocate_lock(struct litmus_lock **lock, int type, | ||
1281 | void* __user unused) | ||
1282 | { | ||
1283 | int err = -ENXIO; | ||
1284 | |||
1285 | switch (type) { | ||
1286 | |||
1287 | case OMLP_SEM: | ||
1288 | /* O(m) Multiprocessor Locking Protocol */ | ||
1289 | *lock = cedf_new_omlp(); | ||
1290 | if (*lock) | ||
1291 | err = 0; | ||
1292 | else | ||
1293 | err = -ENOMEM; | ||
1294 | break; | ||
1295 | |||
1296 | }; | ||
1297 | |||
1298 | return err; | ||
1299 | } | ||
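/*
 * Userspace view (rough sketch; the helper names below depend on the
 * liblitmus version in use and are assumptions, not part of this file):
 * a task allocates an OMLP_SEM object through the FDSO interface, which
 * ends up in cedf_allocate_lock(), and then issues lock/unlock calls that
 * land in cedf_omlp_lock()/cedf_omlp_unlock():
 *
 *     int od = open_omlp_sem(fd, 0);   // hypothetical liblitmus helper
 *     litmus_lock(od);                 // -> cedf_omlp_lock()
 *     ... critical section ...
 *     litmus_unlock(od);               // -> cedf_omlp_unlock()
 *     od_close(od);                    // -> cedf_omlp_close()
 */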
1300 | |||
1301 | |||
1302 | #endif | ||
1303 | |||
1304 | static long cedf_admit_task(struct task_struct* tsk) | ||
1305 | { | ||
1306 | if (task_cpu(tsk) == tsk->rt_param.task_params.cpu) { | ||
1307 | #ifdef CONFIG_LITMUS_LOCKING | ||
1308 | |||
1309 | #endif | ||
1310 | return 0; | ||
1311 | } | ||
1312 | else | ||
1313 | return -EINVAL; | ||
1314 | } | ||
1315 | |||
1316 | /* total number of clusters */ | ||
1317 | static int num_clusters; | ||
1318 | /* we do not support clusters of different sizes */ | ||
1319 | static unsigned int cluster_size; | ||
1320 | |||
1321 | #ifdef VERBOSE_INIT | ||
1322 | static void print_cluster_topology(cpumask_var_t mask, int cpu) | ||
1323 | { | ||
1324 | int chk; | ||
1325 | char buf[255]; | ||
1326 | |||
1327 | chk = cpulist_scnprintf(buf, 254, mask); | ||
1328 | buf[chk] = '\0'; | ||
1329 | printk(KERN_INFO "CPU = %d, shared cpu(s) = %s\n", cpu, buf); | ||
1330 | |||
1331 | } | ||
1332 | #endif | ||
1333 | |||
1334 | static int clusters_allocated = 0; | ||
1335 | |||
1336 | static void cleanup_cedf(void) | ||
1337 | { | ||
1338 | int i; | ||
1339 | |||
1340 | if (clusters_allocated) { | ||
1341 | for (i = 0; i < num_clusters; i++) { | ||
1342 | free_cpumask_var(cedf[i].cpu_map); | ||
1343 | } | ||
1344 | |||
1345 | kfree(cedf); | ||
1346 | } | ||
1347 | } | ||
1348 | |||
1349 | static long cedf_activate_plugin(void) | ||
1350 | { | ||
1351 | int i, j, cpu, ccpu, cpu_count; | ||
1352 | cpu_entry_t *entry; | ||
1353 | |||
1354 | cpumask_var_t mask; | ||
1355 | int chk = 0; | ||
1356 | |||
1357 | /* de-allocate old clusters, if any */ | ||
1358 | cleanup_cedf(); | ||
1359 | |||
1360 | printk(KERN_INFO "C-EDF: Activate Plugin, cluster configuration = %d\n", | ||
1361 | cluster_config); | ||
1362 | |||
1363 | /* need to get cluster_size first */ | ||
1364 | if(!zalloc_cpumask_var(&mask, GFP_ATOMIC)) | ||
1365 | return -ENOMEM; | ||
1366 | |||
1367 | if (unlikely(cluster_config == GLOBAL_CLUSTER)) { | ||
1368 | cluster_size = num_online_cpus(); | ||
1369 | } else { | ||
1370 | chk = get_shared_cpu_map(mask, 0, cluster_config); | ||
1371 | if (chk) { | ||
1372 | /* if chk != 0 then it is the max allowed index */ | ||
1373 | printk(KERN_INFO "C-EDF: Cluster configuration = %d " | ||
1374 | "is not supported on this hardware.\n", | ||
1375 | cluster_config); | ||
1376 | /* User should notice that the configuration failed, so | ||
1377 | * let's bail out. */ | ||
1378 | return -EINVAL; | ||
1379 | } | ||
1380 | |||
1381 | cluster_size = cpumask_weight(mask); | ||
1382 | } | ||
1383 | |||
1384 | if ((num_online_cpus() % cluster_size) != 0) { | ||
1385 | /* this can't be right, some cpus are left out */ | ||
1386 | printk(KERN_ERR "C-EDF: Trying to group %d cpus in %d!\n", | ||
1387 | num_online_cpus(), cluster_size); | ||
1388 | return -1; | ||
1389 | } | ||
1390 | |||
1391 | num_clusters = num_online_cpus() / cluster_size; | ||
1392 | printk(KERN_INFO "C-EDF: %d cluster(s) of size = %d\n", | ||
1393 | num_clusters, cluster_size); | ||
1394 | |||
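/*
 * For example, on a hypothetical 24-CPU machine where six CPUs share each
 * L3 cache, an L3 configuration yields cluster_size = 6 and
 * num_clusters = 4, while GLOBAL_CLUSTER yields one cluster of size 24.
 */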
1395 | /* initialize clusters */ | ||
1396 | cedf = kmalloc(num_clusters * sizeof(cedf_domain_t), GFP_ATOMIC); | ||
1397 | for (i = 0; i < num_clusters; i++) { | ||
1398 | bheap_init(&(cedf[i].cpu_heap)); | ||
1399 | #ifdef CONFIG_LITMUS_LOCKING | ||
1400 | bheap_init(&(cedf[i].pending_jobs)); | ||
1401 | bheap_init(&(cedf[i].pending_cpus)); | ||
1402 | #endif | ||
1403 | edf_domain_init(&(cedf[i].domain), NULL, cedf_release_jobs); | ||
1404 | |||
1405 | if(!zalloc_cpumask_var(&cedf[i].cpu_map, GFP_ATOMIC)) | ||
1406 | return -ENOMEM; | ||
1407 | #ifdef CONFIG_RELEASE_MASTER | ||
1408 | cedf[i].domain.release_master = atomic_read(&release_master_cpu); | ||
1409 | #endif | ||
1410 | } | ||
1411 | |||
1412 | /* cycle through clusters and add CPUs to them */ | ||
1413 | for (i = 0; i < num_clusters; i++) { | ||
1414 | |||
1415 | for_each_online_cpu(cpu) { | ||
1416 | /* check if the cpu is already in a cluster */ | ||
1417 | for (j = 0; j < num_clusters; j++) | ||
1418 | if (cpumask_test_cpu(cpu, cedf[j].cpu_map)) | ||
1419 | break; | ||
1420 | /* if it is in a cluster go to next cpu */ | ||
1421 | if (j < num_clusters && | ||
1422 | cpumask_test_cpu(cpu, cedf[j].cpu_map)) | ||
1423 | continue; | ||
1424 | |||
1425 | /* this cpu isn't in any cluster */ | ||
1426 | /* get the shared cpus */ | ||
1427 | if (unlikely(cluster_config == GLOBAL_CLUSTER)) | ||
1428 | cpumask_copy(mask, cpu_online_mask); | ||
1429 | else | ||
1430 | get_shared_cpu_map(mask, cpu, cluster_config); | ||
1431 | |||
1432 | cpumask_copy(cedf[i].cpu_map, mask); | ||
1433 | #ifdef VERBOSE_INIT | ||
1434 | print_cluster_topology(mask, cpu); | ||
1435 | #endif | ||
1436 | /* add cpus to current cluster and init cpu_entry_t */ | ||
1437 | cpu_count = 0; | ||
1438 | cedf[i].num_cpus = 0; | ||
1439 | for_each_cpu(ccpu, cedf[i].cpu_map) { | ||
1440 | |||
1441 | entry = &per_cpu(cedf_cpu_entries, ccpu); | ||
1442 | atomic_set(&entry->will_schedule, 0); | ||
1443 | entry->cpu = ccpu; | ||
1444 | entry->cluster = &cedf[i]; | ||
1445 | entry->hn = cpu_nodes + ccpu; | ||
1446 | bheap_node_init(&entry->hn, entry); | ||
1447 | |||
1448 | #ifdef CONFIG_LITMUS_LOCKING | ||
1449 | entry->pending_hn = pending_nodes + ccpu; | ||
1450 | bheap_node_init(&entry->pending_hn, entry); | ||
1451 | entry->pending = NULL; | ||
1452 | #endif | ||
1453 | |||
1454 | cpu_count++; | ||
1455 | |||
1456 | entry->linked = NULL; | ||
1457 | entry->scheduled = NULL; | ||
1458 | #ifdef CONFIG_RELEASE_MASTER | ||
1459 | /* only add CPUs that should schedule jobs */ | ||
1460 | if (entry->cpu != entry->cluster->domain.release_master) | ||
1461 | #endif | ||
1462 | { | ||
1463 | cedf[i].num_cpus++; | ||
1464 | update_cpu_position(entry); | ||
1465 | #ifdef CONFIG_LITMUS_LOCKING | ||
1466 | update_pending_position(entry); | ||
1467 | #endif | ||
1468 | } | ||
1469 | } | ||
1470 | /* done with this cluster */ | ||
1471 | break; | ||
1472 | } | ||
1473 | } | ||
1474 | |||
1475 | free_cpumask_var(mask); | ||
1476 | clusters_allocated = 1; | ||
1477 | return 0; | ||
1478 | } | ||
1479 | |||
1480 | /* Plugin object */ | ||
1481 | static struct sched_plugin cedf_plugin __cacheline_aligned_in_smp = { | ||
1482 | .plugin_name = "C-EDF", | ||
1483 | .finish_switch = cedf_finish_switch, | ||
1484 | .tick = cedf_tick, | ||
1485 | .task_new = cedf_task_new, | ||
1486 | .complete_job = complete_job, | ||
1487 | .task_exit = cedf_task_exit, | ||
1488 | .schedule = cedf_schedule, | ||
1489 | .task_wake_up = cedf_task_wake_up, | ||
1490 | .task_block = cedf_task_block, | ||
1491 | .admit_task = cedf_admit_task, | ||
1492 | .activate_plugin = cedf_activate_plugin, | ||
1493 | #ifdef CONFIG_LITMUS_LOCKING | ||
1494 | .allocate_lock = cedf_allocate_lock, | ||
1495 | .pre_setsched = cedf_pre_setsched, | ||
1496 | #endif | ||
1497 | }; | ||
1498 | |||
1499 | static struct proc_dir_entry *cluster_file = NULL, *cedf_dir = NULL; | ||
1500 | |||
1501 | static int __init init_cedf(void) | ||
1502 | { | ||
1503 | int err, fs; | ||
1504 | |||
1505 | err = register_sched_plugin(&cedf_plugin); | ||
1506 | if (!err) { | ||
1507 | fs = make_plugin_proc_dir(&cedf_plugin, &cedf_dir); | ||
1508 | if (!fs) | ||
1509 | cluster_file = create_cluster_file(cedf_dir, &cluster_config); | ||
1510 | else | ||
1511 | printk(KERN_ERR "Could not allocate C-EDF procfs dir.\n"); | ||
1512 | } | ||
1513 | return err; | ||
1514 | } | ||
1515 | |||
1516 | static void clean_cedf(void) | ||
1517 | { | ||
1518 | cleanup_cedf(); | ||
1519 | if (cluster_file) | ||
1520 | remove_proc_entry("cluster", cedf_dir); | ||
1521 | if (cedf_dir) | ||
1522 | remove_plugin_proc_dir(&cedf_plugin); | ||
1523 | } | ||
1524 | |||
1525 | module_init(init_cedf); | ||
1526 | module_exit(clean_cedf); | ||