diff options
author | Bjoern B. Brandenburg <bbb@cs.unc.edu> | 2011-02-05 21:50:36 -0500 |
---|---|---|
committer | Bjoern B. Brandenburg <bbb@cs.unc.edu> | 2011-02-05 21:50:36 -0500 |
commit | 5f265ef19b75b9bfe41a9cb4f5ba9898dbf767b0 (patch) | |
tree | 150cc8fec367892f212ab09bad3169f39c9fbacd /litmus | |
parent | d38924ba27f38b3d2e0be504e73f086c2e6e3efe (diff) |
Pfair: add support for true sporadic releases
This patch also converts Pfair to implement early releasing such that
no timer wheel is required anymore. This removes the need for a
maximum period restriction.
Diffstat (limited to 'litmus')
-rw-r--r-- | litmus/sched_pfair.c | 130 |
1 files changed, 47 insertions, 83 deletions
diff --git a/litmus/sched_pfair.c b/litmus/sched_pfair.c index 0a64273daa47..7168c77f0872 100644 --- a/litmus/sched_pfair.c +++ b/litmus/sched_pfair.c | |||
@@ -1,7 +1,8 @@ | |||
1 | /* | 1 | /* |
2 | * kernel/sched_pfair.c | 2 | * kernel/sched_pfair.c |
3 | * | 3 | * |
4 | * Implementation of the (global) Pfair scheduling algorithm. | 4 | * Implementation of the PD^2 pfair scheduling algorithm. This |
5 | * implementation realizes "early releasing," i.e., it is work-conserving. | ||
5 | * | 6 | * |
6 | */ | 7 | */ |
7 | 8 | ||
@@ -80,30 +81,20 @@ struct pfair_state { | |||
80 | lt_t offset; /* stagger offset */ | 81 | lt_t offset; /* stagger offset */ |
81 | }; | 82 | }; |
82 | 83 | ||
83 | /* Currently, we limit the maximum period of any task to 2000 quanta. | ||
84 | * The reason is that it makes the implementation easier since we do not | ||
85 | * need to reallocate the release wheel on task arrivals. | ||
86 | * In the future | ||
87 | */ | ||
88 | #define PFAIR_MAX_PERIOD 2000 | ||
89 | |||
90 | struct pfair_cluster { | 84 | struct pfair_cluster { |
91 | struct scheduling_cluster topology; | 85 | struct scheduling_cluster topology; |
92 | 86 | ||
93 | /* The "global" time in this cluster. */ | 87 | /* The "global" time in this cluster. */ |
94 | quanta_t pfair_time; /* the "official" PFAIR clock */ | 88 | quanta_t pfair_time; /* the "official" PFAIR clock */ |
95 | quanta_t merge_time; /* Updated after the release queue has been | ||
96 | * merged. Used by drop_all_references(). | ||
97 | */ | ||
98 | 89 | ||
99 | /* The ready queue for this cluster. */ | 90 | /* The ready queue for this cluster. */ |
100 | rt_domain_t pfair; | 91 | rt_domain_t pfair; |
101 | 92 | ||
102 | /* This is the release queue wheel for this cluster. It is indexed by | 93 | /* The set of jobs that should have their release enacted at the next |
103 | * pfair_time % PFAIR_MAX_PERIOD. Each heap is ordered by PFAIR | 94 | * quantum boundary. |
104 | * priority, so that it can be merged with the ready queue. | ||
105 | */ | 95 | */ |
106 | struct bheap release_queue[PFAIR_MAX_PERIOD]; | 96 | struct bheap release_queue; |
97 | raw_spinlock_t release_lock; | ||
107 | }; | 98 | }; |
108 | 99 | ||
109 | static inline struct pfair_cluster* cpu_cluster(struct pfair_state* state) | 100 | static inline struct pfair_cluster* cpu_cluster(struct pfair_state* state) |
@@ -121,6 +112,11 @@ static inline struct pfair_state* from_cluster_list(struct list_head* pos) | |||
121 | return list_entry(pos, struct pfair_state, topology.cluster_list); | 112 | return list_entry(pos, struct pfair_state, topology.cluster_list); |
122 | } | 113 | } |
123 | 114 | ||
115 | static inline struct pfair_cluster* from_domain(rt_domain_t* rt) | ||
116 | { | ||
117 | return container_of(rt, struct pfair_cluster, pfair); | ||
118 | } | ||
119 | |||
124 | static inline raw_spinlock_t* cluster_lock(struct pfair_cluster* cluster) | 120 | static inline raw_spinlock_t* cluster_lock(struct pfair_cluster* cluster) |
125 | { | 121 | { |
126 | /* The ready_lock is used to serialize all scheduling events. */ | 122 | /* The ready_lock is used to serialize all scheduling events. */ |
@@ -161,21 +157,11 @@ static quanta_t cur_deadline(struct task_struct* t) | |||
161 | return cur_subtask(t)->deadline + tsk_pfair(t)->release; | 157 | return cur_subtask(t)->deadline + tsk_pfair(t)->release; |
162 | } | 158 | } |
163 | 159 | ||
164 | |||
165 | static quanta_t cur_sub_release(struct task_struct* t) | ||
166 | { | ||
167 | return cur_subtask(t)->release + tsk_pfair(t)->release; | ||
168 | } | ||
169 | |||
170 | static quanta_t cur_release(struct task_struct* t) | 160 | static quanta_t cur_release(struct task_struct* t) |
171 | { | 161 | { |
172 | #ifdef EARLY_RELEASE | 162 | /* This is early releasing: only the release of the first subtask |
173 | /* only the release of the first subtask counts when we early | 163 | * counts. */ |
174 | * release */ | ||
175 | return tsk_pfair(t)->release; | 164 | return tsk_pfair(t)->release; |
176 | #else | ||
177 | return cur_sub_release(t); | ||
178 | #endif | ||
179 | } | 165 | } |
180 | 166 | ||
181 | static quanta_t cur_overlap(struct task_struct* t) | 167 | static quanta_t cur_overlap(struct task_struct* t) |
@@ -235,11 +221,16 @@ int pfair_ready_order(struct bheap_node* a, struct bheap_node* b) | |||
235 | return pfair_higher_prio(bheap2task(a), bheap2task(b)); | 221 | return pfair_higher_prio(bheap2task(a), bheap2task(b)); |
236 | } | 222 | } |
237 | 223 | ||
238 | /* return the proper release queue for time t */ | 224 | static void pfair_release_jobs(rt_domain_t* rt, struct bheap* tasks) |
239 | static struct bheap* relq(struct pfair_cluster* cluster, quanta_t t) | ||
240 | { | 225 | { |
241 | struct bheap* rq = cluster->release_queue + (t % PFAIR_MAX_PERIOD); | 226 | struct pfair_cluster* cluster = from_domain(rt); |
242 | return rq; | 227 | unsigned long flags; |
228 | |||
229 | raw_spin_lock_irqsave(&cluster->release_lock, flags); | ||
230 | |||
231 | bheap_union(pfair_ready_order, &cluster->release_queue, tasks); | ||
232 | |||
233 | raw_spin_unlock_irqrestore(&cluster->release_lock, flags); | ||
243 | } | 234 | } |
244 | 235 | ||
245 | static void prepare_release(struct task_struct* t, quanta_t at) | 236 | static void prepare_release(struct task_struct* t, quanta_t at) |
@@ -248,25 +239,12 @@ static void prepare_release(struct task_struct* t, quanta_t at) | |||
248 | tsk_pfair(t)->cur = 0; | 239 | tsk_pfair(t)->cur = 0; |
249 | } | 240 | } |
250 | 241 | ||
251 | static void __pfair_add_release(struct task_struct* t, struct bheap* queue) | ||
252 | { | ||
253 | bheap_insert(pfair_ready_order, queue, | ||
254 | tsk_rt(t)->heap_node); | ||
255 | } | ||
256 | |||
257 | static void pfair_add_release(struct pfair_cluster* cluster, | ||
258 | struct task_struct* t) | ||
259 | { | ||
260 | BUG_ON(bheap_node_in_heap(tsk_rt(t)->heap_node)); | ||
261 | __pfair_add_release(t, relq(cluster, cur_release(t))); | ||
262 | } | ||
263 | |||
264 | /* pull released tasks from the release queue */ | 242 | /* pull released tasks from the release queue */ |
265 | static void poll_releases(struct pfair_cluster* cluster, | 243 | static void poll_releases(struct pfair_cluster* cluster) |
266 | quanta_t time) | ||
267 | { | 244 | { |
268 | __merge_ready(&cluster->pfair, relq(cluster, time)); | 245 | raw_spin_lock(&cluster->release_lock); |
269 | cluster->merge_time = time; | 246 | __merge_ready(&cluster->pfair, &cluster->release_queue); |
247 | raw_spin_unlock(&cluster->release_lock); | ||
270 | } | 248 | } |
271 | 249 | ||
272 | static void check_preempt(struct task_struct* t) | 250 | static void check_preempt(struct task_struct* t) |
@@ -292,16 +270,12 @@ static void drop_all_references(struct task_struct *t) | |||
292 | { | 270 | { |
293 | int cpu; | 271 | int cpu; |
294 | struct pfair_state* s; | 272 | struct pfair_state* s; |
295 | struct bheap* q; | ||
296 | struct pfair_cluster* cluster; | 273 | struct pfair_cluster* cluster; |
297 | if (bheap_node_in_heap(tsk_rt(t)->heap_node)) { | 274 | if (bheap_node_in_heap(tsk_rt(t)->heap_node)) { |
298 | /* figure out what queue the node is in */ | 275 | /* It must be in the ready queue; drop references isn't called |
276 | * when the job is in a release queue. */ | ||
299 | cluster = tsk_pfair(t)->cluster; | 277 | cluster = tsk_pfair(t)->cluster; |
300 | if (time_before_eq(cur_release(t), cluster->merge_time)) | 278 | bheap_delete(pfair_ready_order, &cluster->pfair.ready_queue, |
301 | q = &cluster->pfair.ready_queue; | ||
302 | else | ||
303 | q = relq(cluster, cur_release(t)); | ||
304 | bheap_delete(pfair_ready_order, q, | ||
305 | tsk_rt(t)->heap_node); | 279 | tsk_rt(t)->heap_node); |
306 | } | 280 | } |
307 | for (cpu = 0; cpu < num_online_cpus(); cpu++) { | 281 | for (cpu = 0; cpu < num_online_cpus(); cpu++) { |
@@ -322,11 +296,9 @@ static int advance_subtask(quanta_t time, struct task_struct* t, int cpu) | |||
322 | int to_relq; | 296 | int to_relq; |
323 | p->cur = (p->cur + 1) % p->quanta; | 297 | p->cur = (p->cur + 1) % p->quanta; |
324 | if (!p->cur) { | 298 | if (!p->cur) { |
325 | sched_trace_task_completion(t, 1); | ||
326 | if (tsk_rt(t)->present) { | 299 | if (tsk_rt(t)->present) { |
327 | /* we start a new job */ | 300 | /* we start a new job */ |
328 | prepare_for_next_period(t); | 301 | prepare_for_next_period(t); |
329 | sched_trace_task_release(t); | ||
330 | get_rt_flags(t) = RT_F_RUNNING; | 302 | get_rt_flags(t) = RT_F_RUNNING; |
331 | p->release += p->period; | 303 | p->release += p->period; |
332 | } else { | 304 | } else { |
@@ -361,7 +333,8 @@ static void advance_subtasks(struct pfair_cluster *cluster, quanta_t time) | |||
361 | p->last_cpu = cpu_id(cpu); | 333 | p->last_cpu = cpu_id(cpu); |
362 | if (advance_subtask(time, l, cpu_id(cpu))) { | 334 | if (advance_subtask(time, l, cpu_id(cpu))) { |
363 | cpu->linked = NULL; | 335 | cpu->linked = NULL; |
364 | pfair_add_release(cluster, l); | 336 | sched_trace_task_release(l); |
337 | add_release(&cluster->pfair, l); | ||
365 | } | 338 | } |
366 | } | 339 | } |
367 | } | 340 | } |
@@ -471,7 +444,7 @@ static void schedule_next_quantum(struct pfair_cluster *cluster, quanta_t time) | |||
471 | sched_trace_quantum_boundary(); | 444 | sched_trace_quantum_boundary(); |
472 | 445 | ||
473 | advance_subtasks(cluster, time); | 446 | advance_subtasks(cluster, time); |
474 | poll_releases(cluster, time); | 447 | poll_releases(cluster); |
475 | schedule_subtasks(cluster, time); | 448 | schedule_subtasks(cluster, time); |
476 | 449 | ||
477 | list_for_each(pos, &cluster->topology.cpus) { | 450 | list_for_each(pos, &cluster->topology.cpus) { |
@@ -617,6 +590,9 @@ static struct task_struct* pfair_schedule(struct task_struct * prev) | |||
617 | 590 | ||
618 | raw_spin_lock(cpu_lock(state)); | 591 | raw_spin_lock(cpu_lock(state)); |
619 | 592 | ||
593 | if (is_realtime(prev) && get_rt_flags(prev) == RT_F_SLEEP) | ||
594 | sched_trace_task_completion(prev, 0); | ||
595 | |||
620 | blocks = is_realtime(prev) && !is_running(prev); | 596 | blocks = is_realtime(prev) && !is_running(prev); |
621 | 597 | ||
622 | if (state->local && safe_to_schedule(state->local, cpu_id(state))) | 598 | if (state->local && safe_to_schedule(state->local, cpu_id(state))) |
@@ -649,13 +625,16 @@ static void pfair_task_new(struct task_struct * t, int on_rq, int running) | |||
649 | cluster = tsk_pfair(t)->cluster; | 625 | cluster = tsk_pfair(t)->cluster; |
650 | 626 | ||
651 | raw_spin_lock_irqsave(cluster_lock(cluster), flags); | 627 | raw_spin_lock_irqsave(cluster_lock(cluster), flags); |
652 | if (running) | 628 | |
629 | prepare_release(t, cluster->pfair_time + 1); | ||
630 | |||
631 | if (running) { | ||
653 | t->rt_param.scheduled_on = task_cpu(t); | 632 | t->rt_param.scheduled_on = task_cpu(t); |
633 | __add_ready(&cluster->pfair, t); | ||
634 | } | ||
654 | else | 635 | else |
655 | t->rt_param.scheduled_on = NO_CPU; | 636 | t->rt_param.scheduled_on = NO_CPU; |
656 | 637 | ||
657 | prepare_release(t, cluster->pfair_time + 1); | ||
658 | pfair_add_release(cluster, t); | ||
659 | check_preempt(t); | 638 | check_preempt(t); |
660 | 639 | ||
661 | raw_spin_unlock_irqrestore(cluster_lock(cluster), flags); | 640 | raw_spin_unlock_irqrestore(cluster_lock(cluster), flags); |
@@ -684,8 +663,7 @@ static void pfair_task_wake_up(struct task_struct *t) | |||
684 | release_at(t, now); | 663 | release_at(t, now); |
685 | prepare_release(t, time2quanta(now, CEIL)); | 664 | prepare_release(t, time2quanta(now, CEIL)); |
686 | sched_trace_task_release(t); | 665 | sched_trace_task_release(t); |
687 | /* FIXME: race with pfair_time advancing */ | 666 | __add_ready(&cluster->pfair, t); |
688 | pfair_add_release(cluster, t); | ||
689 | } | 667 | } |
690 | 668 | ||
691 | check_preempt(t); | 669 | check_preempt(t); |
@@ -744,15 +722,11 @@ static void pfair_release_at(struct task_struct* task, lt_t start) | |||
744 | release_at(task, start); | 722 | release_at(task, start); |
745 | release = time2quanta(start, CEIL); | 723 | release = time2quanta(start, CEIL); |
746 | 724 | ||
747 | /* FIXME: support arbitrary offsets. */ | ||
748 | if (release - cluster->pfair_time >= PFAIR_MAX_PERIOD) | ||
749 | release = cluster->pfair_time + PFAIR_MAX_PERIOD; | ||
750 | |||
751 | TRACE_TASK(task, "sys release at %lu\n", release); | 725 | TRACE_TASK(task, "sys release at %lu\n", release); |
752 | 726 | ||
753 | drop_all_references(task); | 727 | drop_all_references(task); |
754 | prepare_release(task, release); | 728 | prepare_release(task, release); |
755 | pfair_add_release(cluster, task); | 729 | add_release(&cluster->pfair, task); |
756 | 730 | ||
757 | raw_spin_unlock_irqrestore(cluster_lock(cluster), flags); | 731 | raw_spin_unlock_irqrestore(cluster_lock(cluster), flags); |
758 | } | 732 | } |
@@ -834,13 +808,6 @@ static long pfair_admit_task(struct task_struct* t) | |||
834 | "The period of %s/%d is not a multiple of %llu.\n", | 808 | "The period of %s/%d is not a multiple of %llu.\n", |
835 | t->comm, t->pid, (unsigned long long) quantum_length); | 809 | t->comm, t->pid, (unsigned long long) quantum_length); |
836 | 810 | ||
837 | if (period >= PFAIR_MAX_PERIOD) { | ||
838 | printk(KERN_WARNING | ||
839 | "PFAIR: Rejecting task %s/%d; its period is too long.\n", | ||
840 | t->comm, t->pid); | ||
841 | return -EINVAL; | ||
842 | } | ||
843 | |||
844 | if (quanta == period) { | 811 | if (quanta == period) { |
845 | /* special case: task has weight 1.0 */ | 812 | /* special case: task has weight 1.0 */ |
846 | printk(KERN_INFO | 813 | printk(KERN_INFO |
@@ -880,12 +847,9 @@ static long pfair_admit_task(struct task_struct* t) | |||
880 | 847 | ||
881 | static void pfair_init_cluster(struct pfair_cluster* cluster) | 848 | static void pfair_init_cluster(struct pfair_cluster* cluster) |
882 | { | 849 | { |
883 | int i; | 850 | rt_domain_init(&cluster->pfair, pfair_ready_order, NULL, pfair_release_jobs); |
884 | 851 | bheap_init(&cluster->release_queue); | |
885 | /* initialize release queue */ | 852 | raw_spin_lock_init(&cluster->release_lock); |
886 | for (i = 0; i < PFAIR_MAX_PERIOD; i++) | ||
887 | bheap_init(&cluster->release_queue[i]); | ||
888 | rt_domain_init(&cluster->pfair, pfair_ready_order, NULL, NULL); | ||
889 | INIT_LIST_HEAD(&cluster->topology.cpus); | 853 | INIT_LIST_HEAD(&cluster->topology.cpus); |
890 | } | 854 | } |
891 | 855 | ||
@@ -899,7 +863,7 @@ static void cleanup_clusters(void) | |||
899 | num_pfair_clusters = 0; | 863 | num_pfair_clusters = 0; |
900 | 864 | ||
901 | /* avoid stale pointers */ | 865 | /* avoid stale pointers */ |
902 | for (i = 0; i < NR_CPUS; i++) | 866 | for (i = 0; i < num_online_cpus(); i++) |
903 | pstate[i]->topology.cluster = NULL; | 867 | pstate[i]->topology.cluster = NULL; |
904 | } | 868 | } |
905 | 869 | ||