diff options
-rw-r--r-- | litmus/sched_pfair.c | 137 |
1 files changed, 49 insertions, 88 deletions
diff --git a/litmus/sched_pfair.c b/litmus/sched_pfair.c index 43119e5149fa..055ac623edb4 100644 --- a/litmus/sched_pfair.c +++ b/litmus/sched_pfair.c | |||
@@ -1,7 +1,8 @@ | |||
1 | /* | 1 | /* |
2 | * kernel/sched_pfair.c | 2 | * kernel/sched_pfair.c |
3 | * | 3 | * |
4 | * Implementation of the (global) Pfair scheduling algorithm. | 4 | * Implementation of the PD^2 pfair scheduling algorithm. This |
5 | * implementation realizes "early releasing," i.e., it is work-conserving. | ||
5 | * | 6 | * |
6 | */ | 7 | */ |
7 | 8 | ||
@@ -80,30 +81,20 @@ struct pfair_state { | |||
80 | lt_t offset; /* stagger offset */ | 81 | lt_t offset; /* stagger offset */ |
81 | }; | 82 | }; |
82 | 83 | ||
83 | /* Currently, we limit the maximum period of any task to 2000 quanta. | ||
84 | * The reason is that it makes the implementation easier since we do not | ||
85 | * need to reallocate the release wheel on task arrivals. | ||
86 | * In the future | ||
87 | */ | ||
88 | #define PFAIR_MAX_PERIOD 2000 | ||
89 | |||
90 | struct pfair_cluster { | 84 | struct pfair_cluster { |
91 | struct scheduling_cluster topology; | 85 | struct scheduling_cluster topology; |
92 | 86 | ||
93 | /* The "global" time in this cluster. */ | 87 | /* The "global" time in this cluster. */ |
94 | quanta_t pfair_time; /* the "official" PFAIR clock */ | 88 | quanta_t pfair_time; /* the "official" PFAIR clock */ |
95 | quanta_t merge_time; /* Updated after the release queue has been | ||
96 | * merged. Used by drop_all_references(). | ||
97 | */ | ||
98 | 89 | ||
99 | /* The ready queue for this cluster. */ | 90 | /* The ready queue for this cluster. */ |
100 | rt_domain_t pfair; | 91 | rt_domain_t pfair; |
101 | 92 | ||
102 | /* This is the release queue wheel for this cluster. It is indexed by | 93 | /* The set of jobs that should have their release enacted at the next |
103 | * pfair_time % PFAIR_MAX_PERIOD. Each heap is ordered by PFAIR | 94 | * quantum boundary. |
104 | * priority, so that it can be merged with the ready queue. | ||
105 | */ | 95 | */ |
106 | struct bheap release_queue[PFAIR_MAX_PERIOD]; | 96 | struct bheap release_queue; |
97 | raw_spinlock_t release_lock; | ||
107 | }; | 98 | }; |
108 | 99 | ||
109 | static inline struct pfair_cluster* cpu_cluster(struct pfair_state* state) | 100 | static inline struct pfair_cluster* cpu_cluster(struct pfair_state* state) |
@@ -121,6 +112,11 @@ static inline struct pfair_state* from_cluster_list(struct list_head* pos) | |||
121 | return list_entry(pos, struct pfair_state, topology.cluster_list); | 112 | return list_entry(pos, struct pfair_state, topology.cluster_list); |
122 | } | 113 | } |
123 | 114 | ||
115 | static inline struct pfair_cluster* from_domain(rt_domain_t* rt) | ||
116 | { | ||
117 | return container_of(rt, struct pfair_cluster, pfair); | ||
118 | } | ||
119 | |||
124 | static inline raw_spinlock_t* cluster_lock(struct pfair_cluster* cluster) | 120 | static inline raw_spinlock_t* cluster_lock(struct pfair_cluster* cluster) |
125 | { | 121 | { |
126 | /* The ready_lock is used to serialize all scheduling events. */ | 122 | /* The ready_lock is used to serialize all scheduling events. */ |
@@ -161,21 +157,11 @@ static quanta_t cur_deadline(struct task_struct* t) | |||
161 | return cur_subtask(t)->deadline + tsk_pfair(t)->release; | 157 | return cur_subtask(t)->deadline + tsk_pfair(t)->release; |
162 | } | 158 | } |
163 | 159 | ||
164 | |||
165 | static quanta_t cur_sub_release(struct task_struct* t) | ||
166 | { | ||
167 | return cur_subtask(t)->release + tsk_pfair(t)->release; | ||
168 | } | ||
169 | |||
170 | static quanta_t cur_release(struct task_struct* t) | 160 | static quanta_t cur_release(struct task_struct* t) |
171 | { | 161 | { |
172 | #ifdef EARLY_RELEASE | 162 | /* This is early releasing: only the release of the first subtask |
173 | /* only the release of the first subtask counts when we early | 163 | * counts. */ |
174 | * release */ | ||
175 | return tsk_pfair(t)->release; | 164 | return tsk_pfair(t)->release; |
176 | #else | ||
177 | return cur_sub_release(t); | ||
178 | #endif | ||
179 | } | 165 | } |
180 | 166 | ||
181 | static quanta_t cur_overlap(struct task_struct* t) | 167 | static quanta_t cur_overlap(struct task_struct* t) |
@@ -235,11 +221,16 @@ int pfair_ready_order(struct bheap_node* a, struct bheap_node* b) | |||
235 | return pfair_higher_prio(bheap2task(a), bheap2task(b)); | 221 | return pfair_higher_prio(bheap2task(a), bheap2task(b)); |
236 | } | 222 | } |
237 | 223 | ||
238 | /* return the proper release queue for time t */ | 224 | static void pfair_release_jobs(rt_domain_t* rt, struct bheap* tasks) |
239 | static struct bheap* relq(struct pfair_cluster* cluster, quanta_t t) | ||
240 | { | 225 | { |
241 | struct bheap* rq = cluster->release_queue + (t % PFAIR_MAX_PERIOD); | 226 | struct pfair_cluster* cluster = from_domain(rt); |
242 | return rq; | 227 | unsigned long flags; |
228 | |||
229 | raw_spin_lock_irqsave(&cluster->release_lock, flags); | ||
230 | |||
231 | bheap_union(pfair_ready_order, &cluster->release_queue, tasks); | ||
232 | |||
233 | raw_spin_unlock_irqrestore(&cluster->release_lock, flags); | ||
243 | } | 234 | } |
244 | 235 | ||
245 | static void prepare_release(struct task_struct* t, quanta_t at) | 236 | static void prepare_release(struct task_struct* t, quanta_t at) |
@@ -248,25 +239,12 @@ static void prepare_release(struct task_struct* t, quanta_t at) | |||
248 | tsk_pfair(t)->cur = 0; | 239 | tsk_pfair(t)->cur = 0; |
249 | } | 240 | } |
250 | 241 | ||
251 | static void __pfair_add_release(struct task_struct* t, struct bheap* queue) | ||
252 | { | ||
253 | bheap_insert(pfair_ready_order, queue, | ||
254 | tsk_rt(t)->heap_node); | ||
255 | } | ||
256 | |||
257 | static void pfair_add_release(struct pfair_cluster* cluster, | ||
258 | struct task_struct* t) | ||
259 | { | ||
260 | BUG_ON(bheap_node_in_heap(tsk_rt(t)->heap_node)); | ||
261 | __pfair_add_release(t, relq(cluster, cur_release(t))); | ||
262 | } | ||
263 | |||
264 | /* pull released tasks from the release queue */ | 242 | /* pull released tasks from the release queue */ |
265 | static void poll_releases(struct pfair_cluster* cluster, | 243 | static void poll_releases(struct pfair_cluster* cluster) |
266 | quanta_t time) | ||
267 | { | 244 | { |
268 | __merge_ready(&cluster->pfair, relq(cluster, time)); | 245 | raw_spin_lock(&cluster->release_lock); |
269 | cluster->merge_time = time; | 246 | __merge_ready(&cluster->pfair, &cluster->release_queue); |
247 | raw_spin_unlock(&cluster->release_lock); | ||
270 | } | 248 | } |
271 | 249 | ||
272 | static void check_preempt(struct task_struct* t) | 250 | static void check_preempt(struct task_struct* t) |
@@ -292,16 +270,12 @@ static void drop_all_references(struct task_struct *t) | |||
292 | { | 270 | { |
293 | int cpu; | 271 | int cpu; |
294 | struct pfair_state* s; | 272 | struct pfair_state* s; |
295 | struct bheap* q; | ||
296 | struct pfair_cluster* cluster; | 273 | struct pfair_cluster* cluster; |
297 | if (bheap_node_in_heap(tsk_rt(t)->heap_node)) { | 274 | if (bheap_node_in_heap(tsk_rt(t)->heap_node)) { |
298 | /* figure out what queue the node is in */ | 275 | /* It must be in the ready queue; drop references isn't called |
276 | * when the job is in a release queue. */ | ||
299 | cluster = tsk_pfair(t)->cluster; | 277 | cluster = tsk_pfair(t)->cluster; |
300 | if (time_before_eq(cur_release(t), cluster->merge_time)) | 278 | bheap_delete(pfair_ready_order, &cluster->pfair.ready_queue, |
301 | q = &cluster->pfair.ready_queue; | ||
302 | else | ||
303 | q = relq(cluster, cur_release(t)); | ||
304 | bheap_delete(pfair_ready_order, q, | ||
305 | tsk_rt(t)->heap_node); | 279 | tsk_rt(t)->heap_node); |
306 | } | 280 | } |
307 | for (cpu = 0; cpu < num_online_cpus(); cpu++) { | 281 | for (cpu = 0; cpu < num_online_cpus(); cpu++) { |
@@ -322,11 +296,9 @@ static int advance_subtask(quanta_t time, struct task_struct* t, int cpu) | |||
322 | int to_relq; | 296 | int to_relq; |
323 | p->cur = (p->cur + 1) % p->quanta; | 297 | p->cur = (p->cur + 1) % p->quanta; |
324 | if (!p->cur) { | 298 | if (!p->cur) { |
325 | sched_trace_task_completion(t, 1); | ||
326 | if (tsk_rt(t)->present) { | 299 | if (tsk_rt(t)->present) { |
327 | /* we start a new job */ | 300 | /* we start a new job */ |
328 | prepare_for_next_period(t); | 301 | prepare_for_next_period(t); |
329 | sched_trace_task_release(t); | ||
330 | get_rt_flags(t) = RT_F_RUNNING; | 302 | get_rt_flags(t) = RT_F_RUNNING; |
331 | p->release += p->period; | 303 | p->release += p->period; |
332 | } else { | 304 | } else { |
@@ -361,7 +333,8 @@ static void advance_subtasks(struct pfair_cluster *cluster, quanta_t time) | |||
361 | p->last_cpu = cpu_id(cpu); | 333 | p->last_cpu = cpu_id(cpu); |
362 | if (advance_subtask(time, l, cpu_id(cpu))) { | 334 | if (advance_subtask(time, l, cpu_id(cpu))) { |
363 | cpu->linked = NULL; | 335 | cpu->linked = NULL; |
364 | pfair_add_release(cluster, l); | 336 | sched_trace_task_release(l); |
337 | add_release(&cluster->pfair, l); | ||
365 | } | 338 | } |
366 | } | 339 | } |
367 | } | 340 | } |
@@ -476,7 +449,7 @@ static void schedule_next_quantum(struct pfair_cluster *cluster, quanta_t time) | |||
476 | sched_trace_quantum_boundary(); | 449 | sched_trace_quantum_boundary(); |
477 | 450 | ||
478 | advance_subtasks(cluster, time); | 451 | advance_subtasks(cluster, time); |
479 | poll_releases(cluster, time); | 452 | poll_releases(cluster); |
480 | schedule_subtasks(cluster, time); | 453 | schedule_subtasks(cluster, time); |
481 | 454 | ||
482 | list_for_each(pos, &cluster->topology.cpus) { | 455 | list_for_each(pos, &cluster->topology.cpus) { |
@@ -630,6 +603,9 @@ static struct task_struct* pfair_schedule(struct task_struct * prev) | |||
630 | 603 | ||
631 | raw_spin_lock(cpu_lock(state)); | 604 | raw_spin_lock(cpu_lock(state)); |
632 | 605 | ||
606 | if (is_realtime(prev) && get_rt_flags(prev) == RT_F_SLEEP) | ||
607 | sched_trace_task_completion(prev, 0); | ||
608 | |||
633 | blocks = is_realtime(prev) && !is_running(prev); | 609 | blocks = is_realtime(prev) && !is_running(prev); |
634 | 610 | ||
635 | if (state->local && safe_to_schedule(state->local, cpu_id(state))) | 611 | if (state->local && safe_to_schedule(state->local, cpu_id(state))) |
@@ -663,18 +639,18 @@ static void pfair_task_new(struct task_struct * t, int on_rq, int running) | |||
663 | 639 | ||
664 | raw_spin_lock_irqsave(cluster_lock(cluster), flags); | 640 | raw_spin_lock_irqsave(cluster_lock(cluster), flags); |
665 | 641 | ||
666 | if (running | 642 | prepare_release(t, cluster->pfair_time + 1); |
643 | |||
644 | t->rt_param.scheduled_on = NO_CPU; | ||
645 | |||
646 | if (running) { | ||
667 | #ifdef CONFIG_RELEASE_MASTER | 647 | #ifdef CONFIG_RELEASE_MASTER |
668 | && (task_cpu(t) != cluster->pfair.release_master) | 648 | if (task_cpu(t) != cluster->pfair.release_master) |
669 | #endif | 649 | #endif |
670 | ) { | 650 | t->rt_param.scheduled_on = task_cpu(t); |
671 | t->rt_param.scheduled_on = task_cpu(t); | 651 | __add_ready(&cluster->pfair, t); |
672 | } else { | ||
673 | t->rt_param.scheduled_on = NO_CPU; | ||
674 | } | 652 | } |
675 | 653 | ||
676 | prepare_release(t, cluster->pfair_time + 1); | ||
677 | pfair_add_release(cluster, t); | ||
678 | check_preempt(t); | 654 | check_preempt(t); |
679 | 655 | ||
680 | raw_spin_unlock_irqrestore(cluster_lock(cluster), flags); | 656 | raw_spin_unlock_irqrestore(cluster_lock(cluster), flags); |
@@ -703,8 +679,7 @@ static void pfair_task_wake_up(struct task_struct *t) | |||
703 | release_at(t, now); | 679 | release_at(t, now); |
704 | prepare_release(t, time2quanta(now, CEIL)); | 680 | prepare_release(t, time2quanta(now, CEIL)); |
705 | sched_trace_task_release(t); | 681 | sched_trace_task_release(t); |
706 | /* FIXME: race with pfair_time advancing */ | 682 | __add_ready(&cluster->pfair, t); |
707 | pfair_add_release(cluster, t); | ||
708 | } | 683 | } |
709 | 684 | ||
710 | check_preempt(t); | 685 | check_preempt(t); |
@@ -763,15 +738,11 @@ static void pfair_release_at(struct task_struct* task, lt_t start) | |||
763 | release_at(task, start); | 738 | release_at(task, start); |
764 | release = time2quanta(start, CEIL); | 739 | release = time2quanta(start, CEIL); |
765 | 740 | ||
766 | /* FIXME: support arbitrary offsets. */ | ||
767 | if (release - cluster->pfair_time >= PFAIR_MAX_PERIOD) | ||
768 | release = cluster->pfair_time + PFAIR_MAX_PERIOD; | ||
769 | |||
770 | TRACE_TASK(task, "sys release at %lu\n", release); | 741 | TRACE_TASK(task, "sys release at %lu\n", release); |
771 | 742 | ||
772 | drop_all_references(task); | 743 | drop_all_references(task); |
773 | prepare_release(task, release); | 744 | prepare_release(task, release); |
774 | pfair_add_release(cluster, task); | 745 | add_release(&cluster->pfair, task); |
775 | 746 | ||
776 | raw_spin_unlock_irqrestore(cluster_lock(cluster), flags); | 747 | raw_spin_unlock_irqrestore(cluster_lock(cluster), flags); |
777 | } | 748 | } |
@@ -853,13 +824,6 @@ static long pfair_admit_task(struct task_struct* t) | |||
853 | "The period of %s/%d is not a multiple of %llu.\n", | 824 | "The period of %s/%d is not a multiple of %llu.\n", |
854 | t->comm, t->pid, (unsigned long long) quantum_length); | 825 | t->comm, t->pid, (unsigned long long) quantum_length); |
855 | 826 | ||
856 | if (period >= PFAIR_MAX_PERIOD) { | ||
857 | printk(KERN_WARNING | ||
858 | "PFAIR: Rejecting task %s/%d; its period is too long.\n", | ||
859 | t->comm, t->pid); | ||
860 | return -EINVAL; | ||
861 | } | ||
862 | |||
863 | if (quanta == period) { | 827 | if (quanta == period) { |
864 | /* special case: task has weight 1.0 */ | 828 | /* special case: task has weight 1.0 */ |
865 | printk(KERN_INFO | 829 | printk(KERN_INFO |
@@ -899,12 +863,9 @@ static long pfair_admit_task(struct task_struct* t) | |||
899 | 863 | ||
900 | static void pfair_init_cluster(struct pfair_cluster* cluster) | 864 | static void pfair_init_cluster(struct pfair_cluster* cluster) |
901 | { | 865 | { |
902 | int i; | 866 | rt_domain_init(&cluster->pfair, pfair_ready_order, NULL, pfair_release_jobs); |
903 | 867 | bheap_init(&cluster->release_queue); | |
904 | /* initialize release queue */ | 868 | raw_spin_lock_init(&cluster->release_lock); |
905 | for (i = 0; i < PFAIR_MAX_PERIOD; i++) | ||
906 | bheap_init(&cluster->release_queue[i]); | ||
907 | rt_domain_init(&cluster->pfair, pfair_ready_order, NULL, NULL); | ||
908 | INIT_LIST_HEAD(&cluster->topology.cpus); | 869 | INIT_LIST_HEAD(&cluster->topology.cpus); |
909 | } | 870 | } |
910 | 871 | ||
@@ -918,7 +879,7 @@ static void cleanup_clusters(void) | |||
918 | num_pfair_clusters = 0; | 879 | num_pfair_clusters = 0; |
919 | 880 | ||
920 | /* avoid stale pointers */ | 881 | /* avoid stale pointers */ |
921 | for (i = 0; i < NR_CPUS; i++) | 882 | for (i = 0; i < num_online_cpus(); i++) |
922 | pstate[i]->topology.cluster = NULL; | 883 | pstate[i]->topology.cluster = NULL; |
923 | } | 884 | } |
924 | 885 | ||