| -rw-r--r-- | include/litmus/rt_param.h | 14 | ||||
| -rw-r--r-- | litmus/Makefile | 9 | ||||
| -rwxr-xr-x | litmus/sched_pfair.c | 785 |
3 files changed, 799 insertions, 9 deletions
diff --git a/include/litmus/rt_param.h b/include/litmus/rt_param.h index a7dd67a81a..7bb568432e 100644 --- a/include/litmus/rt_param.h +++ b/include/litmus/rt_param.h | |||
| @@ -64,6 +64,8 @@ struct rt_job { | |||
| 64 | }; | 64 | }; |
| 65 | 65 | ||
| 66 | 66 | ||
| 67 | struct pfair_param; | ||
| 68 | |||
| 67 | /* RT task parameters for scheduling extensions | 69 | /* RT task parameters for scheduling extensions |
| 68 | * These parameters are inherited during clone and therefore must | 70 | * These parameters are inherited during clone and therefore must |
| 69 | * be explicitly set up before the task set is launched. | 71 | * be explicitly set up before the task set is launched. |
| @@ -108,15 +110,12 @@ struct rt_param { | |||
| 108 | * is currently scheduled. It is the responsibility of the | 110 | * is currently scheduled. It is the responsibility of the |
| 109 | * plugin to avoid race conditions. | 111 | * plugin to avoid race conditions. |
| 110 | * | 112 | * |
| 111 | * Used by GSN-EDF. | 113 | * This is used by GSN-EDF and PFAIR. |
| 112 | */ | 114 | */ |
| 113 | volatile int scheduled_on; | 115 | volatile int scheduled_on; |
| 114 | 116 | ||
| 115 | /* Is the stack of the task currently in use? Currently, this | 117 | /* Is the stack of the task currently in use? This is updated by |
| 116 | * is the responsibility of the plugin to update this field. | 118 | * the LITMUS core. |
| 117 | * Maybe become part of the LITMUS core some day. | ||
| 118 | * | ||
| 119 | * Used by GSN-EDF. | ||
| 120 | * | 119 | * |
| 121 | * Be careful to avoid deadlocks! | 120 | * Be careful to avoid deadlocks! |
| 122 | */ | 121 | */ |
| @@ -130,6 +129,9 @@ struct rt_param { | |||
| 130 | */ | 129 | */ |
| 131 | volatile int linked_on; | 130 | volatile int linked_on; |
| 132 | 131 | ||
| 132 | /* PFAIR/PD^2 state. Allocated on demand. */ | ||
| 133 | struct pfair_param* pfair; | ||
| 134 | |||
| 133 | /* Fields saved before BE->RT transition. | 135 | /* Fields saved before BE->RT transition. |
| 134 | */ | 136 | */ |
| 135 | int old_policy; | 137 | int old_policy; |
diff --git a/litmus/Makefile b/litmus/Makefile index bfe393eb56..545203876a 100644 --- a/litmus/Makefile +++ b/litmus/Makefile | |||
| @@ -3,9 +3,12 @@ | |||
| 3 | # | 3 | # |
| 4 | 4 | ||
| 5 | obj-y = sched_plugin.o litmus.o sched_trace.o \ | 5 | obj-y = sched_plugin.o litmus.o sched_trace.o \ |
| 6 | edf_common.o jobs.o\ | 6 | edf_common.o jobs.o \ |
| 7 | sched_gsn_edf.o sched_psn_edf.o sched_cedf.o \ | ||
| 8 | rt_domain.o fdso.o sync.o \ | 7 | rt_domain.o fdso.o sync.o \ |
| 9 | fmlp.o srp.o norqlock.o | 8 | fmlp.o srp.o norqlock.o \ |
| 9 | sched_gsn_edf.o \ | ||
| 10 | sched_psn_edf.o \ | ||
| 11 | sched_cedf.o \ | ||
| 12 | sched_pfair.o | ||
| 10 | 13 | ||
| 11 | obj-$(CONFIG_FEATHER_TRACE) += trace.o ft_event.o | 14 | obj-$(CONFIG_FEATHER_TRACE) += trace.o ft_event.o |
diff --git a/litmus/sched_pfair.c b/litmus/sched_pfair.c new file mode 100755 index 0000000000..6f95688508 --- /dev/null +++ b/litmus/sched_pfair.c | |||
| @@ -0,0 +1,785 @@ | |||
| 1 | /* | ||
| 2 | * kernel/sched_pfair.c | ||
| 3 | * | ||
| 4 | * Implementation of the (global) Pfair scheduling algorithm. | ||
| 5 | * | ||
| 6 | */ | ||
| 7 | |||
| 8 | #include <asm/div64.h> | ||
| 9 | #include <linux/delay.h> | ||
| 10 | #include <linux/module.h> | ||
| 11 | #include <linux/spinlock.h> | ||
| 12 | #include <linux/percpu.h> | ||
| 13 | #include <linux/sched.h> | ||
| 14 | #include <linux/list.h> | ||
| 15 | |||
| 16 | #include <litmus/litmus.h> | ||
| 17 | #include <litmus/jobs.h> | ||
| 18 | #include <litmus/rt_domain.h> | ||
| 19 | #include <litmus/sched_plugin.h> | ||
| 20 | #include <litmus/sched_trace.h> | ||
| 21 | |||
| 22 | #include <litmus/heap.h> | ||
| 23 | |||
| 24 | /* Tick period is used to convert ns-specified execution | ||
| 25 | * costs and periods into tick-based equivalents. | ||
| 26 | */ | ||
| 27 | extern ktime_t tick_period; | ||
| 28 | |||
| 29 | /* make the unit explicit */ | ||
| 30 | typedef unsigned long quanta_t; | ||
| 31 | |||
| 32 | struct subtask { | ||
| 33 | /* measured in quanta relative to job release */ | ||
| 34 | quanta_t release; | ||
| 35 | quanta_t deadline; | ||
| 36 | quanta_t overlap; /* called "b bit" by PD^2 */ | ||
| 37 | quanta_t group_deadline; | ||
| 38 | }; | ||
| 39 | |||
| 40 | struct pfair_param { | ||
| 41 | quanta_t quanta; /* number of subtasks */ | ||
| 42 | quanta_t cur; /* index of current subtask */ | ||
| 43 | |||
| 44 | quanta_t release; /* in quanta */ | ||
| 45 | quanta_t period; /* in quanta */ | ||
| 46 | |||
| 47 | quanta_t last_quantum; /* when scheduled last */ | ||
| 48 | int last_cpu; /* where scheduled last */ | ||
| 49 | |||
| 50 | unsigned int present; /* Can the task be scheduled? */ | ||
| 51 | |||
| 52 | struct subtask subtasks[0]; /* allocate together with pfair_param */ | ||
| 53 | }; | ||
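The zero-length subtasks[0] member means the per-task parameters and all of their subtask windows live in a single allocation. A minimal sketch of how such an object is sized (it mirrors what pfair_admit_task() does further down; quanta here is just an illustrative variable):

    struct pfair_param* p;

    p = kmalloc(sizeof(struct pfair_param) +
                quanta * sizeof(struct subtask), GFP_ATOMIC);
    if (p)
            p->quanta = quanta;     /* p->subtasks[0..quanta-1] are now usable */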
| 54 | |||
| 55 | #define tsk_pfair(tsk) ((tsk)->rt_param.pfair) | ||
| 56 | |||
| 57 | struct pfair_state { | ||
| 58 | int cpu; | ||
| 59 | volatile quanta_t cur_tick; /* updated by the CPU that is advancing | ||
| 60 | * the time */ | ||
| 61 | volatile quanta_t local_tick; /* What tick is the local CPU currently | ||
| 62 | * executing? Updated only by the local | ||
| 63 | * CPU. In QEMU, this may lag behind the | ||
| 64 | * current tick. In a real system, with | ||
| 65 | * proper timers and aligned quanta, | ||
| 66 | * that should only be the | ||
| 67 | * case for a very short time after the | ||
| 68 | * time advanced. With staggered quanta, | ||
| 69 | * it will lag for the duration of the | ||
| 70 | * offset. | ||
| 71 | */ | ||
| 72 | |||
| 73 | struct task_struct* linked; /* the task that should be executing */ | ||
| 74 | struct task_struct* local; /* the local copy of linked */ | ||
| 75 | struct task_struct* scheduled; /* what is actually scheduled */ | ||
| 76 | |||
| 77 | unsigned long missed_quanta; | ||
| 78 | }; | ||
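The three task pointers track successive stages of the same per-quantum decision; the following summary is read off the code below rather than stated verbatim in the patch:

    /* linked    : the task chosen for this CPU at the last quantum boundary
     *             (written by whichever CPU ran schedule_next_quantum())
     * local     : this CPU's private snapshot of linked, taken in pfair_tick()
     * scheduled : the task the CPU is actually running, maintained by
     *             pfair_schedule(); it lags linked until the reschedule happens */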
| 79 | |||
| 80 | /* Currently, we limit the maximum period of any task to 1000 quanta. | ||
| 81 | * The reason is that it makes the implementation easier since we do not | ||
| 82 | * need to reallocate the release wheel on task arrivals. | ||
| 83 | * In the future, this restriction could be relaxed. | ||
| 84 | */ | ||
| 85 | #define PFAIR_MAX_PERIOD 1000 | ||
| 86 | |||
| 87 | /* This is the release queue wheel. It is indexed by pfair_time % | ||
| 88 | * PFAIR_MAX_PERIOD. Each heap is ordered by PFAIR priority, so that it can be | ||
| 89 | * merged with the ready queue. | ||
| 90 | */ | ||
| 91 | static struct heap release_queue[PFAIR_MAX_PERIOD]; | ||
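To make the wheel indexing concrete (numbers hypothetical): a release scheduled for quantum 2500 is filed under slot 2500 % 1000 = 500, and because every admitted task has a period of fewer than PFAIR_MAX_PERIOD quanta, its next release can never wrap around onto a slot that poll_releases() has not yet drained. A minimal sketch of the lookup, equivalent to the relq() helper defined below:

    static struct heap* release_slot(quanta_t at)
    {
            return &release_queue[at % PFAIR_MAX_PERIOD];
    }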
| 92 | |||
| 93 | DEFINE_PER_CPU(struct pfair_state, pfair_state); | ||
| 94 | struct pfair_state* pstate[NR_CPUS]; /* short cut */ | ||
| 95 | |||
| 96 | #define NO_CPU 0xffffffff | ||
| 97 | |||
| 98 | static quanta_t pfair_time = 0; /* the "official" PFAIR clock */ | ||
| 99 | static quanta_t merge_time = 0; /* Updated after the release queue has been | ||
| 100 | * merged. Used by drop_all_references(). | ||
| 101 | */ | ||
| 102 | |||
| 103 | static rt_domain_t pfair; | ||
| 104 | |||
| 105 | /* The pfair_lock is used to serialize all scheduling events. | ||
| 106 | */ | ||
| 107 | #define pfair_lock pfair.ready_lock | ||
| 108 | |||
| 109 | /* Enable for lots of trace info. | ||
| 110 | * #define PFAIR_DEBUG | ||
| 111 | */ | ||
| 112 | |||
| 113 | #ifdef PFAIR_DEBUG | ||
| 114 | #define PTRACE_TASK(t, f, args...) TRACE_TASK(t, f, ## args) | ||
| 115 | #define PTRACE(f, args...) TRACE(f, ## args) | ||
| 116 | #else | ||
| 117 | #define PTRACE_TASK(t, f, args...) | ||
| 118 | #define PTRACE(f, args...) | ||
| 119 | #endif | ||
| 120 | |||
| 121 | /* gcc will inline all of these accessor functions... */ | ||
| 122 | static struct subtask* cur_subtask(struct task_struct* t) | ||
| 123 | { | ||
| 124 | return tsk_pfair(t)->subtasks + tsk_pfair(t)->cur; | ||
| 125 | } | ||
| 126 | |||
| 127 | static quanta_t cur_deadline(struct task_struct* t) | ||
| 128 | { | ||
| 129 | return cur_subtask(t)->deadline + tsk_pfair(t)->release; | ||
| 130 | } | ||
| 131 | |||
| 132 | static quanta_t cur_release(struct task_struct* t) | ||
| 133 | { | ||
| 134 | #ifdef EARLY_RELEASE | ||
| 135 | /* only the release of the first subtask counts when we early | ||
| 136 | * release */ | ||
| 137 | return tsk_pfair(t)->release; | ||
| 138 | #else | ||
| 139 | return cur_subtask(t)->release + tsk_pfair(t)->release; | ||
| 140 | #endif | ||
| 141 | } | ||
| 142 | |||
| 143 | static quanta_t cur_sub_release(struct task_struct* t) | ||
| 144 | { | ||
| 145 | return cur_subtask(t)->release + tsk_pfair(t)->release; | ||
| 146 | } | ||
| 147 | |||
| 148 | static quanta_t cur_overlap(struct task_struct* t) | ||
| 149 | { | ||
| 150 | return cur_subtask(t)->overlap; | ||
| 151 | } | ||
| 152 | |||
| 153 | static quanta_t cur_group_deadline(struct task_struct* t) | ||
| 154 | { | ||
| 155 | quanta_t gdl = cur_subtask(t)->group_deadline; | ||
| 156 | if (gdl) | ||
| 157 | return gdl + tsk_pfair(t)->release; | ||
| 158 | else | ||
| 159 | return gdl; | ||
| 160 | } | ||
| 161 | |||
| 162 | enum round { | ||
| 163 | FLOOR, | ||
| 164 | CEIL | ||
| 165 | }; | ||
| 166 | |||
| 167 | static quanta_t time2quanta(lt_t time, enum round round) | ||
| 168 | { | ||
| 169 | s64 quantum_length = ktime_to_ns(tick_period); | ||
| 170 | |||
| 171 | if (do_div(time, quantum_length) && round == CEIL) | ||
| 172 | time++; | ||
| 173 | return (quanta_t) time; | ||
| 174 | } | ||
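For example, assuming a 1 ms quantum (e.g. HZ=1000, so ktime_to_ns(tick_period) == 1000000), a 2.5 ms interval converts as follows:

    quanta_t c = time2quanta(2500000ULL, CEIL);     /* 3: execution costs round up */
    quanta_t f = time2quanta(2500000ULL, FLOOR);    /* 2: a FLOOR conversion truncates */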
| 175 | |||
| 176 | static int pfair_higher_prio(struct task_struct* first, | ||
| 177 | struct task_struct* second) | ||
| 178 | { | ||
| 179 | return /* first task must exist */ | ||
| 180 | first && ( | ||
| 181 | /* Does the second task exist and is it a real-time task? If | ||
| 182 | * not, the first task (which is a RT task) has higher | ||
| 183 | * priority. | ||
| 184 | */ | ||
| 185 | !second || !is_realtime(second) || | ||
| 186 | |||
| 187 | /* Is the (subtask) deadline of the first task earlier? | ||
| 188 | * Then it has higher priority. | ||
| 189 | */ | ||
| 190 | time_before(cur_deadline(first), cur_deadline(second)) || | ||
| 191 | |||
| 192 | /* Do we have a deadline tie? | ||
| 193 | * Then break by B-bit. | ||
| 194 | */ | ||
| 195 | (cur_deadline(first) == cur_deadline(second) && | ||
| 196 | cur_overlap(first) > cur_overlap(second)) || | ||
| 197 | |||
| 198 | /* Do we have a B-bit tie? | ||
| 199 | * Then break by group deadline. | ||
| 200 | */ | ||
| 201 | (cur_overlap(first) == cur_overlap(second) && | ||
| 202 | time_after(cur_group_deadline(first), | ||
| 203 | cur_group_deadline(second))) || | ||
| 204 | |||
| 205 | /* Do we have a group deadline tie? | ||
| 206 | * Then break by PID, which are unique. | ||
| 207 | */ | ||
| 208 | (cur_group_deadline(first) == | ||
| 209 | cur_group_deadline(second) && | ||
| 210 | first->pid < second->pid)); | ||
| 211 | } | ||
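A small worked illustration of the PD^2 tie-break order encoded above (values hypothetical): if tasks A and B share subtask deadline 12, the one whose b-bit is set wins; if the b-bits also match, the later group deadline wins; and if those match as well, the lower PID wins. The intended PD^2 ordering, written out strictly nested over plain values:

    struct pd2_keys { quanta_t dl, b, gdl; int pid; };  /* hypothetical helper type */

    static int pd2_first_wins(struct pd2_keys a, struct pd2_keys c)
    {
            if (a.dl != c.dl)
                    return time_before(a.dl, c.dl);  /* earlier deadline wins      */
            if (a.b != c.b)
                    return a.b > c.b;                /* set b-bit wins the tie     */
            if (a.gdl != c.gdl)
                    return time_after(a.gdl, c.gdl); /* later group deadline wins  */
            return a.pid < c.pid;                    /* unique PIDs break the tie  */
    }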
| 212 | |||
| 213 | int pfair_ready_order(struct heap_node* a, struct heap_node* b) | ||
| 214 | { | ||
| 215 | return pfair_higher_prio(heap2task(a), heap2task(b)); | ||
| 216 | } | ||
| 217 | |||
| 218 | /* return the proper release queue for time t */ | ||
| 219 | static struct heap* relq(quanta_t t) | ||
| 220 | { | ||
| 221 | struct heap* rq = &release_queue[t % PFAIR_MAX_PERIOD]; | ||
| 222 | return rq; | ||
| 223 | } | ||
| 224 | |||
| 225 | static void prepare_release(struct task_struct* t, quanta_t at) | ||
| 226 | { | ||
| 227 | tsk_pfair(t)->release = at; | ||
| 228 | tsk_pfair(t)->cur = 0; | ||
| 229 | } | ||
| 230 | |||
| 231 | static void __pfair_add_release(struct task_struct* t, struct heap* queue) | ||
| 232 | { | ||
| 233 | heap_insert(pfair_ready_order, queue, | ||
| 234 | tsk_rt(t)->heap_node); | ||
| 235 | } | ||
| 236 | |||
| 237 | static void pfair_add_release(struct task_struct* t) | ||
| 238 | { | ||
| 239 | BUG_ON(heap_node_in_heap(tsk_rt(t)->heap_node)); | ||
| 240 | __pfair_add_release(t, relq(cur_release(t))); | ||
| 241 | } | ||
| 242 | |||
| 243 | /* pull released tasks from the release queue */ | ||
| 244 | static void poll_releases(quanta_t time) | ||
| 245 | { | ||
| 246 | heap_union(pfair_ready_order, &pfair.ready_queue, relq(time)); | ||
| 247 | merge_time = time; | ||
| 248 | } | ||
| 249 | |||
| 250 | static void check_preempt(struct task_struct* t) | ||
| 251 | { | ||
| 252 | int cpu = NO_CPU; | ||
| 253 | if (tsk_rt(t)->linked_on != tsk_rt(t)->scheduled_on && | ||
| 254 | tsk_pfair(t)->present) { | ||
| 255 | /* the task can be scheduled and | ||
| 256 | * is not scheduled where it ought to be scheduled | ||
| 257 | */ | ||
| 258 | cpu = tsk_rt(t)->linked_on != NO_CPU ? | ||
| 259 | tsk_rt(t)->linked_on : | ||
| 260 | tsk_rt(t)->scheduled_on; | ||
| 261 | PTRACE_TASK(t, "linked_on:%d, scheduled_on:%d\n", | ||
| 262 | tsk_rt(t)->linked_on, tsk_rt(t)->scheduled_on); | ||
| 263 | /* preempt */ | ||
| 264 | if (cpu == smp_processor_id()) | ||
| 265 | set_tsk_need_resched(current); | ||
| 266 | else { | ||
| 267 | smp_send_reschedule(cpu); | ||
| 268 | } | ||
| 269 | } | ||
| 270 | } | ||
| 271 | |||
| 272 | /* returns 1 if the task needs to go to the release queue */ | ||
| 273 | static int advance_subtask(quanta_t time, struct task_struct* t, int cpu) | ||
| 274 | { | ||
| 275 | struct pfair_param* p = tsk_pfair(t); | ||
| 276 | |||
| 277 | p->cur = (p->cur + 1) % p->quanta; | ||
| 278 | TRACE_TASK(t, "on %d advanced to subtask %lu\n", | ||
| 279 | cpu, | ||
| 280 | p->cur); | ||
| 281 | if (!p->cur) { | ||
| 282 | /* we start a new job */ | ||
| 283 | get_rt_flags(t) = RT_F_RUNNING; | ||
| 284 | prepare_for_next_period(t); | ||
| 285 | p->release += p->period; | ||
| 286 | } | ||
| 287 | return time_after(cur_release(t), time); | ||
| 288 | } | ||
| 289 | |||
| 290 | static void advance_subtasks(quanta_t time) | ||
| 291 | { | ||
| 292 | int cpu, missed; | ||
| 293 | struct task_struct* l; | ||
| 294 | struct pfair_param* p; | ||
| 295 | |||
| 296 | for_each_online_cpu(cpu) { | ||
| 297 | l = pstate[cpu]->linked; | ||
| 298 | missed = pstate[cpu]->linked != pstate[cpu]->local; | ||
| 299 | if (l) { | ||
| 300 | p = tsk_pfair(l); | ||
| 301 | p->last_quantum = time; | ||
| 302 | p->last_cpu = cpu; | ||
| 303 | if (advance_subtask(time, l, cpu)) { | ||
| 304 | pstate[cpu]->linked = NULL; | ||
| 305 | pfair_add_release(l); | ||
| 306 | } | ||
| 307 | } | ||
| 308 | } | ||
| 309 | } | ||
| 310 | |||
| 311 | static int target_cpu(quanta_t time, struct task_struct* t, int default_cpu) | ||
| 312 | { | ||
| 313 | int cpu; | ||
| 314 | if (tsk_rt(t)->scheduled_on != NO_CPU) { | ||
| 315 | /* always observe scheduled_on linkage */ | ||
| 316 | default_cpu = tsk_rt(t)->scheduled_on; | ||
| 317 | PTRACE_TASK(t, "forced on %d (scheduled on)\n", default_cpu); | ||
| 318 | } else if (tsk_pfair(t)->last_quantum == time - 1) { | ||
| 319 | /* back2back quanta */ | ||
| 320 | /* Only observe last_quantum if no scheduled_on is in the way. | ||
| 321 | * This should only kick in if a CPU missed quanta, and that | ||
| 322 | * *should* only happen in QEMU. | ||
| 323 | */ | ||
| 324 | cpu = tsk_pfair(t)->last_cpu; | ||
| 325 | if (!pstate[cpu]->linked || | ||
| 326 | tsk_rt(pstate[cpu]->linked)->scheduled_on != cpu) { | ||
| 327 | default_cpu = cpu; | ||
| 328 | PTRACE_TASK(t, "forced on %d (linked on)\n", | ||
| 329 | default_cpu); | ||
| 330 | } else { | ||
| 331 | PTRACE_TASK(t, "DID NOT force on %d (linked on)\n", | ||
| 332 | default_cpu); | ||
| 333 | } | ||
| 334 | } | ||
| 335 | return default_cpu; | ||
| 336 | } | ||
| 337 | |||
| 338 | /* returns one if linking was redirected and this CPU still needs a task */ | ||
| 339 | static int pfair_link(quanta_t time, int cpu, | ||
| 340 | struct task_struct* t) | ||
| 341 | { | ||
| 342 | int target = target_cpu(time, t, cpu); | ||
| 343 | struct task_struct* prev = pstate[cpu]->linked; | ||
| 344 | struct task_struct* other; | ||
| 345 | |||
| 346 | PTRACE_TASK(t, "linked to %d for quantum %lu\n", target, time); | ||
| 347 | if (target != cpu) { | ||
| 348 | other = pstate[target]->linked; | ||
| 349 | pstate[target]->linked = t; | ||
| 350 | tsk_rt(t)->linked_on = target; | ||
| 351 | if (!other) | ||
| 352 | /* linked ok, but reschedule this CPU */ | ||
| 353 | return 1; | ||
| 354 | if (target < cpu) { | ||
| 355 | /* link other to cpu instead */ | ||
| 356 | tsk_rt(other)->linked_on = cpu; | ||
| 357 | pstate[cpu]->linked = other; | ||
| 358 | if (prev) { | ||
| 359 | /* prev got pushed back into the ready queue */ | ||
| 360 | tsk_rt(prev)->linked_on = NO_CPU; | ||
| 361 | __add_ready(&pfair, prev); | ||
| 362 | } | ||
| 363 | /* we are done with this cpu */ | ||
| 364 | return 0; | ||
| 365 | } else { | ||
| 366 | /* re-add other, its original CPU was not considered yet */ | ||
| 367 | tsk_rt(other)->linked_on = NO_CPU; | ||
| 368 | __add_ready(&pfair, other); | ||
| 369 | /* reschedule this CPU */ | ||
| 370 | return 1; | ||
| 371 | } | ||
| 372 | } else { | ||
| 373 | pstate[cpu]->linked = t; | ||
| 374 | tsk_rt(t)->linked_on = cpu; | ||
| 375 | if (prev) { | ||
| 376 | /* prev got pushed back into the ready queue */ | ||
| 377 | tsk_rt(prev)->linked_on = NO_CPU; | ||
| 378 | __add_ready(&pfair, prev); | ||
| 379 | } | ||
| 380 | /* we are done with this CPU */ | ||
| 381 | return 0; | ||
| 382 | } | ||
| 383 | } | ||
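In summary, a paraphrase of the four cases above:

    /* Outcomes of pfair_link(time, cpu, t):
     *  - target == cpu             : link t here, push prev back to the ready
     *                                queue, return 0 (this CPU is done);
     *  - target != cpu, slot empty : link t remotely, leave this CPU's link
     *                                untouched, return 1 (caller re-evaluates);
     *  - target < cpu              : t displaces the remote task, which is
     *                                linked here instead and prev is re-queued,
     *                                return 0 (this CPU is done);
     *  - target > cpu              : t displaces the remote task, which goes
     *                                back to the ready queue, return 1
     *                                (caller re-evaluates this CPU). */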
| 384 | |||
| 385 | static void schedule_subtasks(quanta_t time) | ||
| 386 | { | ||
| 387 | int cpu, retry; | ||
| 388 | |||
| 389 | for_each_online_cpu(cpu) { | ||
| 390 | retry = 1; | ||
| 391 | while (retry) { | ||
| 392 | if (pfair_higher_prio(__peek_ready(&pfair), | ||
| 393 | pstate[cpu]->linked)) | ||
| 394 | retry = pfair_link(time, cpu, | ||
| 395 | __take_ready(&pfair)); | ||
| 396 | else | ||
| 397 | retry = 0; | ||
| 398 | } | ||
| 399 | } | ||
| 400 | } | ||
| 401 | |||
| 402 | static void schedule_next_quantum(quanta_t time) | ||
| 403 | { | ||
| 404 | int cpu; | ||
| 405 | |||
| 406 | PTRACE("<<< Q %lu at %llu\n", | ||
| 407 | time, litmus_clock()); | ||
| 408 | |||
| 409 | /* called with interrupts disabled */ | ||
| 410 | spin_lock(&pfair_lock); | ||
| 411 | |||
| 412 | advance_subtasks(time); | ||
| 413 | poll_releases(time); | ||
| 414 | schedule_subtasks(time); | ||
| 415 | |||
| 416 | spin_unlock(&pfair_lock); | ||
| 417 | |||
| 418 | /* We are done. Advance time. */ | ||
| 419 | mb(); | ||
| 420 | for (cpu = 0; cpu < NR_CPUS; cpu++) | ||
| 421 | pstate[cpu]->cur_tick = pfair_time; | ||
| 422 | PTRACE(">>> Q %lu at %llu\n", | ||
| 423 | time, litmus_clock()); | ||
| 424 | } | ||
| 425 | |||
| 426 | /* pfair_tick - this function is called for every local timer | ||
| 427 | * interrupt. | ||
| 428 | */ | ||
| 429 | static void pfair_tick(struct task_struct* t) | ||
| 430 | { | ||
| 431 | struct pfair_state* state = &__get_cpu_var(pfair_state); | ||
| 432 | quanta_t time, loc, cur; | ||
| 433 | |||
| 434 | /* Attempt to advance time. First CPU to get here | ||
| 435 | * will prepare the next quantum. | ||
| 436 | */ | ||
| 437 | time = cmpxchg(&pfair_time, | ||
| 438 | state->local_tick, /* expected */ | ||
| 439 | state->local_tick + 1 /* next */ | ||
| 440 | ); | ||
| 441 | if (time == state->local_tick) | ||
| 442 | /* exchange succeeded */ | ||
| 443 | schedule_next_quantum(time + 1); | ||
| 444 | |||
| 445 | /* Spin locally until time advances. */ | ||
| 446 | while (1) { | ||
| 447 | mb(); | ||
| 448 | cur = state->cur_tick; | ||
| 449 | loc = state->local_tick; | ||
| 450 | if (time_before(loc, cur)) { | ||
| 451 | if (loc + 1 != cur) { | ||
| 452 | TRACE("MISSED quantum! loc:%lu -> cur:%lu\n", | ||
| 453 | loc, cur); | ||
| 454 | state->missed_quanta++; | ||
| 455 | } | ||
| 456 | break; | ||
| 457 | } | ||
| 458 | cpu_relax(); | ||
| 459 | } | ||
| 460 | |||
| 461 | /* copy state info */ | ||
| 462 | state->local_tick = state->cur_tick; | ||
| 463 | state->local = state->linked; | ||
| 464 | if (state->local && tsk_pfair(state->local)->present && | ||
| 465 | state->local != current) | ||
| 466 | set_tsk_need_resched(current); | ||
| 467 | } | ||
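A compressed view of the per-quantum handshake implemented above (read from the code, not wording from the patch):

    /* 1. every CPU attempts cmpxchg(&pfair_time, local_tick, local_tick + 1);
     * 2. the single winner runs schedule_next_quantum(), which re-links all
     *    CPUs and only then publishes the new quantum via cur_tick (after mb());
     * 3. the other CPUs spin until they observe cur_tick advance, recording a
     *    missed_quanta if they fell more than one quantum behind;
     * 4. each CPU then snapshots linked into local and, if a different task
     *    should run, requests a local reschedule so pfair_schedule() can switch. */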
| 468 | |||
| 469 | static int safe_to_schedule(struct task_struct* t, int cpu) | ||
| 470 | { | ||
| 471 | int where = tsk_rt(t)->scheduled_on; | ||
| 472 | if (where != NO_CPU && where != cpu) { | ||
| 473 | TRACE_TASK(t, "BAD: can't be scheduled on %d, " | ||
| 474 | "scheduled already on %d.\n", cpu, where); | ||
| 475 | return 0; | ||
| 476 | } else | ||
| 477 | return tsk_pfair(t)->present && get_rt_flags(t) == RT_F_RUNNING; | ||
| 478 | } | ||
| 479 | |||
| 480 | static struct task_struct* pfair_schedule(struct task_struct * prev) | ||
| 481 | { | ||
| 482 | struct pfair_state* state = &__get_cpu_var(pfair_state); | ||
| 483 | int blocks; | ||
| 484 | struct task_struct* next = NULL; | ||
| 485 | |||
| 486 | spin_lock(&pfair_lock); | ||
| 487 | |||
| 488 | blocks = is_realtime(prev) && !is_running(prev); | ||
| 489 | |||
| 490 | if (blocks) | ||
| 491 | tsk_pfair(prev)->present = 0; | ||
| 492 | |||
| 493 | if (state->local && safe_to_schedule(state->local, state->cpu)) | ||
| 494 | next = state->local; | ||
| 495 | |||
| 496 | if (prev != next) { | ||
| 497 | tsk_rt(prev)->scheduled_on = NO_CPU; | ||
| 498 | if (next) | ||
| 499 | tsk_rt(next)->scheduled_on = state->cpu; | ||
| 500 | } | ||
| 501 | |||
| 502 | spin_unlock(&pfair_lock); | ||
| 503 | |||
| 504 | if (next) | ||
| 505 | TRACE_TASK(next, "scheduled rel=%lu at %lu\n", | ||
| 506 | tsk_pfair(next)->release, pfair_time); | ||
| 507 | else if (is_realtime(prev)) | ||
| 508 | TRACE("Becomes idle at %lu\n", pfair_time); | ||
| 509 | |||
| 510 | return next; | ||
| 511 | } | ||
| 512 | |||
| 513 | static void pfair_task_new(struct task_struct * t, int on_rq, int running) | ||
| 514 | { | ||
| 515 | unsigned long flags; | ||
| 516 | |||
| 517 | TRACE("pfair: task new %d state:%d\n", t->pid, t->state); | ||
| 518 | |||
| 519 | spin_lock_irqsave(&pfair_lock, flags); | ||
| 520 | if (running) | ||
| 521 | t->rt_param.scheduled_on = task_cpu(t); | ||
| 522 | else | ||
| 523 | t->rt_param.scheduled_on = NO_CPU; | ||
| 524 | |||
| 525 | prepare_release(t, pfair_time + 1); | ||
| 526 | tsk_pfair(t)->present = running; | ||
| 527 | pfair_add_release(t); | ||
| 528 | check_preempt(t); | ||
| 529 | |||
| 530 | spin_unlock_irqrestore(&pfair_lock, flags); | ||
| 531 | } | ||
| 532 | |||
| 533 | static void pfair_task_wake_up(struct task_struct *t) | ||
| 534 | { | ||
| 535 | unsigned long flags; | ||
| 536 | |||
| 537 | TRACE_TASK(t, "wakes at %lld, release=%lu, pfair_time:%lu\n", | ||
| 538 | litmus_clock(), cur_release(t), pfair_time); | ||
| 539 | |||
| 540 | spin_lock_irqsave(&pfair_lock, flags); | ||
| 541 | |||
| 542 | tsk_pfair(t)->present = 1; | ||
| 543 | |||
| 544 | /* It is a little unclear how to deal with Pfair | ||
| 545 | * tasks that block for a while and then wake. | ||
| 546 | * For now, we assume that such suspensions are included | ||
| 547 | * in the stated execution time of the task, and thus | ||
| 548 | * count as execution time for our purposes. Thus, if the | ||
| 549 | * task is currently linked somewhere, it may resume, otherwise | ||
| 550 | * it has to wait for its next quantum allocation. | ||
| 551 | */ | ||
| 552 | |||
| 553 | check_preempt(t); | ||
| 554 | |||
| 555 | spin_unlock_irqrestore(&pfair_lock, flags); | ||
| 556 | } | ||
| 557 | |||
| 558 | static void pfair_task_block(struct task_struct *t) | ||
| 559 | { | ||
| 560 | BUG_ON(!is_realtime(t)); | ||
| 561 | TRACE_TASK(t, "blocks at %lld, state:%d\n", | ||
| 562 | (lt_t) jiffies, t->state); | ||
| 563 | } | ||
| 564 | |||
| 565 | /* caller must hold pfair_lock */ | ||
| 566 | static void drop_all_references(struct task_struct *t) | ||
| 567 | { | ||
| 568 | int cpu; | ||
| 569 | struct pfair_state* s; | ||
| 570 | struct heap* q; | ||
| 571 | if (heap_node_in_heap(tsk_rt(t)->heap_node)) { | ||
| 572 | /* figure out what queue the node is in */ | ||
| 573 | if (time_before_eq(cur_release(t), merge_time)) | ||
| 574 | q = &pfair.ready_queue; | ||
| 575 | else | ||
| 576 | q = relq(cur_release(t)); | ||
| 577 | heap_delete(pfair_ready_order, q, | ||
| 578 | tsk_rt(t)->heap_node); | ||
| 579 | } | ||
| 580 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | ||
| 581 | s = &per_cpu(pfair_state, cpu); | ||
| 582 | if (s->linked == t) | ||
| 583 | s->linked = NULL; | ||
| 584 | if (s->local == t) | ||
| 585 | s->local = NULL; | ||
| 586 | if (s->scheduled == t) | ||
| 587 | s->scheduled = NULL; | ||
| 588 | } | ||
| 589 | } | ||
| 590 | |||
| 591 | static void pfair_task_exit(struct task_struct * t) | ||
| 592 | { | ||
| 593 | unsigned long flags; | ||
| 594 | |||
| 595 | BUG_ON(!is_realtime(t)); | ||
| 596 | |||
| 597 | /* Remove the task from the release or ready queue, and ensure | ||
| 598 | * that it is not the scheduled task for ANY CPU. We | ||
| 599 | * do this blanket check because occasionally when | ||
| 600 | * tasks exit while blocked, the task_cpu of the task | ||
| 601 | * might not be the same as the CPU that the PFAIR scheduler | ||
| 602 | * has chosen for it. | ||
| 603 | */ | ||
| 604 | spin_lock_irqsave(&pfair_lock, flags); | ||
| 605 | |||
| 606 | TRACE_TASK(t, "RIP, state:%d\n", t->state); | ||
| 607 | drop_all_references(t); | ||
| 608 | |||
| 609 | spin_unlock_irqrestore(&pfair_lock, flags); | ||
| 610 | |||
| 611 | kfree(t->rt_param.pfair); | ||
| 612 | t->rt_param.pfair = NULL; | ||
| 613 | } | ||
| 614 | |||
| 615 | |||
| 616 | static void pfair_release_at(struct task_struct* task, lt_t start) | ||
| 617 | { | ||
| 618 | unsigned long flags; | ||
| 619 | lt_t now = litmus_clock(); | ||
| 620 | quanta_t release, delta; | ||
| 621 | |||
| 622 | BUG_ON(!is_realtime(task)); | ||
| 623 | |||
| 624 | spin_lock_irqsave(&pfair_lock, flags); | ||
| 625 | if (lt_before(now, start)) { | ||
| 626 | delta = time2quanta((long long) start - (long long) now, CEIL); | ||
| 627 | if (delta >= PFAIR_MAX_PERIOD) | ||
| 628 | delta = PFAIR_MAX_PERIOD - 1; | ||
| 629 | } else | ||
| 630 | delta = 10; /* release in 10 ticks */ | ||
| 631 | |||
| 632 | release = pfair_time + delta; | ||
| 633 | |||
| 634 | drop_all_references(task); | ||
| 635 | prepare_release(task, release); | ||
| 636 | pfair_add_release(task); | ||
| 637 | spin_unlock_irqrestore(&pfair_lock, flags); | ||
| 638 | } | ||
| 639 | |||
| 640 | static void init_subtask(struct subtask* sub, unsigned long i, | ||
| 641 | lt_t quanta, lt_t period) | ||
| 642 | { | ||
| 643 | /* since i is zero-based, the formulas are shifted by one */ | ||
| 644 | lt_t tmp; | ||
| 645 | |||
| 646 | /* release */ | ||
| 647 | tmp = period * i; | ||
| 648 | do_div(tmp, quanta); /* floor */ | ||
| 649 | sub->release = (quanta_t) tmp; | ||
| 650 | |||
| 651 | /* deadline */ | ||
| 652 | tmp = period * (i + 1); | ||
| 653 | if (do_div(tmp, quanta)) /* ceil */ | ||
| 654 | tmp++; | ||
| 655 | sub->deadline = (quanta_t) tmp; | ||
| 656 | |||
| 657 | /* next release */ | ||
| 658 | tmp = period * (i + 1); | ||
| 659 | do_div(tmp, quanta); /* floor */ | ||
| 660 | sub->overlap = sub->deadline - (quanta_t) tmp; | ||
| 661 | |||
| 662 | /* Group deadline. | ||
| 663 | * Based on the formula given in Uma's thesis. | ||
| 664 | */ | ||
| 665 | if (2 * quanta >= period) { | ||
| 666 | /* heavy */ | ||
| 667 | tmp = (sub->deadline - (i + 1)) * period; | ||
| 668 | if (do_div(tmp, (period - quanta))) /* ceil */ | ||
| 669 | tmp++; | ||
| 670 | sub->group_deadline = (quanta_t) tmp; | ||
| 671 | } else | ||
| 672 | sub->group_deadline = 0; | ||
| 673 | } | ||
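To sanity-check the formulas, here is what they produce for a hypothetical light task with quanta = 3 and period = 8 (2*3 < 8, so every group deadline is 0); the values were computed by hand from the expressions above:

    /*  i : release  deadline  b-bit  group deadline */
    static const struct subtask example_windows[3] = {
            { .release = 0, .deadline = 3, .overlap = 1, .group_deadline = 0 },
            { .release = 2, .deadline = 6, .overlap = 1, .group_deadline = 0 },
            { .release = 5, .deadline = 8, .overlap = 0, .group_deadline = 0 },
    };
    /* A heavy task with quanta = 3 and period = 4 would instead receive a
     * group deadline of 4 for each of its three subtasks. */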
| 674 | |||
| 675 | static void dump_subtasks(struct task_struct* t) | ||
| 676 | { | ||
| 677 | unsigned long i; | ||
| 678 | for (i = 0; i < t->rt_param.pfair->quanta; i++) | ||
| 679 | TRACE_TASK(t, "SUBTASK %lu: rel=%lu dl=%lu bbit:%lu gdl:%lu\n", | ||
| 680 | i + 1, | ||
| 681 | t->rt_param.pfair->subtasks[i].release, | ||
| 682 | t->rt_param.pfair->subtasks[i].deadline, | ||
| 683 | t->rt_param.pfair->subtasks[i].overlap, | ||
| 684 | t->rt_param.pfair->subtasks[i].group_deadline); | ||
| 685 | } | ||
| 686 | |||
| 687 | static long pfair_admit_task(struct task_struct* t) | ||
| 688 | { | ||
| 689 | lt_t quanta; | ||
| 690 | lt_t period; | ||
| 691 | s64 quantum_length = ktime_to_ns(tick_period); | ||
| 692 | struct pfair_param* param; | ||
| 693 | unsigned long i; | ||
| 694 | |||
| 695 | /* Pfair is a tick-based method, so the time | ||
| 696 | * of interest is jiffies. Calculate tick-based | ||
| 697 | * times for everything. | ||
| 698 | * (Ceiling of exec cost, floor of period.) | ||
| 699 | */ | ||
| 700 | |||
| 701 | quanta = get_exec_cost(t); | ||
| 702 | period = get_rt_period(t); | ||
| 703 | |||
| 704 | quanta = time2quanta(get_exec_cost(t), CEIL); | ||
| 705 | |||
| 706 | if (do_div(period, quantum_length)) | ||
| 707 | printk(KERN_WARNING | ||
| 708 | "The period of %s/%d is not a multiple of %llu.\n", | ||
| 709 | t->comm, t->pid, (unsigned long long) quantum_length); | ||
| 710 | |||
| 711 | if (period >= PFAIR_MAX_PERIOD) { | ||
| 712 | printk(KERN_WARNING | ||
| 713 | "PFAIR: Rejecting task %s/%d; its period is too long.\n", | ||
| 714 | t->comm, t->pid); | ||
| 715 | return -EINVAL; | ||
| 716 | } | ||
| 717 | |||
| 718 | param = kmalloc(sizeof(struct pfair_param) + | ||
| 719 | quanta * sizeof(struct subtask), GFP_ATOMIC); | ||
| 720 | |||
| 721 | if (!param) | ||
| 722 | return -ENOMEM; | ||
| 723 | |||
| 724 | param->quanta = quanta; | ||
| 725 | param->cur = 0; | ||
| 726 | param->release = 0; | ||
| 727 | param->period = period; | ||
| 728 | |||
| 729 | for (i = 0; i < quanta; i++) | ||
| 730 | init_subtask(param->subtasks + i, i, quanta, period); | ||
| 731 | |||
| 732 | if (t->rt_param.pfair) | ||
| 733 | /* get rid of stale allocation */ | ||
| 734 | kfree(t->rt_param.pfair); | ||
| 735 | |||
| 736 | t->rt_param.pfair = param; | ||
| 737 | |||
| 738 | /* spew out some debug info */ | ||
| 739 | dump_subtasks(t); | ||
| 740 | |||
| 741 | return 0; | ||
| 742 | } | ||
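As a concrete (hypothetical) admission example, again assuming a 1 ms quantum: a task with a 2 ms execution cost and a 10 ms period passes the checks above and ends up with the following per-task state, i.e. it is entitled to 2 of every 10 quanta:

    param->quanta  = 2;     /* two subtask windows, subtasks[0..1] */
    param->cur     = 0;
    param->release = 0;
    param->period  = 10;    /* task weight 2/10                    */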
| 743 | |||
| 744 | /* Plugin object */ | ||
| 745 | static struct sched_plugin pfair_plugin __cacheline_aligned_in_smp = { | ||
| 746 | .plugin_name = "PFAIR", | ||
| 747 | .tick = pfair_tick, | ||
| 748 | .task_new = pfair_task_new, | ||
| 749 | .task_exit = pfair_task_exit, | ||
| 750 | .schedule = pfair_schedule, | ||
| 751 | .task_wake_up = pfair_task_wake_up, | ||
| 752 | .task_block = pfair_task_block, | ||
| 753 | .admit_task = pfair_admit_task, | ||
| 754 | .release_at = pfair_release_at, | ||
| 755 | .complete_job = complete_job | ||
| 756 | }; | ||
| 757 | |||
| 758 | static int __init init_pfair(void) | ||
| 759 | { | ||
| 760 | int cpu, i; | ||
| 761 | struct pfair_state *state; | ||
| 762 | |||
| 763 | /* initialize release queue */ | ||
| 764 | for (i = 0; i < PFAIR_MAX_PERIOD; i++) | ||
| 765 | heap_init(&release_queue[i]); | ||
| 766 | |||
| 767 | /* initialize CPU state */ | ||
| 768 | for (cpu = 0; cpu < NR_CPUS; cpu++) { | ||
| 769 | state = &per_cpu(pfair_state, cpu); | ||
| 770 | state->cpu = cpu; | ||
| 771 | state->cur_tick = 0; | ||
| 772 | state->local_tick = 0; | ||
| 773 | state->linked = NULL; | ||
| 774 | state->local = NULL; | ||
| 775 | state->scheduled = NULL; | ||
| 776 | state->missed_quanta = 0; | ||
| 777 | pstate[cpu] = state; | ||
| 778 | } | ||
| 779 | |||
| 780 | rt_domain_init(&pfair, pfair_ready_order, NULL, NULL); | ||
| 781 | return register_sched_plugin(&pfair_plugin); | ||
| 782 | } | ||
| 783 | |||
| 784 | module_init(init_pfair); | ||
| 785 | |||
