| author | Bjoern Brandenburg <bbb@mpi-sws.org> | 2015-08-09 07:18:48 -0400 |
|---|---|---|
| committer | Bjoern Brandenburg <bbb@mpi-sws.org> | 2015-08-09 06:21:18 -0400 |
| commit | 8e048c798adaabef530a1526f7ce8c6c3cd3475e (patch) | |
| tree | 5a96b3eaeaafecec1bf08ba71a9d0084d39d46eb /kernel | |
| parent | bd175e94795774908317a861a883761b75750e35 (diff) | |
Add LITMUS^RT core implementation
This patch adds the core of LITMUS^RT:
- library functionality (heaps, rt_domain, prioritization, etc.)
- budget enforcement logic
- job management
- system call backends
- virtual devices (control page, etc.)
- scheduler plugin API (and dummy plugin)
This code compiles, but is not yet integrated with the rest of Linux.
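The file below drives the active plugin through a small callback interface. For orientation, here is a hedged sketch of that interface showing only the three callbacks this file actually invokes; the authoritative definition lives in litmus/sched_plugin.h and contains further members (task admission, job completion, plugin activation, and so on), so treat the member set and names below as inferred from the call sites, not as the real header.

```c
/* Hedged sketch of the plugin interface used by kernel/sched/litmus.c;
 * only the callbacks invoked in this file are shown, and the member
 * names are inferred from the call sites (see litmus/sched_plugin.h
 * for the authoritative definition). */
struct sched_plugin {
        const char *plugin_name;

        /* Pick the next real-time task to run. A global plugin may hand
         * back a task whose runqueue belongs to another CPU, which
         * triggers the migration path in litmus_schedule() below. */
        struct task_struct *(*schedule)(struct task_struct *prev);

        /* State-change notifications, driven from enqueue_task_litmus()
         * and dequeue_task_litmus(). */
        void (*task_wake_up)(struct task_struct *task);
        void (*task_block)(struct task_struct *task);
};

/* The currently active plugin; the code below calls litmus->schedule(),
 * litmus->task_wake_up(), and litmus->task_block() through it. */
extern struct sched_plugin *litmus;
```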
Diffstat (limited to 'kernel')
| -rw-r--r-- | kernel/sched/litmus.c | 350 |
1 file changed, 350 insertions, 0 deletions
```
diff --git a/kernel/sched/litmus.c b/kernel/sched/litmus.c
new file mode 100644
index 000000000000..924358babde2
--- /dev/null
+++ b/kernel/sched/litmus.c
@@ -0,0 +1,350 @@
```

```c
/* This file is included from kernel/sched.c */

#include "sched.h"

#include <litmus/trace.h>
#include <litmus/sched_trace.h>

#include <litmus/litmus.h>
#include <litmus/budget.h>
#include <litmus/sched_plugin.h>
#include <litmus/preempt.h>

static void update_time_litmus(struct rq *rq, struct task_struct *p)
{
        u64 delta = rq->clock - p->se.exec_start;
        if (unlikely((s64)delta < 0))
                delta = 0;
        /* per job counter */
        p->rt_param.job_params.exec_time += delta;
        /* task counter */
        p->se.sum_exec_runtime += delta;
        if (delta) {
                TRACE_TASK(p, "charged %llu exec time (total:%llu, rem:%llu)\n",
                           delta, p->rt_param.job_params.exec_time,
                           budget_remaining(p));
        }
        /* sched_clock() */
        p->se.exec_start = rq->clock;
        cpuacct_charge(p, delta);
}

static void double_rq_lock(struct rq *rq1, struct rq *rq2);
static void double_rq_unlock(struct rq *rq1, struct rq *rq2);

static struct task_struct *
litmus_schedule(struct rq *rq, struct task_struct *prev)
{
        struct task_struct *next;

#ifdef CONFIG_SMP
        struct rq *other_rq;
        long was_running;
        lt_t _maybe_deadlock = 0;
#endif

        /* let the plugin schedule */
        next = litmus->schedule(prev);

        sched_state_plugin_check();

#ifdef CONFIG_SMP
        /* check if a global plugin pulled a task from a different RQ */
        if (next && task_rq(next) != rq) {
                /* we need to migrate the task */
                other_rq = task_rq(next);
                TRACE_TASK(next, "migrate from %d\n", other_rq->cpu);

                /* While we drop the lock, the prev task could change its
                 * state.
                 */
                BUG_ON(prev != current);
                was_running = is_current_running();
                mb();
                raw_spin_unlock(&rq->lock);

                /* Don't race with a concurrent switch. This could deadlock
                 * in the case of cross or circular migrations. It's the job
                 * of the plugin to make sure that doesn't happen.
                 */
                TRACE_TASK(next, "stack_in_use=%d\n",
                           next->rt_param.stack_in_use);
                if (next->rt_param.stack_in_use != NO_CPU) {
                        TRACE_TASK(next, "waiting to deschedule\n");
                        _maybe_deadlock = litmus_clock();
                }
                while (next->rt_param.stack_in_use != NO_CPU) {
                        cpu_relax();
                        mb();
                        if (next->rt_param.stack_in_use == NO_CPU)
                                TRACE_TASK(next, "descheduled. Proceeding.\n");

                        if (lt_before(_maybe_deadlock + 1000000000L,
                                      litmus_clock())) {
                                /* We've been spinning for 1s.
                                 * Something can't be right!
                                 * Let's abandon the task and bail out; at
                                 * least we will have debug info instead of
                                 * a hard deadlock.
                                 */
#ifdef CONFIG_BUG_ON_MIGRATION_DEADLOCK
                                BUG();
#else
                                TRACE_TASK(next, "stack too long in use. "
                                           "Deadlock?\n");
                                next = NULL;

                                /* bail out */
                                raw_spin_lock(&rq->lock);
                                return next;
#endif
                        }
                }
#ifdef __ARCH_WANT_UNLOCKED_CTXSW
                if (next->on_cpu)
                        TRACE_TASK(next, "waiting for !oncpu");
                while (next->on_cpu) {
                        cpu_relax();
                        mb();
                }
#endif
                double_rq_lock(rq, other_rq);
                mb();
                if (is_realtime(current) &&
                    is_current_running() != was_running) {
                        TRACE_TASK(prev,
                                   "state changed while we dropped"
                                   " the lock: is_running=%d, was_running=%d\n",
                                   is_current_running(), was_running);
                        if (is_current_running() && !was_running) {
                                /* The prev task became unblocked; we need
                                 * to simulate the normal sequence of events
                                 * to the scheduler plugin.
                                 */
                                litmus->task_block(prev);
                                litmus->task_wake_up(prev);
                        }
                }

                set_task_cpu(next, smp_processor_id());

                /* DEBUG: now that we have the lock we need to make sure a
                 * couple of things still hold:
                 * - it is still a real-time task
                 * - it is still runnable (could have been stopped)
                 * If either is violated, then the active plugin is
                 * doing something wrong.
                 */
                if (!is_realtime(next) || !tsk_rt(next)->present) {
                        /* BAD BAD BAD */
                        TRACE_TASK(next, "BAD: migration invariant FAILED: "
                                   "rt=%d present=%d\n",
                                   is_realtime(next),
                                   tsk_rt(next)->present);
                        /* drop the task */
                        next = NULL;
                }
                /* release the other CPU's runqueue, but keep ours */
                raw_spin_unlock(&other_rq->lock);
        }
#endif

        if (next) {
#ifdef CONFIG_SMP
                next->rt_param.stack_in_use = rq->cpu;
#else
                next->rt_param.stack_in_use = 0;
#endif
                update_rq_clock(rq);
                next->se.exec_start = rq->clock;
        }

        update_enforcement_timer(next);
        return next;
}

static void enqueue_task_litmus(struct rq *rq, struct task_struct *p,
                                int flags)
{
        if (flags & ENQUEUE_WAKEUP) {
                sched_trace_task_resume(p);
                tsk_rt(p)->present = 1;
                /* LITMUS^RT plugins need to update the state
                 * _before_ making it available in global structures.
                 * Linux gets away with being lazy about the task state
                 * update. We can't do that, hence we update the task
                 * state already here.
                 *
                 * WARNING: this needs to be re-evaluated when porting
                 * to newer kernel versions.
                 */
                p->state = TASK_RUNNING;
                litmus->task_wake_up(p);

                rq->litmus.nr_running++;
        } else {
                TRACE_TASK(p, "ignoring an enqueue, not a wake up.\n");
                p->se.exec_start = rq->clock;
        }
}

static void dequeue_task_litmus(struct rq *rq, struct task_struct *p,
                                int flags)
{
        if (flags & DEQUEUE_SLEEP) {
                litmus->task_block(p);
                tsk_rt(p)->present = 0;
                sched_trace_task_block(p);

                rq->litmus.nr_running--;
        } else
                TRACE_TASK(p, "ignoring a dequeue, not going to sleep.\n");
}

static void yield_task_litmus(struct rq *rq)
{
        TS_SYSCALL_IN_START;
        TS_SYSCALL_IN_END;

        BUG_ON(rq->curr != current);
        /* sched_yield() is called to trigger delayed preemptions.
         * Thus, mark the current task as needing to be rescheduled.
         * This will cause the scheduler plugin to be invoked, which can
         * then determine if a preemption is still required.
         */
        clear_exit_np(current);
        litmus_reschedule_local();

        TS_SYSCALL_OUT_START;
}

/* Plugins are responsible for this. */
static void check_preempt_curr_litmus(struct rq *rq, struct task_struct *p,
                                      int flags)
{
}

static void put_prev_task_litmus(struct rq *rq, struct task_struct *p)
{
}

/* pick_next_task_litmus() - wrapper around litmus_schedule()
 *
 * Returns the next task to be scheduled.
 */
static struct task_struct *pick_next_task_litmus(struct rq *rq,
                                                 struct task_struct *prev)
{
        struct task_struct *next;

        if (is_realtime(prev))
                update_time_litmus(rq, prev);

        TS_PLUGIN_SCHED_START;
        next = litmus_schedule(rq, prev);
        TS_PLUGIN_SCHED_END;

        /* This is a bit backwards: the other classes call put_prev_task()
         * _after_ they've determined that the class has some queued tasks.
         * We can't determine this easily because each plugin manages its own
         * ready queues, and because in the case of globally shared queues,
         * we really don't know whether we'll have something ready even if
         * we test here. So we do it in reverse: first ask the plugin to
         * provide a task, and if we find one, call put_prev_task() on the
         * previously scheduled task.
         */
        if (next)
                put_prev_task(rq, prev);

        return next;
}

static void task_tick_litmus(struct rq *rq, struct task_struct *p, int queued)
{
        if (is_realtime(p) && !queued) {
                update_time_litmus(rq, p);
                /* budget check for QUANTUM_ENFORCEMENT tasks */
                if (budget_enforced(p) && budget_exhausted(p)) {
                        litmus_reschedule_local();
                }
        }
}
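
/* Note (inferred, not part of the original patch): budget_enforced(),
 * budget_exhausted(), and budget_remaining() are provided by
 * litmus/budget.h, included above. Judging from the call sites here and
 * in update_time_litmus(), they compare the per-job counter
 * tsk_rt(t)->job_params.exec_time against the job's budget; consult
 * litmus/budget.h for the authoritative definitions. */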

static void switched_to_litmus(struct rq *rq, struct task_struct *p)
{
}

static void prio_changed_litmus(struct rq *rq, struct task_struct *p,
                                int oldprio)
{
}

unsigned int get_rr_interval_litmus(struct rq *rq, struct task_struct *p)
{
        /* return infinity */
        return 0;
}

/* This is called when a task becomes a real-time task, either due to a
 * SCHED_* class transition or due to PI mutex inheritance. We don't handle
 * Linux PI mutex inheritance yet (and probably never will). Use the
 * LITMUS^RT-provided synchronization primitives instead.
 */
static void set_curr_task_litmus(struct rq *rq)
{
        rq->curr->se.exec_start = rq->clock;
}


#ifdef CONFIG_SMP
/* execve tries to rebalance the task in this scheduling domain.
 * We don't care about the scheduling domain; this can get called from
 * exec, fork, and wakeup.
 */
static int
select_task_rq_litmus(struct task_struct *p, int cpu, int sd_flag, int flags)
{
        /* Preemption is already disabled.
         * We don't want to change the CPU here.
         */
        return task_cpu(p);
}
#endif

static void update_curr_litmus(struct rq *rq)
{
        struct task_struct *p = rq->curr;

        if (!is_realtime(p))
                return;

        update_time_litmus(rq, p);
}

const struct sched_class litmus_sched_class = {
        /* Since commit 34f971f6, the stop/migrate worker threads have a
         * class of their own, which is the highest-priority class. We do
         * not support CPU hotplug or CPU throttling, which allows
         * LITMUS^RT to use up to 1.0 of the CPU capacity.
         */
        .next = &dl_sched_class,
        .enqueue_task = enqueue_task_litmus,
        .dequeue_task = dequeue_task_litmus,
        .yield_task = yield_task_litmus,

        .check_preempt_curr = check_preempt_curr_litmus,

        .pick_next_task = pick_next_task_litmus,
        .put_prev_task = put_prev_task_litmus,

#ifdef CONFIG_SMP
        .select_task_rq = select_task_rq_litmus,
#endif

        .set_curr_task = set_curr_task_litmus,
        .task_tick = task_tick_litmus,

        .get_rr_interval = get_rr_interval_litmus,

        .prio_changed = prio_changed_litmus,
        .switched_to = switched_to_litmus,

        .update_curr = update_curr_litmus,
};
```
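
The commit message mentions a "dummy plugin" that serves as the default backend for this scheduler class. Purely as an illustration — this is a hypothetical sketch, not the actual default plugin from the patch series, and it reuses the simplified `struct sched_plugin` sketched after the commit message above — a minimal do-nothing plugin could look like this:

```c
/* Hypothetical minimal plugin (illustration only, not the patch's real
 * dummy plugin): it never has real-time work, so litmus->schedule()
 * returns NULL and the lower scheduling classes keep the CPU. */
static struct task_struct *demo_schedule(struct task_struct *prev)
{
        return NULL; /* no real-time task to run */
}

static void demo_task_wake_up(struct task_struct *task)
{
        /* a real plugin would requeue the woken task here */
}

static void demo_task_block(struct task_struct *task)
{
        /* a real plugin would remove the task from its ready queue here */
}

static struct sched_plugin demo_plugin = {
        .plugin_name  = "DEMO",
        .schedule     = demo_schedule,
        .task_wake_up = demo_task_wake_up,
        .task_block   = demo_task_block,
};
```

Note also the `.next = &dl_sched_class` link in `litmus_sched_class`: in kernels of this generation the scheduling classes form a singly linked priority chain, so placing LITMUS^RT ahead of the deadline class means `pick_next_task_litmus()` is consulted before SCHED_DEADLINE, SCHED_FIFO/RR, and CFS tasks, consistent with the comment that only the stop/migrate class ranks higher.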
