diff options
| author | Bjoern Brandenburg <bbb@mpi-sws.org> | 2016-02-14 19:24:06 -0500 |
|---|---|---|
| committer | Bjoern Brandenburg <bbb@mpi-sws.org> | 2016-03-08 10:12:52 -0500 |
| commit | 14cc73779849021aa199e305933852c7847aa85a (patch) | |
| tree | 8603ed9fcf5837b3f848483ae48bd694924db4a4 | |
| parent | 5f594c7c328a1d6518b196566664c0286f39b88f (diff) | |
LITMUS^RT core: be more careful when pull-migrating tasks
Close more race windows and give plugins a chance to validate
tasks after they have been migrated.
| -rw-r--r-- | kernel/sched/litmus.c | 55 |
1 file changed, 47 insertions, 8 deletions
diff --git a/kernel/sched/litmus.c b/kernel/sched/litmus.c index 8eb9df31ea01..fdb17e958adb 100644 --- a/kernel/sched/litmus.c +++ b/kernel/sched/litmus.c | |||
| @@ -39,6 +39,7 @@ litmus_schedule(struct rq *rq, struct task_struct *prev) | |||
| 39 | #ifdef CONFIG_SMP | 39 | #ifdef CONFIG_SMP |
| 40 | struct rq* other_rq; | 40 | struct rq* other_rq; |
| 41 | long was_running; | 41 | long was_running; |
| 42 | int from_where; | ||
| 42 | lt_t _maybe_deadlock = 0; | 43 | lt_t _maybe_deadlock = 0; |
| 43 | #endif | 44 | #endif |
| 44 | 45 | ||
| @@ -52,15 +53,14 @@ litmus_schedule(struct rq *rq, struct task_struct *prev) | |||
| 52 | if (next && task_rq(next) != rq) { | 53 | if (next && task_rq(next) != rq) { |
| 53 | /* we need to migrate the task */ | 54 | /* we need to migrate the task */ |
| 54 | other_rq = task_rq(next); | 55 | other_rq = task_rq(next); |
| 55 | TRACE_TASK(next, "migrate from %d\n", other_rq->cpu); | 56 | from_where = other_rq->cpu; |
| 57 | TRACE_TASK(next, "migrate from %d\n", from_where); | ||
| 56 | 58 | ||
| 57 | /* while we drop the lock, the prev task could change its | 59 | /* while we drop the lock, the prev task could change its |
| 58 | * state | 60 | * state |
| 59 | */ | 61 | */ |
| 60 | BUG_ON(prev != current); | 62 | BUG_ON(prev != current); |
| 61 | was_running = is_current_running(); | 63 | was_running = is_current_running(); |
| 62 | mb(); | ||
| 63 | raw_spin_unlock(&rq->lock); | ||
| 64 | 64 | ||
| 65 | /* Don't race with a concurrent switch. This could deadlock in | 65 | /* Don't race with a concurrent switch. This could deadlock in |
| 66 | * the case of cross or circular migrations. It's the job of | 66 | * the case of cross or circular migrations. It's the job of |
| @@ -72,6 +72,9 @@ litmus_schedule(struct rq *rq, struct task_struct *prev) | |||
| 72 | TRACE_TASK(next, "waiting to deschedule\n"); | 72 | TRACE_TASK(next, "waiting to deschedule\n"); |
| 73 | _maybe_deadlock = litmus_clock(); | 73 | _maybe_deadlock = litmus_clock(); |
| 74 | } | 74 | } |
| 75 | |||
| 76 | raw_spin_unlock(&rq->lock); | ||
| 77 | |||
| 75 | while (next->rt_param.stack_in_use != NO_CPU) { | 78 | while (next->rt_param.stack_in_use != NO_CPU) { |
| 76 | cpu_relax(); | 79 | cpu_relax(); |
| 77 | mb(); | 80 | mb(); |
| @@ -88,7 +91,24 @@ litmus_schedule(struct rq *rq, struct task_struct *prev) | |||
| 88 | litmus_reschedule_local(); | 91 | litmus_reschedule_local(); |
| 89 | /* give up */ | 92 | /* give up */ |
| 90 | raw_spin_lock(&rq->lock); | 93 | raw_spin_lock(&rq->lock); |
| 91 | return next; | 94 | goto out; |
| 95 | } | ||
| 96 | |||
| 97 | if (from_where != task_rq(next)->cpu) { | ||
| 98 | /* The plugin should not give us something | ||
| 99 | * that other cores are trying to pull, too */ | ||
| 100 | TRACE_TASK(next, "next invalid: task keeps " | ||
| 101 | "shifting around!? " | ||
| 102 | "(%d->%d)\n", | ||
| 103 | from_where, | ||
| 104 | task_rq(next)->cpu); | ||
| 105 | |||
| 106 | /* bail out */ | ||
| 107 | raw_spin_lock(&rq->lock); | ||
| 108 | litmus->next_became_invalid(next); | ||
| 109 | litmus_reschedule_local(); | ||
| 110 | next = NULL; | ||
| 111 | goto out; | ||
| 92 | } | 112 | } |
| 93 | 113 | ||
| 94 | if (lt_before(_maybe_deadlock + 1000000000L, | 114 | if (lt_before(_maybe_deadlock + 1000000000L, |
| @@ -108,7 +128,7 @@ litmus_schedule(struct rq *rq, struct task_struct *prev) | |||
| 108 | 128 | ||
| 109 | /* bail out */ | 129 | /* bail out */ |
| 110 | raw_spin_lock(&rq->lock); | 130 | raw_spin_lock(&rq->lock); |
| 111 | return next; | 131 | goto out; |
| 112 | #endif | 132 | #endif |
| 113 | } | 133 | } |
| 114 | } | 134 | } |
| @@ -121,9 +141,27 @@ litmus_schedule(struct rq *rq, struct task_struct *prev) | |||
| 121 | } | 141 | } |
| 122 | #endif | 142 | #endif |
| 123 | double_rq_lock(rq, other_rq); | 143 | double_rq_lock(rq, other_rq); |
| 124 | set_task_cpu(next, smp_processor_id()); | 144 | if (other_rq == task_rq(next) && |
| 125 | /* release the other CPU's runqueue, but keep ours */ | 145 | next->rt_param.stack_in_use == NO_CPU) { |
| 126 | raw_spin_unlock(&other_rq->lock); | 146 | /* ok, we can grab it */ |
| 147 | set_task_cpu(next, rq->cpu); | ||
| 148 | /* release the other CPU's runqueue, but keep ours */ | ||
| 149 | raw_spin_unlock(&other_rq->lock); | ||
| 150 | } else { | ||
| 151 | /* Either it moved or the stack was claimed; both are | ||
| 152 | * bad and force us to abort the migration. */ | ||
| 153 | TRACE_TASK(next, "next invalid: no longer available\n"); | ||
| 154 | raw_spin_unlock(&other_rq->lock); | ||
| 155 | litmus->next_became_invalid(next); | ||
| 156 | next = NULL; | ||
| 157 | goto out; | ||
| 158 | } | ||
| 159 | |||
| 160 | if (!litmus->post_migration_validate(next)) { | ||
| 161 | TRACE_TASK(next, "plugin deems task now invalid\n"); | ||
| 162 | litmus_reschedule_local(); | ||
| 163 | next = NULL; | ||
| 164 | } | ||
| 127 | } | 165 | } |
| 128 | #endif | 166 | #endif |
| 129 | 167 | ||
| @@ -146,6 +184,7 @@ litmus_schedule(struct rq *rq, struct task_struct *prev) | |||
| 146 | next->se.exec_start = rq->clock; | 184 | next->se.exec_start = rq->clock; |
| 147 | } | 185 | } |
| 148 | 186 | ||
| 187 | out: | ||
| 149 | update_enforcement_timer(next); | 188 | update_enforcement_timer(next); |
| 150 | return next; | 189 | return next; |
| 151 | } | 190 | } |
