author    Steven Rostedt <rostedt@goodmis.org>    2011-08-05 08:27:49 -0400
committer Ingo Molnar <mingo@elte.hu>             2011-08-14 06:01:07 -0400
commit    d473750b4073f16f23f46f30dc1bd3de45c35754 (patch)
tree      cefe6613df0f129836be5e218a768d02c1c7b814 /kernel
parent    c92211d9b772792a9dea530c042efb4ab5562f50 (diff)
sched/cpupri: Fix memory barriers for vec updates to always be in order
[ This patch actually compiles. Thanks to Mike Galbraith for pointing
  that out. I compiled and booted this patch with no issues. ]

Re-examining the cpupri patch, I see there is a possible race because the
updates of the two priorities' vec->count values are not protected by a
memory barrier.

When an RT runqueue is overloaded and wants to push an RT task to another
runqueue, it scans the RT priority vectors in a loop from lowest priority
to highest.

When we queue or dequeue an RT task that changes a runqueue's highest
priority task, we update the vectors to show that the runqueue is now rated
at a different priority. To do this, we first set the new priority mask and
increment that vector's count, and then update the old priority vector by
decrementing its count.

If we are lowering the runqueue's RT priority rating, this will trigger an
RT pull, and we do not care if we miss pushing to this runqueue or not. But
if we raise the priority and it is still lower than that of an RT task
looking to be pushed, we must make sure that this runqueue is still seen by
the push algorithm (the loop).

Because the loop reads from lowest to highest, and the new priority is set
before the old one is cleared, we will see either the new or the old
priority set, and the vector will be checked.

But! Since there is no memory barrier between the two updates, the decrement
of the old count may become visible before the increment of the new count.
This means the loop may see the old count as zero and skip that vector, and
then also see the new count as zero because it has not been updated yet. A
runqueue that the RT task could be pushed to would then be missed.

A conditional memory barrier is placed between the two vec->count updates;
it is only executed when both updates are performed.

The smp_wmb() calls have also been changed to smp_mb__before_atomic_inc()/
smp_mb__after_atomic_inc(), as these are not needed on archs whose
atomic_inc()/atomic_dec() already provide the necessary ordering.

The smp_rmb() has been moved so that it is executed on every iteration of
the loop, so that each iteration sees the two updates in order; otherwise an
arch is free to optimize the reads of the counters across loop iterations.

Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Nick Piggin <npiggin@kernel.dk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/1312547269.18583.194.camel@gandalf.stny.rr.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
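To make the ordering rule above concrete, here is a minimal userspace sketch;
it is not the kernel code. C11 atomics and pthreads stand in for atomic_inc()/
atomic_dec(), smp_mb__before/after_atomic_inc() and smp_rmb(), and every name
in it (vec_count, raise_prio, scan) is invented for the example. One thread
raises a single "runqueue" from priority 0 to priority 1 in the order
cpupri_set() uses (bump the new count, full barrier, drop the old count),
while another walks the counts from lowest to highest as cpupri_find() does,
with a read barrier after every read. With the barriers in place, no scan can
ever find both counts zero. Build with: gcc -pthread sketch.c

/*
 * Minimal sketch only: userspace analogue of the ordering fixed by this
 * patch, not the kernel code.  C11 atomics and pthreads stand in for
 * atomic_inc()/atomic_dec(), smp_mb__before/after_atomic_inc() and
 * smp_rmb(); vec_count[] stands in for cp->pri_to_cpu[].count.
 */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <unistd.h>

static atomic_int vec_count[2];         /* one counter per priority level */
static atomic_int done;

/* Analogue of cpupri_set() raising a runqueue's priority from 0 to 1. */
static void *raise_prio(void *arg)
{
        (void)arg;
        usleep(1000);                   /* let the scanner start first */

        atomic_fetch_add_explicit(&vec_count[1], 1, memory_order_relaxed);
        /* the full barrier the patch places between the two count updates */
        atomic_thread_fence(memory_order_seq_cst);
        atomic_fetch_sub_explicit(&vec_count[0], 1, memory_order_relaxed);

        atomic_store(&done, 1);
        return NULL;
}

/* Analogue of the cpupri_find() loop: read barrier on every iteration. */
static void *scan(void *arg)
{
        long empty_scans = 0;

        (void)arg;
        while (!atomic_load(&done)) {
                int seen = 0;

                for (int idx = 0; idx < 2; idx++) {
                        int cnt = atomic_load_explicit(&vec_count[idx],
                                                       memory_order_relaxed);
                        /* like the smp_rmb() now done on every iteration */
                        atomic_thread_fence(memory_order_acquire);
                        if (cnt)
                                seen = 1;
                }
                if (!seen)
                        empty_scans++;  /* a pushable runqueue was missed */
        }
        printf("scans that saw no runqueue: %ld (must be 0)\n", empty_scans);
        return NULL;
}

int main(void)
{
        pthread_t u, s;

        atomic_store(&vec_count[0], 1); /* runqueue starts at priority 0 */
        pthread_create(&s, NULL, scan, NULL);
        pthread_create(&u, NULL, raise_prio, NULL);
        pthread_join(u, NULL);
        pthread_join(s, NULL);
        return 0;
}

The seq_cst fence in raise_prio() plays the role of the conditional
smp_mb__after_atomic_inc() the patch adds between the two count updates;
dropping it (or the per-iteration acquire fence in scan()) reopens the window
in which, on a weakly ordered machine, a scan can observe the old count
already zero and the new count not yet incremented.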
Diffstat (limited to 'kernel')
-rw-r--r--  kernel/sched_cpupri.c | 28 +++++++++++++++++++++++++---
 1 file changed, 25 insertions(+), 3 deletions(-)
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index 7761a2669fff..90faffdbdf98 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -73,9 +73,10 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
 
         for (idx = 0; idx < task_pri; idx++) {
                 struct cpupri_vec *vec = &cp->pri_to_cpu[idx];
+                int skip = 0;
 
                 if (!atomic_read(&(vec)->count))
-                        continue;
+                        skip = 1;
                 /*
                  * When looking at the vector, we need to read the counter,
                  * do a memory barrier, then read the mask.
@@ -96,6 +97,10 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
                  */
                 smp_rmb();
 
+                /* Need to do the rmb for every iteration */
+                if (skip)
+                        continue;
+
                 if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
                         continue;
 
@@ -134,6 +139,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
 {
         int *currpri = &cp->cpu_to_pri[cpu];
         int oldpri = *currpri;
+        int do_mb = 0;
 
         newpri = convert_prio(newpri);
 
@@ -158,18 +164,34 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
                  * do a write memory barrier, and then update the count, to
                  * make sure the vector is visible when count is set.
                  */
-                smp_wmb();
+                smp_mb__before_atomic_inc();
                 atomic_inc(&(vec)->count);
+                do_mb = 1;
         }
         if (likely(oldpri != CPUPRI_INVALID)) {
                 struct cpupri_vec *vec = &cp->pri_to_cpu[oldpri];
 
                 /*
+                 * Because the order of modification of the vec->count
+                 * is important, we must make sure that the update
+                 * of the new prio is seen before we decrement the
+                 * old prio. This makes sure that the loop sees
+                 * one or the other when we raise the priority of
+                 * the run queue. We don't care about when we lower the
+                 * priority, as that will trigger an rt pull anyway.
+                 *
+                 * We only need to do a memory barrier if we updated
+                 * the new priority vec.
+                 */
+                if (do_mb)
+                        smp_mb__after_atomic_inc();
+
+                /*
                  * When removing from the vector, we decrement the counter first
                  * do a memory barrier and then clear the mask.
                  */
                 atomic_dec(&(vec)->count);
-                smp_wmb();
+                smp_mb__after_atomic_inc();
                 cpumask_clear_cpu(cpu, vec->mask);
         }
 