Diffstat (limited to 'kernel/sched_cpupri.c')
-rw-r--r--  kernel/sched_cpupri.c | 89
1 files changed, 63 insertions, 26 deletions
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c
index 2722dc1b4138..a86cf9d9eb11 100644
--- a/kernel/sched_cpupri.c
+++ b/kernel/sched_cpupri.c
@@ -47,9 +47,6 @@ static int convert_prio(int prio)
 	return cpupri;
 }
 
-#define for_each_cpupri_active(array, idx)		\
-	for_each_set_bit(idx, array, CPUPRI_NR_PRIORITIES)
-
 /**
  * cpupri_find - find the best (lowest-pri) CPU in the system
  * @cp: The cpupri context
@@ -71,11 +68,38 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p,
 	int idx = 0;
 	int task_pri = convert_prio(p->prio);
 
-	for_each_cpupri_active(cp->pri_active, idx) {
-		struct cpupri_vec *vec = &cp->pri_to_cpu[idx];
-
-		if (idx >= task_pri)
-			break;
+	if (task_pri >= MAX_RT_PRIO)
+		return 0;
+
+	for (idx = 0; idx < task_pri; idx++) {
+		struct cpupri_vec *vec = &cp->pri_to_cpu[idx];
+		int skip = 0;
+
+		if (!atomic_read(&(vec)->count))
+			skip = 1;
+		/*
+		 * When looking at the vector, we need to read the counter,
+		 * do a memory barrier, then read the mask.
+		 *
+		 * Note: This is still all racey, but we can deal with it.
+		 *  Ideally, we only want to look at masks that are set.
+		 *
+		 *  If a mask is not set, then the only thing wrong is that we
+		 *  did a little more work than necessary.
+		 *
+		 *  If we read a zero count but the mask is set, because of the
+		 *  memory barriers, that can only happen when the highest prio
+		 *  task for a run queue has left the run queue, in which case,
+		 *  it will be followed by a pull. If the task we are processing
+		 *  fails to find a proper place to go, that pull request will
+		 *  pull this task if the run queue is running at a lower
+		 *  priority.
+		 */
+		smp_rmb();
+
+		/* Need to do the rmb for every iteration */
+		if (skip)
+			continue;
 
 		if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids)
 			continue;
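For readers tracing the barrier pairing in this hunk, the sketch below is a minimal userspace model of the lookup side, using C11 atomics as stand-ins for the kernel's atomic_t and smp_rmb(). It is not the kernel code: the names vec_model, vec_lookup, count, mask and allowed are all illustrative. As in the hunk, the count is read first, the fence is executed on every call (even when the vector looks empty), and only then is the mask read.

/* Minimal userspace model of the lookup side (illustrative, not kernel code). */
#include <stdatomic.h>
#include <stdbool.h>

struct vec_model {
	atomic_int  count;	/* stands in for vec->count (atomic_t) */
	atomic_uint mask;	/* stands in for vec->mask, one bit per CPU */
};

/* Return the allowed CPUs found in this vector, or 0 if it looks empty. */
static unsigned int vec_lookup(struct vec_model *vec, unsigned int allowed)
{
	bool skip = !atomic_load_explicit(&vec->count, memory_order_relaxed);

	/* Plays the role of smp_rmb(): order the count read before the mask read. */
	atomic_thread_fence(memory_order_acquire);

	if (skip)
		return 0;

	return atomic_load_explicit(&vec->mask, memory_order_relaxed) & allowed;
}

If such a lookup races with an update and reads a stale count, the outcome is the same one the patch comment describes: either a little extra work scanning an empty vector, or a miss that the following RT pull makes up for.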
@@ -115,7 +139,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
 {
 	int *currpri = &cp->cpu_to_pri[cpu];
 	int oldpri = *currpri;
-	unsigned long flags;
+	int do_mb = 0;
 
 	newpri = convert_prio(newpri);
 
@@ -128,32 +152,46 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri)
 	 * If the cpu was currently mapped to a different value, we
 	 * need to map it to the new value then remove the old value.
 	 * Note, we must add the new value first, otherwise we risk the
-	 * cpu being cleared from pri_active, and this cpu could be
-	 * missed for a push or pull.
+	 * cpu being missed by the priority loop in cpupri_find.
 	 */
 	if (likely(newpri != CPUPRI_INVALID)) {
 		struct cpupri_vec *vec = &cp->pri_to_cpu[newpri];
 
-		raw_spin_lock_irqsave(&vec->lock, flags);
-
 		cpumask_set_cpu(cpu, vec->mask);
-		vec->count++;
-		if (vec->count == 1)
-			set_bit(newpri, cp->pri_active);
-
-		raw_spin_unlock_irqrestore(&vec->lock, flags);
+		/*
+		 * When adding a new vector, we update the mask first,
+		 * do a write memory barrier, and then update the count, to
+		 * make sure the vector is visible when count is set.
+		 */
+		smp_mb__before_atomic_inc();
+		atomic_inc(&(vec)->count);
+		do_mb = 1;
 	}
 	if (likely(oldpri != CPUPRI_INVALID)) {
 		struct cpupri_vec *vec = &cp->pri_to_cpu[oldpri];
 
-		raw_spin_lock_irqsave(&vec->lock, flags);
-
-		vec->count--;
-		if (!vec->count)
-			clear_bit(oldpri, cp->pri_active);
+		/*
+		 * Because the order of modification of the vec->count
+		 * is important, we must make sure that the update
+		 * of the new prio is seen before we decrement the
+		 * old prio. This makes sure that the loop sees
+		 * one or the other when we raise the priority of
+		 * the run queue. We don't care about when we lower the
+		 * priority, as that will trigger an rt pull anyway.
+		 *
+		 * We only need to do a memory barrier if we updated
+		 * the new priority vec.
+		 */
+		if (do_mb)
+			smp_mb__after_atomic_inc();
+
+		/*
+		 * When removing from the vector, we decrement the counter first
+		 * do a memory barrier and then clear the mask.
+		 */
+		atomic_dec(&(vec)->count);
+		smp_mb__after_atomic_inc();
 		cpumask_clear_cpu(cpu, vec->mask);
-
-		raw_spin_unlock_irqrestore(&vec->lock, flags);
 	}
 
 	*currpri = newpri;
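The update side of the hunk above can be modeled the same way. The sketch below (again an illustration built on the vec_model type from the earlier sketch, not the patch itself) mirrors the cpupri_set() ordering: set the new mask bit, fence, raise the new count, fence again so the raised count is visible before the old count drops, then decrement the old count, fence, and clear the old mask bit. The release/seq_cst fences are rough stand-ins for smp_mb__before_atomic_inc() and smp_mb__after_atomic_inc().

/* Userspace model of the update side (illustrative, not kernel code). */
static void vec_move(struct vec_model *newvec, struct vec_model *oldvec,
		     unsigned int cpu_bit)
{
	/* Add to the new vector: mask first, barrier, then count. */
	atomic_fetch_or_explicit(&newvec->mask, cpu_bit, memory_order_relaxed);
	atomic_thread_fence(memory_order_release);	/* ~ smp_mb__before_atomic_inc() */
	atomic_fetch_add_explicit(&newvec->count, 1, memory_order_relaxed);

	/* Make the raised new count visible before the old count drops. */
	atomic_thread_fence(memory_order_seq_cst);	/* ~ smp_mb__after_atomic_inc() */

	/* Remove from the old vector: count first, barrier, then mask. */
	atomic_fetch_sub_explicit(&oldvec->count, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_seq_cst);
	atomic_fetch_and_explicit(&oldvec->mask, ~cpu_bit, memory_order_relaxed);
}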
@@ -175,8 +213,7 @@ int cpupri_init(struct cpupri *cp)
 	for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) {
 		struct cpupri_vec *vec = &cp->pri_to_cpu[i];
 
-		raw_spin_lock_init(&vec->lock);
-		vec->count = 0;
+		atomic_set(&vec->count, 0);
 		if (!zalloc_cpumask_var(&vec->mask, GFP_KERNEL))
 			goto cleanup;
 	}
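Putting the two sketches together, a tiny single-threaded harness (hypothetical, for illustration only; the CPU number and vectors are made up) shows the invariant the barriers are after: once a lookup sees a non-zero count for a vector, the corresponding mask bit is already set.

#include <stdio.h>

int main(void)
{
	struct vec_model lo = { 0 };		/* lower-priority vector */
	struct vec_model hi = { 0 };		/* higher-priority vector */
	unsigned int cpu2 = 1u << 2;		/* CPU 2 as a mask bit */

	/* Single-threaded setup: CPU 2 starts out in the low-priority vector. */
	atomic_store_explicit(&lo.mask, cpu2, memory_order_relaxed);
	atomic_store_explicit(&lo.count, 1, memory_order_relaxed);

	/* CPU 2 moves to the high-priority vector, new side updated first. */
	vec_move(&hi, &lo, cpu2);

	printf("hi vector: %#x\n", vec_lookup(&hi, cpu2));	/* prints 0x4 */
	printf("lo vector: %#x\n", vec_lookup(&lo, cpu2));	/* prints 0 */
	return 0;
}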