diff options
Diffstat (limited to 'kernel/sched_cpupri.c')
| -rw-r--r-- | kernel/sched_cpupri.c | 89 |
1 files changed, 63 insertions, 26 deletions
diff --git a/kernel/sched_cpupri.c b/kernel/sched_cpupri.c index 2722dc1b4138..a86cf9d9eb11 100644 --- a/kernel/sched_cpupri.c +++ b/kernel/sched_cpupri.c | |||
| @@ -47,9 +47,6 @@ static int convert_prio(int prio) | |||
| 47 | return cpupri; | 47 | return cpupri; |
| 48 | } | 48 | } |
| 49 | 49 | ||
| 50 | #define for_each_cpupri_active(array, idx) \ | ||
| 51 | for_each_set_bit(idx, array, CPUPRI_NR_PRIORITIES) | ||
| 52 | |||
| 53 | /** | 50 | /** |
| 54 | * cpupri_find - find the best (lowest-pri) CPU in the system | 51 | * cpupri_find - find the best (lowest-pri) CPU in the system |
| 55 | * @cp: The cpupri context | 52 | * @cp: The cpupri context |
| @@ -71,11 +68,38 @@ int cpupri_find(struct cpupri *cp, struct task_struct *p, | |||
| 71 | int idx = 0; | 68 | int idx = 0; |
| 72 | int task_pri = convert_prio(p->prio); | 69 | int task_pri = convert_prio(p->prio); |
| 73 | 70 | ||
| 74 | for_each_cpupri_active(cp->pri_active, idx) { | 71 | if (task_pri >= MAX_RT_PRIO) |
| 75 | struct cpupri_vec *vec = &cp->pri_to_cpu[idx]; | 72 | return 0; |
| 76 | 73 | ||
| 77 | if (idx >= task_pri) | 74 | for (idx = 0; idx < task_pri; idx++) { |
| 78 | break; | 75 | struct cpupri_vec *vec = &cp->pri_to_cpu[idx]; |
| 76 | int skip = 0; | ||
| 77 | |||
| 78 | if (!atomic_read(&(vec)->count)) | ||
| 79 | skip = 1; | ||
| 80 | /* | ||
| 81 | * When looking at the vector, we need to read the counter, | ||
| 82 | * do a memory barrier, then read the mask. | ||
| 83 | * | ||
| 84 | * Note: This is still all racy, but we can deal with it. | ||
| 85 | * Ideally, we only want to look at masks that are set. | ||
| 86 | * | ||
| 87 | * If a mask is not set, then the only thing wrong is that we | ||
| 88 | * did a little more work than necessary. | ||
| 89 | * | ||
| 90 | * If we read a zero count but the mask is set, because of the | ||
| 91 | * memory barriers, that can only happen when the highest prio | ||
| 92 | * task for a run queue has left the run queue, in which case, | ||
| 93 | * it will be followed by a pull. If the task we are processing | ||
| 94 | * fails to find a proper place to go, that pull request will | ||
| 95 | * pull this task if the run queue is running at a lower | ||
| 96 | * priority. | ||
| 97 | */ | ||
| 98 | smp_rmb(); | ||
| 99 | |||
| 100 | /* Need to do the rmb for every iteration */ | ||
| 101 | if (skip) | ||
| 102 | continue; | ||
| 79 | 103 | ||
| 80 | if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) | 104 | if (cpumask_any_and(&p->cpus_allowed, vec->mask) >= nr_cpu_ids) |
| 81 | continue; | 105 | continue; |
| @@ -115,7 +139,7 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri) | |||
| 115 | { | 139 | { |
| 116 | int *currpri = &cp->cpu_to_pri[cpu]; | 140 | int *currpri = &cp->cpu_to_pri[cpu]; |
| 117 | int oldpri = *currpri; | 141 | int oldpri = *currpri; |
| 118 | unsigned long flags; | 142 | int do_mb = 0; |
| 119 | 143 | ||
| 120 | newpri = convert_prio(newpri); | 144 | newpri = convert_prio(newpri); |
| 121 | 145 | ||
| @@ -128,32 +152,46 @@ void cpupri_set(struct cpupri *cp, int cpu, int newpri) | |||
| 128 | * If the cpu was currently mapped to a different value, we | 152 | * If the cpu was currently mapped to a different value, we |
| 129 | * need to map it to the new value then remove the old value. | 153 | * need to map it to the new value then remove the old value. |
| 130 | * Note, we must add the new value first, otherwise we risk the | 154 | * Note, we must add the new value first, otherwise we risk the |
| 131 | * cpu being cleared from pri_active, and this cpu could be | 155 | * cpu being missed by the priority loop in cpupri_find. |
| 132 | * missed for a push or pull. | ||
| 133 | */ | 156 | */ |
| 134 | if (likely(newpri != CPUPRI_INVALID)) { | 157 | if (likely(newpri != CPUPRI_INVALID)) { |
| 135 | struct cpupri_vec *vec = &cp->pri_to_cpu[newpri]; | 158 | struct cpupri_vec *vec = &cp->pri_to_cpu[newpri]; |
| 136 | 159 | ||
| 137 | raw_spin_lock_irqsave(&vec->lock, flags); | ||
| 138 | |||
| 139 | cpumask_set_cpu(cpu, vec->mask); | 160 | cpumask_set_cpu(cpu, vec->mask); |
| 140 | vec->count++; | 161 | /* |
| 141 | if (vec->count == 1) | 162 | * When adding a new vector, we update the mask first, |
| 142 | set_bit(newpri, cp->pri_active); | 163 | * do a write memory barrier, and then update the count, to |
| 143 | 164 | * make sure the vector is visible when count is set. | |
| 144 | raw_spin_unlock_irqrestore(&vec->lock, flags); | 165 | */ |
| 166 | smp_mb__before_atomic_inc(); | ||
| 167 | atomic_inc(&(vec)->count); | ||
| 168 | do_mb = 1; | ||
| 145 | } | 169 | } |
| 146 | if (likely(oldpri != CPUPRI_INVALID)) { | 170 | if (likely(oldpri != CPUPRI_INVALID)) { |
| 147 | struct cpupri_vec *vec = &cp->pri_to_cpu[oldpri]; | 171 | struct cpupri_vec *vec = &cp->pri_to_cpu[oldpri]; |
| 148 | 172 | ||
| 149 | raw_spin_lock_irqsave(&vec->lock, flags); | 173 | /* |
| 150 | 174 | * Because the order of modification of the vec->count | |
| 151 | vec->count--; | 175 | * is important, we must make sure that the update |
| 152 | if (!vec->count) | 176 | * of the new prio is seen before we decrement the |
| 153 | clear_bit(oldpri, cp->pri_active); | 177 | * old prio. This makes sure that the loop sees |
| 178 | * one or the other when we raise the priority of | ||
| 179 | * the run queue. We don't care about when we lower the | ||
| 180 | * priority, as that will trigger an rt pull anyway. | ||
| 181 | * | ||
| 182 | * We only need to do a memory barrier if we updated | ||
| 183 | * the new priority vec. | ||
| 184 | */ | ||
| 185 | if (do_mb) | ||
| 186 | smp_mb__after_atomic_inc(); | ||
| 187 | |||
| 188 | /* | ||
| 189 | * When removing from the vector, we decrement the counter first, | ||
| 190 | * do a memory barrier, and then clear the mask. | ||
| 191 | */ | ||
| 192 | atomic_dec(&(vec)->count); | ||
| 193 | smp_mb__after_atomic_inc(); | ||
| 154 | cpumask_clear_cpu(cpu, vec->mask); | 194 | cpumask_clear_cpu(cpu, vec->mask); |
| 155 | |||
| 156 | raw_spin_unlock_irqrestore(&vec->lock, flags); | ||
| 157 | } | 195 | } |
| 158 | 196 | ||
| 159 | *currpri = newpri; | 197 | *currpri = newpri; |
| @@ -175,8 +213,7 @@ int cpupri_init(struct cpupri *cp) | |||
| 175 | for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) { | 213 | for (i = 0; i < CPUPRI_NR_PRIORITIES; i++) { |
| 176 | struct cpupri_vec *vec = &cp->pri_to_cpu[i]; | 214 | struct cpupri_vec *vec = &cp->pri_to_cpu[i]; |
| 177 | 215 | ||
| 178 | raw_spin_lock_init(&vec->lock); | 216 | atomic_set(&vec->count, 0); |
| 179 | vec->count = 0; | ||
| 180 | if (!zalloc_cpumask_var(&vec->mask, GFP_KERNEL)) | 217 | if (!zalloc_cpumask_var(&vec->mask, GFP_KERNEL)) |
| 181 | goto cleanup; | 218 | goto cleanup; |
| 182 | } | 219 | } |
