diff options
Diffstat (limited to 'kernel/hrtimer.c')
-rw-r--r-- | kernel/hrtimer.c | 110 |
1 files changed, 64 insertions, 46 deletions
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index 052a0f53e4eb..e2f91ecc01a8 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -135,6 +135,46 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer, | |||
135 | } | 135 | } |
136 | } | 136 | } |
137 | 137 | ||
138 | |||
139 | /* | ||
140 | * Get the preferred target CPU for NOHZ | ||
141 | */ | ||
142 | static int hrtimer_get_target(int this_cpu, int pinned) | ||
143 | { | ||
144 | #ifdef CONFIG_NO_HZ | ||
145 | if (!pinned && get_sysctl_timer_migration() && idle_cpu(this_cpu)) { | ||
146 | int preferred_cpu = get_nohz_load_balancer(); | ||
147 | |||
148 | if (preferred_cpu >= 0) | ||
149 | return preferred_cpu; | ||
150 | } | ||
151 | #endif | ||
152 | return this_cpu; | ||
153 | } | ||
154 | |||
155 | /* | ||
156 | * With HIGHRES=y we do not migrate the timer when it is expiring | ||
157 | * before the next event on the target cpu because we cannot reprogram | ||
158 | * the target cpu hardware and we would cause it to fire late. | ||
159 | * | ||
160 | * Called with cpu_base->lock of target cpu held. | ||
161 | */ | ||
162 | static int | ||
163 | hrtimer_check_target(struct hrtimer *timer, struct hrtimer_clock_base *new_base) | ||
164 | { | ||
165 | #ifdef CONFIG_HIGH_RES_TIMERS | ||
166 | ktime_t expires; | ||
167 | |||
168 | if (!new_base->cpu_base->hres_active) | ||
169 | return 0; | ||
170 | |||
171 | expires = ktime_sub(hrtimer_get_expires(timer), new_base->offset); | ||
172 | return expires.tv64 <= new_base->cpu_base->expires_next.tv64; | ||
173 | #else | ||
174 | return 0; | ||
175 | #endif | ||
176 | } | ||
177 | |||
138 | /* | 178 | /* |
139 | * Switch the timer base to the current CPU when possible. | 179 | * Switch the timer base to the current CPU when possible. |
140 | */ | 180 | */ |
@@ -144,16 +184,8 @@ switch_hrtimer_base(struct hrtimer *timer, struct hrtimer_clock_base *base, | |||
144 | { | 184 | { |
145 | struct hrtimer_clock_base *new_base; | 185 | struct hrtimer_clock_base *new_base; |
146 | struct hrtimer_cpu_base *new_cpu_base; | 186 | struct hrtimer_cpu_base *new_cpu_base; |
147 | int cpu, preferred_cpu = -1; | 187 | int this_cpu = smp_processor_id(); |
148 | 188 | int cpu = hrtimer_get_target(this_cpu, pinned); | |
149 | cpu = smp_processor_id(); | ||
150 | #if defined(CONFIG_NO_HZ) && defined(CONFIG_SMP) | ||
151 | if (!pinned && get_sysctl_timer_migration() && idle_cpu(cpu)) { | ||
152 | preferred_cpu = get_nohz_load_balancer(); | ||
153 | if (preferred_cpu >= 0) | ||
154 | cpu = preferred_cpu; | ||
155 | } | ||
156 | #endif | ||
157 | 189 | ||
158 | again: | 190 | again: |
159 | new_cpu_base = &per_cpu(hrtimer_bases, cpu); | 191 | new_cpu_base = &per_cpu(hrtimer_bases, cpu); |
@@ -161,7 +193,7 @@ again: | |||
161 | 193 | ||
162 | if (base != new_base) { | 194 | if (base != new_base) { |
163 | /* | 195 | /* |
164 | * We are trying to schedule the timer on the local CPU. | 196 | * We are trying to move the timer to new_base. |
165 | * However we can't change timer's base while it is running, | 197 | * However we can't change timer's base while it is running, |
166 | * so we keep it on the same CPU. No hassle vs. reprogramming | 198 | * so we keep it on the same CPU. No hassle vs. reprogramming |
167 | * the event source in the high resolution case. The softirq | 199 | * the event source in the high resolution case. The softirq |
@@ -177,38 +209,12 @@ again: | |||
177 | spin_unlock(&base->cpu_base->lock); | 209 | spin_unlock(&base->cpu_base->lock); |
178 | spin_lock(&new_base->cpu_base->lock); | 210 | spin_lock(&new_base->cpu_base->lock); |
179 | 211 | ||
180 | /* Optimized away for NOHZ=n SMP=n */ | 212 | if (cpu != this_cpu && hrtimer_check_target(timer, new_base)) { |
181 | if (cpu == preferred_cpu) { | 213 | cpu = this_cpu; |
182 | /* Calculate clock monotonic expiry time */ | 214 | spin_unlock(&new_base->cpu_base->lock); |
183 | #ifdef CONFIG_HIGH_RES_TIMERS | 215 | spin_lock(&base->cpu_base->lock); |
184 | ktime_t expires = ktime_sub(hrtimer_get_expires(timer), | 216 | timer->base = base; |
185 | new_base->offset); | 217 | goto again; |
186 | #else | ||
187 | ktime_t expires = hrtimer_get_expires(timer); | ||
188 | #endif | ||
189 | |||
190 | /* | ||
191 | * Get the next event on target cpu from the | ||
192 | * clock events layer. | ||
193 | * This covers the highres=off nohz=on case as well. | ||
194 | */ | ||
195 | ktime_t next = clockevents_get_next_event(cpu); | ||
196 | |||
197 | ktime_t delta = ktime_sub(expires, next); | ||
198 | |||
199 | /* | ||
200 | * We do not migrate the timer when it is expiring | ||
201 | * before the next event on the target cpu because | ||
202 | * we cannot reprogram the target cpu hardware and | ||
203 | * we would cause it to fire late. | ||
204 | */ | ||
205 | if (delta.tv64 < 0) { | ||
206 | cpu = smp_processor_id(); | ||
207 | spin_unlock(&new_base->cpu_base->lock); | ||
208 | spin_lock(&base->cpu_base->lock); | ||
209 | timer->base = base; | ||
210 | goto again; | ||
211 | } | ||
212 | } | 218 | } |
213 | timer->base = new_base; | 219 | timer->base = new_base; |
214 | } | 220 | } |
@@ -1219,14 +1225,22 @@ void hrtimer_interrupt(struct clock_event_device *dev) | |||
1219 | 1225 | ||
1220 | expires_next.tv64 = KTIME_MAX; | 1226 | expires_next.tv64 = KTIME_MAX; |
1221 | 1227 | ||
1228 | spin_lock(&cpu_base->lock); | ||
1229 | /* | ||
1230 | * We set expires_next to KTIME_MAX here with cpu_base->lock | |
1231 | * held to prevent a timer from being enqueued in our queue via | |
1232 | * the migration code. This does not affect enqueueing of | ||
1233 | * timers which run their callback and need to be requeued on | ||
1234 | * this CPU. | ||
1235 | */ | ||
1236 | cpu_base->expires_next.tv64 = KTIME_MAX; | ||
1237 | |||
1222 | base = cpu_base->clock_base; | 1238 | base = cpu_base->clock_base; |
1223 | 1239 | ||
1224 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { | 1240 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) { |
1225 | ktime_t basenow; | 1241 | ktime_t basenow; |
1226 | struct rb_node *node; | 1242 | struct rb_node *node; |
1227 | 1243 | ||
1228 | spin_lock(&cpu_base->lock); | ||
1229 | |||
1230 | basenow = ktime_add(now, base->offset); | 1244 | basenow = ktime_add(now, base->offset); |
1231 | 1245 | ||
1232 | while ((node = base->first)) { | 1246 | while ((node = base->first)) { |
@@ -1259,11 +1273,15 @@ void hrtimer_interrupt(struct clock_event_device *dev) | |||
1259 | 1273 | ||
1260 | __run_hrtimer(timer); | 1274 | __run_hrtimer(timer); |
1261 | } | 1275 | } |
1262 | spin_unlock(&cpu_base->lock); | ||
1263 | base++; | 1276 | base++; |
1264 | } | 1277 | } |
1265 | 1278 | ||
1279 | /* | ||
1280 | * Store the new expiry value so the migration code can verify | ||
1281 | * against it. | ||
1282 | */ | ||
1266 | cpu_base->expires_next = expires_next; | 1283 | cpu_base->expires_next = expires_next; |
1284 | spin_unlock(&cpu_base->lock); | ||
1267 | 1285 | ||
1268 | /* Reprogramming necessary ? */ | 1286 | /* Reprogramming necessary ? */ |
1269 | if (expires_next.tv64 != KTIME_MAX) { | 1287 | if (expires_next.tv64 != KTIME_MAX) { |