diff options
author | Peter Zijlstra <a.p.zijlstra@chello.nl> | 2008-01-25 15:08:31 -0500 |
---|---|---|
committer | Ingo Molnar <mingo@elte.hu> | 2008-01-25 15:08:31 -0500 |
commit | d3d74453c34f8fd87674a8cf5b8a327c68f22e99 (patch) | |
tree | cbbd46eb7b81f5c9d39a93604a206ac775084858 /kernel/hrtimer.c | |
parent | 2d44ae4d7135b9aee26439b3523b43473381bc5f (diff) |
hrtimer: fixup the HRTIMER_CB_IRQSAFE_NO_SOFTIRQ fallback
Currently, all highres=off timers are run from softirq context, but
HRTIMER_CB_IRQSAFE_NO_SOFTIRQ timers expect to run from irq context.
Fix this up by splitting the highres=off expiry path into an irq part and
a softirq part, similar to what the highres=on case already does.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Diffstat (limited to 'kernel/hrtimer.c')
-rw-r--r-- | kernel/hrtimer.c | 270 |
1 files changed, 139 insertions, 131 deletions
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c index 9f850ca032b6..061ae28a36a0 100644 --- a/kernel/hrtimer.c +++ b/kernel/hrtimer.c | |||
@@ -325,6 +325,22 @@ unsigned long ktime_divns(const ktime_t kt, s64 div) | |||
325 | } | 325 | } |
326 | #endif /* BITS_PER_LONG >= 64 */ | 326 | #endif /* BITS_PER_LONG >= 64 */ |
327 | 327 | ||
328 | /* | ||
329 | * Check, whether the timer is on the callback pending list | ||
330 | */ | ||
331 | static inline int hrtimer_cb_pending(const struct hrtimer *timer) | ||
332 | { | ||
333 | return timer->state & HRTIMER_STATE_PENDING; | ||
334 | } | ||
335 | |||
336 | /* | ||
337 | * Remove a timer from the callback pending list | ||
338 | */ | ||
339 | static inline void hrtimer_remove_cb_pending(struct hrtimer *timer) | ||
340 | { | ||
341 | list_del_init(&timer->cb_entry); | ||
342 | } | ||
343 | |||
328 | /* High resolution timer related functions */ | 344 | /* High resolution timer related functions */ |
329 | #ifdef CONFIG_HIGH_RES_TIMERS | 345 | #ifdef CONFIG_HIGH_RES_TIMERS |
330 | 346 | ||
@@ -494,29 +510,12 @@ void hres_timers_resume(void) | |||
494 | } | 510 | } |
495 | 511 | ||
496 | /* | 512 | /* |
497 | * Check, whether the timer is on the callback pending list | ||
498 | */ | ||
499 | static inline int hrtimer_cb_pending(const struct hrtimer *timer) | ||
500 | { | ||
501 | return timer->state & HRTIMER_STATE_PENDING; | ||
502 | } | ||
503 | |||
504 | /* | ||
505 | * Remove a timer from the callback pending list | ||
506 | */ | ||
507 | static inline void hrtimer_remove_cb_pending(struct hrtimer *timer) | ||
508 | { | ||
509 | list_del_init(&timer->cb_entry); | ||
510 | } | ||
511 | |||
512 | /* | ||
513 | * Initialize the high resolution related parts of cpu_base | 513 | * Initialize the high resolution related parts of cpu_base |
514 | */ | 514 | */ |
515 | static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) | 515 | static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) |
516 | { | 516 | { |
517 | base->expires_next.tv64 = KTIME_MAX; | 517 | base->expires_next.tv64 = KTIME_MAX; |
518 | base->hres_active = 0; | 518 | base->hres_active = 0; |
519 | INIT_LIST_HEAD(&base->cb_pending); | ||
520 | } | 519 | } |
521 | 520 | ||
522 | /* | 521 | /* |
@@ -524,7 +523,6 @@ static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) | |||
524 | */ | 523 | */ |
525 | static inline void hrtimer_init_timer_hres(struct hrtimer *timer) | 524 | static inline void hrtimer_init_timer_hres(struct hrtimer *timer) |
526 | { | 525 | { |
527 | INIT_LIST_HEAD(&timer->cb_entry); | ||
528 | } | 526 | } |
529 | 527 | ||
530 | /* | 528 | /* |
@@ -618,10 +616,13 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer, | |||
618 | { | 616 | { |
619 | return 0; | 617 | return 0; |
620 | } | 618 | } |
621 | static inline int hrtimer_cb_pending(struct hrtimer *timer) { return 0; } | ||
622 | static inline void hrtimer_remove_cb_pending(struct hrtimer *timer) { } | ||
623 | static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } | 619 | static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } |
624 | static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { } | 620 | static inline void hrtimer_init_timer_hres(struct hrtimer *timer) { } |
621 | static inline int hrtimer_reprogram(struct hrtimer *timer, | ||
622 | struct hrtimer_clock_base *base) | ||
623 | { | ||
624 | return 0; | ||
625 | } | ||
625 | 626 | ||
626 | #endif /* CONFIG_HIGH_RES_TIMERS */ | 627 | #endif /* CONFIG_HIGH_RES_TIMERS */ |
627 | 628 | ||
@@ -1001,6 +1002,7 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id, | |||
1001 | clock_id = CLOCK_MONOTONIC; | 1002 | clock_id = CLOCK_MONOTONIC; |
1002 | 1003 | ||
1003 | timer->base = &cpu_base->clock_base[clock_id]; | 1004 | timer->base = &cpu_base->clock_base[clock_id]; |
1005 | INIT_LIST_HEAD(&timer->cb_entry); | ||
1004 | hrtimer_init_timer_hres(timer); | 1006 | hrtimer_init_timer_hres(timer); |
1005 | 1007 | ||
1006 | #ifdef CONFIG_TIMER_STATS | 1008 | #ifdef CONFIG_TIMER_STATS |
@@ -1030,6 +1032,85 @@ int hrtimer_get_res(const clockid_t which_clock, struct timespec *tp) | |||
1030 | } | 1032 | } |
1031 | EXPORT_SYMBOL_GPL(hrtimer_get_res); | 1033 | EXPORT_SYMBOL_GPL(hrtimer_get_res); |
1032 | 1034 | ||
1035 | static void run_hrtimer_pending(struct hrtimer_cpu_base *cpu_base) | ||
1036 | { | ||
1037 | spin_lock_irq(&cpu_base->lock); | ||
1038 | |||
1039 | while (!list_empty(&cpu_base->cb_pending)) { | ||
1040 | enum hrtimer_restart (*fn)(struct hrtimer *); | ||
1041 | struct hrtimer *timer; | ||
1042 | int restart; | ||
1043 | |||
1044 | timer = list_entry(cpu_base->cb_pending.next, | ||
1045 | struct hrtimer, cb_entry); | ||
1046 | |||
1047 | timer_stats_account_hrtimer(timer); | ||
1048 | |||
1049 | fn = timer->function; | ||
1050 | __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0); | ||
1051 | spin_unlock_irq(&cpu_base->lock); | ||
1052 | |||
1053 | restart = fn(timer); | ||
1054 | |||
1055 | spin_lock_irq(&cpu_base->lock); | ||
1056 | |||
1057 | timer->state &= ~HRTIMER_STATE_CALLBACK; | ||
1058 | if (restart == HRTIMER_RESTART) { | ||
1059 | BUG_ON(hrtimer_active(timer)); | ||
1060 | /* | ||
1061 | * Enqueue the timer, allow reprogramming of the event | ||
1062 | * device | ||
1063 | */ | ||
1064 | enqueue_hrtimer(timer, timer->base, 1); | ||
1065 | } else if (hrtimer_active(timer)) { | ||
1066 | /* | ||
1067 | * If the timer was rearmed on another CPU, reprogram | ||
1068 | * the event device. | ||
1069 | */ | ||
1070 | if (timer->base->first == &timer->node) | ||
1071 | hrtimer_reprogram(timer, timer->base); | ||
1072 | } | ||
1073 | } | ||
1074 | spin_unlock_irq(&cpu_base->lock); | ||
1075 | } | ||
1076 | |||
1077 | static void __run_hrtimer(struct hrtimer *timer) | ||
1078 | { | ||
1079 | struct hrtimer_clock_base *base = timer->base; | ||
1080 | struct hrtimer_cpu_base *cpu_base = base->cpu_base; | ||
1081 | enum hrtimer_restart (*fn)(struct hrtimer *); | ||
1082 | int restart; | ||
1083 | |||
1084 | __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); | ||
1085 | timer_stats_account_hrtimer(timer); | ||
1086 | |||
1087 | fn = timer->function; | ||
1088 | if (timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ) { | ||
1089 | /* | ||
1090 | * Used for scheduler timers, avoid lock inversion with | ||
1091 | * rq->lock and tasklist_lock. | ||
1092 | * | ||
1093 | * These timers are required to deal with enqueue expiry | ||
1094 | * themselves and are not allowed to migrate. | ||
1095 | */ | ||
1096 | spin_unlock(&cpu_base->lock); | ||
1097 | restart = fn(timer); | ||
1098 | spin_lock(&cpu_base->lock); | ||
1099 | } else | ||
1100 | restart = fn(timer); | ||
1101 | |||
1102 | /* | ||
1103 | * Note: We clear the CALLBACK bit after enqueue_hrtimer to avoid | ||
1104 | * reprogramming of the event hardware. This happens at the end of this | ||
1105 | * function anyway. | ||
1106 | */ | ||
1107 | if (restart != HRTIMER_NORESTART) { | ||
1108 | BUG_ON(timer->state != HRTIMER_STATE_CALLBACK); | ||
1109 | enqueue_hrtimer(timer, base, 0); | ||
1110 | } | ||
1111 | timer->state &= ~HRTIMER_STATE_CALLBACK; | ||
1112 | } | ||
1113 | |||
1033 | #ifdef CONFIG_HIGH_RES_TIMERS | 1114 | #ifdef CONFIG_HIGH_RES_TIMERS |
1034 | 1115 | ||
1035 | /* | 1116 | /* |
@@ -1063,9 +1144,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) | |||
1063 | basenow = ktime_add(now, base->offset); | 1144 | basenow = ktime_add(now, base->offset); |
1064 | 1145 | ||
1065 | while ((node = base->first)) { | 1146 | while ((node = base->first)) { |
1066 | enum hrtimer_restart (*fn)(struct hrtimer *); | ||
1067 | struct hrtimer *timer; | 1147 | struct hrtimer *timer; |
1068 | int restart; | ||
1069 | 1148 | ||
1070 | timer = rb_entry(node, struct hrtimer, node); | 1149 | timer = rb_entry(node, struct hrtimer, node); |
1071 | 1150 | ||
@@ -1089,37 +1168,7 @@ void hrtimer_interrupt(struct clock_event_device *dev) | |||
1089 | continue; | 1168 | continue; |
1090 | } | 1169 | } |
1091 | 1170 | ||
1092 | __remove_hrtimer(timer, base, | 1171 | __run_hrtimer(timer); |
1093 | HRTIMER_STATE_CALLBACK, 0); | ||
1094 | timer_stats_account_hrtimer(timer); | ||
1095 | |||
1096 | fn = timer->function; | ||
1097 | if (timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ) { | ||
1098 | /* | ||
1099 | * Used for scheduler timers, avoid lock | ||
1100 | * inversion with rq->lock and tasklist_lock. | ||
1101 | * | ||
1102 | * These timers are required to deal with | ||
1103 | * enqueue expiry themselves and are not | ||
1104 | * allowed to migrate. | ||
1105 | */ | ||
1106 | spin_unlock(&cpu_base->lock); | ||
1107 | restart = fn(timer); | ||
1108 | spin_lock(&cpu_base->lock); | ||
1109 | } else | ||
1110 | restart = fn(timer); | ||
1111 | |||
1112 | /* | ||
1113 | * Note: We clear the CALLBACK bit after | ||
1114 | * enqueue_hrtimer to avoid reprogramming of | ||
1115 | * the event hardware. This happens at the end | ||
1116 | * of this function anyway. | ||
1117 | */ | ||
1118 | if (restart != HRTIMER_NORESTART) { | ||
1119 | BUG_ON(timer->state != HRTIMER_STATE_CALLBACK); | ||
1120 | enqueue_hrtimer(timer, base, 0); | ||
1121 | } | ||
1122 | timer->state &= ~HRTIMER_STATE_CALLBACK; | ||
1123 | } | 1172 | } |
1124 | spin_unlock(&cpu_base->lock); | 1173 | spin_unlock(&cpu_base->lock); |
1125 | base++; | 1174 | base++; |
@@ -1140,52 +1189,41 @@ void hrtimer_interrupt(struct clock_event_device *dev) | |||
1140 | 1189 | ||
1141 | static void run_hrtimer_softirq(struct softirq_action *h) | 1190 | static void run_hrtimer_softirq(struct softirq_action *h) |
1142 | { | 1191 | { |
1143 | struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); | 1192 | run_hrtimer_pending(&__get_cpu_var(hrtimer_bases)); |
1144 | 1193 | } | |
1145 | spin_lock_irq(&cpu_base->lock); | ||
1146 | |||
1147 | while (!list_empty(&cpu_base->cb_pending)) { | ||
1148 | enum hrtimer_restart (*fn)(struct hrtimer *); | ||
1149 | struct hrtimer *timer; | ||
1150 | int restart; | ||
1151 | |||
1152 | timer = list_entry(cpu_base->cb_pending.next, | ||
1153 | struct hrtimer, cb_entry); | ||
1154 | 1194 | ||
1155 | timer_stats_account_hrtimer(timer); | 1195 | #endif /* CONFIG_HIGH_RES_TIMERS */ |
1156 | 1196 | ||
1157 | fn = timer->function; | 1197 | /* |
1158 | __remove_hrtimer(timer, timer->base, HRTIMER_STATE_CALLBACK, 0); | 1198 | * Called from timer softirq every jiffy, expire hrtimers: |
1159 | spin_unlock_irq(&cpu_base->lock); | 1199 | * |
1200 | * For HRT its the fall back code to run the softirq in the timer | ||
1201 | * softirq context in case the hrtimer initialization failed or has | ||
1202 | * not been done yet. | ||
1203 | */ | ||
1204 | void hrtimer_run_pending(void) | ||
1205 | { | ||
1206 | struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); | ||
1160 | 1207 | ||
1161 | restart = fn(timer); | 1208 | if (hrtimer_hres_active()) |
1209 | return; | ||
1162 | 1210 | ||
1163 | spin_lock_irq(&cpu_base->lock); | 1211 | /* |
1212 | * This _is_ ugly: We have to check in the softirq context, | ||
1213 | * whether we can switch to highres and / or nohz mode. The | ||
1214 | * clocksource switch happens in the timer interrupt with | ||
1215 | * xtime_lock held. Notification from there only sets the | ||
1216 | * check bit in the tick_oneshot code, otherwise we might | ||
1217 | * deadlock vs. xtime_lock. | ||
1218 | */ | ||
1219 | if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) | ||
1220 | hrtimer_switch_to_hres(); | ||
1164 | 1221 | ||
1165 | timer->state &= ~HRTIMER_STATE_CALLBACK; | 1222 | run_hrtimer_pending(cpu_base); |
1166 | if (restart == HRTIMER_RESTART) { | ||
1167 | BUG_ON(hrtimer_active(timer)); | ||
1168 | /* | ||
1169 | * Enqueue the timer, allow reprogramming of the event | ||
1170 | * device | ||
1171 | */ | ||
1172 | enqueue_hrtimer(timer, timer->base, 1); | ||
1173 | } else if (hrtimer_active(timer)) { | ||
1174 | /* | ||
1175 | * If the timer was rearmed on another CPU, reprogram | ||
1176 | * the event device. | ||
1177 | */ | ||
1178 | if (timer->base->first == &timer->node) | ||
1179 | hrtimer_reprogram(timer, timer->base); | ||
1180 | } | ||
1181 | } | ||
1182 | spin_unlock_irq(&cpu_base->lock); | ||
1183 | } | 1223 | } |
1184 | 1224 | ||
1185 | #endif /* CONFIG_HIGH_RES_TIMERS */ | ||
1186 | |||
1187 | /* | 1225 | /* |
1188 | * Expire the per base hrtimer-queue: | 1226 | * Called from hardirq context every jiffy |
1189 | */ | 1227 | */ |
1190 | static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base, | 1228 | static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base, |
1191 | int index) | 1229 | int index) |
@@ -1199,46 +1237,27 @@ static inline void run_hrtimer_queue(struct hrtimer_cpu_base *cpu_base, | |||
1199 | if (base->get_softirq_time) | 1237 | if (base->get_softirq_time) |
1200 | base->softirq_time = base->get_softirq_time(); | 1238 | base->softirq_time = base->get_softirq_time(); |
1201 | 1239 | ||
1202 | spin_lock_irq(&cpu_base->lock); | 1240 | spin_lock(&cpu_base->lock); |
1203 | 1241 | ||
1204 | while ((node = base->first)) { | 1242 | while ((node = base->first)) { |
1205 | struct hrtimer *timer; | 1243 | struct hrtimer *timer; |
1206 | enum hrtimer_restart (*fn)(struct hrtimer *); | ||
1207 | int restart; | ||
1208 | 1244 | ||
1209 | timer = rb_entry(node, struct hrtimer, node); | 1245 | timer = rb_entry(node, struct hrtimer, node); |
1210 | if (base->softirq_time.tv64 <= timer->expires.tv64) | 1246 | if (base->softirq_time.tv64 <= timer->expires.tv64) |
1211 | break; | 1247 | break; |
1212 | 1248 | ||
1213 | #ifdef CONFIG_HIGH_RES_TIMERS | 1249 | if (timer->cb_mode == HRTIMER_CB_SOFTIRQ) { |
1214 | WARN_ON_ONCE(timer->cb_mode == HRTIMER_CB_IRQSAFE_NO_SOFTIRQ); | 1250 | __remove_hrtimer(timer, base, HRTIMER_STATE_PENDING, 0); |
1215 | #endif | 1251 | list_add_tail(&timer->cb_entry, |
1216 | timer_stats_account_hrtimer(timer); | 1252 | &base->cpu_base->cb_pending); |
1217 | 1253 | continue; | |
1218 | fn = timer->function; | ||
1219 | __remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0); | ||
1220 | spin_unlock_irq(&cpu_base->lock); | ||
1221 | |||
1222 | restart = fn(timer); | ||
1223 | |||
1224 | spin_lock_irq(&cpu_base->lock); | ||
1225 | |||
1226 | timer->state &= ~HRTIMER_STATE_CALLBACK; | ||
1227 | if (restart != HRTIMER_NORESTART) { | ||
1228 | BUG_ON(hrtimer_active(timer)); | ||
1229 | enqueue_hrtimer(timer, base, 0); | ||
1230 | } | 1254 | } |
1255 | |||
1256 | __run_hrtimer(timer); | ||
1231 | } | 1257 | } |
1232 | spin_unlock_irq(&cpu_base->lock); | 1258 | spin_unlock(&cpu_base->lock); |
1233 | } | 1259 | } |
1234 | 1260 | ||
1235 | /* | ||
1236 | * Called from timer softirq every jiffy, expire hrtimers: | ||
1237 | * | ||
1238 | * For HRT its the fall back code to run the softirq in the timer | ||
1239 | * softirq context in case the hrtimer initialization failed or has | ||
1240 | * not been done yet. | ||
1241 | */ | ||
1242 | void hrtimer_run_queues(void) | 1261 | void hrtimer_run_queues(void) |
1243 | { | 1262 | { |
1244 | struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); | 1263 | struct hrtimer_cpu_base *cpu_base = &__get_cpu_var(hrtimer_bases); |
@@ -1247,18 +1266,6 @@ void hrtimer_run_queues(void) | |||
1247 | if (hrtimer_hres_active()) | 1266 | if (hrtimer_hres_active()) |
1248 | return; | 1267 | return; |
1249 | 1268 | ||
1250 | /* | ||
1251 | * This _is_ ugly: We have to check in the softirq context, | ||
1252 | * whether we can switch to highres and / or nohz mode. The | ||
1253 | * clocksource switch happens in the timer interrupt with | ||
1254 | * xtime_lock held. Notification from there only sets the | ||
1255 | * check bit in the tick_oneshot code, otherwise we might | ||
1256 | * deadlock vs. xtime_lock. | ||
1257 | */ | ||
1258 | if (tick_check_oneshot_change(!hrtimer_is_hres_enabled())) | ||
1259 | if (hrtimer_switch_to_hres()) | ||
1260 | return; | ||
1261 | |||
1262 | hrtimer_get_softirq_time(cpu_base); | 1269 | hrtimer_get_softirq_time(cpu_base); |
1263 | 1270 | ||
1264 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) | 1271 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) |
@@ -1407,6 +1414,7 @@ static void __cpuinit init_hrtimers_cpu(int cpu) | |||
1407 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) | 1414 | for (i = 0; i < HRTIMER_MAX_CLOCK_BASES; i++) |
1408 | cpu_base->clock_base[i].cpu_base = cpu_base; | 1415 | cpu_base->clock_base[i].cpu_base = cpu_base; |
1409 | 1416 | ||
1417 | INIT_LIST_HEAD(&cpu_base->cb_pending); | ||
1410 | hrtimer_init_hres(cpu_base); | 1418 | hrtimer_init_hres(cpu_base); |
1411 | } | 1419 | } |
1412 | 1420 | ||