author		Peter Zijlstra <peterz@infradead.org>		2015-06-11 08:46:48 -0400
committer	Thomas Gleixner <tglx@linutronix.de>		2015-06-18 18:09:56 -0400
commit		887d9dc989eb0154492e41e7c07492edbb088ba1
tree		53f3c08252d60c9ceb32fa488c1e0ab77a95b9ef	/kernel/time/hrtimer.c
parent		c4bfa3f5f906aee2e084c5b1fb15caf876338ef8
hrtimer: Allow hrtimer::function() to free the timer
Currently an hrtimer callback function cannot free its own timer
because __run_hrtimer() still needs to clear HRTIMER_STATE_CALLBACK
after it. Freeing the timer would result in a clear use-after-free.
Solve this by using a scheme similar to regular timers; track the
current running timer in hrtimer_cpu_base::running.
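
For illustration only (this sketch is not part of the patch, and the names my_obj and my_obj_timeout are hypothetical): because __run_hrtimer() now records the running timer in cpu_base->running instead of flagging timer->state, a callback can release the structure that embeds its own hrtimer, roughly like this:

#include <linux/kernel.h>
#include <linux/hrtimer.h>
#include <linux/slab.h>

/*
 * Hypothetical sketch: the object owning the timer is freed from its own
 * expiry callback. This is only safe because, after this patch,
 * __run_hrtimer() no longer writes timer->state once the callback returned.
 */
struct my_obj {
	struct hrtimer timer;
	/* ... payload ... */
};

static enum hrtimer_restart my_obj_timeout(struct hrtimer *t)
{
	struct my_obj *obj = container_of(t, struct my_obj, timer);

	/* handle the timeout, then drop the object that owns the timer */
	kfree(obj);

	/* a freed timer must obviously not be restarted */
	return HRTIMER_NORESTART;
}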
Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: ktkhai@parallels.com
Cc: rostedt@goodmis.org
Cc: juri.lelli@gmail.com
Cc: pang.xunlei@linaro.org
Cc: wanpeng.li@linux.intel.com
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: umgwanakikbuti@gmail.com
Link: http://lkml.kernel.org/r/20150611124743.471563047@infradead.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'kernel/time/hrtimer.c')
-rw-r--r--	kernel/time/hrtimer.c	114
1 file changed, 91 insertions(+), 23 deletions(-)
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 1604157374d7..f026413de4d6 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -67,6 +67,7 @@
 DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
 {
 	.lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock),
+	.seq = SEQCNT_ZERO(hrtimer_bases.seq),
 	.clock_base =
 	{
 		{
@@ -111,6 +112,18 @@ static inline int hrtimer_clockid_to_base(clockid_t clock_id)
 #ifdef CONFIG_SMP
 
 /*
+ * We require the migration_base for lock_hrtimer_base()/switch_hrtimer_base()
+ * such that hrtimer_callback_running() can unconditionally dereference
+ * timer->base->cpu_base
+ */
+static struct hrtimer_cpu_base migration_cpu_base = {
+	.seq = SEQCNT_ZERO(migration_cpu_base),
+	.clock_base = { { .cpu_base = &migration_cpu_base, }, },
+};
+
+#define migration_base	migration_cpu_base.clock_base[0]
+
+/*
  * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
  * means that all timers which are tied to this base via timer->base are
  * locked, and the base itself is locked too.
@@ -119,8 +132,8 @@ static inline int hrtimer_clockid_to_base(clockid_t clock_id)
  * be found on the lists/queues.
  *
  * When the timer's base is locked, and the timer removed from list, it is
- * possible to set timer->base = NULL and drop the lock: the timer remains
- * locked.
+ * possible to set timer->base = &migration_base and drop the lock: the timer
+ * remains locked.
  */
 static
 struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
@@ -130,7 +143,7 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
 
 	for (;;) {
 		base = timer->base;
-		if (likely(base != NULL)) {
+		if (likely(base != &migration_base)) {
 			raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
 			if (likely(base == timer->base))
 				return base;
@@ -194,8 +207,8 @@ again:
 	if (unlikely(hrtimer_callback_running(timer)))
 		return base;
 
-	/* See the comment in lock_timer_base() */
-	timer->base = NULL;
+	/* See the comment in lock_hrtimer_base() */
+	timer->base = &migration_base;
 	raw_spin_unlock(&base->cpu_base->lock);
 	raw_spin_lock(&new_base->cpu_base->lock);
 
@@ -838,11 +851,7 @@ static int enqueue_hrtimer(struct hrtimer *timer,
 
 	base->cpu_base->active_bases |= 1 << base->index;
 
-	/*
-	 * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the
-	 * state of a possibly running callback.
-	 */
-	timer->state |= HRTIMER_STATE_ENQUEUED;
+	timer->state = HRTIMER_STATE_ENQUEUED;
 
 	return timerqueue_add(&base->active, &timer->node);
 }
@@ -907,14 +916,9 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool rest
 		timer_stats_hrtimer_clear_start_info(timer);
 		reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
 
-		if (!restart) {
-			/*
-			 * We must preserve the CALLBACK state flag here,
-			 * otherwise we could move the timer base in
-			 * switch_hrtimer_base.
-			 */
-			state &= HRTIMER_STATE_CALLBACK;
-		}
+		if (!restart)
+			state = HRTIMER_STATE_INACTIVE;
+
 		__remove_hrtimer(timer, base, state, reprogram);
 		return 1;
 	}
@@ -1115,6 +1119,51 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 }
 EXPORT_SYMBOL_GPL(hrtimer_init);
 
+/*
+ * A timer is active, when it is enqueued into the rbtree or the
+ * callback function is running or it's in the state of being migrated
+ * to another cpu.
+ *
+ * It is important for this function to not return a false negative.
+ */
+bool hrtimer_active(const struct hrtimer *timer)
+{
+	struct hrtimer_cpu_base *cpu_base;
+	unsigned int seq;
+
+	do {
+		cpu_base = READ_ONCE(timer->base->cpu_base);
+		seq = raw_read_seqcount_begin(&cpu_base->seq);
+
+		if (timer->state != HRTIMER_STATE_INACTIVE ||
+		    cpu_base->running == timer)
+			return true;
+
+	} while (read_seqcount_retry(&cpu_base->seq, seq) ||
+		 cpu_base != READ_ONCE(timer->base->cpu_base));
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(hrtimer_active);
+
+/*
+ * The write_seqcount_barrier()s in __run_hrtimer() split the thing into 3
+ * distinct sections:
+ *
+ *  - queued:	the timer is queued
+ *  - callback:	the timer is being ran
+ *  - post:	the timer is inactive or (re)queued
+ *
+ * On the read side we ensure we observe timer->state and cpu_base->running
+ * from the same section, if anything changed while we looked at it, we retry.
+ * This includes timer->base changing because sequence numbers alone are
+ * insufficient for that.
+ *
+ * The sequence numbers are required because otherwise we could still observe
+ * a false negative if the read side got smeared over multiple consequtive
+ * __run_hrtimer() invocations.
+ */
+
 static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
 			  struct hrtimer_clock_base *base,
 			  struct hrtimer *timer, ktime_t *now)
@@ -1122,10 +1171,21 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
 	enum hrtimer_restart (*fn)(struct hrtimer *);
 	int restart;
 
-	WARN_ON(!irqs_disabled());
+	lockdep_assert_held(&cpu_base->lock);
 
 	debug_deactivate(timer);
-	__remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
+	cpu_base->running = timer;
+
+	/*
+	 * Separate the ->running assignment from the ->state assignment.
+	 *
+	 * As with a regular write barrier, this ensures the read side in
+	 * hrtimer_active() cannot observe cpu_base->running == NULL &&
+	 * timer->state == INACTIVE.
+	 */
+	raw_write_seqcount_barrier(&cpu_base->seq);
+
+	__remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
 	timer_stats_account_hrtimer(timer);
 	fn = timer->function;
 
@@ -1141,7 +1201,7 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
 	raw_spin_lock(&cpu_base->lock);
 
 	/*
-	 * Note: We clear the CALLBACK bit after enqueue_hrtimer and
+	 * Note: We clear the running state after enqueue_hrtimer and
 	 * we do not reprogramm the event hardware. Happens either in
 	 * hrtimer_start_range_ns() or in hrtimer_interrupt()
 	 *
@@ -1153,9 +1213,17 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
 	    !(timer->state & HRTIMER_STATE_ENQUEUED))
 		enqueue_hrtimer(timer, base);
 
-	WARN_ON_ONCE(!(timer->state & HRTIMER_STATE_CALLBACK));
+	/*
+	 * Separate the ->running assignment from the ->state assignment.
+	 *
+	 * As with a regular write barrier, this ensures the read side in
+	 * hrtimer_active() cannot observe cpu_base->running == NULL &&
+	 * timer->state == INACTIVE.
+	 */
+	raw_write_seqcount_barrier(&cpu_base->seq);
 
-	timer->state &= ~HRTIMER_STATE_CALLBACK;
+	WARN_ON_ONCE(cpu_base->running != timer);
+	cpu_base->running = NULL;
 }
 
 static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now)
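
Usage note (an assumption, not part of the patch; my_obj as in the earlier sketch): the seqcount scheme described above makes hrtimer_active() deliberately conservative, it may report a timer as active slightly longer than strictly necessary but never reports an active timer as inactive, so a caller can treat a false return as a completion guarantee, for instance:

/*
 * Hypothetical sketch: spin until the timer is neither enqueued, nor running
 * its callback, nor migrating between bases. Real code would normally use
 * hrtimer_cancel(); this only illustrates the no-false-negative guarantee.
 */
static void my_obj_wait_idle(struct my_obj *obj)
{
	while (hrtimer_active(&obj->timer))
		cpu_relax();
}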