author		Peter Zijlstra <peterz@infradead.org>	2015-06-11 08:46:48 -0400
committer	Thomas Gleixner <tglx@linutronix.de>	2015-06-18 18:09:56 -0400
commit		887d9dc989eb0154492e41e7c07492edbb088ba1 (patch)
tree		53f3c08252d60c9ceb32fa488c1e0ab77a95b9ef /kernel/time/hrtimer.c
parent		c4bfa3f5f906aee2e084c5b1fb15caf876338ef8 (diff)
hrtimer: Allow hrtimer::function() to free the timer
Currently an hrtimer callback function cannot free its own timer
because __run_hrtimer() still needs to clear HRTIMER_STATE_CALLBACK
after it. Freeing the timer would result in a clear use-after-free.

Solve this by using a scheme similar to regular timers; track the
current running timer in hrtimer_cpu_base::running.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: ktkhai@parallels.com
Cc: rostedt@goodmis.org
Cc: juri.lelli@gmail.com
Cc: pang.xunlei@linaro.org
Cc: wanpeng.li@linux.intel.com
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul McKenney <paulmck@linux.vnet.ibm.com>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: umgwanakikbuti@gmail.com
Link: http://lkml.kernel.org/r/20150611124743.471563047@infradead.org
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
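[Editor's note] To illustrate the usage pattern this patch legalizes, here is a minimal sketch (not part of the commit; struct one_shot and one_shot_fn() are hypothetical names) of a one-shot timer whose callback frees its own memory. This is only safe when the callback returns HRTIMER_NORESTART, so that __run_hrtimer() never dereferences the freed timer afterwards:

#include <linux/hrtimer.h>
#include <linux/slab.h>

struct one_shot {
	struct hrtimer timer;
	/* ... payload ... */
};

static enum hrtimer_restart one_shot_fn(struct hrtimer *t)
{
	struct one_shot *os = container_of(t, struct one_shot, timer);

	/*
	 * With this patch, __run_hrtimer() no longer writes to
	 * timer->state after the callback returns, so freeing the
	 * structure backing our own timer here is safe, provided
	 * we return HRTIMER_NORESTART.
	 */
	kfree(os);
	return HRTIMER_NORESTART;
}

Before this change, __run_hrtimer() still cleared HRTIMER_STATE_CALLBACK in timer->state after the callback returned, which is exactly the use-after-free the changelog describes.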
Diffstat (limited to 'kernel/time/hrtimer.c')
 kernel/time/hrtimer.c | 114 ++++++++++++++++++++++++++++++++++++------------
 1 file changed, 91 insertions(+), 23 deletions(-)
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 1604157374d7..f026413de4d6 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -67,6 +67,7 @@
 DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
 {
 	.lock = __RAW_SPIN_LOCK_UNLOCKED(hrtimer_bases.lock),
+	.seq = SEQCNT_ZERO(hrtimer_bases.seq),
 	.clock_base =
 	{
 		{
@@ -111,6 +112,18 @@ static inline int hrtimer_clockid_to_base(clockid_t clock_id)
 #ifdef CONFIG_SMP
 
 /*
+ * We require the migration_base for lock_hrtimer_base()/switch_hrtimer_base()
+ * such that hrtimer_callback_running() can unconditionally dereference
+ * timer->base->cpu_base
+ */
+static struct hrtimer_cpu_base migration_cpu_base = {
+	.seq = SEQCNT_ZERO(migration_cpu_base),
+	.clock_base = { { .cpu_base = &migration_cpu_base, }, },
+};
+
+#define migration_base	migration_cpu_base.clock_base[0]
+
+/*
  * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
  * means that all timers which are tied to this base via timer->base are
  * locked, and the base itself is locked too.
@@ -119,8 +132,8 @@ static inline int hrtimer_clockid_to_base(clockid_t clock_id)
  * be found on the lists/queues.
  *
  * When the timer's base is locked, and the timer removed from list, it is
- * possible to set timer->base = NULL and drop the lock: the timer remains
- * locked.
+ * possible to set timer->base = &migration_base and drop the lock: the timer
+ * remains locked.
  */
 static
 struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
@@ -130,7 +143,7 @@ struct hrtimer_clock_base *lock_hrtimer_base(const struct hrtimer *timer,
 
 	for (;;) {
 		base = timer->base;
-		if (likely(base != NULL)) {
+		if (likely(base != &migration_base)) {
 			raw_spin_lock_irqsave(&base->cpu_base->lock, *flags);
 			if (likely(base == timer->base))
 				return base;
@@ -194,8 +207,8 @@ again:
 	if (unlikely(hrtimer_callback_running(timer)))
 		return base;
 
-	/* See the comment in lock_timer_base() */
-	timer->base = NULL;
+	/* See the comment in lock_hrtimer_base() */
+	timer->base = &migration_base;
 	raw_spin_unlock(&base->cpu_base->lock);
 	raw_spin_lock(&new_base->cpu_base->lock);
 
@@ -838,11 +851,7 @@ static int enqueue_hrtimer(struct hrtimer *timer,
 
 	base->cpu_base->active_bases |= 1 << base->index;
 
-	/*
-	 * HRTIMER_STATE_ENQUEUED is or'ed to the current state to preserve the
-	 * state of a possibly running callback.
-	 */
-	timer->state |= HRTIMER_STATE_ENQUEUED;
+	timer->state = HRTIMER_STATE_ENQUEUED;
 
 	return timerqueue_add(&base->active, &timer->node);
 }
@@ -907,14 +916,9 @@ remove_hrtimer(struct hrtimer *timer, struct hrtimer_clock_base *base, bool restart)
 		timer_stats_hrtimer_clear_start_info(timer);
 		reprogram = base->cpu_base == this_cpu_ptr(&hrtimer_bases);
 
-		if (!restart) {
-			/*
-			 * We must preserve the CALLBACK state flag here,
-			 * otherwise we could move the timer base in
-			 * switch_hrtimer_base.
-			 */
-			state &= HRTIMER_STATE_CALLBACK;
-		}
+		if (!restart)
+			state = HRTIMER_STATE_INACTIVE;
+
 		__remove_hrtimer(timer, base, state, reprogram);
 		return 1;
 	}
@@ -1115,6 +1119,51 @@ void hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 }
 EXPORT_SYMBOL_GPL(hrtimer_init);
 
+/*
+ * A timer is active, when it is enqueued into the rbtree or the
+ * callback function is running or it's in the state of being migrated
+ * to another cpu.
+ *
+ * It is important for this function to not return a false negative.
+ */
+bool hrtimer_active(const struct hrtimer *timer)
+{
+	struct hrtimer_cpu_base *cpu_base;
+	unsigned int seq;
+
+	do {
+		cpu_base = READ_ONCE(timer->base->cpu_base);
+		seq = raw_read_seqcount_begin(&cpu_base->seq);
+
+		if (timer->state != HRTIMER_STATE_INACTIVE ||
+		    cpu_base->running == timer)
+			return true;
+
+	} while (read_seqcount_retry(&cpu_base->seq, seq) ||
+		 cpu_base != READ_ONCE(timer->base->cpu_base));
+
+	return false;
+}
+EXPORT_SYMBOL_GPL(hrtimer_active);
+
+/*
+ * The write_seqcount_barrier()s in __run_hrtimer() split the thing into 3
+ * distinct sections:
+ *
+ *  - queued:	the timer is queued
+ *  - callback:	the timer is being run
+ *  - post:	the timer is inactive or (re)queued
+ *
+ * On the read side we ensure we observe timer->state and cpu_base->running
+ * from the same section; if anything changed while we looked at it, we retry.
+ * This includes timer->base changing, because sequence numbers alone are
+ * insufficient for that.
+ *
+ * The sequence numbers are required because otherwise we could still observe
+ * a false negative if the read side got smeared over multiple consecutive
+ * __run_hrtimer() invocations.
+ */
+
 static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
 			  struct hrtimer_clock_base *base,
 			  struct hrtimer *timer, ktime_t *now)
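[Editor's note] As a hedged illustration of the retry protocol hrtimer_active() implements above, here is a self-contained userspace analogue using C11 atomics in place of the kernel's seqcount primitives. struct model_base, model_barrier() and model_active() are invented names, and the timer->base re-check for migration is omitted for brevity:

#include <stdatomic.h>
#include <stdbool.h>

struct model_base {
	atomic_uint seq;		/* odd only while a transition is in flight */
	void * _Atomic running;		/* "timer" whose callback runs, or NULL */
	_Atomic int state;		/* 0 models HRTIMER_STATE_INACTIVE */
};

/* Models raw_write_seqcount_barrier(): seq++, wmb, seq++. */
static void model_barrier(struct model_base *b)
{
	atomic_fetch_add_explicit(&b->seq, 1, memory_order_relaxed);
	atomic_thread_fence(memory_order_release);
	atomic_fetch_add_explicit(&b->seq, 1, memory_order_relaxed);
}

/* Models hrtimer_active(): may return a false positive, never a false negative. */
static bool model_active(struct model_base *b, void *timer)
{
	unsigned int seq;

	do {
		/* Like raw_read_seqcount_begin(): wait out an in-flight transition. */
		do {
			seq = atomic_load_explicit(&b->seq, memory_order_acquire);
		} while (seq & 1);

		if (atomic_load_explicit(&b->state, memory_order_relaxed) != 0 ||
		    atomic_load_explicit(&b->running, memory_order_relaxed) == timer)
			return true;

		atomic_thread_fence(memory_order_acquire);

		/* Like read_seqcount_retry(): retry if the writer moved on meanwhile. */
	} while (atomic_load_explicit(&b->seq, memory_order_relaxed) != seq);

	return false;
}

model_barrier() mirrors raw_write_seqcount_barrier() from the parent commit (c4bfa3f5f906): two increments around a write barrier, so the count is odd only inside a transition and any reader that straddles one is forced around the loop again.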
@@ -1122,10 +1171,21 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
 	enum hrtimer_restart (*fn)(struct hrtimer *);
 	int restart;
 
-	WARN_ON(!irqs_disabled());
+	lockdep_assert_held(&cpu_base->lock);
 
 	debug_deactivate(timer);
-	__remove_hrtimer(timer, base, HRTIMER_STATE_CALLBACK, 0);
+	cpu_base->running = timer;
+
+	/*
+	 * Separate the ->running assignment from the ->state assignment.
+	 *
+	 * As with a regular write barrier, this ensures the read side in
+	 * hrtimer_active() cannot observe cpu_base->running == NULL &&
+	 * timer->state == INACTIVE.
+	 */
+	raw_write_seqcount_barrier(&cpu_base->seq);
+
+	__remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);
 	timer_stats_account_hrtimer(timer);
 	fn = timer->function;
 
@@ -1141,7 +1201,7 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
 	raw_spin_lock(&cpu_base->lock);
 
 	/*
-	 * Note: We clear the CALLBACK bit after enqueue_hrtimer and
+	 * Note: We clear the running state after enqueue_hrtimer and
 	 * we do not reprogram the event hardware. Happens either in
 	 * hrtimer_start_range_ns() or in hrtimer_interrupt()
 	 *
@@ -1153,9 +1213,17 @@ static void __run_hrtimer(struct hrtimer_cpu_base *cpu_base,
 	    !(timer->state & HRTIMER_STATE_ENQUEUED))
 		enqueue_hrtimer(timer, base);
 
-	WARN_ON_ONCE(!(timer->state & HRTIMER_STATE_CALLBACK));
+	/*
+	 * Separate the ->running assignment from the ->state assignment.
+	 *
+	 * As with a regular write barrier, this ensures the read side in
+	 * hrtimer_active() cannot observe cpu_base->running == NULL &&
+	 * timer->state == INACTIVE.
+	 */
+	raw_write_seqcount_barrier(&cpu_base->seq);
 
-	timer->state &= ~HRTIMER_STATE_CALLBACK;
+	WARN_ON_ONCE(cpu_base->running != timer);
+	cpu_base->running = NULL;
 }
 
 static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now)
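[Editor's note] Read together, the last three hunks give __run_hrtimer() the shape below. This is a condensed sketch of the patched function, not verbatim: timer_stats, tracing, and the fact that cpu_base->lock is dropped around the callback invocation are elided, and run_hrtimer_sketch is an invented name:

static void run_hrtimer_sketch(struct hrtimer_cpu_base *cpu_base,
			       struct hrtimer_clock_base *base,
			       struct hrtimer *timer)
{
	enum hrtimer_restart restart;

	cpu_base->running = timer;			/* "queued" -> "callback" */
	raw_write_seqcount_barrier(&cpu_base->seq);
	__remove_hrtimer(timer, base, HRTIMER_STATE_INACTIVE, 0);

	restart = timer->function(timer);		/* may free timer on NORESTART */

	if (restart != HRTIMER_NORESTART &&
	    !(timer->state & HRTIMER_STATE_ENQUEUED))
		enqueue_hrtimer(timer, base);

	raw_write_seqcount_barrier(&cpu_base->seq);	/* "callback" -> "post" */
	cpu_base->running = NULL;
}

Note the && short-circuit: when the callback returns HRTIMER_NORESTART, timer->state is never dereferenced again, which is what makes freeing the timer from its own callback legal.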