Diffstat (limited to 'kernel/time/hrtimer.c')
-rw-r--r--	kernel/time/hrtimer.c	235
1 file changed, 212 insertions(+), 23 deletions(-)
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 5ee77f1a8a92..0d4dc241c0fb 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -140,6 +140,11 @@ static struct hrtimer_cpu_base migration_cpu_base = {
 
 #define migration_base	migration_cpu_base.clock_base[0]
 
+static inline bool is_migration_base(struct hrtimer_clock_base *base)
+{
+	return base == &migration_base;
+}
+
 /*
  * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
  * means that all timers which are tied to this base via timer->base are
@@ -264,6 +269,11 @@ again:
 
 #else /* CONFIG_SMP */
 
+static inline bool is_migration_base(struct hrtimer_clock_base *base)
+{
+	return false;
+}
+
 static inline struct hrtimer_clock_base *
 lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 {
@@ -427,6 +437,17 @@ void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id,
 }
 EXPORT_SYMBOL_GPL(hrtimer_init_on_stack);
 
+static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
+				   clockid_t clock_id, enum hrtimer_mode mode);
+
+void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
+				   clockid_t clock_id, enum hrtimer_mode mode)
+{
+	debug_object_init_on_stack(&sl->timer, &hrtimer_debug_descr);
+	__hrtimer_init_sleeper(sl, clock_id, mode);
+}
+EXPORT_SYMBOL_GPL(hrtimer_init_sleeper_on_stack);
+
 void destroy_hrtimer_on_stack(struct hrtimer *timer)
 {
 	debug_object_free(timer, &hrtimer_debug_descr);
@@ -1096,9 +1117,13 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 
 	/*
 	 * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft
-	 * match.
+	 * match on CONFIG_PREEMPT_RT = n. With PREEMPT_RT check the hard
+	 * expiry mode because unmarked timers are moved to softirq expiry.
 	 */
-	WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft);
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+		WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft);
+	else
+		WARN_ON_ONCE(!(mode & HRTIMER_MODE_HARD) ^ !timer->is_hard);
 
 	base = lock_hrtimer_base(timer, &flags);
 
@@ -1147,6 +1172,93 @@ int hrtimer_try_to_cancel(struct hrtimer *timer)
 }
 EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
 
+#ifdef CONFIG_PREEMPT_RT
+static void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base)
+{
+	spin_lock_init(&base->softirq_expiry_lock);
+}
+
+static void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base)
+{
+	spin_lock(&base->softirq_expiry_lock);
+}
+
+static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base)
+{
+	spin_unlock(&base->softirq_expiry_lock);
+}
+
+/*
+ * The counterpart to hrtimer_cancel_wait_running().
+ *
+ * If there is a waiter for cpu_base->expiry_lock, then it was waiting for
+ * the timer callback to finish. Drop expiry_lock and reacquire it. That
+ * allows the waiter to acquire the lock and make progress.
+ */
+static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base,
+				      unsigned long flags)
+{
+	if (atomic_read(&cpu_base->timer_waiters)) {
+		raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+		spin_unlock(&cpu_base->softirq_expiry_lock);
+		spin_lock(&cpu_base->softirq_expiry_lock);
+		raw_spin_lock_irq(&cpu_base->lock);
+	}
+}
+
+/*
+ * This function is called on PREEMPT_RT kernels when the fast path
+ * deletion of a timer failed because the timer callback function was
+ * running.
+ *
+ * This prevents priority inversion: if the soft irq thread is preempted
+ * in the middle of a timer callback, then calling del_timer_sync() can
+ * lead to two issues:
+ *
+ *  - If the caller is on a remote CPU then it has to spin wait for the timer
+ *    handler to complete. This can result in unbound priority inversion.
+ *
+ *  - If the caller originates from the task which preempted the timer
+ *    handler on the same CPU, then spin waiting for the timer handler to
+ *    complete is never going to end.
+ */
+void hrtimer_cancel_wait_running(const struct hrtimer *timer)
+{
+	/* Lockless read. Prevent the compiler from reloading it below */
+	struct hrtimer_clock_base *base = READ_ONCE(timer->base);
+
+	/*
+	 * Just relax if the timer expires in hard interrupt context or if
+	 * it is currently on the migration base.
+	 */
+	if (!timer->is_soft || is_migration_base(base)) {
+		cpu_relax();
+		return;
+	}
+
+	/*
+	 * Mark the base as contended and grab the expiry lock, which is
+	 * held by the softirq across the timer callback. Drop the lock
+	 * immediately so the softirq can expire the next timer. In theory
+	 * the timer could already be running again, but that's more than
+	 * unlikely and just causes another wait loop.
+	 */
+	atomic_inc(&base->cpu_base->timer_waiters);
+	spin_lock_bh(&base->cpu_base->softirq_expiry_lock);
+	atomic_dec(&base->cpu_base->timer_waiters);
+	spin_unlock_bh(&base->cpu_base->softirq_expiry_lock);
+}
+#else
+static inline void
+hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) { }
+static inline void
+hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) { }
+static inline void
+hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) { }
+static inline void hrtimer_sync_wait_running(struct hrtimer_cpu_base *base,
+					     unsigned long flags) { }
+#endif
+
 /**
  * hrtimer_cancel - cancel a timer and wait for the handler to finish.
  * @timer:	the timer to be cancelled
@@ -1157,13 +1269,15 @@ EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
  */
 int hrtimer_cancel(struct hrtimer *timer)
 {
-	for (;;) {
-		int ret = hrtimer_try_to_cancel(timer);
+	int ret;
 
-		if (ret >= 0)
-			return ret;
-		cpu_relax();
-	}
+	do {
+		ret = hrtimer_try_to_cancel(timer);
+
+		if (ret < 0)
+			hrtimer_cancel_wait_running(timer);
+	} while (ret < 0);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(hrtimer_cancel);
 
@@ -1260,8 +1374,17 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 			   enum hrtimer_mode mode)
 {
 	bool softtimer = !!(mode & HRTIMER_MODE_SOFT);
-	int base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
 	struct hrtimer_cpu_base *cpu_base;
+	int base;
+
+	/*
+	 * On PREEMPT_RT enabled kernels hrtimers which are not explicitly
+	 * marked for hard interrupt expiry mode are moved into soft
+	 * interrupt context for latency reasons and because the callbacks
+	 * can invoke functions which might sleep on RT, e.g. spin_lock().
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(mode & HRTIMER_MODE_HARD))
+		softtimer = true;
 
 	memset(timer, 0, sizeof(struct hrtimer));
 
@@ -1275,8 +1398,10 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 	if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL)
 		clock_id = CLOCK_MONOTONIC;
 
+	base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
 	base += hrtimer_clockid_to_base(clock_id);
 	timer->is_soft = softtimer;
+	timer->is_hard = !softtimer;
 	timer->base = &cpu_base->clock_base[base];
 	timerqueue_init(&timer->node);
 }
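
To see why the sanity check in hrtimer_start_range_ns() (the @@ -1096 hunk above) had to grow a PREEMPT_RT branch, it helps to run the flag logic in isolation. The following is a minimal userspace model, not kernel code: the struct, the helper names and the flag values are stand-ins invented for this sketch, only the decision logic is copied from the hunks above. It shows that once RT forces an unmarked timer to soft expiry, the old SOFT-bit check would raise a false positive, while the HARD-bit variant used on RT stays consistent.

/* Userspace model only; flag values are illustrative stand-ins. */
#include <stdbool.h>
#include <stdio.h>

#define MODE_SOFT 0x04	/* stand-in for HRTIMER_MODE_SOFT */
#define MODE_HARD 0x08	/* stand-in for HRTIMER_MODE_HARD */

struct toy_timer { bool is_soft, is_hard; };

/* mirrors the __hrtimer_init() hunk above: force soft expiry on RT unless HARD */
static void toy_init(struct toy_timer *t, unsigned int mode, bool preempt_rt)
{
	bool softtimer = mode & MODE_SOFT;

	if (preempt_rt && !(mode & MODE_HARD))
		softtimer = true;

	t->is_soft = softtimer;
	t->is_hard = !softtimer;
}

/* the pre-patch sanity check from hrtimer_start_range_ns() */
static bool soft_check_warns(const struct toy_timer *t, unsigned int mode)
{
	return !(mode & MODE_SOFT) ^ !t->is_soft;
}

/* the RT-only variant the patch adds */
static bool hard_check_warns(const struct toy_timer *t, unsigned int mode)
{
	return !(mode & MODE_HARD) ^ !t->is_hard;
}

int main(void)
{
	const unsigned int modes[] = { 0, MODE_SOFT, MODE_HARD };
	const char *names[] = { "default", "SOFT", "HARD" };

	for (int rt = 0; rt <= 1; rt++) {
		for (int i = 0; i < 3; i++) {
			struct toy_timer t;

			toy_init(&t, modes[i], rt);
			printf("RT=%d %-7s: is_soft=%d is_hard=%d soft_check=%d hard_check=%d\n",
			       rt, names[i], t.is_soft, t.is_hard,
			       soft_check_warns(&t, modes[i]),
			       hard_check_warns(&t, modes[i]));
		}
	}
	return 0;
}

Running it, soft_check is 1 only for the RT/default-mode row, which is exactly the combination the patch has to tolerate, and hard_check is 0 for every RT row.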
@@ -1449,6 +1574,8 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now,
 			break;
 
 		__run_hrtimer(cpu_base, base, timer, &basenow, flags);
+		if (active_mask == HRTIMER_ACTIVE_SOFT)
+			hrtimer_sync_wait_running(cpu_base, flags);
 		}
 	}
 }
@@ -1459,6 +1586,7 @@ static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
 	unsigned long flags;
 	ktime_t now;
 
+	hrtimer_cpu_base_lock_expiry(cpu_base);
 	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 
 	now = hrtimer_update_base(cpu_base);
@@ -1468,6 +1596,7 @@ static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
 	hrtimer_update_softirq_timer(cpu_base, true);
 
 	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+	hrtimer_cpu_base_unlock_expiry(cpu_base);
 }
 
 #ifdef CONFIG_HIGH_RES_TIMERS
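
The pieces added above (softirq_expiry_lock, timer_waiters, hrtimer_sync_wait_running() on the expiry side and hrtimer_cancel_wait_running() on the cancel side) turn the old busy-wait in hrtimer_cancel() into a blocking handoff. The program below is a rough userspace model of that handoff, with pthreads standing in for the kernel's spinlock and softirq machinery; all names in it are invented for the sketch and the real code's raw-spinlock and irq rules are deliberately ignored. A canceller that finds the callback running sleeps on the expiry lock, and the expiring side briefly drops that lock between callbacks whenever it notices a waiter. Build with cc -pthread.

/* Userspace model of the expiry-lock handoff; not kernel code. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

static pthread_mutex_t expiry_lock = PTHREAD_MUTEX_INITIALIZER;
static atomic_int timer_waiters;
static atomic_bool callback_running;

/* models hrtimer_sync_wait_running(): drop and retake the lock if contended */
static void sync_wait_running(void)
{
	if (atomic_load(&timer_waiters)) {
		pthread_mutex_unlock(&expiry_lock);
		/* a waiting canceller can grab the mutex here and make progress */
		pthread_mutex_lock(&expiry_lock);
	}
}

/* models hrtimer_run_softirq(): expire timers while holding the expiry lock */
static void *softirq_thread(void *arg)
{
	(void)arg;
	pthread_mutex_lock(&expiry_lock);
	for (int i = 0; i < 3; i++) {
		atomic_store(&callback_running, true);
		usleep(100 * 1000);			/* "timer callback" runs */
		atomic_store(&callback_running, false);
		sync_wait_running();
	}
	pthread_mutex_unlock(&expiry_lock);
	return NULL;
}

/* models hrtimer_cancel() + hrtimer_cancel_wait_running(): block, don't spin */
static void *cancel_thread(void *arg)
{
	(void)arg;
	while (atomic_load(&callback_running)) {
		atomic_fetch_add(&timer_waiters, 1);
		pthread_mutex_lock(&expiry_lock);	/* sleeps until handed off */
		atomic_fetch_sub(&timer_waiters, 1);
		pthread_mutex_unlock(&expiry_lock);
	}
	puts("cancel completed without busy-waiting");
	return NULL;
}

int main(void)
{
	pthread_t soft, cancel;

	pthread_create(&soft, NULL, softirq_thread, NULL);
	usleep(10 * 1000);				/* let a callback start */
	pthread_create(&cancel, NULL, cancel_thread, NULL);
	pthread_join(soft, NULL);
	pthread_join(cancel, NULL);
	return 0;
}

The real code keeps ordering constraints this model ignores: the expiry lock is taken with the _bh variants on the cancel side, and hrtimer_sync_wait_running() has to drop and retake cpu_base->lock around the handoff, as the @@ -1147 hunk shows.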
@@ -1639,10 +1768,75 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
-void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
+/**
+ * hrtimer_sleeper_start_expires - Start a hrtimer sleeper timer
+ * @sl:		sleeper to be started
+ * @mode:	timer mode abs/rel
+ *
+ * Wrapper around hrtimer_start_expires() for hrtimer_sleeper based timers
+ * to allow PREEMPT_RT to tweak the delivery mode (soft/hardirq context)
+ */
+void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl,
+				   enum hrtimer_mode mode)
+{
+	/*
+	 * Make the enqueue delivery mode check work on RT. If the sleeper
+	 * was initialized for hard interrupt delivery, force the mode bit.
+	 * This is a special case for hrtimer_sleepers because
+	 * hrtimer_init_sleeper() determines the delivery mode on RT so the
+	 * fiddling with this decision is avoided at the call sites.
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT) && sl->timer.is_hard)
+		mode |= HRTIMER_MODE_HARD;
+
+	hrtimer_start_expires(&sl->timer, mode);
+}
+EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires);
+
+static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
+				   clockid_t clock_id, enum hrtimer_mode mode)
 {
+	/*
+	 * On PREEMPT_RT enabled kernels hrtimers which are not explicitly
+	 * marked for hard interrupt expiry mode are moved into soft
+	 * interrupt context either for latency reasons or because the
+	 * hrtimer callback takes regular spinlocks or invokes other
+	 * functions which are not suitable for hard interrupt context on
+	 * PREEMPT_RT.
+	 *
+	 * The hrtimer_sleeper callback is RT compatible in hard interrupt
+	 * context, but there is a latency concern: Untrusted userspace can
+	 * spawn many threads which arm timers for the same expiry time on
+	 * the same CPU. That causes a latency spike due to the wakeup of
+	 * a gazillion threads.
+	 *
+	 * OTOH, privileged real-time user space applications rely on the
+	 * low latency of hard interrupt wakeups. If the current task is in
+	 * a real-time scheduling class, mark the mode for hard interrupt
+	 * expiry.
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+		if (task_is_realtime(current) && !(mode & HRTIMER_MODE_SOFT))
+			mode |= HRTIMER_MODE_HARD;
+	}
+
+	__hrtimer_init(&sl->timer, clock_id, mode);
 	sl->timer.function = hrtimer_wakeup;
-	sl->task = task;
+	sl->task = current;
+}
+
+/**
+ * hrtimer_init_sleeper - initialize sleeper to the given clock
+ * @sl:		sleeper to be initialized
+ * @clock_id:	the clock to be used
+ * @mode:	timer mode abs/rel
+ */
+void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id,
+			  enum hrtimer_mode mode)
+{
+	debug_init(&sl->timer, clock_id, mode);
+	__hrtimer_init_sleeper(sl, clock_id, mode);
+
 }
 EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
 
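
For callers, the net effect of the hunk above plus the call-site conversions further down is a small change of convention: the sleeper is set up in one step (no separate hrtimer_init_sleeper(t, current) before arming) and started through hrtimer_sleeper_start_expires(), so PREEMPT_RT can re-apply hard expiry where the sleeper was initialized for it. The sketch below condenses that pattern from the converted callers in this patch (do_nanosleep(), hrtimer_nanosleep() and schedule_hrtimeout_range_clock()); wait_for_timeout() is an invented name and this is an illustration of the calling convention, not code from the patch.

#include <linux/errno.h>
#include <linux/hrtimer.h>
#include <linux/sched.h>

/* illustrative only: condensed from the converted call sites in this patch */
static int wait_for_timeout(ktime_t expires, u64 delta,
			    clockid_t clock_id, enum hrtimer_mode mode)
{
	struct hrtimer_sleeper t;

	/* one-step init: picks soft vs. hard expiry on PREEMPT_RT */
	hrtimer_init_sleeper_on_stack(&t, clock_id, mode);
	hrtimer_set_expires_range_ns(&t.timer, expires, delta);

	set_current_state(TASK_INTERRUPTIBLE);
	/* re-applies HRTIMER_MODE_HARD if the sleeper was set up as hard */
	hrtimer_sleeper_start_expires(&t, mode);

	if (likely(t.task))
		schedule();

	hrtimer_cancel(&t.timer);
	destroy_hrtimer_on_stack(&t.timer);

	__set_current_state(TASK_RUNNING);

	/* t.task is cleared by hrtimer_wakeup() when the timer fired */
	return !t.task ? 0 : -EINTR;
}

The timer mode still comes from the caller; the sleeper helpers only adjust the soft/hard delivery bits on PREEMPT_RT.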
@@ -1669,11 +1863,9 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
 {
 	struct restart_block *restart;
 
-	hrtimer_init_sleeper(t, current);
-
 	do {
 		set_current_state(TASK_INTERRUPTIBLE);
-		hrtimer_start_expires(&t->timer, mode);
+		hrtimer_sleeper_start_expires(t, mode);
 
 		if (likely(t->task))
 			freezable_schedule();
@@ -1707,10 +1899,9 @@ static long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
 	struct hrtimer_sleeper t;
 	int ret;
 
-	hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid,
-				HRTIMER_MODE_ABS);
+	hrtimer_init_sleeper_on_stack(&t, restart->nanosleep.clockid,
+				      HRTIMER_MODE_ABS);
 	hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
-
 	ret = do_nanosleep(&t, HRTIMER_MODE_ABS);
 	destroy_hrtimer_on_stack(&t.timer);
 	return ret;
@@ -1728,7 +1919,7 @@ long hrtimer_nanosleep(const struct timespec64 *rqtp,
 	if (dl_task(current) || rt_task(current))
 		slack = 0;
 
-	hrtimer_init_on_stack(&t.timer, clockid, mode);
+	hrtimer_init_sleeper_on_stack(&t, clockid, mode);
 	hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack);
 	ret = do_nanosleep(&t, mode);
 	if (ret != -ERESTART_RESTARTBLOCK)
@@ -1809,6 +2000,7 @@ int hrtimers_prepare_cpu(unsigned int cpu)
 	cpu_base->softirq_next_timer = NULL;
 	cpu_base->expires_next = KTIME_MAX;
 	cpu_base->softirq_expires_next = KTIME_MAX;
+	hrtimer_cpu_base_init_expiry_lock(cpu_base);
 	return 0;
 }
 
@@ -1927,12 +2119,9 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
 		return -EINTR;
 	}
 
-	hrtimer_init_on_stack(&t.timer, clock_id, mode);
+	hrtimer_init_sleeper_on_stack(&t, clock_id, mode);
 	hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
-
-	hrtimer_init_sleeper(&t, current);
-
-	hrtimer_start_expires(&t.timer, mode);
+	hrtimer_sleeper_start_expires(&t, mode);
 
 	if (likely(t.task))
 		schedule();