Diffstat (limited to 'kernel/time/hrtimer.c')
 kernel/time/hrtimer.c | 235 ++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 212 insertions(+), 23 deletions(-)
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 5ee77f1a8a92..0d4dc241c0fb 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -140,6 +140,11 @@ static struct hrtimer_cpu_base migration_cpu_base = {
 
 #define migration_base	migration_cpu_base.clock_base[0]
 
+static inline bool is_migration_base(struct hrtimer_clock_base *base)
+{
+	return base == &migration_base;
+}
+
 /*
  * We are using hashed locking: holding per_cpu(hrtimer_bases)[n].lock
  * means that all timers which are tied to this base via timer->base are
@@ -264,6 +269,11 @@ again:
 
 #else /* CONFIG_SMP */
 
+static inline bool is_migration_base(struct hrtimer_clock_base *base)
+{
+	return false;
+}
+
 static inline struct hrtimer_clock_base *
 lock_hrtimer_base(const struct hrtimer *timer, unsigned long *flags)
 {
@@ -427,6 +437,17 @@ void hrtimer_init_on_stack(struct hrtimer *timer, clockid_t clock_id,
 }
 EXPORT_SYMBOL_GPL(hrtimer_init_on_stack);
 
+static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
+				   clockid_t clock_id, enum hrtimer_mode mode);
+
+void hrtimer_init_sleeper_on_stack(struct hrtimer_sleeper *sl,
+				   clockid_t clock_id, enum hrtimer_mode mode)
+{
+	debug_object_init_on_stack(&sl->timer, &hrtimer_debug_descr);
+	__hrtimer_init_sleeper(sl, clock_id, mode);
+}
+EXPORT_SYMBOL_GPL(hrtimer_init_sleeper_on_stack);
+
 void destroy_hrtimer_on_stack(struct hrtimer *timer)
 {
 	debug_object_free(timer, &hrtimer_debug_descr);
@@ -1096,9 +1117,13 @@ void hrtimer_start_range_ns(struct hrtimer *timer, ktime_t tim,
 
 	/*
 	 * Check whether the HRTIMER_MODE_SOFT bit and hrtimer.is_soft
-	 * match.
+	 * match on CONFIG_PREEMPT_RT = n. With PREEMPT_RT check the hard
+	 * expiry mode because unmarked timers are moved to softirq expiry.
 	 */
-	WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft);
+	if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+		WARN_ON_ONCE(!(mode & HRTIMER_MODE_SOFT) ^ !timer->is_soft);
+	else
+		WARN_ON_ONCE(!(mode & HRTIMER_MODE_HARD) ^ !timer->is_hard);
 
 	base = lock_hrtimer_base(timer, &flags);
 
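For illustration only, not part of the patch: the warning above checks that the mode bits passed at start time agree with how the timer was initialized. A minimal caller sketch that keeps the invariant on both PREEMPT_RT=n and PREEMPT_RT=y; the my_* names are made up for the example.

/* Hypothetical caller; my_timer and my_timeout_fn are illustrative only. */
static struct hrtimer my_timer;

static enum hrtimer_restart my_timeout_fn(struct hrtimer *t)
{
	return HRTIMER_NORESTART;
}

static void my_arm_timer(void)
{
	/* is_soft/is_hard are derived here; on RT an unmarked timer becomes soft. */
	hrtimer_init(&my_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
	my_timer.function = my_timeout_fn;

	/* Starting with the same mode family keeps the WARN_ON_ONCE above quiet. */
	hrtimer_start(&my_timer, ms_to_ktime(10), HRTIMER_MODE_REL);
}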
@@ -1147,6 +1172,93 @@ int hrtimer_try_to_cancel(struct hrtimer *timer)
 }
 EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
 
+#ifdef CONFIG_PREEMPT_RT
+static void hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base)
+{
+	spin_lock_init(&base->softirq_expiry_lock);
+}
+
+static void hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base)
+{
+	spin_lock(&base->softirq_expiry_lock);
+}
+
+static void hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base)
+{
+	spin_unlock(&base->softirq_expiry_lock);
+}
+
+/*
+ * The counterpart to hrtimer_cancel_wait_running().
+ *
+ * If there is a waiter for cpu_base->expiry_lock, then it was waiting for
+ * the timer callback to finish. Drop expiry_lock and reacquire it. That
+ * allows the waiter to acquire the lock and make progress.
+ */
+static void hrtimer_sync_wait_running(struct hrtimer_cpu_base *cpu_base,
+				      unsigned long flags)
+{
+	if (atomic_read(&cpu_base->timer_waiters)) {
+		raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+		spin_unlock(&cpu_base->softirq_expiry_lock);
+		spin_lock(&cpu_base->softirq_expiry_lock);
+		raw_spin_lock_irq(&cpu_base->lock);
+	}
+}
+
+/*
+ * This function is called on PREEMPT_RT kernels when the fast path
+ * deletion of a timer failed because the timer callback function was
+ * running.
+ *
+ * This prevents priority inversion: if the soft irq thread is preempted
+ * in the middle of a timer callback, then calling del_timer_sync() can
+ * lead to two issues:
+ *
+ *  - If the caller is on a remote CPU then it has to spin wait for the timer
+ *    handler to complete. This can result in unbound priority inversion.
+ *
+ *  - If the caller originates from the task which preempted the timer
+ *    handler on the same CPU, then spin waiting for the timer handler to
+ *    complete is never going to end.
+ */
+void hrtimer_cancel_wait_running(const struct hrtimer *timer)
+{
+	/* Lockless read. Prevent the compiler from reloading it below */
+	struct hrtimer_clock_base *base = READ_ONCE(timer->base);
+
+	/*
+	 * Just relax if the timer expires in hard interrupt context or if
+	 * it is currently on the migration base.
+	 */
+	if (!timer->is_soft || is_migration_base(base)) {
+		cpu_relax();
+		return;
+	}
+
+	/*
+	 * Mark the base as contended and grab the expiry lock, which is
+	 * held by the softirq across the timer callback. Drop the lock
+	 * immediately so the softirq can expire the next timer. In theory
+	 * the timer could already be running again, but that's more than
+	 * unlikely and just causes another wait loop.
+	 */
+	atomic_inc(&base->cpu_base->timer_waiters);
+	spin_lock_bh(&base->cpu_base->softirq_expiry_lock);
+	atomic_dec(&base->cpu_base->timer_waiters);
+	spin_unlock_bh(&base->cpu_base->softirq_expiry_lock);
+}
+#else
+static inline void
+hrtimer_cpu_base_init_expiry_lock(struct hrtimer_cpu_base *base) { }
+static inline void
+hrtimer_cpu_base_lock_expiry(struct hrtimer_cpu_base *base) { }
+static inline void
+hrtimer_cpu_base_unlock_expiry(struct hrtimer_cpu_base *base) { }
+static inline void hrtimer_sync_wait_running(struct hrtimer_cpu_base *base,
+					     unsigned long flags) { }
+#endif
+
 /**
  * hrtimer_cancel - cancel a timer and wait for the handler to finish.
  * @timer:	the timer to be cancelled
@@ -1157,13 +1269,15 @@ EXPORT_SYMBOL_GPL(hrtimer_try_to_cancel);
  */
 int hrtimer_cancel(struct hrtimer *timer)
 {
-	for (;;) {
-		int ret = hrtimer_try_to_cancel(timer);
+	int ret;
 
-		if (ret >= 0)
-			return ret;
-		cpu_relax();
-	}
+	do {
+		ret = hrtimer_try_to_cancel(timer);
+
+		if (ret < 0)
+			hrtimer_cancel_wait_running(timer);
+	} while (ret < 0);
+	return ret;
 }
 EXPORT_SYMBOL_GPL(hrtimer_cancel);
 
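A sketch of what the new cancel path buys on PREEMPT_RT, not part of the patch; the my_dev structure and its shutdown helper are hypothetical.

/* Hypothetical teardown path for a timer whose callback runs in softirq on RT. */
struct my_dev {
	struct hrtimer poll_timer;
};

static void my_dev_shutdown(struct my_dev *dev)
{
	/*
	 * If the softirq is currently executing poll_timer's callback,
	 * hrtimer_try_to_cancel() returns -1. Instead of spinning with
	 * cpu_relax(), hrtimer_cancel() now calls
	 * hrtimer_cancel_wait_running(): it bumps timer_waiters and blocks
	 * on softirq_expiry_lock until hrtimer_sync_wait_running() drops
	 * the lock after the callback, so a preempted softirq thread can
	 * make progress and the cancel neither livelocks nor spins with
	 * inverted priorities.
	 */
	hrtimer_cancel(&dev->poll_timer);
}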
@@ -1260,8 +1374,17 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 			   enum hrtimer_mode mode)
 {
 	bool softtimer = !!(mode & HRTIMER_MODE_SOFT);
-	int base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
 	struct hrtimer_cpu_base *cpu_base;
+	int base;
+
+	/*
+	 * On PREEMPT_RT enabled kernels hrtimers which are not explicitly
+	 * marked for hard interrupt expiry mode are moved into soft
+	 * interrupt context for latency reasons and because the callbacks
+	 * can invoke functions which might sleep on RT, e.g. spin_lock().
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT) && !(mode & HRTIMER_MODE_HARD))
+		softtimer = true;
 
 	memset(timer, 0, sizeof(struct hrtimer));
 
@@ -1275,8 +1398,10 @@ static void __hrtimer_init(struct hrtimer *timer, clockid_t clock_id,
 	if (clock_id == CLOCK_REALTIME && mode & HRTIMER_MODE_REL)
 		clock_id = CLOCK_MONOTONIC;
 
+	base = softtimer ? HRTIMER_MAX_CLOCK_BASES / 2 : 0;
 	base += hrtimer_clockid_to_base(clock_id);
 	timer->is_soft = softtimer;
+	timer->is_hard = !softtimer;
 	timer->base = &cpu_base->clock_base[base];
 	timerqueue_init(&timer->node);
 }
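Not part of the patch, but a sketch of what the new defaulting in __hrtimer_init() means for callers: on PREEMPT_RT an unmarked timer is silently turned into a softirq-expired timer, while code that really needs hardirq expiry must say so with the HRTIMER_MODE_HARD bit. The *_timer names below are illustrative.

static struct hrtimer ordinary_timer, critical_timer;

static void my_init_timers(void)
{
	/*
	 * On PREEMPT_RT this one is moved to softirq expiry:
	 * is_soft = 1, is_hard = 0, and the clock base index is offset
	 * by HRTIMER_MAX_CLOCK_BASES / 2.
	 */
	hrtimer_init(&ordinary_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);

	/*
	 * Explicitly marked for hard interrupt expiry; stays in hardirq
	 * context even on PREEMPT_RT (is_hard = 1). The callback must not
	 * take sleeping locks there.
	 */
	hrtimer_init(&critical_timer, CLOCK_MONOTONIC,
		     HRTIMER_MODE_REL | HRTIMER_MODE_HARD);
}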
@@ -1449,6 +1574,8 @@ static void __hrtimer_run_queues(struct hrtimer_cpu_base *cpu_base, ktime_t now,
 				break;
 
 			__run_hrtimer(cpu_base, base, timer, &basenow, flags);
+			if (active_mask == HRTIMER_ACTIVE_SOFT)
+				hrtimer_sync_wait_running(cpu_base, flags);
 		}
 	}
 }
@@ -1459,6 +1586,7 @@ static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
 	unsigned long flags;
 	ktime_t now;
 
+	hrtimer_cpu_base_lock_expiry(cpu_base);
 	raw_spin_lock_irqsave(&cpu_base->lock, flags);
 
 	now = hrtimer_update_base(cpu_base);
@@ -1468,6 +1596,7 @@ static __latent_entropy void hrtimer_run_softirq(struct softirq_action *h)
 	hrtimer_update_softirq_timer(cpu_base, true);
 
 	raw_spin_unlock_irqrestore(&cpu_base->lock, flags);
+	hrtimer_cpu_base_unlock_expiry(cpu_base);
 }
 
 #ifdef CONFIG_HIGH_RES_TIMERS
@@ -1639,10 +1768,75 @@ static enum hrtimer_restart hrtimer_wakeup(struct hrtimer *timer)
 	return HRTIMER_NORESTART;
 }
 
-void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, struct task_struct *task)
+/**
+ * hrtimer_sleeper_start_expires - Start a hrtimer sleeper timer
+ * @sl:		sleeper to be started
+ * @mode:	timer mode abs/rel
+ *
+ * Wrapper around hrtimer_start_expires() for hrtimer_sleeper based timers
+ * to allow PREEMPT_RT to tweak the delivery mode (soft/hardirq context)
+ */
+void hrtimer_sleeper_start_expires(struct hrtimer_sleeper *sl,
+				   enum hrtimer_mode mode)
+{
+	/*
+	 * Make the enqueue delivery mode check work on RT. If the sleeper
+	 * was initialized for hard interrupt delivery, force the mode bit.
+	 * This is a special case for hrtimer_sleepers because
+	 * hrtimer_init_sleeper() determines the delivery mode on RT so the
+	 * fiddling with this decision is avoided at the call sites.
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT) && sl->timer.is_hard)
+		mode |= HRTIMER_MODE_HARD;
+
+	hrtimer_start_expires(&sl->timer, mode);
+}
+EXPORT_SYMBOL_GPL(hrtimer_sleeper_start_expires);
+
+static void __hrtimer_init_sleeper(struct hrtimer_sleeper *sl,
+				   clockid_t clock_id, enum hrtimer_mode mode)
 {
+	/*
+	 * On PREEMPT_RT enabled kernels hrtimers which are not explicitly
+	 * marked for hard interrupt expiry mode are moved into soft
+	 * interrupt context either for latency reasons or because the
+	 * hrtimer callback takes regular spinlocks or invokes other
+	 * functions which are not suitable for hard interrupt context on
+	 * PREEMPT_RT.
+	 *
+	 * The hrtimer_sleeper callback is RT compatible in hard interrupt
+	 * context, but there is a latency concern: Untrusted userspace can
+	 * spawn many threads which arm timers for the same expiry time on
+	 * the same CPU. That causes a latency spike due to the wakeup of
+	 * a gazillion threads.
+	 *
+	 * OTOH, privileged real-time user space applications rely on the
+	 * low latency of hard interrupt wakeups. If the current task is in
+	 * a real-time scheduling class, mark the mode for hard interrupt
+	 * expiry.
+	 */
+	if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+		if (task_is_realtime(current) && !(mode & HRTIMER_MODE_SOFT))
+			mode |= HRTIMER_MODE_HARD;
+	}
+
+	__hrtimer_init(&sl->timer, clock_id, mode);
 	sl->timer.function = hrtimer_wakeup;
-	sl->task = task;
+	sl->task = current;
+}
+
+/**
+ * hrtimer_init_sleeper - initialize sleeper to the given clock
+ * @sl:		sleeper to be initialized
+ * @clock_id:	the clock to be used
+ * @mode:	timer mode abs/rel
+ */
+void hrtimer_init_sleeper(struct hrtimer_sleeper *sl, clockid_t clock_id,
+			  enum hrtimer_mode mode)
+{
+	debug_init(&sl->timer, clock_id, mode);
+	__hrtimer_init_sleeper(sl, clock_id, mode);
+
 }
 EXPORT_SYMBOL_GPL(hrtimer_init_sleeper);
 
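A usage sketch mirroring the call-site conversions below (not new kernel code): the sleeper now gets its clock and mode at init time, so PREEMPT_RT decides soft vs. hard expiry once, and the timer is started through the wrapper. my_sleep_until() is a hypothetical helper.

static int my_sleep_until(ktime_t abs_expiry)
{
	struct hrtimer_sleeper t;

	hrtimer_init_sleeper_on_stack(&t, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	hrtimer_set_expires(&t.timer, abs_expiry);

	set_current_state(TASK_INTERRUPTIBLE);
	/* Re-applies HRTIMER_MODE_HARD if the sleeper was set up as hard. */
	hrtimer_sleeper_start_expires(&t, HRTIMER_MODE_ABS);

	if (likely(t.task))
		schedule();

	hrtimer_cancel(&t.timer);
	destroy_hrtimer_on_stack(&t.timer);
	__set_current_state(TASK_RUNNING);

	/* t.task is cleared by hrtimer_wakeup() when the timer fired. */
	return !t.task ? 0 : -EINTR;
}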
@@ -1669,11 +1863,9 @@ static int __sched do_nanosleep(struct hrtimer_sleeper *t, enum hrtimer_mode mod
 {
 	struct restart_block *restart;
 
-	hrtimer_init_sleeper(t, current);
-
 	do {
 		set_current_state(TASK_INTERRUPTIBLE);
-		hrtimer_start_expires(&t->timer, mode);
+		hrtimer_sleeper_start_expires(t, mode);
 
 		if (likely(t->task))
 			freezable_schedule();
@@ -1707,10 +1899,9 @@ static long __sched hrtimer_nanosleep_restart(struct restart_block *restart)
 	struct hrtimer_sleeper t;
 	int ret;
 
-	hrtimer_init_on_stack(&t.timer, restart->nanosleep.clockid,
-				HRTIMER_MODE_ABS);
+	hrtimer_init_sleeper_on_stack(&t, restart->nanosleep.clockid,
+				      HRTIMER_MODE_ABS);
 	hrtimer_set_expires_tv64(&t.timer, restart->nanosleep.expires);
-
 	ret = do_nanosleep(&t, HRTIMER_MODE_ABS);
 	destroy_hrtimer_on_stack(&t.timer);
 	return ret;
@@ -1728,7 +1919,7 @@ long hrtimer_nanosleep(const struct timespec64 *rqtp,
 	if (dl_task(current) || rt_task(current))
 		slack = 0;
 
-	hrtimer_init_on_stack(&t.timer, clockid, mode);
+	hrtimer_init_sleeper_on_stack(&t, clockid, mode);
 	hrtimer_set_expires_range_ns(&t.timer, timespec64_to_ktime(*rqtp), slack);
 	ret = do_nanosleep(&t, mode);
 	if (ret != -ERESTART_RESTARTBLOCK)
@@ -1809,6 +2000,7 @@ int hrtimers_prepare_cpu(unsigned int cpu)
 	cpu_base->softirq_next_timer = NULL;
 	cpu_base->expires_next = KTIME_MAX;
 	cpu_base->softirq_expires_next = KTIME_MAX;
+	hrtimer_cpu_base_init_expiry_lock(cpu_base);
 	return 0;
 }
 
@@ -1927,12 +2119,9 @@ schedule_hrtimeout_range_clock(ktime_t *expires, u64 delta,
 		return -EINTR;
 	}
 
-	hrtimer_init_on_stack(&t.timer, clock_id, mode);
+	hrtimer_init_sleeper_on_stack(&t, clock_id, mode);
 	hrtimer_set_expires_range_ns(&t.timer, *expires, delta);
-
-	hrtimer_init_sleeper(&t, current);
-
-	hrtimer_start_expires(&t.timer, mode);
+	hrtimer_sleeper_start_expires(&t, mode);
 
 	if (likely(t.task))
 		schedule();