aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Thomas Gleixner <tglx@linutronix.de> 2011-05-20 10:18:50 -0400
committer: Thomas Gleixner <tglx@linutronix.de> 2011-05-23 07:59:53 -0400
commit: 9ec2690758a5467f24beb301cca5098078073bba (patch)
tree: e5bc78f690d12635a56460ea6f54b49318221dc8
parent: 250f972d85effad5b6e10da4bbd877e6a4b503b6 (diff)
timerfd: Manage cancelable timers in timerfd
Peter is concerned about the extra scan of CLOCK_REALTIME_COS in the timer interrupt. Yes, I did not think about it, because the solution was so elegant. I didn't like the extra list in timerfd when it was proposed some time ago, but with an RCU-based list the list walk is less horrible than the original global lock, which was held over the list iteration. Requested-by: Peter Zijlstra <peterz@infradead.org>; Signed-off-by: Thomas Gleixner <tglx@linutronix.de>; Reviewed-by: Peter Zijlstra <peterz@infradead.org>
-rw-r--r-- fs/timerfd.c 105
-rw-r--r-- include/linux/hrtimer.h 6
-rw-r--r-- include/linux/time.h 6
-rw-r--r-- include/linux/timerfd.h 4
-rw-r--r-- kernel/hrtimer.c 94
5 files changed, 113 insertions, 102 deletions
diff --git a/fs/timerfd.c b/fs/timerfd.c
index 7e14c9e7c4ee..f67acbdda5e8 100644
--- a/fs/timerfd.c
+++ b/fs/timerfd.c
@@ -22,6 +22,7 @@
22#include <linux/anon_inodes.h> 22#include <linux/anon_inodes.h>
23#include <linux/timerfd.h> 23#include <linux/timerfd.h>
24#include <linux/syscalls.h> 24#include <linux/syscalls.h>
25#include <linux/rcupdate.h>
25 26
26struct timerfd_ctx { 27struct timerfd_ctx {
27 struct hrtimer tmr; 28 struct hrtimer tmr;
@@ -31,9 +32,14 @@ struct timerfd_ctx {
31 u64 ticks; 32 u64 ticks;
32 int expired; 33 int expired;
33 int clockid; 34 int clockid;
35 struct rcu_head rcu;
36 struct list_head clist;
34 bool might_cancel; 37 bool might_cancel;
35}; 38};
36 39
40static LIST_HEAD(cancel_list);
41static DEFINE_SPINLOCK(cancel_lock);
42
37/* 43/*
38 * This gets called when the timer event triggers. We set the "expired" 44 * This gets called when the timer event triggers. We set the "expired"
39 * flag, but we do not re-arm the timer (in case it's necessary, 45 * flag, but we do not re-arm the timer (in case it's necessary,
@@ -53,28 +59,69 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr)
53 return HRTIMER_NORESTART; 59 return HRTIMER_NORESTART;
54} 60}
55 61
56static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) 62/*
63 * Called when the clock was set to cancel the timers in the cancel
64 * list.
65 */
66void timerfd_clock_was_set(void)
57{ 67{
58 ktime_t remaining; 68 ktime_t moffs = ktime_get_monotonic_offset();
69 struct timerfd_ctx *ctx;
70 unsigned long flags;
59 71
60 remaining = hrtimer_expires_remaining(&ctx->tmr); 72 rcu_read_lock();
61 return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; 73 list_for_each_entry_rcu(ctx, &cancel_list, clist) {
74 if (!ctx->might_cancel)
75 continue;
76 spin_lock_irqsave(&ctx->wqh.lock, flags);
77 if (ctx->moffs.tv64 != moffs.tv64) {
78 ctx->moffs.tv64 = KTIME_MAX;
79 wake_up_locked(&ctx->wqh);
80 }
81 spin_unlock_irqrestore(&ctx->wqh.lock, flags);
82 }
83 rcu_read_unlock();
62} 84}
63 85
64static bool timerfd_canceled(struct timerfd_ctx *ctx) 86static void timerfd_remove_cancel(struct timerfd_ctx *ctx)
65{ 87{
66 ktime_t moffs; 88 if (ctx->might_cancel) {
89 ctx->might_cancel = false;
90 spin_lock(&cancel_lock);
91 list_del_rcu(&ctx->clist);
92 spin_unlock(&cancel_lock);
93 }
94}
67 95
68 if (!ctx->might_cancel) 96static bool timerfd_canceled(struct timerfd_ctx *ctx)
97{
98 if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX)
69 return false; 99 return false;
100 ctx->moffs = ktime_get_monotonic_offset();
101 return true;
102}
70 103
71 moffs = ktime_get_monotonic_offset(); 104static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags)
105{
106 if (ctx->clockid == CLOCK_REALTIME && (flags & TFD_TIMER_ABSTIME) &&
107 (flags & TFD_TIMER_CANCEL_ON_SET)) {
108 if (!ctx->might_cancel) {
109 ctx->might_cancel = true;
110 spin_lock(&cancel_lock);
111 list_add_rcu(&ctx->clist, &cancel_list);
112 spin_unlock(&cancel_lock);
113 }
114 } else if (ctx->might_cancel) {
115 timerfd_remove_cancel(ctx);
116 }
117}
72 118
73 if (moffs.tv64 == ctx->moffs.tv64) 119static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx)
74 return false; 120{
121 ktime_t remaining;
75 122
76 ctx->moffs = moffs; 123 remaining = hrtimer_expires_remaining(&ctx->tmr);
77 return true; 124 return remaining.tv64 < 0 ? ktime_set(0, 0): remaining;
78} 125}
79 126
80static int timerfd_setup(struct timerfd_ctx *ctx, int flags, 127static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
@@ -87,13 +134,6 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags,
87 htmode = (flags & TFD_TIMER_ABSTIME) ? 134 htmode = (flags & TFD_TIMER_ABSTIME) ?
88 HRTIMER_MODE_ABS: HRTIMER_MODE_REL; 135 HRTIMER_MODE_ABS: HRTIMER_MODE_REL;
89 136
90 ctx->might_cancel = false;
91 if (htmode == HRTIMER_MODE_ABS && ctx->clockid == CLOCK_REALTIME &&
92 (flags & TFD_TIMER_CANCELON_SET)) {
93 clockid = CLOCK_REALTIME_COS;
94 ctx->might_cancel = true;
95 }
96
97 texp = timespec_to_ktime(ktmr->it_value); 137 texp = timespec_to_ktime(ktmr->it_value);
98 ctx->expired = 0; 138 ctx->expired = 0;
99 ctx->ticks = 0; 139 ctx->ticks = 0;
@@ -113,8 +153,9 @@ static int timerfd_release(struct inode *inode, struct file *file)
113{ 153{
114 struct timerfd_ctx *ctx = file->private_data; 154 struct timerfd_ctx *ctx = file->private_data;
115 155
156 timerfd_remove_cancel(ctx);
116 hrtimer_cancel(&ctx->tmr); 157 hrtimer_cancel(&ctx->tmr);
117 kfree(ctx); 158 kfree_rcu(ctx, rcu);
118 return 0; 159 return 0;
119} 160}
120 161
@@ -149,20 +190,20 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count,
149 else 190 else
150 res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks); 191 res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks);
151 192
193 /*
194 * If clock has changed, we do not care about the
195 * ticks and we do not rearm the timer. Userspace must
196 * reevaluate anyway.
197 */
198 if (timerfd_canceled(ctx)) {
199 ctx->ticks = 0;
200 ctx->expired = 0;
201 res = -ECANCELED;
202 }
203
152 if (ctx->ticks) { 204 if (ctx->ticks) {
153 ticks = ctx->ticks; 205 ticks = ctx->ticks;
154 206
155 /*
156 * If clock has changed, we do not care about the
157 * ticks and we do not rearm the timer. Userspace must
158 * reevaluate anyway.
159 */
160 if (timerfd_canceled(ctx)) {
161 ticks = 0;
162 ctx->expired = 0;
163 res = -ECANCELED;
164 }
165
166 if (ctx->expired && ctx->tintv.tv64) { 207 if (ctx->expired && ctx->tintv.tv64) {
167 /* 208 /*
168 * If tintv.tv64 != 0, this is a periodic timer that 209 * If tintv.tv64 != 0, this is a periodic timer that
@@ -258,6 +299,8 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags,
258 return PTR_ERR(file); 299 return PTR_ERR(file);
259 ctx = file->private_data; 300 ctx = file->private_data;
260 301
302 timerfd_setup_cancel(ctx, flags);
303
261 /* 304 /*
262 * We need to stop the existing timer before reprogramming 305 * We need to stop the existing timer before reprogramming
263 * it to the new values. 306 * it to the new values.
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index eda4ccde0730..925c8c01db7b 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -155,7 +155,6 @@ enum hrtimer_base_type {
155 HRTIMER_BASE_REALTIME, 155 HRTIMER_BASE_REALTIME,
156 HRTIMER_BASE_MONOTONIC, 156 HRTIMER_BASE_MONOTONIC,
157 HRTIMER_BASE_BOOTTIME, 157 HRTIMER_BASE_BOOTTIME,
158 HRTIMER_BASE_REALTIME_COS,
159 HRTIMER_MAX_CLOCK_BASES, 158 HRTIMER_MAX_CLOCK_BASES,
160}; 159};
161 160
@@ -306,6 +305,11 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer)
306#endif 305#endif
307 306
308extern void clock_was_set(void); 307extern void clock_was_set(void);
308#ifdef CONFIG_TIMERFD
309extern void timerfd_clock_was_set(void);
310#else
311static inline void timerfd_clock_was_set(void) { }
312#endif
309extern void hrtimers_resume(void); 313extern void hrtimers_resume(void);
310 314
311extern ktime_t ktime_get(void); 315extern ktime_t ktime_get(void);
diff --git a/include/linux/time.h b/include/linux/time.h
index a9242773eb24..b3061782dec3 100644
--- a/include/linux/time.h
+++ b/include/linux/time.h
@@ -302,12 +302,6 @@ struct itimerval {
302 * The IDs of various hardware clocks: 302 * The IDs of various hardware clocks:
303 */ 303 */
304#define CLOCK_SGI_CYCLE 10 304#define CLOCK_SGI_CYCLE 10
305
306#ifdef __KERNEL__
307/* This clock is not exposed to user space */
308#define CLOCK_REALTIME_COS 15
309#endif
310
311#define MAX_CLOCKS 16 305#define MAX_CLOCKS 16
312#define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC) 306#define CLOCKS_MASK (CLOCK_REALTIME | CLOCK_MONOTONIC)
313#define CLOCKS_MONO CLOCK_MONOTONIC 307#define CLOCKS_MONO CLOCK_MONOTONIC
diff --git a/include/linux/timerfd.h b/include/linux/timerfd.h
index e9571fc8f1a0..d3b57fa12225 100644
--- a/include/linux/timerfd.h
+++ b/include/linux/timerfd.h
@@ -19,7 +19,7 @@
19 * shared O_* flags. 19 * shared O_* flags.
20 */ 20 */
21#define TFD_TIMER_ABSTIME (1 << 0) 21#define TFD_TIMER_ABSTIME (1 << 0)
22#define TFD_TIMER_CANCELON_SET (1 << 1) 22#define TFD_TIMER_CANCEL_ON_SET (1 << 1)
23#define TFD_CLOEXEC O_CLOEXEC 23#define TFD_CLOEXEC O_CLOEXEC
24#define TFD_NONBLOCK O_NONBLOCK 24#define TFD_NONBLOCK O_NONBLOCK
25 25
@@ -27,6 +27,6 @@
27/* Flags for timerfd_create. */ 27/* Flags for timerfd_create. */
28#define TFD_CREATE_FLAGS TFD_SHARED_FCNTL_FLAGS 28#define TFD_CREATE_FLAGS TFD_SHARED_FCNTL_FLAGS
29/* Flags for timerfd_settime. */ 29/* Flags for timerfd_settime. */
30#define TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_TIMER_CANCELON_SET) 30#define TFD_SETTIME_FLAGS (TFD_TIMER_ABSTIME | TFD_TIMER_CANCEL_ON_SET)
31 31
32#endif /* _LINUX_TIMERFD_H */ 32#endif /* _LINUX_TIMERFD_H */
diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
index eabcbd781433..26dd32f9f6b2 100644
--- a/kernel/hrtimer.c
+++ b/kernel/hrtimer.c
@@ -78,11 +78,6 @@ DEFINE_PER_CPU(struct hrtimer_cpu_base, hrtimer_bases) =
78 .get_time = &ktime_get_boottime, 78 .get_time = &ktime_get_boottime,
79 .resolution = KTIME_LOW_RES, 79 .resolution = KTIME_LOW_RES,
80 }, 80 },
81 {
82 .index = CLOCK_REALTIME_COS,
83 .get_time = &ktime_get_real,
84 .resolution = KTIME_LOW_RES,
85 },
86 } 81 }
87}; 82};
88 83
@@ -90,7 +85,6 @@ static const int hrtimer_clock_to_base_table[MAX_CLOCKS] = {
90 [CLOCK_REALTIME] = HRTIMER_BASE_REALTIME, 85 [CLOCK_REALTIME] = HRTIMER_BASE_REALTIME,
91 [CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC, 86 [CLOCK_MONOTONIC] = HRTIMER_BASE_MONOTONIC,
92 [CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME, 87 [CLOCK_BOOTTIME] = HRTIMER_BASE_BOOTTIME,
93 [CLOCK_REALTIME_COS] = HRTIMER_BASE_REALTIME_COS,
94}; 88};
95 89
96static inline int hrtimer_clockid_to_base(clockid_t clock_id) 90static inline int hrtimer_clockid_to_base(clockid_t clock_id)
@@ -116,7 +110,6 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
116 base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim; 110 base->clock_base[HRTIMER_BASE_REALTIME].softirq_time = xtim;
117 base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono; 111 base->clock_base[HRTIMER_BASE_MONOTONIC].softirq_time = mono;
118 base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot; 112 base->clock_base[HRTIMER_BASE_BOOTTIME].softirq_time = boot;
119 base->clock_base[HRTIMER_BASE_REALTIME_COS].softirq_time = xtim;
120} 113}
121 114
122/* 115/*
@@ -486,8 +479,6 @@ static inline void debug_deactivate(struct hrtimer *timer)
486 trace_hrtimer_cancel(timer); 479 trace_hrtimer_cancel(timer);
487} 480}
488 481
489static void hrtimer_expire_cancelable(struct hrtimer_cpu_base *cpu_base);
490
491/* High resolution timer related functions */ 482/* High resolution timer related functions */
492#ifdef CONFIG_HIGH_RES_TIMERS 483#ifdef CONFIG_HIGH_RES_TIMERS
493 484
@@ -663,7 +654,33 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
663 return 0; 654 return 0;
664} 655}
665 656
666static void retrigger_next_event(void *arg); 657/*
658 * Retrigger next event is called after clock was set
659 *
660 * Called with interrupts disabled via on_each_cpu()
661 */
662static void retrigger_next_event(void *arg)
663{
664 struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
665 struct timespec realtime_offset, xtim, wtm, sleep;
666
667 if (!hrtimer_hres_active())
668 return;
669
670 /* Optimized out for !HIGH_RES */
671 get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep);
672 set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec);
673
674 /* Adjust CLOCK_REALTIME offset */
675 raw_spin_lock(&base->lock);
676 base->clock_base[HRTIMER_BASE_REALTIME].offset =
677 timespec_to_ktime(realtime_offset);
678 base->clock_base[HRTIMER_BASE_BOOTTIME].offset =
679 timespec_to_ktime(sleep);
680
681 hrtimer_force_reprogram(base, 0);
682 raw_spin_unlock(&base->lock);
683}
667 684
668/* 685/*
669 * Switch to high resolution mode 686 * Switch to high resolution mode
@@ -711,46 +728,11 @@ static inline int hrtimer_enqueue_reprogram(struct hrtimer *timer,
711 return 0; 728 return 0;
712} 729}
713static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { } 730static inline void hrtimer_init_hres(struct hrtimer_cpu_base *base) { }
731static inline void retrigger_next_event(void *arg) { }
714 732
715#endif /* CONFIG_HIGH_RES_TIMERS */ 733#endif /* CONFIG_HIGH_RES_TIMERS */
716 734
717/* 735/*
718 * Retrigger next event is called after clock was set
719 *
720 * Called with interrupts disabled via on_each_cpu()
721 */
722static void retrigger_next_event(void *arg)
723{
724 struct hrtimer_cpu_base *base = &__get_cpu_var(hrtimer_bases);
725 struct timespec realtime_offset, xtim, wtm, sleep;
726
727 if (!hrtimer_hres_active()) {
728 raw_spin_lock(&base->lock);
729 hrtimer_expire_cancelable(base);
730 raw_spin_unlock(&base->lock);
731 return;
732 }
733
734 /* Optimized out for !HIGH_RES */
735 get_xtime_and_monotonic_and_sleep_offset(&xtim, &wtm, &sleep);
736 set_normalized_timespec(&realtime_offset, -wtm.tv_sec, -wtm.tv_nsec);
737
738 /* Adjust CLOCK_REALTIME offset */
739 raw_spin_lock(&base->lock);
740 base->clock_base[HRTIMER_BASE_REALTIME].offset =
741 timespec_to_ktime(realtime_offset);
742 base->clock_base[HRTIMER_BASE_BOOTTIME].offset =
743 timespec_to_ktime(sleep);
744 base->clock_base[HRTIMER_BASE_REALTIME_COS].offset =
745 timespec_to_ktime(realtime_offset);
746
747 hrtimer_expire_cancelable(base);
748
749 hrtimer_force_reprogram(base, 0);
750 raw_spin_unlock(&base->lock);
751}
752
753/*
754 * Clock realtime was set 736 * Clock realtime was set
755 * 737 *
756 * Change the offset of the realtime clock vs. the monotonic 738 * Change the offset of the realtime clock vs. the monotonic
@@ -763,8 +745,11 @@ static void retrigger_next_event(void *arg)
763 */ 745 */
764void clock_was_set(void) 746void clock_was_set(void)
765{ 747{
748#ifdef CONFIG_HIGH_RES_TIMERS
766 /* Retrigger the CPU local events everywhere */ 749 /* Retrigger the CPU local events everywhere */
767 on_each_cpu(retrigger_next_event, NULL, 1); 750 on_each_cpu(retrigger_next_event, NULL, 1);
751#endif
752 timerfd_clock_was_set();
768} 753}
769 754
770/* 755/*
@@ -777,6 +762,7 @@ void hrtimers_resume(void)
777 KERN_INFO "hrtimers_resume() called with IRQs enabled!"); 762 KERN_INFO "hrtimers_resume() called with IRQs enabled!");
778 763
779 retrigger_next_event(NULL); 764 retrigger_next_event(NULL);
765 timerfd_clock_was_set();
780} 766}
781 767
782static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer) 768static inline void timer_stats_hrtimer_set_start_info(struct hrtimer *timer)
@@ -1240,22 +1226,6 @@ static void __run_hrtimer(struct hrtimer *timer, ktime_t *now)
1240 timer->state &= ~HRTIMER_STATE_CALLBACK; 1226 timer->state &= ~HRTIMER_STATE_CALLBACK;
1241} 1227}
1242 1228
1243static void hrtimer_expire_cancelable(struct hrtimer_cpu_base *cpu_base)
1244{
1245 struct timerqueue_node *node;
1246 struct hrtimer_clock_base *base;
1247 ktime_t now = ktime_get_real();
1248
1249 base = &cpu_base->clock_base[HRTIMER_BASE_REALTIME_COS];
1250
1251 while ((node = timerqueue_getnext(&base->active))) {
1252 struct hrtimer *timer;
1253
1254 timer = container_of(node, struct hrtimer, node);
1255 __run_hrtimer(timer, &now);
1256 }
1257}
1258
1259#ifdef CONFIG_HIGH_RES_TIMERS 1229#ifdef CONFIG_HIGH_RES_TIMERS
1260 1230
1261/* 1231/*