diff options
author | Thomas Gleixner <tglx@linutronix.de> | 2011-05-20 10:18:50 -0400 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2011-05-23 07:59:53 -0400 |
commit | 9ec2690758a5467f24beb301cca5098078073bba (patch) | |
tree | e5bc78f690d12635a56460ea6f54b49318221dc8 /fs | |
parent | 250f972d85effad5b6e10da4bbd877e6a4b503b6 (diff) |
timerfd: Manage cancelable timers in timerfd
Peter is concerned about the extra scan of CLOCK_REALTIME_COS in the
timer interrupt. Yes, I did not think about it, because the solution
was so elegant. I didn't like the extra list in timerfd when it was
proposed some time ago, but with an RCU-based list the list walk is
less horrible than the original global lock, which was held over the
list iteration.
Requested-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Peter Zijlstra <peterz@infradead.org>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/timerfd.c | 105 |
1 files changed, 74 insertions, 31 deletions
diff --git a/fs/timerfd.c b/fs/timerfd.c index 7e14c9e7c4ee..f67acbdda5e8 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c | |||
@@ -22,6 +22,7 @@ | |||
22 | #include <linux/anon_inodes.h> | 22 | #include <linux/anon_inodes.h> |
23 | #include <linux/timerfd.h> | 23 | #include <linux/timerfd.h> |
24 | #include <linux/syscalls.h> | 24 | #include <linux/syscalls.h> |
25 | #include <linux/rcupdate.h> | ||
25 | 26 | ||
26 | struct timerfd_ctx { | 27 | struct timerfd_ctx { |
27 | struct hrtimer tmr; | 28 | struct hrtimer tmr; |
@@ -31,9 +32,14 @@ struct timerfd_ctx { | |||
31 | u64 ticks; | 32 | u64 ticks; |
32 | int expired; | 33 | int expired; |
33 | int clockid; | 34 | int clockid; |
35 | struct rcu_head rcu; | ||
36 | struct list_head clist; | ||
34 | bool might_cancel; | 37 | bool might_cancel; |
35 | }; | 38 | }; |
36 | 39 | ||
40 | static LIST_HEAD(cancel_list); | ||
41 | static DEFINE_SPINLOCK(cancel_lock); | ||
42 | |||
37 | /* | 43 | /* |
38 | * This gets called when the timer event triggers. We set the "expired" | 44 | * This gets called when the timer event triggers. We set the "expired" |
39 | * flag, but we do not re-arm the timer (in case it's necessary, | 45 | * flag, but we do not re-arm the timer (in case it's necessary, |
@@ -53,28 +59,69 @@ static enum hrtimer_restart timerfd_tmrproc(struct hrtimer *htmr) | |||
53 | return HRTIMER_NORESTART; | 59 | return HRTIMER_NORESTART; |
54 | } | 60 | } |
55 | 61 | ||
56 | static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) | 62 | /* |
63 | * Called when the clock was set to cancel the timers in the cancel | ||
64 | * list. | ||
65 | */ | ||
66 | void timerfd_clock_was_set(void) | ||
57 | { | 67 | { |
58 | ktime_t remaining; | 68 | ktime_t moffs = ktime_get_monotonic_offset(); |
69 | struct timerfd_ctx *ctx; | ||
70 | unsigned long flags; | ||
59 | 71 | ||
60 | remaining = hrtimer_expires_remaining(&ctx->tmr); | 72 | rcu_read_lock(); |
61 | return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; | 73 | list_for_each_entry_rcu(ctx, &cancel_list, clist) { |
74 | if (!ctx->might_cancel) | ||
75 | continue; | ||
76 | spin_lock_irqsave(&ctx->wqh.lock, flags); | ||
77 | if (ctx->moffs.tv64 != moffs.tv64) { | ||
78 | ctx->moffs.tv64 = KTIME_MAX; | ||
79 | wake_up_locked(&ctx->wqh); | ||
80 | } | ||
81 | spin_unlock_irqrestore(&ctx->wqh.lock, flags); | ||
82 | } | ||
83 | rcu_read_unlock(); | ||
62 | } | 84 | } |
63 | 85 | ||
64 | static bool timerfd_canceled(struct timerfd_ctx *ctx) | 86 | static void timerfd_remove_cancel(struct timerfd_ctx *ctx) |
65 | { | 87 | { |
66 | ktime_t moffs; | 88 | if (ctx->might_cancel) { |
89 | ctx->might_cancel = false; | ||
90 | spin_lock(&cancel_lock); | ||
91 | list_del_rcu(&ctx->clist); | ||
92 | spin_unlock(&cancel_lock); | ||
93 | } | ||
94 | } | ||
67 | 95 | ||
68 | if (!ctx->might_cancel) | 96 | static bool timerfd_canceled(struct timerfd_ctx *ctx) |
97 | { | ||
98 | if (!ctx->might_cancel || ctx->moffs.tv64 != KTIME_MAX) | ||
69 | return false; | 99 | return false; |
100 | ctx->moffs = ktime_get_monotonic_offset(); | ||
101 | return true; | ||
102 | } | ||
70 | 103 | ||
71 | moffs = ktime_get_monotonic_offset(); | 104 | static void timerfd_setup_cancel(struct timerfd_ctx *ctx, int flags) |
105 | { | ||
106 | if (ctx->clockid == CLOCK_REALTIME && (flags & TFD_TIMER_ABSTIME) && | ||
107 | (flags & TFD_TIMER_CANCEL_ON_SET)) { | ||
108 | if (!ctx->might_cancel) { | ||
109 | ctx->might_cancel = true; | ||
110 | spin_lock(&cancel_lock); | ||
111 | list_add_rcu(&ctx->clist, &cancel_list); | ||
112 | spin_unlock(&cancel_lock); | ||
113 | } | ||
114 | } else if (ctx->might_cancel) { | ||
115 | timerfd_remove_cancel(ctx); | ||
116 | } | ||
117 | } | ||
72 | 118 | ||
73 | if (moffs.tv64 == ctx->moffs.tv64) | 119 | static ktime_t timerfd_get_remaining(struct timerfd_ctx *ctx) |
74 | return false; | 120 | { |
121 | ktime_t remaining; | ||
75 | 122 | ||
76 | ctx->moffs = moffs; | 123 | remaining = hrtimer_expires_remaining(&ctx->tmr); |
77 | return true; | 124 | return remaining.tv64 < 0 ? ktime_set(0, 0): remaining; |
78 | } | 125 | } |
79 | 126 | ||
80 | static int timerfd_setup(struct timerfd_ctx *ctx, int flags, | 127 | static int timerfd_setup(struct timerfd_ctx *ctx, int flags, |
@@ -87,13 +134,6 @@ static int timerfd_setup(struct timerfd_ctx *ctx, int flags, | |||
87 | htmode = (flags & TFD_TIMER_ABSTIME) ? | 134 | htmode = (flags & TFD_TIMER_ABSTIME) ? |
88 | HRTIMER_MODE_ABS: HRTIMER_MODE_REL; | 135 | HRTIMER_MODE_ABS: HRTIMER_MODE_REL; |
89 | 136 | ||
90 | ctx->might_cancel = false; | ||
91 | if (htmode == HRTIMER_MODE_ABS && ctx->clockid == CLOCK_REALTIME && | ||
92 | (flags & TFD_TIMER_CANCELON_SET)) { | ||
93 | clockid = CLOCK_REALTIME_COS; | ||
94 | ctx->might_cancel = true; | ||
95 | } | ||
96 | |||
97 | texp = timespec_to_ktime(ktmr->it_value); | 137 | texp = timespec_to_ktime(ktmr->it_value); |
98 | ctx->expired = 0; | 138 | ctx->expired = 0; |
99 | ctx->ticks = 0; | 139 | ctx->ticks = 0; |
@@ -113,8 +153,9 @@ static int timerfd_release(struct inode *inode, struct file *file) | |||
113 | { | 153 | { |
114 | struct timerfd_ctx *ctx = file->private_data; | 154 | struct timerfd_ctx *ctx = file->private_data; |
115 | 155 | ||
156 | timerfd_remove_cancel(ctx); | ||
116 | hrtimer_cancel(&ctx->tmr); | 157 | hrtimer_cancel(&ctx->tmr); |
117 | kfree(ctx); | 158 | kfree_rcu(ctx, rcu); |
118 | return 0; | 159 | return 0; |
119 | } | 160 | } |
120 | 161 | ||
@@ -149,20 +190,20 @@ static ssize_t timerfd_read(struct file *file, char __user *buf, size_t count, | |||
149 | else | 190 | else |
150 | res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks); | 191 | res = wait_event_interruptible_locked_irq(ctx->wqh, ctx->ticks); |
151 | 192 | ||
193 | /* | ||
194 | * If clock has changed, we do not care about the | ||
195 | * ticks and we do not rearm the timer. Userspace must | ||
196 | * reevaluate anyway. | ||
197 | */ | ||
198 | if (timerfd_canceled(ctx)) { | ||
199 | ctx->ticks = 0; | ||
200 | ctx->expired = 0; | ||
201 | res = -ECANCELED; | ||
202 | } | ||
203 | |||
152 | if (ctx->ticks) { | 204 | if (ctx->ticks) { |
153 | ticks = ctx->ticks; | 205 | ticks = ctx->ticks; |
154 | 206 | ||
155 | /* | ||
156 | * If clock has changed, we do not care about the | ||
157 | * ticks and we do not rearm the timer. Userspace must | ||
158 | * reevaluate anyway. | ||
159 | */ | ||
160 | if (timerfd_canceled(ctx)) { | ||
161 | ticks = 0; | ||
162 | ctx->expired = 0; | ||
163 | res = -ECANCELED; | ||
164 | } | ||
165 | |||
166 | if (ctx->expired && ctx->tintv.tv64) { | 207 | if (ctx->expired && ctx->tintv.tv64) { |
167 | /* | 208 | /* |
168 | * If tintv.tv64 != 0, this is a periodic timer that | 209 | * If tintv.tv64 != 0, this is a periodic timer that |
@@ -258,6 +299,8 @@ SYSCALL_DEFINE4(timerfd_settime, int, ufd, int, flags, | |||
258 | return PTR_ERR(file); | 299 | return PTR_ERR(file); |
259 | ctx = file->private_data; | 300 | ctx = file->private_data; |
260 | 301 | ||
302 | timerfd_setup_cancel(ctx, flags); | ||
303 | |||
261 | /* | 304 | /* |
262 | * We need to stop the existing timer before reprogramming | 305 | * We need to stop the existing timer before reprogramming |
263 | * it to the new values. | 306 | * it to the new values. |