aboutsummaryrefslogtreecommitdiffstats
path: root/kernel/posix-timers.c
diff options
context:
space:
mode:
authorPavel Emelyanov <xemul@parallels.com>2013-03-11 05:12:21 -0400
committerThomas Gleixner <tglx@linutronix.de>2013-04-17 14:51:01 -0400
commit5ed67f05f66c41e39880a6d61358438a25f9fee5 (patch)
treed3e52cca63e119a1f437b660f5d7a553f8ee37bc /kernel/posix-timers.c
parent4e8f8b34b92b6514cc070aeb94d317cadd5071d7 (diff)
posix timers: Allocate timer id per process (v2)
Currently the kernel generates IDs for posix timers in a global manner -- there's a kernel-wide IDR tree from which IDs are created. This makes it impossible to recreate a timer with a desired ID (in particular this is done by the CRIU checkpoint-restore project) -- since these IDs are global, it may happen that at the time we recreate a timer, the ID we want for it is already in use by some other timer.
In order to address this, replace the IDR tree with a global hash table for timers and make timer IDs unique per signal_struct (to which timers are linked anyway). With this, two timers belonging to different processes may have equal IDs and we can recreate either of them with the ID we want.
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Matthew Helsley <matt.helsley@gmail.com>
Link: http://lkml.kernel.org/r/513D9FF5.9010004@parallels.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'kernel/posix-timers.c')
-rw-r--r--kernel/posix-timers.c106
1 files changed, 69 insertions, 37 deletions
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c
index 2a2e173d0a7a..34d75926b843 100644
--- a/kernel/posix-timers.c
+++ b/kernel/posix-timers.c
@@ -40,38 +40,31 @@
40#include <linux/list.h> 40#include <linux/list.h>
41#include <linux/init.h> 41#include <linux/init.h>
42#include <linux/compiler.h> 42#include <linux/compiler.h>
43#include <linux/idr.h> 43#include <linux/hash.h>
44#include <linux/posix-clock.h> 44#include <linux/posix-clock.h>
45#include <linux/posix-timers.h> 45#include <linux/posix-timers.h>
46#include <linux/syscalls.h> 46#include <linux/syscalls.h>
47#include <linux/wait.h> 47#include <linux/wait.h>
48#include <linux/workqueue.h> 48#include <linux/workqueue.h>
49#include <linux/export.h> 49#include <linux/export.h>
50#include <linux/hashtable.h>
50 51
51/* 52/*
52 * Management arrays for POSIX timers. Timers are kept in slab memory 53 * Management arrays for POSIX timers. Timers are now kept in static hash table
53 * Timer ids are allocated by an external routine that keeps track of the 54 * with 512 entries.
54 * id and the timer. The external interface is: 55 * Timer ids are allocated by local routine, which selects proper hash head by
55 * 56 * key, constructed from current->signal address and per signal struct counter.
56 * void *idr_find(struct idr *idp, int id); to find timer_id <id> 57 * This keeps timer ids unique per process, but now they can intersect between
57 * int idr_get_new(struct idr *idp, void *ptr); to get a new id and 58 * processes.
58 * related it to <ptr>
59 * void idr_remove(struct idr *idp, int id); to release <id>
60 * void idr_init(struct idr *idp); to initialize <idp>
61 * which we supply.
62 * The idr_get_new *may* call slab for more memory so it must not be
63 * called under a spin lock. Likewise idr_remore may release memory
64 * (but it may be ok to do this under a lock...).
65 * idr_find is just a memory look up and is quite fast. A -1 return
66 * indicates that the requested id does not exist.
67 */ 59 */
68 60
69/* 61/*
70 * Lets keep our timers in a slab cache :-) 62 * Lets keep our timers in a slab cache :-)
71 */ 63 */
72static struct kmem_cache *posix_timers_cache; 64static struct kmem_cache *posix_timers_cache;
73static struct idr posix_timers_id; 65
74static DEFINE_SPINLOCK(idr_lock); 66static DEFINE_HASHTABLE(posix_timers_hashtable, 9);
67static DEFINE_SPINLOCK(hash_lock);
75 68
76/* 69/*
77 * we assume that the new SIGEV_THREAD_ID shares no bits with the other 70 * we assume that the new SIGEV_THREAD_ID shares no bits with the other
@@ -152,6 +145,57 @@ static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags);
152 __timr; \ 145 __timr; \
153}) 146})
154 147
148static int hash(struct signal_struct *sig, unsigned int nr)
149{
150 return hash_32(hash32_ptr(sig) ^ nr, HASH_BITS(posix_timers_hashtable));
151}
152
153static struct k_itimer *__posix_timers_find(struct hlist_head *head,
154 struct signal_struct *sig,
155 timer_t id)
156{
157 struct hlist_node *node;
158 struct k_itimer *timer;
159
160 hlist_for_each_entry_rcu(timer, head, t_hash) {
161 if ((timer->it_signal == sig) && (timer->it_id == id))
162 return timer;
163 }
164 return NULL;
165}
166
167static struct k_itimer *posix_timer_by_id(timer_t id)
168{
169 struct signal_struct *sig = current->signal;
170 struct hlist_head *head = &posix_timers_hashtable[hash(sig, id)];
171
172 return __posix_timers_find(head, sig, id);
173}
174
175static int posix_timer_add(struct k_itimer *timer)
176{
177 struct signal_struct *sig = current->signal;
178 int first_free_id = sig->posix_timer_id;
179 struct hlist_head *head;
180 int ret = -ENOENT;
181
182 do {
183 spin_lock(&hash_lock);
184 head = &posix_timers_hashtable[hash(sig, sig->posix_timer_id)];
185 if (!__posix_timers_find(head, sig, sig->posix_timer_id)) {
186 hlist_add_head_rcu(&timer->t_hash, head);
187 ret = sig->posix_timer_id;
188 }
189 if (++sig->posix_timer_id < 0)
190 sig->posix_timer_id = 0;
191 if ((sig->posix_timer_id == first_free_id) && (ret == -ENOENT))
192 /* Loop over all possible ids completed */
193 ret = -EAGAIN;
194 spin_unlock(&hash_lock);
195 } while (ret == -ENOENT);
196 return ret;
197}
198
155static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) 199static inline void unlock_timer(struct k_itimer *timr, unsigned long flags)
156{ 200{
157 spin_unlock_irqrestore(&timr->it_lock, flags); 201 spin_unlock_irqrestore(&timr->it_lock, flags);
@@ -298,7 +342,6 @@ static __init int init_posix_timers(void)
298 posix_timers_cache = kmem_cache_create("posix_timers_cache", 342 posix_timers_cache = kmem_cache_create("posix_timers_cache",
299 sizeof (struct k_itimer), 0, SLAB_PANIC, 343 sizeof (struct k_itimer), 0, SLAB_PANIC,
300 NULL); 344 NULL);
301 idr_init(&posix_timers_id);
302 return 0; 345 return 0;
303} 346}
304 347
@@ -520,9 +563,9 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set)
520{ 563{
521 if (it_id_set) { 564 if (it_id_set) {
522 unsigned long flags; 565 unsigned long flags;
523 spin_lock_irqsave(&idr_lock, flags); 566 spin_lock_irqsave(&hash_lock, flags);
524 idr_remove(&posix_timers_id, tmr->it_id); 567 hlist_del_rcu(&tmr->t_hash);
525 spin_unlock_irqrestore(&idr_lock, flags); 568 spin_unlock_irqrestore(&hash_lock, flags);
526 } 569 }
527 put_pid(tmr->it_pid); 570 put_pid(tmr->it_pid);
528 sigqueue_free(tmr->sigq); 571 sigqueue_free(tmr->sigq);
@@ -568,22 +611,11 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock,
568 return -EAGAIN; 611 return -EAGAIN;
569 612
570 spin_lock_init(&new_timer->it_lock); 613 spin_lock_init(&new_timer->it_lock);
571 614 new_timer_id = posix_timer_add(new_timer);
572 idr_preload(GFP_KERNEL); 615 if (new_timer_id < 0) {
573 spin_lock_irq(&idr_lock); 616 error = new_timer_id;
574 error = idr_alloc(&posix_timers_id, new_timer, 0, 0, GFP_NOWAIT);
575 spin_unlock_irq(&idr_lock);
576 idr_preload_end();
577 if (error < 0) {
578 /*
579 * Weird looking, but we return EAGAIN if the IDR is
580 * full (proper POSIX return value for this)
581 */
582 if (error == -ENOSPC)
583 error = -EAGAIN;
584 goto out; 617 goto out;
585 } 618 }
586 new_timer_id = error;
587 619
588 it_id_set = IT_ID_SET; 620 it_id_set = IT_ID_SET;
589 new_timer->it_id = (timer_t) new_timer_id; 621 new_timer->it_id = (timer_t) new_timer_id;
@@ -661,7 +693,7 @@ static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags)
661 return NULL; 693 return NULL;
662 694
663 rcu_read_lock(); 695 rcu_read_lock();
664 timr = idr_find(&posix_timers_id, (int)timer_id); 696 timr = posix_timer_by_id(timer_id);
665 if (timr) { 697 if (timr) {
666 spin_lock_irqsave(&timr->it_lock, *flags); 698 spin_lock_irqsave(&timr->it_lock, *flags);
667 if (timr->it_signal == current->signal) { 699 if (timr->it_signal == current->signal) {