diff options
author | Pavel Emelyanov <xemul@parallels.com> | 2013-03-11 05:12:21 -0400 |
---|---|---|
committer | Thomas Gleixner <tglx@linutronix.de> | 2013-04-17 14:51:01 -0400 |
commit | 5ed67f05f66c41e39880a6d61358438a25f9fee5 (patch) | |
tree | d3e52cca63e119a1f437b660f5d7a553f8ee37bc /kernel/posix-timers.c | |
parent | 4e8f8b34b92b6514cc070aeb94d317cadd5071d7 (diff) |
posix timers: Allocate timer id per process (v2)
Currently the kernel generates IDs for posix timers in a global manner --
there's a kernel-wide IDR tree from which IDs are created. This makes
it impossible to recreate a timer with a desired ID (in particular
this is done by the CRIU checkpoint-restore project) -- since these
IDs are global, it may happen that at the time we recreate a timer, the
ID we want for it is already in use by some other timer.
In order to address this, replace the IDR tree with a global hash
table for timers and make timer IDs unique per signal_struct (to
which timers are linked anyway). With this, two timers belonging to
different processes may have equal IDs and we can recreate either of
them with the ID we want.
Signed-off-by: Pavel Emelyanov <xemul@parallels.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Michael Kerrisk <mtk.manpages@gmail.com>
Cc: Matthew Helsley <matt.helsley@gmail.com>
Link: http://lkml.kernel.org/r/513D9FF5.9010004@parallels.com
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'kernel/posix-timers.c')
-rw-r--r-- | kernel/posix-timers.c | 106 |
1 files changed, 69 insertions, 37 deletions
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index 2a2e173d0a7a..34d75926b843 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c | |||
@@ -40,38 +40,31 @@ | |||
40 | #include <linux/list.h> | 40 | #include <linux/list.h> |
41 | #include <linux/init.h> | 41 | #include <linux/init.h> |
42 | #include <linux/compiler.h> | 42 | #include <linux/compiler.h> |
43 | #include <linux/idr.h> | 43 | #include <linux/hash.h> |
44 | #include <linux/posix-clock.h> | 44 | #include <linux/posix-clock.h> |
45 | #include <linux/posix-timers.h> | 45 | #include <linux/posix-timers.h> |
46 | #include <linux/syscalls.h> | 46 | #include <linux/syscalls.h> |
47 | #include <linux/wait.h> | 47 | #include <linux/wait.h> |
48 | #include <linux/workqueue.h> | 48 | #include <linux/workqueue.h> |
49 | #include <linux/export.h> | 49 | #include <linux/export.h> |
50 | #include <linux/hashtable.h> | ||
50 | 51 | ||
51 | /* | 52 | /* |
52 | * Management arrays for POSIX timers. Timers are kept in slab memory | 53 | * Management arrays for POSIX timers. Timers are now kept in static hash table |
53 | * Timer ids are allocated by an external routine that keeps track of the | 54 | * with 512 entries. |
54 | * id and the timer. The external interface is: | 55 | * Timer ids are allocated by local routine, which selects proper hash head by |
55 | * | 56 | * key, constructed from current->signal address and per signal struct counter. |
56 | * void *idr_find(struct idr *idp, int id); to find timer_id <id> | 57 | * This keeps timer ids unique per process, but now they can intersect between |
57 | * int idr_get_new(struct idr *idp, void *ptr); to get a new id and | 58 | * processes. |
58 | * related it to <ptr> | ||
59 | * void idr_remove(struct idr *idp, int id); to release <id> | ||
60 | * void idr_init(struct idr *idp); to initialize <idp> | ||
61 | * which we supply. | ||
62 | * The idr_get_new *may* call slab for more memory so it must not be | ||
63 | * called under a spin lock. Likewise idr_remore may release memory | ||
64 | * (but it may be ok to do this under a lock...). | ||
65 | * idr_find is just a memory look up and is quite fast. A -1 return | ||
66 | * indicates that the requested id does not exist. | ||
67 | */ | 59 | */ |
68 | 60 | ||
69 | /* | 61 | /* |
70 | * Lets keep our timers in a slab cache :-) | 62 | * Lets keep our timers in a slab cache :-) |
71 | */ | 63 | */ |
72 | static struct kmem_cache *posix_timers_cache; | 64 | static struct kmem_cache *posix_timers_cache; |
73 | static struct idr posix_timers_id; | 65 | |
74 | static DEFINE_SPINLOCK(idr_lock); | 66 | static DEFINE_HASHTABLE(posix_timers_hashtable, 9); |
67 | static DEFINE_SPINLOCK(hash_lock); | ||
75 | 68 | ||
76 | /* | 69 | /* |
77 | * we assume that the new SIGEV_THREAD_ID shares no bits with the other | 70 | * we assume that the new SIGEV_THREAD_ID shares no bits with the other |
@@ -152,6 +145,57 @@ static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags); | |||
152 | __timr; \ | 145 | __timr; \ |
153 | }) | 146 | }) |
154 | 147 | ||
148 | static int hash(struct signal_struct *sig, unsigned int nr) | ||
149 | { | ||
150 | return hash_32(hash32_ptr(sig) ^ nr, HASH_BITS(posix_timers_hashtable)); | ||
151 | } | ||
152 | |||
153 | static struct k_itimer *__posix_timers_find(struct hlist_head *head, | ||
154 | struct signal_struct *sig, | ||
155 | timer_t id) | ||
156 | { | ||
157 | struct hlist_node *node; | ||
158 | struct k_itimer *timer; | ||
159 | |||
160 | hlist_for_each_entry_rcu(timer, head, t_hash) { | ||
161 | if ((timer->it_signal == sig) && (timer->it_id == id)) | ||
162 | return timer; | ||
163 | } | ||
164 | return NULL; | ||
165 | } | ||
166 | |||
167 | static struct k_itimer *posix_timer_by_id(timer_t id) | ||
168 | { | ||
169 | struct signal_struct *sig = current->signal; | ||
170 | struct hlist_head *head = &posix_timers_hashtable[hash(sig, id)]; | ||
171 | |||
172 | return __posix_timers_find(head, sig, id); | ||
173 | } | ||
174 | |||
175 | static int posix_timer_add(struct k_itimer *timer) | ||
176 | { | ||
177 | struct signal_struct *sig = current->signal; | ||
178 | int first_free_id = sig->posix_timer_id; | ||
179 | struct hlist_head *head; | ||
180 | int ret = -ENOENT; | ||
181 | |||
182 | do { | ||
183 | spin_lock(&hash_lock); | ||
184 | head = &posix_timers_hashtable[hash(sig, sig->posix_timer_id)]; | ||
185 | if (!__posix_timers_find(head, sig, sig->posix_timer_id)) { | ||
186 | hlist_add_head_rcu(&timer->t_hash, head); | ||
187 | ret = sig->posix_timer_id; | ||
188 | } | ||
189 | if (++sig->posix_timer_id < 0) | ||
190 | sig->posix_timer_id = 0; | ||
191 | if ((sig->posix_timer_id == first_free_id) && (ret == -ENOENT)) | ||
192 | /* Loop over all possible ids completed */ | ||
193 | ret = -EAGAIN; | ||
194 | spin_unlock(&hash_lock); | ||
195 | } while (ret == -ENOENT); | ||
196 | return ret; | ||
197 | } | ||
198 | |||
155 | static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) | 199 | static inline void unlock_timer(struct k_itimer *timr, unsigned long flags) |
156 | { | 200 | { |
157 | spin_unlock_irqrestore(&timr->it_lock, flags); | 201 | spin_unlock_irqrestore(&timr->it_lock, flags); |
@@ -298,7 +342,6 @@ static __init int init_posix_timers(void) | |||
298 | posix_timers_cache = kmem_cache_create("posix_timers_cache", | 342 | posix_timers_cache = kmem_cache_create("posix_timers_cache", |
299 | sizeof (struct k_itimer), 0, SLAB_PANIC, | 343 | sizeof (struct k_itimer), 0, SLAB_PANIC, |
300 | NULL); | 344 | NULL); |
301 | idr_init(&posix_timers_id); | ||
302 | return 0; | 345 | return 0; |
303 | } | 346 | } |
304 | 347 | ||
@@ -520,9 +563,9 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set) | |||
520 | { | 563 | { |
521 | if (it_id_set) { | 564 | if (it_id_set) { |
522 | unsigned long flags; | 565 | unsigned long flags; |
523 | spin_lock_irqsave(&idr_lock, flags); | 566 | spin_lock_irqsave(&hash_lock, flags); |
524 | idr_remove(&posix_timers_id, tmr->it_id); | 567 | hlist_del_rcu(&tmr->t_hash); |
525 | spin_unlock_irqrestore(&idr_lock, flags); | 568 | spin_unlock_irqrestore(&hash_lock, flags); |
526 | } | 569 | } |
527 | put_pid(tmr->it_pid); | 570 | put_pid(tmr->it_pid); |
528 | sigqueue_free(tmr->sigq); | 571 | sigqueue_free(tmr->sigq); |
@@ -568,22 +611,11 @@ SYSCALL_DEFINE3(timer_create, const clockid_t, which_clock, | |||
568 | return -EAGAIN; | 611 | return -EAGAIN; |
569 | 612 | ||
570 | spin_lock_init(&new_timer->it_lock); | 613 | spin_lock_init(&new_timer->it_lock); |
571 | 614 | new_timer_id = posix_timer_add(new_timer); | |
572 | idr_preload(GFP_KERNEL); | 615 | if (new_timer_id < 0) { |
573 | spin_lock_irq(&idr_lock); | 616 | error = new_timer_id; |
574 | error = idr_alloc(&posix_timers_id, new_timer, 0, 0, GFP_NOWAIT); | ||
575 | spin_unlock_irq(&idr_lock); | ||
576 | idr_preload_end(); | ||
577 | if (error < 0) { | ||
578 | /* | ||
579 | * Weird looking, but we return EAGAIN if the IDR is | ||
580 | * full (proper POSIX return value for this) | ||
581 | */ | ||
582 | if (error == -ENOSPC) | ||
583 | error = -EAGAIN; | ||
584 | goto out; | 617 | goto out; |
585 | } | 618 | } |
586 | new_timer_id = error; | ||
587 | 619 | ||
588 | it_id_set = IT_ID_SET; | 620 | it_id_set = IT_ID_SET; |
589 | new_timer->it_id = (timer_t) new_timer_id; | 621 | new_timer->it_id = (timer_t) new_timer_id; |
@@ -661,7 +693,7 @@ static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags) | |||
661 | return NULL; | 693 | return NULL; |
662 | 694 | ||
663 | rcu_read_lock(); | 695 | rcu_read_lock(); |
664 | timr = idr_find(&posix_timers_id, (int)timer_id); | 696 | timr = posix_timer_by_id(timer_id); |
665 | if (timr) { | 697 | if (timr) { |
666 | spin_lock_irqsave(&timr->it_lock, *flags); | 698 | spin_lock_irqsave(&timr->it_lock, *flags); |
667 | if (timr->it_signal == current->signal) { | 699 | if (timr->it_signal == current->signal) { |