diff options
| author | Eric Dumazet <eric.dumazet@gmail.com> | 2011-05-24 05:12:58 -0400 |
|---|---|---|
| committer | Thomas Gleixner <tglx@linutronix.de> | 2011-05-24 06:10:51 -0400 |
| commit | 8af088710d1eb3c980e0ef3779c8d47f3f217b48 (patch) | |
| tree | e122a2e65684f0a40d263ba73afe3d54a2c5993a | |
| parent | d762f4383100c2a87b1a3f2d678cd3b5425655b4 (diff) | |
posix-timers: RCU conversion
Ben Nagy reported a scalability problem with KVM/QEMU that hit very hard
a single spinlock (idr_lock) in posix-timers code, on its 48 core
machine.
Even on a 16 cpu machine (2x4x2), a single test can show 98% of cpu time
used in ticket_spin_lock, from lock_timer
Ref: http://www.spinics.net/lists/kvm/msg51526.html
Switching to RCU is quite easy, IDR being already RCU ready. idr_lock
should be locked only for an insert/delete, not a lookup.
Benchmark on a 2x4x2 machine, 16 processes calling timer_gettime().
Before :
real 1m18.669s
user 0m1.346s
sys 1m17.180s
After :
real 0m3.296s
user 0m1.366s
sys 0m1.926s
Reported-by: Ben Nagy <ben@iagu.net>
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Tested-by: Ben Nagy <ben@iagu.net>
Cc: Oleg Nesterov <oleg@redhat.com>
Cc: Avi Kivity <avi@redhat.com>
Cc: John Stultz <johnstul@us.ibm.com>
Cc: Richard Cochran <richard.cochran@omicron.at>
Cc: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Cc: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
| -rw-r--r-- | include/linux/posix-timers.h | 1 | ||||
| -rw-r--r-- | kernel/posix-timers.c | 25 |
2 files changed, 15 insertions, 11 deletions
diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 808227d40a64..959c14132f46 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h | |||
| @@ -82,6 +82,7 @@ struct k_itimer { | |||
| 82 | unsigned long expires; | 82 | unsigned long expires; |
| 83 | } mmtimer; | 83 | } mmtimer; |
| 84 | struct alarm alarmtimer; | 84 | struct alarm alarmtimer; |
| 85 | struct rcu_head rcu; | ||
| 85 | } it; | 86 | } it; |
| 86 | }; | 87 | }; |
| 87 | 88 | ||
diff --git a/kernel/posix-timers.c b/kernel/posix-timers.c index a1b5edf1bf92..4556182527f3 100644 --- a/kernel/posix-timers.c +++ b/kernel/posix-timers.c | |||
| @@ -491,6 +491,13 @@ static struct k_itimer * alloc_posix_timer(void) | |||
| 491 | return tmr; | 491 | return tmr; |
| 492 | } | 492 | } |
| 493 | 493 | ||
| 494 | static void k_itimer_rcu_free(struct rcu_head *head) | ||
| 495 | { | ||
| 496 | struct k_itimer *tmr = container_of(head, struct k_itimer, it.rcu); | ||
| 497 | |||
| 498 | kmem_cache_free(posix_timers_cache, tmr); | ||
| 499 | } | ||
| 500 | |||
| 494 | #define IT_ID_SET 1 | 501 | #define IT_ID_SET 1 |
| 495 | #define IT_ID_NOT_SET 0 | 502 | #define IT_ID_NOT_SET 0 |
| 496 | static void release_posix_timer(struct k_itimer *tmr, int it_id_set) | 503 | static void release_posix_timer(struct k_itimer *tmr, int it_id_set) |
| @@ -503,7 +510,7 @@ static void release_posix_timer(struct k_itimer *tmr, int it_id_set) | |||
| 503 | } | 510 | } |
| 504 | put_pid(tmr->it_pid); | 511 | put_pid(tmr->it_pid); |
| 505 | sigqueue_free(tmr->sigq); | 512 | sigqueue_free(tmr->sigq); |
| 506 | kmem_cache_free(posix_timers_cache, tmr); | 513 | call_rcu(&tmr->it.rcu, k_itimer_rcu_free); |
| 507 | } | 514 | } |
| 508 | 515 | ||
| 509 | static struct k_clock *clockid_to_kclock(const clockid_t id) | 516 | static struct k_clock *clockid_to_kclock(const clockid_t id) |
| @@ -631,22 +638,18 @@ out: | |||
| 631 | static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags) | 638 | static struct k_itimer *__lock_timer(timer_t timer_id, unsigned long *flags) |
| 632 | { | 639 | { |
| 633 | struct k_itimer *timr; | 640 | struct k_itimer *timr; |
| 634 | /* | 641 | |
| 635 | * Watch out here. We do a irqsave on the idr_lock and pass the | 642 | rcu_read_lock(); |
| 636 | * flags part over to the timer lock. Must not let interrupts in | ||
| 637 | * while we are moving the lock. | ||
| 638 | */ | ||
| 639 | spin_lock_irqsave(&idr_lock, *flags); | ||
| 640 | timr = idr_find(&posix_timers_id, (int)timer_id); | 643 | timr = idr_find(&posix_timers_id, (int)timer_id); |
| 641 | if (timr) { | 644 | if (timr) { |
| 642 | spin_lock(&timr->it_lock); | 645 | spin_lock_irqsave(&timr->it_lock, *flags); |
| 643 | if (timr->it_signal == current->signal) { | 646 | if (timr->it_signal == current->signal) { |
| 644 | spin_unlock(&idr_lock); | 647 | rcu_read_unlock(); |
| 645 | return timr; | 648 | return timr; |
| 646 | } | 649 | } |
| 647 | spin_unlock(&timr->it_lock); | 650 | spin_unlock_irqrestore(&timr->it_lock, *flags); |
| 648 | } | 651 | } |
| 649 | spin_unlock_irqrestore(&idr_lock, *flags); | 652 | rcu_read_unlock(); |
| 650 | 653 | ||
| 651 | return NULL; | 654 | return NULL; |
| 652 | } | 655 | } |
