aboutsummaryrefslogtreecommitdiffstats
path: root/include/linux
diff options
context:
space:
mode:
authorOleg Nesterov <oleg@redhat.com>2012-12-17 19:01:32 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2012-12-17 20:15:18 -0500
commita1fd3e24d8a484b3265a6d485202afe093c058f3 (patch)
tree472f6480a81abbc04b27eccdb798d80b1685bee0 /include/linux
parent53809751ac230a3611b5cdd375f3389f3207d471 (diff)
percpu_rw_semaphore: reimplement to not block the readers unnecessarily
Currently the writer does msleep() plus synchronize_sched() 3 times to acquire/release the semaphore, and during this time the readers are blocked completely. Even if the "write" section was not actually started or if it was already finished. With this patch down_write/up_write does synchronize_sched() twice and down_read/up_read are still possible during this time, just they use the slow path. percpu_down_write() first forces the readers to use rw_semaphore and increment the "slow" counter to take the lock for reading, then it takes that rw_semaphore for writing and blocks the readers. Also. With this patch the code relies on the documented behaviour of synchronize_sched(), it doesn't try to pair synchronize_sched() with barrier. Signed-off-by: Oleg Nesterov <oleg@redhat.com> Reviewed-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Mikulas Patocka <mpatocka@redhat.com> Cc: Peter Zijlstra <peterz@infradead.org> Cc: Ingo Molnar <mingo@elte.hu> Cc: Srikar Dronamraju <srikar@linux.vnet.ibm.com> Cc: Ananth N Mavinakayanahalli <ananth@in.ibm.com> Cc: Anton Arapov <anton@redhat.com> Cc: Jens Axboe <axboe@kernel.dk> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/percpu-rwsem.h83
1 files changed, 13 insertions, 70 deletions
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index bd1e86071e57..592f0d610d8e 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -2,82 +2,25 @@
2#define _LINUX_PERCPU_RWSEM_H 2#define _LINUX_PERCPU_RWSEM_H
3 3
4#include <linux/mutex.h> 4#include <linux/mutex.h>
5#include <linux/rwsem.h>
5#include <linux/percpu.h> 6#include <linux/percpu.h>
6#include <linux/rcupdate.h> 7#include <linux/wait.h>
7#include <linux/delay.h>
8 8
9struct percpu_rw_semaphore { 9struct percpu_rw_semaphore {
10 unsigned __percpu *counters; 10 unsigned int __percpu *fast_read_ctr;
11 bool locked; 11 struct mutex writer_mutex;
12 struct mutex mtx; 12 struct rw_semaphore rw_sem;
13 atomic_t slow_read_ctr;
14 wait_queue_head_t write_waitq;
13}; 15};
14 16
15#define light_mb() barrier() 17extern void percpu_down_read(struct percpu_rw_semaphore *);
16#define heavy_mb() synchronize_sched_expedited() 18extern void percpu_up_read(struct percpu_rw_semaphore *);
17 19
18static inline void percpu_down_read(struct percpu_rw_semaphore *p) 20extern void percpu_down_write(struct percpu_rw_semaphore *);
19{ 21extern void percpu_up_write(struct percpu_rw_semaphore *);
20 rcu_read_lock_sched();
21 if (unlikely(p->locked)) {
22 rcu_read_unlock_sched();
23 mutex_lock(&p->mtx);
24 this_cpu_inc(*p->counters);
25 mutex_unlock(&p->mtx);
26 return;
27 }
28 this_cpu_inc(*p->counters);
29 rcu_read_unlock_sched();
30 light_mb(); /* A, between read of p->locked and read of data, paired with D */
31}
32 22
33static inline void percpu_up_read(struct percpu_rw_semaphore *p) 23extern int percpu_init_rwsem(struct percpu_rw_semaphore *);
34{ 24extern void percpu_free_rwsem(struct percpu_rw_semaphore *);
35 light_mb(); /* B, between read of the data and write to p->counter, paired with C */
36 this_cpu_dec(*p->counters);
37}
38
39static inline unsigned __percpu_count(unsigned __percpu *counters)
40{
41 unsigned total = 0;
42 int cpu;
43
44 for_each_possible_cpu(cpu)
45 total += ACCESS_ONCE(*per_cpu_ptr(counters, cpu));
46
47 return total;
48}
49
50static inline void percpu_down_write(struct percpu_rw_semaphore *p)
51{
52 mutex_lock(&p->mtx);
53 p->locked = true;
54 synchronize_sched_expedited(); /* make sure that all readers exit the rcu_read_lock_sched region */
55 while (__percpu_count(p->counters))
56 msleep(1);
57 heavy_mb(); /* C, between read of p->counter and write to data, paired with B */
58}
59
60static inline void percpu_up_write(struct percpu_rw_semaphore *p)
61{
62 heavy_mb(); /* D, between write to data and write to p->locked, paired with A */
63 p->locked = false;
64 mutex_unlock(&p->mtx);
65}
66
67static inline int percpu_init_rwsem(struct percpu_rw_semaphore *p)
68{
69 p->counters = alloc_percpu(unsigned);
70 if (unlikely(!p->counters))
71 return -ENOMEM;
72 p->locked = false;
73 mutex_init(&p->mtx);
74 return 0;
75}
76
77static inline void percpu_free_rwsem(struct percpu_rw_semaphore *p)
78{
79 free_percpu(p->counters);
80 p->counters = NULL; /* catch use after free bugs */
81}
82 25
83#endif 26#endif