-rw-r--r--   include/linux/percpu-rwsem.h |  83 ++-----------
-rw-r--r--   lib/Makefile                 |   2 +-
-rw-r--r--   lib/percpu-rwsem.c           | 154 ++++++++++++++
3 files changed, 168 insertions, 71 deletions
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index bd1e86071e57..592f0d610d8e 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -2,82 +2,25 @@
 #define _LINUX_PERCPU_RWSEM_H
 
 #include <linux/mutex.h>
+#include <linux/rwsem.h>
 #include <linux/percpu.h>
-#include <linux/rcupdate.h>
-#include <linux/delay.h>
+#include <linux/wait.h>
 
 struct percpu_rw_semaphore {
-	unsigned __percpu *counters;
-	bool locked;
-	struct mutex mtx;
+	unsigned int __percpu	*fast_read_ctr;
+	struct mutex		writer_mutex;
+	struct rw_semaphore	rw_sem;
+	atomic_t		slow_read_ctr;
+	wait_queue_head_t	write_waitq;
 };
 
-#define light_mb()	barrier()
-#define heavy_mb()	synchronize_sched_expedited()
+extern void percpu_down_read(struct percpu_rw_semaphore *);
+extern void percpu_up_read(struct percpu_rw_semaphore *);
 
-static inline void percpu_down_read(struct percpu_rw_semaphore *p)
-{
-	rcu_read_lock_sched();
-	if (unlikely(p->locked)) {
-		rcu_read_unlock_sched();
-		mutex_lock(&p->mtx);
-		this_cpu_inc(*p->counters);
-		mutex_unlock(&p->mtx);
-		return;
-	}
-	this_cpu_inc(*p->counters);
-	rcu_read_unlock_sched();
-	light_mb(); /* A, between read of p->locked and read of data, paired with D */
-}
+extern void percpu_down_write(struct percpu_rw_semaphore *);
+extern void percpu_up_write(struct percpu_rw_semaphore *);
 
-static inline void percpu_up_read(struct percpu_rw_semaphore *p)
-{
-	light_mb(); /* B, between read of the data and write to p->counter, paired with C */
-	this_cpu_dec(*p->counters);
-}
-
-static inline unsigned __percpu_count(unsigned __percpu *counters)
-{
-	unsigned total = 0;
-	int cpu;
-
-	for_each_possible_cpu(cpu)
-		total += ACCESS_ONCE(*per_cpu_ptr(counters, cpu));
-
-	return total;
-}
-
-static inline void percpu_down_write(struct percpu_rw_semaphore *p)
-{
-	mutex_lock(&p->mtx);
-	p->locked = true;
-	synchronize_sched_expedited(); /* make sure that all readers exit the rcu_read_lock_sched region */
-	while (__percpu_count(p->counters))
-		msleep(1);
-	heavy_mb(); /* C, between read of p->counter and write to data, paired with B */
-}
-
-static inline void percpu_up_write(struct percpu_rw_semaphore *p)
-{
-	heavy_mb(); /* D, between write to data and write to p->locked, paired with A */
-	p->locked = false;
-	mutex_unlock(&p->mtx);
-}
-
-static inline int percpu_init_rwsem(struct percpu_rw_semaphore *p)
-{
-	p->counters = alloc_percpu(unsigned);
-	if (unlikely(!p->counters))
-		return -ENOMEM;
-	p->locked = false;
-	mutex_init(&p->mtx);
-	return 0;
-}
-
-static inline void percpu_free_rwsem(struct percpu_rw_semaphore *p)
-{
-	free_percpu(p->counters);
-	p->counters = NULL; /* catch use after free bugs */
-}
+extern int percpu_init_rwsem(struct percpu_rw_semaphore *);
+extern void percpu_free_rwsem(struct percpu_rw_semaphore *);
 
 #endif
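
The header now exposes an opaque out-of-line API instead of the old inline implementation. For orientation, here is a minimal usage sketch of these calls; the freeze_sem instance and the my_init()/reader_work()/writer_work() functions are hypothetical names for illustration, not part of this patch:

#include <linux/percpu-rwsem.h>

static struct percpu_rw_semaphore freeze_sem;

static int my_init(void)
{
	/* allocates the per-cpu fast counter; can fail with -ENOMEM */
	return percpu_init_rwsem(&freeze_sem);
}

static void reader_work(void)
{
	percpu_down_read(&freeze_sem);	/* common case: a per-cpu increment */
	/* read-side critical section: many concurrent readers allowed */
	percpu_up_read(&freeze_sem);
}

static void writer_work(void)
{
	percpu_down_write(&freeze_sem);	/* waits for all active readers */
	/* write-side critical section: fully exclusive */
	percpu_up_write(&freeze_sem);
}

The point of the rework is that reader_work()'s locking stays cheap unless a writer is pending, at which point readers transparently fall back to the slow path implemented in lib/percpu-rwsem.c below.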
diff --git a/lib/Makefile b/lib/Makefile
index e2152fa7ff4d..e959c20efb24 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -9,7 +9,7 @@ endif
 
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
-	 idr.o int_sqrt.o extable.o \
+	 idr.o int_sqrt.o extable.o percpu-rwsem.o \
 	 sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \
 	 proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \
 	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
diff --git a/lib/percpu-rwsem.c b/lib/percpu-rwsem.c
new file mode 100644
index 000000000000..2e03bcfe48f9
--- /dev/null
+++ b/lib/percpu-rwsem.c
@@ -0,0 +1,154 @@
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/percpu.h>
+#include <linux/wait.h>
+#include <linux/percpu-rwsem.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+
+int percpu_init_rwsem(struct percpu_rw_semaphore *brw)
+{
+	brw->fast_read_ctr = alloc_percpu(int);
+	if (unlikely(!brw->fast_read_ctr))
+		return -ENOMEM;
+
+	mutex_init(&brw->writer_mutex);
+	init_rwsem(&brw->rw_sem);
+	atomic_set(&brw->slow_read_ctr, 0);
+	init_waitqueue_head(&brw->write_waitq);
+	return 0;
+}
+
+void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
+{
+	free_percpu(brw->fast_read_ctr);
+	brw->fast_read_ctr = NULL; /* catch use after free bugs */
+}
+
+/*
+ * This is the fast-path for down_read/up_read, it only needs to ensure
+ * there is no pending writer (!mutex_is_locked() check) and inc/dec the
+ * fast per-cpu counter. The writer uses synchronize_sched_expedited() to
+ * serialize with the preempt-disabled section below.
+ *
+ * The nontrivial part is that we should guarantee acquire/release semantics
+ * in case when
+ *
+ *	R_W: down_write() comes after up_read(), the writer should see all
+ *	     changes done by the reader
+ * or
+ *	W_R: down_read() comes after up_write(), the reader should see all
+ *	     changes done by the writer
+ *
+ * If this helper fails the callers rely on the normal rw_semaphore and
+ * atomic_dec_and_test(), so in this case we have the necessary barriers.
+ *
+ * But if it succeeds we do not have any barriers, mutex_is_locked() or
+ * __this_cpu_add() below can be reordered with any LOAD/STORE done by the
+ * reader inside the critical section. See the comments in down_write and
+ * up_write below.
+ */
+static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
+{
+	bool success = false;
+
+	preempt_disable();
+	if (likely(!mutex_is_locked(&brw->writer_mutex))) {
+		__this_cpu_add(*brw->fast_read_ctr, val);
+		success = true;
+	}
+	preempt_enable();
+
+	return success;
+}
+
+/*
+ * Like the normal down_read() this is not recursive, the writer can
+ * come after the first percpu_down_read() and create the deadlock.
+ */
+void percpu_down_read(struct percpu_rw_semaphore *brw)
+{
+	if (likely(update_fast_ctr(brw, +1)))
+		return;
+
+	down_read(&brw->rw_sem);
+	atomic_inc(&brw->slow_read_ctr);
+	up_read(&brw->rw_sem);
+}
+
+void percpu_up_read(struct percpu_rw_semaphore *brw)
+{
+	if (likely(update_fast_ctr(brw, -1)))
+		return;
+
+	/* false-positive is possible but harmless */
+	if (atomic_dec_and_test(&brw->slow_read_ctr))
+		wake_up_all(&brw->write_waitq);
+}
+
+static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
+{
+	unsigned int sum = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		sum += per_cpu(*brw->fast_read_ctr, cpu);
+		per_cpu(*brw->fast_read_ctr, cpu) = 0;
+	}
+
+	return sum;
+}
+
+/*
+ * A writer takes ->writer_mutex to exclude other writers and to force the
+ * readers to switch to the slow mode, note the mutex_is_locked() check in
+ * update_fast_ctr().
+ *
+ * After that the readers can only inc/dec the slow ->slow_read_ctr counter,
+ * ->fast_read_ctr is stable. Once the writer moves its sum into the slow
+ * counter it represents the number of active readers.
+ *
+ * Finally the writer takes ->rw_sem for writing and blocks the new readers,
+ * then waits until the slow counter becomes zero.
+ */
+void percpu_down_write(struct percpu_rw_semaphore *brw)
+{
+	/* also blocks update_fast_ctr() which checks mutex_is_locked() */
+	mutex_lock(&brw->writer_mutex);
+
+	/*
+	 * 1. Ensures mutex_is_locked() is visible to any down_read/up_read
+	 *    so that update_fast_ctr() can't succeed.
+	 *
+	 * 2. Ensures we see the result of every previous this_cpu_add() in
+	 *    update_fast_ctr().
+	 *
+	 * 3. Ensures that if any reader has exited its critical section via
+	 *    fast-path, it executes a full memory barrier before we return.
+	 *    See R_W case in the comment above update_fast_ctr().
+	 */
+	synchronize_sched_expedited();
+
+	/* nobody can use fast_read_ctr, move its sum into slow_read_ctr */
+	atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr);
+
+	/* block the new readers completely */
+	down_write(&brw->rw_sem);
+
+	/* wait for all readers to complete their percpu_up_read() */
+	wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr));
+}
+
+void percpu_up_write(struct percpu_rw_semaphore *brw)
+{
+	/* allow the new readers, but only the slow-path */
+	up_write(&brw->rw_sem);
+
+	/*
+	 * Insert the barrier before the next fast-path in down_read,
+	 * see W_R case in the comment above update_fast_ctr().
+	 */
+	synchronize_sched_expedited();
+	mutex_unlock(&brw->writer_mutex);
+}
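
One subtlety of the scheme above is worth spelling out: a reader can take the fast path on one CPU and drop it on another, so an individual fast_read_ctr entry may wrap below zero; only the sum across all CPUs, which clear_fast_ctr() computes while ->writer_mutex excludes further fast-path activity, counts the active readers. A standalone user-space model of that accounting (plain C for illustration only, not kernel code):

#include <stdio.h>

int main(void)
{
	unsigned int ctr[2] = { 0, 0 };	/* stand-in for the per-cpu fast_read_ctr */

	ctr[0] += 1;	/* percpu_down_read() fast path runs on CPU 0 */
	ctr[1] -= 1;	/* reader migrates; percpu_up_read() on CPU 1 wraps to UINT_MAX */

	/* clear_fast_ctr() analog: the modular sum cancels the wrap, 0 active readers */
	unsigned int sum = ctr[0] + ctr[1];
	printf("cpu0=%u cpu1=%u sum=%u\n", ctr[0], ctr[1], sum);
	return 0;
}

Because unsigned arithmetic is modular, the per-CPU wrap-around cancels in the sum, which is why the writer can safely fold the fast counters into slow_read_ctr with atomic_add() and then wait for that single counter to reach zero.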