-rw-r--r--  include/linux/percpu-rwsem.h |  4
-rw-r--r--  lib/percpu-rwsem.c           | 34
2 files changed, 18 insertions, 20 deletions
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index 592f0d610d8e..d2146a4f833e 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -1,14 +1,14 @@
 #ifndef _LINUX_PERCPU_RWSEM_H
 #define _LINUX_PERCPU_RWSEM_H
 
-#include <linux/mutex.h>
+#include <linux/atomic.h>
 #include <linux/rwsem.h>
 #include <linux/percpu.h>
 #include <linux/wait.h>
 
 struct percpu_rw_semaphore {
 	unsigned int __percpu *fast_read_ctr;
-	struct mutex writer_mutex;
+	atomic_t write_ctr;
 	struct rw_semaphore rw_sem;
 	atomic_t slow_read_ctr;
 	wait_queue_head_t write_waitq;
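For orientation, a hedged sketch of how the structure above is meant to be used: readers take the cheap per-cpu path, the rare writer forces them onto ->rw_sem. Only the percpu_*_rwsem() calls are part of the real API; my_rwsem, my_list, my_reader() and my_writer() are hypothetical names used for illustration.

#include <linux/list.h>
#include <linux/percpu-rwsem.h>

static struct percpu_rw_semaphore my_rwsem;	/* hypothetical user */
static LIST_HEAD(my_list);

static void my_reader(void)
{
	percpu_down_read(&my_rwsem);	/* usually just a per-cpu increment */
	/* ... walk my_list ... */
	percpu_up_read(&my_rwsem);
}

static void my_writer(struct list_head *node)
{
	percpu_down_write(&my_rwsem);	/* heavy: waits for all readers */
	list_add(node, &my_list);
	percpu_up_write(&my_rwsem);
}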
diff --git a/lib/percpu-rwsem.c b/lib/percpu-rwsem.c
index 2e03bcfe48f9..ce92ab563a08 100644
--- a/lib/percpu-rwsem.c
+++ b/lib/percpu-rwsem.c
@@ -1,4 +1,4 @@
-#include <linux/mutex.h>
+#include <linux/atomic.h>
 #include <linux/rwsem.h>
 #include <linux/percpu.h>
 #include <linux/wait.h>
@@ -13,8 +13,8 @@ int percpu_init_rwsem(struct percpu_rw_semaphore *brw)
 	if (unlikely(!brw->fast_read_ctr))
 		return -ENOMEM;
 
-	mutex_init(&brw->writer_mutex);
 	init_rwsem(&brw->rw_sem);
+	atomic_set(&brw->write_ctr, 0);
 	atomic_set(&brw->slow_read_ctr, 0);
 	init_waitqueue_head(&brw->write_waitq);
 	return 0;
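percpu_init_rwsem() allocates the per-cpu fast counter and can therefore fail; a minimal, hypothetical caller pairs it with percpu_free_rwsem() roughly as follows (the module boilerplate is assumed, not taken from this patch).

#include <linux/module.h>
#include <linux/percpu-rwsem.h>

static struct percpu_rw_semaphore my_rwsem;

static int __init my_init(void)
{
	/* may return -ENOMEM if the per-cpu allocation fails */
	return percpu_init_rwsem(&my_rwsem);
}

static void __exit my_exit(void)
{
	/* frees ->fast_read_ctr */
	percpu_free_rwsem(&my_rwsem);
}

module_init(my_init);
module_exit(my_exit);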
@@ -28,7 +28,7 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
 
 /*
  * This is the fast-path for down_read/up_read, it only needs to ensure
- * there is no pending writer (!mutex_is_locked() check) and inc/dec the
+ * there is no pending writer (atomic_read(write_ctr) == 0) and inc/dec the
  * fast per-cpu counter. The writer uses synchronize_sched_expedited() to
  * serialize with the preempt-disabled section below.
  *
@@ -44,7 +44,7 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
  * If this helper fails the callers rely on the normal rw_semaphore and
  * atomic_dec_and_test(), so in this case we have the necessary barriers.
  *
- * But if it succeeds we do not have any barriers, mutex_is_locked() or
+ * But if it succeeds we do not have any barriers, atomic_read(write_ctr) or
  * __this_cpu_add() below can be reordered with any LOAD/STORE done by the
  * reader inside the critical section. See the comments in down_write and
  * up_write below.
@@ -54,7 +54,7 @@ static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
 	bool success = false;
 
 	preempt_disable();
-	if (likely(!mutex_is_locked(&brw->writer_mutex))) {
+	if (likely(!atomic_read(&brw->write_ctr))) {
 		__this_cpu_add(*brw->fast_read_ctr, val);
 		success = true;
 	}
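The hunk above is only the fast path; the reader entry points that call it are outside this diff. Reconstructed from the surrounding comments rather than quoted from the file, they behave roughly like this: if update_fast_ctr() fails because a writer is pending, the reader falls back to ->rw_sem and ->slow_read_ctr.

/* illustrative sketch of the callers of update_fast_ctr(), not verbatim source */
void percpu_down_read(struct percpu_rw_semaphore *brw)
{
	if (likely(update_fast_ctr(brw, +1)))
		return;

	/* slow path: a writer is pending, go through the real rwsem */
	down_read(&brw->rw_sem);
	atomic_inc(&brw->slow_read_ctr);
	up_read(&brw->rw_sem);
}

void percpu_up_read(struct percpu_rw_semaphore *brw)
{
	if (likely(update_fast_ctr(brw, -1)))
		return;

	/* slow path: the last slow reader wakes up the waiting writer */
	if (atomic_dec_and_test(&brw->slow_read_ctr))
		wake_up_all(&brw->write_waitq);
}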
@@ -101,9 +101,8 @@ static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
 }
 
 /*
- * A writer takes ->writer_mutex to exclude other writers and to force the
- * readers to switch to the slow mode, note the mutex_is_locked() check in
- * update_fast_ctr().
+ * A writer increments ->write_ctr to force the readers to switch to the
+ * slow mode, note the atomic_read() check in update_fast_ctr().
  *
  * After that the readers can only inc/dec the slow ->slow_read_ctr counter,
  * ->fast_read_ctr is stable. Once the writer moves its sum into the slow
@@ -114,11 +113,10 @@ static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
  */
 void percpu_down_write(struct percpu_rw_semaphore *brw)
 {
-	/* also blocks update_fast_ctr() which checks mutex_is_locked() */
-	mutex_lock(&brw->writer_mutex);
-
+	/* tell update_fast_ctr() there is a pending writer */
+	atomic_inc(&brw->write_ctr);
 	/*
-	 * 1. Ensures mutex_is_locked() is visible to any down_read/up_read
+	 * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read
 	 *    so that update_fast_ctr() can't succeed.
 	 *
 	 * 2. Ensures we see the result of every previous this_cpu_add() in
@@ -130,25 +128,25 @@ void percpu_down_write(struct percpu_rw_semaphore *brw)
 	 */
 	synchronize_sched_expedited();
 
+	/* exclude other writers, and block the new readers completely */
+	down_write(&brw->rw_sem);
+
 	/* nobody can use fast_read_ctr, move its sum into slow_read_ctr */
 	atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr);
 
-	/* block the new readers completely */
-	down_write(&brw->rw_sem);
-
 	/* wait for all readers to complete their percpu_up_read() */
 	wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr));
 }
 
 void percpu_up_write(struct percpu_rw_semaphore *brw)
 {
-	/* allow the new readers, but only the slow-path */
+	/* release the lock, but the readers can't use the fast-path */
 	up_write(&brw->rw_sem);
-
 	/*
 	 * Insert the barrier before the next fast-path in down_read,
 	 * see W_R case in the comment above update_fast_ctr().
 	 */
 	synchronize_sched_expedited();
-	mutex_unlock(&brw->writer_mutex);
+	/* the last writer unblocks update_fast_ctr() */
+	atomic_dec(&brw->write_ctr);
 }
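Putting the writer-side hunks together, the new sequence reads as below. This is a condensed restatement of the code above, not new code: ->write_ctr is a count of pending writers rather than a lock, so ->rw_sem is what serializes writers, and only the last writer to drop ->write_ctr re-enables the readers' fast path.

void percpu_down_write(struct percpu_rw_semaphore *brw)
{
	atomic_inc(&brw->write_ctr);	/* new readers must take the slow path */
	synchronize_sched_expedited();	/* makes write_ctr != 0 visible and flushes
					   the preempt-disabled fast-path sections */
	down_write(&brw->rw_sem);	/* exclude other writers, block new readers */
	atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr);
	wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr));
}

void percpu_up_write(struct percpu_rw_semaphore *brw)
{
	up_write(&brw->rw_sem);		/* readers may run again, slow path only */
	synchronize_sched_expedited();	/* order vs. the next fast-path down_read */
	atomic_dec(&brw->write_ctr);	/* last writer re-enables the fast path */
}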