-rw-r--r--  include/linux/percpu-rwsem.h |  4 ++--
-rw-r--r--  lib/percpu-rwsem.c           | 34 ++++++++++++++++------------------
2 files changed, 18 insertions(+), 20 deletions(-)
diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h
index 592f0d610d8e..d2146a4f833e 100644
--- a/include/linux/percpu-rwsem.h
+++ b/include/linux/percpu-rwsem.h
@@ -1,14 +1,14 @@
 #ifndef _LINUX_PERCPU_RWSEM_H
 #define _LINUX_PERCPU_RWSEM_H
 
-#include <linux/mutex.h>
+#include <linux/atomic.h>
 #include <linux/rwsem.h>
 #include <linux/percpu.h>
 #include <linux/wait.h>
 
 struct percpu_rw_semaphore {
        unsigned int __percpu   *fast_read_ctr;
-       struct mutex            writer_mutex;
+       atomic_t                write_ctr;
        struct rw_semaphore     rw_sem;
        atomic_t                slow_read_ctr;
        wait_queue_head_t       write_waitq;
diff --git a/lib/percpu-rwsem.c b/lib/percpu-rwsem.c
index 2e03bcfe48f9..ce92ab563a08 100644
--- a/lib/percpu-rwsem.c
+++ b/lib/percpu-rwsem.c
@@ -1,4 +1,4 @@
-#include <linux/mutex.h>
+#include <linux/atomic.h>
 #include <linux/rwsem.h>
 #include <linux/percpu.h>
 #include <linux/wait.h>
@@ -13,8 +13,8 @@ int percpu_init_rwsem(struct percpu_rw_semaphore *brw)
        if (unlikely(!brw->fast_read_ctr))
                return -ENOMEM;
 
-       mutex_init(&brw->writer_mutex);
        init_rwsem(&brw->rw_sem);
+       atomic_set(&brw->write_ctr, 0);
        atomic_set(&brw->slow_read_ctr, 0);
        init_waitqueue_head(&brw->write_waitq);
        return 0;
@@ -28,7 +28,7 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
 
 /*
  * This is the fast-path for down_read/up_read, it only needs to ensure
- * there is no pending writer (!mutex_is_locked() check) and inc/dec the
+ * there is no pending writer (atomic_read(write_ctr) == 0) and inc/dec the
  * fast per-cpu counter. The writer uses synchronize_sched_expedited() to
  * serialize with the preempt-disabled section below.
  *
@@ -44,7 +44,7 @@ void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
  * If this helper fails the callers rely on the normal rw_semaphore and
  * atomic_dec_and_test(), so in this case we have the necessary barriers.
  *
- * But if it succeeds we do not have any barriers, mutex_is_locked() or
+ * But if it succeeds we do not have any barriers, atomic_read(write_ctr) or
  * __this_cpu_add() below can be reordered with any LOAD/STORE done by the
  * reader inside the critical section. See the comments in down_write and
  * up_write below.
@@ -54,7 +54,7 @@ static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
        bool success = false;
 
        preempt_disable();
-       if (likely(!mutex_is_locked(&brw->writer_mutex))) {
+       if (likely(!atomic_read(&brw->write_ctr))) {
                __this_cpu_add(*brw->fast_read_ctr, val);
                success = true;
        }
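
[Context between hunks: the reader entry points that consume update_fast_ctr()'s return value are not touched by this patch. A sketch of their shape in the version of lib/percpu-rwsem.c this patch applies to, reconstructed here for orientation only; verify against the file itself:]

void percpu_down_read(struct percpu_rw_semaphore *brw)
{
        if (likely(update_fast_ctr(brw, +1)))
                return;

        /* slow path: a writer is pending, use the real rw_semaphore */
        down_read(&brw->rw_sem);
        atomic_inc(&brw->slow_read_ctr);
        up_read(&brw->rw_sem);
}

void percpu_up_read(struct percpu_rw_semaphore *brw)
{
        if (likely(update_fast_ctr(brw, -1)))
                return;

        /* the writer in percpu_down_write() may be waiting on this counter */
        if (atomic_dec_and_test(&brw->slow_read_ctr))
                wake_up_all(&brw->write_waitq);
}
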
@@ -101,9 +101,8 @@ static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
 }
 
 /*
- * A writer takes ->writer_mutex to exclude other writers and to force the
- * readers to switch to the slow mode, note the mutex_is_locked() check in
- * update_fast_ctr().
+ * A writer increments ->write_ctr to force the readers to switch to the
+ * slow mode, note the atomic_read() check in update_fast_ctr().
  *
  * After that the readers can only inc/dec the slow ->slow_read_ctr counter,
  * ->fast_read_ctr is stable. Once the writer moves its sum into the slow
@@ -114,11 +113,10 @@ static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
  */
 void percpu_down_write(struct percpu_rw_semaphore *brw)
 {
-       /* also blocks update_fast_ctr() which checks mutex_is_locked() */
-       mutex_lock(&brw->writer_mutex);
-
+       /* tell update_fast_ctr() there is a pending writer */
+       atomic_inc(&brw->write_ctr);
        /*
-        * 1. Ensures mutex_is_locked() is visible to any down_read/up_read
+        * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read
         *    so that update_fast_ctr() can't succeed.
         *
         * 2. Ensures we see the result of every previous this_cpu_add() in
@@ -130,25 +128,25 @@ void percpu_down_write(struct percpu_rw_semaphore *brw)
         */
        synchronize_sched_expedited();
 
+       /* exclude other writers, and block the new readers completely */
+       down_write(&brw->rw_sem);
+
        /* nobody can use fast_read_ctr, move its sum into slow_read_ctr */
        atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr);
 
-       /* block the new readers completely */
-       down_write(&brw->rw_sem);
-
        /* wait for all readers to complete their percpu_up_read() */
        wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr));
 }
 
 void percpu_up_write(struct percpu_rw_semaphore *brw)
 {
-       /* allow the new readers, but only the slow-path */
+       /* release the lock, but the readers can't use the fast-path */
        up_write(&brw->rw_sem);
-
        /*
         * Insert the barrier before the next fast-path in down_read,
         * see W_R case in the comment above update_fast_ctr().
         */
        synchronize_sched_expedited();
-       mutex_unlock(&brw->writer_mutex);
+       /* the last writer unblocks update_fast_ctr() */
+       atomic_dec(&brw->write_ctr);
 }
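
[For readers skimming this patch, a minimal caller-side sketch of the interface it modifies. The my_* names are hypothetical and not part of the patch; only the percpu_* functions come from this file. Note the write side stays expensive: each down_write/up_write pair performs two synchronize_sched_expedited() calls, visible in the hunks above.]

#include <linux/percpu-rwsem.h>

static struct percpu_rw_semaphore my_brw;      /* hypothetical user */

static int my_init(void)
{
        /* allocates ->fast_read_ctr, may fail with -ENOMEM */
        return percpu_init_rwsem(&my_brw);
}

static void my_reader(void)
{
        /* usually just preempt_disable() + a per-cpu increment */
        percpu_down_read(&my_brw);
        /* ... read-side critical section ... */
        percpu_up_read(&my_brw);
}

static void my_writer(void)
{
        /* heavy: flips readers to the slow path and waits them out */
        percpu_down_write(&my_brw);
        /* ... write-side critical section, all readers excluded ... */
        percpu_up_write(&my_brw);
}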