Diffstat (limited to 'lib')
-rw-r--r--	lib/Makefile	2
-rw-r--r--	lib/percpu-rwsem.c	154
2 files changed, 155 insertions, 1 deletion
diff --git a/lib/Makefile b/lib/Makefile
index e2152fa7ff4d..e959c20efb24 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -9,7 +9,7 @@ endif
 
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
	 rbtree.o radix-tree.o dump_stack.o timerqueue.o\
-	 idr.o int_sqrt.o extable.o \
+	 idr.o int_sqrt.o extable.o percpu-rwsem.o \
	 sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \
	 proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \
	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
diff --git a/lib/percpu-rwsem.c b/lib/percpu-rwsem.c
new file mode 100644
index 000000000000..2e03bcfe48f9
--- /dev/null
+++ b/lib/percpu-rwsem.c
@@ -0,0 +1,154 @@
+#include <linux/mutex.h>
+#include <linux/rwsem.h>
+#include <linux/percpu.h>
+#include <linux/wait.h>
+#include <linux/percpu-rwsem.h>
+#include <linux/rcupdate.h>
+#include <linux/sched.h>
+#include <linux/errno.h>
+
+int percpu_init_rwsem(struct percpu_rw_semaphore *brw)
+{
+	brw->fast_read_ctr = alloc_percpu(int);
+	if (unlikely(!brw->fast_read_ctr))
+		return -ENOMEM;
+
+	mutex_init(&brw->writer_mutex);
+	init_rwsem(&brw->rw_sem);
+	atomic_set(&brw->slow_read_ctr, 0);
+	init_waitqueue_head(&brw->write_waitq);
+	return 0;
+}
+
+void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
+{
+	free_percpu(brw->fast_read_ctr);
+	brw->fast_read_ctr = NULL; /* catch use after free bugs */
+}
+
+/*
+ * This is the fast path for down_read/up_read; it only needs to ensure
+ * there is no pending writer (the !mutex_is_locked() check) and inc/dec
+ * the fast per-cpu counter. The writer uses synchronize_sched_expedited()
+ * to serialize with the preempt-disabled section below.
+ *
+ * The nontrivial part is that we must guarantee acquire/release semantics
+ * in two cases:
+ *
+ *	R_W: down_write() comes after up_read(); the writer should see all
+ *	     changes done by the reader
+ * or
+ *	W_R: down_read() comes after up_write(); the reader should see all
+ *	     changes done by the writer
+ *
+ * If this helper fails, the callers fall back on the normal rw_semaphore
+ * and atomic_dec_and_test(), so in that case we have the necessary barriers.
+ *
+ * But if it succeeds we have no barriers: mutex_is_locked() or
+ * __this_cpu_add() below can be reordered with any LOAD/STORE done by the
+ * reader inside the critical section. See the comments in down_write and
+ * up_write below.
+ */
+static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
+{
+	bool success = false;
+
+	preempt_disable();
+	if (likely(!mutex_is_locked(&brw->writer_mutex))) {
+		__this_cpu_add(*brw->fast_read_ctr, val);
+		success = true;
+	}
+	preempt_enable();
+
+	return success;
+}
+
+/*
+ * Like the normal down_read(), this is not recursive: the writer can
+ * come in after the first percpu_down_read() and create a deadlock.
+ */
+void percpu_down_read(struct percpu_rw_semaphore *brw)
+{
+	if (likely(update_fast_ctr(brw, +1)))
+		return;
+
+	down_read(&brw->rw_sem);
+	atomic_inc(&brw->slow_read_ctr);
+	up_read(&brw->rw_sem);
+}
+
+void percpu_up_read(struct percpu_rw_semaphore *brw)
+{
+	if (likely(update_fast_ctr(brw, -1)))
+		return;
+
+	/* false-positive is possible but harmless */
+	if (atomic_dec_and_test(&brw->slow_read_ctr))
+		wake_up_all(&brw->write_waitq);
+}
+
+static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
+{
+	unsigned int sum = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		sum += per_cpu(*brw->fast_read_ctr, cpu);
+		per_cpu(*brw->fast_read_ctr, cpu) = 0;
+	}
+
+	return sum;
+}
+
+/*
+ * A writer takes ->writer_mutex to exclude other writers and to force
+ * the readers to switch to the slow mode; note the mutex_is_locked()
+ * check in update_fast_ctr().
+ *
+ * After that the readers can only inc/dec the slow ->slow_read_ctr
+ * counter; ->fast_read_ctr is stable. Once the writer moves its sum into
+ * the slow counter, it represents the number of active readers.
+ *
+ * Finally the writer takes ->rw_sem for writing to block new readers,
+ * then waits until the slow counter becomes zero.
+ */
+void percpu_down_write(struct percpu_rw_semaphore *brw)
+{
+	/* also blocks update_fast_ctr() which checks mutex_is_locked() */
+	mutex_lock(&brw->writer_mutex);
+
+	/*
+	 * 1. Ensures mutex_is_locked() is visible to any down_read/up_read
+	 *    so that update_fast_ctr() can't succeed.
+	 *
+	 * 2. Ensures we see the result of every previous this_cpu_add() in
+	 *    update_fast_ctr().
+	 *
+	 * 3. Ensures that if any reader has exited its critical section via
+	 *    the fast path, it executes a full memory barrier before we
+	 *    return. See the R_W case in the comment above update_fast_ctr().
+	 */
+	synchronize_sched_expedited();
+
+	/* nobody can use fast_read_ctr, move its sum into slow_read_ctr */
+	atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr);
+
+	/* block the new readers completely */
+	down_write(&brw->rw_sem);
+
+	/* wait for all readers to complete their percpu_up_read() */
+	wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr));
+}
+
+void percpu_up_write(struct percpu_rw_semaphore *brw)
+{
+	/* allow the new readers, but only via the slow path */
+	up_write(&brw->rw_sem);
+
+	/*
+	 * Insert the barrier before the next fast path in down_read;
+	 * see the W_R case in the comment above update_fast_ctr().
+	 */
+	synchronize_sched_expedited();
+	mutex_unlock(&brw->writer_mutex);
+}
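
For context, here is a minimal usage sketch of the API this patch adds, assuming only the declarations from include/linux/percpu-rwsem.h; the stats_* names are hypothetical and not part of the patch. Readers bracket their critical section with percpu_down_read()/percpu_up_read() and normally touch only a per-cpu counter, while the rare writer uses percpu_down_write()/percpu_up_write() and absorbs the cost of both synchronize_sched_expedited() calls:

#include <linux/init.h>
#include <linux/percpu-rwsem.h>

static struct percpu_rw_semaphore stats_rwsem;	/* hypothetical example */
static long stats_value;

static int __init stats_init(void)
{
	/* allocates the per-cpu fast counter; returns -ENOMEM on failure */
	return percpu_init_rwsem(&stats_rwsem);
}

static long stats_get(void)
{
	long v;

	percpu_down_read(&stats_rwsem);		/* fast path: per-cpu inc */
	v = stats_value;
	percpu_up_read(&stats_rwsem);		/* fast path: per-cpu dec */
	return v;
}

static void stats_reset(void)
{
	percpu_down_write(&stats_rwsem);	/* excludes all readers */
	stats_value = 0;
	percpu_up_write(&stats_rwsem);
}

Note that, as the comment above percpu_down_read() warns, the read side is not recursive: nesting percpu_down_read() can deadlock against a writer that arrives between the two acquisitions.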