Diffstat (limited to 'lib')
-rw-r--r--   lib/Kconfig              |   28
-rw-r--r--   lib/Kconfig.debug        |   20
-rw-r--r--   lib/Makefile             |   11
-rw-r--r--   lib/assoc_array.c        | 1746
-rw-r--r--   lib/crc32.c              |  456
-rw-r--r--   lib/debugobjects.c       |    2
-rw-r--r--   lib/digsig.c             |    2
-rw-r--r--   lib/genalloc.c           |   28
-rw-r--r--   lib/kfifo.c              |    4
-rw-r--r--   lib/kobject.c            |   90
-rw-r--r--   lib/llist.c              |   22
-rw-r--r--   lib/locking-selftest.c   |    2
-rw-r--r--   lib/lockref.c            |    3
-rw-r--r--   lib/mpi/mpiutil.c        |    3
-rw-r--r--   lib/percpu-rwsem.c       |  165
-rw-r--r--   lib/percpu_counter.c     |   15
-rw-r--r--   lib/percpu_ida.c         |   94
-rw-r--r--   lib/percpu_test.c        |  138
-rw-r--r--   lib/random32.c           |  311
-rw-r--r--   lib/rwsem-spinlock.c     |  296
-rw-r--r--   lib/rwsem.c              |  293
-rw-r--r--   lib/scatterlist.c        |    3
-rw-r--r--   lib/show_mem.c           |   39
-rw-r--r--   lib/smp_processor_id.c   |    3
-rw-r--r--   lib/spinlock_debug.c     |  302
-rw-r--r--   lib/swiotlb.c            |    6
-rw-r--r--   lib/vsprintf.c           |   55
27 files changed, 2752 insertions(+), 1385 deletions(-)
diff --git a/lib/Kconfig b/lib/Kconfig
index b3c8be0da17f..991c98bc4a3f 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -51,13 +51,6 @@ config PERCPU_RWSEM
 config ARCH_USE_CMPXCHG_LOCKREF
 	bool
 
-config CMPXCHG_LOCKREF
-	def_bool y if ARCH_USE_CMPXCHG_LOCKREF
-	depends on SMP
-	depends on !GENERIC_LOCKBREAK
-	depends on !DEBUG_SPINLOCK
-	depends on !DEBUG_LOCK_ALLOC
-
 config CRC_CCITT
 	tristate "CRC-CCITT functions"
 	help
@@ -189,6 +182,13 @@ config AUDIT_GENERIC
 	depends on AUDIT && !AUDIT_ARCH
 	default y
 
+config RANDOM32_SELFTEST
+	bool "PRNG perform self test on init"
+	default n
+	help
+	  This option enables the 32 bit PRNG library functions to perform a
+	  self test on initialization.
+
 #
 # compression support is select'ed if needed
 #
@@ -322,6 +322,20 @@ config TEXTSEARCH_FSM
 config BTREE
 	boolean
 
+config ASSOCIATIVE_ARRAY
+	bool
+	help
+	  Generic associative array. Can be searched and iterated over whilst
+	  it is being modified. It is also reasonably quick to search and
+	  modify. The algorithms are non-recursive, and the trees are highly
+	  capacious.
+
+	  See:
+
+		Documentation/assoc_array.txt
+
+	  for more information.
+
 config HAS_IOMEM
 	boolean
 	depends on !NO_IOMEM
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 06344d986eb9..db25707aa41b 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -312,6 +312,15 @@ config MAGIC_SYSRQ
 	  keys are documented in <file:Documentation/sysrq.txt>. Don't say Y
 	  unless you really know what this hack does.
 
+config MAGIC_SYSRQ_DEFAULT_ENABLE
+	hex "Enable magic SysRq key functions by default"
+	depends on MAGIC_SYSRQ
+	default 0x1
+	help
+	  Specifies which SysRq key functions are enabled by default.
+	  This may be set to 1 or 0 to enable or disable them all, or
+	  to a bitmask as described in Documentation/sysrq.txt.
+
 config DEBUG_KERNEL
 	bool "Kernel debugging"
 	help
@@ -983,7 +992,7 @@ config DEBUG_KOBJECT
 
 config DEBUG_KOBJECT_RELEASE
 	bool "kobject release debugging"
-	depends on DEBUG_KERNEL
+	depends on DEBUG_OBJECTS_TIMERS
 	help
 	  kobjects are reference counted objects. This means that their
 	  last reference count put is not predictable, and the kobject can
@@ -1472,6 +1481,15 @@ config INTERVAL_TREE_TEST
 	help
 	  A benchmark measuring the performance of the interval tree library
 
+config PERCPU_TEST
+	tristate "Per cpu operations test"
+	depends on m && DEBUG_KERNEL
+	help
+	  Enable this option to build test module which validates per-cpu
+	  operations.
+
+	  If unsure, say N.
+
 config ATOMIC64_SELFTEST
 	bool "Perform an atomic64_t self-test at boot"
 	help
diff --git a/lib/Makefile b/lib/Makefile
index f3bb2cb98adf..a459c31e8c6b 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -13,7 +13,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 sha1.o md5.o irq_regs.o reciprocal_div.o argv_split.o \
 	 proportions.o flex_proportions.o prio_heap.o ratelimit.o show_mem.o \
 	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
-	 earlycpio.o percpu-refcount.o percpu_ida.o
+	 earlycpio.o
 
 obj-$(CONFIG_ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS) += usercopy.o
 lib-$(CONFIG_MMU) += ioremap.o
@@ -26,7 +26,7 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \
 	 bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \
 	 gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \
 	 bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \
-	 percpu_ida.o
+	 percpu-refcount.o percpu_ida.o
 obj-y += string_helpers.o
 obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
 obj-y += kstrtox.o
@@ -42,15 +42,12 @@ obj-$(CONFIG_GENERIC_PCI_IOMAP) += pci_iomap.o
 obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o
 obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o
 obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o
-obj-$(CONFIG_DEBUG_SPINLOCK) += spinlock_debug.o
-lib-$(CONFIG_RWSEM_GENERIC_SPINLOCK) += rwsem-spinlock.o
-lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o
-lib-$(CONFIG_PERCPU_RWSEM) += percpu-rwsem.o
 
 CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
 obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
 
 obj-$(CONFIG_BTREE) += btree.o
+obj-$(CONFIG_ASSOCIATIVE_ARRAY) += assoc_array.o
 obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
 obj-$(CONFIG_DEBUG_LIST) += list_debug.o
 obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
@@ -157,6 +154,8 @@ obj-$(CONFIG_INTERVAL_TREE_TEST) += interval_tree_test.o
 
 interval_tree_test-objs := interval_tree_test_main.o interval_tree.o
 
+obj-$(CONFIG_PERCPU_TEST) += percpu_test.o
+
 obj-$(CONFIG_ASN1) += asn1_decoder.o
 
 obj-$(CONFIG_FONT_SUPPORT) += fonts/
diff --git a/lib/assoc_array.c b/lib/assoc_array.c
new file mode 100644
index 000000000000..17edeaf19180
--- /dev/null
+++ b/lib/assoc_array.c
@@ -0,0 +1,1746 @@
1/* Generic associative array implementation.
2 *
3 * See Documentation/assoc_array.txt for information.
4 *
5 * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
6 * Written by David Howells (dhowells@redhat.com)
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public Licence
10 * as published by the Free Software Foundation; either version
11 * 2 of the Licence, or (at your option) any later version.
12 */
13//#define DEBUG
14#include <linux/slab.h>
15#include <linux/err.h>
16#include <linux/assoc_array_priv.h>
17
18/*
19 * Iterate over an associative array. The caller must hold the RCU read lock
20 * or better.
21 */
22static int assoc_array_subtree_iterate(const struct assoc_array_ptr *root,
23 const struct assoc_array_ptr *stop,
24 int (*iterator)(const void *leaf,
25 void *iterator_data),
26 void *iterator_data)
27{
28 const struct assoc_array_shortcut *shortcut;
29 const struct assoc_array_node *node;
30 const struct assoc_array_ptr *cursor, *ptr, *parent;
31 unsigned long has_meta;
32 int slot, ret;
33
34 cursor = root;
35
36begin_node:
37 if (assoc_array_ptr_is_shortcut(cursor)) {
38 /* Descend through a shortcut */
39 shortcut = assoc_array_ptr_to_shortcut(cursor);
40 smp_read_barrier_depends();
41 cursor = ACCESS_ONCE(shortcut->next_node);
42 }
43
44 node = assoc_array_ptr_to_node(cursor);
45 smp_read_barrier_depends();
46 slot = 0;
47
48 /* We perform two passes of each node.
49 *
50 * The first pass does all the leaves in this node. This means we
51 * don't miss any leaves if the node is split up by insertion whilst
52 * we're iterating over the branches rooted here (we may, however, see
53 * some leaves twice).
54 */
55 has_meta = 0;
56 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
57 ptr = ACCESS_ONCE(node->slots[slot]);
58 has_meta |= (unsigned long)ptr;
59 if (ptr && assoc_array_ptr_is_leaf(ptr)) {
60 /* We need a barrier between the read of the pointer
61 * and dereferencing the pointer - but only if we are
62 * actually going to dereference it.
63 */
64 smp_read_barrier_depends();
65
66 /* Invoke the callback */
67 ret = iterator(assoc_array_ptr_to_leaf(ptr),
68 iterator_data);
69 if (ret)
70 return ret;
71 }
72 }
73
74 /* The second pass attends to all the metadata pointers. If we follow
75 * one of these we may find that we don't come back here, but rather go
76 * back to a replacement node with the leaves in a different layout.
77 *
78 * We are guaranteed to make progress, however, as the slot number for
79 * a particular portion of the key space cannot change - and we
80 * continue at the back pointer + 1.
81 */
82 if (!(has_meta & ASSOC_ARRAY_PTR_META_TYPE))
83 goto finished_node;
84 slot = 0;
85
86continue_node:
87 node = assoc_array_ptr_to_node(cursor);
88 smp_read_barrier_depends();
89
90 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
91 ptr = ACCESS_ONCE(node->slots[slot]);
92 if (assoc_array_ptr_is_meta(ptr)) {
93 cursor = ptr;
94 goto begin_node;
95 }
96 }
97
98finished_node:
99 /* Move up to the parent (may need to skip back over a shortcut) */
100 parent = ACCESS_ONCE(node->back_pointer);
101 slot = node->parent_slot;
102 if (parent == stop)
103 return 0;
104
105 if (assoc_array_ptr_is_shortcut(parent)) {
106 shortcut = assoc_array_ptr_to_shortcut(parent);
107 smp_read_barrier_depends();
108 cursor = parent;
109 parent = ACCESS_ONCE(shortcut->back_pointer);
110 slot = shortcut->parent_slot;
111 if (parent == stop)
112 return 0;
113 }
114
115 /* Ascend to next slot in parent node */
116 cursor = parent;
117 slot++;
118 goto continue_node;
119}
120
121/**
122 * assoc_array_iterate - Pass all objects in the array to a callback
123 * @array: The array to iterate over.
124 * @iterator: The callback function.
125 * @iterator_data: Private data for the callback function.
126 *
127 * Iterate over all the objects in an associative array. Each one will be
128 * presented to the iterator function.
129 *
130 * If the array is being modified concurrently with the iteration then it is
131 * possible that some objects in the array will be passed to the iterator
132 * callback more than once - though every object should be passed at least
133 * once. If this is undesirable then the caller must lock against modification
134 * for the duration of this function.
135 *
136 * The function will return 0 if no objects were in the array or else it will
137 * return the result of the last iterator function called. Iteration stops
138 * immediately if any call to the iteration function results in a non-zero
139 * return.
140 *
141 * The caller should hold the RCU read lock or better if concurrent
142 * modification is possible.
143 */
144int assoc_array_iterate(const struct assoc_array *array,
145 int (*iterator)(const void *object,
146 void *iterator_data),
147 void *iterator_data)
148{
149 struct assoc_array_ptr *root = ACCESS_ONCE(array->root);
150
151 if (!root)
152 return 0;
153 return assoc_array_subtree_iterate(root, NULL, iterator, iterator_data);
154}
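
As a rough illustration of the callback contract described above, a minimal sketch of a caller might look like the following; only assoc_array_iterate() and its callback signature come from this file, while the counter and the surrounding function are hypothetical.

static int count_one(const void *object, void *iterator_data)
{
	unsigned long *count = iterator_data;

	(*count)++;
	return 0;	/* returning non-zero here would stop the iteration */
}

static unsigned long count_objects(const struct assoc_array *array)
{
	unsigned long count = 0;

	/* Hypothetical caller: hold the RCU read lock across the walk. */
	rcu_read_lock();
	assoc_array_iterate(array, count_one, &count);
	rcu_read_unlock();
	return count;
}
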
155
156enum assoc_array_walk_status {
157 assoc_array_walk_tree_empty,
158 assoc_array_walk_found_terminal_node,
159 assoc_array_walk_found_wrong_shortcut,
160};
161
162struct assoc_array_walk_result {
163 struct {
164 struct assoc_array_node *node; /* Node in which leaf might be found */
165 int level;
166 int slot;
167 } terminal_node;
168 struct {
169 struct assoc_array_shortcut *shortcut;
170 int level;
171 int sc_level;
172 unsigned long sc_segments;
173 unsigned long dissimilarity;
174 } wrong_shortcut;
175};
176
177/*
178 * Navigate through the internal tree looking for the closest node to the key.
179 */
180static enum assoc_array_walk_status
181assoc_array_walk(const struct assoc_array *array,
182 const struct assoc_array_ops *ops,
183 const void *index_key,
184 struct assoc_array_walk_result *result)
185{
186 struct assoc_array_shortcut *shortcut;
187 struct assoc_array_node *node;
188 struct assoc_array_ptr *cursor, *ptr;
189 unsigned long sc_segments, dissimilarity;
190 unsigned long segments;
191 int level, sc_level, next_sc_level;
192 int slot;
193
194 pr_devel("-->%s()\n", __func__);
195
196 cursor = ACCESS_ONCE(array->root);
197 if (!cursor)
198 return assoc_array_walk_tree_empty;
199
200 level = 0;
201
202 /* Use segments from the key for the new leaf to navigate through the
203 * internal tree, skipping through nodes and shortcuts that are on
204 * route to the destination. Eventually we'll come to a slot that is
205 * either empty or contains a leaf at which point we've found a node in
206 * which the leaf we're looking for might be found or into which it
207 * should be inserted.
208 */
209jumped:
210 segments = ops->get_key_chunk(index_key, level);
211 pr_devel("segments[%d]: %lx\n", level, segments);
212
213 if (assoc_array_ptr_is_shortcut(cursor))
214 goto follow_shortcut;
215
216consider_node:
217 node = assoc_array_ptr_to_node(cursor);
218 smp_read_barrier_depends();
219
220 slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK);
221 slot &= ASSOC_ARRAY_FAN_MASK;
222 ptr = ACCESS_ONCE(node->slots[slot]);
223
224 pr_devel("consider slot %x [ix=%d type=%lu]\n",
225 slot, level, (unsigned long)ptr & 3);
226
227 if (!assoc_array_ptr_is_meta(ptr)) {
228 /* The node doesn't have a node/shortcut pointer in the slot
229 * corresponding to the index key that we have to follow.
230 */
231 result->terminal_node.node = node;
232 result->terminal_node.level = level;
233 result->terminal_node.slot = slot;
234 pr_devel("<--%s() = terminal_node\n", __func__);
235 return assoc_array_walk_found_terminal_node;
236 }
237
238 if (assoc_array_ptr_is_node(ptr)) {
239 /* There is a pointer to a node in the slot corresponding to
240 * this index key segment, so we need to follow it.
241 */
242 cursor = ptr;
243 level += ASSOC_ARRAY_LEVEL_STEP;
244 if ((level & ASSOC_ARRAY_KEY_CHUNK_MASK) != 0)
245 goto consider_node;
246 goto jumped;
247 }
248
249 /* There is a shortcut in the slot corresponding to the index key
250 * segment. We follow the shortcut if its partial index key matches
251 * this leaf's. Otherwise we need to split the shortcut.
252 */
253 cursor = ptr;
254follow_shortcut:
255 shortcut = assoc_array_ptr_to_shortcut(cursor);
256 smp_read_barrier_depends();
257 pr_devel("shortcut to %d\n", shortcut->skip_to_level);
258 sc_level = level + ASSOC_ARRAY_LEVEL_STEP;
259 BUG_ON(sc_level > shortcut->skip_to_level);
260
261 do {
262 /* Check the leaf against the shortcut's index key a word at a
263 * time, trimming the final word (the shortcut stores the index
264 * key completely from the root to the shortcut's target).
265 */
266 if ((sc_level & ASSOC_ARRAY_KEY_CHUNK_MASK) == 0)
267 segments = ops->get_key_chunk(index_key, sc_level);
268
269 sc_segments = shortcut->index_key[sc_level >> ASSOC_ARRAY_KEY_CHUNK_SHIFT];
270 dissimilarity = segments ^ sc_segments;
271
272 if (round_up(sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE) > shortcut->skip_to_level) {
273 /* Trim segments that are beyond the shortcut */
274 int shift = shortcut->skip_to_level & ASSOC_ARRAY_KEY_CHUNK_MASK;
275 dissimilarity &= ~(ULONG_MAX << shift);
276 next_sc_level = shortcut->skip_to_level;
277 } else {
278 next_sc_level = sc_level + ASSOC_ARRAY_KEY_CHUNK_SIZE;
279 next_sc_level = round_down(next_sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
280 }
281
282 if (dissimilarity != 0) {
283 /* This shortcut points elsewhere */
284 result->wrong_shortcut.shortcut = shortcut;
285 result->wrong_shortcut.level = level;
286 result->wrong_shortcut.sc_level = sc_level;
287 result->wrong_shortcut.sc_segments = sc_segments;
288 result->wrong_shortcut.dissimilarity = dissimilarity;
289 return assoc_array_walk_found_wrong_shortcut;
290 }
291
292 sc_level = next_sc_level;
293 } while (sc_level < shortcut->skip_to_level);
294
295 /* The shortcut matches the leaf's index to this point. */
296 cursor = ACCESS_ONCE(shortcut->next_node);
297 if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) {
298 level = sc_level;
299 goto jumped;
300 } else {
301 level = sc_level;
302 goto consider_node;
303 }
304}
305
306/**
307 * assoc_array_find - Find an object by index key
308 * @array: The associative array to search.
309 * @ops: The operations to use.
310 * @index_key: The key to the object.
311 *
312 * Find an object in an associative array by walking through the internal tree
313 * to the node that should contain the object and then searching the leaves
314 * there. NULL is returned if the requested object was not found in the array.
315 *
316 * The caller must hold the RCU read lock or better.
317 */
318void *assoc_array_find(const struct assoc_array *array,
319 const struct assoc_array_ops *ops,
320 const void *index_key)
321{
322 struct assoc_array_walk_result result;
323 const struct assoc_array_node *node;
324 const struct assoc_array_ptr *ptr;
325 const void *leaf;
326 int slot;
327
328 if (assoc_array_walk(array, ops, index_key, &result) !=
329 assoc_array_walk_found_terminal_node)
330 return NULL;
331
332 node = result.terminal_node.node;
333 smp_read_barrier_depends();
334
335 /* If the target key is available to us, it has to be pointed to by
336 * the terminal node.
337 */
338 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
339 ptr = ACCESS_ONCE(node->slots[slot]);
340 if (ptr && assoc_array_ptr_is_leaf(ptr)) {
341 /* We need a barrier between the read of the pointer
342 * and dereferencing the pointer - but only if we are
343 * actually going to dereference it.
344 */
345 leaf = assoc_array_ptr_to_leaf(ptr);
346 smp_read_barrier_depends();
347 if (ops->compare_object(leaf, index_key))
348 return (void *)leaf;
349 }
350 }
351
352 return NULL;
353}
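
To show how an ops table plugs into assoc_array_find(), here is a hedged sketch for objects keyed by a single unsigned long. The member names mirror the ops-> calls made in this file; the exact prototypes live in include/linux/assoc_array.h, so the signatures below should be read as assumptions, and .diff_objects is left out of the sketch even though a real user must supply it.

/* Hypothetical object type keyed by one unsigned long. */
struct my_object {
	unsigned long key;
	/* ... payload ... */
};

static unsigned long my_get_key_chunk(const void *index_key, int level)
{
	return *(const unsigned long *)index_key;	/* the whole key fits one chunk */
}

static unsigned long my_get_object_key_chunk(const void *object, int level)
{
	return ((const struct my_object *)object)->key;
}

static bool my_compare_object(const void *object, const void *index_key)
{
	return ((const struct my_object *)object)->key ==
		*(const unsigned long *)index_key;
}

static void my_free_object(void *object)
{
	kfree(object);	/* hypothetical: objects assumed to be kmalloc'd */
}

static const struct assoc_array_ops my_ops = {
	.get_key_chunk		= my_get_key_chunk,
	.get_object_key_chunk	= my_get_object_key_chunk,
	.compare_object		= my_compare_object,
	.free_object		= my_free_object,
	/* .diff_objects omitted from this sketch */
};

static struct my_object *my_lookup(const struct assoc_array *array,
				   unsigned long key)
{
	struct my_object *obj;

	rcu_read_lock();
	obj = assoc_array_find(array, &my_ops, &key);
	rcu_read_unlock();
	return obj;
}
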
354
355/*
356 * Destructively iterate over an associative array. The caller must prevent
357 * other simultaneous accesses.
358 */
359static void assoc_array_destroy_subtree(struct assoc_array_ptr *root,
360 const struct assoc_array_ops *ops)
361{
362 struct assoc_array_shortcut *shortcut;
363 struct assoc_array_node *node;
364 struct assoc_array_ptr *cursor, *parent = NULL;
365 int slot = -1;
366
367 pr_devel("-->%s()\n", __func__);
368
369 cursor = root;
370 if (!cursor) {
371 pr_devel("empty\n");
372 return;
373 }
374
375move_to_meta:
376 if (assoc_array_ptr_is_shortcut(cursor)) {
377 /* Descend through a shortcut */
378 pr_devel("[%d] shortcut\n", slot);
379 BUG_ON(!assoc_array_ptr_is_shortcut(cursor));
380 shortcut = assoc_array_ptr_to_shortcut(cursor);
381 BUG_ON(shortcut->back_pointer != parent);
382 BUG_ON(slot != -1 && shortcut->parent_slot != slot);
383 parent = cursor;
384 cursor = shortcut->next_node;
385 slot = -1;
386 BUG_ON(!assoc_array_ptr_is_node(cursor));
387 }
388
389 pr_devel("[%d] node\n", slot);
390 node = assoc_array_ptr_to_node(cursor);
391 BUG_ON(node->back_pointer != parent);
392 BUG_ON(slot != -1 && node->parent_slot != slot);
393 slot = 0;
394
395continue_node:
396 pr_devel("Node %p [back=%p]\n", node, node->back_pointer);
397 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
398 struct assoc_array_ptr *ptr = node->slots[slot];
399 if (!ptr)
400 continue;
401 if (assoc_array_ptr_is_meta(ptr)) {
402 parent = cursor;
403 cursor = ptr;
404 goto move_to_meta;
405 }
406
407 if (ops) {
408 pr_devel("[%d] free leaf\n", slot);
409 ops->free_object(assoc_array_ptr_to_leaf(ptr));
410 }
411 }
412
413 parent = node->back_pointer;
414 slot = node->parent_slot;
415 pr_devel("free node\n");
416 kfree(node);
417 if (!parent)
418 return; /* Done */
419
420 /* Move back up to the parent (may need to free a shortcut on
421 * the way up) */
422 if (assoc_array_ptr_is_shortcut(parent)) {
423 shortcut = assoc_array_ptr_to_shortcut(parent);
424 BUG_ON(shortcut->next_node != cursor);
425 cursor = parent;
426 parent = shortcut->back_pointer;
427 slot = shortcut->parent_slot;
428 pr_devel("free shortcut\n");
429 kfree(shortcut);
430 if (!parent)
431 return;
432
433 BUG_ON(!assoc_array_ptr_is_node(parent));
434 }
435
436 /* Ascend to next slot in parent node */
437 pr_devel("ascend to %p[%d]\n", parent, slot);
438 cursor = parent;
439 node = assoc_array_ptr_to_node(cursor);
440 slot++;
441 goto continue_node;
442}
443
444/**
445 * assoc_array_destroy - Destroy an associative array
446 * @array: The array to destroy.
447 * @ops: The operations to use.
448 *
449 * Discard all metadata and free all objects in an associative array. The
450 * array will be empty and ready to use again upon completion. This function
451 * cannot fail.
452 *
453 * The caller must prevent all other accesses whilst this takes place as no
454 * attempt is made to adjust pointers gracefully to permit RCU readlock-holding
455 * accesses to continue. On the other hand, no memory allocation is required.
456 */
457void assoc_array_destroy(struct assoc_array *array,
458 const struct assoc_array_ops *ops)
459{
460 assoc_array_destroy_subtree(array->root, ops);
461 array->root = NULL;
462}
463
464/*
465 * Handle insertion into an empty tree.
466 */
467static bool assoc_array_insert_in_empty_tree(struct assoc_array_edit *edit)
468{
469 struct assoc_array_node *new_n0;
470
471 pr_devel("-->%s()\n", __func__);
472
473 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
474 if (!new_n0)
475 return false;
476
477 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
478 edit->leaf_p = &new_n0->slots[0];
479 edit->adjust_count_on = new_n0;
480 edit->set[0].ptr = &edit->array->root;
481 edit->set[0].to = assoc_array_node_to_ptr(new_n0);
482
483 pr_devel("<--%s() = ok [no root]\n", __func__);
484 return true;
485}
486
487/*
488 * Handle insertion into a terminal node.
489 */
490static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit,
491 const struct assoc_array_ops *ops,
492 const void *index_key,
493 struct assoc_array_walk_result *result)
494{
495 struct assoc_array_shortcut *shortcut, *new_s0;
496 struct assoc_array_node *node, *new_n0, *new_n1, *side;
497 struct assoc_array_ptr *ptr;
498 unsigned long dissimilarity, base_seg, blank;
499 size_t keylen;
500 bool have_meta;
501 int level, diff;
502 int slot, next_slot, free_slot, i, j;
503
504 node = result->terminal_node.node;
505 level = result->terminal_node.level;
506 edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = result->terminal_node.slot;
507
508 pr_devel("-->%s()\n", __func__);
509
510 /* We arrived at a node which doesn't have an onward node or shortcut
511 * pointer that we have to follow. This means that (a) the leaf we
512 * want must go here (either by insertion or replacement) or (b) we
513 * need to split this node and insert in one of the fragments.
514 */
515 free_slot = -1;
516
517 /* Firstly, we have to check the leaves in this node to see if there's
518 * a matching one we should replace in place.
519 */
520 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
521 ptr = node->slots[i];
522 if (!ptr) {
523 free_slot = i;
524 continue;
525 }
526 if (ops->compare_object(assoc_array_ptr_to_leaf(ptr), index_key)) {
527 pr_devel("replace in slot %d\n", i);
528 edit->leaf_p = &node->slots[i];
529 edit->dead_leaf = node->slots[i];
530 pr_devel("<--%s() = ok [replace]\n", __func__);
531 return true;
532 }
533 }
534
535 /* If there is a free slot in this node then we can just insert the
536 * leaf here.
537 */
538 if (free_slot >= 0) {
539 pr_devel("insert in free slot %d\n", free_slot);
540 edit->leaf_p = &node->slots[free_slot];
541 edit->adjust_count_on = node;
542 pr_devel("<--%s() = ok [insert]\n", __func__);
543 return true;
544 }
545
546 /* The node has no spare slots - so we're either going to have to split
547 * it or insert another node before it.
548 *
549 * Whatever, we're going to need at least two new nodes - so allocate
550 * those now. We may also need a new shortcut, but we deal with that
551 * when we need it.
552 */
553 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
554 if (!new_n0)
555 return false;
556 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
557 new_n1 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
558 if (!new_n1)
559 return false;
560 edit->new_meta[1] = assoc_array_node_to_ptr(new_n1);
561
562 /* We need to find out how similar the leaves are. */
563 pr_devel("no spare slots\n");
564 have_meta = false;
565 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
566 ptr = node->slots[i];
567 if (assoc_array_ptr_is_meta(ptr)) {
568 edit->segment_cache[i] = 0xff;
569 have_meta = true;
570 continue;
571 }
572 base_seg = ops->get_object_key_chunk(
573 assoc_array_ptr_to_leaf(ptr), level);
574 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
575 edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK;
576 }
577
578 if (have_meta) {
579 pr_devel("have meta\n");
580 goto split_node;
581 }
582
583 /* The node contains only leaves */
584 dissimilarity = 0;
585 base_seg = edit->segment_cache[0];
586 for (i = 1; i < ASSOC_ARRAY_FAN_OUT; i++)
587 dissimilarity |= edit->segment_cache[i] ^ base_seg;
588
589 pr_devel("only leaves; dissimilarity=%lx\n", dissimilarity);
590
591 if ((dissimilarity & ASSOC_ARRAY_FAN_MASK) == 0) {
592 /* The old leaves all cluster in the same slot. We will need
593 * to insert a shortcut if the new node wants to cluster with them.
594 */
595 if ((edit->segment_cache[ASSOC_ARRAY_FAN_OUT] ^ base_seg) == 0)
596 goto all_leaves_cluster_together;
597
598 /* Otherwise we can just insert a new node ahead of the old
599 * one.
600 */
601 goto present_leaves_cluster_but_not_new_leaf;
602 }
603
604split_node:
605 pr_devel("split node\n");
606
607 /* We need to split the current node; we know that the node doesn't
608 * simply contain a full set of leaves that cluster together (it
609 * contains meta pointers and/or non-clustering leaves).
610 *
611 * We need to expel at least two leaves out of a set consisting of the
612 * leaves in the node and the new leaf.
613 *
614 * We need a new node (n0) to replace the current one and a new node to
615 * take the expelled nodes (n1).
616 */
617 edit->set[0].to = assoc_array_node_to_ptr(new_n0);
618 new_n0->back_pointer = node->back_pointer;
619 new_n0->parent_slot = node->parent_slot;
620 new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
621 new_n1->parent_slot = -1; /* Need to calculate this */
622
623do_split_node:
624 pr_devel("do_split_node\n");
625
626 new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
627 new_n1->nr_leaves_on_branch = 0;
628
629 /* Begin by finding two matching leaves. There have to be at least two
630 * that match - even if there are meta pointers - because any leaf that
631 * would match a slot with a meta pointer in it must be somewhere
632 * behind that meta pointer and cannot be here. Further, given N
633 * remaining leaf slots, we now have N+1 leaves to go in them.
634 */
635 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
636 slot = edit->segment_cache[i];
637 if (slot != 0xff)
638 for (j = i + 1; j < ASSOC_ARRAY_FAN_OUT + 1; j++)
639 if (edit->segment_cache[j] == slot)
640 goto found_slot_for_multiple_occupancy;
641 }
642found_slot_for_multiple_occupancy:
643 pr_devel("same slot: %x %x [%02x]\n", i, j, slot);
644 BUG_ON(i >= ASSOC_ARRAY_FAN_OUT);
645 BUG_ON(j >= ASSOC_ARRAY_FAN_OUT + 1);
646 BUG_ON(slot >= ASSOC_ARRAY_FAN_OUT);
647
648 new_n1->parent_slot = slot;
649
650 /* Metadata pointers cannot change slot */
651 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++)
652 if (assoc_array_ptr_is_meta(node->slots[i]))
653 new_n0->slots[i] = node->slots[i];
654 else
655 new_n0->slots[i] = NULL;
656 BUG_ON(new_n0->slots[slot] != NULL);
657 new_n0->slots[slot] = assoc_array_node_to_ptr(new_n1);
658
659 /* Filter the leaf pointers between the new nodes */
660 free_slot = -1;
661 next_slot = 0;
662 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
663 if (assoc_array_ptr_is_meta(node->slots[i]))
664 continue;
665 if (edit->segment_cache[i] == slot) {
666 new_n1->slots[next_slot++] = node->slots[i];
667 new_n1->nr_leaves_on_branch++;
668 } else {
669 do {
670 free_slot++;
671 } while (new_n0->slots[free_slot] != NULL);
672 new_n0->slots[free_slot] = node->slots[i];
673 }
674 }
675
676 pr_devel("filtered: f=%x n=%x\n", free_slot, next_slot);
677
678 if (edit->segment_cache[ASSOC_ARRAY_FAN_OUT] != slot) {
679 do {
680 free_slot++;
681 } while (new_n0->slots[free_slot] != NULL);
682 edit->leaf_p = &new_n0->slots[free_slot];
683 edit->adjust_count_on = new_n0;
684 } else {
685 edit->leaf_p = &new_n1->slots[next_slot++];
686 edit->adjust_count_on = new_n1;
687 }
688
689 BUG_ON(next_slot <= 1);
690
691 edit->set_backpointers_to = assoc_array_node_to_ptr(new_n0);
692 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
693 if (edit->segment_cache[i] == 0xff) {
694 ptr = node->slots[i];
695 BUG_ON(assoc_array_ptr_is_leaf(ptr));
696 if (assoc_array_ptr_is_node(ptr)) {
697 side = assoc_array_ptr_to_node(ptr);
698 edit->set_backpointers[i] = &side->back_pointer;
699 } else {
700 shortcut = assoc_array_ptr_to_shortcut(ptr);
701 edit->set_backpointers[i] = &shortcut->back_pointer;
702 }
703 }
704 }
705
706 ptr = node->back_pointer;
707 if (!ptr)
708 edit->set[0].ptr = &edit->array->root;
709 else if (assoc_array_ptr_is_node(ptr))
710 edit->set[0].ptr = &assoc_array_ptr_to_node(ptr)->slots[node->parent_slot];
711 else
712 edit->set[0].ptr = &assoc_array_ptr_to_shortcut(ptr)->next_node;
713 edit->excised_meta[0] = assoc_array_node_to_ptr(node);
714 pr_devel("<--%s() = ok [split node]\n", __func__);
715 return true;
716
717present_leaves_cluster_but_not_new_leaf:
718 /* All the old leaves cluster in the same slot, but the new leaf wants
719 * to go into a different slot, so we create a new node to hold the new
720 * leaf and a pointer to a new node holding all the old leaves.
721 */
722 pr_devel("present leaves cluster but not new leaf\n");
723
724 new_n0->back_pointer = node->back_pointer;
725 new_n0->parent_slot = node->parent_slot;
726 new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
727 new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
728 new_n1->parent_slot = edit->segment_cache[0];
729 new_n1->nr_leaves_on_branch = node->nr_leaves_on_branch;
730 edit->adjust_count_on = new_n0;
731
732 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++)
733 new_n1->slots[i] = node->slots[i];
734
735 new_n0->slots[edit->segment_cache[0]] = assoc_array_node_to_ptr(new_n1);
736 edit->leaf_p = &new_n0->slots[edit->segment_cache[ASSOC_ARRAY_FAN_OUT]];
737
738 edit->set[0].ptr = &assoc_array_ptr_to_node(node->back_pointer)->slots[node->parent_slot];
739 edit->set[0].to = assoc_array_node_to_ptr(new_n0);
740 edit->excised_meta[0] = assoc_array_node_to_ptr(node);
741 pr_devel("<--%s() = ok [insert node before]\n", __func__);
742 return true;
743
744all_leaves_cluster_together:
745 /* All the leaves, new and old, want to cluster together in this node
746 * in the same slot, so we have to replace this node with a shortcut to
747 * skip over the identical parts of the key and then place a pair of
748 * nodes, one inside the other, at the end of the shortcut and
749 * distribute the keys between them.
750 *
751 * Firstly we need to work out where the leaves start diverging as a
752 * bit position into their keys so that we know how big the shortcut
753 * needs to be.
754 *
755 * We only need to make a single pass of N of the N+1 leaves because if
756 * any keys differ between themselves at bit X then at least one of
757 * them must also differ with the base key at bit X or before.
758 */
759 pr_devel("all leaves cluster together\n");
760 diff = INT_MAX;
761 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
762 int x = ops->diff_objects(assoc_array_ptr_to_leaf(edit->leaf),
763 assoc_array_ptr_to_leaf(node->slots[i]));
764 if (x < diff) {
765 BUG_ON(x < 0);
766 diff = x;
767 }
768 }
769 BUG_ON(diff == INT_MAX);
770 BUG_ON(diff < level + ASSOC_ARRAY_LEVEL_STEP);
771
772 keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE);
773 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
774
775 new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) +
776 keylen * sizeof(unsigned long), GFP_KERNEL);
777 if (!new_s0)
778 return false;
779 edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s0);
780
781 edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0);
782 new_s0->back_pointer = node->back_pointer;
783 new_s0->parent_slot = node->parent_slot;
784 new_s0->next_node = assoc_array_node_to_ptr(new_n0);
785 new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0);
786 new_n0->parent_slot = 0;
787 new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
788 new_n1->parent_slot = -1; /* Need to calculate this */
789
790 new_s0->skip_to_level = level = diff & ~ASSOC_ARRAY_LEVEL_STEP_MASK;
791 pr_devel("skip_to_level = %d [diff %d]\n", level, diff);
792 BUG_ON(level <= 0);
793
794 for (i = 0; i < keylen; i++)
795 new_s0->index_key[i] =
796 ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE);
797
798 blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK);
799 pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank);
800 new_s0->index_key[keylen - 1] &= ~blank;
801
802 /* This now reduces to a node splitting exercise for which we'll need
803 * to regenerate the disparity table.
804 */
805 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
806 ptr = node->slots[i];
807 base_seg = ops->get_object_key_chunk(assoc_array_ptr_to_leaf(ptr),
808 level);
809 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
810 edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK;
811 }
812
813 base_seg = ops->get_key_chunk(index_key, level);
814 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
815 edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = base_seg & ASSOC_ARRAY_FAN_MASK;
816 goto do_split_node;
817}
818
819/*
820 * Handle insertion into the middle of a shortcut.
821 */
822static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit,
823 const struct assoc_array_ops *ops,
824 struct assoc_array_walk_result *result)
825{
826 struct assoc_array_shortcut *shortcut, *new_s0, *new_s1;
827 struct assoc_array_node *node, *new_n0, *side;
828 unsigned long sc_segments, dissimilarity, blank;
829 size_t keylen;
830 int level, sc_level, diff;
831 int sc_slot;
832
833 shortcut = result->wrong_shortcut.shortcut;
834 level = result->wrong_shortcut.level;
835 sc_level = result->wrong_shortcut.sc_level;
836 sc_segments = result->wrong_shortcut.sc_segments;
837 dissimilarity = result->wrong_shortcut.dissimilarity;
838
839 pr_devel("-->%s(ix=%d dis=%lx scix=%d)\n",
840 __func__, level, dissimilarity, sc_level);
841
842 /* We need to split a shortcut and insert a node between the two
843 * pieces. Zero-length pieces will be dispensed with entirely.
844 *
845 * First of all, we need to find out in which level the first
846 * difference was.
847 */
848 diff = __ffs(dissimilarity);
849 diff &= ~ASSOC_ARRAY_LEVEL_STEP_MASK;
850 diff += sc_level & ~ASSOC_ARRAY_KEY_CHUNK_MASK;
851 pr_devel("diff=%d\n", diff);
852
853 if (!shortcut->back_pointer) {
854 edit->set[0].ptr = &edit->array->root;
855 } else if (assoc_array_ptr_is_node(shortcut->back_pointer)) {
856 node = assoc_array_ptr_to_node(shortcut->back_pointer);
857 edit->set[0].ptr = &node->slots[shortcut->parent_slot];
858 } else {
859 BUG();
860 }
861
862 edit->excised_meta[0] = assoc_array_shortcut_to_ptr(shortcut);
863
864 /* Create a new node now since we're going to need it anyway */
865 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
866 if (!new_n0)
867 return false;
868 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
869 edit->adjust_count_on = new_n0;
870
871 /* Insert a new shortcut before the new node if this segment isn't of
872 * zero length - otherwise we just connect the new node directly to the
873 * parent.
874 */
875 level += ASSOC_ARRAY_LEVEL_STEP;
876 if (diff > level) {
877 pr_devel("pre-shortcut %d...%d\n", level, diff);
878 keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE);
879 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
880
881 new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) +
882 keylen * sizeof(unsigned long), GFP_KERNEL);
883 if (!new_s0)
884 return false;
885 edit->new_meta[1] = assoc_array_shortcut_to_ptr(new_s0);
886 edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0);
887 new_s0->back_pointer = shortcut->back_pointer;
888 new_s0->parent_slot = shortcut->parent_slot;
889 new_s0->next_node = assoc_array_node_to_ptr(new_n0);
890 new_s0->skip_to_level = diff;
891
892 new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0);
893 new_n0->parent_slot = 0;
894
895 memcpy(new_s0->index_key, shortcut->index_key,
896 keylen * sizeof(unsigned long));
897
898 blank = ULONG_MAX << (diff & ASSOC_ARRAY_KEY_CHUNK_MASK);
899 pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, diff, blank);
900 new_s0->index_key[keylen - 1] &= ~blank;
901 } else {
902 pr_devel("no pre-shortcut\n");
903 edit->set[0].to = assoc_array_node_to_ptr(new_n0);
904 new_n0->back_pointer = shortcut->back_pointer;
905 new_n0->parent_slot = shortcut->parent_slot;
906 }
907
908 side = assoc_array_ptr_to_node(shortcut->next_node);
909 new_n0->nr_leaves_on_branch = side->nr_leaves_on_branch;
910
911 /* We need to know which slot in the new node is going to take a
912 * metadata pointer.
913 */
914 sc_slot = sc_segments >> (diff & ASSOC_ARRAY_KEY_CHUNK_MASK);
915 sc_slot &= ASSOC_ARRAY_FAN_MASK;
916
917 pr_devel("new slot %lx >> %d -> %d\n",
918 sc_segments, diff & ASSOC_ARRAY_KEY_CHUNK_MASK, sc_slot);
919
920 /* Determine whether we need to follow the new node with a replacement
921 * for the current shortcut. We could in theory reuse the current
922 * shortcut if its parent slot number doesn't change - but that's a
923 * 1-in-16 chance so not worth expending the code upon.
924 */
925 level = diff + ASSOC_ARRAY_LEVEL_STEP;
926 if (level < shortcut->skip_to_level) {
927 pr_devel("post-shortcut %d...%d\n", level, shortcut->skip_to_level);
928 keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
929 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
930
931 new_s1 = kzalloc(sizeof(struct assoc_array_shortcut) +
932 keylen * sizeof(unsigned long), GFP_KERNEL);
933 if (!new_s1)
934 return false;
935 edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s1);
936
937 new_s1->back_pointer = assoc_array_node_to_ptr(new_n0);
938 new_s1->parent_slot = sc_slot;
939 new_s1->next_node = shortcut->next_node;
940 new_s1->skip_to_level = shortcut->skip_to_level;
941
942 new_n0->slots[sc_slot] = assoc_array_shortcut_to_ptr(new_s1);
943
944 memcpy(new_s1->index_key, shortcut->index_key,
945 keylen * sizeof(unsigned long));
946
947 edit->set[1].ptr = &side->back_pointer;
948 edit->set[1].to = assoc_array_shortcut_to_ptr(new_s1);
949 } else {
950 pr_devel("no post-shortcut\n");
951
952 /* We don't have to replace the pointed-to node as long as we
953 * use memory barriers to make sure the parent slot number is
954 * changed before the back pointer (the parent slot number is
955 * irrelevant to the old parent shortcut).
956 */
957 new_n0->slots[sc_slot] = shortcut->next_node;
958 edit->set_parent_slot[0].p = &side->parent_slot;
959 edit->set_parent_slot[0].to = sc_slot;
960 edit->set[1].ptr = &side->back_pointer;
961 edit->set[1].to = assoc_array_node_to_ptr(new_n0);
962 }
963
964 /* Install the new leaf in a spare slot in the new node. */
965 if (sc_slot == 0)
966 edit->leaf_p = &new_n0->slots[1];
967 else
968 edit->leaf_p = &new_n0->slots[0];
969
970 pr_devel("<--%s() = ok [split shortcut]\n", __func__);
971 return true;
972}
973
974/**
975 * assoc_array_insert - Script insertion of an object into an associative array
976 * @array: The array to insert into.
977 * @ops: The operations to use.
978 * @index_key: The key to insert at.
979 * @object: The object to insert.
980 *
981 * Precalculate and preallocate a script for the insertion or replacement of an
982 * object in an associative array. This results in an edit script that can
983 * either be applied or cancelled.
984 *
985 * The function returns a pointer to an edit script or -ENOMEM.
986 *
987 * The caller should lock against other modifications and must continue to hold
988 * the lock until assoc_array_apply_edit() has been called.
989 *
990 * Accesses to the tree may take place concurrently with this function,
991 * provided they hold the RCU read lock.
992 */
993struct assoc_array_edit *assoc_array_insert(struct assoc_array *array,
994 const struct assoc_array_ops *ops,
995 const void *index_key,
996 void *object)
997{
998 struct assoc_array_walk_result result;
999 struct assoc_array_edit *edit;
1000
1001 pr_devel("-->%s()\n", __func__);
1002
1003 /* The leaf pointer we're given must not have the bottom bit set as we
1004 * use those for type-marking the pointer. NULL pointers are also not
1005 * allowed as they indicate an empty slot but we have to allow them
1006 * here as they can be updated later.
1007 */
1008 BUG_ON(assoc_array_ptr_is_meta(object));
1009
1010 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
1011 if (!edit)
1012 return ERR_PTR(-ENOMEM);
1013 edit->array = array;
1014 edit->ops = ops;
1015 edit->leaf = assoc_array_leaf_to_ptr(object);
1016 edit->adjust_count_by = 1;
1017
1018 switch (assoc_array_walk(array, ops, index_key, &result)) {
1019 case assoc_array_walk_tree_empty:
1020 /* Allocate a root node if there isn't one yet */
1021 if (!assoc_array_insert_in_empty_tree(edit))
1022 goto enomem;
1023 return edit;
1024
1025 case assoc_array_walk_found_terminal_node:
1026 /* We found a node that doesn't have a node/shortcut pointer in
1027 * the slot corresponding to the index key that we have to
1028 * follow.
1029 */
1030 if (!assoc_array_insert_into_terminal_node(edit, ops, index_key,
1031 &result))
1032 goto enomem;
1033 return edit;
1034
1035 case assoc_array_walk_found_wrong_shortcut:
1036 /* We found a shortcut that didn't match our key in a slot we
1037 * needed to follow.
1038 */
1039 if (!assoc_array_insert_mid_shortcut(edit, ops, &result))
1040 goto enomem;
1041 return edit;
1042 }
1043
1044enomem:
1045 /* Clean up after an out of memory error */
1046 pr_devel("enomem\n");
1047 assoc_array_cancel_edit(edit);
1048 return ERR_PTR(-ENOMEM);
1049}
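
To make the edit-script flow concrete, a hedged sketch of a typical insertion under the caller's own lock follows; my_lock, my_ops and struct my_object are the hypothetical pieces from the earlier sketch, while the assoc_array_* calls are the ones defined in this file.

static DEFINE_MUTEX(my_lock);	/* hypothetical lock guarding modifications */

static int my_insert(struct assoc_array *array, struct my_object *obj)
{
	struct assoc_array_edit *edit;

	mutex_lock(&my_lock);
	edit = assoc_array_insert(array, &my_ops, &obj->key, obj);
	if (IS_ERR(edit)) {
		mutex_unlock(&my_lock);
		return PTR_ERR(edit);		/* -ENOMEM */
	}
	assoc_array_apply_edit(edit);		/* cannot fail */
	mutex_unlock(&my_lock);
	return 0;
}

Had the caller decided not to go ahead after scripting the change, it would call assoc_array_cancel_edit(edit) instead of applying it and would remain responsible for the leaf it meant to insert.
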
1050
1051/**
1052 * assoc_array_insert_set_object - Set the new object pointer in an edit script
1053 * @edit: The edit script to modify.
1054 * @object: The object pointer to set.
1055 *
1056 * Change the object to be inserted in an edit script. The object pointed to
1057 * by the old object is not freed. This must be done prior to applying the
1058 * script.
1059 */
1060void assoc_array_insert_set_object(struct assoc_array_edit *edit, void *object)
1061{
1062 BUG_ON(!object);
1063 edit->leaf = assoc_array_leaf_to_ptr(object);
1064}
1065
1066struct assoc_array_delete_collapse_context {
1067 struct assoc_array_node *node;
1068 const void *skip_leaf;
1069 int slot;
1070};
1071
1072/*
1073 * Subtree collapse to node iterator.
1074 */
1075static int assoc_array_delete_collapse_iterator(const void *leaf,
1076 void *iterator_data)
1077{
1078 struct assoc_array_delete_collapse_context *collapse = iterator_data;
1079
1080 if (leaf == collapse->skip_leaf)
1081 return 0;
1082
1083 BUG_ON(collapse->slot >= ASSOC_ARRAY_FAN_OUT);
1084
1085 collapse->node->slots[collapse->slot++] = assoc_array_leaf_to_ptr(leaf);
1086 return 0;
1087}
1088
1089/**
1090 * assoc_array_delete - Script deletion of an object from an associative array
1091 * @array: The array to search.
1092 * @ops: The operations to use.
1093 * @index_key: The key to the object.
1094 *
1095 * Precalculate and preallocate a script for the deletion of an object from an
1096 * associative array. This results in an edit script that can either be
1097 * applied or cancelled.
1098 *
1099 * The function returns a pointer to an edit script if the object was found,
1100 * NULL if the object was not found or -ENOMEM.
1101 *
1102 * The caller should lock against other modifications and must continue to hold
1103 * the lock until assoc_array_apply_edit() has been called.
1104 *
1105 * Accesses to the tree may take place concurrently with this function,
1106 * provided they hold the RCU read lock.
1107 */
1108struct assoc_array_edit *assoc_array_delete(struct assoc_array *array,
1109 const struct assoc_array_ops *ops,
1110 const void *index_key)
1111{
1112 struct assoc_array_delete_collapse_context collapse;
1113 struct assoc_array_walk_result result;
1114 struct assoc_array_node *node, *new_n0;
1115 struct assoc_array_edit *edit;
1116 struct assoc_array_ptr *ptr;
1117 bool has_meta;
1118 int slot, i;
1119
1120 pr_devel("-->%s()\n", __func__);
1121
1122 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
1123 if (!edit)
1124 return ERR_PTR(-ENOMEM);
1125 edit->array = array;
1126 edit->ops = ops;
1127 edit->adjust_count_by = -1;
1128
1129 switch (assoc_array_walk(array, ops, index_key, &result)) {
1130 case assoc_array_walk_found_terminal_node:
1131 /* We found a node that should contain the leaf we've been
1132 * asked to remove - *if* it's in the tree.
1133 */
1134 pr_devel("terminal_node\n");
1135 node = result.terminal_node.node;
1136
1137 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
1138 ptr = node->slots[slot];
1139 if (ptr &&
1140 assoc_array_ptr_is_leaf(ptr) &&
1141 ops->compare_object(assoc_array_ptr_to_leaf(ptr),
1142 index_key))
1143 goto found_leaf;
1144 }
1145 case assoc_array_walk_tree_empty:
1146 case assoc_array_walk_found_wrong_shortcut:
1147 default:
1148 assoc_array_cancel_edit(edit);
1149 pr_devel("not found\n");
1150 return NULL;
1151 }
1152
1153found_leaf:
1154 BUG_ON(array->nr_leaves_on_tree <= 0);
1155
1156 /* In the simplest form of deletion we just clear the slot and release
1157 * the leaf after a suitable interval.
1158 */
1159 edit->dead_leaf = node->slots[slot];
1160 edit->set[0].ptr = &node->slots[slot];
1161 edit->set[0].to = NULL;
1162 edit->adjust_count_on = node;
1163
1164 /* If that concludes erasure of the last leaf, then delete the entire
1165 * internal array.
1166 */
1167 if (array->nr_leaves_on_tree == 1) {
1168 edit->set[1].ptr = &array->root;
1169 edit->set[1].to = NULL;
1170 edit->adjust_count_on = NULL;
1171 edit->excised_subtree = array->root;
1172 pr_devel("all gone\n");
1173 return edit;
1174 }
1175
1176 /* However, we'd also like to clear up some metadata blocks if we
1177 * possibly can.
1178 *
1179 * We go for a simple algorithm of: if this node has FAN_OUT or fewer
1180 * leaves in it, then attempt to collapse it - and attempt to
1181 * recursively collapse up the tree.
1182 *
1183 * We could also try and collapse in partially filled subtrees to take
1184 * up space in this node.
1185 */
1186 if (node->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
1187 struct assoc_array_node *parent, *grandparent;
1188 struct assoc_array_ptr *ptr;
1189
1190 /* First of all, we need to know if this node has metadata so
1191 * that we don't try collapsing if all the leaves are already
1192 * here.
1193 */
1194 has_meta = false;
1195 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
1196 ptr = node->slots[i];
1197 if (assoc_array_ptr_is_meta(ptr)) {
1198 has_meta = true;
1199 break;
1200 }
1201 }
1202
1203 pr_devel("leaves: %ld [m=%d]\n",
1204 node->nr_leaves_on_branch - 1, has_meta);
1205
1206 /* Look further up the tree to see if we can collapse this node
1207 * into a more proximal node too.
1208 */
1209 parent = node;
1210 collapse_up:
1211 pr_devel("collapse subtree: %ld\n", parent->nr_leaves_on_branch);
1212
1213 ptr = parent->back_pointer;
1214 if (!ptr)
1215 goto do_collapse;
1216 if (assoc_array_ptr_is_shortcut(ptr)) {
1217 struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(ptr);
1218 ptr = s->back_pointer;
1219 if (!ptr)
1220 goto do_collapse;
1221 }
1222
1223 grandparent = assoc_array_ptr_to_node(ptr);
1224 if (grandparent->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
1225 parent = grandparent;
1226 goto collapse_up;
1227 }
1228
1229 do_collapse:
1230 /* There's no point collapsing if the original node has no meta
1231 * pointers to discard and if we didn't merge into one of that
1232 * node's ancestry.
1233 */
1234 if (has_meta || parent != node) {
1235 node = parent;
1236
1237 /* Create a new node to collapse into */
1238 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
1239 if (!new_n0)
1240 goto enomem;
1241 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
1242
1243 new_n0->back_pointer = node->back_pointer;
1244 new_n0->parent_slot = node->parent_slot;
1245 new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
1246 edit->adjust_count_on = new_n0;
1247
1248 collapse.node = new_n0;
1249 collapse.skip_leaf = assoc_array_ptr_to_leaf(edit->dead_leaf);
1250 collapse.slot = 0;
1251 assoc_array_subtree_iterate(assoc_array_node_to_ptr(node),
1252 node->back_pointer,
1253 assoc_array_delete_collapse_iterator,
1254 &collapse);
1255 pr_devel("collapsed %d,%lu\n", collapse.slot, new_n0->nr_leaves_on_branch);
1256 BUG_ON(collapse.slot != new_n0->nr_leaves_on_branch - 1);
1257
1258 if (!node->back_pointer) {
1259 edit->set[1].ptr = &array->root;
1260 } else if (assoc_array_ptr_is_leaf(node->back_pointer)) {
1261 BUG();
1262 } else if (assoc_array_ptr_is_node(node->back_pointer)) {
1263 struct assoc_array_node *p =
1264 assoc_array_ptr_to_node(node->back_pointer);
1265 edit->set[1].ptr = &p->slots[node->parent_slot];
1266 } else if (assoc_array_ptr_is_shortcut(node->back_pointer)) {
1267 struct assoc_array_shortcut *s =
1268 assoc_array_ptr_to_shortcut(node->back_pointer);
1269 edit->set[1].ptr = &s->next_node;
1270 }
1271 edit->set[1].to = assoc_array_node_to_ptr(new_n0);
1272 edit->excised_subtree = assoc_array_node_to_ptr(node);
1273 }
1274 }
1275
1276 return edit;
1277
1278enomem:
1279 /* Clean up after an out of memory error */
1280 pr_devel("enomem\n");
1281 assoc_array_cancel_edit(edit);
1282 return ERR_PTR(-ENOMEM);
1283}
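
Deletion follows the same script-then-apply pattern but has a three-way result (an edit script, NULL for not found, or an ERR_PTR); below is a hedged sketch reusing the hypothetical my_lock and my_ops from the sketches above.

static int my_remove(struct assoc_array *array, unsigned long key)
{
	struct assoc_array_edit *edit;

	mutex_lock(&my_lock);
	edit = assoc_array_delete(array, &my_ops, &key);
	if (IS_ERR(edit)) {
		mutex_unlock(&my_lock);
		return PTR_ERR(edit);		/* -ENOMEM */
	}
	if (!edit) {
		mutex_unlock(&my_lock);
		return -ENOENT;			/* no such object */
	}
	assoc_array_apply_edit(edit);	/* dead leaf is freed after an RCU grace period */
	mutex_unlock(&my_lock);
	return 0;
}
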
1284
1285/**
1286 * assoc_array_clear - Script deletion of all objects from an associative array
1287 * @array: The array to clear.
1288 * @ops: The operations to use.
1289 *
1290 * Precalculate and preallocate a script for the deletion of all the objects
1291 * from an associative array. This results in an edit script that can either
1292 * be applied or cancelled.
1293 *
1294 * The function returns a pointer to an edit script if there are objects to be
1295 * deleted, NULL if there are no objects in the array or -ENOMEM.
1296 *
1297 * The caller should lock against other modifications and must continue to hold
1298 * the lock until assoc_array_apply_edit() has been called.
1299 *
1300 * Accesses to the tree may take place concurrently with this function,
1301 * provided they hold the RCU read lock.
1302 */
1303struct assoc_array_edit *assoc_array_clear(struct assoc_array *array,
1304 const struct assoc_array_ops *ops)
1305{
1306 struct assoc_array_edit *edit;
1307
1308 pr_devel("-->%s()\n", __func__);
1309
1310 if (!array->root)
1311 return NULL;
1312
1313 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
1314 if (!edit)
1315 return ERR_PTR(-ENOMEM);
1316 edit->array = array;
1317 edit->ops = ops;
1318 edit->set[1].ptr = &array->root;
1319 edit->set[1].to = NULL;
1320 edit->excised_subtree = array->root;
1321 edit->ops_for_excised_subtree = ops;
1322 pr_devel("all gone\n");
1323 return edit;
1324}
1325
1326/*
1327 * Handle the deferred destruction after an applied edit.
1328 */
1329static void assoc_array_rcu_cleanup(struct rcu_head *head)
1330{
1331 struct assoc_array_edit *edit =
1332 container_of(head, struct assoc_array_edit, rcu);
1333 int i;
1334
1335 pr_devel("-->%s()\n", __func__);
1336
1337 if (edit->dead_leaf)
1338 edit->ops->free_object(assoc_array_ptr_to_leaf(edit->dead_leaf));
1339 for (i = 0; i < ARRAY_SIZE(edit->excised_meta); i++)
1340 if (edit->excised_meta[i])
1341 kfree(assoc_array_ptr_to_node(edit->excised_meta[i]));
1342
1343 if (edit->excised_subtree) {
1344 BUG_ON(assoc_array_ptr_is_leaf(edit->excised_subtree));
1345 if (assoc_array_ptr_is_node(edit->excised_subtree)) {
1346 struct assoc_array_node *n =
1347 assoc_array_ptr_to_node(edit->excised_subtree);
1348 n->back_pointer = NULL;
1349 } else {
1350 struct assoc_array_shortcut *s =
1351 assoc_array_ptr_to_shortcut(edit->excised_subtree);
1352 s->back_pointer = NULL;
1353 }
1354 assoc_array_destroy_subtree(edit->excised_subtree,
1355 edit->ops_for_excised_subtree);
1356 }
1357
1358 kfree(edit);
1359}
1360
1361/**
1362 * assoc_array_apply_edit - Apply an edit script to an associative array
1363 * @edit: The script to apply.
1364 *
1365 * Apply an edit script to an associative array to effect an insertion,
1366 * deletion or clearance. As the edit script includes preallocated memory,
1367 * this is guaranteed not to fail.
1368 *
1369 * The edit script, dead objects and dead metadata will be scheduled for
1370 * destruction after an RCU grace period to permit those doing read-only
1371 * accesses on the array to continue to do so under the RCU read lock whilst
1372 * the edit is taking place.
1373 */
1374void assoc_array_apply_edit(struct assoc_array_edit *edit)
1375{
1376 struct assoc_array_shortcut *shortcut;
1377 struct assoc_array_node *node;
1378 struct assoc_array_ptr *ptr;
1379 int i;
1380
1381 pr_devel("-->%s()\n", __func__);
1382
1383 smp_wmb();
1384 if (edit->leaf_p)
1385 *edit->leaf_p = edit->leaf;
1386
1387 smp_wmb();
1388 for (i = 0; i < ARRAY_SIZE(edit->set_parent_slot); i++)
1389 if (edit->set_parent_slot[i].p)
1390 *edit->set_parent_slot[i].p = edit->set_parent_slot[i].to;
1391
1392 smp_wmb();
1393 for (i = 0; i < ARRAY_SIZE(edit->set_backpointers); i++)
1394 if (edit->set_backpointers[i])
1395 *edit->set_backpointers[i] = edit->set_backpointers_to;
1396
1397 smp_wmb();
1398 for (i = 0; i < ARRAY_SIZE(edit->set); i++)
1399 if (edit->set[i].ptr)
1400 *edit->set[i].ptr = edit->set[i].to;
1401
1402 if (edit->array->root == NULL) {
1403 edit->array->nr_leaves_on_tree = 0;
1404 } else if (edit->adjust_count_on) {
1405 node = edit->adjust_count_on;
1406 for (;;) {
1407 node->nr_leaves_on_branch += edit->adjust_count_by;
1408
1409 ptr = node->back_pointer;
1410 if (!ptr)
1411 break;
1412 if (assoc_array_ptr_is_shortcut(ptr)) {
1413 shortcut = assoc_array_ptr_to_shortcut(ptr);
1414 ptr = shortcut->back_pointer;
1415 if (!ptr)
1416 break;
1417 }
1418 BUG_ON(!assoc_array_ptr_is_node(ptr));
1419 node = assoc_array_ptr_to_node(ptr);
1420 }
1421
1422 edit->array->nr_leaves_on_tree += edit->adjust_count_by;
1423 }
1424
1425 call_rcu(&edit->rcu, assoc_array_rcu_cleanup);
1426}
1427
1428/**
1429 * assoc_array_cancel_edit - Discard an edit script.
1430 * @edit: The script to discard.
1431 *
1432 * Free an edit script and all the preallocated data it holds without making
1433 * any changes to the associative array it was intended for.
1434 *
1435 * NOTE! In the case of an insertion script, this does _not_ release the leaf
1436 * that was to be inserted. That is left to the caller.
1437 */
1438void assoc_array_cancel_edit(struct assoc_array_edit *edit)
1439{
1440 struct assoc_array_ptr *ptr;
1441 int i;
1442
1443 pr_devel("-->%s()\n", __func__);
1444
1445 /* Clean up after an out of memory error */
1446 for (i = 0; i < ARRAY_SIZE(edit->new_meta); i++) {
1447 ptr = edit->new_meta[i];
1448 if (ptr) {
1449 if (assoc_array_ptr_is_node(ptr))
1450 kfree(assoc_array_ptr_to_node(ptr));
1451 else
1452 kfree(assoc_array_ptr_to_shortcut(ptr));
1453 }
1454 }
1455 kfree(edit);
1456}
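
A similarly hedged sketch of the cancel path: the script is discarded without touching the array, and freeing the would-be leaf remains the caller's responsibility. The proceed flag is purely illustrative:

#include <linux/assoc_array.h>
#include <linux/err.h>
#include <linux/errno.h>

static int example_insert_or_back_out(struct assoc_array *array,
				      const struct assoc_array_ops *ops,
				      const void *index_key, void *object,
				      bool proceed)
{
	struct assoc_array_edit *edit;

	edit = assoc_array_insert(array, ops, index_key, object);
	if (IS_ERR(edit))
		return PTR_ERR(edit);

	if (!proceed) {
		/* Frees only the preallocated metadata, not @object. */
		assoc_array_cancel_edit(edit);
		return -ECANCELED;
	}

	assoc_array_apply_edit(edit);
	return 0;
}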
1457
1458/**
1459 * assoc_array_gc - Garbage collect an associative array.
1460 * @array: The array to clean.
1461 * @ops: The operations to use.
1462 * @iterator: A callback function to pass judgement on each object.
1463 * @iterator_data: Private data for the callback function.
1464 *
1465 * Collect garbage from an associative array and pack down the internal tree to
1466 * save memory.
1467 *
1468 * The iterator function is asked to pass judgement upon each object in the
 1469 * array. If it returns false, the object is discarded and if it returns true,
1470 * the object is kept. If it returns true, it must increment the object's
1471 * usage count (or whatever it needs to do to retain it) before returning.
1472 *
1473 * This function returns 0 if successful or -ENOMEM if out of memory. In the
1474 * latter case, the array is not changed.
1475 *
1476 * The caller should lock against other modifications and must continue to hold
1477 * the lock until assoc_array_apply_edit() has been called.
1478 *
1479 * Accesses to the tree may take place concurrently with this function,
1480 * provided they hold the RCU read lock.
1481 */
1482int assoc_array_gc(struct assoc_array *array,
1483 const struct assoc_array_ops *ops,
1484 bool (*iterator)(void *object, void *iterator_data),
1485 void *iterator_data)
1486{
1487 struct assoc_array_shortcut *shortcut, *new_s;
1488 struct assoc_array_node *node, *new_n;
1489 struct assoc_array_edit *edit;
1490 struct assoc_array_ptr *cursor, *ptr;
1491 struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp;
1492 unsigned long nr_leaves_on_tree;
1493 int keylen, slot, nr_free, next_slot, i;
1494
1495 pr_devel("-->%s()\n", __func__);
1496
1497 if (!array->root)
1498 return 0;
1499
1500 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
1501 if (!edit)
1502 return -ENOMEM;
1503 edit->array = array;
1504 edit->ops = ops;
1505 edit->ops_for_excised_subtree = ops;
1506 edit->set[0].ptr = &array->root;
1507 edit->excised_subtree = array->root;
1508
1509 new_root = new_parent = NULL;
1510 new_ptr_pp = &new_root;
1511 cursor = array->root;
1512
1513descend:
1514 /* If this point is a shortcut, then we need to duplicate it and
1515 * advance the target cursor.
1516 */
1517 if (assoc_array_ptr_is_shortcut(cursor)) {
1518 shortcut = assoc_array_ptr_to_shortcut(cursor);
1519 keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
1520 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
1521 new_s = kmalloc(sizeof(struct assoc_array_shortcut) +
1522 keylen * sizeof(unsigned long), GFP_KERNEL);
1523 if (!new_s)
1524 goto enomem;
1525 pr_devel("dup shortcut %p -> %p\n", shortcut, new_s);
1526 memcpy(new_s, shortcut, (sizeof(struct assoc_array_shortcut) +
1527 keylen * sizeof(unsigned long)));
1528 new_s->back_pointer = new_parent;
1529 new_s->parent_slot = shortcut->parent_slot;
1530 *new_ptr_pp = new_parent = assoc_array_shortcut_to_ptr(new_s);
1531 new_ptr_pp = &new_s->next_node;
1532 cursor = shortcut->next_node;
1533 }
1534
1535 /* Duplicate the node at this position */
1536 node = assoc_array_ptr_to_node(cursor);
1537 new_n = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
1538 if (!new_n)
1539 goto enomem;
1540 pr_devel("dup node %p -> %p\n", node, new_n);
1541 new_n->back_pointer = new_parent;
1542 new_n->parent_slot = node->parent_slot;
1543 *new_ptr_pp = new_parent = assoc_array_node_to_ptr(new_n);
1544 new_ptr_pp = NULL;
1545 slot = 0;
1546
1547continue_node:
1548 /* Filter across any leaves and gc any subtrees */
1549 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
1550 ptr = node->slots[slot];
1551 if (!ptr)
1552 continue;
1553
1554 if (assoc_array_ptr_is_leaf(ptr)) {
1555 if (iterator(assoc_array_ptr_to_leaf(ptr),
1556 iterator_data))
1557 /* The iterator will have done any reference
1558 * counting on the object for us.
1559 */
1560 new_n->slots[slot] = ptr;
1561 continue;
1562 }
1563
1564 new_ptr_pp = &new_n->slots[slot];
1565 cursor = ptr;
1566 goto descend;
1567 }
1568
1569 pr_devel("-- compress node %p --\n", new_n);
1570
1571 /* Count up the number of empty slots in this node and work out the
1572 * subtree leaf count.
1573 */
1574 new_n->nr_leaves_on_branch = 0;
1575 nr_free = 0;
1576 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
1577 ptr = new_n->slots[slot];
1578 if (!ptr)
1579 nr_free++;
1580 else if (assoc_array_ptr_is_leaf(ptr))
1581 new_n->nr_leaves_on_branch++;
1582 }
1583 pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch);
1584
1585 /* See what we can fold in */
1586 next_slot = 0;
1587 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
1588 struct assoc_array_shortcut *s;
1589 struct assoc_array_node *child;
1590
1591 ptr = new_n->slots[slot];
1592 if (!ptr || assoc_array_ptr_is_leaf(ptr))
1593 continue;
1594
1595 s = NULL;
1596 if (assoc_array_ptr_is_shortcut(ptr)) {
1597 s = assoc_array_ptr_to_shortcut(ptr);
1598 ptr = s->next_node;
1599 }
1600
1601 child = assoc_array_ptr_to_node(ptr);
1602 new_n->nr_leaves_on_branch += child->nr_leaves_on_branch;
1603
1604 if (child->nr_leaves_on_branch <= nr_free + 1) {
1605 /* Fold the child node into this one */
1606 pr_devel("[%d] fold node %lu/%d [nx %d]\n",
1607 slot, child->nr_leaves_on_branch, nr_free + 1,
1608 next_slot);
1609
1610 /* We would already have reaped an intervening shortcut
1611 * on the way back up the tree.
1612 */
1613 BUG_ON(s);
1614
1615 new_n->slots[slot] = NULL;
1616 nr_free++;
1617 if (slot < next_slot)
1618 next_slot = slot;
1619 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
1620 struct assoc_array_ptr *p = child->slots[i];
1621 if (!p)
1622 continue;
1623 BUG_ON(assoc_array_ptr_is_meta(p));
1624 while (new_n->slots[next_slot])
1625 next_slot++;
1626 BUG_ON(next_slot >= ASSOC_ARRAY_FAN_OUT);
1627 new_n->slots[next_slot++] = p;
1628 nr_free--;
1629 }
1630 kfree(child);
1631 } else {
1632 pr_devel("[%d] retain node %lu/%d [nx %d]\n",
1633 slot, child->nr_leaves_on_branch, nr_free + 1,
1634 next_slot);
1635 }
1636 }
1637
1638 pr_devel("after: %lu\n", new_n->nr_leaves_on_branch);
1639
1640 nr_leaves_on_tree = new_n->nr_leaves_on_branch;
1641
1642 /* Excise this node if it is singly occupied by a shortcut */
1643 if (nr_free == ASSOC_ARRAY_FAN_OUT - 1) {
1644 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++)
1645 if ((ptr = new_n->slots[slot]))
1646 break;
1647
1648 if (assoc_array_ptr_is_meta(ptr) &&
1649 assoc_array_ptr_is_shortcut(ptr)) {
1650 pr_devel("excise node %p with 1 shortcut\n", new_n);
1651 new_s = assoc_array_ptr_to_shortcut(ptr);
1652 new_parent = new_n->back_pointer;
1653 slot = new_n->parent_slot;
1654 kfree(new_n);
1655 if (!new_parent) {
1656 new_s->back_pointer = NULL;
1657 new_s->parent_slot = 0;
1658 new_root = ptr;
1659 goto gc_complete;
1660 }
1661
1662 if (assoc_array_ptr_is_shortcut(new_parent)) {
1663 /* We can discard any preceding shortcut also */
1664 struct assoc_array_shortcut *s =
1665 assoc_array_ptr_to_shortcut(new_parent);
1666
1667 pr_devel("excise preceding shortcut\n");
1668
1669 new_parent = new_s->back_pointer = s->back_pointer;
1670 slot = new_s->parent_slot = s->parent_slot;
1671 kfree(s);
1672 if (!new_parent) {
1673 new_s->back_pointer = NULL;
1674 new_s->parent_slot = 0;
1675 new_root = ptr;
1676 goto gc_complete;
1677 }
1678 }
1679
1680 new_s->back_pointer = new_parent;
1681 new_s->parent_slot = slot;
1682 new_n = assoc_array_ptr_to_node(new_parent);
1683 new_n->slots[slot] = ptr;
1684 goto ascend_old_tree;
1685 }
1686 }
1687
1688 /* Excise any shortcuts we might encounter that point to nodes that
1689 * only contain leaves.
1690 */
1691 ptr = new_n->back_pointer;
1692 if (!ptr)
1693 goto gc_complete;
1694
1695 if (assoc_array_ptr_is_shortcut(ptr)) {
1696 new_s = assoc_array_ptr_to_shortcut(ptr);
1697 new_parent = new_s->back_pointer;
1698 slot = new_s->parent_slot;
1699
1700 if (new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) {
1701 struct assoc_array_node *n;
1702
1703 pr_devel("excise shortcut\n");
1704 new_n->back_pointer = new_parent;
1705 new_n->parent_slot = slot;
1706 kfree(new_s);
1707 if (!new_parent) {
1708 new_root = assoc_array_node_to_ptr(new_n);
1709 goto gc_complete;
1710 }
1711
1712 n = assoc_array_ptr_to_node(new_parent);
1713 n->slots[slot] = assoc_array_node_to_ptr(new_n);
1714 }
1715 } else {
1716 new_parent = ptr;
1717 }
1718 new_n = assoc_array_ptr_to_node(new_parent);
1719
1720ascend_old_tree:
1721 ptr = node->back_pointer;
1722 if (assoc_array_ptr_is_shortcut(ptr)) {
1723 shortcut = assoc_array_ptr_to_shortcut(ptr);
1724 slot = shortcut->parent_slot;
1725 cursor = shortcut->back_pointer;
1726 } else {
1727 slot = node->parent_slot;
1728 cursor = ptr;
1729 }
1730 BUG_ON(!ptr);
1731 node = assoc_array_ptr_to_node(cursor);
1732 slot++;
1733 goto continue_node;
1734
1735gc_complete:
1736 edit->set[0].to = new_root;
1737 assoc_array_apply_edit(edit);
1738 edit->array->nr_leaves_on_tree = nr_leaves_on_tree;
1739 return 0;
1740
1741enomem:
1742 pr_devel("enomem\n");
1743 assoc_array_destroy_subtree(new_root, edit->ops);
1744 kfree(edit);
1745 return -ENOMEM;
1746}
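
The iterator contract described above can be illustrated with a hedged sketch; struct my_object, its fields and the ops pointer in the trailing comment are invented for the example:

#include <linux/atomic.h>
#include <linux/types.h>

struct my_object {
	atomic_t	usage;
	bool		defunct;
};

/* Return false to discard the object; take a reference before keeping it. */
static bool my_gc_iterator(void *object, void *iterator_data)
{
	struct my_object *obj = object;

	if (obj->defunct)
		return false;

	atomic_inc(&obj->usage);	/* retained: GC keeps this leaf */
	return true;
}

/* Typical call, under the caller's write lock:
 *	assoc_array_gc(array, ops, my_gc_iterator, NULL);
 */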
diff --git a/lib/crc32.c b/lib/crc32.c
index 410093dbe51c..70f00ca5ef1e 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -29,6 +29,7 @@
29#include <linux/crc32.h> 29#include <linux/crc32.h>
30#include <linux/module.h> 30#include <linux/module.h>
31#include <linux/types.h> 31#include <linux/types.h>
32#include <linux/sched.h>
32#include "crc32defs.h" 33#include "crc32defs.h"
33 34
34#if CRC_LE_BITS > 8 35#if CRC_LE_BITS > 8
@@ -49,6 +50,30 @@ MODULE_AUTHOR("Matt Domsch <Matt_Domsch@dell.com>");
49MODULE_DESCRIPTION("Various CRC32 calculations"); 50MODULE_DESCRIPTION("Various CRC32 calculations");
50MODULE_LICENSE("GPL"); 51MODULE_LICENSE("GPL");
51 52
53#define GF2_DIM 32
54
55static u32 gf2_matrix_times(u32 *mat, u32 vec)
56{
57 u32 sum = 0;
58
59 while (vec) {
60 if (vec & 1)
61 sum ^= *mat;
62 vec >>= 1;
63 mat++;
64 }
65
66 return sum;
67}
68
69static void gf2_matrix_square(u32 *square, u32 *mat)
70{
71 int i;
72
73 for (i = 0; i < GF2_DIM; i++)
74 square[i] = gf2_matrix_times(mat, mat[i]);
75}
76
52#if CRC_LE_BITS > 8 || CRC_BE_BITS > 8 77#if CRC_LE_BITS > 8 || CRC_BE_BITS > 8
53 78
54/* implements slicing-by-4 or slicing-by-8 algorithm */ 79/* implements slicing-by-4 or slicing-by-8 algorithm */
@@ -130,6 +155,52 @@ crc32_body(u32 crc, unsigned char const *buf, size_t len, const u32 (*tab)[256])
130} 155}
131#endif 156#endif
132 157
158/* For conditions of distribution and use, see copyright notice in zlib.h */
159static u32 crc32_generic_combine(u32 crc1, u32 crc2, size_t len2,
160 u32 polynomial)
161{
162 u32 even[GF2_DIM]; /* Even-power-of-two zeros operator */
163 u32 odd[GF2_DIM]; /* Odd-power-of-two zeros operator */
164 u32 row;
165 int i;
166
167 if (len2 <= 0)
168 return crc1;
169
170 /* Put operator for one zero bit in odd */
171 odd[0] = polynomial;
172 row = 1;
173 for (i = 1; i < GF2_DIM; i++) {
174 odd[i] = row;
175 row <<= 1;
176 }
177
178 gf2_matrix_square(even, odd); /* Put operator for two zero bits in even */
179 gf2_matrix_square(odd, even); /* Put operator for four zero bits in odd */
180
181 /* Apply len2 zeros to crc1 (first square will put the operator for one
182 * zero byte, eight zero bits, in even).
183 */
184 do {
185 /* Apply zeros operator for this bit of len2 */
186 gf2_matrix_square(even, odd);
187 if (len2 & 1)
188 crc1 = gf2_matrix_times(even, crc1);
189 len2 >>= 1;
190 /* If no more bits set, then done */
191 if (len2 == 0)
192 break;
193 /* Another iteration of the loop with odd and even swapped */
194 gf2_matrix_square(odd, even);
195 if (len2 & 1)
196 crc1 = gf2_matrix_times(odd, crc1);
197 len2 >>= 1;
198 } while (len2 != 0);
199
200 crc1 ^= crc2;
201 return crc1;
202}
203
133/** 204/**
134 * crc32_le_generic() - Calculate bitwise little-endian Ethernet AUTODIN II 205 * crc32_le_generic() - Calculate bitwise little-endian Ethernet AUTODIN II
135 * CRC32/CRC32C 206 * CRC32/CRC32C
@@ -200,8 +271,19 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len)
200 (const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE); 271 (const u32 (*)[256])crc32ctable_le, CRC32C_POLY_LE);
201} 272}
202#endif 273#endif
274u32 __pure crc32_le_combine(u32 crc1, u32 crc2, size_t len2)
275{
276 return crc32_generic_combine(crc1, crc2, len2, CRCPOLY_LE);
277}
278
279u32 __pure __crc32c_le_combine(u32 crc1, u32 crc2, size_t len2)
280{
281 return crc32_generic_combine(crc1, crc2, len2, CRC32C_POLY_LE);
282}
203EXPORT_SYMBOL(crc32_le); 283EXPORT_SYMBOL(crc32_le);
284EXPORT_SYMBOL(crc32_le_combine);
204EXPORT_SYMBOL(__crc32c_le); 285EXPORT_SYMBOL(__crc32c_le);
286EXPORT_SYMBOL(__crc32c_le_combine);
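
The exported combine helpers satisfy the identity the self-tests added further down exercise: the CRC of a concatenated buffer equals the combination of the CRCs of its two halves. A minimal sketch, assuming only crc32_le() and the new crc32_le_combine():

#include <linux/crc32.h>
#include <linux/types.h>

/* True iff crc32 of buf[0..len) equals the combination of the two halves. */
static bool crc32_combine_holds(const u8 *buf, size_t len, size_t split)
{
	u32 whole = crc32_le(~0U, buf, len);
	u32 head  = crc32_le(~0U, buf, split);
	u32 tail  = crc32_le(0, buf + split, len - split);

	return whole == crc32_le_combine(head, tail, len - split);
}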
205 287
206/** 288/**
207 * crc32_be_generic() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32 289 * crc32_be_generic() - Calculate bitwise big-endian Ethernet AUTODIN II CRC32
@@ -795,206 +877,106 @@ static struct crc_test {
795 u32 crc32c_le; /* expected crc32c_le result */ 877 u32 crc32c_le; /* expected crc32c_le result */
796} test[] = 878} test[] =
797{ 879{
798 {0x674bf11d, 0x00000038, 0x00000542, 0x0af6d466, 0xd8b6e4c1, 880 {0x674bf11d, 0x00000038, 0x00000542, 0x0af6d466, 0xd8b6e4c1, 0xf6e93d6c},
799 0xf6e93d6c}, 881 {0x35c672c6, 0x0000003a, 0x000001aa, 0xc6d3dfba, 0x28aaf3ad, 0x0fe92aca},
800 {0x35c672c6, 0x0000003a, 0x000001aa, 0xc6d3dfba, 0x28aaf3ad, 882 {0x496da28e, 0x00000039, 0x000005af, 0xd933660f, 0x5d57e81f, 0x52e1ebb8},
801 0x0fe92aca}, 883 {0x09a9b90e, 0x00000027, 0x000001f8, 0xb45fe007, 0xf45fca9a, 0x0798af9a},
802 {0x496da28e, 0x00000039, 0x000005af, 0xd933660f, 0x5d57e81f, 884 {0xdc97e5a9, 0x00000025, 0x000003b6, 0xf81a3562, 0xe0126ba2, 0x18eb3152},
803 0x52e1ebb8}, 885 {0x47c58900, 0x0000000a, 0x000000b9, 0x8e58eccf, 0xf3afc793, 0xd00d08c7},
804 {0x09a9b90e, 0x00000027, 0x000001f8, 0xb45fe007, 0xf45fca9a, 886 {0x292561e8, 0x0000000c, 0x00000403, 0xa2ba8aaf, 0x0b797aed, 0x8ba966bc},
805 0x0798af9a}, 887 {0x415037f6, 0x00000003, 0x00000676, 0xa17d52e8, 0x7f0fdf35, 0x11d694a2},
806 {0xdc97e5a9, 0x00000025, 0x000003b6, 0xf81a3562, 0xe0126ba2, 888 {0x3466e707, 0x00000026, 0x00000042, 0x258319be, 0x75c484a2, 0x6ab3208d},
807 0x18eb3152}, 889 {0xafd1281b, 0x00000023, 0x000002ee, 0x4428eaf8, 0x06c7ad10, 0xba4603c5},
808 {0x47c58900, 0x0000000a, 0x000000b9, 0x8e58eccf, 0xf3afc793, 890 {0xd3857b18, 0x00000028, 0x000004a2, 0x5c430821, 0xb062b7cb, 0xe6071c6f},
809 0xd00d08c7}, 891 {0x1d825a8f, 0x0000002b, 0x0000050b, 0xd2c45f0c, 0xd68634e0, 0x179ec30a},
810 {0x292561e8, 0x0000000c, 0x00000403, 0xa2ba8aaf, 0x0b797aed, 892 {0x5033e3bc, 0x0000000b, 0x00000078, 0xa3ea4113, 0xac6d31fb, 0x0903beb8},
811 0x8ba966bc}, 893 {0x94f1fb5e, 0x0000000f, 0x000003a2, 0xfbfc50b1, 0x3cfe50ed, 0x6a7cb4fa},
812 {0x415037f6, 0x00000003, 0x00000676, 0xa17d52e8, 0x7f0fdf35, 894 {0xc9a0fe14, 0x00000009, 0x00000473, 0x5fb61894, 0x87070591, 0xdb535801},
813 0x11d694a2}, 895 {0x88a034b1, 0x0000001c, 0x000005ad, 0xc1b16053, 0x46f95c67, 0x92bed597},
814 {0x3466e707, 0x00000026, 0x00000042, 0x258319be, 0x75c484a2, 896 {0xf0f72239, 0x00000020, 0x0000026d, 0xa6fa58f3, 0xf8c2c1dd, 0x192a3f1b},
815 0x6ab3208d}, 897 {0xcc20a5e3, 0x0000003b, 0x0000067a, 0x7740185a, 0x308b979a, 0xccbaec1a},
816 {0xafd1281b, 0x00000023, 0x000002ee, 0x4428eaf8, 0x06c7ad10, 898 {0xce589c95, 0x0000002b, 0x00000641, 0xd055e987, 0x40aae25b, 0x7eabae4d},
817 0xba4603c5}, 899 {0x78edc885, 0x00000035, 0x000005be, 0xa39cb14b, 0x035b0d1f, 0x28c72982},
818 {0xd3857b18, 0x00000028, 0x000004a2, 0x5c430821, 0xb062b7cb, 900 {0x9d40a377, 0x0000003b, 0x00000038, 0x1f47ccd2, 0x197fbc9d, 0xc3cd4d18},
819 0xe6071c6f}, 901 {0x703d0e01, 0x0000003c, 0x000006f1, 0x88735e7c, 0xfed57c5a, 0xbca8f0e7},
820 {0x1d825a8f, 0x0000002b, 0x0000050b, 0xd2c45f0c, 0xd68634e0, 902 {0x776bf505, 0x0000000f, 0x000005b2, 0x5cc4fc01, 0xf32efb97, 0x713f60b3},
821 0x179ec30a}, 903 {0x4a3e7854, 0x00000027, 0x000004b8, 0x8d923c82, 0x0cbfb4a2, 0xebd08fd5},
822 {0x5033e3bc, 0x0000000b, 0x00000078, 0xa3ea4113, 0xac6d31fb, 904 {0x209172dd, 0x0000003b, 0x00000356, 0xb89e9c2b, 0xd7868138, 0x64406c59},
823 0x0903beb8}, 905 {0x3ba4cc5b, 0x0000002f, 0x00000203, 0xe51601a9, 0x5b2a1032, 0x7421890e},
824 {0x94f1fb5e, 0x0000000f, 0x000003a2, 0xfbfc50b1, 0x3cfe50ed, 906 {0xfc62f297, 0x00000000, 0x00000079, 0x71a8e1a2, 0x5d88685f, 0xe9347603},
825 0x6a7cb4fa}, 907 {0x64280b8b, 0x00000016, 0x000007ab, 0x0fa7a30c, 0xda3a455f, 0x1bef9060},
826 {0xc9a0fe14, 0x00000009, 0x00000473, 0x5fb61894, 0x87070591, 908 {0x97dd724b, 0x00000033, 0x000007ad, 0x5788b2f4, 0xd7326d32, 0x34720072},
827 0xdb535801}, 909 {0x61394b52, 0x00000035, 0x00000571, 0xc66525f1, 0xcabe7fef, 0x48310f59},
828 {0x88a034b1, 0x0000001c, 0x000005ad, 0xc1b16053, 0x46f95c67, 910 {0x29b4faff, 0x00000024, 0x0000006e, 0xca13751e, 0x993648e0, 0x783a4213},
829 0x92bed597}, 911 {0x29bfb1dc, 0x0000000b, 0x00000244, 0x436c43f7, 0x429f7a59, 0x9e8efd41},
830 {0xf0f72239, 0x00000020, 0x0000026d, 0xa6fa58f3, 0xf8c2c1dd, 912 {0x86ae934b, 0x00000035, 0x00000104, 0x0760ec93, 0x9cf7d0f4, 0xfc3d34a5},
831 0x192a3f1b}, 913 {0xc4c1024e, 0x0000002e, 0x000006b1, 0x6516a3ec, 0x19321f9c, 0x17a52ae2},
832 {0xcc20a5e3, 0x0000003b, 0x0000067a, 0x7740185a, 0x308b979a, 914 {0x3287a80a, 0x00000026, 0x00000496, 0x0b257eb1, 0x754ebd51, 0x886d935a},
833 0xccbaec1a}, 915 {0xa4db423e, 0x00000023, 0x0000045d, 0x9b3a66dc, 0x873e9f11, 0xeaaeaeb2},
834 {0xce589c95, 0x0000002b, 0x00000641, 0xd055e987, 0x40aae25b, 916 {0x7a1078df, 0x00000015, 0x0000014a, 0x8c2484c5, 0x6a628659, 0x8e900a4b},
835 0x7eabae4d}, 917 {0x6048bd5b, 0x00000006, 0x0000006a, 0x897e3559, 0xac9961af, 0xd74662b1},
836 {0x78edc885, 0x00000035, 0x000005be, 0xa39cb14b, 0x035b0d1f, 918 {0xd8f9ea20, 0x0000003d, 0x00000277, 0x60eb905b, 0xed2aaf99, 0xd26752ba},
837 0x28c72982}, 919 {0xea5ec3b4, 0x0000002a, 0x000004fe, 0x869965dc, 0x6c1f833b, 0x8b1fcd62},
838 {0x9d40a377, 0x0000003b, 0x00000038, 0x1f47ccd2, 0x197fbc9d, 920 {0x2dfb005d, 0x00000016, 0x00000345, 0x6a3b117e, 0xf05e8521, 0xf54342fe},
839 0xc3cd4d18}, 921 {0x5a214ade, 0x00000020, 0x000005b6, 0x467f70be, 0xcb22ccd3, 0x5b95b988},
840 {0x703d0e01, 0x0000003c, 0x000006f1, 0x88735e7c, 0xfed57c5a, 922 {0xf0ab9cca, 0x00000032, 0x00000515, 0xed223df3, 0x7f3ef01d, 0x2e1176be},
841 0xbca8f0e7}, 923 {0x91b444f9, 0x0000002e, 0x000007f8, 0x84e9a983, 0x5676756f, 0x66120546},
842 {0x776bf505, 0x0000000f, 0x000005b2, 0x5cc4fc01, 0xf32efb97, 924 {0x1b5d2ddb, 0x0000002e, 0x0000012c, 0xba638c4c, 0x3f42047b, 0xf256a5cc},
843 0x713f60b3}, 925 {0xd824d1bb, 0x0000003a, 0x000007b5, 0x6288653b, 0x3a3ebea0, 0x4af1dd69},
844 {0x4a3e7854, 0x00000027, 0x000004b8, 0x8d923c82, 0x0cbfb4a2, 926 {0x0470180c, 0x00000034, 0x000001f0, 0x9d5b80d6, 0x3de08195, 0x56f0a04a},
845 0xebd08fd5}, 927 {0xffaa3a3f, 0x00000036, 0x00000299, 0xf3a82ab8, 0x53e0c13d, 0x74f6b6b2},
846 {0x209172dd, 0x0000003b, 0x00000356, 0xb89e9c2b, 0xd7868138, 928 {0x6406cfeb, 0x00000023, 0x00000600, 0xa920b8e8, 0xe4e2acf4, 0x085951fd},
847 0x64406c59}, 929 {0xb24aaa38, 0x0000003e, 0x000004a1, 0x657cc328, 0x5077b2c3, 0xc65387eb},
848 {0x3ba4cc5b, 0x0000002f, 0x00000203, 0xe51601a9, 0x5b2a1032, 930 {0x58b2ab7c, 0x00000039, 0x000002b4, 0x3a17ee7e, 0x9dcb3643, 0x1ca9257b},
849 0x7421890e}, 931 {0x3db85970, 0x00000006, 0x000002b6, 0x95268b59, 0xb9812c10, 0xfd196d76},
850 {0xfc62f297, 0x00000000, 0x00000079, 0x71a8e1a2, 0x5d88685f, 932 {0x857830c5, 0x00000003, 0x00000590, 0x4ef439d5, 0xf042161d, 0x5ef88339},
851 0xe9347603}, 933 {0xe1fcd978, 0x0000003e, 0x000007d8, 0xae8d8699, 0xce0a1ef5, 0x2c3714d9},
852 {0x64280b8b, 0x00000016, 0x000007ab, 0x0fa7a30c, 0xda3a455f, 934 {0xb982a768, 0x00000016, 0x000006e0, 0x62fad3df, 0x5f8a067b, 0x58576548},
853 0x1bef9060}, 935 {0x1d581ce8, 0x0000001e, 0x0000058b, 0xf0f5da53, 0x26e39eee, 0xfd7c57de},
854 {0x97dd724b, 0x00000033, 0x000007ad, 0x5788b2f4, 0xd7326d32, 936 {0x2456719b, 0x00000025, 0x00000503, 0x4296ac64, 0xd50e4c14, 0xd5fedd59},
855 0x34720072}, 937 {0xfae6d8f2, 0x00000000, 0x0000055d, 0x057fdf2e, 0x2a31391a, 0x1cc3b17b},
856 {0x61394b52, 0x00000035, 0x00000571, 0xc66525f1, 0xcabe7fef, 938 {0xcba828e3, 0x00000039, 0x000002ce, 0xe3f22351, 0x8f00877b, 0x270eed73},
857 0x48310f59}, 939 {0x13d25952, 0x0000000a, 0x0000072d, 0x76d4b4cc, 0x5eb67ec3, 0x91ecbb11},
858 {0x29b4faff, 0x00000024, 0x0000006e, 0xca13751e, 0x993648e0, 940 {0x0342be3f, 0x00000015, 0x00000599, 0xec75d9f1, 0x9d4d2826, 0x05ed8d0c},
859 0x783a4213}, 941 {0xeaa344e0, 0x00000014, 0x000004d8, 0x72a4c981, 0x2064ea06, 0x0b09ad5b},
860 {0x29bfb1dc, 0x0000000b, 0x00000244, 0x436c43f7, 0x429f7a59, 942 {0xbbb52021, 0x0000003b, 0x00000272, 0x04af99fc, 0xaf042d35, 0xf8d511fb},
861 0x9e8efd41}, 943 {0xb66384dc, 0x0000001d, 0x000007fc, 0xd7629116, 0x782bd801, 0x5ad832cc},
862 {0x86ae934b, 0x00000035, 0x00000104, 0x0760ec93, 0x9cf7d0f4, 944 {0x616c01b6, 0x00000022, 0x000002c8, 0x5b1dab30, 0x783ce7d2, 0x1214d196},
863 0xfc3d34a5}, 945 {0xce2bdaad, 0x00000016, 0x0000062a, 0x932535c8, 0x3f02926d, 0x5747218a},
864 {0xc4c1024e, 0x0000002e, 0x000006b1, 0x6516a3ec, 0x19321f9c, 946 {0x00fe84d7, 0x00000005, 0x00000205, 0x850e50aa, 0x753d649c, 0xde8f14de},
865 0x17a52ae2}, 947 {0xbebdcb4c, 0x00000006, 0x0000055d, 0xbeaa37a2, 0x2d8c9eba, 0x3563b7b9},
866 {0x3287a80a, 0x00000026, 0x00000496, 0x0b257eb1, 0x754ebd51, 948 {0xd8b1a02a, 0x00000010, 0x00000387, 0x5017d2fc, 0x503541a5, 0x071475d0},
867 0x886d935a}, 949 {0x3b96cad2, 0x00000036, 0x00000347, 0x1d2372ae, 0x926cd90b, 0x54c79d60},
868 {0xa4db423e, 0x00000023, 0x0000045d, 0x9b3a66dc, 0x873e9f11, 950 {0xc94c1ed7, 0x00000005, 0x0000038b, 0x9e9fdb22, 0x144a9178, 0x4c53eee6},
869 0xeaaeaeb2}, 951 {0x1aad454e, 0x00000025, 0x000002b2, 0xc3f6315c, 0x5c7a35b3, 0x10137a3c},
870 {0x7a1078df, 0x00000015, 0x0000014a, 0x8c2484c5, 0x6a628659, 952 {0xa4fec9a6, 0x00000000, 0x000006d6, 0x90be5080, 0xa4107605, 0xaa9d6c73},
871 0x8e900a4b}, 953 {0x1bbe71e2, 0x0000001f, 0x000002fd, 0x4e504c3b, 0x284ccaf1, 0xb63d23e7},
872 {0x6048bd5b, 0x00000006, 0x0000006a, 0x897e3559, 0xac9961af, 954 {0x4201c7e4, 0x00000002, 0x000002b7, 0x7822e3f9, 0x0cc912a9, 0x7f53e9cf},
873 0xd74662b1}, 955 {0x23fddc96, 0x00000003, 0x00000627, 0x8a385125, 0x07767e78, 0x13c1cd83},
874 {0xd8f9ea20, 0x0000003d, 0x00000277, 0x60eb905b, 0xed2aaf99, 956 {0xd82ba25c, 0x00000016, 0x0000063e, 0x98e4148a, 0x283330c9, 0x49ff5867},
875 0xd26752ba}, 957 {0x786f2032, 0x0000002d, 0x0000060f, 0xf201600a, 0xf561bfcd, 0x8467f211},
876 {0xea5ec3b4, 0x0000002a, 0x000004fe, 0x869965dc, 0x6c1f833b, 958 {0xfebe4e1f, 0x0000002a, 0x000004f2, 0x95e51961, 0xfd80dcab, 0x3f9683b2},
877 0x8b1fcd62}, 959 {0x1a6e0a39, 0x00000008, 0x00000672, 0x8af6c2a5, 0x78dd84cb, 0x76a3f874},
878 {0x2dfb005d, 0x00000016, 0x00000345, 0x6a3b117e, 0xf05e8521, 960 {0x56000ab8, 0x0000000e, 0x000000e5, 0x36bacb8f, 0x22ee1f77, 0x863b702f},
879 0xf54342fe}, 961 {0x4717fe0c, 0x00000000, 0x000006ec, 0x8439f342, 0x5c8e03da, 0xdc6c58ff},
880 {0x5a214ade, 0x00000020, 0x000005b6, 0x467f70be, 0xcb22ccd3, 962 {0xd5d5d68e, 0x0000003c, 0x000003a3, 0x46fff083, 0x177d1b39, 0x0622cc95},
881 0x5b95b988}, 963 {0xc25dd6c6, 0x00000024, 0x000006c0, 0x5ceb8eb4, 0x892b0d16, 0xe85605cd},
882 {0xf0ab9cca, 0x00000032, 0x00000515, 0xed223df3, 0x7f3ef01d, 964 {0xe9b11300, 0x00000023, 0x00000683, 0x07a5d59a, 0x6c6a3208, 0x31da5f06},
883 0x2e1176be}, 965 {0x95cd285e, 0x00000001, 0x00000047, 0x7b3a4368, 0x0202c07e, 0xa1f2e784},
884 {0x91b444f9, 0x0000002e, 0x000007f8, 0x84e9a983, 0x5676756f, 966 {0xd9245a25, 0x0000001e, 0x000003a6, 0xd33c1841, 0x1936c0d5, 0xb07cc616},
885 0x66120546}, 967 {0x103279db, 0x00000006, 0x0000039b, 0xca09b8a0, 0x77d62892, 0xbf943b6c},
886 {0x1b5d2ddb, 0x0000002e, 0x0000012c, 0xba638c4c, 0x3f42047b, 968 {0x1cba3172, 0x00000027, 0x000001c8, 0xcb377194, 0xebe682db, 0x2c01af1c},
887 0xf256a5cc}, 969 {0x8f613739, 0x0000000c, 0x000001df, 0xb4b0bc87, 0x7710bd43, 0x0fe5f56d},
888 {0xd824d1bb, 0x0000003a, 0x000007b5, 0x6288653b, 0x3a3ebea0, 970 {0x1c6aa90d, 0x0000001b, 0x0000053c, 0x70559245, 0xda7894ac, 0xf8943b2d},
889 0x4af1dd69}, 971 {0xaabe5b93, 0x0000003d, 0x00000715, 0xcdbf42fa, 0x0c3b99e7, 0xe4d89272},
890 {0x0470180c, 0x00000034, 0x000001f0, 0x9d5b80d6, 0x3de08195, 972 {0xf15dd038, 0x00000006, 0x000006db, 0x6e104aea, 0x8d5967f2, 0x7c2f6bbb},
891 0x56f0a04a}, 973 {0x584dd49c, 0x00000020, 0x000007bc, 0x36b6cfd6, 0xad4e23b2, 0xabbf388b},
892 {0xffaa3a3f, 0x00000036, 0x00000299, 0xf3a82ab8, 0x53e0c13d, 974 {0x5d8c9506, 0x00000020, 0x00000470, 0x4c62378e, 0x31d92640, 0x1dca1f4e},
893 0x74f6b6b2}, 975 {0xb80d17b0, 0x00000032, 0x00000346, 0x22a5bb88, 0x9a7ec89f, 0x5c170e23},
894 {0x6406cfeb, 0x00000023, 0x00000600, 0xa920b8e8, 0xe4e2acf4, 976 {0xdaf0592e, 0x00000023, 0x000007b0, 0x3cab3f99, 0x9b1fdd99, 0xc0e9d672},
895 0x085951fd}, 977 {0x4793cc85, 0x0000000d, 0x00000706, 0xe82e04f6, 0xed3db6b7, 0xc18bdc86},
896 {0xb24aaa38, 0x0000003e, 0x000004a1, 0x657cc328, 0x5077b2c3, 978 {0x82ebf64e, 0x00000009, 0x000007c3, 0x69d590a9, 0x9efa8499, 0xa874fcdd},
897 0xc65387eb}, 979 {0xb18a0319, 0x00000026, 0x000007db, 0x1cf98dcc, 0x8fa9ad6a, 0x9dc0bb48},
898 {0x58b2ab7c, 0x00000039, 0x000002b4, 0x3a17ee7e, 0x9dcb3643,
899 0x1ca9257b},
900 {0x3db85970, 0x00000006, 0x000002b6, 0x95268b59, 0xb9812c10,
901 0xfd196d76},
902 {0x857830c5, 0x00000003, 0x00000590, 0x4ef439d5, 0xf042161d,
903 0x5ef88339},
904 {0xe1fcd978, 0x0000003e, 0x000007d8, 0xae8d8699, 0xce0a1ef5,
905 0x2c3714d9},
906 {0xb982a768, 0x00000016, 0x000006e0, 0x62fad3df, 0x5f8a067b,
907 0x58576548},
908 {0x1d581ce8, 0x0000001e, 0x0000058b, 0xf0f5da53, 0x26e39eee,
909 0xfd7c57de},
910 {0x2456719b, 0x00000025, 0x00000503, 0x4296ac64, 0xd50e4c14,
911 0xd5fedd59},
912 {0xfae6d8f2, 0x00000000, 0x0000055d, 0x057fdf2e, 0x2a31391a,
913 0x1cc3b17b},
914 {0xcba828e3, 0x00000039, 0x000002ce, 0xe3f22351, 0x8f00877b,
915 0x270eed73},
916 {0x13d25952, 0x0000000a, 0x0000072d, 0x76d4b4cc, 0x5eb67ec3,
917 0x91ecbb11},
918 {0x0342be3f, 0x00000015, 0x00000599, 0xec75d9f1, 0x9d4d2826,
919 0x05ed8d0c},
920 {0xeaa344e0, 0x00000014, 0x000004d8, 0x72a4c981, 0x2064ea06,
921 0x0b09ad5b},
922 {0xbbb52021, 0x0000003b, 0x00000272, 0x04af99fc, 0xaf042d35,
923 0xf8d511fb},
924 {0xb66384dc, 0x0000001d, 0x000007fc, 0xd7629116, 0x782bd801,
925 0x5ad832cc},
926 {0x616c01b6, 0x00000022, 0x000002c8, 0x5b1dab30, 0x783ce7d2,
927 0x1214d196},
928 {0xce2bdaad, 0x00000016, 0x0000062a, 0x932535c8, 0x3f02926d,
929 0x5747218a},
930 {0x00fe84d7, 0x00000005, 0x00000205, 0x850e50aa, 0x753d649c,
931 0xde8f14de},
932 {0xbebdcb4c, 0x00000006, 0x0000055d, 0xbeaa37a2, 0x2d8c9eba,
933 0x3563b7b9},
934 {0xd8b1a02a, 0x00000010, 0x00000387, 0x5017d2fc, 0x503541a5,
935 0x071475d0},
936 {0x3b96cad2, 0x00000036, 0x00000347, 0x1d2372ae, 0x926cd90b,
937 0x54c79d60},
938 {0xc94c1ed7, 0x00000005, 0x0000038b, 0x9e9fdb22, 0x144a9178,
939 0x4c53eee6},
940 {0x1aad454e, 0x00000025, 0x000002b2, 0xc3f6315c, 0x5c7a35b3,
941 0x10137a3c},
942 {0xa4fec9a6, 0x00000000, 0x000006d6, 0x90be5080, 0xa4107605,
943 0xaa9d6c73},
944 {0x1bbe71e2, 0x0000001f, 0x000002fd, 0x4e504c3b, 0x284ccaf1,
945 0xb63d23e7},
946 {0x4201c7e4, 0x00000002, 0x000002b7, 0x7822e3f9, 0x0cc912a9,
947 0x7f53e9cf},
948 {0x23fddc96, 0x00000003, 0x00000627, 0x8a385125, 0x07767e78,
949 0x13c1cd83},
950 {0xd82ba25c, 0x00000016, 0x0000063e, 0x98e4148a, 0x283330c9,
951 0x49ff5867},
952 {0x786f2032, 0x0000002d, 0x0000060f, 0xf201600a, 0xf561bfcd,
953 0x8467f211},
954 {0xfebe4e1f, 0x0000002a, 0x000004f2, 0x95e51961, 0xfd80dcab,
955 0x3f9683b2},
956 {0x1a6e0a39, 0x00000008, 0x00000672, 0x8af6c2a5, 0x78dd84cb,
957 0x76a3f874},
958 {0x56000ab8, 0x0000000e, 0x000000e5, 0x36bacb8f, 0x22ee1f77,
959 0x863b702f},
960 {0x4717fe0c, 0x00000000, 0x000006ec, 0x8439f342, 0x5c8e03da,
961 0xdc6c58ff},
962 {0xd5d5d68e, 0x0000003c, 0x000003a3, 0x46fff083, 0x177d1b39,
963 0x0622cc95},
964 {0xc25dd6c6, 0x00000024, 0x000006c0, 0x5ceb8eb4, 0x892b0d16,
965 0xe85605cd},
966 {0xe9b11300, 0x00000023, 0x00000683, 0x07a5d59a, 0x6c6a3208,
967 0x31da5f06},
968 {0x95cd285e, 0x00000001, 0x00000047, 0x7b3a4368, 0x0202c07e,
969 0xa1f2e784},
970 {0xd9245a25, 0x0000001e, 0x000003a6, 0xd33c1841, 0x1936c0d5,
971 0xb07cc616},
972 {0x103279db, 0x00000006, 0x0000039b, 0xca09b8a0, 0x77d62892,
973 0xbf943b6c},
974 {0x1cba3172, 0x00000027, 0x000001c8, 0xcb377194, 0xebe682db,
975 0x2c01af1c},
976 {0x8f613739, 0x0000000c, 0x000001df, 0xb4b0bc87, 0x7710bd43,
977 0x0fe5f56d},
978 {0x1c6aa90d, 0x0000001b, 0x0000053c, 0x70559245, 0xda7894ac,
979 0xf8943b2d},
980 {0xaabe5b93, 0x0000003d, 0x00000715, 0xcdbf42fa, 0x0c3b99e7,
981 0xe4d89272},
982 {0xf15dd038, 0x00000006, 0x000006db, 0x6e104aea, 0x8d5967f2,
983 0x7c2f6bbb},
984 {0x584dd49c, 0x00000020, 0x000007bc, 0x36b6cfd6, 0xad4e23b2,
985 0xabbf388b},
986 {0x5d8c9506, 0x00000020, 0x00000470, 0x4c62378e, 0x31d92640,
987 0x1dca1f4e},
988 {0xb80d17b0, 0x00000032, 0x00000346, 0x22a5bb88, 0x9a7ec89f,
989 0x5c170e23},
990 {0xdaf0592e, 0x00000023, 0x000007b0, 0x3cab3f99, 0x9b1fdd99,
991 0xc0e9d672},
992 {0x4793cc85, 0x0000000d, 0x00000706, 0xe82e04f6, 0xed3db6b7,
993 0xc18bdc86},
994 {0x82ebf64e, 0x00000009, 0x000007c3, 0x69d590a9, 0x9efa8499,
995 0xa874fcdd},
996 {0xb18a0319, 0x00000026, 0x000007db, 0x1cf98dcc, 0x8fa9ad6a,
997 0x9dc0bb48},
998}; 980};
999 981
1000#include <linux/time.h> 982#include <linux/time.h>
@@ -1050,6 +1032,41 @@ static int __init crc32c_test(void)
1050 return 0; 1032 return 0;
1051} 1033}
1052 1034
1035static int __init crc32c_combine_test(void)
1036{
1037 int i, j;
1038 int errors = 0, runs = 0;
1039
1040 for (i = 0; i < 10; i++) {
1041 u32 crc_full;
1042
1043 crc_full = __crc32c_le(test[i].crc, test_buf + test[i].start,
1044 test[i].length);
1045 for (j = 0; j <= test[i].length; ++j) {
1046 u32 crc1, crc2;
1047 u32 len1 = j, len2 = test[i].length - j;
1048
1049 crc1 = __crc32c_le(test[i].crc, test_buf +
1050 test[i].start, len1);
1051 crc2 = __crc32c_le(0, test_buf + test[i].start +
1052 len1, len2);
1053
1054 if (!(crc_full == __crc32c_le_combine(crc1, crc2, len2) &&
1055 crc_full == test[i].crc32c_le))
1056 errors++;
1057 runs++;
1058 cond_resched();
1059 }
1060 }
1061
1062 if (errors)
1063 pr_warn("crc32c_combine: %d/%d self tests failed\n", errors, runs);
1064 else
1065 pr_info("crc32c_combine: %d self tests passed\n", runs);
1066
1067 return 0;
1068}
1069
1053static int __init crc32_test(void) 1070static int __init crc32_test(void)
1054{ 1071{
1055 int i; 1072 int i;
@@ -1109,10 +1126,49 @@ static int __init crc32_test(void)
1109 return 0; 1126 return 0;
1110} 1127}
1111 1128
1129static int __init crc32_combine_test(void)
1130{
1131 int i, j;
1132 int errors = 0, runs = 0;
1133
1134 for (i = 0; i < 10; i++) {
1135 u32 crc_full;
1136
1137 crc_full = crc32_le(test[i].crc, test_buf + test[i].start,
1138 test[i].length);
1139 for (j = 0; j <= test[i].length; ++j) {
1140 u32 crc1, crc2;
1141 u32 len1 = j, len2 = test[i].length - j;
1142
1143 crc1 = crc32_le(test[i].crc, test_buf +
1144 test[i].start, len1);
1145 crc2 = crc32_le(0, test_buf + test[i].start +
1146 len1, len2);
1147
1148 if (!(crc_full == crc32_le_combine(crc1, crc2, len2) &&
1149 crc_full == test[i].crc_le))
1150 errors++;
1151 runs++;
1152 cond_resched();
1153 }
1154 }
1155
1156 if (errors)
1157 pr_warn("crc32_combine: %d/%d self tests failed\n", errors, runs);
1158 else
1159 pr_info("crc32_combine: %d self tests passed\n", runs);
1160
1161 return 0;
1162}
1163
1112static int __init crc32test_init(void) 1164static int __init crc32test_init(void)
1113{ 1165{
1114 crc32_test(); 1166 crc32_test();
1115 crc32c_test(); 1167 crc32c_test();
1168
1169 crc32_combine_test();
1170 crc32c_combine_test();
1171
1116 return 0; 1172 return 0;
1117} 1173}
1118 1174
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index bf2c8b1043d8..e0731c3db706 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -196,7 +196,7 @@ static void free_object(struct debug_obj *obj)
196 * initialized: 196 * initialized:
197 */ 197 */
198 if (obj_pool_free > ODEBUG_POOL_SIZE && obj_cache) 198 if (obj_pool_free > ODEBUG_POOL_SIZE && obj_cache)
199 sched = keventd_up() && !work_pending(&debug_obj_work); 199 sched = keventd_up();
200 hlist_add_head(&obj->node, &obj_pool); 200 hlist_add_head(&obj->node, &obj_pool);
201 obj_pool_free++; 201 obj_pool_free++;
202 obj_pool_used--; 202 obj_pool_used--;
diff --git a/lib/digsig.c b/lib/digsig.c
index 2f31e6a45f0a..8793aeda30ca 100644
--- a/lib/digsig.c
+++ b/lib/digsig.c
@@ -209,7 +209,7 @@ int digsig_verify(struct key *keyring, const char *sig, int siglen,
209 kref = keyring_search(make_key_ref(keyring, 1UL), 209 kref = keyring_search(make_key_ref(keyring, 1UL),
210 &key_type_user, name); 210 &key_type_user, name);
211 if (IS_ERR(kref)) 211 if (IS_ERR(kref))
212 key = ERR_PTR(PTR_ERR(kref)); 212 key = ERR_CAST(kref);
213 else 213 else
214 key = key_ref_to_ptr(kref); 214 key = key_ref_to_ptr(kref);
215 } else { 215 } else {
diff --git a/lib/genalloc.c b/lib/genalloc.c
index 26cf20be72b7..dda31168844f 100644
--- a/lib/genalloc.c
+++ b/lib/genalloc.c
@@ -313,6 +313,34 @@ retry:
313EXPORT_SYMBOL(gen_pool_alloc); 313EXPORT_SYMBOL(gen_pool_alloc);
314 314
315/** 315/**
316 * gen_pool_dma_alloc - allocate special memory from the pool for DMA usage
317 * @pool: pool to allocate from
318 * @size: number of bytes to allocate from the pool
319 * @dma: dma-view physical address
320 *
321 * Allocate the requested number of bytes from the specified pool.
322 * Uses the pool allocation function (with first-fit algorithm by default).
 323 * Cannot be used in an NMI handler on architectures without an
 324 * NMI-safe cmpxchg implementation.
325 */
326void *gen_pool_dma_alloc(struct gen_pool *pool, size_t size, dma_addr_t *dma)
327{
328 unsigned long vaddr;
329
330 if (!pool)
331 return NULL;
332
333 vaddr = gen_pool_alloc(pool, size);
334 if (!vaddr)
335 return NULL;
336
337 *dma = gen_pool_virt_to_phys(pool, vaddr);
338
339 return (void *)vaddr;
340}
341EXPORT_SYMBOL(gen_pool_dma_alloc);
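
A hedged usage sketch for the new DMA helper; the pool is assumed to have been created and populated elsewhere (for example via gen_pool_create() and gen_pool_add_virt()):

#include <linux/genalloc.h>

static void *alloc_device_buffer(struct gen_pool *pool, size_t size,
				 dma_addr_t *dma)
{
	/* Returns the CPU address; *dma receives the device-view address. */
	void *vaddr = gen_pool_dma_alloc(pool, size, dma);

	if (!vaddr)
		return NULL;	/* pool exhausted */
	return vaddr;
}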
342
343/**
316 * gen_pool_free - free allocated special memory back to the pool 344 * gen_pool_free - free allocated special memory back to the pool
317 * @pool: pool to free to 345 * @pool: pool to free to
318 * @addr: starting address of memory to free back to pool 346 * @addr: starting address of memory to free back to pool
diff --git a/lib/kfifo.c b/lib/kfifo.c
index 7b7f83027b7b..d79b9d222065 100644
--- a/lib/kfifo.c
+++ b/lib/kfifo.c
@@ -215,7 +215,7 @@ static unsigned long kfifo_copy_from_user(struct __kfifo *fifo,
215 * incrementing the fifo->in index counter 215 * incrementing the fifo->in index counter
216 */ 216 */
217 smp_wmb(); 217 smp_wmb();
218 *copied = len - ret; 218 *copied = len - ret * esize;
219 /* return the number of elements which are not copied */ 219 /* return the number of elements which are not copied */
220 return ret; 220 return ret;
221} 221}
@@ -275,7 +275,7 @@ static unsigned long kfifo_copy_to_user(struct __kfifo *fifo, void __user *to,
275 * incrementing the fifo->out index counter 275 * incrementing the fifo->out index counter
276 */ 276 */
277 smp_wmb(); 277 smp_wmb();
278 *copied = len - ret; 278 *copied = len - ret * esize;
279 /* return the number of elements which are not copied */ 279 /* return the number of elements which are not copied */
280 return ret; 280 return ret;
281} 281}
diff --git a/lib/kobject.c b/lib/kobject.c
index 084f7b18d0c0..5b4b8886435e 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -13,11 +13,30 @@
13 */ 13 */
14 14
15#include <linux/kobject.h> 15#include <linux/kobject.h>
16#include <linux/kobj_completion.h>
16#include <linux/string.h> 17#include <linux/string.h>
17#include <linux/export.h> 18#include <linux/export.h>
18#include <linux/stat.h> 19#include <linux/stat.h>
19#include <linux/slab.h> 20#include <linux/slab.h>
20 21
22/**
23 * kobject_namespace - return @kobj's namespace tag
24 * @kobj: kobject in question
25 *
26 * Returns namespace tag of @kobj if its parent has namespace ops enabled
27 * and thus @kobj should have a namespace tag associated with it. Returns
28 * %NULL otherwise.
29 */
30const void *kobject_namespace(struct kobject *kobj)
31{
32 const struct kobj_ns_type_operations *ns_ops = kobj_ns_ops(kobj);
33
34 if (!ns_ops || ns_ops->type == KOBJ_NS_TYPE_NONE)
35 return NULL;
36
37 return kobj->ktype->namespace(kobj);
38}
39
21/* 40/*
22 * populate_dir - populate directory with attributes. 41 * populate_dir - populate directory with attributes.
23 * @kobj: object we're working on. 42 * @kobj: object we're working on.
@@ -46,13 +65,21 @@ static int populate_dir(struct kobject *kobj)
46 65
47static int create_dir(struct kobject *kobj) 66static int create_dir(struct kobject *kobj)
48{ 67{
49 int error = 0; 68 int error;
50 error = sysfs_create_dir(kobj); 69
70 error = sysfs_create_dir_ns(kobj, kobject_namespace(kobj));
51 if (!error) { 71 if (!error) {
52 error = populate_dir(kobj); 72 error = populate_dir(kobj);
53 if (error) 73 if (error)
54 sysfs_remove_dir(kobj); 74 sysfs_remove_dir(kobj);
55 } 75 }
76
77 /*
78 * @kobj->sd may be deleted by an ancestor going away. Hold an
79 * extra reference so that it stays until @kobj is gone.
80 */
81 sysfs_get(kobj->sd);
82
56 return error; 83 return error;
57} 84}
58 85
@@ -428,7 +455,7 @@ int kobject_rename(struct kobject *kobj, const char *new_name)
428 goto out; 455 goto out;
429 } 456 }
430 457
431 error = sysfs_rename_dir(kobj, new_name); 458 error = sysfs_rename_dir_ns(kobj, new_name, kobject_namespace(kobj));
432 if (error) 459 if (error)
433 goto out; 460 goto out;
434 461
@@ -472,6 +499,7 @@ int kobject_move(struct kobject *kobj, struct kobject *new_parent)
472 if (kobj->kset) 499 if (kobj->kset)
473 new_parent = kobject_get(&kobj->kset->kobj); 500 new_parent = kobject_get(&kobj->kset->kobj);
474 } 501 }
502
475 /* old object path */ 503 /* old object path */
476 devpath = kobject_get_path(kobj, GFP_KERNEL); 504 devpath = kobject_get_path(kobj, GFP_KERNEL);
477 if (!devpath) { 505 if (!devpath) {
@@ -486,7 +514,7 @@ int kobject_move(struct kobject *kobj, struct kobject *new_parent)
486 sprintf(devpath_string, "DEVPATH_OLD=%s", devpath); 514 sprintf(devpath_string, "DEVPATH_OLD=%s", devpath);
487 envp[0] = devpath_string; 515 envp[0] = devpath_string;
488 envp[1] = NULL; 516 envp[1] = NULL;
489 error = sysfs_move_dir(kobj, new_parent); 517 error = sysfs_move_dir_ns(kobj, new_parent, kobject_namespace(kobj));
490 if (error) 518 if (error)
491 goto out; 519 goto out;
492 old_parent = kobj->parent; 520 old_parent = kobj->parent;
@@ -508,10 +536,15 @@ out:
508 */ 536 */
509void kobject_del(struct kobject *kobj) 537void kobject_del(struct kobject *kobj)
510{ 538{
539 struct sysfs_dirent *sd;
540
511 if (!kobj) 541 if (!kobj)
512 return; 542 return;
513 543
544 sd = kobj->sd;
514 sysfs_remove_dir(kobj); 545 sysfs_remove_dir(kobj);
546 sysfs_put(sd);
547
515 kobj->state_in_sysfs = 0; 548 kobj->state_in_sysfs = 0;
516 kobj_kset_leave(kobj); 549 kobj_kset_leave(kobj);
517 kobject_put(kobj->parent); 550 kobject_put(kobj->parent);
@@ -727,6 +760,55 @@ const struct sysfs_ops kobj_sysfs_ops = {
727}; 760};
728 761
729/** 762/**
763 * kobj_completion_init - initialize a kobj_completion object.
764 * @kc: kobj_completion
765 * @ktype: type of kobject to initialize
766 *
767 * kobj_completion structures can be embedded within structures with different
768 * lifetime rules. During the release of the enclosing object, we can
769 * wait on the release of the kobject so that we don't free it while it's
770 * still busy.
771 */
772void kobj_completion_init(struct kobj_completion *kc, struct kobj_type *ktype)
773{
774 init_completion(&kc->kc_unregister);
775 kobject_init(&kc->kc_kobj, ktype);
776}
777EXPORT_SYMBOL_GPL(kobj_completion_init);
778
779/**
780 * kobj_completion_release - release a kobj_completion object
781 * @kobj: kobject embedded in kobj_completion
782 *
783 * Used with kobject_release to notify waiters that the kobject has been
784 * released.
785 */
786void kobj_completion_release(struct kobject *kobj)
787{
788 struct kobj_completion *kc = kobj_to_kobj_completion(kobj);
789 complete(&kc->kc_unregister);
790}
791EXPORT_SYMBOL_GPL(kobj_completion_release);
792
793/**
794 * kobj_completion_del_and_wait - release the kobject and wait for it
795 * @kc: kobj_completion object to release
796 *
797 * Delete the kobject from sysfs and drop the reference count. Then wait
798 * until any other outstanding references are also dropped. This routine
799 * is only necessary once other references may have been taken on the
800 * kobject. Typically this happens when the kobject has been published
801 * to sysfs via kobject_add.
802 */
803void kobj_completion_del_and_wait(struct kobj_completion *kc)
804{
805 kobject_del(&kc->kc_kobj);
806 kobject_put(&kc->kc_kobj);
807 wait_for_completion(&kc->kc_unregister);
808}
809EXPORT_SYMBOL_GPL(kobj_completion_del_and_wait);
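
A sketch of the embedding pattern these helpers are meant for; struct my_device, my_ktype and the parent kobject are assumptions, and my_ktype's release method is expected to call kobj_completion_release():

#include <linux/kobject.h>
#include <linux/kobj_completion.h>

struct my_device {
	struct kobj_completion	kc;
	/* ... driver-private data ... */
};

static int my_device_publish(struct my_device *d, struct kobj_type *my_ktype,
			     struct kobject *parent)
{
	kobj_completion_init(&d->kc, my_ktype);
	return kobject_add(&d->kc.kc_kobj, parent, "my_device");
}

static void my_device_unpublish(struct my_device *d)
{
	/* Removes the kobject from sysfs and waits for all references. */
	kobj_completion_del_and_wait(&d->kc);
}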
810
811/**
730 * kset_register - initialize and add a kset. 812 * kset_register - initialize and add a kset.
731 * @k: kset. 813 * @k: kset.
732 */ 814 */
diff --git a/lib/llist.c b/lib/llist.c
index 4a70d120138c..f76196d07409 100644
--- a/lib/llist.c
+++ b/lib/llist.c
@@ -81,3 +81,25 @@ struct llist_node *llist_del_first(struct llist_head *head)
81 return entry; 81 return entry;
82} 82}
83EXPORT_SYMBOL_GPL(llist_del_first); 83EXPORT_SYMBOL_GPL(llist_del_first);
84
85/**
86 * llist_reverse_order - reverse order of a llist chain
87 * @head: first item of the list to be reversed
88 *
89 * Reverse the order of a chain of llist entries and return the
90 * new first entry.
91 */
92struct llist_node *llist_reverse_order(struct llist_node *head)
93{
94 struct llist_node *new_head = NULL;
95
96 while (head) {
97 struct llist_node *tmp = head;
98 head = head->next;
99 tmp->next = new_head;
100 new_head = tmp;
101 }
102
103 return new_head;
104}
105EXPORT_SYMBOL_GPL(llist_reverse_order);
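
A hedged sketch of the usual pairing with llist_del_all(): entries come back newest-first, so reversing restores submission order before processing (struct my_item in the comment is a placeholder):

#include <linux/llist.h>

/* Drain a lock-less list in the order the entries were added. */
static void drain_fifo(struct llist_head *head)
{
	struct llist_node *node = llist_del_all(head);

	node = llist_reverse_order(node);	/* newest-first -> oldest-first */
	while (node) {
		struct llist_node *next = node->next;

		/* process llist_entry(node, struct my_item, llnode) here */
		node = next;
	}
}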
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 6dc09d8f4c24..872a15a2a637 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -1002,7 +1002,7 @@ static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask)
1002 * Some tests (e.g. double-unlock) might corrupt the preemption 1002 * Some tests (e.g. double-unlock) might corrupt the preemption
1003 * count, so restore it: 1003 * count, so restore it:
1004 */ 1004 */
1005 preempt_count() = saved_preempt_count; 1005 preempt_count_set(saved_preempt_count);
1006#ifdef CONFIG_TRACE_IRQFLAGS 1006#ifdef CONFIG_TRACE_IRQFLAGS
1007 if (softirq_count()) 1007 if (softirq_count())
1008 current->softirqs_enabled = 0; 1008 current->softirqs_enabled = 0;
diff --git a/lib/lockref.c b/lib/lockref.c
index 6f9d434c1521..d2b123f8456b 100644
--- a/lib/lockref.c
+++ b/lib/lockref.c
@@ -1,7 +1,7 @@
1#include <linux/export.h> 1#include <linux/export.h>
2#include <linux/lockref.h> 2#include <linux/lockref.h>
3 3
4#ifdef CONFIG_CMPXCHG_LOCKREF 4#if USE_CMPXCHG_LOCKREF
5 5
6/* 6/*
7 * Allow weakly-ordered memory architectures to provide barrier-less 7 * Allow weakly-ordered memory architectures to provide barrier-less
@@ -153,6 +153,7 @@ void lockref_mark_dead(struct lockref *lockref)
153 assert_spin_locked(&lockref->lock); 153 assert_spin_locked(&lockref->lock);
154 lockref->count = -128; 154 lockref->count = -128;
155} 155}
156EXPORT_SYMBOL(lockref_mark_dead);
156 157
157/** 158/**
158 * lockref_get_not_dead - Increments count unless the ref is dead 159 * lockref_get_not_dead - Increments count unless the ref is dead
diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c
index 657979f71bef..bf076d281d40 100644
--- a/lib/mpi/mpiutil.c
+++ b/lib/mpi/mpiutil.c
@@ -121,3 +121,6 @@ void mpi_free(MPI a)
121 kfree(a); 121 kfree(a);
122} 122}
123EXPORT_SYMBOL_GPL(mpi_free); 123EXPORT_SYMBOL_GPL(mpi_free);
124
125MODULE_DESCRIPTION("Multiprecision maths library");
126MODULE_LICENSE("GPL");
diff --git a/lib/percpu-rwsem.c b/lib/percpu-rwsem.c
deleted file mode 100644
index 652a8ee8efe9..000000000000
--- a/lib/percpu-rwsem.c
+++ /dev/null
@@ -1,165 +0,0 @@
1#include <linux/atomic.h>
2#include <linux/rwsem.h>
3#include <linux/percpu.h>
4#include <linux/wait.h>
5#include <linux/lockdep.h>
6#include <linux/percpu-rwsem.h>
7#include <linux/rcupdate.h>
8#include <linux/sched.h>
9#include <linux/errno.h>
10
11int __percpu_init_rwsem(struct percpu_rw_semaphore *brw,
12 const char *name, struct lock_class_key *rwsem_key)
13{
14 brw->fast_read_ctr = alloc_percpu(int);
15 if (unlikely(!brw->fast_read_ctr))
16 return -ENOMEM;
17
18 /* ->rw_sem represents the whole percpu_rw_semaphore for lockdep */
19 __init_rwsem(&brw->rw_sem, name, rwsem_key);
20 atomic_set(&brw->write_ctr, 0);
21 atomic_set(&brw->slow_read_ctr, 0);
22 init_waitqueue_head(&brw->write_waitq);
23 return 0;
24}
25
26void percpu_free_rwsem(struct percpu_rw_semaphore *brw)
27{
28 free_percpu(brw->fast_read_ctr);
29 brw->fast_read_ctr = NULL; /* catch use after free bugs */
30}
31
32/*
33 * This is the fast-path for down_read/up_read, it only needs to ensure
34 * there is no pending writer (atomic_read(write_ctr) == 0) and inc/dec the
35 * fast per-cpu counter. The writer uses synchronize_sched_expedited() to
36 * serialize with the preempt-disabled section below.
37 *
38 * The nontrivial part is that we should guarantee acquire/release semantics
39 * in case when
40 *
41 * R_W: down_write() comes after up_read(), the writer should see all
42 * changes done by the reader
43 * or
44 * W_R: down_read() comes after up_write(), the reader should see all
45 * changes done by the writer
46 *
47 * If this helper fails the callers rely on the normal rw_semaphore and
48 * atomic_dec_and_test(), so in this case we have the necessary barriers.
49 *
50 * But if it succeeds we do not have any barriers, atomic_read(write_ctr) or
51 * __this_cpu_add() below can be reordered with any LOAD/STORE done by the
52 * reader inside the critical section. See the comments in down_write and
53 * up_write below.
54 */
55static bool update_fast_ctr(struct percpu_rw_semaphore *brw, unsigned int val)
56{
57 bool success = false;
58
59 preempt_disable();
60 if (likely(!atomic_read(&brw->write_ctr))) {
61 __this_cpu_add(*brw->fast_read_ctr, val);
62 success = true;
63 }
64 preempt_enable();
65
66 return success;
67}
68
69/*
70 * Like the normal down_read() this is not recursive, the writer can
71 * come after the first percpu_down_read() and create the deadlock.
72 *
73 * Note: returns with lock_is_held(brw->rw_sem) == T for lockdep,
74 * percpu_up_read() does rwsem_release(). This pairs with the usage
75 * of ->rw_sem in percpu_down/up_write().
76 */
77void percpu_down_read(struct percpu_rw_semaphore *brw)
78{
79 might_sleep();
80 if (likely(update_fast_ctr(brw, +1))) {
81 rwsem_acquire_read(&brw->rw_sem.dep_map, 0, 0, _RET_IP_);
82 return;
83 }
84
85 down_read(&brw->rw_sem);
86 atomic_inc(&brw->slow_read_ctr);
87 /* avoid up_read()->rwsem_release() */
88 __up_read(&brw->rw_sem);
89}
90
91void percpu_up_read(struct percpu_rw_semaphore *brw)
92{
93 rwsem_release(&brw->rw_sem.dep_map, 1, _RET_IP_);
94
95 if (likely(update_fast_ctr(brw, -1)))
96 return;
97
98 /* false-positive is possible but harmless */
99 if (atomic_dec_and_test(&brw->slow_read_ctr))
100 wake_up_all(&brw->write_waitq);
101}
102
103static int clear_fast_ctr(struct percpu_rw_semaphore *brw)
104{
105 unsigned int sum = 0;
106 int cpu;
107
108 for_each_possible_cpu(cpu) {
109 sum += per_cpu(*brw->fast_read_ctr, cpu);
110 per_cpu(*brw->fast_read_ctr, cpu) = 0;
111 }
112
113 return sum;
114}
115
116/*
117 * A writer increments ->write_ctr to force the readers to switch to the
118 * slow mode, note the atomic_read() check in update_fast_ctr().
119 *
120 * After that the readers can only inc/dec the slow ->slow_read_ctr counter,
121 * ->fast_read_ctr is stable. Once the writer moves its sum into the slow
122 * counter it represents the number of active readers.
123 *
124 * Finally the writer takes ->rw_sem for writing and blocks the new readers,
125 * then waits until the slow counter becomes zero.
126 */
127void percpu_down_write(struct percpu_rw_semaphore *brw)
128{
129 /* tell update_fast_ctr() there is a pending writer */
130 atomic_inc(&brw->write_ctr);
131 /*
132 * 1. Ensures that write_ctr != 0 is visible to any down_read/up_read
133 * so that update_fast_ctr() can't succeed.
134 *
135 * 2. Ensures we see the result of every previous this_cpu_add() in
136 * update_fast_ctr().
137 *
138 * 3. Ensures that if any reader has exited its critical section via
139 * fast-path, it executes a full memory barrier before we return.
140 * See R_W case in the comment above update_fast_ctr().
141 */
142 synchronize_sched_expedited();
143
144 /* exclude other writers, and block the new readers completely */
145 down_write(&brw->rw_sem);
146
147 /* nobody can use fast_read_ctr, move its sum into slow_read_ctr */
148 atomic_add(clear_fast_ctr(brw), &brw->slow_read_ctr);
149
150 /* wait for all readers to complete their percpu_up_read() */
151 wait_event(brw->write_waitq, !atomic_read(&brw->slow_read_ctr));
152}
153
154void percpu_up_write(struct percpu_rw_semaphore *brw)
155{
156 /* release the lock, but the readers can't use the fast-path */
157 up_write(&brw->rw_sem);
158 /*
159 * Insert the barrier before the next fast-path in down_read,
160 * see W_R case in the comment above update_fast_ctr().
161 */
162 synchronize_sched_expedited();
163 /* the last writer unblocks update_fast_ctr() */
164 atomic_dec(&brw->write_ctr);
165}
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index 93c5d5ecff4e..7473ee3b4ee7 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -60,14 +60,15 @@ static inline void debug_percpu_counter_deactivate(struct percpu_counter *fbc)
60void percpu_counter_set(struct percpu_counter *fbc, s64 amount) 60void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
61{ 61{
62 int cpu; 62 int cpu;
63 unsigned long flags;
63 64
64 raw_spin_lock(&fbc->lock); 65 raw_spin_lock_irqsave(&fbc->lock, flags);
65 for_each_possible_cpu(cpu) { 66 for_each_possible_cpu(cpu) {
66 s32 *pcount = per_cpu_ptr(fbc->counters, cpu); 67 s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
67 *pcount = 0; 68 *pcount = 0;
68 } 69 }
69 fbc->count = amount; 70 fbc->count = amount;
70 raw_spin_unlock(&fbc->lock); 71 raw_spin_unlock_irqrestore(&fbc->lock, flags);
71} 72}
72EXPORT_SYMBOL(percpu_counter_set); 73EXPORT_SYMBOL(percpu_counter_set);
73 74
@@ -78,9 +79,10 @@ void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch)
78 preempt_disable(); 79 preempt_disable();
79 count = __this_cpu_read(*fbc->counters) + amount; 80 count = __this_cpu_read(*fbc->counters) + amount;
80 if (count >= batch || count <= -batch) { 81 if (count >= batch || count <= -batch) {
81 raw_spin_lock(&fbc->lock); 82 unsigned long flags;
83 raw_spin_lock_irqsave(&fbc->lock, flags);
82 fbc->count += count; 84 fbc->count += count;
83 raw_spin_unlock(&fbc->lock); 85 raw_spin_unlock_irqrestore(&fbc->lock, flags);
84 __this_cpu_write(*fbc->counters, 0); 86 __this_cpu_write(*fbc->counters, 0);
85 } else { 87 } else {
86 __this_cpu_write(*fbc->counters, count); 88 __this_cpu_write(*fbc->counters, count);
@@ -97,14 +99,15 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc)
97{ 99{
98 s64 ret; 100 s64 ret;
99 int cpu; 101 int cpu;
102 unsigned long flags;
100 103
101 raw_spin_lock(&fbc->lock); 104 raw_spin_lock_irqsave(&fbc->lock, flags);
102 ret = fbc->count; 105 ret = fbc->count;
103 for_each_online_cpu(cpu) { 106 for_each_online_cpu(cpu) {
104 s32 *pcount = per_cpu_ptr(fbc->counters, cpu); 107 s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
105 ret += *pcount; 108 ret += *pcount;
106 } 109 }
107 raw_spin_unlock(&fbc->lock); 110 raw_spin_unlock_irqrestore(&fbc->lock, flags);
108 return ret; 111 return ret;
109} 112}
110EXPORT_SYMBOL(__percpu_counter_sum); 113EXPORT_SYMBOL(__percpu_counter_sum);
diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c
index bab1ba2a4c71..9d054bf91d0f 100644
--- a/lib/percpu_ida.c
+++ b/lib/percpu_ida.c
@@ -30,15 +30,6 @@
30#include <linux/spinlock.h> 30#include <linux/spinlock.h>
31#include <linux/percpu_ida.h> 31#include <linux/percpu_ida.h>
32 32
33/*
34 * Number of tags we move between the percpu freelist and the global freelist at
35 * a time
36 */
37#define IDA_PCPU_BATCH_MOVE 32U
38
39/* Max size of percpu freelist, */
40#define IDA_PCPU_SIZE ((IDA_PCPU_BATCH_MOVE * 3) / 2)
41
42struct percpu_ida_cpu { 33struct percpu_ida_cpu {
43 /* 34 /*
44 * Even though this is percpu, we need a lock for tag stealing by remote 35 * Even though this is percpu, we need a lock for tag stealing by remote
@@ -78,7 +69,7 @@ static inline void steal_tags(struct percpu_ida *pool,
78 struct percpu_ida_cpu *remote; 69 struct percpu_ida_cpu *remote;
79 70
80 for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags); 71 for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags);
81 cpus_have_tags * IDA_PCPU_SIZE > pool->nr_tags / 2; 72 cpus_have_tags * pool->percpu_max_size > pool->nr_tags / 2;
82 cpus_have_tags--) { 73 cpus_have_tags--) {
83 cpu = cpumask_next(cpu, &pool->cpus_have_tags); 74 cpu = cpumask_next(cpu, &pool->cpus_have_tags);
84 75
@@ -123,11 +114,10 @@ static inline void alloc_global_tags(struct percpu_ida *pool,
123{ 114{
124 move_tags(tags->freelist, &tags->nr_free, 115 move_tags(tags->freelist, &tags->nr_free,
125 pool->freelist, &pool->nr_free, 116 pool->freelist, &pool->nr_free,
126 min(pool->nr_free, IDA_PCPU_BATCH_MOVE)); 117 min(pool->nr_free, pool->percpu_batch_size));
127} 118}
128 119
129static inline unsigned alloc_local_tag(struct percpu_ida *pool, 120static inline unsigned alloc_local_tag(struct percpu_ida_cpu *tags)
130 struct percpu_ida_cpu *tags)
131{ 121{
132 int tag = -ENOSPC; 122 int tag = -ENOSPC;
133 123
@@ -168,7 +158,7 @@ int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp)
168 tags = this_cpu_ptr(pool->tag_cpu); 158 tags = this_cpu_ptr(pool->tag_cpu);
169 159
170 /* Fastpath */ 160 /* Fastpath */
171 tag = alloc_local_tag(pool, tags); 161 tag = alloc_local_tag(tags);
172 if (likely(tag >= 0)) { 162 if (likely(tag >= 0)) {
173 local_irq_restore(flags); 163 local_irq_restore(flags);
174 return tag; 164 return tag;
@@ -245,17 +235,17 @@ void percpu_ida_free(struct percpu_ida *pool, unsigned tag)
245 wake_up(&pool->wait); 235 wake_up(&pool->wait);
246 } 236 }
247 237
248 if (nr_free == IDA_PCPU_SIZE) { 238 if (nr_free == pool->percpu_max_size) {
249 spin_lock(&pool->lock); 239 spin_lock(&pool->lock);
250 240
251 /* 241 /*
252 * Global lock held and irqs disabled, don't need percpu 242 * Global lock held and irqs disabled, don't need percpu
253 * lock 243 * lock
254 */ 244 */
255 if (tags->nr_free == IDA_PCPU_SIZE) { 245 if (tags->nr_free == pool->percpu_max_size) {
256 move_tags(pool->freelist, &pool->nr_free, 246 move_tags(pool->freelist, &pool->nr_free,
257 tags->freelist, &tags->nr_free, 247 tags->freelist, &tags->nr_free,
258 IDA_PCPU_BATCH_MOVE); 248 pool->percpu_batch_size);
259 249
260 wake_up(&pool->wait); 250 wake_up(&pool->wait);
261 } 251 }
@@ -292,7 +282,8 @@ EXPORT_SYMBOL_GPL(percpu_ida_destroy);
292 * Allocation is percpu, but sharding is limited by nr_tags - for best 282 * Allocation is percpu, but sharding is limited by nr_tags - for best
293 * performance, the workload should not span more cpus than nr_tags / 128. 283 * performance, the workload should not span more cpus than nr_tags / 128.
294 */ 284 */
295int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags) 285int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags,
286 unsigned long max_size, unsigned long batch_size)
296{ 287{
297 unsigned i, cpu, order; 288 unsigned i, cpu, order;
298 289
@@ -301,6 +292,8 @@ int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags)
301 init_waitqueue_head(&pool->wait); 292 init_waitqueue_head(&pool->wait);
302 spin_lock_init(&pool->lock); 293 spin_lock_init(&pool->lock);
303 pool->nr_tags = nr_tags; 294 pool->nr_tags = nr_tags;
295 pool->percpu_max_size = max_size;
296 pool->percpu_batch_size = batch_size;
304 297
305 /* Guard against overflow */ 298 /* Guard against overflow */
306 if (nr_tags > (unsigned) INT_MAX + 1) { 299 if (nr_tags > (unsigned) INT_MAX + 1) {
@@ -319,7 +312,7 @@ int percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags)
319 pool->nr_free = nr_tags; 312 pool->nr_free = nr_tags;
320 313
321 pool->tag_cpu = __alloc_percpu(sizeof(struct percpu_ida_cpu) + 314 pool->tag_cpu = __alloc_percpu(sizeof(struct percpu_ida_cpu) +
322 IDA_PCPU_SIZE * sizeof(unsigned), 315 pool->percpu_max_size * sizeof(unsigned),
323 sizeof(unsigned)); 316 sizeof(unsigned));
324 if (!pool->tag_cpu) 317 if (!pool->tag_cpu)
325 goto err; 318 goto err;
@@ -332,4 +325,65 @@ err:
332 percpu_ida_destroy(pool); 325 percpu_ida_destroy(pool);
333 return -ENOMEM; 326 return -ENOMEM;
334} 327}
335EXPORT_SYMBOL_GPL(percpu_ida_init); 328EXPORT_SYMBOL_GPL(__percpu_ida_init);
329
330/**
331 * percpu_ida_for_each_free - iterate free ids of a pool
332 * @pool: pool to iterate
333 * @fn: iteration callback function
334 * @data: parameter for @fn
335 *
336 * Note: this does not guarantee to visit every free id exactly once. Some
337 * free ids might be missed, some might be visited more than once, and some
338 * might no longer be free by the time the callback sees them.
339 */
340int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn,
341 void *data)
342{
343 unsigned long flags;
344 struct percpu_ida_cpu *remote;
345 unsigned cpu, i, err = 0;
346
347 local_irq_save(flags);
348 for_each_possible_cpu(cpu) {
349 remote = per_cpu_ptr(pool->tag_cpu, cpu);
350 spin_lock(&remote->lock);
351 for (i = 0; i < remote->nr_free; i++) {
352 err = fn(remote->freelist[i], data);
353 if (err)
354 break;
355 }
356 spin_unlock(&remote->lock);
357 if (err)
358 goto out;
359 }
360
361 spin_lock(&pool->lock);
362 for (i = 0; i < pool->nr_free; i++) {
363 err = fn(pool->freelist[i], data);
364 if (err)
365 break;
366 }
367 spin_unlock(&pool->lock);
368out:
369 local_irq_restore(flags);
370 return err;
371}
372EXPORT_SYMBOL_GPL(percpu_ida_for_each_free);
373
374/**
375 * percpu_ida_free_tags - return the number of free tags on a specific cpu or in the global pool
376 * @pool: pool to inspect
377 * @cpu: specific cpu or global pool if @cpu == nr_cpu_ids
378 *
379 * Note: this just returns a snapshot of the number of free tags.
380 */
381unsigned percpu_ida_free_tags(struct percpu_ida *pool, int cpu)
382{
383 struct percpu_ida_cpu *remote;
384 if (cpu == nr_cpu_ids)
385 return pool->nr_free;
386 remote = per_cpu_ptr(pool->tag_cpu, cpu);
387 return remote->nr_free;
388}
389EXPORT_SYMBOL_GPL(percpu_ida_free_tags);
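
For orientation, a minimal caller of the reworked interface could look like the sketch below. It is illustrative only: the demo_* names, the 1024/64/32 sizing and the <linux/percpu_ida.h> include are assumptions rather than part of this patch; the function signatures follow the hunks above.

#include <linux/module.h>
#include <linux/percpu_ida.h>

static struct percpu_ida demo_pool;

static int demo_count_free(unsigned id, void *data)
{
	unsigned *nr = data;

	(*nr)++;
	return 0;		/* a non-zero return stops the walk */
}

static int __init percpu_ida_demo_init(void)
{
	unsigned nr_free = 0;
	int tag, err;

	/* 1024 tags; cache at most 64 per cpu, move them in batches of 32 */
	err = __percpu_ida_init(&demo_pool, 1024, 64, 32);
	if (err)
		return err;

	tag = percpu_ida_alloc(&demo_pool, GFP_KERNEL);
	if (tag >= 0)
		percpu_ida_free(&demo_pool, tag);

	percpu_ida_for_each_free(&demo_pool, demo_count_free, &nr_free);
	pr_info("free ids seen: %u, on the global freelist: %u\n",
		nr_free, percpu_ida_free_tags(&demo_pool, nr_cpu_ids));

	percpu_ida_destroy(&demo_pool);
	return 0;
}
module_init(percpu_ida_demo_init);

MODULE_LICENSE("GPL");
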
diff --git a/lib/percpu_test.c b/lib/percpu_test.c
new file mode 100644
index 000000000000..0b5d14dadd1a
--- /dev/null
+++ b/lib/percpu_test.c
@@ -0,0 +1,138 @@
1#include <linux/module.h>
2
3/* validate @native and @pcp counter values match @expected */
4#define CHECK(native, pcp, expected) \
5 do { \
6 WARN((native) != (expected), \
7 "raw %ld (0x%lx) != expected %lld (0x%llx)", \
8 (native), (native), \
9 (long long)(expected), (long long)(expected)); \
10 WARN(__this_cpu_read(pcp) != (expected), \
11 "pcp %ld (0x%lx) != expected %lld (0x%llx)", \
12 __this_cpu_read(pcp), __this_cpu_read(pcp), \
13 (long long)(expected), (long long)(expected)); \
14 } while (0)
15
16static DEFINE_PER_CPU(long, long_counter);
17static DEFINE_PER_CPU(unsigned long, ulong_counter);
18
19static int __init percpu_test_init(void)
20{
21 /*
22 * volatile prevents the compiler from optimizing its uses, otherwise the
23 * +ui_one/-ui_one operations below would be replaced with inc/dec instructions.
24 */
25 volatile unsigned int ui_one = 1;
26 long l = 0;
27 unsigned long ul = 0;
28
29 pr_info("percpu test start\n");
30
31 preempt_disable();
32
33 l += -1;
34 __this_cpu_add(long_counter, -1);
35 CHECK(l, long_counter, -1);
36
37 l += 1;
38 __this_cpu_add(long_counter, 1);
39 CHECK(l, long_counter, 0);
40
41 ul = 0;
42 __this_cpu_write(ulong_counter, 0);
43
44 ul += 1UL;
45 __this_cpu_add(ulong_counter, 1UL);
46 CHECK(ul, ulong_counter, 1);
47
48 ul += -1UL;
49 __this_cpu_add(ulong_counter, -1UL);
50 CHECK(ul, ulong_counter, 0);
51
52 ul += -(unsigned long)1;
53 __this_cpu_add(ulong_counter, -(unsigned long)1);
54 CHECK(ul, ulong_counter, -1);
55
56 ul = 0;
57 __this_cpu_write(ulong_counter, 0);
58
59 ul -= 1;
60 __this_cpu_dec(ulong_counter);
61 CHECK(ul, ulong_counter, -1);
62 CHECK(ul, ulong_counter, ULONG_MAX);
63
64 l += -ui_one;
65 __this_cpu_add(long_counter, -ui_one);
66 CHECK(l, long_counter, 0xffffffff);
67
68 l += ui_one;
69 __this_cpu_add(long_counter, ui_one);
70 CHECK(l, long_counter, (long)0x100000000LL);
71
72
73 l = 0;
74 __this_cpu_write(long_counter, 0);
75
76 l -= ui_one;
77 __this_cpu_sub(long_counter, ui_one);
78 CHECK(l, long_counter, -1);
79
80 l = 0;
81 __this_cpu_write(long_counter, 0);
82
83 l += ui_one;
84 __this_cpu_add(long_counter, ui_one);
85 CHECK(l, long_counter, 1);
86
87 l += -ui_one;
88 __this_cpu_add(long_counter, -ui_one);
89 CHECK(l, long_counter, (long)0x100000000LL);
90
91 l = 0;
92 __this_cpu_write(long_counter, 0);
93
94 l -= ui_one;
95 this_cpu_sub(long_counter, ui_one);
96 CHECK(l, long_counter, -1);
97 CHECK(l, long_counter, ULONG_MAX);
98
99 ul = 0;
100 __this_cpu_write(ulong_counter, 0);
101
102 ul += ui_one;
103 __this_cpu_add(ulong_counter, ui_one);
104 CHECK(ul, ulong_counter, 1);
105
106 ul = 0;
107 __this_cpu_write(ulong_counter, 0);
108
109 ul -= ui_one;
110 __this_cpu_sub(ulong_counter, ui_one);
111 CHECK(ul, ulong_counter, -1);
112 CHECK(ul, ulong_counter, ULONG_MAX);
113
114 ul = 3;
115 __this_cpu_write(ulong_counter, 3);
116
117 ul = this_cpu_sub_return(ulong_counter, ui_one);
118 CHECK(ul, ulong_counter, 2);
119
120 ul = __this_cpu_sub_return(ulong_counter, ui_one);
121 CHECK(ul, ulong_counter, 1);
122
123 preempt_enable();
124
125 pr_info("percpu test done\n");
126 return -EAGAIN; /* failing the init call unloads the module right away */
127}
128
129static void __exit percpu_test_exit(void)
130{
131}
132
133module_init(percpu_test_init)
134module_exit(percpu_test_exit)
135
136MODULE_LICENSE("GPL");
137MODULE_AUTHOR("Greg Thelen");
138MODULE_DESCRIPTION("percpu operations test");
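
The expected values above (0xffffffff, 0x100000000, ULONG_MAX) follow from C's usual arithmetic conversions: -ui_one is the unsigned int value UINT_MAX, which is zero-extended when combined with a 64-bit long. A small userspace sketch (assuming an LP64 target, i.e. a 64-bit long) shows the same effect without any percpu machinery:

#include <stdio.h>

int main(void)
{
	volatile unsigned int ui_one = 1;	/* volatile, as in the test above */
	long l = 0;
	unsigned long ul = 0;

	l += -ui_one;		/* -ui_one == UINT_MAX, zero-extended to long */
	printf("l  = %#lx\n", l);	/* 0xffffffff, not -1 */

	l += ui_one;
	printf("l  = %#lx\n", l);	/* 0x100000000 */

	ul -= 1;		/* plain unsigned wrap-around */
	printf("ul = %#lx\n", ul);	/* ULONG_MAX */
	return 0;
}
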
diff --git a/lib/random32.c b/lib/random32.c
index 52280d5526be..1e5b2df44291 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -2,19 +2,19 @@
2 This is a maximally equidistributed combined Tausworthe generator 2 This is a maximally equidistributed combined Tausworthe generator
3 based on code from GNU Scientific Library 1.5 (30 Jun 2004) 3 based on code from GNU Scientific Library 1.5 (30 Jun 2004)
4 4
5 x_n = (s1_n ^ s2_n ^ s3_n) 5 lfsr113 version:
6 6
7 s1_{n+1} = (((s1_n & 4294967294) <<12) ^ (((s1_n <<13) ^ s1_n) >>19)) 7 x_n = (s1_n ^ s2_n ^ s3_n ^ s4_n)
8 s2_{n+1} = (((s2_n & 4294967288) << 4) ^ (((s2_n << 2) ^ s2_n) >>25))
9 s3_{n+1} = (((s3_n & 4294967280) <<17) ^ (((s3_n << 3) ^ s3_n) >>11))
10 8
11 The period of this generator is about 2^88. 9 s1_{n+1} = (((s1_n & 4294967294) << 18) ^ (((s1_n << 6) ^ s1_n) >> 13))
10 s2_{n+1} = (((s2_n & 4294967288) << 2) ^ (((s2_n << 2) ^ s2_n) >> 27))
11 s3_{n+1} = (((s3_n & 4294967280) << 7) ^ (((s3_n << 13) ^ s3_n) >> 21))
12 s4_{n+1} = (((s4_n & 4294967168) << 13) ^ (((s4_n << 3) ^ s4_n) >> 12))
12 13
13 From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe 14 The period of this generator is about 2^113 (see erratum paper).
14 Generators", Mathematics of Computation, 65, 213 (1996), 203--213.
15
16 This is available on the net from L'Ecuyer's home page,
17 15
16 From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe
17 Generators", Mathematics of Computation, 65, 213 (1996), 203--213:
18 http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps 18 http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps
19 ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps 19 ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps
20 20
@@ -29,7 +29,7 @@
29 that paper.) 29 that paper.)
30 30
31 This affects the seeding procedure by imposing the requirement 31 This affects the seeding procedure by imposing the requirement
32 s1 > 1, s2 > 7, s3 > 15. 32 s1 > 1, s2 > 7, s3 > 15, s4 > 127.
33 33
34*/ 34*/
35 35
@@ -38,6 +38,11 @@
38#include <linux/export.h> 38#include <linux/export.h>
39#include <linux/jiffies.h> 39#include <linux/jiffies.h>
40#include <linux/random.h> 40#include <linux/random.h>
41#include <linux/sched.h>
42
43#ifdef CONFIG_RANDOM32_SELFTEST
44static void __init prandom_state_selftest(void);
45#endif
41 46
42static DEFINE_PER_CPU(struct rnd_state, net_rand_state); 47static DEFINE_PER_CPU(struct rnd_state, net_rand_state);
43 48
@@ -52,11 +57,12 @@ u32 prandom_u32_state(struct rnd_state *state)
52{ 57{
53#define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b) 58#define TAUSWORTHE(s,a,b,c,d) ((s&c)<<d) ^ (((s <<a) ^ s)>>b)
54 59
55 state->s1 = TAUSWORTHE(state->s1, 13, 19, 4294967294UL, 12); 60 state->s1 = TAUSWORTHE(state->s1, 6U, 13U, 4294967294U, 18U);
56 state->s2 = TAUSWORTHE(state->s2, 2, 25, 4294967288UL, 4); 61 state->s2 = TAUSWORTHE(state->s2, 2U, 27U, 4294967288U, 2U);
57 state->s3 = TAUSWORTHE(state->s3, 3, 11, 4294967280UL, 17); 62 state->s3 = TAUSWORTHE(state->s3, 13U, 21U, 4294967280U, 7U);
63 state->s4 = TAUSWORTHE(state->s4, 3U, 12U, 4294967168U, 13U);
58 64
59 return (state->s1 ^ state->s2 ^ state->s3); 65 return (state->s1 ^ state->s2 ^ state->s3 ^ state->s4);
60} 66}
61EXPORT_SYMBOL(prandom_u32_state); 67EXPORT_SYMBOL(prandom_u32_state);
62 68
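
For reference, the four-word recurrence can be exercised on its own. The userspace sketch below is illustrative only; it reuses the shift/mask parameters from the hunk above and a starting state that respects the seeding constraints s1 > 1, s2 > 7, s3 > 15, s4 > 127.

#include <stdint.h>
#include <stdio.h>

struct taus113 {
	uint32_t s1, s2, s3, s4;
};

/* one lfsr113 step, same parameters as the TAUSWORTHE calls above */
static uint32_t taus113_next(struct taus113 *st)
{
#define TAUS(s, a, b, c, d) ((((s) & (c)) << (d)) ^ ((((s) << (a)) ^ (s)) >> (b)))
	st->s1 = TAUS(st->s1,  6U, 13U, 4294967294U, 18U);
	st->s2 = TAUS(st->s2,  2U, 27U, 4294967288U,  2U);
	st->s3 = TAUS(st->s3, 13U, 21U, 4294967280U,  7U);
	st->s4 = TAUS(st->s4,  3U, 12U, 4294967168U, 13U);
#undef TAUS
	return st->s1 ^ st->s2 ^ st->s3 ^ st->s4;
}

int main(void)
{
	/* arbitrary state satisfying s1 > 1, s2 > 7, s3 > 15, s4 > 127 */
	struct taus113 st = { 2, 8, 16, 128 };
	int i;

	for (i = 0; i < 4; i++)
		printf("%u\n", taus113_next(&st));
	return 0;
}
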
@@ -126,6 +132,38 @@ void prandom_bytes(void *buf, int bytes)
126} 132}
127EXPORT_SYMBOL(prandom_bytes); 133EXPORT_SYMBOL(prandom_bytes);
128 134
135static void prandom_warmup(struct rnd_state *state)
136{
137 /* Calling RNG ten times to satisfy the recurrence condition */
138 prandom_u32_state(state);
139 prandom_u32_state(state);
140 prandom_u32_state(state);
141 prandom_u32_state(state);
142 prandom_u32_state(state);
143 prandom_u32_state(state);
144 prandom_u32_state(state);
145 prandom_u32_state(state);
146 prandom_u32_state(state);
147 prandom_u32_state(state);
148}
149
150static void prandom_seed_very_weak(struct rnd_state *state, u32 seed)
151{
152 /* Note: This sort of seeding is ONLY used in test cases and
153 * during boot, from core_initcall until late_initcall, as we don't
154 * have a stronger entropy source available yet.
155 * After late_initcall we have to (!) reseed the entire state,
156 * otherwise an attacker would only need to search a 32 bit seed space
157 * to probe for our internal 128 bit state if he knows a couple
158 * of prandom32 outputs!
159 */
160#define LCG(x) ((x) * 69069U) /* super-duper LCG */
161 state->s1 = __seed(LCG(seed), 2U);
162 state->s2 = __seed(LCG(state->s1), 8U);
163 state->s3 = __seed(LCG(state->s2), 16U);
164 state->s4 = __seed(LCG(state->s3), 128U);
165}
166
129/** 167/**
130 * prandom_seed - add entropy to pseudo random number generator 168 * prandom_seed - add entropy to pseudo random number generator
131 * @seed: seed value 169 * @seed: seed value
@@ -141,7 +179,9 @@ void prandom_seed(u32 entropy)
141 */ 179 */
142 for_each_possible_cpu (i) { 180 for_each_possible_cpu (i) {
143 struct rnd_state *state = &per_cpu(net_rand_state, i); 181 struct rnd_state *state = &per_cpu(net_rand_state, i);
144 state->s1 = __seed(state->s1 ^ entropy, 1); 182
183 state->s1 = __seed(state->s1 ^ entropy, 2U);
184 prandom_warmup(state);
145 } 185 }
146} 186}
147EXPORT_SYMBOL(prandom_seed); 187EXPORT_SYMBOL(prandom_seed);
@@ -154,46 +194,249 @@ static int __init prandom_init(void)
154{ 194{
155 int i; 195 int i;
156 196
197#ifdef CONFIG_RANDOM32_SELFTEST
198 prandom_state_selftest();
199#endif
200
157 for_each_possible_cpu(i) { 201 for_each_possible_cpu(i) {
158 struct rnd_state *state = &per_cpu(net_rand_state,i); 202 struct rnd_state *state = &per_cpu(net_rand_state,i);
159 203
160#define LCG(x) ((x) * 69069) /* super-duper LCG */ 204 prandom_seed_very_weak(state, (i + jiffies) ^ random_get_entropy());
161 state->s1 = __seed(LCG(i + jiffies), 1); 205 prandom_warmup(state);
162 state->s2 = __seed(LCG(state->s1), 7);
163 state->s3 = __seed(LCG(state->s2), 15);
164
165 /* "warm it up" */
166 prandom_u32_state(state);
167 prandom_u32_state(state);
168 prandom_u32_state(state);
169 prandom_u32_state(state);
170 prandom_u32_state(state);
171 prandom_u32_state(state);
172 } 206 }
173 return 0; 207 return 0;
174} 208}
175core_initcall(prandom_init); 209core_initcall(prandom_init);
176 210
211static void __prandom_timer(unsigned long dontcare);
212static DEFINE_TIMER(seed_timer, __prandom_timer, 0, 0);
213
214static void __prandom_timer(unsigned long dontcare)
215{
216 u32 entropy;
217 unsigned long expires;
218
219 get_random_bytes(&entropy, sizeof(entropy));
220 prandom_seed(entropy);
221
222 /* reseed every ~60 seconds, in [40 .. 80) interval with slack */
223 expires = 40 + (prandom_u32() % 40);
224 seed_timer.expires = jiffies + msecs_to_jiffies(expires * MSEC_PER_SEC);
225
226 add_timer(&seed_timer);
227}
228
229static void __init __prandom_start_seed_timer(void)
230{
231 set_timer_slack(&seed_timer, HZ);
232 seed_timer.expires = jiffies + msecs_to_jiffies(40 * MSEC_PER_SEC);
233 add_timer(&seed_timer);
234}
235
177/* 236/*
178 * Generate better values after random number generator 237 * Generate better values after random number generator
179 * is fully initialized. 238 * is fully initialized.
180 */ 239 */
181static int __init prandom_reseed(void) 240static void __prandom_reseed(bool late)
182{ 241{
183 int i; 242 int i;
243 unsigned long flags;
244 static bool latch = false;
245 static DEFINE_SPINLOCK(lock);
246
247 /* only allow initial seeding (late == false) once */
248 spin_lock_irqsave(&lock, flags);
249 if (latch && !late)
250 goto out;
251 latch = true;
184 252
185 for_each_possible_cpu(i) { 253 for_each_possible_cpu(i) {
186 struct rnd_state *state = &per_cpu(net_rand_state,i); 254 struct rnd_state *state = &per_cpu(net_rand_state,i);
187 u32 seeds[3]; 255 u32 seeds[4];
188 256
189 get_random_bytes(&seeds, sizeof(seeds)); 257 get_random_bytes(&seeds, sizeof(seeds));
190 state->s1 = __seed(seeds[0], 1); 258 state->s1 = __seed(seeds[0], 2U);
191 state->s2 = __seed(seeds[1], 7); 259 state->s2 = __seed(seeds[1], 8U);
192 state->s3 = __seed(seeds[2], 15); 260 state->s3 = __seed(seeds[2], 16U);
261 state->s4 = __seed(seeds[3], 128U);
193 262
194 /* mix it in */ 263 prandom_warmup(state);
195 prandom_u32_state(state);
196 } 264 }
265out:
266 spin_unlock_irqrestore(&lock, flags);
267}
268
269void prandom_reseed_late(void)
270{
271 __prandom_reseed(true);
272}
273
274static int __init prandom_reseed(void)
275{
276 __prandom_reseed(false);
277 __prandom_start_seed_timer();
197 return 0; 278 return 0;
198} 279}
199late_initcall(prandom_reseed); 280late_initcall(prandom_reseed);
281
282#ifdef CONFIG_RANDOM32_SELFTEST
283static struct prandom_test1 {
284 u32 seed;
285 u32 result;
286} test1[] = {
287 { 1U, 3484351685U },
288 { 2U, 2623130059U },
289 { 3U, 3125133893U },
290 { 4U, 984847254U },
291};
292
293static struct prandom_test2 {
294 u32 seed;
295 u32 iteration;
296 u32 result;
297} test2[] = {
298 /* Test cases against taus113 from GSL library. */
299 { 931557656U, 959U, 2975593782U },
300 { 1339693295U, 876U, 3887776532U },
301 { 1545556285U, 961U, 1615538833U },
302 { 601730776U, 723U, 1776162651U },
303 { 1027516047U, 687U, 511983079U },
304 { 416526298U, 700U, 916156552U },
305 { 1395522032U, 652U, 2222063676U },
306 { 366221443U, 617U, 2992857763U },
307 { 1539836965U, 714U, 3783265725U },
308 { 556206671U, 994U, 799626459U },
309 { 684907218U, 799U, 367789491U },
310 { 2121230701U, 931U, 2115467001U },
311 { 1668516451U, 644U, 3620590685U },
312 { 768046066U, 883U, 2034077390U },
313 { 1989159136U, 833U, 1195767305U },
314 { 536585145U, 996U, 3577259204U },
315 { 1008129373U, 642U, 1478080776U },
316 { 1740775604U, 939U, 1264980372U },
317 { 1967883163U, 508U, 10734624U },
318 { 1923019697U, 730U, 3821419629U },
319 { 442079932U, 560U, 3440032343U },
320 { 1961302714U, 845U, 841962572U },
321 { 2030205964U, 962U, 1325144227U },
322 { 1160407529U, 507U, 240940858U },
323 { 635482502U, 779U, 4200489746U },
324 { 1252788931U, 699U, 867195434U },
325 { 1961817131U, 719U, 668237657U },
326 { 1071468216U, 983U, 917876630U },
327 { 1281848367U, 932U, 1003100039U },
328 { 582537119U, 780U, 1127273778U },
329 { 1973672777U, 853U, 1071368872U },
330 { 1896756996U, 762U, 1127851055U },
331 { 847917054U, 500U, 1717499075U },
332 { 1240520510U, 951U, 2849576657U },
333 { 1685071682U, 567U, 1961810396U },
334 { 1516232129U, 557U, 3173877U },
335 { 1208118903U, 612U, 1613145022U },
336 { 1817269927U, 693U, 4279122573U },
337 { 1510091701U, 717U, 638191229U },
338 { 365916850U, 807U, 600424314U },
339 { 399324359U, 702U, 1803598116U },
340 { 1318480274U, 779U, 2074237022U },
341 { 697758115U, 840U, 1483639402U },
342 { 1696507773U, 840U, 577415447U },
343 { 2081979121U, 981U, 3041486449U },
344 { 955646687U, 742U, 3846494357U },
345 { 1250683506U, 749U, 836419859U },
346 { 595003102U, 534U, 366794109U },
347 { 47485338U, 558U, 3521120834U },
348 { 619433479U, 610U, 3991783875U },
349 { 704096520U, 518U, 4139493852U },
350 { 1712224984U, 606U, 2393312003U },
351 { 1318233152U, 922U, 3880361134U },
352 { 855572992U, 761U, 1472974787U },
353 { 64721421U, 703U, 683860550U },
354 { 678931758U, 840U, 380616043U },
355 { 692711973U, 778U, 1382361947U },
356 { 677703619U, 530U, 2826914161U },
357 { 92393223U, 586U, 1522128471U },
358 { 1222592920U, 743U, 3466726667U },
359 { 358288986U, 695U, 1091956998U },
360 { 1935056945U, 958U, 514864477U },
361 { 735675993U, 990U, 1294239989U },
362 { 1560089402U, 897U, 2238551287U },
363 { 70616361U, 829U, 22483098U },
364 { 368234700U, 731U, 2913875084U },
365 { 20221190U, 879U, 1564152970U },
366 { 539444654U, 682U, 1835141259U },
367 { 1314987297U, 840U, 1801114136U },
368 { 2019295544U, 645U, 3286438930U },
369 { 469023838U, 716U, 1637918202U },
370 { 1843754496U, 653U, 2562092152U },
371 { 400672036U, 809U, 4264212785U },
372 { 404722249U, 965U, 2704116999U },
373 { 600702209U, 758U, 584979986U },
374 { 519953954U, 667U, 2574436237U },
375 { 1658071126U, 694U, 2214569490U },
376 { 420480037U, 749U, 3430010866U },
377 { 690103647U, 969U, 3700758083U },
378 { 1029424799U, 937U, 3787746841U },
379 { 2012608669U, 506U, 3362628973U },
380 { 1535432887U, 998U, 42610943U },
381 { 1330635533U, 857U, 3040806504U },
382 { 1223800550U, 539U, 3954229517U },
383 { 1322411537U, 680U, 3223250324U },
384 { 1877847898U, 945U, 2915147143U },
385 { 1646356099U, 874U, 965988280U },
386 { 805687536U, 744U, 4032277920U },
387 { 1948093210U, 633U, 1346597684U },
388 { 392609744U, 783U, 1636083295U },
389 { 690241304U, 770U, 1201031298U },
390 { 1360302965U, 696U, 1665394461U },
391 { 1220090946U, 780U, 1316922812U },
392 { 447092251U, 500U, 3438743375U },
393 { 1613868791U, 592U, 828546883U },
394 { 523430951U, 548U, 2552392304U },
395 { 726692899U, 810U, 1656872867U },
396 { 1364340021U, 836U, 3710513486U },
397 { 1986257729U, 931U, 935013962U },
398 { 407983964U, 921U, 728767059U },
399};
400
401static void __init prandom_state_selftest(void)
402{
403 int i, j, errors = 0, runs = 0;
404 bool error = false;
405
406 for (i = 0; i < ARRAY_SIZE(test1); i++) {
407 struct rnd_state state;
408
409 prandom_seed_very_weak(&state, test1[i].seed);
410 prandom_warmup(&state);
411
412 if (test1[i].result != prandom_u32_state(&state))
413 error = true;
414 }
415
416 if (error)
417 pr_warn("prandom: seed boundary self test failed\n");
418 else
419 pr_info("prandom: seed boundary self test passed\n");
420
421 for (i = 0; i < ARRAY_SIZE(test2); i++) {
422 struct rnd_state state;
423
424 prandom_seed_very_weak(&state, test2[i].seed);
425 prandom_warmup(&state);
426
427 for (j = 0; j < test2[i].iteration - 1; j++)
428 prandom_u32_state(&state);
429
430 if (test2[i].result != prandom_u32_state(&state))
431 errors++;
432
433 runs++;
434 cond_resched();
435 }
436
437 if (errors)
438 pr_warn("prandom: %d/%d self tests failed\n", errors, runs);
439 else
440 pr_info("prandom: %d self tests passed\n", runs);
441}
442#endif
diff --git a/lib/rwsem-spinlock.c b/lib/rwsem-spinlock.c
deleted file mode 100644
index 9be8a9144978..000000000000
--- a/lib/rwsem-spinlock.c
+++ /dev/null
@@ -1,296 +0,0 @@
1/* rwsem-spinlock.c: R/W semaphores: contention handling functions for
2 * generic spinlock implementation
3 *
4 * Copyright (c) 2001 David Howells (dhowells@redhat.com).
5 * - Derived partially from idea by Andrea Arcangeli <andrea@suse.de>
6 * - Derived also from comments by Linus
7 */
8#include <linux/rwsem.h>
9#include <linux/sched.h>
10#include <linux/export.h>
11
12enum rwsem_waiter_type {
13 RWSEM_WAITING_FOR_WRITE,
14 RWSEM_WAITING_FOR_READ
15};
16
17struct rwsem_waiter {
18 struct list_head list;
19 struct task_struct *task;
20 enum rwsem_waiter_type type;
21};
22
23int rwsem_is_locked(struct rw_semaphore *sem)
24{
25 int ret = 1;
26 unsigned long flags;
27
28 if (raw_spin_trylock_irqsave(&sem->wait_lock, flags)) {
29 ret = (sem->activity != 0);
30 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
31 }
32 return ret;
33}
34EXPORT_SYMBOL(rwsem_is_locked);
35
36/*
37 * initialise the semaphore
38 */
39void __init_rwsem(struct rw_semaphore *sem, const char *name,
40 struct lock_class_key *key)
41{
42#ifdef CONFIG_DEBUG_LOCK_ALLOC
43 /*
44 * Make sure we are not reinitializing a held semaphore:
45 */
46 debug_check_no_locks_freed((void *)sem, sizeof(*sem));
47 lockdep_init_map(&sem->dep_map, name, key, 0);
48#endif
49 sem->activity = 0;
50 raw_spin_lock_init(&sem->wait_lock);
51 INIT_LIST_HEAD(&sem->wait_list);
52}
53EXPORT_SYMBOL(__init_rwsem);
54
55/*
56 * handle the lock release when processes blocked on it that can now run
57 * - if we come here, then:
58 * - the 'active count' _reached_ zero
59 * - the 'waiting count' is non-zero
60 * - the spinlock must be held by the caller
61 * - woken process blocks are discarded from the list after having task zeroed
62 * - writers are only woken if wakewrite is non-zero
63 */
64static inline struct rw_semaphore *
65__rwsem_do_wake(struct rw_semaphore *sem, int wakewrite)
66{
67 struct rwsem_waiter *waiter;
68 struct task_struct *tsk;
69 int woken;
70
71 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
72
73 if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
74 if (wakewrite)
75 /* Wake up a writer. Note that we do not grant it the
76 * lock - it will have to acquire it when it runs. */
77 wake_up_process(waiter->task);
78 goto out;
79 }
80
81 /* grant an infinite number of read locks to the front of the queue */
82 woken = 0;
83 do {
84 struct list_head *next = waiter->list.next;
85
86 list_del(&waiter->list);
87 tsk = waiter->task;
88 smp_mb();
89 waiter->task = NULL;
90 wake_up_process(tsk);
91 put_task_struct(tsk);
92 woken++;
93 if (next == &sem->wait_list)
94 break;
95 waiter = list_entry(next, struct rwsem_waiter, list);
96 } while (waiter->type != RWSEM_WAITING_FOR_WRITE);
97
98 sem->activity += woken;
99
100 out:
101 return sem;
102}
103
104/*
105 * wake a single writer
106 */
107static inline struct rw_semaphore *
108__rwsem_wake_one_writer(struct rw_semaphore *sem)
109{
110 struct rwsem_waiter *waiter;
111
112 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
113 wake_up_process(waiter->task);
114
115 return sem;
116}
117
118/*
119 * get a read lock on the semaphore
120 */
121void __sched __down_read(struct rw_semaphore *sem)
122{
123 struct rwsem_waiter waiter;
124 struct task_struct *tsk;
125 unsigned long flags;
126
127 raw_spin_lock_irqsave(&sem->wait_lock, flags);
128
129 if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
130 /* granted */
131 sem->activity++;
132 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
133 goto out;
134 }
135
136 tsk = current;
137 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
138
139 /* set up my own style of waitqueue */
140 waiter.task = tsk;
141 waiter.type = RWSEM_WAITING_FOR_READ;
142 get_task_struct(tsk);
143
144 list_add_tail(&waiter.list, &sem->wait_list);
145
146 /* we don't need to touch the semaphore struct anymore */
147 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
148
149 /* wait to be given the lock */
150 for (;;) {
151 if (!waiter.task)
152 break;
153 schedule();
154 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
155 }
156
157 tsk->state = TASK_RUNNING;
158 out:
159 ;
160}
161
162/*
163 * trylock for reading -- returns 1 if successful, 0 if contention
164 */
165int __down_read_trylock(struct rw_semaphore *sem)
166{
167 unsigned long flags;
168 int ret = 0;
169
170
171 raw_spin_lock_irqsave(&sem->wait_lock, flags);
172
173 if (sem->activity >= 0 && list_empty(&sem->wait_list)) {
174 /* granted */
175 sem->activity++;
176 ret = 1;
177 }
178
179 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
180
181 return ret;
182}
183
184/*
185 * get a write lock on the semaphore
186 */
187void __sched __down_write_nested(struct rw_semaphore *sem, int subclass)
188{
189 struct rwsem_waiter waiter;
190 struct task_struct *tsk;
191 unsigned long flags;
192
193 raw_spin_lock_irqsave(&sem->wait_lock, flags);
194
195 /* set up my own style of waitqueue */
196 tsk = current;
197 waiter.task = tsk;
198 waiter.type = RWSEM_WAITING_FOR_WRITE;
199 list_add_tail(&waiter.list, &sem->wait_list);
200
201 /* wait for someone to release the lock */
202 for (;;) {
203 /*
204 * That is the key to support write lock stealing: allows the
205 * task already on CPU to get the lock soon rather than put
206 * itself into sleep and waiting for system woke it or someone
207 * else in the head of the wait list up.
208 */
209 if (sem->activity == 0)
210 break;
211 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
212 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
213 schedule();
214 raw_spin_lock_irqsave(&sem->wait_lock, flags);
215 }
216 /* got the lock */
217 sem->activity = -1;
218 list_del(&waiter.list);
219
220 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
221}
222
223void __sched __down_write(struct rw_semaphore *sem)
224{
225 __down_write_nested(sem, 0);
226}
227
228/*
229 * trylock for writing -- returns 1 if successful, 0 if contention
230 */
231int __down_write_trylock(struct rw_semaphore *sem)
232{
233 unsigned long flags;
234 int ret = 0;
235
236 raw_spin_lock_irqsave(&sem->wait_lock, flags);
237
238 if (sem->activity == 0) {
239 /* got the lock */
240 sem->activity = -1;
241 ret = 1;
242 }
243
244 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
245
246 return ret;
247}
248
249/*
250 * release a read lock on the semaphore
251 */
252void __up_read(struct rw_semaphore *sem)
253{
254 unsigned long flags;
255
256 raw_spin_lock_irqsave(&sem->wait_lock, flags);
257
258 if (--sem->activity == 0 && !list_empty(&sem->wait_list))
259 sem = __rwsem_wake_one_writer(sem);
260
261 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
262}
263
264/*
265 * release a write lock on the semaphore
266 */
267void __up_write(struct rw_semaphore *sem)
268{
269 unsigned long flags;
270
271 raw_spin_lock_irqsave(&sem->wait_lock, flags);
272
273 sem->activity = 0;
274 if (!list_empty(&sem->wait_list))
275 sem = __rwsem_do_wake(sem, 1);
276
277 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
278}
279
280/*
281 * downgrade a write lock into a read lock
282 * - just wake up any readers at the front of the queue
283 */
284void __downgrade_write(struct rw_semaphore *sem)
285{
286 unsigned long flags;
287
288 raw_spin_lock_irqsave(&sem->wait_lock, flags);
289
290 sem->activity = 1;
291 if (!list_empty(&sem->wait_list))
292 sem = __rwsem_do_wake(sem, 0);
293
294 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
295}
296
diff --git a/lib/rwsem.c b/lib/rwsem.c
deleted file mode 100644
index 19c5fa95e0b4..000000000000
--- a/lib/rwsem.c
+++ /dev/null
@@ -1,293 +0,0 @@
1/* rwsem.c: R/W semaphores: contention handling functions
2 *
3 * Written by David Howells (dhowells@redhat.com).
4 * Derived from arch/i386/kernel/semaphore.c
5 *
6 * Writer lock-stealing by Alex Shi <alex.shi@intel.com>
7 * and Michel Lespinasse <walken@google.com>
8 */
9#include <linux/rwsem.h>
10#include <linux/sched.h>
11#include <linux/init.h>
12#include <linux/export.h>
13
14/*
15 * Initialize an rwsem:
16 */
17void __init_rwsem(struct rw_semaphore *sem, const char *name,
18 struct lock_class_key *key)
19{
20#ifdef CONFIG_DEBUG_LOCK_ALLOC
21 /*
22 * Make sure we are not reinitializing a held semaphore:
23 */
24 debug_check_no_locks_freed((void *)sem, sizeof(*sem));
25 lockdep_init_map(&sem->dep_map, name, key, 0);
26#endif
27 sem->count = RWSEM_UNLOCKED_VALUE;
28 raw_spin_lock_init(&sem->wait_lock);
29 INIT_LIST_HEAD(&sem->wait_list);
30}
31
32EXPORT_SYMBOL(__init_rwsem);
33
34enum rwsem_waiter_type {
35 RWSEM_WAITING_FOR_WRITE,
36 RWSEM_WAITING_FOR_READ
37};
38
39struct rwsem_waiter {
40 struct list_head list;
41 struct task_struct *task;
42 enum rwsem_waiter_type type;
43};
44
45enum rwsem_wake_type {
46 RWSEM_WAKE_ANY, /* Wake whatever's at head of wait list */
47 RWSEM_WAKE_READERS, /* Wake readers only */
48 RWSEM_WAKE_READ_OWNED /* Waker thread holds the read lock */
49};
50
51/*
52 * handle the lock release when processes blocked on it that can now run
53 * - if we come here from up_xxxx(), then:
54 * - the 'active part' of count (&0x0000ffff) reached 0 (but may have changed)
55 * - the 'waiting part' of count (&0xffff0000) is -ve (and will still be so)
56 * - there must be someone on the queue
57 * - the spinlock must be held by the caller
58 * - woken process blocks are discarded from the list after having task zeroed
59 * - writers are only woken if downgrading is false
60 */
61static struct rw_semaphore *
62__rwsem_do_wake(struct rw_semaphore *sem, enum rwsem_wake_type wake_type)
63{
64 struct rwsem_waiter *waiter;
65 struct task_struct *tsk;
66 struct list_head *next;
67 long oldcount, woken, loop, adjustment;
68
69 waiter = list_entry(sem->wait_list.next, struct rwsem_waiter, list);
70 if (waiter->type == RWSEM_WAITING_FOR_WRITE) {
71 if (wake_type == RWSEM_WAKE_ANY)
72 /* Wake writer at the front of the queue, but do not
73 * grant it the lock yet as we want other writers
74 * to be able to steal it. Readers, on the other hand,
75 * will block as they will notice the queued writer.
76 */
77 wake_up_process(waiter->task);
78 goto out;
79 }
80
81 /* Writers might steal the lock before we grant it to the next reader.
82 * We prefer to do the first reader grant before counting readers
83 * so we can bail out early if a writer stole the lock.
84 */
85 adjustment = 0;
86 if (wake_type != RWSEM_WAKE_READ_OWNED) {
87 adjustment = RWSEM_ACTIVE_READ_BIAS;
88 try_reader_grant:
89 oldcount = rwsem_atomic_update(adjustment, sem) - adjustment;
90 if (unlikely(oldcount < RWSEM_WAITING_BIAS)) {
91 /* A writer stole the lock. Undo our reader grant. */
92 if (rwsem_atomic_update(-adjustment, sem) &
93 RWSEM_ACTIVE_MASK)
94 goto out;
95 /* Last active locker left. Retry waking readers. */
96 goto try_reader_grant;
97 }
98 }
99
100 /* Grant an infinite number of read locks to the readers at the front
101 * of the queue. Note we increment the 'active part' of the count by
102 * the number of readers before waking any processes up.
103 */
104 woken = 0;
105 do {
106 woken++;
107
108 if (waiter->list.next == &sem->wait_list)
109 break;
110
111 waiter = list_entry(waiter->list.next,
112 struct rwsem_waiter, list);
113
114 } while (waiter->type != RWSEM_WAITING_FOR_WRITE);
115
116 adjustment = woken * RWSEM_ACTIVE_READ_BIAS - adjustment;
117 if (waiter->type != RWSEM_WAITING_FOR_WRITE)
118 /* hit end of list above */
119 adjustment -= RWSEM_WAITING_BIAS;
120
121 if (adjustment)
122 rwsem_atomic_add(adjustment, sem);
123
124 next = sem->wait_list.next;
125 loop = woken;
126 do {
127 waiter = list_entry(next, struct rwsem_waiter, list);
128 next = waiter->list.next;
129 tsk = waiter->task;
130 smp_mb();
131 waiter->task = NULL;
132 wake_up_process(tsk);
133 put_task_struct(tsk);
134 } while (--loop);
135
136 sem->wait_list.next = next;
137 next->prev = &sem->wait_list;
138
139 out:
140 return sem;
141}
142
143/*
144 * wait for the read lock to be granted
145 */
146struct rw_semaphore __sched *rwsem_down_read_failed(struct rw_semaphore *sem)
147{
148 long count, adjustment = -RWSEM_ACTIVE_READ_BIAS;
149 struct rwsem_waiter waiter;
150 struct task_struct *tsk = current;
151
152 /* set up my own style of waitqueue */
153 waiter.task = tsk;
154 waiter.type = RWSEM_WAITING_FOR_READ;
155 get_task_struct(tsk);
156
157 raw_spin_lock_irq(&sem->wait_lock);
158 if (list_empty(&sem->wait_list))
159 adjustment += RWSEM_WAITING_BIAS;
160 list_add_tail(&waiter.list, &sem->wait_list);
161
162 /* we're now waiting on the lock, but no longer actively locking */
163 count = rwsem_atomic_update(adjustment, sem);
164
165 /* If there are no active locks, wake the front queued process(es).
166 *
167 * If there are no writers and we are first in the queue,
168 * wake our own waiter to join the existing active readers !
169 */
170 if (count == RWSEM_WAITING_BIAS ||
171 (count > RWSEM_WAITING_BIAS &&
172 adjustment != -RWSEM_ACTIVE_READ_BIAS))
173 sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
174
175 raw_spin_unlock_irq(&sem->wait_lock);
176
177 /* wait to be given the lock */
178 while (true) {
179 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
180 if (!waiter.task)
181 break;
182 schedule();
183 }
184
185 tsk->state = TASK_RUNNING;
186
187 return sem;
188}
189
190/*
191 * wait until we successfully acquire the write lock
192 */
193struct rw_semaphore __sched *rwsem_down_write_failed(struct rw_semaphore *sem)
194{
195 long count, adjustment = -RWSEM_ACTIVE_WRITE_BIAS;
196 struct rwsem_waiter waiter;
197 struct task_struct *tsk = current;
198
199 /* set up my own style of waitqueue */
200 waiter.task = tsk;
201 waiter.type = RWSEM_WAITING_FOR_WRITE;
202
203 raw_spin_lock_irq(&sem->wait_lock);
204 if (list_empty(&sem->wait_list))
205 adjustment += RWSEM_WAITING_BIAS;
206 list_add_tail(&waiter.list, &sem->wait_list);
207
208 /* we're now waiting on the lock, but no longer actively locking */
209 count = rwsem_atomic_update(adjustment, sem);
210
211 /* If there were already threads queued before us and there are no
212 * active writers, the lock must be read owned; so we try to wake
213 * any read locks that were queued ahead of us. */
214 if (count > RWSEM_WAITING_BIAS &&
215 adjustment == -RWSEM_ACTIVE_WRITE_BIAS)
216 sem = __rwsem_do_wake(sem, RWSEM_WAKE_READERS);
217
218 /* wait until we successfully acquire the lock */
219 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
220 while (true) {
221 if (!(count & RWSEM_ACTIVE_MASK)) {
222 /* Try acquiring the write lock. */
223 count = RWSEM_ACTIVE_WRITE_BIAS;
224 if (!list_is_singular(&sem->wait_list))
225 count += RWSEM_WAITING_BIAS;
226
227 if (sem->count == RWSEM_WAITING_BIAS &&
228 cmpxchg(&sem->count, RWSEM_WAITING_BIAS, count) ==
229 RWSEM_WAITING_BIAS)
230 break;
231 }
232
233 raw_spin_unlock_irq(&sem->wait_lock);
234
235 /* Block until there are no active lockers. */
236 do {
237 schedule();
238 set_task_state(tsk, TASK_UNINTERRUPTIBLE);
239 } while ((count = sem->count) & RWSEM_ACTIVE_MASK);
240
241 raw_spin_lock_irq(&sem->wait_lock);
242 }
243
244 list_del(&waiter.list);
245 raw_spin_unlock_irq(&sem->wait_lock);
246 tsk->state = TASK_RUNNING;
247
248 return sem;
249}
250
251/*
252 * handle waking up a waiter on the semaphore
253 * - up_read/up_write has decremented the active part of count if we come here
254 */
255struct rw_semaphore *rwsem_wake(struct rw_semaphore *sem)
256{
257 unsigned long flags;
258
259 raw_spin_lock_irqsave(&sem->wait_lock, flags);
260
261 /* do nothing if list empty */
262 if (!list_empty(&sem->wait_list))
263 sem = __rwsem_do_wake(sem, RWSEM_WAKE_ANY);
264
265 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
266
267 return sem;
268}
269
270/*
271 * downgrade a write lock into a read lock
272 * - caller incremented waiting part of count and discovered it still negative
273 * - just wake up any readers at the front of the queue
274 */
275struct rw_semaphore *rwsem_downgrade_wake(struct rw_semaphore *sem)
276{
277 unsigned long flags;
278
279 raw_spin_lock_irqsave(&sem->wait_lock, flags);
280
281 /* do nothing if list empty */
282 if (!list_empty(&sem->wait_list))
283 sem = __rwsem_do_wake(sem, RWSEM_WAKE_READ_OWNED);
284
285 raw_spin_unlock_irqrestore(&sem->wait_lock, flags);
286
287 return sem;
288}
289
290EXPORT_SYMBOL(rwsem_down_read_failed);
291EXPORT_SYMBOL(rwsem_down_write_failed);
292EXPORT_SYMBOL(rwsem_wake);
293EXPORT_SYMBOL(rwsem_downgrade_wake);
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index a685c8a79578..d16fa295ae1d 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -577,7 +577,8 @@ void sg_miter_stop(struct sg_mapping_iter *miter)
577 miter->__offset += miter->consumed; 577 miter->__offset += miter->consumed;
578 miter->__remaining -= miter->consumed; 578 miter->__remaining -= miter->consumed;
579 579
580 if (miter->__flags & SG_MITER_TO_SG) 580 if ((miter->__flags & SG_MITER_TO_SG) &&
581 !PageSlab(miter->page))
581 flush_kernel_dcache_page(miter->page); 582 flush_kernel_dcache_page(miter->page);
582 583
583 if (miter->__flags & SG_MITER_ATOMIC) { 584 if (miter->__flags & SG_MITER_ATOMIC) {
diff --git a/lib/show_mem.c b/lib/show_mem.c
index b7c72311ad0c..5847a4921b8e 100644
--- a/lib/show_mem.c
+++ b/lib/show_mem.c
@@ -12,8 +12,7 @@
12void show_mem(unsigned int filter) 12void show_mem(unsigned int filter)
13{ 13{
14 pg_data_t *pgdat; 14 pg_data_t *pgdat;
15 unsigned long total = 0, reserved = 0, shared = 0, 15 unsigned long total = 0, reserved = 0, highmem = 0;
16 nonshared = 0, highmem = 0;
17 16
18 printk("Mem-Info:\n"); 17 printk("Mem-Info:\n");
19 show_free_areas(filter); 18 show_free_areas(filter);
@@ -22,43 +21,27 @@ void show_mem(unsigned int filter)
22 return; 21 return;
23 22
24 for_each_online_pgdat(pgdat) { 23 for_each_online_pgdat(pgdat) {
25 unsigned long i, flags; 24 unsigned long flags;
25 int zoneid;
26 26
27 pgdat_resize_lock(pgdat, &flags); 27 pgdat_resize_lock(pgdat, &flags);
28 for (i = 0; i < pgdat->node_spanned_pages; i++) { 28 for (zoneid = 0; zoneid < MAX_NR_ZONES; zoneid++) {
29 struct page *page; 29 struct zone *zone = &pgdat->node_zones[zoneid];
30 unsigned long pfn = pgdat->node_start_pfn + i; 30 if (!populated_zone(zone))
31
32 if (unlikely(!(i % MAX_ORDER_NR_PAGES)))
33 touch_nmi_watchdog();
34
35 if (!pfn_valid(pfn))
36 continue; 31 continue;
37 32
38 page = pfn_to_page(pfn); 33 total += zone->present_pages;
39 34 reserved += zone->present_pages - zone->managed_pages;
40 if (PageHighMem(page))
41 highmem++;
42 35
43 if (PageReserved(page)) 36 if (is_highmem_idx(zoneid))
44 reserved++; 37 highmem += zone->present_pages;
45 else if (page_count(page) == 1)
46 nonshared++;
47 else if (page_count(page) > 1)
48 shared += page_count(page) - 1;
49
50 total++;
51 } 38 }
52 pgdat_resize_unlock(pgdat, &flags); 39 pgdat_resize_unlock(pgdat, &flags);
53 } 40 }
54 41
55 printk("%lu pages RAM\n", total); 42 printk("%lu pages RAM\n", total);
56#ifdef CONFIG_HIGHMEM 43 printk("%lu pages HighMem/MovableOnly\n", highmem);
57 printk("%lu pages HighMem\n", highmem);
58#endif
59 printk("%lu pages reserved\n", reserved); 44 printk("%lu pages reserved\n", reserved);
60 printk("%lu pages shared\n", shared);
61 printk("%lu pages non-shared\n", nonshared);
62#ifdef CONFIG_QUICKLIST 45#ifdef CONFIG_QUICKLIST
63 printk("%lu pages in pagetable cache\n", 46 printk("%lu pages in pagetable cache\n",
64 quicklist_total_size()); 47 quicklist_total_size());
diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c
index 4c0d0e51d49e..04abe53f12a1 100644
--- a/lib/smp_processor_id.c
+++ b/lib/smp_processor_id.c
@@ -9,10 +9,9 @@
9 9
10notrace unsigned int debug_smp_processor_id(void) 10notrace unsigned int debug_smp_processor_id(void)
11{ 11{
12 unsigned long preempt_count = preempt_count();
13 int this_cpu = raw_smp_processor_id(); 12 int this_cpu = raw_smp_processor_id();
14 13
15 if (likely(preempt_count)) 14 if (likely(preempt_count()))
16 goto out; 15 goto out;
17 16
18 if (irqs_disabled()) 17 if (irqs_disabled())
diff --git a/lib/spinlock_debug.c b/lib/spinlock_debug.c
deleted file mode 100644
index 0374a596cffa..000000000000
--- a/lib/spinlock_debug.c
+++ /dev/null
@@ -1,302 +0,0 @@
1/*
2 * Copyright 2005, Red Hat, Inc., Ingo Molnar
3 * Released under the General Public License (GPL).
4 *
5 * This file contains the spinlock/rwlock implementations for
6 * DEBUG_SPINLOCK.
7 */
8
9#include <linux/spinlock.h>
10#include <linux/nmi.h>
11#include <linux/interrupt.h>
12#include <linux/debug_locks.h>
13#include <linux/delay.h>
14#include <linux/export.h>
15
16void __raw_spin_lock_init(raw_spinlock_t *lock, const char *name,
17 struct lock_class_key *key)
18{
19#ifdef CONFIG_DEBUG_LOCK_ALLOC
20 /*
21 * Make sure we are not reinitializing a held lock:
22 */
23 debug_check_no_locks_freed((void *)lock, sizeof(*lock));
24 lockdep_init_map(&lock->dep_map, name, key, 0);
25#endif
26 lock->raw_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
27 lock->magic = SPINLOCK_MAGIC;
28 lock->owner = SPINLOCK_OWNER_INIT;
29 lock->owner_cpu = -1;
30}
31
32EXPORT_SYMBOL(__raw_spin_lock_init);
33
34void __rwlock_init(rwlock_t *lock, const char *name,
35 struct lock_class_key *key)
36{
37#ifdef CONFIG_DEBUG_LOCK_ALLOC
38 /*
39 * Make sure we are not reinitializing a held lock:
40 */
41 debug_check_no_locks_freed((void *)lock, sizeof(*lock));
42 lockdep_init_map(&lock->dep_map, name, key, 0);
43#endif
44 lock->raw_lock = (arch_rwlock_t) __ARCH_RW_LOCK_UNLOCKED;
45 lock->magic = RWLOCK_MAGIC;
46 lock->owner = SPINLOCK_OWNER_INIT;
47 lock->owner_cpu = -1;
48}
49
50EXPORT_SYMBOL(__rwlock_init);
51
52static void spin_dump(raw_spinlock_t *lock, const char *msg)
53{
54 struct task_struct *owner = NULL;
55
56 if (lock->owner && lock->owner != SPINLOCK_OWNER_INIT)
57 owner = lock->owner;
58 printk(KERN_EMERG "BUG: spinlock %s on CPU#%d, %s/%d\n",
59 msg, raw_smp_processor_id(),
60 current->comm, task_pid_nr(current));
61 printk(KERN_EMERG " lock: %pS, .magic: %08x, .owner: %s/%d, "
62 ".owner_cpu: %d\n",
63 lock, lock->magic,
64 owner ? owner->comm : "<none>",
65 owner ? task_pid_nr(owner) : -1,
66 lock->owner_cpu);
67 dump_stack();
68}
69
70static void spin_bug(raw_spinlock_t *lock, const char *msg)
71{
72 if (!debug_locks_off())
73 return;
74
75 spin_dump(lock, msg);
76}
77
78#define SPIN_BUG_ON(cond, lock, msg) if (unlikely(cond)) spin_bug(lock, msg)
79
80static inline void
81debug_spin_lock_before(raw_spinlock_t *lock)
82{
83 SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
84 SPIN_BUG_ON(lock->owner == current, lock, "recursion");
85 SPIN_BUG_ON(lock->owner_cpu == raw_smp_processor_id(),
86 lock, "cpu recursion");
87}
88
89static inline void debug_spin_lock_after(raw_spinlock_t *lock)
90{
91 lock->owner_cpu = raw_smp_processor_id();
92 lock->owner = current;
93}
94
95static inline void debug_spin_unlock(raw_spinlock_t *lock)
96{
97 SPIN_BUG_ON(lock->magic != SPINLOCK_MAGIC, lock, "bad magic");
98 SPIN_BUG_ON(!raw_spin_is_locked(lock), lock, "already unlocked");
99 SPIN_BUG_ON(lock->owner != current, lock, "wrong owner");
100 SPIN_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
101 lock, "wrong CPU");
102 lock->owner = SPINLOCK_OWNER_INIT;
103 lock->owner_cpu = -1;
104}
105
106static void __spin_lock_debug(raw_spinlock_t *lock)
107{
108 u64 i;
109 u64 loops = loops_per_jiffy * HZ;
110
111 for (i = 0; i < loops; i++) {
112 if (arch_spin_trylock(&lock->raw_lock))
113 return;
114 __delay(1);
115 }
116 /* lockup suspected: */
117 spin_dump(lock, "lockup suspected");
118#ifdef CONFIG_SMP
119 trigger_all_cpu_backtrace();
120#endif
121
122 /*
123 * The trylock above was causing a livelock. Give the lower level arch
124 * specific lock code a chance to acquire the lock. We have already
125 * printed a warning/backtrace at this point. The non-debug arch
126 * specific code might actually succeed in acquiring the lock. If it is
127 * not successful, the end-result is the same - there is no forward
128 * progress.
129 */
130 arch_spin_lock(&lock->raw_lock);
131}
132
133void do_raw_spin_lock(raw_spinlock_t *lock)
134{
135 debug_spin_lock_before(lock);
136 if (unlikely(!arch_spin_trylock(&lock->raw_lock)))
137 __spin_lock_debug(lock);
138 debug_spin_lock_after(lock);
139}
140
141int do_raw_spin_trylock(raw_spinlock_t *lock)
142{
143 int ret = arch_spin_trylock(&lock->raw_lock);
144
145 if (ret)
146 debug_spin_lock_after(lock);
147#ifndef CONFIG_SMP
148 /*
149 * Must not happen on UP:
150 */
151 SPIN_BUG_ON(!ret, lock, "trylock failure on UP");
152#endif
153 return ret;
154}
155
156void do_raw_spin_unlock(raw_spinlock_t *lock)
157{
158 debug_spin_unlock(lock);
159 arch_spin_unlock(&lock->raw_lock);
160}
161
162static void rwlock_bug(rwlock_t *lock, const char *msg)
163{
164 if (!debug_locks_off())
165 return;
166
167 printk(KERN_EMERG "BUG: rwlock %s on CPU#%d, %s/%d, %p\n",
168 msg, raw_smp_processor_id(), current->comm,
169 task_pid_nr(current), lock);
170 dump_stack();
171}
172
173#define RWLOCK_BUG_ON(cond, lock, msg) if (unlikely(cond)) rwlock_bug(lock, msg)
174
175#if 0 /* __write_lock_debug() can lock up - maybe this can too? */
176static void __read_lock_debug(rwlock_t *lock)
177{
178 u64 i;
179 u64 loops = loops_per_jiffy * HZ;
180 int print_once = 1;
181
182 for (;;) {
183 for (i = 0; i < loops; i++) {
184 if (arch_read_trylock(&lock->raw_lock))
185 return;
186 __delay(1);
187 }
188 /* lockup suspected: */
189 if (print_once) {
190 print_once = 0;
191 printk(KERN_EMERG "BUG: read-lock lockup on CPU#%d, "
192 "%s/%d, %p\n",
193 raw_smp_processor_id(), current->comm,
194 current->pid, lock);
195 dump_stack();
196 }
197 }
198}
199#endif
200
201void do_raw_read_lock(rwlock_t *lock)
202{
203 RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
204 arch_read_lock(&lock->raw_lock);
205}
206
207int do_raw_read_trylock(rwlock_t *lock)
208{
209 int ret = arch_read_trylock(&lock->raw_lock);
210
211#ifndef CONFIG_SMP
212 /*
213 * Must not happen on UP:
214 */
215 RWLOCK_BUG_ON(!ret, lock, "trylock failure on UP");
216#endif
217 return ret;
218}
219
220void do_raw_read_unlock(rwlock_t *lock)
221{
222 RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
223 arch_read_unlock(&lock->raw_lock);
224}
225
226static inline void debug_write_lock_before(rwlock_t *lock)
227{
228 RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
229 RWLOCK_BUG_ON(lock->owner == current, lock, "recursion");
230 RWLOCK_BUG_ON(lock->owner_cpu == raw_smp_processor_id(),
231 lock, "cpu recursion");
232}
233
234static inline void debug_write_lock_after(rwlock_t *lock)
235{
236 lock->owner_cpu = raw_smp_processor_id();
237 lock->owner = current;
238}
239
240static inline void debug_write_unlock(rwlock_t *lock)
241{
242 RWLOCK_BUG_ON(lock->magic != RWLOCK_MAGIC, lock, "bad magic");
243 RWLOCK_BUG_ON(lock->owner != current, lock, "wrong owner");
244 RWLOCK_BUG_ON(lock->owner_cpu != raw_smp_processor_id(),
245 lock, "wrong CPU");
246 lock->owner = SPINLOCK_OWNER_INIT;
247 lock->owner_cpu = -1;
248}
249
250#if 0 /* This can cause lockups */
251static void __write_lock_debug(rwlock_t *lock)
252{
253 u64 i;
254 u64 loops = loops_per_jiffy * HZ;
255 int print_once = 1;
256
257 for (;;) {
258 for (i = 0; i < loops; i++) {
259 if (arch_write_trylock(&lock->raw_lock))
260 return;
261 __delay(1);
262 }
263 /* lockup suspected: */
264 if (print_once) {
265 print_once = 0;
266 printk(KERN_EMERG "BUG: write-lock lockup on CPU#%d, "
267 "%s/%d, %p\n",
268 raw_smp_processor_id(), current->comm,
269 current->pid, lock);
270 dump_stack();
271 }
272 }
273}
274#endif
275
276void do_raw_write_lock(rwlock_t *lock)
277{
278 debug_write_lock_before(lock);
279 arch_write_lock(&lock->raw_lock);
280 debug_write_lock_after(lock);
281}
282
283int do_raw_write_trylock(rwlock_t *lock)
284{
285 int ret = arch_write_trylock(&lock->raw_lock);
286
287 if (ret)
288 debug_write_lock_after(lock);
289#ifndef CONFIG_SMP
290 /*
291 * Must not happen on UP:
292 */
293 RWLOCK_BUG_ON(!ret, lock, "trylock failure on UP");
294#endif
295 return ret;
296}
297
298void do_raw_write_unlock(rwlock_t *lock)
299{
300 debug_write_unlock(lock);
301 arch_write_unlock(&lock->raw_lock);
302}
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index 4e8686c7e5a4..e4399fa65ad6 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -38,6 +38,9 @@
38#include <linux/bootmem.h> 38#include <linux/bootmem.h>
39#include <linux/iommu-helper.h> 39#include <linux/iommu-helper.h>
40 40
41#define CREATE_TRACE_POINTS
42#include <trace/events/swiotlb.h>
43
41#define OFFSET(val,align) ((unsigned long) \ 44#define OFFSET(val,align) ((unsigned long) \
42 ( (val) & ( (align) - 1))) 45 ( (val) & ( (align) - 1)))
43 46
@@ -502,6 +505,7 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev,
502 505
503not_found: 506not_found:
504 spin_unlock_irqrestore(&io_tlb_lock, flags); 507 spin_unlock_irqrestore(&io_tlb_lock, flags);
508 dev_warn(hwdev, "swiotlb buffer is full\n");
505 return SWIOTLB_MAP_ERROR; 509 return SWIOTLB_MAP_ERROR;
506found: 510found:
507 spin_unlock_irqrestore(&io_tlb_lock, flags); 511 spin_unlock_irqrestore(&io_tlb_lock, flags);
@@ -726,6 +730,8 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
726 if (dma_capable(dev, dev_addr, size) && !swiotlb_force) 730 if (dma_capable(dev, dev_addr, size) && !swiotlb_force)
727 return dev_addr; 731 return dev_addr;
728 732
733 trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force);
734
729 /* Oh well, have to allocate and map a bounce buffer. */ 735 /* Oh well, have to allocate and map a bounce buffer. */
730 map = map_single(dev, phys, size, dir); 736 map = map_single(dev, phys, size, dir);
731 if (map == SWIOTLB_MAP_ERROR) { 737 if (map == SWIOTLB_MAP_ERROR) {
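
CREATE_TRACE_POINTS must be defined in exactly one translation unit before the trace header is included, which is what the hunk above does for <trace/events/swiotlb.h>. Such a header broadly follows the shape sketched below; the field layout and printk format here are hypothetical, only the boilerplate conventions are the point.

#undef TRACE_SYSTEM
#define TRACE_SYSTEM swiotlb

#if !defined(_TRACE_SWIOTLB_H) || defined(TRACE_HEADER_MULTI_READ)
#define _TRACE_SWIOTLB_H

#include <linux/tracepoint.h>

TRACE_EVENT(swiotlb_bounced,
	TP_PROTO(struct device *dev, dma_addr_t dev_addr, size_t size, int force),
	TP_ARGS(dev, dev_addr, size, force),

	TP_STRUCT__entry(
		__string(dev_name, dev_name(dev))
		__field(u64, dev_addr)
		__field(size_t, size)
		__field(int, force)
	),

	TP_fast_assign(
		__assign_str(dev_name, dev_name(dev));
		__entry->dev_addr = dev_addr;
		__entry->size = size;
		__entry->force = force;
	),

	TP_printk("dev_name: %s dev_addr: %llx size: %zu force: %d",
		  __get_str(dev_name), __entry->dev_addr,
		  __entry->size, __entry->force)
);

#endif /* _TRACE_SWIOTLB_H */

/* This part must be outside protection */
#include <trace/define_trace.h>
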
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 26559bdb4c49..10909c571494 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -27,6 +27,7 @@
27#include <linux/uaccess.h> 27#include <linux/uaccess.h>
28#include <linux/ioport.h> 28#include <linux/ioport.h>
29#include <linux/dcache.h> 29#include <linux/dcache.h>
30#include <linux/cred.h>
30#include <net/addrconf.h> 31#include <net/addrconf.h>
31 32
32#include <asm/page.h> /* for PAGE_SIZE */ 33#include <asm/page.h> /* for PAGE_SIZE */
@@ -1218,6 +1219,8 @@ int kptr_restrict __read_mostly;
1218 * The maximum supported length is 64 bytes of the input. Consider 1219 * The maximum supported length is 64 bytes of the input. Consider
1219 * to use print_hex_dump() for the larger input. 1220 * to use print_hex_dump() for the larger input.
1220 * - 'a' For a phys_addr_t type and its derivative types (passed by reference) 1221 * - 'a' For a phys_addr_t type and its derivative types (passed by reference)
1222 * - 'd[234]' For a dentry name (optionally 2-4 last components)
1223 * - 'D[234]' Same as 'd' but for a struct file
1221 * 1224 *
1222 * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 1225 * Note: The difference between 'S' and 'F' is that on ia64 and ppc64
1223 * function pointers are really function descriptors, which contain a 1226 * function pointers are really function descriptors, which contain a
@@ -1312,11 +1315,37 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
1312 spec.field_width = default_width; 1315 spec.field_width = default_width;
1313 return string(buf, end, "pK-error", spec); 1316 return string(buf, end, "pK-error", spec);
1314 } 1317 }
1315 if (!((kptr_restrict == 0) || 1318
1316 (kptr_restrict == 1 && 1319 switch (kptr_restrict) {
1317 has_capability_noaudit(current, CAP_SYSLOG)))) 1320 case 0:
1321 /* Always print %pK values */
1322 break;
1323 case 1: {
1324 /*
1325 * Only print the real pointer value if the current
1326 * process has CAP_SYSLOG and is running with the
1327 * same credentials it started with. This is because
1328 * access to files is checked at open() time, but %pK
1329 * checks permission at read() time. We don't want to
1330 * leak pointer values if a binary opens a file using
1331 * %pK and then elevates privileges before reading it.
1332 */
1333 const struct cred *cred = current_cred();
1334
1335 if (!has_capability_noaudit(current, CAP_SYSLOG) ||
1336 !uid_eq(cred->euid, cred->uid) ||
1337 !gid_eq(cred->egid, cred->gid))
1338 ptr = NULL;
1339 break;
1340 }
1341 case 2:
1342 default:
1343 /* Always print 0's for %pK */
1318 ptr = NULL; 1344 ptr = NULL;
1345 break;
1346 }
1319 break; 1347 break;
1348
1320 case 'N': 1349 case 'N':
1321 switch (fmt[1]) { 1350 switch (fmt[1]) {
1322 case 'F': 1351 case 'F':
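
As a usage note, %pK is meant for pointers that are useful to a privileged reader but must not leak otherwise; with the logic above they print as all zeroes when kptr_restrict is 1 and the reader lacks CAP_SYSLOG (or has changed credentials), and always when it is 2. A minimal, hypothetical caller:

#include <linux/printk.h>
#include <linux/spinlock.h>

/* hypothetical example -- the lock address is only revealed to readers
 * permitted by /proc/sys/kernel/kptr_restrict
 */
static void report_contended_lock(spinlock_t *lock)
{
	pr_info("contended lock at %pK\n", lock);
}
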
@@ -1683,18 +1712,16 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
1683 break; 1712 break;
1684 1713
1685 case FORMAT_TYPE_NRCHARS: { 1714 case FORMAT_TYPE_NRCHARS: {
1686 u8 qualifier = spec.qualifier; 1715 /*
1716 * Since %n poses a greater security risk than
1717 * utility, ignore %n and skip its argument.
1718 */
1719 void *skip_arg;
1687 1720
1688 if (qualifier == 'l') { 1721 WARN_ONCE(1, "Please remove ignored %%n in '%s'\n",
1689 long *ip = va_arg(args, long *); 1722 old_fmt);
1690 *ip = (str - buf); 1723
1691 } else if (_tolower(qualifier) == 'z') { 1724 skip_arg = va_arg(args, void *);
1692 size_t *ip = va_arg(args, size_t *);
1693 *ip = (str - buf);
1694 } else {
1695 int *ip = va_arg(args, int *);
1696 *ip = (str - buf);
1697 }
1698 break; 1725 break;
1699 } 1726 }
1700 1727