path: root/lib/assoc_array.c
author	David Howells <dhowells@redhat.com>	2013-09-24 05:35:17 -0400
committer	David Howells <dhowells@redhat.com>	2013-09-24 05:35:17 -0400
commit	3cb989501c2688cacbb7dc4b0d353faf838f53a1 (patch)
tree	0639e17c2291cfd84c6db3a7850f55ffc8b284b4 /lib/assoc_array.c
parent	e57e8669f2ab8350d30f771dd2fdd5377f183db2 (diff)
Add a generic associative array implementation.
Add a generic associative array implementation that can be used as the
container for keyrings, thereby massively increasing the capacity available
whilst also speeding up searching in keyrings that contain a lot of keys.
This may also be useful in FS-Cache for tracking cookies.

Documentation is added into Documentation/associative_array.txt

Some of the properties of the implementation are:

 (1) Objects are opaque pointers.  The implementation does not care where they
     point (if anywhere) or what they point to (if anything).

     [!] NOTE: Pointers to objects _must_ be zero in the two least significant
     bits.

 (2) Objects do not need to contain linkage blocks for use by the array.  This
     permits an object to be located in multiple arrays simultaneously.
     Rather, the array is made up of metadata blocks that point to objects.

 (3) Objects are labelled as being one of two types (the type is a bool value).
     This information is stored in the array, but has no consequence to the
     array itself or its algorithms.

 (4) Objects require index keys to locate them within the array.

 (5) Index keys must be unique.  Inserting an object with the same key as one
     already in the array will replace the old object.

 (6) Index keys can be of any length and can be of different lengths.

 (7) Index keys should encode the length early on, before any variation due to
     length is seen.

 (8) Index keys can include a hash to scatter objects throughout the array.

 (9) The array can be iterated over.  The objects will not necessarily come
     out in key order.

(10) The array can be iterated whilst it is being modified, provided the RCU
     readlock is being held by the iterator.  Note, however, under these
     circumstances, some objects may be seen more than once.  If this is a
     problem, the iterator should lock against modification.  Objects will not
     be missed, however, unless deleted.

(11) Objects in the array can be looked up by means of their index key.

(12) Objects can be looked up whilst the array is being modified, provided the
     RCU readlock is being held by the thread doing the look up.

The implementation uses a tree of 16-pointer nodes internally that are indexed
on each level by nibbles from the index key.  To improve memory efficiency,
shortcuts can be emplaced to skip over what would otherwise be a series of
single-occupancy nodes.  Further, nodes pack leaf object pointers into spare
space in the node rather than making an extra branch until such time as an
object needs to be added to a full node.

Signed-off-by: David Howells <dhowells@redhat.com>
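To make the calling convention concrete, here is a rough caller-side sketch of
the ops table such a user would supply.  It is illustrative only and not part
of the patch: the object type "struct foo", its single-word index key and
every foo_* name are hypothetical, and the shape of the callbacks is inferred
from how lib/assoc_array.c invokes ops->get_key_chunk(),
ops->get_object_key_chunk(), ops->compare_object(), ops->diff_objects() and
ops->free_object() below.  A zero-filled struct assoc_array (NULL root) is an
empty array, and kmalloc'd objects naturally satisfy the requirement that the
two least significant bits of an object pointer be zero.

#include <linux/assoc_array.h>
#include <linux/atomic.h>
#include <linux/bitops.h>
#include <linux/slab.h>

struct foo {
	atomic_t	usage;		/* reference count */
	unsigned long	id;		/* single-word index key */
	/* ... payload ... */
};

static inline struct foo *foo_get(struct foo *obj)
{
	atomic_inc(&obj->usage);
	return obj;
}

static inline void foo_put(struct foo *obj)
{
	if (atomic_dec_and_test(&obj->usage))
		kfree(obj);
}

/* Hand back the word of the search key that covers bit position 'level'; a
 * single-word key only ever has one chunk.
 */
static unsigned long foo_get_key_chunk(const void *index_key, int level)
{
	return *(const unsigned long *)index_key;
}

/* The same, but reading the key embedded in an object. */
static unsigned long foo_get_object_key_chunk(const void *object, int level)
{
	return ((const struct foo *)object)->id;
}

/* Does this object carry exactly this index key? */
static bool foo_compare_object(const void *object, const void *index_key)
{
	return ((const struct foo *)object)->id ==
		*(const unsigned long *)index_key;
}

/* Lowest bit position at which the two objects' keys differ, or -1 if the
 * keys are identical.
 */
static int foo_diff_objects(const void *a, const void *b)
{
	unsigned long diff = ((const struct foo *)a)->id ^
			     ((const struct foo *)b)->id;

	return diff ? __ffs(diff) : -1;
}

/* Called when an object falls out of the array (replacement, deletion,
 * clearing or the old tree being reaped after garbage collection), deferred
 * until after an RCU grace period; here it just drops a reference.
 */
static void foo_free_object(void *object)
{
	foo_put(object);
}

static const struct assoc_array_ops foo_array_ops = {
	.get_key_chunk		= foo_get_key_chunk,
	.get_object_key_chunk	= foo_get_object_key_chunk,
	.compare_object		= foo_compare_object,
	.diff_objects		= foo_diff_objects,
	.free_object		= foo_free_object,
};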
Diffstat (limited to 'lib/assoc_array.c')
-rw-r--r--	lib/assoc_array.c	1745
1 files changed, 1745 insertions, 0 deletions
diff --git a/lib/assoc_array.c b/lib/assoc_array.c
new file mode 100644
index 000000000000..a0952818f938
--- /dev/null
+++ b/lib/assoc_array.c
@@ -0,0 +1,1745 @@
1/* Generic associative array implementation.
2 *
3 * See Documentation/assoc_array.txt for information.
4 *
5 * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
6 * Written by David Howells (dhowells@redhat.com)
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public Licence
10 * as published by the Free Software Foundation; either version
11 * 2 of the Licence, or (at your option) any later version.
12 */
13//#define DEBUG
14#include <linux/slab.h>
15#include <linux/assoc_array_priv.h>
16
17/*
18 * Iterate over an associative array. The caller must hold the RCU read lock
19 * or better.
20 */
21static int assoc_array_subtree_iterate(const struct assoc_array_ptr *root,
22 const struct assoc_array_ptr *stop,
23 int (*iterator)(const void *leaf,
24 void *iterator_data),
25 void *iterator_data)
26{
27 const struct assoc_array_shortcut *shortcut;
28 const struct assoc_array_node *node;
29 const struct assoc_array_ptr *cursor, *ptr, *parent;
30 unsigned long has_meta;
31 int slot, ret;
32
33 cursor = root;
34
35begin_node:
36 if (assoc_array_ptr_is_shortcut(cursor)) {
37 /* Descend through a shortcut */
38 shortcut = assoc_array_ptr_to_shortcut(cursor);
39 smp_read_barrier_depends();
40 cursor = ACCESS_ONCE(shortcut->next_node);
41 }
42
43 node = assoc_array_ptr_to_node(cursor);
44 smp_read_barrier_depends();
45 slot = 0;
46
47 /* We perform two passes of each node.
48 *
49 * The first pass does all the leaves in this node. This means we
50 * don't miss any leaves if the node is split up by insertion whilst
51 * we're iterating over the branches rooted here (we may, however, see
52 * some leaves twice).
53 */
54 has_meta = 0;
55 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
56 ptr = ACCESS_ONCE(node->slots[slot]);
57 has_meta |= (unsigned long)ptr;
58 if (ptr && assoc_array_ptr_is_leaf(ptr)) {
59 /* We need a barrier between the read of the pointer
60 * and dereferencing the pointer - but only if we are
61 * actually going to dereference it.
62 */
63 smp_read_barrier_depends();
64
65 /* Invoke the callback */
66 ret = iterator(assoc_array_ptr_to_leaf(ptr),
67 iterator_data);
68 if (ret)
69 return ret;
70 }
71 }
72
73 /* The second pass attends to all the metadata pointers. If we follow
74 * one of these we may find that we don't come back here, but rather go
75 * back to a replacement node with the leaves in a different layout.
76 *
77 * We are guaranteed to make progress, however, as the slot number for
78 * a particular portion of the key space cannot change - and we
79 * continue at the back pointer + 1.
80 */
81 if (!(has_meta & ASSOC_ARRAY_PTR_META_TYPE))
82 goto finished_node;
83 slot = 0;
84
85continue_node:
86 node = assoc_array_ptr_to_node(cursor);
87 smp_read_barrier_depends();
88
89 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
90 ptr = ACCESS_ONCE(node->slots[slot]);
91 if (assoc_array_ptr_is_meta(ptr)) {
92 cursor = ptr;
93 goto begin_node;
94 }
95 }
96
97finished_node:
98 /* Move up to the parent (may need to skip back over a shortcut) */
99 parent = ACCESS_ONCE(node->back_pointer);
100 slot = node->parent_slot;
101 if (parent == stop)
102 return 0;
103
104 if (assoc_array_ptr_is_shortcut(parent)) {
105 shortcut = assoc_array_ptr_to_shortcut(parent);
106 smp_read_barrier_depends();
107 cursor = parent;
108 parent = ACCESS_ONCE(shortcut->back_pointer);
109 slot = shortcut->parent_slot;
110 if (parent == stop)
111 return 0;
112 }
113
114 /* Ascend to next slot in parent node */
115 cursor = parent;
116 slot++;
117 goto continue_node;
118}
119
120/**
121 * assoc_array_iterate - Pass all objects in the array to a callback
122 * @array: The array to iterate over.
123 * @iterator: The callback function.
124 * @iterator_data: Private data for the callback function.
125 *
126 * Iterate over all the objects in an associative array. Each one will be
127 * presented to the iterator function.
128 *
129 * If the array is being modified concurrently with the iteration then it is
130 * possible that some objects in the array will be passed to the iterator
131 * callback more than once - though every object should be passed at least
132 * once. If this is undesirable then the caller must lock against modification
133 * for the duration of this function.
134 *
135 * The function will return 0 if no objects were in the array or else it will
136 * return the result of the last iterator function called. Iteration stops
137 * immediately if any call to the iteration function results in a non-zero
138 * return.
139 *
140 * The caller should hold the RCU read lock or better if concurrent
141 * modification is possible.
142 */
143int assoc_array_iterate(const struct assoc_array *array,
144 int (*iterator)(const void *object,
145 void *iterator_data),
146 void *iterator_data)
147{
148 struct assoc_array_ptr *root = ACCESS_ONCE(array->root);
149
150 if (!root)
151 return 0;
152 return assoc_array_subtree_iterate(root, NULL, iterator, iterator_data);
153}
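/* Illustrative sketch only, not part of the patch: a caller-side iterator
 * using the hypothetical "struct foo"/foo_array_ops example given after the
 * commit description.  Counting this way may overestimate if the array is
 * being modified concurrently, since a leaf can then be presented twice.
 */
static int foo_count_one(const void *object, void *iterator_data)
{
	size_t *count = iterator_data;

	(*count)++;
	return 0;		/* a non-zero return would stop the walk */
}

static size_t foo_count(const struct assoc_array *array)
{
	size_t count = 0;

	rcu_read_lock();	/* iteration needs only the RCU read lock */
	assoc_array_iterate(array, foo_count_one, &count);
	rcu_read_unlock();
	return count;
}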
154
155enum assoc_array_walk_status {
156 assoc_array_walk_tree_empty,
157 assoc_array_walk_found_terminal_node,
158 assoc_array_walk_found_wrong_shortcut,
159};
160
161struct assoc_array_walk_result {
162 struct {
163 struct assoc_array_node *node; /* Node in which leaf might be found */
164 int level;
165 int slot;
166 } terminal_node;
167 struct {
168 struct assoc_array_shortcut *shortcut;
169 int level;
170 int sc_level;
171 unsigned long sc_segments;
172 unsigned long dissimilarity;
173 } wrong_shortcut;
174};
175
176/*
177 * Navigate through the internal tree looking for the closest node to the key.
178 */
179static enum assoc_array_walk_status
180assoc_array_walk(const struct assoc_array *array,
181 const struct assoc_array_ops *ops,
182 const void *index_key,
183 struct assoc_array_walk_result *result)
184{
185 struct assoc_array_shortcut *shortcut;
186 struct assoc_array_node *node;
187 struct assoc_array_ptr *cursor, *ptr;
188 unsigned long sc_segments, dissimilarity;
189 unsigned long segments;
190 int level, sc_level, next_sc_level;
191 int slot;
192
193 pr_devel("-->%s()\n", __func__);
194
195 cursor = ACCESS_ONCE(array->root);
196 if (!cursor)
197 return assoc_array_walk_tree_empty;
198
199 level = 0;
200
201 /* Use segments from the key for the new leaf to navigate through the
202 * internal tree, skipping through nodes and shortcuts that are on
203 * route to the destination. Eventually we'll come to a slot that is
204 * either empty or contains a leaf at which point we've found a node in
205 * which the leaf we're looking for might be found or into which it
206 * should be inserted.
207 */
208jumped:
209 segments = ops->get_key_chunk(index_key, level);
210 pr_devel("segments[%d]: %lx\n", level, segments);
211
212 if (assoc_array_ptr_is_shortcut(cursor))
213 goto follow_shortcut;
214
215consider_node:
216 node = assoc_array_ptr_to_node(cursor);
217 smp_read_barrier_depends();
218
219 slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK);
220 slot &= ASSOC_ARRAY_FAN_MASK;
221 ptr = ACCESS_ONCE(node->slots[slot]);
222
223 pr_devel("consider slot %x [ix=%d type=%lu]\n",
224 slot, level, (unsigned long)ptr & 3);
225
226 if (!assoc_array_ptr_is_meta(ptr)) {
227 /* The node doesn't have a node/shortcut pointer in the slot
228 * corresponding to the index key that we have to follow.
229 */
230 result->terminal_node.node = node;
231 result->terminal_node.level = level;
232 result->terminal_node.slot = slot;
233 pr_devel("<--%s() = terminal_node\n", __func__);
234 return assoc_array_walk_found_terminal_node;
235 }
236
237 if (assoc_array_ptr_is_node(ptr)) {
238 /* There is a pointer to a node in the slot corresponding to
239 * this index key segment, so we need to follow it.
240 */
241 cursor = ptr;
242 level += ASSOC_ARRAY_LEVEL_STEP;
243 if ((level & ASSOC_ARRAY_KEY_CHUNK_MASK) != 0)
244 goto consider_node;
245 goto jumped;
246 }
247
248 /* There is a shortcut in the slot corresponding to the index key
249 * segment. We follow the shortcut if its partial index key matches
250 * this leaf's. Otherwise we need to split the shortcut.
251 */
252 cursor = ptr;
253follow_shortcut:
254 shortcut = assoc_array_ptr_to_shortcut(cursor);
255 smp_read_barrier_depends();
256 pr_devel("shortcut to %d\n", shortcut->skip_to_level);
257 sc_level = level + ASSOC_ARRAY_LEVEL_STEP;
258 BUG_ON(sc_level > shortcut->skip_to_level);
259
260 do {
261 /* Check the leaf against the shortcut's index key a word at a
262 * time, trimming the final word (the shortcut stores the index
263 * key completely from the root to the shortcut's target).
264 */
265 if ((sc_level & ASSOC_ARRAY_KEY_CHUNK_MASK) == 0)
266 segments = ops->get_key_chunk(index_key, sc_level);
267
268 sc_segments = shortcut->index_key[sc_level >> ASSOC_ARRAY_KEY_CHUNK_SHIFT];
269 dissimilarity = segments ^ sc_segments;
270
271 if (round_up(sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE) > shortcut->skip_to_level) {
272 /* Trim segments that are beyond the shortcut */
273 int shift = shortcut->skip_to_level & ASSOC_ARRAY_KEY_CHUNK_MASK;
274 dissimilarity &= ~(ULONG_MAX << shift);
275 next_sc_level = shortcut->skip_to_level;
276 } else {
277 next_sc_level = sc_level + ASSOC_ARRAY_KEY_CHUNK_SIZE;
278 next_sc_level = round_down(next_sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
279 }
280
281 if (dissimilarity != 0) {
282 /* This shortcut points elsewhere */
283 result->wrong_shortcut.shortcut = shortcut;
284 result->wrong_shortcut.level = level;
285 result->wrong_shortcut.sc_level = sc_level;
286 result->wrong_shortcut.sc_segments = sc_segments;
287 result->wrong_shortcut.dissimilarity = dissimilarity;
288 return assoc_array_walk_found_wrong_shortcut;
289 }
290
291 sc_level = next_sc_level;
292 } while (sc_level < shortcut->skip_to_level);
293
294 /* The shortcut matches the leaf's index to this point. */
295 cursor = ACCESS_ONCE(shortcut->next_node);
296 if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) {
297 level = sc_level;
298 goto jumped;
299 } else {
300 level = sc_level;
301 goto consider_node;
302 }
303}
304
305/**
306 * assoc_array_find - Find an object by index key
307 * @array: The associative array to search.
308 * @ops: The operations to use.
309 * @index_key: The key to the object.
310 *
311 * Find an object in an associative array by walking through the internal tree
312 * to the node that should contain the object and then searching the leaves
313 * there. NULL is returned if the requested object was not found in the array.
314 *
315 * The caller must hold the RCU read lock or better.
316 */
317void *assoc_array_find(const struct assoc_array *array,
318 const struct assoc_array_ops *ops,
319 const void *index_key)
320{
321 struct assoc_array_walk_result result;
322 const struct assoc_array_node *node;
323 const struct assoc_array_ptr *ptr;
324 const void *leaf;
325 int slot;
326
327 if (assoc_array_walk(array, ops, index_key, &result) !=
328 assoc_array_walk_found_terminal_node)
329 return NULL;
330
331 node = result.terminal_node.node;
332 smp_read_barrier_depends();
333
334 /* If the target key is available to us, it has to be pointed to by
335 * the terminal node.
336 */
337 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
338 ptr = ACCESS_ONCE(node->slots[slot]);
339 if (ptr && assoc_array_ptr_is_leaf(ptr)) {
340 /* We need a barrier between the read of the pointer
341 * and dereferencing the pointer - but only if we are
342 * actually going to dereference it.
343 */
344 leaf = assoc_array_ptr_to_leaf(ptr);
345 smp_read_barrier_depends();
346 if (ops->compare_object(leaf, index_key))
347 return (void *)leaf;
348 }
349 }
350
351 return NULL;
352}
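/* Illustrative sketch only, not part of the patch: a lookup wrapper for the
 * hypothetical "struct foo".  Inside the RCU read-side section the array's
 * own reference on the object cannot yet have been dropped (free_object runs
 * only after a grace period), so taking a further reference here is safe.
 */
static struct foo *foo_lookup(const struct assoc_array *array, unsigned long id)
{
	struct foo *obj;

	rcu_read_lock();
	obj = assoc_array_find(array, &foo_array_ops, &id);
	if (obj)
		foo_get(obj);
	rcu_read_unlock();
	return obj;
}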
353
354/*
355 * Destructively iterate over an associative array. The caller must prevent
356 * other simultaneous accesses.
357 */
358static void assoc_array_destroy_subtree(struct assoc_array_ptr *root,
359 const struct assoc_array_ops *ops)
360{
361 struct assoc_array_shortcut *shortcut;
362 struct assoc_array_node *node;
363 struct assoc_array_ptr *cursor, *parent = NULL;
364 int slot = -1;
365
366 pr_devel("-->%s()\n", __func__);
367
368 cursor = root;
369 if (!cursor) {
370 pr_devel("empty\n");
371 return;
372 }
373
374move_to_meta:
375 if (assoc_array_ptr_is_shortcut(cursor)) {
376 /* Descend through a shortcut */
377 pr_devel("[%d] shortcut\n", slot);
378 BUG_ON(!assoc_array_ptr_is_shortcut(cursor));
379 shortcut = assoc_array_ptr_to_shortcut(cursor);
380 BUG_ON(shortcut->back_pointer != parent);
381 BUG_ON(slot != -1 && shortcut->parent_slot != slot);
382 parent = cursor;
383 cursor = shortcut->next_node;
384 slot = -1;
385 BUG_ON(!assoc_array_ptr_is_node(cursor));
386 }
387
388 pr_devel("[%d] node\n", slot);
389 node = assoc_array_ptr_to_node(cursor);
390 BUG_ON(node->back_pointer != parent);
391 BUG_ON(slot != -1 && node->parent_slot != slot);
392 slot = 0;
393
394continue_node:
395 pr_devel("Node %p [back=%p]\n", node, node->back_pointer);
396 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
397 struct assoc_array_ptr *ptr = node->slots[slot];
398 if (!ptr)
399 continue;
400 if (assoc_array_ptr_is_meta(ptr)) {
401 parent = cursor;
402 cursor = ptr;
403 goto move_to_meta;
404 }
405
406 if (ops) {
407 pr_devel("[%d] free leaf\n", slot);
408 ops->free_object(assoc_array_ptr_to_leaf(ptr));
409 }
410 }
411
412 parent = node->back_pointer;
413 slot = node->parent_slot;
414 pr_devel("free node\n");
415 kfree(node);
416 if (!parent)
417 return; /* Done */
418
419 /* Move back up to the parent (may need to free a shortcut on
420 * the way up) */
421 if (assoc_array_ptr_is_shortcut(parent)) {
422 shortcut = assoc_array_ptr_to_shortcut(parent);
423 BUG_ON(shortcut->next_node != cursor);
424 cursor = parent;
425 parent = shortcut->back_pointer;
426 slot = shortcut->parent_slot;
427 pr_devel("free shortcut\n");
428 kfree(shortcut);
429 if (!parent)
430 return;
431
432 BUG_ON(!assoc_array_ptr_is_node(parent));
433 }
434
435 /* Ascend to next slot in parent node */
436 pr_devel("ascend to %p[%d]\n", parent, slot);
437 cursor = parent;
438 node = assoc_array_ptr_to_node(cursor);
439 slot++;
440 goto continue_node;
441}
442
443/**
444 * assoc_array_destroy - Destroy an associative array
445 * @array: The array to destroy.
446 * @ops: The operations to use.
447 *
448 * Discard all metadata and free all objects in an associative array. The
449 * array will be empty and ready to use again upon completion. This function
450 * cannot fail.
451 *
452 * The caller must prevent all other accesses whilst this takes place as no
453 * attempt is made to adjust pointers gracefully to permit RCU readlock-holding
454 * accesses to continue. On the other hand, no memory allocation is required.
455 */
456void assoc_array_destroy(struct assoc_array *array,
457 const struct assoc_array_ops *ops)
458{
459 assoc_array_destroy_subtree(array->root, ops);
460 array->root = NULL;
461}
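/* Illustrative sketch only, not part of the patch: final teardown of a
 * "struct foo" array using the hypothetical foo_array_ops from the sketch
 * after the commit description, e.g. from a module exit path once no readers
 * or writers remain.  Unlike the edit-script operations this frees everything
 * immediately, without waiting for RCU, and cannot fail.
 */
static void foo_kill_all(struct assoc_array *array)
{
	assoc_array_destroy(array, &foo_array_ops);
}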
462
463/*
464 * Handle insertion into an empty tree.
465 */
466static bool assoc_array_insert_in_empty_tree(struct assoc_array_edit *edit)
467{
468 struct assoc_array_node *new_n0;
469
470 pr_devel("-->%s()\n", __func__);
471
472 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
473 if (!new_n0)
474 return false;
475
476 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
477 edit->leaf_p = &new_n0->slots[0];
478 edit->adjust_count_on = new_n0;
479 edit->set[0].ptr = &edit->array->root;
480 edit->set[0].to = assoc_array_node_to_ptr(new_n0);
481
482 pr_devel("<--%s() = ok [no root]\n", __func__);
483 return true;
484}
485
486/*
487 * Handle insertion into a terminal node.
488 */
489static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit,
490 const struct assoc_array_ops *ops,
491 const void *index_key,
492 struct assoc_array_walk_result *result)
493{
494 struct assoc_array_shortcut *shortcut, *new_s0;
495 struct assoc_array_node *node, *new_n0, *new_n1, *side;
496 struct assoc_array_ptr *ptr;
497 unsigned long dissimilarity, base_seg, blank;
498 size_t keylen;
499 bool have_meta;
500 int level, diff;
501 int slot, next_slot, free_slot, i, j;
502
503 node = result->terminal_node.node;
504 level = result->terminal_node.level;
505 edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = result->terminal_node.slot;
506
507 pr_devel("-->%s()\n", __func__);
508
509 /* We arrived at a node which doesn't have an onward node or shortcut
510 * pointer that we have to follow. This means that (a) the leaf we
511 * want must go here (either by insertion or replacement) or (b) we
512 * need to split this node and insert in one of the fragments.
513 */
514 free_slot = -1;
515
516 /* Firstly, we have to check the leaves in this node to see if there's
517 * a matching one we should replace in place.
518 */
519 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
520 ptr = node->slots[i];
521 if (!ptr) {
522 free_slot = i;
523 continue;
524 }
525 if (ops->compare_object(assoc_array_ptr_to_leaf(ptr), index_key)) {
526 pr_devel("replace in slot %d\n", i);
527 edit->leaf_p = &node->slots[i];
528 edit->dead_leaf = node->slots[i];
529 pr_devel("<--%s() = ok [replace]\n", __func__);
530 return true;
531 }
532 }
533
534 /* If there is a free slot in this node then we can just insert the
535 * leaf here.
536 */
537 if (free_slot >= 0) {
538 pr_devel("insert in free slot %d\n", free_slot);
539 edit->leaf_p = &node->slots[free_slot];
540 edit->adjust_count_on = node;
541 pr_devel("<--%s() = ok [insert]\n", __func__);
542 return true;
543 }
544
545 /* The node has no spare slots - so we're either going to have to split
546 * it or insert another node before it.
547 *
548 * Whatever, we're going to need at least two new nodes - so allocate
549 * those now. We may also need a new shortcut, but we deal with that
550 * when we need it.
551 */
552 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
553 if (!new_n0)
554 return false;
555 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
556 new_n1 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
557 if (!new_n1)
558 return false;
559 edit->new_meta[1] = assoc_array_node_to_ptr(new_n1);
560
561 /* We need to find out how similar the leaves are. */
562 pr_devel("no spare slots\n");
563 have_meta = false;
564 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
565 ptr = node->slots[i];
566 if (assoc_array_ptr_is_meta(ptr)) {
567 edit->segment_cache[i] = 0xff;
568 have_meta = true;
569 continue;
570 }
571 base_seg = ops->get_object_key_chunk(
572 assoc_array_ptr_to_leaf(ptr), level);
573 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
574 edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK;
575 }
576
577 if (have_meta) {
578 pr_devel("have meta\n");
579 goto split_node;
580 }
581
582 /* The node contains only leaves */
583 dissimilarity = 0;
584 base_seg = edit->segment_cache[0];
585 for (i = 1; i < ASSOC_ARRAY_FAN_OUT; i++)
586 dissimilarity |= edit->segment_cache[i] ^ base_seg;
587
588 pr_devel("only leaves; dissimilarity=%lx\n", dissimilarity);
589
590 if ((dissimilarity & ASSOC_ARRAY_FAN_MASK) == 0) {
591 /* The old leaves all cluster in the same slot. We will need
592 * to insert a shortcut if the new leaf wants to cluster with them.
593 */
594 if ((edit->segment_cache[ASSOC_ARRAY_FAN_OUT] ^ base_seg) == 0)
595 goto all_leaves_cluster_together;
596
597 /* Otherwise we can just insert a new node ahead of the old
598 * one.
599 */
600 goto present_leaves_cluster_but_not_new_leaf;
601 }
602
603split_node:
604 pr_devel("split node\n");
605
606 /* We need to split the current node; we know that the node doesn't
607 * simply contain a full set of leaves that cluster together (it
608 * contains meta pointers and/or non-clustering leaves).
609 *
610 * We need to expel at least two leaves out of a set consisting of the
611 * leaves in the node and the new leaf.
612 *
613 * We need a new node (n0) to replace the current one and a new node to
614 * take the expelled nodes (n1).
615 */
616 edit->set[0].to = assoc_array_node_to_ptr(new_n0);
617 new_n0->back_pointer = node->back_pointer;
618 new_n0->parent_slot = node->parent_slot;
619 new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
620 new_n1->parent_slot = -1; /* Need to calculate this */
621
622do_split_node:
623 pr_devel("do_split_node\n");
624
625 new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
626 new_n1->nr_leaves_on_branch = 0;
627
628 /* Begin by finding two matching leaves. There have to be at least two
629 * that match - even if there are meta pointers - because any leaf that
630 * would match a slot with a meta pointer in it must be somewhere
631 * behind that meta pointer and cannot be here. Further, given N
632 * remaining leaf slots, we now have N+1 leaves to go in them.
633 */
634 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
635 slot = edit->segment_cache[i];
636 if (slot != 0xff)
637 for (j = i + 1; j < ASSOC_ARRAY_FAN_OUT + 1; j++)
638 if (edit->segment_cache[j] == slot)
639 goto found_slot_for_multiple_occupancy;
640 }
641found_slot_for_multiple_occupancy:
642 pr_devel("same slot: %x %x [%02x]\n", i, j, slot);
643 BUG_ON(i >= ASSOC_ARRAY_FAN_OUT);
644 BUG_ON(j >= ASSOC_ARRAY_FAN_OUT + 1);
645 BUG_ON(slot >= ASSOC_ARRAY_FAN_OUT);
646
647 new_n1->parent_slot = slot;
648
649 /* Metadata pointers cannot change slot */
650 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++)
651 if (assoc_array_ptr_is_meta(node->slots[i]))
652 new_n0->slots[i] = node->slots[i];
653 else
654 new_n0->slots[i] = NULL;
655 BUG_ON(new_n0->slots[slot] != NULL);
656 new_n0->slots[slot] = assoc_array_node_to_ptr(new_n1);
657
658 /* Filter the leaf pointers between the new nodes */
659 free_slot = -1;
660 next_slot = 0;
661 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
662 if (assoc_array_ptr_is_meta(node->slots[i]))
663 continue;
664 if (edit->segment_cache[i] == slot) {
665 new_n1->slots[next_slot++] = node->slots[i];
666 new_n1->nr_leaves_on_branch++;
667 } else {
668 do {
669 free_slot++;
670 } while (new_n0->slots[free_slot] != NULL);
671 new_n0->slots[free_slot] = node->slots[i];
672 }
673 }
674
675 pr_devel("filtered: f=%x n=%x\n", free_slot, next_slot);
676
677 if (edit->segment_cache[ASSOC_ARRAY_FAN_OUT] != slot) {
678 do {
679 free_slot++;
680 } while (new_n0->slots[free_slot] != NULL);
681 edit->leaf_p = &new_n0->slots[free_slot];
682 edit->adjust_count_on = new_n0;
683 } else {
684 edit->leaf_p = &new_n1->slots[next_slot++];
685 edit->adjust_count_on = new_n1;
686 }
687
688 BUG_ON(next_slot <= 1);
689
690 edit->set_backpointers_to = assoc_array_node_to_ptr(new_n0);
691 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
692 if (edit->segment_cache[i] == 0xff) {
693 ptr = node->slots[i];
694 BUG_ON(assoc_array_ptr_is_leaf(ptr));
695 if (assoc_array_ptr_is_node(ptr)) {
696 side = assoc_array_ptr_to_node(ptr);
697 edit->set_backpointers[i] = &side->back_pointer;
698 } else {
699 shortcut = assoc_array_ptr_to_shortcut(ptr);
700 edit->set_backpointers[i] = &shortcut->back_pointer;
701 }
702 }
703 }
704
705 ptr = node->back_pointer;
706 if (!ptr)
707 edit->set[0].ptr = &edit->array->root;
708 else if (assoc_array_ptr_is_node(ptr))
709 edit->set[0].ptr = &assoc_array_ptr_to_node(ptr)->slots[node->parent_slot];
710 else
711 edit->set[0].ptr = &assoc_array_ptr_to_shortcut(ptr)->next_node;
712 edit->excised_meta[0] = assoc_array_node_to_ptr(node);
713 pr_devel("<--%s() = ok [split node]\n", __func__);
714 return true;
715
716present_leaves_cluster_but_not_new_leaf:
717 /* All the old leaves cluster in the same slot, but the new leaf wants
718 * to go into a different slot, so we create a new node to hold the new
719 * leaf and a pointer to a new node holding all the old leaves.
720 */
721 pr_devel("present leaves cluster but not new leaf\n");
722
723 new_n0->back_pointer = node->back_pointer;
724 new_n0->parent_slot = node->parent_slot;
725 new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
726 new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
727 new_n1->parent_slot = edit->segment_cache[0];
728 new_n1->nr_leaves_on_branch = node->nr_leaves_on_branch;
729 edit->adjust_count_on = new_n0;
730
731 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++)
732 new_n1->slots[i] = node->slots[i];
733
734 new_n0->slots[edit->segment_cache[0]] = assoc_array_node_to_ptr(new_n1);
735 edit->leaf_p = &new_n0->slots[edit->segment_cache[ASSOC_ARRAY_FAN_OUT]];
736
737 edit->set[0].ptr = &assoc_array_ptr_to_node(node->back_pointer)->slots[node->parent_slot];
738 edit->set[0].to = assoc_array_node_to_ptr(new_n0);
739 edit->excised_meta[0] = assoc_array_node_to_ptr(node);
740 pr_devel("<--%s() = ok [insert node before]\n", __func__);
741 return true;
742
743all_leaves_cluster_together:
744 /* All the leaves, new and old, want to cluster together in this node
745 * in the same slot, so we have to replace this node with a shortcut to
746 * skip over the identical parts of the key and then place a pair of
747 * nodes, one inside the other, at the end of the shortcut and
748 * distribute the keys between them.
749 *
750 * Firstly we need to work out where the leaves start diverging as a
751 * bit position into their keys so that we know how big the shortcut
752 * needs to be.
753 *
754 * We only need to make a single pass of N of the N+1 leaves because if
755 * any keys differ between themselves at bit X then at least one of
756 * them must also differ with the base key at bit X or before.
757 */
758 pr_devel("all leaves cluster together\n");
759 diff = INT_MAX;
760 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
761 int x = ops->diff_objects(assoc_array_ptr_to_leaf(edit->leaf),
762 assoc_array_ptr_to_leaf(node->slots[i]));
763 if (x < diff) {
764 BUG_ON(x < 0);
765 diff = x;
766 }
767 }
768 BUG_ON(diff == INT_MAX);
769 BUG_ON(diff < level + ASSOC_ARRAY_LEVEL_STEP);
770
771 keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE);
772 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
773
774 new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) +
775 keylen * sizeof(unsigned long), GFP_KERNEL);
776 if (!new_s0)
777 return false;
778 edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s0);
779
780 edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0);
781 new_s0->back_pointer = node->back_pointer;
782 new_s0->parent_slot = node->parent_slot;
783 new_s0->next_node = assoc_array_node_to_ptr(new_n0);
784 new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0);
785 new_n0->parent_slot = 0;
786 new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
787 new_n1->parent_slot = -1; /* Need to calculate this */
788
789 new_s0->skip_to_level = level = diff & ~ASSOC_ARRAY_LEVEL_STEP_MASK;
790 pr_devel("skip_to_level = %d [diff %d]\n", level, diff);
791 BUG_ON(level <= 0);
792
793 for (i = 0; i < keylen; i++)
794 new_s0->index_key[i] =
795 ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE);
796
797 blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK);
798 pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank);
799 new_s0->index_key[keylen - 1] &= ~blank;
800
801 /* This now reduces to a node splitting exercise for which we'll need
802 * to regenerate the disparity table.
803 */
804 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
805 ptr = node->slots[i];
806 base_seg = ops->get_object_key_chunk(assoc_array_ptr_to_leaf(ptr),
807 level);
808 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
809 edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK;
810 }
811
812 base_seg = ops->get_key_chunk(index_key, level);
813 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
814 edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = base_seg & ASSOC_ARRAY_FAN_MASK;
815 goto do_split_node;
816}
817
818/*
819 * Handle insertion into the middle of a shortcut.
820 */
821static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit,
822 const struct assoc_array_ops *ops,
823 struct assoc_array_walk_result *result)
824{
825 struct assoc_array_shortcut *shortcut, *new_s0, *new_s1;
826 struct assoc_array_node *node, *new_n0, *side;
827 unsigned long sc_segments, dissimilarity, blank;
828 size_t keylen;
829 int level, sc_level, diff;
830 int sc_slot;
831
832 shortcut = result->wrong_shortcut.shortcut;
833 level = result->wrong_shortcut.level;
834 sc_level = result->wrong_shortcut.sc_level;
835 sc_segments = result->wrong_shortcut.sc_segments;
836 dissimilarity = result->wrong_shortcut.dissimilarity;
837
838 pr_devel("-->%s(ix=%d dis=%lx scix=%d)\n",
839 __func__, level, dissimilarity, sc_level);
840
841 /* We need to split a shortcut and insert a node between the two
842 * pieces. Zero-length pieces will be dispensed with entirely.
843 *
844 * First of all, we need to find out in which level the first
845 * difference was.
846 */
847 diff = __ffs(dissimilarity);
848 diff &= ~ASSOC_ARRAY_LEVEL_STEP_MASK;
849 diff += sc_level & ~ASSOC_ARRAY_KEY_CHUNK_MASK;
850 pr_devel("diff=%d\n", diff);
851
852 if (!shortcut->back_pointer) {
853 edit->set[0].ptr = &edit->array->root;
854 } else if (assoc_array_ptr_is_node(shortcut->back_pointer)) {
855 node = assoc_array_ptr_to_node(shortcut->back_pointer);
856 edit->set[0].ptr = &node->slots[shortcut->parent_slot];
857 } else {
858 BUG();
859 }
860
861 edit->excised_meta[0] = assoc_array_shortcut_to_ptr(shortcut);
862
863 /* Create a new node now since we're going to need it anyway */
864 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
865 if (!new_n0)
866 return false;
867 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
868 edit->adjust_count_on = new_n0;
869
870 /* Insert a new shortcut before the new node if this segment isn't of
871 * zero length - otherwise we just connect the new node directly to the
872 * parent.
873 */
874 level += ASSOC_ARRAY_LEVEL_STEP;
875 if (diff > level) {
876 pr_devel("pre-shortcut %d...%d\n", level, diff);
877 keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE);
878 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
879
880 new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) +
881 keylen * sizeof(unsigned long), GFP_KERNEL);
882 if (!new_s0)
883 return false;
884 edit->new_meta[1] = assoc_array_shortcut_to_ptr(new_s0);
885 edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0);
886 new_s0->back_pointer = shortcut->back_pointer;
887 new_s0->parent_slot = shortcut->parent_slot;
888 new_s0->next_node = assoc_array_node_to_ptr(new_n0);
889 new_s0->skip_to_level = diff;
890
891 new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0);
892 new_n0->parent_slot = 0;
893
894 memcpy(new_s0->index_key, shortcut->index_key,
895 keylen * sizeof(unsigned long));
896
897 blank = ULONG_MAX << (diff & ASSOC_ARRAY_KEY_CHUNK_MASK);
898 pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, diff, blank);
899 new_s0->index_key[keylen - 1] &= ~blank;
900 } else {
901 pr_devel("no pre-shortcut\n");
902 edit->set[0].to = assoc_array_node_to_ptr(new_n0);
903 new_n0->back_pointer = shortcut->back_pointer;
904 new_n0->parent_slot = shortcut->parent_slot;
905 }
906
907 side = assoc_array_ptr_to_node(shortcut->next_node);
908 new_n0->nr_leaves_on_branch = side->nr_leaves_on_branch;
909
910 /* We need to know which slot in the new node is going to take a
911 * metadata pointer.
912 */
913 sc_slot = sc_segments >> (diff & ASSOC_ARRAY_KEY_CHUNK_MASK);
914 sc_slot &= ASSOC_ARRAY_FAN_MASK;
915
916 pr_devel("new slot %lx >> %d -> %d\n",
917 sc_segments, diff & ASSOC_ARRAY_KEY_CHUNK_MASK, sc_slot);
918
919 /* Determine whether we need to follow the new node with a replacement
920 * for the current shortcut. We could in theory reuse the current
921 * shortcut if its parent slot number doesn't change - but that's a
922 * 1-in-16 chance so not worth expending the code upon.
923 */
924 level = diff + ASSOC_ARRAY_LEVEL_STEP;
925 if (level < shortcut->skip_to_level) {
926 pr_devel("post-shortcut %d...%d\n", level, shortcut->skip_to_level);
927 keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
928 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
929
930 new_s1 = kzalloc(sizeof(struct assoc_array_shortcut) +
931 keylen * sizeof(unsigned long), GFP_KERNEL);
932 if (!new_s1)
933 return false;
934 edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s1);
935
936 new_s1->back_pointer = assoc_array_node_to_ptr(new_n0);
937 new_s1->parent_slot = sc_slot;
938 new_s1->next_node = shortcut->next_node;
939 new_s1->skip_to_level = shortcut->skip_to_level;
940
941 new_n0->slots[sc_slot] = assoc_array_shortcut_to_ptr(new_s1);
942
943 memcpy(new_s1->index_key, shortcut->index_key,
944 keylen * sizeof(unsigned long));
945
946 edit->set[1].ptr = &side->back_pointer;
947 edit->set[1].to = assoc_array_shortcut_to_ptr(new_s1);
948 } else {
949 pr_devel("no post-shortcut\n");
950
951 /* We don't have to replace the pointed-to node as long as we
952 * use memory barriers to make sure the parent slot number is
953 * changed before the back pointer (the parent slot number is
954 * irrelevant to the old parent shortcut).
955 */
956 new_n0->slots[sc_slot] = shortcut->next_node;
957 edit->set_parent_slot[0].p = &side->parent_slot;
958 edit->set_parent_slot[0].to = sc_slot;
959 edit->set[1].ptr = &side->back_pointer;
960 edit->set[1].to = assoc_array_node_to_ptr(new_n0);
961 }
962
963 /* Install the new leaf in a spare slot in the new node. */
964 if (sc_slot == 0)
965 edit->leaf_p = &new_n0->slots[1];
966 else
967 edit->leaf_p = &new_n0->slots[0];
968
969 pr_devel("<--%s() = ok [split shortcut]\n", __func__);
970 return true;
971}
972
973/**
974 * assoc_array_insert - Script insertion of an object into an associative array
975 * @array: The array to insert into.
976 * @ops: The operations to use.
977 * @index_key: The key to insert at.
978 * @object: The object to insert.
979 *
980 * Precalculate and preallocate a script for the insertion or replacement of an
981 * object in an associative array. This results in an edit script that can
982 * either be applied or cancelled.
983 *
984 * The function returns a pointer to an edit script or -ENOMEM.
985 *
986 * The caller should lock against other modifications and must continue to hold
987 * the lock until assoc_array_apply_edit() has been called.
988 *
989 * Accesses to the tree may take place concurrently with this function,
990 * provided they hold the RCU read lock.
991 */
992struct assoc_array_edit *assoc_array_insert(struct assoc_array *array,
993 const struct assoc_array_ops *ops,
994 const void *index_key,
995 void *object)
996{
997 struct assoc_array_walk_result result;
998 struct assoc_array_edit *edit;
999
1000 pr_devel("-->%s()\n", __func__);
1001
1002 /* The leaf pointer we're given must not have the bottom bit set as we
1003 * use those for type-marking the pointer. NULL pointers are also not
1004 * allowed as they indicate an empty slot but we have to allow them
1005 * here as they can be updated later.
1006 */
1007 BUG_ON(assoc_array_ptr_is_meta(object));
1008
1009 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
1010 if (!edit)
1011 return ERR_PTR(-ENOMEM);
1012 edit->array = array;
1013 edit->ops = ops;
1014 edit->leaf = assoc_array_leaf_to_ptr(object);
1015 edit->adjust_count_by = 1;
1016
1017 switch (assoc_array_walk(array, ops, index_key, &result)) {
1018 case assoc_array_walk_tree_empty:
1019 /* Allocate a root node if there isn't one yet */
1020 if (!assoc_array_insert_in_empty_tree(edit))
1021 goto enomem;
1022 return edit;
1023
1024 case assoc_array_walk_found_terminal_node:
1025 /* We found a node that doesn't have a node/shortcut pointer in
1026 * the slot corresponding to the index key that we have to
1027 * follow.
1028 */
1029 if (!assoc_array_insert_into_terminal_node(edit, ops, index_key,
1030 &result))
1031 goto enomem;
1032 return edit;
1033
1034 case assoc_array_walk_found_wrong_shortcut:
1035 /* We found a shortcut that didn't match our key in a slot we
1036 * needed to follow.
1037 */
1038 if (!assoc_array_insert_mid_shortcut(edit, ops, &result))
1039 goto enomem;
1040 return edit;
1041 }
1042
1043enomem:
1044 /* Clean up after an out of memory error */
1045 pr_devel("enomem\n");
1046 assoc_array_cancel_edit(edit);
1047 return ERR_PTR(-ENOMEM);
1048}
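/* Illustrative sketch only, not part of the patch: the script-then-apply
 * pattern for the hypothetical "struct foo", with the caller's own mutex
 * standing in for whatever exclusion it uses for modifications.  With the
 * example ops the array swallows the caller's reference on the object; if an
 * object with the same key was displaced, its reference is dropped via
 * ops->free_object() after an RCU grace period.
 */
static int foo_add(struct assoc_array *array, struct mutex *lock,
		   struct foo *obj)
{
	struct assoc_array_edit *edit;

	mutex_lock(lock);
	edit = assoc_array_insert(array, &foo_array_ops, &obj->id, obj);
	if (IS_ERR(edit)) {
		mutex_unlock(lock);
		return PTR_ERR(edit);
	}
	assoc_array_apply_edit(edit);	/* cannot fail; consumes the script */
	mutex_unlock(lock);
	return 0;
}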
1049
1050/**
1051 * assoc_array_insert_set_object - Set the new object pointer in an edit script
1052 * @edit: The edit script to modify.
1053 * @object: The object pointer to set.
1054 *
1055 * Change the object to be inserted in an edit script. The old object that
1056 * the script previously pointed to is not freed. This must be done prior to
1057 * applying the script.
1058 */
1059void assoc_array_insert_set_object(struct assoc_array_edit *edit, void *object)
1060{
1061 BUG_ON(!object);
1062 edit->leaf = assoc_array_leaf_to_ptr(object);
1063}
1064
1065struct assoc_array_delete_collapse_context {
1066 struct assoc_array_node *node;
1067 const void *skip_leaf;
1068 int slot;
1069};
1070
1071/*
1072 * Subtree collapse to node iterator.
1073 */
1074static int assoc_array_delete_collapse_iterator(const void *leaf,
1075 void *iterator_data)
1076{
1077 struct assoc_array_delete_collapse_context *collapse = iterator_data;
1078
1079 if (leaf == collapse->skip_leaf)
1080 return 0;
1081
1082 BUG_ON(collapse->slot >= ASSOC_ARRAY_FAN_OUT);
1083
1084 collapse->node->slots[collapse->slot++] = assoc_array_leaf_to_ptr(leaf);
1085 return 0;
1086}
1087
1088/**
1089 * assoc_array_delete - Script deletion of an object from an associative array
1090 * @array: The array to search.
1091 * @ops: The operations to use.
1092 * @index_key: The key to the object.
1093 *
1094 * Precalculate and preallocate a script for the deletion of an object from an
1095 * associative array. This results in an edit script that can either be
1096 * applied or cancelled.
1097 *
1098 * The function returns a pointer to an edit script if the object was found,
1099 * NULL if the object was not found or -ENOMEM.
1100 *
1101 * The caller should lock against other modifications and must continue to hold
1102 * the lock until assoc_array_apply_edit() has been called.
1103 *
1104 * Accesses to the tree may take place concurrently with this function,
1105 * provided they hold the RCU read lock.
1106 */
1107struct assoc_array_edit *assoc_array_delete(struct assoc_array *array,
1108 const struct assoc_array_ops *ops,
1109 const void *index_key)
1110{
1111 struct assoc_array_delete_collapse_context collapse;
1112 struct assoc_array_walk_result result;
1113 struct assoc_array_node *node, *new_n0;
1114 struct assoc_array_edit *edit;
1115 struct assoc_array_ptr *ptr;
1116 bool has_meta;
1117 int slot, i;
1118
1119 pr_devel("-->%s()\n", __func__);
1120
1121 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
1122 if (!edit)
1123 return ERR_PTR(-ENOMEM);
1124 edit->array = array;
1125 edit->ops = ops;
1126 edit->adjust_count_by = -1;
1127
1128 switch (assoc_array_walk(array, ops, index_key, &result)) {
1129 case assoc_array_walk_found_terminal_node:
1130 /* We found a node that should contain the leaf we've been
1131 * asked to remove - *if* it's in the tree.
1132 */
1133 pr_devel("terminal_node\n");
1134 node = result.terminal_node.node;
1135
1136 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
1137 ptr = node->slots[slot];
1138 if (ptr &&
1139 assoc_array_ptr_is_leaf(ptr) &&
1140 ops->compare_object(assoc_array_ptr_to_leaf(ptr),
1141 index_key))
1142 goto found_leaf;
1143 }
1144 case assoc_array_walk_tree_empty:
1145 case assoc_array_walk_found_wrong_shortcut:
1146 default:
1147 assoc_array_cancel_edit(edit);
1148 pr_devel("not found\n");
1149 return NULL;
1150 }
1151
1152found_leaf:
1153 BUG_ON(array->nr_leaves_on_tree <= 0);
1154
1155 /* In the simplest form of deletion we just clear the slot and release
1156 * the leaf after a suitable interval.
1157 */
1158 edit->dead_leaf = node->slots[slot];
1159 edit->set[0].ptr = &node->slots[slot];
1160 edit->set[0].to = NULL;
1161 edit->adjust_count_on = node;
1162
1163 /* If that concludes erasure of the last leaf, then delete the entire
1164 * internal array.
1165 */
1166 if (array->nr_leaves_on_tree == 1) {
1167 edit->set[1].ptr = &array->root;
1168 edit->set[1].to = NULL;
1169 edit->adjust_count_on = NULL;
1170 edit->excised_subtree = array->root;
1171 pr_devel("all gone\n");
1172 return edit;
1173 }
1174
1175 /* However, we'd also like to clear up some metadata blocks if we
1176 * possibly can.
1177 *
1178 * We go for a simple algorithm of: if this node has FAN_OUT or fewer
1179 * leaves in it, then attempt to collapse it - and attempt to
1180 * recursively collapse up the tree.
1181 *
1182 * We could also try and collapse in partially filled subtrees to take
1183 * up space in this node.
1184 */
1185 if (node->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
1186 struct assoc_array_node *parent, *grandparent;
1187 struct assoc_array_ptr *ptr;
1188
1189 /* First of all, we need to know if this node has metadata so
1190 * that we don't try collapsing if all the leaves are already
1191 * here.
1192 */
1193 has_meta = false;
1194 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
1195 ptr = node->slots[i];
1196 if (assoc_array_ptr_is_meta(ptr)) {
1197 has_meta = true;
1198 break;
1199 }
1200 }
1201
1202 pr_devel("leaves: %ld [m=%d]\n",
1203 node->nr_leaves_on_branch - 1, has_meta);
1204
1205 /* Look further up the tree to see if we can collapse this node
1206 * into a more proximal node too.
1207 */
1208 parent = node;
1209 collapse_up:
1210 pr_devel("collapse subtree: %ld\n", parent->nr_leaves_on_branch);
1211
1212 ptr = parent->back_pointer;
1213 if (!ptr)
1214 goto do_collapse;
1215 if (assoc_array_ptr_is_shortcut(ptr)) {
1216 struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(ptr);
1217 ptr = s->back_pointer;
1218 if (!ptr)
1219 goto do_collapse;
1220 }
1221
1222 grandparent = assoc_array_ptr_to_node(ptr);
1223 if (grandparent->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
1224 parent = grandparent;
1225 goto collapse_up;
1226 }
1227
1228 do_collapse:
1229 /* There's no point collapsing if the original node has no meta
1230 * pointers to discard and if we didn't merge into one of that
1231 * node's ancestry.
1232 */
1233 if (has_meta || parent != node) {
1234 node = parent;
1235
1236 /* Create a new node to collapse into */
1237 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
1238 if (!new_n0)
1239 goto enomem;
1240 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
1241
1242 new_n0->back_pointer = node->back_pointer;
1243 new_n0->parent_slot = node->parent_slot;
1244 new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
1245 edit->adjust_count_on = new_n0;
1246
1247 collapse.node = new_n0;
1248 collapse.skip_leaf = assoc_array_ptr_to_leaf(edit->dead_leaf);
1249 collapse.slot = 0;
1250 assoc_array_subtree_iterate(assoc_array_node_to_ptr(node),
1251 node->back_pointer,
1252 assoc_array_delete_collapse_iterator,
1253 &collapse);
1254 pr_devel("collapsed %d,%lu\n", collapse.slot, new_n0->nr_leaves_on_branch);
1255 BUG_ON(collapse.slot != new_n0->nr_leaves_on_branch - 1);
1256
1257 if (!node->back_pointer) {
1258 edit->set[1].ptr = &array->root;
1259 } else if (assoc_array_ptr_is_leaf(node->back_pointer)) {
1260 BUG();
1261 } else if (assoc_array_ptr_is_node(node->back_pointer)) {
1262 struct assoc_array_node *p =
1263 assoc_array_ptr_to_node(node->back_pointer);
1264 edit->set[1].ptr = &p->slots[node->parent_slot];
1265 } else if (assoc_array_ptr_is_shortcut(node->back_pointer)) {
1266 struct assoc_array_shortcut *s =
1267 assoc_array_ptr_to_shortcut(node->back_pointer);
1268 edit->set[1].ptr = &s->next_node;
1269 }
1270 edit->set[1].to = assoc_array_node_to_ptr(new_n0);
1271 edit->excised_subtree = assoc_array_node_to_ptr(node);
1272 }
1273 }
1274
1275 return edit;
1276
1277enomem:
1278 /* Clean up after an out of memory error */
1279 pr_devel("enomem\n");
1280 assoc_array_cancel_edit(edit);
1281 return ERR_PTR(-ENOMEM);
1282}
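/* Illustrative sketch only, not part of the patch: removal of a "struct foo"
 * by key, distinguishing the three possible returns of assoc_array_delete().
 * The dead leaf's reference is dropped through ops->free_object() only after
 * an RCU grace period, so concurrent readers holding rcu_read_lock() stay
 * safe.
 */
static int foo_remove(struct assoc_array *array, struct mutex *lock,
		      unsigned long id)
{
	struct assoc_array_edit *edit;

	mutex_lock(lock);
	edit = assoc_array_delete(array, &foo_array_ops, &id);
	if (IS_ERR(edit)) {
		mutex_unlock(lock);
		return PTR_ERR(edit);		/* -ENOMEM */
	}
	if (edit)
		assoc_array_apply_edit(edit);
	mutex_unlock(lock);
	return edit ? 0 : -ENOENT;
}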
1283
1284/**
1285 * assoc_array_clear - Script deletion of all objects from an associative array
1286 * @array: The array to clear.
1287 * @ops: The operations to use.
1288 *
1289 * Precalculate and preallocate a script for the deletion of all the objects
1290 * from an associative array. This results in an edit script that can either
1291 * be applied or cancelled.
1292 *
1293 * The function returns a pointer to an edit script if there are objects to be
1294 * deleted, NULL if there are no objects in the array or -ENOMEM.
1295 *
1296 * The caller should lock against other modifications and must continue to hold
1297 * the lock until assoc_array_apply_edit() has been called.
1298 *
1299 * Accesses to the tree may take place concurrently with this function,
1300 * provided they hold the RCU read lock.
1301 */
1302struct assoc_array_edit *assoc_array_clear(struct assoc_array *array,
1303 const struct assoc_array_ops *ops)
1304{
1305 struct assoc_array_edit *edit;
1306
1307 pr_devel("-->%s()\n", __func__);
1308
1309 if (!array->root)
1310 return NULL;
1311
1312 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
1313 if (!edit)
1314 return ERR_PTR(-ENOMEM);
1315 edit->array = array;
1316 edit->ops = ops;
1317 edit->set[1].ptr = &array->root;
1318 edit->set[1].to = NULL;
1319 edit->excised_subtree = array->root;
1320 edit->ops_for_excised_subtree = ops;
1321 pr_devel("all gone\n");
1322 return edit;
1323}
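/* Illustrative sketch only, not part of the patch: emptying a "struct foo"
 * array.  A NULL return simply means there was nothing to remove; otherwise
 * the whole old tree, metadata and object references alike, is released after
 * an RCU grace period because ops_for_excised_subtree is set.
 */
static int foo_remove_all(struct assoc_array *array, struct mutex *lock)
{
	struct assoc_array_edit *edit;
	int ret = 0;

	mutex_lock(lock);
	edit = assoc_array_clear(array, &foo_array_ops);
	if (IS_ERR(edit))
		ret = PTR_ERR(edit);
	else if (edit)
		assoc_array_apply_edit(edit);
	mutex_unlock(lock);
	return ret;
}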
1324
1325/*
1326 * Handle the deferred destruction after an applied edit.
1327 */
1328static void assoc_array_rcu_cleanup(struct rcu_head *head)
1329{
1330 struct assoc_array_edit *edit =
1331 container_of(head, struct assoc_array_edit, rcu);
1332 int i;
1333
1334 pr_devel("-->%s()\n", __func__);
1335
1336 if (edit->dead_leaf)
1337 edit->ops->free_object(assoc_array_ptr_to_leaf(edit->dead_leaf));
1338 for (i = 0; i < ARRAY_SIZE(edit->excised_meta); i++)
1339 if (edit->excised_meta[i])
1340 kfree(assoc_array_ptr_to_node(edit->excised_meta[i]));
1341
1342 if (edit->excised_subtree) {
1343 BUG_ON(assoc_array_ptr_is_leaf(edit->excised_subtree));
1344 if (assoc_array_ptr_is_node(edit->excised_subtree)) {
1345 struct assoc_array_node *n =
1346 assoc_array_ptr_to_node(edit->excised_subtree);
1347 n->back_pointer = NULL;
1348 } else {
1349 struct assoc_array_shortcut *s =
1350 assoc_array_ptr_to_shortcut(edit->excised_subtree);
1351 s->back_pointer = NULL;
1352 }
1353 assoc_array_destroy_subtree(edit->excised_subtree,
1354 edit->ops_for_excised_subtree);
1355 }
1356
1357 kfree(edit);
1358}
1359
1360/**
1361 * assoc_array_apply_edit - Apply an edit script to an associative array
1362 * @edit: The script to apply.
1363 *
1364 * Apply an edit script to an associative array to effect an insertion,
1365 * deletion or clearance. As the edit script includes preallocated memory,
1366 * this is guaranteed not to fail.
1367 *
1368 * The edit script, dead objects and dead metadata will be scheduled for
1369 * destruction after an RCU grace period to permit those doing read-only
1370 * accesses on the array to continue to do so under the RCU read lock whilst
1371 * the edit is taking place.
1372 */
1373void assoc_array_apply_edit(struct assoc_array_edit *edit)
1374{
1375 struct assoc_array_shortcut *shortcut;
1376 struct assoc_array_node *node;
1377 struct assoc_array_ptr *ptr;
1378 int i;
1379
1380 pr_devel("-->%s()\n", __func__);
1381
1382 smp_wmb();
1383 if (edit->leaf_p)
1384 *edit->leaf_p = edit->leaf;
1385
1386 smp_wmb();
1387 for (i = 0; i < ARRAY_SIZE(edit->set_parent_slot); i++)
1388 if (edit->set_parent_slot[i].p)
1389 *edit->set_parent_slot[i].p = edit->set_parent_slot[i].to;
1390
1391 smp_wmb();
1392 for (i = 0; i < ARRAY_SIZE(edit->set_backpointers); i++)
1393 if (edit->set_backpointers[i])
1394 *edit->set_backpointers[i] = edit->set_backpointers_to;
1395
1396 smp_wmb();
1397 for (i = 0; i < ARRAY_SIZE(edit->set); i++)
1398 if (edit->set[i].ptr)
1399 *edit->set[i].ptr = edit->set[i].to;
1400
1401 if (edit->array->root == NULL) {
1402 edit->array->nr_leaves_on_tree = 0;
1403 } else if (edit->adjust_count_on) {
1404 node = edit->adjust_count_on;
1405 for (;;) {
1406 node->nr_leaves_on_branch += edit->adjust_count_by;
1407
1408 ptr = node->back_pointer;
1409 if (!ptr)
1410 break;
1411 if (assoc_array_ptr_is_shortcut(ptr)) {
1412 shortcut = assoc_array_ptr_to_shortcut(ptr);
1413 ptr = shortcut->back_pointer;
1414 if (!ptr)
1415 break;
1416 }
1417 BUG_ON(!assoc_array_ptr_is_node(ptr));
1418 node = assoc_array_ptr_to_node(ptr);
1419 }
1420
1421 edit->array->nr_leaves_on_tree += edit->adjust_count_by;
1422 }
1423
1424 call_rcu(&edit->rcu, assoc_array_rcu_cleanup);
1425}
1426
1427/**
1428 * assoc_array_cancel_edit - Discard an edit script.
1429 * @edit: The script to discard.
1430 *
1431 * Free an edit script and all the preallocated data it holds without making
1432 * any changes to the associative array it was intended for.
1433 *
1434 * NOTE! In the case of an insertion script, this does _not_ release the leaf
1435 * that was to be inserted. That is left to the caller.
1436 */
1437void assoc_array_cancel_edit(struct assoc_array_edit *edit)
1438{
1439 struct assoc_array_ptr *ptr;
1440 int i;
1441
1442 pr_devel("-->%s()\n", __func__);
1443
1444 /* Clean up after an out of memory error */
1445 for (i = 0; i < ARRAY_SIZE(edit->new_meta); i++) {
1446 ptr = edit->new_meta[i];
1447 if (ptr) {
1448 if (assoc_array_ptr_is_node(ptr))
1449 kfree(assoc_array_ptr_to_node(ptr));
1450 else
1451 kfree(assoc_array_ptr_to_shortcut(ptr));
1452 }
1453 }
1454 kfree(edit);
1455}
1456
1457/**
1458 * assoc_array_gc - Garbage collect an associative array.
1459 * @array: The array to clean.
1460 * @ops: The operations to use.
1461 * @iterator: A callback function to pass judgement on each object.
1462 * @iterator_data: Private data for the callback function.
1463 *
1464 * Collect garbage from an associative array and pack down the internal tree to
1465 * save memory.
1466 *
1467 * The iterator function is asked to pass judgement upon each object in the
1468 * array. If it returns false, the object is discarded and if it returns true,
1469 * the object is kept. If it returns true, it must increment the object's
1470 * usage count (or whatever it needs to do to retain it) before returning.
1471 *
1472 * This function returns 0 if successful or -ENOMEM if out of memory. In the
1473 * latter case, the array is not changed.
1474 *
1475 * The caller should lock against other modifications and must continue to hold
1476 * the lock until assoc_array_apply_edit() has been called.
1477 *
1478 * Accesses to the tree may take place concurrently with this function,
1479 * provided they hold the RCU read lock.
1480 */
1481int assoc_array_gc(struct assoc_array *array,
1482 const struct assoc_array_ops *ops,
1483 bool (*iterator)(void *object, void *iterator_data),
1484 void *iterator_data)
1485{
1486 struct assoc_array_shortcut *shortcut, *new_s;
1487 struct assoc_array_node *node, *new_n;
1488 struct assoc_array_edit *edit;
1489 struct assoc_array_ptr *cursor, *ptr;
1490 struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp;
1491 unsigned long nr_leaves_on_tree;
1492 int keylen, slot, nr_free, next_slot, i;
1493
1494 pr_devel("-->%s()\n", __func__);
1495
1496 if (!array->root)
1497 return 0;
1498
1499 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
1500 if (!edit)
1501 return -ENOMEM;
1502 edit->array = array;
1503 edit->ops = ops;
1504 edit->ops_for_excised_subtree = ops;
1505 edit->set[0].ptr = &array->root;
1506 edit->excised_subtree = array->root;
1507
1508 new_root = new_parent = NULL;
1509 new_ptr_pp = &new_root;
1510 cursor = array->root;
1511
1512descend:
1513 /* If this point is a shortcut, then we need to duplicate it and
1514 * advance the target cursor.
1515 */
1516 if (assoc_array_ptr_is_shortcut(cursor)) {
1517 shortcut = assoc_array_ptr_to_shortcut(cursor);
1518 keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
1519 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
1520 new_s = kmalloc(sizeof(struct assoc_array_shortcut) +
1521 keylen * sizeof(unsigned long), GFP_KERNEL);
1522 if (!new_s)
1523 goto enomem;
1524 pr_devel("dup shortcut %p -> %p\n", shortcut, new_s);
1525 memcpy(new_s, shortcut, (sizeof(struct assoc_array_shortcut) +
1526 keylen * sizeof(unsigned long)));
1527 new_s->back_pointer = new_parent;
1528 new_s->parent_slot = shortcut->parent_slot;
1529 *new_ptr_pp = new_parent = assoc_array_shortcut_to_ptr(new_s);
1530 new_ptr_pp = &new_s->next_node;
1531 cursor = shortcut->next_node;
1532 }
1533
1534 /* Duplicate the node at this position */
1535 node = assoc_array_ptr_to_node(cursor);
1536 new_n = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
1537 if (!new_n)
1538 goto enomem;
1539 pr_devel("dup node %p -> %p\n", node, new_n);
1540 new_n->back_pointer = new_parent;
1541 new_n->parent_slot = node->parent_slot;
1542 *new_ptr_pp = new_parent = assoc_array_node_to_ptr(new_n);
1543 new_ptr_pp = NULL;
1544 slot = 0;
1545
1546continue_node:
1547 /* Filter across any leaves and gc any subtrees */
1548 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
1549 ptr = node->slots[slot];
1550 if (!ptr)
1551 continue;
1552
1553 if (assoc_array_ptr_is_leaf(ptr)) {
1554 if (iterator(assoc_array_ptr_to_leaf(ptr),
1555 iterator_data))
1556 /* The iterator will have done any reference
1557 * counting on the object for us.
1558 */
1559 new_n->slots[slot] = ptr;
1560 continue;
1561 }
1562
1563 new_ptr_pp = &new_n->slots[slot];
1564 cursor = ptr;
1565 goto descend;
1566 }
1567
1568 pr_devel("-- compress node %p --\n", new_n);
1569
1570 /* Count up the number of empty slots in this node and work out the
1571 * subtree leaf count.
1572 */
1573 new_n->nr_leaves_on_branch = 0;
1574 nr_free = 0;
1575 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
1576 ptr = new_n->slots[slot];
1577 if (!ptr)
1578 nr_free++;
1579 else if (assoc_array_ptr_is_leaf(ptr))
1580 new_n->nr_leaves_on_branch++;
1581 }
1582 pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch);
1583
1584 /* See what we can fold in */
1585 next_slot = 0;
1586 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
1587 struct assoc_array_shortcut *s;
1588 struct assoc_array_node *child;
1589
1590 ptr = new_n->slots[slot];
1591 if (!ptr || assoc_array_ptr_is_leaf(ptr))
1592 continue;
1593
1594 s = NULL;
1595 if (assoc_array_ptr_is_shortcut(ptr)) {
1596 s = assoc_array_ptr_to_shortcut(ptr);
1597 ptr = s->next_node;
1598 }
1599
1600 child = assoc_array_ptr_to_node(ptr);
1601 new_n->nr_leaves_on_branch += child->nr_leaves_on_branch;
1602
1603 if (child->nr_leaves_on_branch <= nr_free + 1) {
1604 /* Fold the child node into this one */
1605 pr_devel("[%d] fold node %lu/%d [nx %d]\n",
1606 slot, child->nr_leaves_on_branch, nr_free + 1,
1607 next_slot);
1608
1609 /* We would already have reaped an intervening shortcut
1610 * on the way back up the tree.
1611 */
1612 BUG_ON(s);
1613
1614 new_n->slots[slot] = NULL;
1615 nr_free++;
1616 if (slot < next_slot)
1617 next_slot = slot;
1618 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
1619 struct assoc_array_ptr *p = child->slots[i];
1620 if (!p)
1621 continue;
1622 BUG_ON(assoc_array_ptr_is_meta(p));
1623 while (new_n->slots[next_slot])
1624 next_slot++;
1625 BUG_ON(next_slot >= ASSOC_ARRAY_FAN_OUT);
1626 new_n->slots[next_slot++] = p;
1627 nr_free--;
1628 }
1629 kfree(child);
1630 } else {
1631 pr_devel("[%d] retain node %lu/%d [nx %d]\n",
1632 slot, child->nr_leaves_on_branch, nr_free + 1,
1633 next_slot);
1634 }
1635 }
1636
1637 pr_devel("after: %lu\n", new_n->nr_leaves_on_branch);
1638
1639 nr_leaves_on_tree = new_n->nr_leaves_on_branch;
1640
1641 /* Excise this node if it is singly occupied by a shortcut */
1642 if (nr_free == ASSOC_ARRAY_FAN_OUT - 1) {
1643 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++)
1644 if ((ptr = new_n->slots[slot]))
1645 break;
1646
1647 if (assoc_array_ptr_is_meta(ptr) &&
1648 assoc_array_ptr_is_shortcut(ptr)) {
1649 pr_devel("excise node %p with 1 shortcut\n", new_n);
1650 new_s = assoc_array_ptr_to_shortcut(ptr);
1651 new_parent = new_n->back_pointer;
1652 slot = new_n->parent_slot;
1653 kfree(new_n);
1654 if (!new_parent) {
1655 new_s->back_pointer = NULL;
1656 new_s->parent_slot = 0;
1657 new_root = ptr;
1658 goto gc_complete;
1659 }
1660
1661 if (assoc_array_ptr_is_shortcut(new_parent)) {
1662 /* We can discard any preceding shortcut also */
1663 struct assoc_array_shortcut *s =
1664 assoc_array_ptr_to_shortcut(new_parent);
1665
1666 pr_devel("excise preceding shortcut\n");
1667
1668 new_parent = new_s->back_pointer = s->back_pointer;
1669 slot = new_s->parent_slot = s->parent_slot;
1670 kfree(s);
1671 if (!new_parent) {
1672 new_s->back_pointer = NULL;
1673 new_s->parent_slot = 0;
1674 new_root = ptr;
1675 goto gc_complete;
1676 }
1677 }
1678
1679 new_s->back_pointer = new_parent;
1680 new_s->parent_slot = slot;
1681 new_n = assoc_array_ptr_to_node(new_parent);
1682 new_n->slots[slot] = ptr;
1683 goto ascend_old_tree;
1684 }
1685 }
1686
1687 /* Excise any shortcuts we might encounter that point to nodes that
1688 * only contain leaves.
1689 */
1690 ptr = new_n->back_pointer;
1691 if (!ptr)
1692 goto gc_complete;
1693
1694 if (assoc_array_ptr_is_shortcut(ptr)) {
1695 new_s = assoc_array_ptr_to_shortcut(ptr);
1696 new_parent = new_s->back_pointer;
1697 slot = new_s->parent_slot;
1698
1699 if (new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) {
1700 struct assoc_array_node *n;
1701
1702 pr_devel("excise shortcut\n");
1703 new_n->back_pointer = new_parent;
1704 new_n->parent_slot = slot;
1705 kfree(new_s);
1706 if (!new_parent) {
1707 new_root = assoc_array_node_to_ptr(new_n);
1708 goto gc_complete;
1709 }
1710
1711 n = assoc_array_ptr_to_node(new_parent);
1712 n->slots[slot] = assoc_array_node_to_ptr(new_n);
1713 }
1714 } else {
1715 new_parent = ptr;
1716 }
1717 new_n = assoc_array_ptr_to_node(new_parent);
1718
1719ascend_old_tree:
1720 ptr = node->back_pointer;
1721 if (assoc_array_ptr_is_shortcut(ptr)) {
1722 shortcut = assoc_array_ptr_to_shortcut(ptr);
1723 slot = shortcut->parent_slot;
1724 cursor = shortcut->back_pointer;
1725 } else {
1726 slot = node->parent_slot;
1727 cursor = ptr;
1728 }
1729 BUG_ON(!ptr);
1730 node = assoc_array_ptr_to_node(cursor);
1731 slot++;
1732 goto continue_node;
1733
1734gc_complete:
1735 edit->set[0].to = new_root;
1736 assoc_array_apply_edit(edit);
1737 edit->array->nr_leaves_on_tree = nr_leaves_on_tree;
1738 return 0;
1739
1740enomem:
1741 pr_devel("enomem\n");
1742 assoc_array_destroy_subtree(new_root, edit->ops);
1743 kfree(edit);
1744 return -ENOMEM;
1745}
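/* Illustrative sketch only, not part of the patch: a garbage-collection pass
 * over the hypothetical "struct foo" objects.  The old tree is torn down with
 * ops->free_object() applied to every leaf it held, so the iterator must take
 * a reference on behalf of the new tree for each object it keeps, exactly as
 * the kerneldoc above requires.
 */
static bool foo_gc_keep_from(void *object, void *iterator_data)
{
	struct foo *obj = object;
	const unsigned long *cutoff = iterator_data;

	if (obj->id < *cutoff)
		return false;		/* discard; old-tree teardown drops it */

	foo_get(obj);			/* reference now owned by the new tree */
	return true;
}

static int foo_gc(struct assoc_array *array, struct mutex *lock,
		  unsigned long cutoff)
{
	int ret;

	mutex_lock(lock);
	ret = assoc_array_gc(array, &foo_array_ops, foo_gc_keep_from, &cutoff);
	mutex_unlock(lock);
	return ret;
}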