aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid Howells <dhowells@redhat.com>2013-09-24 05:35:17 -0400
committerDavid Howells <dhowells@redhat.com>2013-09-24 05:35:17 -0400
commit3cb989501c2688cacbb7dc4b0d353faf838f53a1 (patch)
tree0639e17c2291cfd84c6db3a7850f55ffc8b284b4
parente57e8669f2ab8350d30f771dd2fdd5377f183db2 (diff)
Add a generic associative array implementation.
Add a generic associative array implementation that can be used as the container for keyrings, thereby massively increasing the capacity available whilst also speeding up searching in keyrings that contain a lot of keys. This may also be useful in FS-Cache for tracking cookies. Documentation is added into Documentation/associative_array.txt Some of the properties of the implementation are: (1) Objects are opaque pointers. The implementation does not care where they point (if anywhere) or what they point to (if anything). [!] NOTE: Pointers to objects _must_ be zero in the two least significant bits. (2) Objects do not need to contain linkage blocks for use by the array. This permits an object to be located in multiple arrays simultaneously. Rather, the array is made up of metadata blocks that point to objects. (3) Objects are labelled as being one of two types (the type is a bool value). This information is stored in the array, but has no consequence to the array itself or its algorithms. (4) Objects require index keys to locate them within the array. (5) Index keys must be unique. Inserting an object with the same key as one already in the array will replace the old object. (6) Index keys can be of any length and can be of different lengths. (7) Index keys should encode the length early on, before any variation due to length is seen. (8) Index keys can include a hash to scatter objects throughout the array. (9) The array can iterated over. The objects will not necessarily come out in key order. (10) The array can be iterated whilst it is being modified, provided the RCU readlock is being held by the iterator. Note, however, under these circumstances, some objects may be seen more than once. If this is a problem, the iterator should lock against modification. Objects will not be missed, however, unless deleted. (11) Objects in the array can be looked up by means of their index key. 
(12) Objects can be looked up whilst the array is being modified, provided the RCU readlock is being held by the thread doing the look up. The implementation uses a tree of 16-pointer nodes internally that are indexed on each level by nibbles from the index key. To improve memory efficiency, shortcuts can be emplaced to skip over what would otherwise be a series of single-occupancy nodes. Further, nodes pack leaf object pointers into spare space in the node rather than making an extra branch until as such time an object needs to be added to a full node. Signed-off-by: David Howells <dhowells@redhat.com>
-rw-r--r--Documentation/assoc_array.txt574
-rw-r--r--include/linux/assoc_array.h92
-rw-r--r--include/linux/assoc_array_priv.h182
-rw-r--r--lib/Kconfig14
-rw-r--r--lib/Makefile1
-rw-r--r--lib/assoc_array.c1745
6 files changed, 2608 insertions, 0 deletions
diff --git a/Documentation/assoc_array.txt b/Documentation/assoc_array.txt
new file mode 100644
index 000000000000..f4faec0f66e4
--- /dev/null
+++ b/Documentation/assoc_array.txt
@@ -0,0 +1,574 @@
1 ========================================
2 GENERIC ASSOCIATIVE ARRAY IMPLEMENTATION
3 ========================================
4
5Contents:
6
7 - Overview.
8
9 - The public API.
10 - Edit script.
11 - Operations table.
12 - Manipulation functions.
13 - Access functions.
14 - Index key form.
15
16 - Internal workings.
17 - Basic internal tree layout.
18 - Shortcuts.
19 - Splitting and collapsing nodes.
20 - Non-recursive iteration.
21 - Simultaneous alteration and iteration.
22
23
24========
25OVERVIEW
26========
27
28This associative array implementation is an object container with the following
29properties:
30
31 (1) Objects are opaque pointers. The implementation does not care where they
32 point (if anywhere) or what they point to (if anything).
33
34 [!] NOTE: Pointers to objects _must_ be zero in the least significant bit.
35
36 (2) Objects do not need to contain linkage blocks for use by the array. This
37 permits an object to be located in multiple arrays simultaneously.
38 Rather, the array is made up of metadata blocks that point to objects.
39
40 (3) Objects require index keys to locate them within the array.
41
42 (4) Index keys must be unique. Inserting an object with the same key as one
43 already in the array will replace the old object.
44
45 (5) Index keys can be of any length and can be of different lengths.
46
47 (6) Index keys should encode the length early on, before any variation due to
48 length is seen.
49
50 (7) Index keys can include a hash to scatter objects throughout the array.
51
 52 (8) The array can be iterated over.  The objects will not necessarily come out in
53 key order.
54
55 (9) The array can be iterated over whilst it is being modified, provided the
56 RCU readlock is being held by the iterator. Note, however, under these
57 circumstances, some objects may be seen more than once. If this is a
58 problem, the iterator should lock against modification. Objects will not
59 be missed, however, unless deleted.
60
61(10) Objects in the array can be looked up by means of their index key.
62
63(11) Objects can be looked up whilst the array is being modified, provided the
64 RCU readlock is being held by the thread doing the look up.
65
66The implementation uses a tree of 16-pointer nodes internally that are indexed
67on each level by nibbles from the index key in the same manner as in a radix
68tree. To improve memory efficiency, shortcuts can be emplaced to skip over
69what would otherwise be a series of single-occupancy nodes. Further, nodes
70pack leaf object pointers into spare space in the node rather than making an
 71extra branch until such time as an object needs to be added to a full node.
72
73
74==============
75THE PUBLIC API
76==============
77
78The public API can be found in <linux/assoc_array.h>. The associative array is
79rooted on the following structure:
80
81 struct assoc_array {
82 ...
83 };
84
85The code is selected by enabling CONFIG_ASSOCIATIVE_ARRAY.
86
87
88EDIT SCRIPT
89-----------
90
91The insertion and deletion functions produce an 'edit script' that can later be
92applied to effect the changes without risking ENOMEM. This retains the
93preallocated metadata blocks that will be installed in the internal tree and
94keeps track of the metadata blocks that will be removed from the tree when the
95script is applied.
96
97This is also used to keep track of dead blocks and dead objects after the
98script has been applied so that they can be freed later. The freeing is done
99after an RCU grace period has passed - thus allowing access functions to
100proceed under the RCU read lock.
101
 102The script appears outside of the API as a pointer of the type:
103
104 struct assoc_array_edit;
105
106There are two functions for dealing with the script:
107
108 (1) Apply an edit script.
109
110 void assoc_array_apply_edit(struct assoc_array_edit *edit);
111
112 This will perform the edit functions, interpolating various write barriers
113 to permit accesses under the RCU read lock to continue. The edit script
114 will then be passed to call_rcu() to free it and any dead stuff it points
115 to.
116
117 (2) Cancel an edit script.
118
119 void assoc_array_cancel_edit(struct assoc_array_edit *edit);
120
121 This frees the edit script and all preallocated memory immediately. If
122 this was for insertion, the new object is _not_ released by this function,
123 but must rather be released by the caller.
124
125These functions are guaranteed not to fail.
126
127
128OPERATIONS TABLE
129----------------
130
131Various functions take a table of operations:
132
133 struct assoc_array_ops {
134 ...
135 };
136
137This points to a number of methods, all of which need to be provided:
138
139 (1) Get a chunk of index key from caller data:
140
141 unsigned long (*get_key_chunk)(const void *index_key, int level);
142
143 This should return a chunk of caller-supplied index key starting at the
144 *bit* position given by the level argument. The level argument will be a
145 multiple of ASSOC_ARRAY_KEY_CHUNK_SIZE and the function should return
146 ASSOC_ARRAY_KEY_CHUNK_SIZE bits. No error is possible.
147
148
149 (2) Get a chunk of an object's index key.
150
151 unsigned long (*get_object_key_chunk)(const void *object, int level);
152
153 As the previous function, but gets its data from an object in the array
154 rather than from a caller-supplied index key.
155
156
157 (3) See if this is the object we're looking for.
158
159 bool (*compare_object)(const void *object, const void *index_key);
160
161 Compare the object against an index key and return true if it matches and
162 false if it doesn't.
163
164
165 (4) Diff the index keys of two objects.
166
167 int (*diff_objects)(const void *a, const void *b);
168
169 Return the bit position at which the index keys of two objects differ or
170 -1 if they are the same.
171
172
173 (5) Free an object.
174
175 void (*free_object)(void *object);
176
177 Free the specified object. Note that this may be called an RCU grace
178 period after assoc_array_apply_edit() was called, so synchronize_rcu() may
179 be necessary on module unloading.
180
181
182MANIPULATION FUNCTIONS
183----------------------
184
185There are a number of functions for manipulating an associative array:
186
187 (1) Initialise an associative array.
188
189 void assoc_array_init(struct assoc_array *array);
190
191 This initialises the base structure for an associative array. It can't
192 fail.
193
194
195 (2) Insert/replace an object in an associative array.
196
197 struct assoc_array_edit *
198 assoc_array_insert(struct assoc_array *array,
199 const struct assoc_array_ops *ops,
200 const void *index_key,
201 void *object);
202
203 This inserts the given object into the array. Note that the least
204 significant bit of the pointer must be zero as it's used to type-mark
205 pointers internally.
206
207 If an object already exists for that key then it will be replaced with the
208 new object and the old one will be freed automatically.
209
210 The index_key argument should hold index key information and is
211 passed to the methods in the ops table when they are called.
212
213 This function makes no alteration to the array itself, but rather returns
214 an edit script that must be applied. -ENOMEM is returned in the case of
215 an out-of-memory error.
216
217 The caller should lock exclusively against other modifiers of the array.
218
219
220 (3) Delete an object from an associative array.
221
222 struct assoc_array_edit *
223 assoc_array_delete(struct assoc_array *array,
224 const struct assoc_array_ops *ops,
225 const void *index_key);
226
227 This deletes an object that matches the specified data from the array.
228
229 The index_key argument should hold index key information and is
230 passed to the methods in the ops table when they are called.
231
232 This function makes no alteration to the array itself, but rather returns
233 an edit script that must be applied. -ENOMEM is returned in the case of
234 an out-of-memory error. NULL will be returned if the specified object is
235 not found within the array.
236
237 The caller should lock exclusively against other modifiers of the array.
238
239
240 (4) Delete all objects from an associative array.
241
242 struct assoc_array_edit *
243 assoc_array_clear(struct assoc_array *array,
244 const struct assoc_array_ops *ops);
245
246 This deletes all the objects from an associative array and leaves it
247 completely empty.
248
249 This function makes no alteration to the array itself, but rather returns
250 an edit script that must be applied. -ENOMEM is returned in the case of
251 an out-of-memory error.
252
253 The caller should lock exclusively against other modifiers of the array.
254
255
256 (5) Destroy an associative array, deleting all objects.
257
258 void assoc_array_destroy(struct assoc_array *array,
259 const struct assoc_array_ops *ops);
260
261 This destroys the contents of the associative array and leaves it
262 completely empty. It is not permitted for another thread to be traversing
263 the array under the RCU read lock at the same time as this function is
264 destroying it as no RCU deferral is performed on memory release -
265 something that would require memory to be allocated.
266
267 The caller should lock exclusively against other modifiers and accessors
268 of the array.
269
270
271 (6) Garbage collect an associative array.
272
273 int assoc_array_gc(struct assoc_array *array,
274 const struct assoc_array_ops *ops,
275 bool (*iterator)(void *object, void *iterator_data),
276 void *iterator_data);
277
278 This iterates over the objects in an associative array and passes each one
279 to iterator(). If iterator() returns true, the object is kept. If it
280 returns false, the object will be freed. If the iterator() function
281 returns true, it must perform any appropriate refcount incrementing on the
282 object before returning.
283
284 The internal tree will be packed down if possible as part of the iteration
285 to reduce the number of nodes in it.
286
287 The iterator_data is passed directly to iterator() and is otherwise
288 ignored by the function.
289
290 The function will return 0 if successful and -ENOMEM if there wasn't
291 enough memory.
292
293 It is possible for other threads to iterate over or search the array under
294 the RCU read lock whilst this function is in progress. The caller should
295 lock exclusively against other modifiers of the array.
296
297
298ACCESS FUNCTIONS
299----------------
300
301There are two functions for accessing an associative array:
302
303 (1) Iterate over all the objects in an associative array.
304
305 int assoc_array_iterate(const struct assoc_array *array,
306 int (*iterator)(const void *object,
307 void *iterator_data),
308 void *iterator_data);
309
310 This passes each object in the array to the iterator callback function.
311 iterator_data is private data for that function.
312
313 This may be used on an array at the same time as the array is being
314 modified, provided the RCU read lock is held. Under such circumstances,
315 it is possible for the iteration function to see some objects twice. If
316 this is a problem, then modification should be locked against. The
317 iteration algorithm should not, however, miss any objects.
318
319 The function will return 0 if no objects were in the array or else it will
320 return the result of the last iterator function called. Iteration stops
321 immediately if any call to the iteration function results in a non-zero
322 return.
323
324
325 (2) Find an object in an associative array.
326
327 void *assoc_array_find(const struct assoc_array *array,
328 const struct assoc_array_ops *ops,
329 const void *index_key);
330
331 This walks through the array's internal tree directly to the object
 332	specified by the index key.
333
334 This may be used on an array at the same time as the array is being
335 modified, provided the RCU read lock is held.
336
 337	The function will return the object if found or will return NULL if the
 338	object was not found.
339
340
341INDEX KEY FORM
342--------------
343
344The index key can be of any form, but since the algorithms aren't told how long
345the key is, it is strongly recommended that the index key includes its length
346very early on before any variation due to the length would have an effect on
347comparisons.
348
349This will cause leaves with different length keys to scatter away from each
350other - and those with the same length keys to cluster together.
351
352It is also recommended that the index key begin with a hash of the rest of the
353key to maximise scattering throughout keyspace.
354
355The better the scattering, the wider and lower the internal tree will be.
356
357Poor scattering isn't too much of a problem as there are shortcuts and nodes
358can contain mixtures of leaves and metadata pointers.
359
360The index key is read in chunks of machine word. Each chunk is subdivided into
361one nibble (4 bits) per level, so on a 32-bit CPU this is good for 8 levels and
362on a 64-bit CPU, 16 levels. Unless the scattering is really poor, it is
363unlikely that more than one word of any particular index key will have to be
364used.
365
366
367=================
368INTERNAL WORKINGS
369=================
370
371The associative array data structure has an internal tree. This tree is
372constructed of two types of metadata blocks: nodes and shortcuts.
373
374A node is an array of slots. Each slot can contain one of four things:
375
376 (*) A NULL pointer, indicating that the slot is empty.
377
378 (*) A pointer to an object (a leaf).
379
380 (*) A pointer to a node at the next level.
381
382 (*) A pointer to a shortcut.
383
384
385BASIC INTERNAL TREE LAYOUT
386--------------------------
387
388Ignoring shortcuts for the moment, the nodes form a multilevel tree. The index
389key space is strictly subdivided by the nodes in the tree and nodes occur on
390fixed levels. For example:
391
392 Level: 0 1 2 3
393 =============== =============== =============== ===============
394 NODE D
395 NODE B NODE C +------>+---+
396 +------>+---+ +------>+---+ | | 0 |
397 NODE A | | 0 | | | 0 | | +---+
398 +---+ | +---+ | +---+ | : :
399 | 0 | | : : | : : | +---+
400 +---+ | +---+ | +---+ | | f |
401 | 1 |---+ | 3 |---+ | 7 |---+ +---+
402 +---+ +---+ +---+
403 : : : : | 8 |---+
404 +---+ +---+ +---+ | NODE E
405 | e |---+ | f | : : +------>+---+
406 +---+ | +---+ +---+ | 0 |
407 | f | | | f | +---+
408 +---+ | +---+ : :
409 | NODE F +---+
410 +------>+---+ | f |
411 | 0 | NODE G +---+
412 +---+ +------>+---+
413 : : | | 0 |
414 +---+ | +---+
415 | 6 |---+ : :
416 +---+ +---+
417 : : | f |
418 +---+ +---+
419 | f |
420 +---+
421
422In the above example, there are 7 nodes (A-G), each with 16 slots (0-f).
423Assuming no other meta data nodes in the tree, the key space is divided thusly:
424
425 KEY PREFIX NODE
426 ========== ====
427 137* D
428 138* E
429 13[0-69-f]* C
430 1[0-24-f]* B
431 e6* G
432 e[0-57-f]* F
433 [02-df]* A
434
435So, for instance, keys with the following example index keys will be found in
436the appropriate nodes:
437
438 INDEX KEY PREFIX NODE
439 =============== ======= ====
440 13694892892489 13 C
441 13795289025897 137 D
442 13889dde88793 138 E
443 138bbb89003093 138 E
 444	1394879524789	13	C
445 1458952489 1 B
446 9431809de993ba - A
447 b4542910809cd - A
448 e5284310def98 e F
449 e68428974237 e6 G
450 e7fffcbd443 e F
451 f3842239082 - A
452
453To save memory, if a node can hold all the leaves in its portion of keyspace,
454then the node will have all those leaves in it and will not have any metadata
455pointers - even if some of those leaves would like to be in the same slot.
456
457A node can contain a heterogeneous mix of leaves and metadata pointers.
458Metadata pointers must be in the slots that match their subdivisions of key
459space. The leaves can be in any slot not occupied by a metadata pointer. It
460is guaranteed that none of the leaves in a node will match a slot occupied by a
461metadata pointer. If the metadata pointer is there, any leaf whose key matches
462the metadata key prefix must be in the subtree that the metadata pointer points
463to.
464
465In the above example list of index keys, node A will contain:
466
467 SLOT CONTENT INDEX KEY (PREFIX)
468 ==== =============== ==================
469 1 PTR TO NODE B 1*
470 any LEAF 9431809de993ba
471 any LEAF b4542910809cd
472 e PTR TO NODE F e*
473 any LEAF f3842239082
474
475and node B:
476
477 3 PTR TO NODE C 13*
478 any LEAF 1458952489
479
480
481SHORTCUTS
482---------
483
484Shortcuts are metadata records that jump over a piece of keyspace. A shortcut
485is a replacement for a series of single-occupancy nodes ascending through the
486levels. Shortcuts exist to save memory and to speed up traversal.
487
488It is possible for the root of the tree to be a shortcut - say, for example,
489the tree contains at least 17 nodes all with key prefix '1111'. The insertion
490algorithm will insert a shortcut to skip over the '1111' keyspace in a single
491bound and get to the fourth level where these actually become different.
492
493
494SPLITTING AND COLLAPSING NODES
495------------------------------
496
497Each node has a maximum capacity of 16 leaves and metadata pointers. If the
498insertion algorithm finds that it is trying to insert a 17th object into a
499node, that node will be split such that at least two leaves that have a common
500key segment at that level end up in a separate node rooted on that slot for
501that common key segment.
502
503If the leaves in a full node and the leaf that is being inserted are
504sufficiently similar, then a shortcut will be inserted into the tree.
505
506When the number of objects in the subtree rooted at a node falls to 16 or
507fewer, then the subtree will be collapsed down to a single node - and this will
508ripple towards the root if possible.
509
510
511NON-RECURSIVE ITERATION
512-----------------------
513
 514Each node and shortcut contains a back pointer to its parent and the number of
 515the slot in that parent that points to it.  Non-recursive iteration uses these
 516to proceed rootwards through the tree, going to the parent node, slot N + 1 to
 517make sure progress is made without the need for a stack.
518
519The backpointers, however, make simultaneous alteration and iteration tricky.
520
521
522SIMULTANEOUS ALTERATION AND ITERATION
523-------------------------------------
524
525There are a number of cases to consider:
526
527 (1) Simple insert/replace. This involves simply replacing a NULL or old
528 matching leaf pointer with the pointer to the new leaf after a barrier.
529 The metadata blocks don't change otherwise. An old leaf won't be freed
530 until after the RCU grace period.
531
532 (2) Simple delete. This involves just clearing an old matching leaf. The
533 metadata blocks don't change otherwise. The old leaf won't be freed until
534 after the RCU grace period.
535
536 (3) Insertion replacing part of a subtree that we haven't yet entered. This
537 may involve replacement of part of that subtree - but that won't affect
538 the iteration as we won't have reached the pointer to it yet and the
539 ancestry blocks are not replaced (the layout of those does not change).
540
541 (4) Insertion replacing nodes that we're actively processing. This isn't a
542 problem as we've passed the anchoring pointer and won't switch onto the
543 new layout until we follow the back pointers - at which point we've
544 already examined the leaves in the replaced node (we iterate over all the
545 leaves in a node before following any of its metadata pointers).
546
547 We might, however, re-see some leaves that have been split out into a new
548 branch that's in a slot further along than we were at.
549
550 (5) Insertion replacing nodes that we're processing a dependent branch of.
551 This won't affect us until we follow the back pointers. Similar to (4).
552
553 (6) Deletion collapsing a branch under us. This doesn't affect us because the
554 back pointers will get us back to the parent of the new node before we
555 could see the new node. The entire collapsed subtree is thrown away
556 unchanged - and will still be rooted on the same slot, so we shouldn't
557 process it a second time as we'll go back to slot + 1.
558
559Note:
560
561 (*) Under some circumstances, we need to simultaneously change the parent
562 pointer and the parent slot pointer on a node (say, for example, we
563 inserted another node before it and moved it up a level). We cannot do
564 this without locking against a read - so we have to replace that node too.
565
566 However, when we're changing a shortcut into a node this isn't a problem
567 as shortcuts only have one slot and so the parent slot number isn't used
568 when traversing backwards over one. This means that it's okay to change
569 the slot number first - provided suitable barriers are used to make sure
570 the parent slot number is read after the back pointer.
571
572Obsolete blocks and leaves are freed up after an RCU grace period has passed,
573so as long as anyone doing walking or iteration holds the RCU read lock, the
574old superstructure should not go away on them.
diff --git a/include/linux/assoc_array.h b/include/linux/assoc_array.h
new file mode 100644
index 000000000000..9a193b84238a
--- /dev/null
+++ b/include/linux/assoc_array.h
@@ -0,0 +1,92 @@
1/* Generic associative array implementation.
2 *
3 * See Documentation/assoc_array.txt for information.
4 *
5 * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
6 * Written by David Howells (dhowells@redhat.com)
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public Licence
10 * as published by the Free Software Foundation; either version
11 * 2 of the Licence, or (at your option) any later version.
12 */
13
14#ifndef _LINUX_ASSOC_ARRAY_H
15#define _LINUX_ASSOC_ARRAY_H
16
17#ifdef CONFIG_ASSOCIATIVE_ARRAY
18
19#include <linux/types.h>
20
21#define ASSOC_ARRAY_KEY_CHUNK_SIZE BITS_PER_LONG /* Key data retrieved in chunks of this size */
22
/*
 * Generic associative array.
 *
 * The public root of the container.  All contents hang off the internal
 * tree pointed to by ->root; see Documentation/assoc_array.txt for the
 * tree layout.
 */
struct assoc_array {
	struct assoc_array_ptr	*root;			/* The node at the root of the tree */
	unsigned long		nr_leaves_on_tree;	/* Number of leaves (objects) in the tree */
};
30
/*
 * Operations on objects and index keys for use by array manipulation routines.
 *
 * All five methods must be provided.  'level' arguments are *bit* positions
 * within the index key and will be multiples of ASSOC_ARRAY_KEY_CHUNK_SIZE.
 */
struct assoc_array_ops {
	/* Method to get a chunk of an index key from caller-supplied data.
	 * Should return ASSOC_ARRAY_KEY_CHUNK_SIZE bits of the key starting
	 * at bit position 'level'.  No error is possible.
	 */
	unsigned long (*get_key_chunk)(const void *index_key, int level);

	/* Method to get a piece of an object's index key.  As get_key_chunk(),
	 * but the key is derived from an object in the array rather than from
	 * caller-supplied data.
	 */
	unsigned long (*get_object_key_chunk)(const void *object, int level);

	/* Is this the object we're looking for?  Return true if the object
	 * matches the given index key and false if it doesn't.
	 */
	bool (*compare_object)(const void *object, const void *index_key);

	/* How different are two objects, to a bit position in their keys? (or
	 * -1 if they're the same)
	 */
	int (*diff_objects)(const void *a, const void *b);

	/* Method to free an object.  Note that this may be called an RCU
	 * grace period after assoc_array_apply_edit() was called, so
	 * synchronize_rcu() may be necessary on module unload.
	 */
	void (*free_object)(void *object);
};
52
53/*
54 * Access and manipulation functions.
55 */
56struct assoc_array_edit;
57
58static inline void assoc_array_init(struct assoc_array *array)
59{
60 array->root = NULL;
61 array->nr_leaves_on_tree = 0;
62}
63
64extern int assoc_array_iterate(const struct assoc_array *array,
65 int (*iterator)(const void *object,
66 void *iterator_data),
67 void *iterator_data);
68extern void *assoc_array_find(const struct assoc_array *array,
69 const struct assoc_array_ops *ops,
70 const void *index_key);
71extern void assoc_array_destroy(struct assoc_array *array,
72 const struct assoc_array_ops *ops);
73extern struct assoc_array_edit *assoc_array_insert(struct assoc_array *array,
74 const struct assoc_array_ops *ops,
75 const void *index_key,
76 void *object);
77extern void assoc_array_insert_set_object(struct assoc_array_edit *edit,
78 void *object);
79extern struct assoc_array_edit *assoc_array_delete(struct assoc_array *array,
80 const struct assoc_array_ops *ops,
81 const void *index_key);
82extern struct assoc_array_edit *assoc_array_clear(struct assoc_array *array,
83 const struct assoc_array_ops *ops);
84extern void assoc_array_apply_edit(struct assoc_array_edit *edit);
85extern void assoc_array_cancel_edit(struct assoc_array_edit *edit);
86extern int assoc_array_gc(struct assoc_array *array,
87 const struct assoc_array_ops *ops,
88 bool (*iterator)(void *object, void *iterator_data),
89 void *iterator_data);
90
91#endif /* CONFIG_ASSOCIATIVE_ARRAY */
92#endif /* _LINUX_ASSOC_ARRAY_H */
diff --git a/include/linux/assoc_array_priv.h b/include/linux/assoc_array_priv.h
new file mode 100644
index 000000000000..711275e6681c
--- /dev/null
+++ b/include/linux/assoc_array_priv.h
@@ -0,0 +1,182 @@
1/* Private definitions for the generic associative array implementation.
2 *
3 * See Documentation/assoc_array.txt for information.
4 *
5 * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
6 * Written by David Howells (dhowells@redhat.com)
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public Licence
10 * as published by the Free Software Foundation; either version
11 * 2 of the Licence, or (at your option) any later version.
12 */
13
14#ifndef _LINUX_ASSOC_ARRAY_PRIV_H
15#define _LINUX_ASSOC_ARRAY_PRIV_H
16
17#ifdef CONFIG_ASSOCIATIVE_ARRAY
18
19#include <linux/assoc_array.h>
20
21#define ASSOC_ARRAY_FAN_OUT 16 /* Number of slots per node */
22#define ASSOC_ARRAY_FAN_MASK (ASSOC_ARRAY_FAN_OUT - 1)
23#define ASSOC_ARRAY_LEVEL_STEP (ilog2(ASSOC_ARRAY_FAN_OUT))
24#define ASSOC_ARRAY_LEVEL_STEP_MASK (ASSOC_ARRAY_LEVEL_STEP - 1)
25#define ASSOC_ARRAY_KEY_CHUNK_MASK (ASSOC_ARRAY_KEY_CHUNK_SIZE - 1)
26#define ASSOC_ARRAY_KEY_CHUNK_SHIFT (ilog2(BITS_PER_LONG))
27
28/*
29 * Undefined type representing a pointer with type information in the bottom
30 * two bits.
31 */
32struct assoc_array_ptr;
33
/*
 * An N-way node in the tree.
 *
 * Each slot contains one of four things:
 *
 *	(1) Nothing (NULL).
 *
 *	(2) A leaf object (pointer type 0).
 *
 *	(3) A next-level node (pointer type 1, subtype 0).
 *
 *	(4) A shortcut (pointer type 1, subtype 1).
 *
 * The tree is optimised for search-by-ID, but permits reasonable iteration
 * also.
 *
 * The tree is navigated by constructing an index key consisting of an array of
 * segments, where each segment is ilog2(ASSOC_ARRAY_FAN_OUT) bits in size.
 *
 * The segments correspond to levels of the tree (the first segment is used at
 * level 0, the second at level 1, etc.).
 */
struct assoc_array_node {
	struct assoc_array_ptr	*back_pointer;	/* Annotated pointer to the parent node/shortcut */
	u8			parent_slot;	/* Number of the slot in the parent that points here */
	struct assoc_array_ptr	*slots[ASSOC_ARRAY_FAN_OUT];	/* Leaves and/or metadata pointers */
	unsigned long		nr_leaves_on_branch;	/* Number of leaves in the subtree rooted here */
};
62
/*
 * A shortcut through the index space out to where a collection of nodes/leaves
 * with the same IDs live.  Replaces what would otherwise be a chain of
 * single-occupancy nodes.
 */
struct assoc_array_shortcut {
	struct assoc_array_ptr	*back_pointer;	/* Annotated pointer to the parent node */
	int			parent_slot;	/* Number of the slot in the parent that points here */
	int			skip_to_level;	/* Level at which the shortcut's target resides */
	struct assoc_array_ptr	*next_node;	/* The node at the far end of the shortcut */
	unsigned long		index_key[];	/* The index key bits the shortcut jumps over
						 * (flexible array; length set by skip_to_level) */
};
74
/*
 * Preallocation cache.
 *
 * An "edit script": everything an insert/delete/clear operation preallocated
 * or needs to change is recorded here so that assoc_array_apply_edit() can
 * effect the alteration without allocating memory, and so that dead blocks
 * and objects can be freed after an RCU grace period has passed.
 */
struct assoc_array_edit {
	struct rcu_head rcu;			/* Defers freeing of dead stuff past the RCU grace period */
	struct assoc_array *array;		/* The array being modified */
	const struct assoc_array_ops *ops;	/* Key-access and object-freeing methods */
	const struct assoc_array_ops *ops_for_excised_subtree;	/* NOTE(review): presumably the ops used
								 * to dispose of the excised subtree's
								 * objects — confirm in lib/assoc_array.c */
	struct assoc_array_ptr *leaf;		/* New leaf to install */
	struct assoc_array_ptr **leaf_p;	/* Slot into which to install it */
	struct assoc_array_ptr *dead_leaf;	/* Displaced leaf, freed after the grace period */
	struct assoc_array_ptr *new_meta[3];	/* Preallocated metadata blocks */
	struct assoc_array_ptr *excised_meta[1]; /* Metadata block removed from the tree */
	struct assoc_array_ptr *excised_subtree; /* Subtree detached from the tree */
	struct assoc_array_ptr **set_backpointers[ASSOC_ARRAY_FAN_OUT]; /* Back-pointer slots to rewrite... */
	struct assoc_array_ptr *set_backpointers_to;	/* ...and the value to set them to */
	struct assoc_array_node *adjust_count_on;	/* Node whose leaf count needs adjusting */
	long adjust_count_by;				/* Signed adjustment to nr_leaves_on_branch */
	struct {
		struct assoc_array_ptr **ptr;	/* Pointer location to update... */
		struct assoc_array_ptr *to;	/* ...and the value to write there */
	} set[2];
	struct {
		u8 *p;				/* parent_slot byte to update... */
		u8 to;				/* ...and its new value */
	} set_parent_slot[1];
	u8 segment_cache[ASSOC_ARRAY_FAN_OUT + 1];	/* NOTE(review): looks like a per-slot cache of
							 * key segments — confirm against the .c file */
};
103
/*
 * Internal tree member pointers carry a type annotation in their bottom one
 * or two bits so that the kind of block pointed to can be determined without
 * dereferencing the pointer.
 *
 * Bit 0 distinguishes a leaf (0) from metadata (1); for metadata pointers,
 * bit 1 further distinguishes a node (0) from a shortcut (1).  The helpers
 * below test these annotations and convert between annotated pointers and
 * the plain pointers they encode.
 */
struct assoc_array_ptr;
struct assoc_array_node;
struct assoc_array_shortcut;

#define ASSOC_ARRAY_PTR_TYPE_MASK		0x1UL
#define ASSOC_ARRAY_PTR_LEAF_TYPE		0x0UL	/* Points to leaf (or nowhere) */
#define ASSOC_ARRAY_PTR_META_TYPE		0x1UL	/* Points to node or shortcut */
#define ASSOC_ARRAY_PTR_SUBTYPE_MASK		0x2UL
#define ASSOC_ARRAY_PTR_NODE_SUBTYPE		0x0UL
#define ASSOC_ARRAY_PTR_SHORTCUT_SUBTYPE	0x2UL

/* True if the annotated pointer refers to metadata (a node or a shortcut). */
static inline bool assoc_array_ptr_is_meta(const struct assoc_array_ptr *ptr)
{
	return ((unsigned long)ptr & ASSOC_ARRAY_PTR_TYPE_MASK) != 0UL;
}

/* True if the annotated pointer refers to a leaf object (or is NULL). */
static inline bool assoc_array_ptr_is_leaf(const struct assoc_array_ptr *ptr)
{
	return ((unsigned long)ptr & ASSOC_ARRAY_PTR_TYPE_MASK) == 0UL;
}

/* True if a metadata pointer refers to a shortcut. */
static inline bool assoc_array_ptr_is_shortcut(const struct assoc_array_ptr *ptr)
{
	return ((unsigned long)ptr & ASSOC_ARRAY_PTR_SUBTYPE_MASK) != 0UL;
}

/* True if a metadata pointer refers to a node. */
static inline bool assoc_array_ptr_is_node(const struct assoc_array_ptr *ptr)
{
	return !assoc_array_ptr_is_shortcut(ptr);
}

/* Strip the type annotation from a leaf pointer. */
static inline void *assoc_array_ptr_to_leaf(const struct assoc_array_ptr *ptr)
{
	unsigned long raw = (unsigned long)ptr;

	return (void *)(raw & ~ASSOC_ARRAY_PTR_TYPE_MASK);
}

/* Strip both annotation bits from a metadata pointer. */
static inline
unsigned long __assoc_array_ptr_to_meta(const struct assoc_array_ptr *ptr)
{
	unsigned long mask = ASSOC_ARRAY_PTR_SUBTYPE_MASK | ASSOC_ARRAY_PTR_TYPE_MASK;

	return (unsigned long)ptr & ~mask;
}

static inline
struct assoc_array_node *assoc_array_ptr_to_node(const struct assoc_array_ptr *ptr)
{
	return (struct assoc_array_node *)__assoc_array_ptr_to_meta(ptr);
}

static inline
struct assoc_array_shortcut *assoc_array_ptr_to_shortcut(const struct assoc_array_ptr *ptr)
{
	return (struct assoc_array_shortcut *)__assoc_array_ptr_to_meta(ptr);
}

/* Apply a type annotation to a plain pointer. */
static inline
struct assoc_array_ptr *__assoc_array_x_to_ptr(const void *p, unsigned long t)
{
	return (struct assoc_array_ptr *)((unsigned long)p | t);
}

static inline
struct assoc_array_ptr *assoc_array_leaf_to_ptr(const void *leaf)
{
	return __assoc_array_x_to_ptr(leaf, ASSOC_ARRAY_PTR_LEAF_TYPE);
}

static inline
struct assoc_array_ptr *assoc_array_node_to_ptr(const struct assoc_array_node *node)
{
	unsigned long marks = ASSOC_ARRAY_PTR_META_TYPE | ASSOC_ARRAY_PTR_NODE_SUBTYPE;

	return __assoc_array_x_to_ptr(node, marks);
}

static inline
struct assoc_array_ptr *assoc_array_shortcut_to_ptr(const struct assoc_array_shortcut *sc)
{
	unsigned long marks = ASSOC_ARRAY_PTR_META_TYPE | ASSOC_ARRAY_PTR_SHORTCUT_SUBTYPE;

	return __assoc_array_x_to_ptr(sc, marks);
}
180
181#endif /* CONFIG_ASSOCIATIVE_ARRAY */
182#endif /* _LINUX_ASSOC_ARRAY_PRIV_H */
diff --git a/lib/Kconfig b/lib/Kconfig
index b3c8be0da17f..3cb879b1f282 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -322,6 +322,20 @@ config TEXTSEARCH_FSM
322config BTREE 322config BTREE
323 boolean 323 boolean
324 324
325config ASSOCIATIVE_ARRAY
326 bool
327 help
328 Generic associative array. Can be searched and iterated over whilst
329 it is being modified. It is also reasonably quick to search and
330 modify. The algorithms are non-recursive, and the trees are highly
331 capacious.
332
333 See:
334
335 Documentation/assoc_array.txt
336
337 for more information.
338
325config HAS_IOMEM 339config HAS_IOMEM
326 boolean 340 boolean
327 depends on !NO_IOMEM 341 depends on !NO_IOMEM
diff --git a/lib/Makefile b/lib/Makefile
index f3bb2cb98adf..1e806477e472 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -51,6 +51,7 @@ CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS))
51obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o 51obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o
52 52
53obj-$(CONFIG_BTREE) += btree.o 53obj-$(CONFIG_BTREE) += btree.o
54obj-$(CONFIG_ASSOCIATIVE_ARRAY) += assoc_array.o
54obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o 55obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o
55obj-$(CONFIG_DEBUG_LIST) += list_debug.o 56obj-$(CONFIG_DEBUG_LIST) += list_debug.o
56obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o 57obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o
diff --git a/lib/assoc_array.c b/lib/assoc_array.c
new file mode 100644
index 000000000000..a0952818f938
--- /dev/null
+++ b/lib/assoc_array.c
@@ -0,0 +1,1745 @@
1/* Generic associative array implementation.
2 *
3 * See Documentation/assoc_array.txt for information.
4 *
5 * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved.
6 * Written by David Howells (dhowells@redhat.com)
7 *
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU General Public Licence
10 * as published by the Free Software Foundation; either version
11 * 2 of the Licence, or (at your option) any later version.
12 */
13//#define DEBUG
14#include <linux/slab.h>
15#include <linux/assoc_array_priv.h>
16
/*
 * Iterate over a subtree of an associative array, invoking @iterator on every
 * leaf found.  Iteration stops early (returning the callback's value) as soon
 * as the callback returns non-zero.  The walk ascends no further than the
 * pointer given by @stop (NULL means iterate the whole tree).
 *
 * The caller must hold the RCU read lock or better; modification may occur
 * concurrently, in which case some leaves may be seen more than once (but
 * none are missed unless deleted).
 */
static int assoc_array_subtree_iterate(const struct assoc_array_ptr *root,
				       const struct assoc_array_ptr *stop,
				       int (*iterator)(const void *leaf,
						       void *iterator_data),
				       void *iterator_data)
{
	const struct assoc_array_shortcut *shortcut;
	const struct assoc_array_node *node;
	const struct assoc_array_ptr *cursor, *ptr, *parent;
	unsigned long has_meta;	/* OR of all slot pointers; meta bit set if any slot is metadata */
	int slot, ret;

	cursor = root;

begin_node:
	if (assoc_array_ptr_is_shortcut(cursor)) {
		/* Descend through a shortcut */
		shortcut = assoc_array_ptr_to_shortcut(cursor);
		smp_read_barrier_depends();
		cursor = ACCESS_ONCE(shortcut->next_node);
	}

	node = assoc_array_ptr_to_node(cursor);
	smp_read_barrier_depends();
	slot = 0;

	/* We perform two passes of each node.
	 *
	 * The first pass does all the leaves in this node.  This means we
	 * don't miss any leaves if the node is split up by insertion whilst
	 * we're iterating over the branches rooted here (we may, however, see
	 * some leaves twice).
	 */
	has_meta = 0;
	for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
		ptr = ACCESS_ONCE(node->slots[slot]);
		has_meta |= (unsigned long)ptr;
		if (ptr && assoc_array_ptr_is_leaf(ptr)) {
			/* We need a barrier between the read of the pointer
			 * and dereferencing the pointer - but only if we are
			 * actually going to dereference it.
			 */
			smp_read_barrier_depends();

			/* Invoke the callback */
			ret = iterator(assoc_array_ptr_to_leaf(ptr),
				       iterator_data);
			if (ret)
				return ret;
		}
	}

	/* The second pass attends to all the metadata pointers.  If we follow
	 * one of these we may find that we don't come back here, but rather go
	 * back to a replacement node with the leaves in a different layout.
	 *
	 * We are guaranteed to make progress, however, as the slot number for
	 * a particular portion of the key space cannot change - and we
	 * continue at the back pointer + 1.
	 */
	if (!(has_meta & ASSOC_ARRAY_PTR_META_TYPE))
		goto finished_node;
	slot = 0;

continue_node:
	/* Reload the node: a concurrent modification may have substituted a
	 * replacement node whilst we were down a branch.
	 */
	node = assoc_array_ptr_to_node(cursor);
	smp_read_barrier_depends();

	for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
		ptr = ACCESS_ONCE(node->slots[slot]);
		if (assoc_array_ptr_is_meta(ptr)) {
			cursor = ptr;
			goto begin_node;
		}
	}

finished_node:
	/* Move up to the parent (may need to skip back over a shortcut) */
	parent = ACCESS_ONCE(node->back_pointer);
	slot = node->parent_slot;
	if (parent == stop)
		return 0;

	if (assoc_array_ptr_is_shortcut(parent)) {
		shortcut = assoc_array_ptr_to_shortcut(parent);
		smp_read_barrier_depends();
		cursor = parent;
		parent = ACCESS_ONCE(shortcut->back_pointer);
		slot = shortcut->parent_slot;
		if (parent == stop)
			return 0;
	}

	/* Ascend to next slot in parent node */
	cursor = parent;
	slot++;
	goto continue_node;
}
119
120/**
121 * assoc_array_iterate - Pass all objects in the array to a callback
122 * @array: The array to iterate over.
123 * @iterator: The callback function.
124 * @iterator_data: Private data for the callback function.
125 *
126 * Iterate over all the objects in an associative array. Each one will be
127 * presented to the iterator function.
128 *
129 * If the array is being modified concurrently with the iteration then it is
130 * possible that some objects in the array will be passed to the iterator
131 * callback more than once - though every object should be passed at least
132 * once. If this is undesirable then the caller must lock against modification
133 * for the duration of this function.
134 *
135 * The function will return 0 if no objects were in the array or else it will
136 * return the result of the last iterator function called. Iteration stops
137 * immediately if any call to the iteration function results in a non-zero
138 * return.
139 *
140 * The caller should hold the RCU read lock or better if concurrent
141 * modification is possible.
142 */
143int assoc_array_iterate(const struct assoc_array *array,
144 int (*iterator)(const void *object,
145 void *iterator_data),
146 void *iterator_data)
147{
148 struct assoc_array_ptr *root = ACCESS_ONCE(array->root);
149
150 if (!root)
151 return 0;
152 return assoc_array_subtree_iterate(root, NULL, iterator, iterator_data);
153}
154
/* Where assoc_array_walk() terminated its descent of the internal tree.
 *
 * Note: the original declaration ended "} status;", accidentally defining an
 * unused file-scope variable named 'status'; that definition is dropped here.
 */
enum assoc_array_walk_status {
	assoc_array_walk_tree_empty,		/* The array has no root pointer */
	assoc_array_walk_found_terminal_node,	/* Stopped at a node with no onward meta pointer */
	assoc_array_walk_found_wrong_shortcut,	/* Stopped at a shortcut whose key diverges */
};
160
/* Out-parameter of assoc_array_walk(): describes where the walk stopped.
 * Exactly one member union-style sub-struct is meaningful, selected by the
 * returned enum assoc_array_walk_status.
 */
struct assoc_array_walk_result {
	/* Valid for assoc_array_walk_found_terminal_node */
	struct {
		struct assoc_array_node *node;	/* Node in which leaf might be found */
		int		level;		/* Key bit-level of this node */
		int		slot;		/* Slot the index key selects in this node */
	} terminal_node;
	/* Valid for assoc_array_walk_found_wrong_shortcut */
	struct {
		struct assoc_array_shortcut *shortcut;	/* The non-matching shortcut */
		int		level;		/* Key bit-level at which the shortcut begins */
		int		sc_level;	/* Level of the chunk where divergence was seen */
		unsigned long	sc_segments;	/* Shortcut's key chunk at sc_level */
		unsigned long	dissimilarity;	/* XOR of shortcut chunk and sought key chunk */
	} wrong_shortcut;
};
175
/*
 * Navigate through the internal tree looking for the closest node to the key.
 *
 * Returns an enum assoc_array_walk_status saying how the walk terminated and
 * fills in *result accordingly.  The caller must hold the RCU read lock or
 * better if the array may be modified concurrently.
 */
static enum assoc_array_walk_status
assoc_array_walk(const struct assoc_array *array,
		 const struct assoc_array_ops *ops,
		 const void *index_key,
		 struct assoc_array_walk_result *result)
{
	struct assoc_array_shortcut *shortcut;
	struct assoc_array_node *node;
	struct assoc_array_ptr *cursor, *ptr;
	unsigned long sc_segments, dissimilarity;
	unsigned long segments;		/* Current chunk of the index key */
	int level, sc_level, next_sc_level;
	int slot;

	pr_devel("-->%s()\n", __func__);

	cursor = ACCESS_ONCE(array->root);
	if (!cursor)
		return assoc_array_walk_tree_empty;

	level = 0;

	/* Use segments from the key for the new leaf to navigate through the
	 * internal tree, skipping through nodes and shortcuts that are on
	 * route to the destination.  Eventually we'll come to a slot that is
	 * either empty or contains a leaf at which point we've found a node in
	 * which the leaf we're looking for might be found or into which it
	 * should be inserted.
	 */
jumped:
	/* Fetch the next machine-word-sized chunk of the key when we cross a
	 * chunk boundary.
	 */
	segments = ops->get_key_chunk(index_key, level);
	pr_devel("segments[%d]: %lx\n", level, segments);

	if (assoc_array_ptr_is_shortcut(cursor))
		goto follow_shortcut;

consider_node:
	node = assoc_array_ptr_to_node(cursor);
	smp_read_barrier_depends();

	/* Select the slot from the FAN_OUT-sized group of bits at this level */
	slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK);
	slot &= ASSOC_ARRAY_FAN_MASK;
	ptr = ACCESS_ONCE(node->slots[slot]);

	pr_devel("consider slot %x [ix=%d type=%lu]\n",
		 slot, level, (unsigned long)ptr & 3);

	if (!assoc_array_ptr_is_meta(ptr)) {
		/* The node doesn't have a node/shortcut pointer in the slot
		 * corresponding to the index key that we have to follow.
		 */
		result->terminal_node.node = node;
		result->terminal_node.level = level;
		result->terminal_node.slot = slot;
		pr_devel("<--%s() = terminal_node\n", __func__);
		return assoc_array_walk_found_terminal_node;
	}

	if (assoc_array_ptr_is_node(ptr)) {
		/* There is a pointer to a node in the slot corresponding to
		 * this index key segment, so we need to follow it.
		 */
		cursor = ptr;
		level += ASSOC_ARRAY_LEVEL_STEP;
		/* Only refetch the key chunk when the new level crosses into
		 * the next machine word.
		 */
		if ((level & ASSOC_ARRAY_KEY_CHUNK_MASK) != 0)
			goto consider_node;
		goto jumped;
	}

	/* There is a shortcut in the slot corresponding to the index key
	 * segment.  We follow the shortcut if its partial index key matches
	 * this leaf's.  Otherwise we need to split the shortcut.
	 */
	cursor = ptr;
follow_shortcut:
	shortcut = assoc_array_ptr_to_shortcut(cursor);
	smp_read_barrier_depends();
	pr_devel("shortcut to %d\n", shortcut->skip_to_level);
	sc_level = level + ASSOC_ARRAY_LEVEL_STEP;
	BUG_ON(sc_level > shortcut->skip_to_level);

	do {
		/* Check the leaf against the shortcut's index key a word at a
		 * time, trimming the final word (the shortcut stores the index
		 * key completely from the root to the shortcut's target).
		 */
		if ((sc_level & ASSOC_ARRAY_KEY_CHUNK_MASK) == 0)
			segments = ops->get_key_chunk(index_key, sc_level);

		sc_segments = shortcut->index_key[sc_level >> ASSOC_ARRAY_KEY_CHUNK_SHIFT];
		dissimilarity = segments ^ sc_segments;

		if (round_up(sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE) > shortcut->skip_to_level) {
			/* Trim segments that are beyond the shortcut */
			int shift = shortcut->skip_to_level & ASSOC_ARRAY_KEY_CHUNK_MASK;
			dissimilarity &= ~(ULONG_MAX << shift);
			next_sc_level = shortcut->skip_to_level;
		} else {
			next_sc_level = sc_level + ASSOC_ARRAY_KEY_CHUNK_SIZE;
			next_sc_level = round_down(next_sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
		}

		if (dissimilarity != 0) {
			/* This shortcut points elsewhere */
			result->wrong_shortcut.shortcut = shortcut;
			result->wrong_shortcut.level = level;
			result->wrong_shortcut.sc_level = sc_level;
			result->wrong_shortcut.sc_segments = sc_segments;
			result->wrong_shortcut.dissimilarity = dissimilarity;
			return assoc_array_walk_found_wrong_shortcut;
		}

		sc_level = next_sc_level;
	} while (sc_level < shortcut->skip_to_level);

	/* The shortcut matches the leaf's index to this point. */
	cursor = ACCESS_ONCE(shortcut->next_node);
	/* Refetch the key chunk only if the shortcut carried us into a
	 * different machine word of the key.
	 */
	if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) {
		level = sc_level;
		goto jumped;
	} else {
		level = sc_level;
		goto consider_node;
	}
}
304
/**
 * assoc_array_find - Find an object by index key
 * @array: The associative array to search.
 * @ops: The operations to use.
 * @index_key: The key to the object.
 *
 * Find an object in an associative array by walking through the internal tree
 * to the node that should contain the object and then searching the leaves
 * there.  NULL is returned if the requested object was not found in the array.
 *
 * The caller must hold the RCU read lock or better.
 */
void *assoc_array_find(const struct assoc_array *array,
		       const struct assoc_array_ops *ops,
		       const void *index_key)
{
	struct assoc_array_walk_result result;
	const struct assoc_array_node *node;
	const struct assoc_array_ptr *ptr;
	const void *leaf;
	int slot;

	/* An empty tree or a walk ending at a divergent shortcut means the
	 * key cannot be present.
	 */
	if (assoc_array_walk(array, ops, index_key, &result) !=
	    assoc_array_walk_found_terminal_node)
		return NULL;

	node = result.terminal_node.node;
	smp_read_barrier_depends();

	/* If the target key is available to us, it has to be pointed to by
	 * the terminal node.
	 */
	for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
		ptr = ACCESS_ONCE(node->slots[slot]);
		if (ptr && assoc_array_ptr_is_leaf(ptr)) {
			/* We need a barrier between the read of the pointer
			 * and dereferencing the pointer - but only if we are
			 * actually going to dereference it.
			 */
			leaf = assoc_array_ptr_to_leaf(ptr);
			smp_read_barrier_depends();
			if (ops->compare_object(leaf, index_key))
				return (void *)leaf;
		}
	}

	return NULL;
}
353
/*
 * Destructively iterate over an associative array, freeing every metadata
 * block (nodes and shortcuts) and, if @ops is given, every leaf object via
 * ops->free_object().  The traversal is non-recursive, using the blocks'
 * back pointers to ascend.  The caller must prevent other simultaneous
 * accesses.
 */
static void assoc_array_destroy_subtree(struct assoc_array_ptr *root,
					const struct assoc_array_ops *ops)
{
	struct assoc_array_shortcut *shortcut;
	struct assoc_array_node *node;
	struct assoc_array_ptr *cursor, *parent = NULL;
	int slot = -1;	/* -1 means "just descended through a shortcut/root" */

	pr_devel("-->%s()\n", __func__);

	cursor = root;
	if (!cursor) {
		pr_devel("empty\n");
		return;
	}

move_to_meta:
	if (assoc_array_ptr_is_shortcut(cursor)) {
		/* Descend through a shortcut */
		pr_devel("[%d] shortcut\n", slot);
		BUG_ON(!assoc_array_ptr_is_shortcut(cursor));
		shortcut = assoc_array_ptr_to_shortcut(cursor);
		/* Sanity-check the linkage before trusting it for ascent */
		BUG_ON(shortcut->back_pointer != parent);
		BUG_ON(slot != -1 && shortcut->parent_slot != slot);
		parent = cursor;
		cursor = shortcut->next_node;
		slot = -1;
		BUG_ON(!assoc_array_ptr_is_node(cursor));
	}

	pr_devel("[%d] node\n", slot);
	node = assoc_array_ptr_to_node(cursor);
	BUG_ON(node->back_pointer != parent);
	BUG_ON(slot != -1 && node->parent_slot != slot);
	slot = 0;

continue_node:
	pr_devel("Node %p [back=%p]\n", node, node->back_pointer);
	for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
		struct assoc_array_ptr *ptr = node->slots[slot];
		if (!ptr)
			continue;
		if (assoc_array_ptr_is_meta(ptr)) {
			/* Recurse (iteratively) into the subtree first */
			parent = cursor;
			cursor = ptr;
			goto move_to_meta;
		}

		if (ops) {
			pr_devel("[%d] free leaf\n", slot);
			ops->free_object(assoc_array_ptr_to_leaf(ptr));
		}
	}

	/* All slots dealt with - the node itself can now be freed */
	parent = node->back_pointer;
	slot = node->parent_slot;
	pr_devel("free node\n");
	kfree(node);
	if (!parent)
		return;	/* Done */

	/* Move back up to the parent (may need to free a shortcut on
	 * the way up) */
	if (assoc_array_ptr_is_shortcut(parent)) {
		shortcut = assoc_array_ptr_to_shortcut(parent);
		BUG_ON(shortcut->next_node != cursor);
		cursor = parent;
		parent = shortcut->back_pointer;
		slot = shortcut->parent_slot;
		pr_devel("free shortcut\n");
		kfree(shortcut);
		if (!parent)
			return;

		BUG_ON(!assoc_array_ptr_is_node(parent));
	}

	/* Ascend to next slot in parent node */
	pr_devel("ascend to %p[%d]\n", parent, slot);
	cursor = parent;
	node = assoc_array_ptr_to_node(cursor);
	slot++;
	goto continue_node;
}
442
443/**
444 * assoc_array_destroy - Destroy an associative array
445 * @array: The array to destroy.
446 * @ops: The operations to use.
447 *
448 * Discard all metadata and free all objects in an associative array. The
449 * array will be empty and ready to use again upon completion. This function
450 * cannot fail.
451 *
452 * The caller must prevent all other accesses whilst this takes place as no
453 * attempt is made to adjust pointers gracefully to permit RCU readlock-holding
454 * accesses to continue. On the other hand, no memory allocation is required.
455 */
456void assoc_array_destroy(struct assoc_array *array,
457 const struct assoc_array_ops *ops)
458{
459 assoc_array_destroy_subtree(array->root, ops);
460 array->root = NULL;
461}
462
463/*
464 * Handle insertion into an empty tree.
465 */
466static bool assoc_array_insert_in_empty_tree(struct assoc_array_edit *edit)
467{
468 struct assoc_array_node *new_n0;
469
470 pr_devel("-->%s()\n", __func__);
471
472 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
473 if (!new_n0)
474 return false;
475
476 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
477 edit->leaf_p = &new_n0->slots[0];
478 edit->adjust_count_on = new_n0;
479 edit->set[0].ptr = &edit->array->root;
480 edit->set[0].to = assoc_array_node_to_ptr(new_n0);
481
482 pr_devel("<--%s() = ok [no root]\n", __func__);
483 return true;
484}
485
486/*
487 * Handle insertion into a terminal node.
488 */
489static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit,
490 const struct assoc_array_ops *ops,
491 const void *index_key,
492 struct assoc_array_walk_result *result)
493{
494 struct assoc_array_shortcut *shortcut, *new_s0;
495 struct assoc_array_node *node, *new_n0, *new_n1, *side;
496 struct assoc_array_ptr *ptr;
497 unsigned long dissimilarity, base_seg, blank;
498 size_t keylen;
499 bool have_meta;
500 int level, diff;
501 int slot, next_slot, free_slot, i, j;
502
503 node = result->terminal_node.node;
504 level = result->terminal_node.level;
505 edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = result->terminal_node.slot;
506
507 pr_devel("-->%s()\n", __func__);
508
509 /* We arrived at a node which doesn't have an onward node or shortcut
510 * pointer that we have to follow. This means that (a) the leaf we
511 * want must go here (either by insertion or replacement) or (b) we
512 * need to split this node and insert in one of the fragments.
513 */
514 free_slot = -1;
515
516 /* Firstly, we have to check the leaves in this node to see if there's
517 * a matching one we should replace in place.
518 */
519 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
520 ptr = node->slots[i];
521 if (!ptr) {
522 free_slot = i;
523 continue;
524 }
525 if (ops->compare_object(assoc_array_ptr_to_leaf(ptr), index_key)) {
526 pr_devel("replace in slot %d\n", i);
527 edit->leaf_p = &node->slots[i];
528 edit->dead_leaf = node->slots[i];
529 pr_devel("<--%s() = ok [replace]\n", __func__);
530 return true;
531 }
532 }
533
534 /* If there is a free slot in this node then we can just insert the
535 * leaf here.
536 */
537 if (free_slot >= 0) {
538 pr_devel("insert in free slot %d\n", free_slot);
539 edit->leaf_p = &node->slots[free_slot];
540 edit->adjust_count_on = node;
541 pr_devel("<--%s() = ok [insert]\n", __func__);
542 return true;
543 }
544
545 /* The node has no spare slots - so we're either going to have to split
546 * it or insert another node before it.
547 *
548 * Whatever, we're going to need at least two new nodes - so allocate
549 * those now. We may also need a new shortcut, but we deal with that
550 * when we need it.
551 */
552 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
553 if (!new_n0)
554 return false;
555 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
556 new_n1 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
557 if (!new_n1)
558 return false;
559 edit->new_meta[1] = assoc_array_node_to_ptr(new_n1);
560
561 /* We need to find out how similar the leaves are. */
562 pr_devel("no spare slots\n");
563 have_meta = false;
564 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
565 ptr = node->slots[i];
566 if (assoc_array_ptr_is_meta(ptr)) {
567 edit->segment_cache[i] = 0xff;
568 have_meta = true;
569 continue;
570 }
571 base_seg = ops->get_object_key_chunk(
572 assoc_array_ptr_to_leaf(ptr), level);
573 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
574 edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK;
575 }
576
577 if (have_meta) {
578 pr_devel("have meta\n");
579 goto split_node;
580 }
581
582 /* The node contains only leaves */
583 dissimilarity = 0;
584 base_seg = edit->segment_cache[0];
585 for (i = 1; i < ASSOC_ARRAY_FAN_OUT; i++)
586 dissimilarity |= edit->segment_cache[i] ^ base_seg;
587
588 pr_devel("only leaves; dissimilarity=%lx\n", dissimilarity);
589
590 if ((dissimilarity & ASSOC_ARRAY_FAN_MASK) == 0) {
591 /* The old leaves all cluster in the same slot. We will need
592 * to insert a shortcut if the new node wants to cluster with them.
593 */
594 if ((edit->segment_cache[ASSOC_ARRAY_FAN_OUT] ^ base_seg) == 0)
595 goto all_leaves_cluster_together;
596
597 /* Otherwise we can just insert a new node ahead of the old
598 * one.
599 */
600 goto present_leaves_cluster_but_not_new_leaf;
601 }
602
603split_node:
604 pr_devel("split node\n");
605
606 /* We need to split the current node; we know that the node doesn't
607 * simply contain a full set of leaves that cluster together (it
608 * contains meta pointers and/or non-clustering leaves).
609 *
610 * We need to expel at least two leaves out of a set consisting of the
611 * leaves in the node and the new leaf.
612 *
613 * We need a new node (n0) to replace the current one and a new node to
614 * take the expelled nodes (n1).
615 */
616 edit->set[0].to = assoc_array_node_to_ptr(new_n0);
617 new_n0->back_pointer = node->back_pointer;
618 new_n0->parent_slot = node->parent_slot;
619 new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
620 new_n1->parent_slot = -1; /* Need to calculate this */
621
622do_split_node:
623 pr_devel("do_split_node\n");
624
625 new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
626 new_n1->nr_leaves_on_branch = 0;
627
628 /* Begin by finding two matching leaves. There have to be at least two
629 * that match - even if there are meta pointers - because any leaf that
630 * would match a slot with a meta pointer in it must be somewhere
631 * behind that meta pointer and cannot be here. Further, given N
632 * remaining leaf slots, we now have N+1 leaves to go in them.
633 */
634 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
635 slot = edit->segment_cache[i];
636 if (slot != 0xff)
637 for (j = i + 1; j < ASSOC_ARRAY_FAN_OUT + 1; j++)
638 if (edit->segment_cache[j] == slot)
639 goto found_slot_for_multiple_occupancy;
640 }
641found_slot_for_multiple_occupancy:
642 pr_devel("same slot: %x %x [%02x]\n", i, j, slot);
643 BUG_ON(i >= ASSOC_ARRAY_FAN_OUT);
644 BUG_ON(j >= ASSOC_ARRAY_FAN_OUT + 1);
645 BUG_ON(slot >= ASSOC_ARRAY_FAN_OUT);
646
647 new_n1->parent_slot = slot;
648
649 /* Metadata pointers cannot change slot */
650 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++)
651 if (assoc_array_ptr_is_meta(node->slots[i]))
652 new_n0->slots[i] = node->slots[i];
653 else
654 new_n0->slots[i] = NULL;
655 BUG_ON(new_n0->slots[slot] != NULL);
656 new_n0->slots[slot] = assoc_array_node_to_ptr(new_n1);
657
658 /* Filter the leaf pointers between the new nodes */
659 free_slot = -1;
660 next_slot = 0;
661 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
662 if (assoc_array_ptr_is_meta(node->slots[i]))
663 continue;
664 if (edit->segment_cache[i] == slot) {
665 new_n1->slots[next_slot++] = node->slots[i];
666 new_n1->nr_leaves_on_branch++;
667 } else {
668 do {
669 free_slot++;
670 } while (new_n0->slots[free_slot] != NULL);
671 new_n0->slots[free_slot] = node->slots[i];
672 }
673 }
674
675 pr_devel("filtered: f=%x n=%x\n", free_slot, next_slot);
676
677 if (edit->segment_cache[ASSOC_ARRAY_FAN_OUT] != slot) {
678 do {
679 free_slot++;
680 } while (new_n0->slots[free_slot] != NULL);
681 edit->leaf_p = &new_n0->slots[free_slot];
682 edit->adjust_count_on = new_n0;
683 } else {
684 edit->leaf_p = &new_n1->slots[next_slot++];
685 edit->adjust_count_on = new_n1;
686 }
687
688 BUG_ON(next_slot <= 1);
689
690 edit->set_backpointers_to = assoc_array_node_to_ptr(new_n0);
691 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
692 if (edit->segment_cache[i] == 0xff) {
693 ptr = node->slots[i];
694 BUG_ON(assoc_array_ptr_is_leaf(ptr));
695 if (assoc_array_ptr_is_node(ptr)) {
696 side = assoc_array_ptr_to_node(ptr);
697 edit->set_backpointers[i] = &side->back_pointer;
698 } else {
699 shortcut = assoc_array_ptr_to_shortcut(ptr);
700 edit->set_backpointers[i] = &shortcut->back_pointer;
701 }
702 }
703 }
704
705 ptr = node->back_pointer;
706 if (!ptr)
707 edit->set[0].ptr = &edit->array->root;
708 else if (assoc_array_ptr_is_node(ptr))
709 edit->set[0].ptr = &assoc_array_ptr_to_node(ptr)->slots[node->parent_slot];
710 else
711 edit->set[0].ptr = &assoc_array_ptr_to_shortcut(ptr)->next_node;
712 edit->excised_meta[0] = assoc_array_node_to_ptr(node);
713 pr_devel("<--%s() = ok [split node]\n", __func__);
714 return true;
715
716present_leaves_cluster_but_not_new_leaf:
717 /* All the old leaves cluster in the same slot, but the new leaf wants
718 * to go into a different slot, so we create a new node to hold the new
719 * leaf and a pointer to a new node holding all the old leaves.
720 */
721 pr_devel("present leaves cluster but not new leaf\n");
722
723 new_n0->back_pointer = node->back_pointer;
724 new_n0->parent_slot = node->parent_slot;
725 new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
726 new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
727 new_n1->parent_slot = edit->segment_cache[0];
728 new_n1->nr_leaves_on_branch = node->nr_leaves_on_branch;
729 edit->adjust_count_on = new_n0;
730
731 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++)
732 new_n1->slots[i] = node->slots[i];
733
734 new_n0->slots[edit->segment_cache[0]] = assoc_array_node_to_ptr(new_n0);
735 edit->leaf_p = &new_n0->slots[edit->segment_cache[ASSOC_ARRAY_FAN_OUT]];
736
737 edit->set[0].ptr = &assoc_array_ptr_to_node(node->back_pointer)->slots[node->parent_slot];
738 edit->set[0].to = assoc_array_node_to_ptr(new_n0);
739 edit->excised_meta[0] = assoc_array_node_to_ptr(node);
740 pr_devel("<--%s() = ok [insert node before]\n", __func__);
741 return true;
742
743all_leaves_cluster_together:
744 /* All the leaves, new and old, want to cluster together in this node
745 * in the same slot, so we have to replace this node with a shortcut to
746 * skip over the identical parts of the key and then place a pair of
747 * nodes, one inside the other, at the end of the shortcut and
748 * distribute the keys between them.
749 *
750 * Firstly we need to work out where the leaves start diverging as a
751 * bit position into their keys so that we know how big the shortcut
752 * needs to be.
753 *
754 * We only need to make a single pass of N of the N+1 leaves because if
755 * any keys differ between themselves at bit X then at least one of
756 * them must also differ with the base key at bit X or before.
757 */
758 pr_devel("all leaves cluster together\n");
759 diff = INT_MAX;
760 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
761 int x = ops->diff_objects(assoc_array_ptr_to_leaf(edit->leaf),
762 assoc_array_ptr_to_leaf(node->slots[i]));
763 if (x < diff) {
764 BUG_ON(x < 0);
765 diff = x;
766 }
767 }
768 BUG_ON(diff == INT_MAX);
769 BUG_ON(diff < level + ASSOC_ARRAY_LEVEL_STEP);
770
771 keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE);
772 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
773
774 new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) +
775 keylen * sizeof(unsigned long), GFP_KERNEL);
776 if (!new_s0)
777 return false;
778 edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s0);
779
780 edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0);
781 new_s0->back_pointer = node->back_pointer;
782 new_s0->parent_slot = node->parent_slot;
783 new_s0->next_node = assoc_array_node_to_ptr(new_n0);
784 new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0);
785 new_n0->parent_slot = 0;
786 new_n1->back_pointer = assoc_array_node_to_ptr(new_n0);
787 new_n1->parent_slot = -1; /* Need to calculate this */
788
789 new_s0->skip_to_level = level = diff & ~ASSOC_ARRAY_LEVEL_STEP_MASK;
790 pr_devel("skip_to_level = %d [diff %d]\n", level, diff);
791 BUG_ON(level <= 0);
792
793 for (i = 0; i < keylen; i++)
794 new_s0->index_key[i] =
795 ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE);
796
797 blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK);
798 pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank);
799 new_s0->index_key[keylen - 1] &= ~blank;
800
801 /* This now reduces to a node splitting exercise for which we'll need
802 * to regenerate the disparity table.
803 */
804 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
805 ptr = node->slots[i];
806 base_seg = ops->get_object_key_chunk(assoc_array_ptr_to_leaf(ptr),
807 level);
808 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
809 edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK;
810 }
811
812 base_seg = ops->get_key_chunk(index_key, level);
813 base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK;
814 edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = base_seg & ASSOC_ARRAY_FAN_MASK;
815 goto do_split_node;
816}
817
818/*
819 * Handle insertion into the middle of a shortcut.
820 */
821static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit,
822 const struct assoc_array_ops *ops,
823 struct assoc_array_walk_result *result)
824{
825 struct assoc_array_shortcut *shortcut, *new_s0, *new_s1;
826 struct assoc_array_node *node, *new_n0, *side;
827 unsigned long sc_segments, dissimilarity, blank;
828 size_t keylen;
829 int level, sc_level, diff;
830 int sc_slot;
831
832 shortcut = result->wrong_shortcut.shortcut;
833 level = result->wrong_shortcut.level;
834 sc_level = result->wrong_shortcut.sc_level;
835 sc_segments = result->wrong_shortcut.sc_segments;
836 dissimilarity = result->wrong_shortcut.dissimilarity;
837
838 pr_devel("-->%s(ix=%d dis=%lx scix=%d)\n",
839 __func__, level, dissimilarity, sc_level);
840
841 /* We need to split a shortcut and insert a node between the two
842 * pieces. Zero-length pieces will be dispensed with entirely.
843 *
844 * First of all, we need to find out in which level the first
845 * difference was.
846 */
847 diff = __ffs(dissimilarity);
848 diff &= ~ASSOC_ARRAY_LEVEL_STEP_MASK;
849 diff += sc_level & ~ASSOC_ARRAY_KEY_CHUNK_MASK;
850 pr_devel("diff=%d\n", diff);
851
852 if (!shortcut->back_pointer) {
853 edit->set[0].ptr = &edit->array->root;
854 } else if (assoc_array_ptr_is_node(shortcut->back_pointer)) {
855 node = assoc_array_ptr_to_node(shortcut->back_pointer);
856 edit->set[0].ptr = &node->slots[shortcut->parent_slot];
857 } else {
858 BUG();
859 }
860
861 edit->excised_meta[0] = assoc_array_shortcut_to_ptr(shortcut);
862
863 /* Create a new node now since we're going to need it anyway */
864 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
865 if (!new_n0)
866 return false;
867 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
868 edit->adjust_count_on = new_n0;
869
870 /* Insert a new shortcut before the new node if this segment isn't of
871 * zero length - otherwise we just connect the new node directly to the
872 * parent.
873 */
874 level += ASSOC_ARRAY_LEVEL_STEP;
875 if (diff > level) {
876 pr_devel("pre-shortcut %d...%d\n", level, diff);
877 keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE);
878 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
879
880 new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) +
881 keylen * sizeof(unsigned long), GFP_KERNEL);
882 if (!new_s0)
883 return false;
884 edit->new_meta[1] = assoc_array_shortcut_to_ptr(new_s0);
885 edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0);
886 new_s0->back_pointer = shortcut->back_pointer;
887 new_s0->parent_slot = shortcut->parent_slot;
888 new_s0->next_node = assoc_array_node_to_ptr(new_n0);
889 new_s0->skip_to_level = diff;
890
891 new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0);
892 new_n0->parent_slot = 0;
893
894 memcpy(new_s0->index_key, shortcut->index_key,
895 keylen * sizeof(unsigned long));
896
897 blank = ULONG_MAX << (diff & ASSOC_ARRAY_KEY_CHUNK_MASK);
898 pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, diff, blank);
899 new_s0->index_key[keylen - 1] &= ~blank;
900 } else {
901 pr_devel("no pre-shortcut\n");
902 edit->set[0].to = assoc_array_node_to_ptr(new_n0);
903 new_n0->back_pointer = shortcut->back_pointer;
904 new_n0->parent_slot = shortcut->parent_slot;
905 }
906
907 side = assoc_array_ptr_to_node(shortcut->next_node);
908 new_n0->nr_leaves_on_branch = side->nr_leaves_on_branch;
909
910 /* We need to know which slot in the new node is going to take a
911 * metadata pointer.
912 */
913 sc_slot = sc_segments >> (diff & ASSOC_ARRAY_KEY_CHUNK_MASK);
914 sc_slot &= ASSOC_ARRAY_FAN_MASK;
915
916 pr_devel("new slot %lx >> %d -> %d\n",
917 sc_segments, diff & ASSOC_ARRAY_KEY_CHUNK_MASK, sc_slot);
918
919 /* Determine whether we need to follow the new node with a replacement
920 * for the current shortcut. We could in theory reuse the current
921 * shortcut if its parent slot number doesn't change - but that's a
922 * 1-in-16 chance so not worth expending the code upon.
923 */
924 level = diff + ASSOC_ARRAY_LEVEL_STEP;
925 if (level < shortcut->skip_to_level) {
926 pr_devel("post-shortcut %d...%d\n", level, shortcut->skip_to_level);
927 keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
928 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
929
930 new_s1 = kzalloc(sizeof(struct assoc_array_shortcut) +
931 keylen * sizeof(unsigned long), GFP_KERNEL);
932 if (!new_s1)
933 return false;
934 edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s1);
935
936 new_s1->back_pointer = assoc_array_node_to_ptr(new_n0);
937 new_s1->parent_slot = sc_slot;
938 new_s1->next_node = shortcut->next_node;
939 new_s1->skip_to_level = shortcut->skip_to_level;
940
941 new_n0->slots[sc_slot] = assoc_array_shortcut_to_ptr(new_s1);
942
943 memcpy(new_s1->index_key, shortcut->index_key,
944 keylen * sizeof(unsigned long));
945
946 edit->set[1].ptr = &side->back_pointer;
947 edit->set[1].to = assoc_array_shortcut_to_ptr(new_s1);
948 } else {
949 pr_devel("no post-shortcut\n");
950
951 /* We don't have to replace the pointed-to node as long as we
952 * use memory barriers to make sure the parent slot number is
953 * changed before the back pointer (the parent slot number is
954 * irrelevant to the old parent shortcut).
955 */
956 new_n0->slots[sc_slot] = shortcut->next_node;
957 edit->set_parent_slot[0].p = &side->parent_slot;
958 edit->set_parent_slot[0].to = sc_slot;
959 edit->set[1].ptr = &side->back_pointer;
960 edit->set[1].to = assoc_array_node_to_ptr(new_n0);
961 }
962
963 /* Install the new leaf in a spare slot in the new node. */
964 if (sc_slot == 0)
965 edit->leaf_p = &new_n0->slots[1];
966 else
967 edit->leaf_p = &new_n0->slots[0];
968
969 pr_devel("<--%s() = ok [split shortcut]\n", __func__);
970 return edit;
971}
972
973/**
974 * assoc_array_insert - Script insertion of an object into an associative array
975 * @array: The array to insert into.
976 * @ops: The operations to use.
977 * @index_key: The key to insert at.
978 * @object: The object to insert.
979 *
980 * Precalculate and preallocate a script for the insertion or replacement of an
981 * object in an associative array. This results in an edit script that can
982 * either be applied or cancelled.
983 *
984 * The function returns a pointer to an edit script or -ENOMEM.
985 *
986 * The caller should lock against other modifications and must continue to hold
987 * the lock until assoc_array_apply_edit() has been called.
988 *
989 * Accesses to the tree may take place concurrently with this function,
990 * provided they hold the RCU read lock.
991 */
992struct assoc_array_edit *assoc_array_insert(struct assoc_array *array,
993 const struct assoc_array_ops *ops,
994 const void *index_key,
995 void *object)
996{
997 struct assoc_array_walk_result result;
998 struct assoc_array_edit *edit;
999
1000 pr_devel("-->%s()\n", __func__);
1001
1002 /* The leaf pointer we're given must not have the bottom bit set as we
1003 * use those for type-marking the pointer. NULL pointers are also not
1004 * allowed as they indicate an empty slot but we have to allow them
1005 * here as they can be updated later.
1006 */
1007 BUG_ON(assoc_array_ptr_is_meta(object));
1008
1009 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
1010 if (!edit)
1011 return ERR_PTR(-ENOMEM);
1012 edit->array = array;
1013 edit->ops = ops;
1014 edit->leaf = assoc_array_leaf_to_ptr(object);
1015 edit->adjust_count_by = 1;
1016
1017 switch (assoc_array_walk(array, ops, index_key, &result)) {
1018 case assoc_array_walk_tree_empty:
1019 /* Allocate a root node if there isn't one yet */
1020 if (!assoc_array_insert_in_empty_tree(edit))
1021 goto enomem;
1022 return edit;
1023
1024 case assoc_array_walk_found_terminal_node:
1025 /* We found a node that doesn't have a node/shortcut pointer in
1026 * the slot corresponding to the index key that we have to
1027 * follow.
1028 */
1029 if (!assoc_array_insert_into_terminal_node(edit, ops, index_key,
1030 &result))
1031 goto enomem;
1032 return edit;
1033
1034 case assoc_array_walk_found_wrong_shortcut:
1035 /* We found a shortcut that didn't match our key in a slot we
1036 * needed to follow.
1037 */
1038 if (!assoc_array_insert_mid_shortcut(edit, ops, &result))
1039 goto enomem;
1040 return edit;
1041 }
1042
1043enomem:
1044 /* Clean up after an out of memory error */
1045 pr_devel("enomem\n");
1046 assoc_array_cancel_edit(edit);
1047 return ERR_PTR(-ENOMEM);
1048}
1049
1050/**
1051 * assoc_array_insert_set_object - Set the new object pointer in an edit script
1052 * @edit: The edit script to modify.
1053 * @object: The object pointer to set.
1054 *
1055 * Change the object to be inserted in an edit script. The object pointed to
1056 * by the old object is not freed. This must be done prior to applying the
1057 * script.
1058 */
1059void assoc_array_insert_set_object(struct assoc_array_edit *edit, void *object)
1060{
1061 BUG_ON(!object);
1062 edit->leaf = assoc_array_leaf_to_ptr(object);
1063}
1064
/* Context passed to assoc_array_delete_collapse_iterator() when collapsing a
 * small subtree's leaves into a single replacement node during deletion.
 */
struct assoc_array_delete_collapse_context {
	struct assoc_array_node *node;	/* The node being filled with leaves */
	const void *skip_leaf;		/* The leaf being deleted; not copied */
	int slot;			/* Next free slot in @node */
};
1070
1071/*
1072 * Subtree collapse to node iterator.
1073 */
1074static int assoc_array_delete_collapse_iterator(const void *leaf,
1075 void *iterator_data)
1076{
1077 struct assoc_array_delete_collapse_context *collapse = iterator_data;
1078
1079 if (leaf == collapse->skip_leaf)
1080 return 0;
1081
1082 BUG_ON(collapse->slot >= ASSOC_ARRAY_FAN_OUT);
1083
1084 collapse->node->slots[collapse->slot++] = assoc_array_leaf_to_ptr(leaf);
1085 return 0;
1086}
1087
1088/**
1089 * assoc_array_delete - Script deletion of an object from an associative array
1090 * @array: The array to search.
1091 * @ops: The operations to use.
1092 * @index_key: The key to the object.
1093 *
1094 * Precalculate and preallocate a script for the deletion of an object from an
1095 * associative array. This results in an edit script that can either be
1096 * applied or cancelled.
1097 *
1098 * The function returns a pointer to an edit script if the object was found,
1099 * NULL if the object was not found or -ENOMEM.
1100 *
1101 * The caller should lock against other modifications and must continue to hold
1102 * the lock until assoc_array_apply_edit() has been called.
1103 *
1104 * Accesses to the tree may take place concurrently with this function,
1105 * provided they hold the RCU read lock.
1106 */
1107struct assoc_array_edit *assoc_array_delete(struct assoc_array *array,
1108 const struct assoc_array_ops *ops,
1109 const void *index_key)
1110{
1111 struct assoc_array_delete_collapse_context collapse;
1112 struct assoc_array_walk_result result;
1113 struct assoc_array_node *node, *new_n0;
1114 struct assoc_array_edit *edit;
1115 struct assoc_array_ptr *ptr;
1116 bool has_meta;
1117 int slot, i;
1118
1119 pr_devel("-->%s()\n", __func__);
1120
1121 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
1122 if (!edit)
1123 return ERR_PTR(-ENOMEM);
1124 edit->array = array;
1125 edit->ops = ops;
1126 edit->adjust_count_by = -1;
1127
1128 switch (assoc_array_walk(array, ops, index_key, &result)) {
1129 case assoc_array_walk_found_terminal_node:
1130 /* We found a node that should contain the leaf we've been
1131 * asked to remove - *if* it's in the tree.
1132 */
1133 pr_devel("terminal_node\n");
1134 node = result.terminal_node.node;
1135
1136 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
1137 ptr = node->slots[slot];
1138 if (ptr &&
1139 assoc_array_ptr_is_leaf(ptr) &&
1140 ops->compare_object(assoc_array_ptr_to_leaf(ptr),
1141 index_key))
1142 goto found_leaf;
1143 }
1144 case assoc_array_walk_tree_empty:
1145 case assoc_array_walk_found_wrong_shortcut:
1146 default:
1147 assoc_array_cancel_edit(edit);
1148 pr_devel("not found\n");
1149 return NULL;
1150 }
1151
1152found_leaf:
1153 BUG_ON(array->nr_leaves_on_tree <= 0);
1154
1155 /* In the simplest form of deletion we just clear the slot and release
1156 * the leaf after a suitable interval.
1157 */
1158 edit->dead_leaf = node->slots[slot];
1159 edit->set[0].ptr = &node->slots[slot];
1160 edit->set[0].to = NULL;
1161 edit->adjust_count_on = node;
1162
1163 /* If that concludes erasure of the last leaf, then delete the entire
1164 * internal array.
1165 */
1166 if (array->nr_leaves_on_tree == 1) {
1167 edit->set[1].ptr = &array->root;
1168 edit->set[1].to = NULL;
1169 edit->adjust_count_on = NULL;
1170 edit->excised_subtree = array->root;
1171 pr_devel("all gone\n");
1172 return edit;
1173 }
1174
1175 /* However, we'd also like to clear up some metadata blocks if we
1176 * possibly can.
1177 *
1178 * We go for a simple algorithm of: if this node has FAN_OUT or fewer
1179 * leaves in it, then attempt to collapse it - and attempt to
1180 * recursively collapse up the tree.
1181 *
1182 * We could also try and collapse in partially filled subtrees to take
1183 * up space in this node.
1184 */
1185 if (node->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
1186 struct assoc_array_node *parent, *grandparent;
1187 struct assoc_array_ptr *ptr;
1188
1189 /* First of all, we need to know if this node has metadata so
1190 * that we don't try collapsing if all the leaves are already
1191 * here.
1192 */
1193 has_meta = false;
1194 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
1195 ptr = node->slots[i];
1196 if (assoc_array_ptr_is_meta(ptr)) {
1197 has_meta = true;
1198 break;
1199 }
1200 }
1201
1202 pr_devel("leaves: %ld [m=%d]\n",
1203 node->nr_leaves_on_branch - 1, has_meta);
1204
1205 /* Look further up the tree to see if we can collapse this node
1206 * into a more proximal node too.
1207 */
1208 parent = node;
1209 collapse_up:
1210 pr_devel("collapse subtree: %ld\n", parent->nr_leaves_on_branch);
1211
1212 ptr = parent->back_pointer;
1213 if (!ptr)
1214 goto do_collapse;
1215 if (assoc_array_ptr_is_shortcut(ptr)) {
1216 struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(ptr);
1217 ptr = s->back_pointer;
1218 if (!ptr)
1219 goto do_collapse;
1220 }
1221
1222 grandparent = assoc_array_ptr_to_node(ptr);
1223 if (grandparent->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) {
1224 parent = grandparent;
1225 goto collapse_up;
1226 }
1227
1228 do_collapse:
1229 /* There's no point collapsing if the original node has no meta
1230 * pointers to discard and if we didn't merge into one of that
1231 * node's ancestry.
1232 */
1233 if (has_meta || parent != node) {
1234 node = parent;
1235
1236 /* Create a new node to collapse into */
1237 new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
1238 if (!new_n0)
1239 goto enomem;
1240 edit->new_meta[0] = assoc_array_node_to_ptr(new_n0);
1241
1242 new_n0->back_pointer = node->back_pointer;
1243 new_n0->parent_slot = node->parent_slot;
1244 new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch;
1245 edit->adjust_count_on = new_n0;
1246
1247 collapse.node = new_n0;
1248 collapse.skip_leaf = assoc_array_ptr_to_leaf(edit->dead_leaf);
1249 collapse.slot = 0;
1250 assoc_array_subtree_iterate(assoc_array_node_to_ptr(node),
1251 node->back_pointer,
1252 assoc_array_delete_collapse_iterator,
1253 &collapse);
1254 pr_devel("collapsed %d,%lu\n", collapse.slot, new_n0->nr_leaves_on_branch);
1255 BUG_ON(collapse.slot != new_n0->nr_leaves_on_branch - 1);
1256
1257 if (!node->back_pointer) {
1258 edit->set[1].ptr = &array->root;
1259 } else if (assoc_array_ptr_is_leaf(node->back_pointer)) {
1260 BUG();
1261 } else if (assoc_array_ptr_is_node(node->back_pointer)) {
1262 struct assoc_array_node *p =
1263 assoc_array_ptr_to_node(node->back_pointer);
1264 edit->set[1].ptr = &p->slots[node->parent_slot];
1265 } else if (assoc_array_ptr_is_shortcut(node->back_pointer)) {
1266 struct assoc_array_shortcut *s =
1267 assoc_array_ptr_to_shortcut(node->back_pointer);
1268 edit->set[1].ptr = &s->next_node;
1269 }
1270 edit->set[1].to = assoc_array_node_to_ptr(new_n0);
1271 edit->excised_subtree = assoc_array_node_to_ptr(node);
1272 }
1273 }
1274
1275 return edit;
1276
1277enomem:
1278 /* Clean up after an out of memory error */
1279 pr_devel("enomem\n");
1280 assoc_array_cancel_edit(edit);
1281 return ERR_PTR(-ENOMEM);
1282}
1283
1284/**
1285 * assoc_array_clear - Script deletion of all objects from an associative array
1286 * @array: The array to clear.
1287 * @ops: The operations to use.
1288 *
1289 * Precalculate and preallocate a script for the deletion of all the objects
1290 * from an associative array. This results in an edit script that can either
1291 * be applied or cancelled.
1292 *
1293 * The function returns a pointer to an edit script if there are objects to be
1294 * deleted, NULL if there are no objects in the array or -ENOMEM.
1295 *
1296 * The caller should lock against other modifications and must continue to hold
1297 * the lock until assoc_array_apply_edit() has been called.
1298 *
1299 * Accesses to the tree may take place concurrently with this function,
1300 * provided they hold the RCU read lock.
1301 */
1302struct assoc_array_edit *assoc_array_clear(struct assoc_array *array,
1303 const struct assoc_array_ops *ops)
1304{
1305 struct assoc_array_edit *edit;
1306
1307 pr_devel("-->%s()\n", __func__);
1308
1309 if (!array->root)
1310 return NULL;
1311
1312 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
1313 if (!edit)
1314 return ERR_PTR(-ENOMEM);
1315 edit->array = array;
1316 edit->ops = ops;
1317 edit->set[1].ptr = &array->root;
1318 edit->set[1].to = NULL;
1319 edit->excised_subtree = array->root;
1320 edit->ops_for_excised_subtree = ops;
1321 pr_devel("all gone\n");
1322 return edit;
1323}
1324
1325/*
1326 * Handle the deferred destruction after an applied edit.
1327 */
1328static void assoc_array_rcu_cleanup(struct rcu_head *head)
1329{
1330 struct assoc_array_edit *edit =
1331 container_of(head, struct assoc_array_edit, rcu);
1332 int i;
1333
1334 pr_devel("-->%s()\n", __func__);
1335
1336 if (edit->dead_leaf)
1337 edit->ops->free_object(assoc_array_ptr_to_leaf(edit->dead_leaf));
1338 for (i = 0; i < ARRAY_SIZE(edit->excised_meta); i++)
1339 if (edit->excised_meta[i])
1340 kfree(assoc_array_ptr_to_node(edit->excised_meta[i]));
1341
1342 if (edit->excised_subtree) {
1343 BUG_ON(assoc_array_ptr_is_leaf(edit->excised_subtree));
1344 if (assoc_array_ptr_is_node(edit->excised_subtree)) {
1345 struct assoc_array_node *n =
1346 assoc_array_ptr_to_node(edit->excised_subtree);
1347 n->back_pointer = NULL;
1348 } else {
1349 struct assoc_array_shortcut *s =
1350 assoc_array_ptr_to_shortcut(edit->excised_subtree);
1351 s->back_pointer = NULL;
1352 }
1353 assoc_array_destroy_subtree(edit->excised_subtree,
1354 edit->ops_for_excised_subtree);
1355 }
1356
1357 kfree(edit);
1358}
1359
1360/**
1361 * assoc_array_apply_edit - Apply an edit script to an associative array
1362 * @edit: The script to apply.
1363 *
1364 * Apply an edit script to an associative array to effect an insertion,
1365 * deletion or clearance. As the edit script includes preallocated memory,
1366 * this is guaranteed not to fail.
1367 *
1368 * The edit script, dead objects and dead metadata will be scheduled for
1369 * destruction after an RCU grace period to permit those doing read-only
1370 * accesses on the array to continue to do so under the RCU read lock whilst
1371 * the edit is taking place.
1372 */
1373void assoc_array_apply_edit(struct assoc_array_edit *edit)
1374{
1375 struct assoc_array_shortcut *shortcut;
1376 struct assoc_array_node *node;
1377 struct assoc_array_ptr *ptr;
1378 int i;
1379
1380 pr_devel("-->%s()\n", __func__);
1381
1382 smp_wmb();
1383 if (edit->leaf_p)
1384 *edit->leaf_p = edit->leaf;
1385
1386 smp_wmb();
1387 for (i = 0; i < ARRAY_SIZE(edit->set_parent_slot); i++)
1388 if (edit->set_parent_slot[i].p)
1389 *edit->set_parent_slot[i].p = edit->set_parent_slot[i].to;
1390
1391 smp_wmb();
1392 for (i = 0; i < ARRAY_SIZE(edit->set_backpointers); i++)
1393 if (edit->set_backpointers[i])
1394 *edit->set_backpointers[i] = edit->set_backpointers_to;
1395
1396 smp_wmb();
1397 for (i = 0; i < ARRAY_SIZE(edit->set); i++)
1398 if (edit->set[i].ptr)
1399 *edit->set[i].ptr = edit->set[i].to;
1400
1401 if (edit->array->root == NULL) {
1402 edit->array->nr_leaves_on_tree = 0;
1403 } else if (edit->adjust_count_on) {
1404 node = edit->adjust_count_on;
1405 for (;;) {
1406 node->nr_leaves_on_branch += edit->adjust_count_by;
1407
1408 ptr = node->back_pointer;
1409 if (!ptr)
1410 break;
1411 if (assoc_array_ptr_is_shortcut(ptr)) {
1412 shortcut = assoc_array_ptr_to_shortcut(ptr);
1413 ptr = shortcut->back_pointer;
1414 if (!ptr)
1415 break;
1416 }
1417 BUG_ON(!assoc_array_ptr_is_node(ptr));
1418 node = assoc_array_ptr_to_node(ptr);
1419 }
1420
1421 edit->array->nr_leaves_on_tree += edit->adjust_count_by;
1422 }
1423
1424 call_rcu(&edit->rcu, assoc_array_rcu_cleanup);
1425}
1426
1427/**
1428 * assoc_array_cancel_edit - Discard an edit script.
1429 * @edit: The script to discard.
1430 *
1431 * Free an edit script and all the preallocated data it holds without making
1432 * any changes to the associative array it was intended for.
1433 *
1434 * NOTE! In the case of an insertion script, this does _not_ release the leaf
1435 * that was to be inserted. That is left to the caller.
1436 */
1437void assoc_array_cancel_edit(struct assoc_array_edit *edit)
1438{
1439 struct assoc_array_ptr *ptr;
1440 int i;
1441
1442 pr_devel("-->%s()\n", __func__);
1443
1444 /* Clean up after an out of memory error */
1445 for (i = 0; i < ARRAY_SIZE(edit->new_meta); i++) {
1446 ptr = edit->new_meta[i];
1447 if (ptr) {
1448 if (assoc_array_ptr_is_node(ptr))
1449 kfree(assoc_array_ptr_to_node(ptr));
1450 else
1451 kfree(assoc_array_ptr_to_shortcut(ptr));
1452 }
1453 }
1454 kfree(edit);
1455}
1456
1457/**
1458 * assoc_array_gc - Garbage collect an associative array.
1459 * @array: The array to clean.
1460 * @ops: The operations to use.
1461 * @iterator: A callback function to pass judgement on each object.
1462 * @iterator_data: Private data for the callback function.
1463 *
1464 * Collect garbage from an associative array and pack down the internal tree to
1465 * save memory.
1466 *
1467 * The iterator function is asked to pass judgement upon each object in the
1468 * array. If it returns false, the object is discard and if it returns true,
1469 * the object is kept. If it returns true, it must increment the object's
1470 * usage count (or whatever it needs to do to retain it) before returning.
1471 *
1472 * This function returns 0 if successful or -ENOMEM if out of memory. In the
1473 * latter case, the array is not changed.
1474 *
1475 * The caller should lock against other modifications and must continue to hold
1476 * the lock until assoc_array_apply_edit() has been called.
1477 *
1478 * Accesses to the tree may take place concurrently with this function,
1479 * provided they hold the RCU read lock.
1480 */
1481int assoc_array_gc(struct assoc_array *array,
1482 const struct assoc_array_ops *ops,
1483 bool (*iterator)(void *object, void *iterator_data),
1484 void *iterator_data)
1485{
1486 struct assoc_array_shortcut *shortcut, *new_s;
1487 struct assoc_array_node *node, *new_n;
1488 struct assoc_array_edit *edit;
1489 struct assoc_array_ptr *cursor, *ptr;
1490 struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp;
1491 unsigned long nr_leaves_on_tree;
1492 int keylen, slot, nr_free, next_slot, i;
1493
1494 pr_devel("-->%s()\n", __func__);
1495
1496 if (!array->root)
1497 return 0;
1498
1499 edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL);
1500 if (!edit)
1501 return -ENOMEM;
1502 edit->array = array;
1503 edit->ops = ops;
1504 edit->ops_for_excised_subtree = ops;
1505 edit->set[0].ptr = &array->root;
1506 edit->excised_subtree = array->root;
1507
1508 new_root = new_parent = NULL;
1509 new_ptr_pp = &new_root;
1510 cursor = array->root;
1511
1512descend:
1513 /* If this point is a shortcut, then we need to duplicate it and
1514 * advance the target cursor.
1515 */
1516 if (assoc_array_ptr_is_shortcut(cursor)) {
1517 shortcut = assoc_array_ptr_to_shortcut(cursor);
1518 keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE);
1519 keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT;
1520 new_s = kmalloc(sizeof(struct assoc_array_shortcut) +
1521 keylen * sizeof(unsigned long), GFP_KERNEL);
1522 if (!new_s)
1523 goto enomem;
1524 pr_devel("dup shortcut %p -> %p\n", shortcut, new_s);
1525 memcpy(new_s, shortcut, (sizeof(struct assoc_array_shortcut) +
1526 keylen * sizeof(unsigned long)));
1527 new_s->back_pointer = new_parent;
1528 new_s->parent_slot = shortcut->parent_slot;
1529 *new_ptr_pp = new_parent = assoc_array_shortcut_to_ptr(new_s);
1530 new_ptr_pp = &new_s->next_node;
1531 cursor = shortcut->next_node;
1532 }
1533
1534 /* Duplicate the node at this position */
1535 node = assoc_array_ptr_to_node(cursor);
1536 new_n = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL);
1537 if (!new_n)
1538 goto enomem;
1539 pr_devel("dup node %p -> %p\n", node, new_n);
1540 new_n->back_pointer = new_parent;
1541 new_n->parent_slot = node->parent_slot;
1542 *new_ptr_pp = new_parent = assoc_array_node_to_ptr(new_n);
1543 new_ptr_pp = NULL;
1544 slot = 0;
1545
1546continue_node:
1547 /* Filter across any leaves and gc any subtrees */
1548 for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
1549 ptr = node->slots[slot];
1550 if (!ptr)
1551 continue;
1552
1553 if (assoc_array_ptr_is_leaf(ptr)) {
1554 if (iterator(assoc_array_ptr_to_leaf(ptr),
1555 iterator_data))
1556 /* The iterator will have done any reference
1557 * counting on the object for us.
1558 */
1559 new_n->slots[slot] = ptr;
1560 continue;
1561 }
1562
1563 new_ptr_pp = &new_n->slots[slot];
1564 cursor = ptr;
1565 goto descend;
1566 }
1567
1568 pr_devel("-- compress node %p --\n", new_n);
1569
1570 /* Count up the number of empty slots in this node and work out the
1571 * subtree leaf count.
1572 */
1573 new_n->nr_leaves_on_branch = 0;
1574 nr_free = 0;
1575 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
1576 ptr = new_n->slots[slot];
1577 if (!ptr)
1578 nr_free++;
1579 else if (assoc_array_ptr_is_leaf(ptr))
1580 new_n->nr_leaves_on_branch++;
1581 }
1582 pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch);
1583
1584 /* See what we can fold in */
1585 next_slot = 0;
1586 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) {
1587 struct assoc_array_shortcut *s;
1588 struct assoc_array_node *child;
1589
1590 ptr = new_n->slots[slot];
1591 if (!ptr || assoc_array_ptr_is_leaf(ptr))
1592 continue;
1593
1594 s = NULL;
1595 if (assoc_array_ptr_is_shortcut(ptr)) {
1596 s = assoc_array_ptr_to_shortcut(ptr);
1597 ptr = s->next_node;
1598 }
1599
1600 child = assoc_array_ptr_to_node(ptr);
1601 new_n->nr_leaves_on_branch += child->nr_leaves_on_branch;
1602
1603 if (child->nr_leaves_on_branch <= nr_free + 1) {
1604 /* Fold the child node into this one */
1605 pr_devel("[%d] fold node %lu/%d [nx %d]\n",
1606 slot, child->nr_leaves_on_branch, nr_free + 1,
1607 next_slot);
1608
1609 /* We would already have reaped an intervening shortcut
1610 * on the way back up the tree.
1611 */
1612 BUG_ON(s);
1613
1614 new_n->slots[slot] = NULL;
1615 nr_free++;
1616 if (slot < next_slot)
1617 next_slot = slot;
1618 for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) {
1619 struct assoc_array_ptr *p = child->slots[i];
1620 if (!p)
1621 continue;
1622 BUG_ON(assoc_array_ptr_is_meta(p));
1623 while (new_n->slots[next_slot])
1624 next_slot++;
1625 BUG_ON(next_slot >= ASSOC_ARRAY_FAN_OUT);
1626 new_n->slots[next_slot++] = p;
1627 nr_free--;
1628 }
1629 kfree(child);
1630 } else {
1631 pr_devel("[%d] retain node %lu/%d [nx %d]\n",
1632 slot, child->nr_leaves_on_branch, nr_free + 1,
1633 next_slot);
1634 }
1635 }
1636
1637 pr_devel("after: %lu\n", new_n->nr_leaves_on_branch);
1638
1639 nr_leaves_on_tree = new_n->nr_leaves_on_branch;
1640
1641 /* Excise this node if it is singly occupied by a shortcut */
1642 if (nr_free == ASSOC_ARRAY_FAN_OUT - 1) {
1643 for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++)
1644 if ((ptr = new_n->slots[slot]))
1645 break;
1646
1647 if (assoc_array_ptr_is_meta(ptr) &&
1648 assoc_array_ptr_is_shortcut(ptr)) {
1649 pr_devel("excise node %p with 1 shortcut\n", new_n);
1650 new_s = assoc_array_ptr_to_shortcut(ptr);
1651 new_parent = new_n->back_pointer;
1652 slot = new_n->parent_slot;
1653 kfree(new_n);
1654 if (!new_parent) {
1655 new_s->back_pointer = NULL;
1656 new_s->parent_slot = 0;
1657 new_root = ptr;
1658 goto gc_complete;
1659 }
1660
1661 if (assoc_array_ptr_is_shortcut(new_parent)) {
1662 /* We can discard any preceding shortcut also */
1663 struct assoc_array_shortcut *s =
1664 assoc_array_ptr_to_shortcut(new_parent);
1665
1666 pr_devel("excise preceding shortcut\n");
1667
1668 new_parent = new_s->back_pointer = s->back_pointer;
1669 slot = new_s->parent_slot = s->parent_slot;
1670 kfree(s);
1671 if (!new_parent) {
1672 new_s->back_pointer = NULL;
1673 new_s->parent_slot = 0;
1674 new_root = ptr;
1675 goto gc_complete;
1676 }
1677 }
1678
1679 new_s->back_pointer = new_parent;
1680 new_s->parent_slot = slot;
1681 new_n = assoc_array_ptr_to_node(new_parent);
1682 new_n->slots[slot] = ptr;
1683 goto ascend_old_tree;
1684 }
1685 }
1686
1687 /* Excise any shortcuts we might encounter that point to nodes that
1688 * only contain leaves.
1689 */
1690 ptr = new_n->back_pointer;
1691 if (!ptr)
1692 goto gc_complete;
1693
1694 if (assoc_array_ptr_is_shortcut(ptr)) {
1695 new_s = assoc_array_ptr_to_shortcut(ptr);
1696 new_parent = new_s->back_pointer;
1697 slot = new_s->parent_slot;
1698
1699 if (new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) {
1700 struct assoc_array_node *n;
1701
1702 pr_devel("excise shortcut\n");
1703 new_n->back_pointer = new_parent;
1704 new_n->parent_slot = slot;
1705 kfree(new_s);
1706 if (!new_parent) {
1707 new_root = assoc_array_node_to_ptr(new_n);
1708 goto gc_complete;
1709 }
1710
1711 n = assoc_array_ptr_to_node(new_parent);
1712 n->slots[slot] = assoc_array_node_to_ptr(new_n);
1713 }
1714 } else {
1715 new_parent = ptr;
1716 }
1717 new_n = assoc_array_ptr_to_node(new_parent);
1718
1719ascend_old_tree:
1720 ptr = node->back_pointer;
1721 if (assoc_array_ptr_is_shortcut(ptr)) {
1722 shortcut = assoc_array_ptr_to_shortcut(ptr);
1723 slot = shortcut->parent_slot;
1724 cursor = shortcut->back_pointer;
1725 } else {
1726 slot = node->parent_slot;
1727 cursor = ptr;
1728 }
1729 BUG_ON(!ptr);
1730 node = assoc_array_ptr_to_node(cursor);
1731 slot++;
1732 goto continue_node;
1733
1734gc_complete:
1735 edit->set[0].to = new_root;
1736 assoc_array_apply_edit(edit);
1737 edit->array->nr_leaves_on_tree = nr_leaves_on_tree;
1738 return 0;
1739
1740enomem:
1741 pr_devel("enomem\n");
1742 assoc_array_destroy_subtree(new_root, edit->ops);
1743 kfree(edit);
1744 return -ENOMEM;
1745}