author	Linus Torvalds <torvalds@linux-foundation.org>	2018-10-28 14:35:40 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2018-10-28 14:35:40 -0400
commit	dad4f140edaa3f6bb452b6913d41af1ffd672e45 (patch)
tree	1c0ebdcdfcdfb4ec9af7810c5ad9bae0f791ff5c /lib
parent	69d5b97c597307773fe6c59775a5d5a88bb7e6b3 (diff)
parent	3a08cd52c37c793ffc199f6fc2ecfc368e284b2d (diff)
Merge branch 'xarray' of git://git.infradead.org/users/willy/linux-dax
Pull XArray conversion from Matthew Wilcox:
 "The XArray provides an improved interface to the radix tree data
  structure, providing locking as part of the API, specifying GFP flags
  at allocation time, eliminating preloading, less re-walking the tree,
  more efficient iterations and not exposing RCU-protected pointers to
  its users.

  This patch set

   1. Introduces the XArray implementation

   2. Converts the pagecache to use it

   3. Converts memremap to use it

  The page cache is the most complex and important user of the radix
  tree, so converting it was most important. Converting the memremap
  code removes the only other user of the multiorder code, which allows
  us to remove the radix tree code that supported it.

  I have 40+ followup patches to convert many other users of the radix
  tree over to the XArray, but I'd like to get this part in first. The
  other conversions haven't been in linux-next and aren't suitable for
  applying yet, but you can see them in the xarray-conv branch if
  you're interested"

* 'xarray' of git://git.infradead.org/users/willy/linux-dax: (90 commits)
  radix tree: Remove multiorder support
  radix tree test: Convert multiorder tests to XArray
  radix tree tests: Convert item_delete_rcu to XArray
  radix tree tests: Convert item_kill_tree to XArray
  radix tree tests: Move item_insert_order
  radix tree test suite: Remove multiorder benchmarking
  radix tree test suite: Remove __item_insert
  memremap: Convert to XArray
  xarray: Add range store functionality
  xarray: Move multiorder_check to in-kernel tests
  xarray: Move multiorder_shrink to kernel tests
  xarray: Move multiorder account test in-kernel
  radix tree test suite: Convert iteration test to XArray
  radix tree test suite: Convert tag_tagged_items to XArray
  radix tree: Remove radix_tree_clear_tags
  radix tree: Remove radix_tree_maybe_preload_order
  radix tree: Remove split/join code
  radix tree: Remove radix_tree_update_node_t
  page cache: Finish XArray conversion
  dax: Convert page fault handlers to XArray
  ...
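[Editor's note: the fragment below is an illustrative sketch, not part of this merge. It shows the basic XArray calls the message above refers to: locking happens inside the array, GFP flags are passed at store time instead of via preloading, and lookups return the stored pointer rather than an RCU-protected internal node. The array name and function are hypothetical.]

/* Illustrative sketch only -- not part of this patch series. */
#include <linux/xarray.h>

static DEFINE_XARRAY(example_array);	/* hypothetical example array */

static int example_store_and_load(void *item)
{
	void *old;

	/*
	 * No preload and no caller-side lock: xa_store() takes the
	 * array's internal spinlock and allocates with the GFP flags
	 * given here.
	 */
	old = xa_store(&example_array, 42, item, GFP_KERNEL);
	if (xa_is_err(old))
		return xa_err(old);

	/* The lookup returns the user pointer, never an internal node. */
	WARN_ON(xa_load(&example_array, 42) != item);

	xa_erase(&example_array, 42);	/* drop the entry again */
	return 0;
}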
Diffstat (limited to 'lib')
-rw-r--r--	lib/Kconfig        |    5
-rw-r--r--	lib/Kconfig.debug  |    3
-rw-r--r--	lib/Makefile       |    3
-rw-r--r--	lib/idr.c          |  401
-rw-r--r--	lib/radix-tree.c   |  834
-rw-r--r--	lib/test_xarray.c  | 1238
-rw-r--r--	lib/xarray.c       | 2036
7 files changed, 3578 insertions, 942 deletions
diff --git a/lib/Kconfig b/lib/Kconfig
index d82f20609939..d1573a16aa92 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -399,8 +399,11 @@ config INTERVAL_TREE
 
 	  for more information.
 
-config RADIX_TREE_MULTIORDER
+config XARRAY_MULTI
 	bool
+	help
+	  Support entries which occupy multiple consecutive indices in the
+	  XArray.
 
 config ASSOCIATIVE_ARRAY
 	bool
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 04adfc3b185e..e0ba05e6f6bd 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1813,6 +1813,9 @@ config TEST_BITFIELD
 config TEST_UUID
 	tristate "Test functions located in the uuid module at runtime"
 
+config TEST_XARRAY
+	tristate "Test the XArray code at runtime"
+
 config TEST_OVERFLOW
 	tristate "Test check_*_overflow() functions at runtime"
 
diff --git a/lib/Makefile b/lib/Makefile
index fa3eb1b4c0e3..3d341f59f756 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -18,7 +18,7 @@ KCOV_INSTRUMENT_debugobjects.o := n
 KCOV_INSTRUMENT_dynamic_debug.o := n
 
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
-	 rbtree.o radix-tree.o timerqueue.o\
+	 rbtree.o radix-tree.o timerqueue.o xarray.o \
 	 idr.o int_sqrt.o extable.o \
 	 sha1.o chacha20.o irq_regs.o argv_split.o \
 	 flex_proportions.o ratelimit.o show_mem.o \
@@ -68,6 +68,7 @@ obj-$(CONFIG_TEST_PRINTF) += test_printf.o
 obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
 obj-$(CONFIG_TEST_BITFIELD) += test_bitfield.o
 obj-$(CONFIG_TEST_UUID) += test_uuid.o
+obj-$(CONFIG_TEST_XARRAY) += test_xarray.o
 obj-$(CONFIG_TEST_PARMAN) += test_parman.o
 obj-$(CONFIG_TEST_KMOD) += test_kmod.o
 obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o
diff --git a/lib/idr.c b/lib/idr.c
index fab2fd5bc326..cb1db9b8d3f6 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -6,8 +6,6 @@
6#include <linux/spinlock.h> 6#include <linux/spinlock.h>
7#include <linux/xarray.h> 7#include <linux/xarray.h>
8 8
9DEFINE_PER_CPU(struct ida_bitmap *, ida_bitmap);
10
11/** 9/**
12 * idr_alloc_u32() - Allocate an ID. 10 * idr_alloc_u32() - Allocate an ID.
13 * @idr: IDR handle. 11 * @idr: IDR handle.
@@ -39,10 +37,8 @@ int idr_alloc_u32(struct idr *idr, void *ptr, u32 *nextid,
39 unsigned int base = idr->idr_base; 37 unsigned int base = idr->idr_base;
40 unsigned int id = *nextid; 38 unsigned int id = *nextid;
41 39
42 if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr))) 40 if (WARN_ON_ONCE(!(idr->idr_rt.xa_flags & ROOT_IS_IDR)))
43 return -EINVAL; 41 idr->idr_rt.xa_flags |= IDR_RT_MARKER;
44 if (WARN_ON_ONCE(!(idr->idr_rt.gfp_mask & ROOT_IS_IDR)))
45 idr->idr_rt.gfp_mask |= IDR_RT_MARKER;
46 42
47 id = (id < base) ? 0 : id - base; 43 id = (id < base) ? 0 : id - base;
48 radix_tree_iter_init(&iter, id); 44 radix_tree_iter_init(&iter, id);
@@ -295,15 +291,13 @@ void *idr_replace(struct idr *idr, void *ptr, unsigned long id)
295 void __rcu **slot = NULL; 291 void __rcu **slot = NULL;
296 void *entry; 292 void *entry;
297 293
298 if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
299 return ERR_PTR(-EINVAL);
300 id -= idr->idr_base; 294 id -= idr->idr_base;
301 295
302 entry = __radix_tree_lookup(&idr->idr_rt, id, &node, &slot); 296 entry = __radix_tree_lookup(&idr->idr_rt, id, &node, &slot);
303 if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE)) 297 if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE))
304 return ERR_PTR(-ENOENT); 298 return ERR_PTR(-ENOENT);
305 299
306 __radix_tree_replace(&idr->idr_rt, node, slot, ptr, NULL); 300 __radix_tree_replace(&idr->idr_rt, node, slot, ptr);
307 301
308 return entry; 302 return entry;
309} 303}
@@ -324,6 +318,9 @@ EXPORT_SYMBOL(idr_replace);
324 * free the individual IDs in it. You can use ida_is_empty() to find 318 * free the individual IDs in it. You can use ida_is_empty() to find
325 * out whether the IDA has any IDs currently allocated. 319 * out whether the IDA has any IDs currently allocated.
326 * 320 *
321 * The IDA handles its own locking. It is safe to call any of the IDA
322 * functions without synchronisation in your code.
323 *
327 * IDs are currently limited to the range [0-INT_MAX]. If this is an awkward 324 * IDs are currently limited to the range [0-INT_MAX]. If this is an awkward
328 * limitation, it should be quite straightforward to raise the maximum. 325 * limitation, it should be quite straightforward to raise the maximum.
329 */ 326 */
@@ -331,161 +328,197 @@ EXPORT_SYMBOL(idr_replace);
331/* 328/*
332 * Developer's notes: 329 * Developer's notes:
333 * 330 *
334 * The IDA uses the functionality provided by the IDR & radix tree to store 331 * The IDA uses the functionality provided by the XArray to store bitmaps in
335 * bitmaps in each entry. The IDR_FREE tag means there is at least one bit 332 * each entry. The XA_FREE_MARK is only cleared when all bits in the bitmap
336 * free, unlike the IDR where it means at least one entry is free. 333 * have been set.
337 * 334 *
338 * I considered telling the radix tree that each slot is an order-10 node 335 * I considered telling the XArray that each slot is an order-10 node
339 * and storing the bit numbers in the radix tree, but the radix tree can't 336 * and indexing by bit number, but the XArray can't allow a single multi-index
340 * allow a single multiorder entry at index 0, which would significantly 337 * entry in the head, which would significantly increase memory consumption
341 * increase memory consumption for the IDA. So instead we divide the index 338 * for the IDA. So instead we divide the index by the number of bits in the
342 * by the number of bits in the leaf bitmap before doing a radix tree lookup. 339 * leaf bitmap before doing a radix tree lookup.
343 * 340 *
344 * As an optimisation, if there are only a few low bits set in any given 341 * As an optimisation, if there are only a few low bits set in any given
345 * leaf, instead of allocating a 128-byte bitmap, we use the 'exceptional 342 * leaf, instead of allocating a 128-byte bitmap, we store the bits
346 * entry' functionality of the radix tree to store BITS_PER_LONG - 2 bits 343 * as a value entry. Value entries never have the XA_FREE_MARK cleared
347 * directly in the entry. By being really tricksy, we could store 344 * because we can always convert them into a bitmap entry.
348 * BITS_PER_LONG - 1 bits, but there're diminishing returns after optimising 345 *
349 * for 0-3 allocated IDs. 346 * It would be possible to optimise further; once we've run out of a
350 * 347 * single 128-byte bitmap, we currently switch to a 576-byte node, put
351 * We allow the radix tree 'exceptional' count to get out of date. Nothing 348 * the 128-byte bitmap in the first entry and then start allocating extra
352 * in the IDA nor the radix tree code checks it. If it becomes important 349 * 128-byte entries. We could instead use the 512 bytes of the node's
353 * to maintain an accurate exceptional count, switch the rcu_assign_pointer() 350 * data as a bitmap before moving to that scheme. I do not believe this
354 * calls to radix_tree_iter_replace() which will correct the exceptional 351 * is a worthwhile optimisation; Rasmus Villemoes surveyed the current
355 * count. 352 * users of the IDA and almost none of them use more than 1024 entries.
356 * 353 * Those that do use more than the 8192 IDs that the 512 bytes would
357 * The IDA always requires a lock to alloc/free. If we add a 'test_bit' 354 * provide.
355 *
356 * The IDA always uses a lock to alloc/free. If we add a 'test_bit'
358 * equivalent, it will still need locking. Going to RCU lookup would require 357 * equivalent, it will still need locking. Going to RCU lookup would require
359 * using RCU to free bitmaps, and that's not trivial without embedding an 358 * using RCU to free bitmaps, and that's not trivial without embedding an
360 * RCU head in the bitmap, which adds a 2-pointer overhead to each 128-byte 359 * RCU head in the bitmap, which adds a 2-pointer overhead to each 128-byte
361 * bitmap, which is excessive. 360 * bitmap, which is excessive.
362 */ 361 */
363 362
364#define IDA_MAX (0x80000000U / IDA_BITMAP_BITS - 1) 363/**
365 364 * ida_alloc_range() - Allocate an unused ID.
366static int ida_get_new_above(struct ida *ida, int start) 365 * @ida: IDA handle.
366 * @min: Lowest ID to allocate.
367 * @max: Highest ID to allocate.
368 * @gfp: Memory allocation flags.
369 *
370 * Allocate an ID between @min and @max, inclusive. The allocated ID will
371 * not exceed %INT_MAX, even if @max is larger.
372 *
373 * Context: Any context.
374 * Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
375 * or %-ENOSPC if there are no free IDs.
376 */
377int ida_alloc_range(struct ida *ida, unsigned int min, unsigned int max,
378 gfp_t gfp)
367{ 379{
368 struct radix_tree_root *root = &ida->ida_rt; 380 XA_STATE(xas, &ida->xa, min / IDA_BITMAP_BITS);
369 void __rcu **slot; 381 unsigned bit = min % IDA_BITMAP_BITS;
370 struct radix_tree_iter iter; 382 unsigned long flags;
371 struct ida_bitmap *bitmap; 383 struct ida_bitmap *bitmap, *alloc = NULL;
372 unsigned long index; 384
373 unsigned bit, ebit; 385 if ((int)min < 0)
374 int new; 386 return -ENOSPC;
375 387
376 index = start / IDA_BITMAP_BITS; 388 if ((int)max < 0)
377 bit = start % IDA_BITMAP_BITS; 389 max = INT_MAX;
378 ebit = bit + RADIX_TREE_EXCEPTIONAL_SHIFT; 390
379 391retry:
380 slot = radix_tree_iter_init(&iter, index); 392 xas_lock_irqsave(&xas, flags);
381 for (;;) { 393next:
382 if (slot) 394 bitmap = xas_find_marked(&xas, max / IDA_BITMAP_BITS, XA_FREE_MARK);
383 slot = radix_tree_next_slot(slot, &iter, 395 if (xas.xa_index > min / IDA_BITMAP_BITS)
384 RADIX_TREE_ITER_TAGGED); 396 bit = 0;
385 if (!slot) { 397 if (xas.xa_index * IDA_BITMAP_BITS + bit > max)
386 slot = idr_get_free(root, &iter, GFP_NOWAIT, IDA_MAX); 398 goto nospc;
387 if (IS_ERR(slot)) { 399
388 if (slot == ERR_PTR(-ENOMEM)) 400 if (xa_is_value(bitmap)) {
389 return -EAGAIN; 401 unsigned long tmp = xa_to_value(bitmap);
390 return PTR_ERR(slot); 402
403 if (bit < BITS_PER_XA_VALUE) {
404 bit = find_next_zero_bit(&tmp, BITS_PER_XA_VALUE, bit);
405 if (xas.xa_index * IDA_BITMAP_BITS + bit > max)
406 goto nospc;
407 if (bit < BITS_PER_XA_VALUE) {
408 tmp |= 1UL << bit;
409 xas_store(&xas, xa_mk_value(tmp));
410 goto out;
391 } 411 }
392 } 412 }
393 if (iter.index > index) { 413 bitmap = alloc;
394 bit = 0; 414 if (!bitmap)
395 ebit = RADIX_TREE_EXCEPTIONAL_SHIFT; 415 bitmap = kzalloc(sizeof(*bitmap), GFP_NOWAIT);
396 } 416 if (!bitmap)
397 new = iter.index * IDA_BITMAP_BITS; 417 goto alloc;
398 bitmap = rcu_dereference_raw(*slot); 418 bitmap->bitmap[0] = tmp;
399 if (radix_tree_exception(bitmap)) { 419 xas_store(&xas, bitmap);
400 unsigned long tmp = (unsigned long)bitmap; 420 if (xas_error(&xas)) {
401 ebit = find_next_zero_bit(&tmp, BITS_PER_LONG, ebit); 421 bitmap->bitmap[0] = 0;
402 if (ebit < BITS_PER_LONG) { 422 goto out;
403 tmp |= 1UL << ebit;
404 rcu_assign_pointer(*slot, (void *)tmp);
405 return new + ebit -
406 RADIX_TREE_EXCEPTIONAL_SHIFT;
407 }
408 bitmap = this_cpu_xchg(ida_bitmap, NULL);
409 if (!bitmap)
410 return -EAGAIN;
411 bitmap->bitmap[0] = tmp >> RADIX_TREE_EXCEPTIONAL_SHIFT;
412 rcu_assign_pointer(*slot, bitmap);
413 } 423 }
424 }
414 425
415 if (bitmap) { 426 if (bitmap) {
416 bit = find_next_zero_bit(bitmap->bitmap, 427 bit = find_next_zero_bit(bitmap->bitmap, IDA_BITMAP_BITS, bit);
417 IDA_BITMAP_BITS, bit); 428 if (xas.xa_index * IDA_BITMAP_BITS + bit > max)
418 new += bit; 429 goto nospc;
419 if (new < 0) 430 if (bit == IDA_BITMAP_BITS)
420 return -ENOSPC; 431 goto next;
421 if (bit == IDA_BITMAP_BITS)
422 continue;
423 432
424 __set_bit(bit, bitmap->bitmap); 433 __set_bit(bit, bitmap->bitmap);
425 if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS)) 434 if (bitmap_full(bitmap->bitmap, IDA_BITMAP_BITS))
426 radix_tree_iter_tag_clear(root, &iter, 435 xas_clear_mark(&xas, XA_FREE_MARK);
427 IDR_FREE); 436 } else {
437 if (bit < BITS_PER_XA_VALUE) {
438 bitmap = xa_mk_value(1UL << bit);
428 } else { 439 } else {
429 new += bit; 440 bitmap = alloc;
430 if (new < 0)
431 return -ENOSPC;
432 if (ebit < BITS_PER_LONG) {
433 bitmap = (void *)((1UL << ebit) |
434 RADIX_TREE_EXCEPTIONAL_ENTRY);
435 radix_tree_iter_replace(root, &iter, slot,
436 bitmap);
437 return new;
438 }
439 bitmap = this_cpu_xchg(ida_bitmap, NULL);
440 if (!bitmap) 441 if (!bitmap)
441 return -EAGAIN; 442 bitmap = kzalloc(sizeof(*bitmap), GFP_NOWAIT);
443 if (!bitmap)
444 goto alloc;
442 __set_bit(bit, bitmap->bitmap); 445 __set_bit(bit, bitmap->bitmap);
443 radix_tree_iter_replace(root, &iter, slot, bitmap);
444 } 446 }
445 447 xas_store(&xas, bitmap);
446 return new; 448 }
449out:
450 xas_unlock_irqrestore(&xas, flags);
451 if (xas_nomem(&xas, gfp)) {
452 xas.xa_index = min / IDA_BITMAP_BITS;
453 bit = min % IDA_BITMAP_BITS;
454 goto retry;
447 } 455 }
456 if (bitmap != alloc)
457 kfree(alloc);
458 if (xas_error(&xas))
459 return xas_error(&xas);
460 return xas.xa_index * IDA_BITMAP_BITS + bit;
461alloc:
462 xas_unlock_irqrestore(&xas, flags);
463 alloc = kzalloc(sizeof(*bitmap), gfp);
464 if (!alloc)
465 return -ENOMEM;
466 xas_set(&xas, min / IDA_BITMAP_BITS);
467 bit = min % IDA_BITMAP_BITS;
468 goto retry;
469nospc:
470 xas_unlock_irqrestore(&xas, flags);
471 return -ENOSPC;
448} 472}
473EXPORT_SYMBOL(ida_alloc_range);
449 474
450static void ida_remove(struct ida *ida, int id) 475/**
476 * ida_free() - Release an allocated ID.
477 * @ida: IDA handle.
478 * @id: Previously allocated ID.
479 *
480 * Context: Any context.
481 */
482void ida_free(struct ida *ida, unsigned int id)
451{ 483{
452 unsigned long index = id / IDA_BITMAP_BITS; 484 XA_STATE(xas, &ida->xa, id / IDA_BITMAP_BITS);
453 unsigned offset = id % IDA_BITMAP_BITS; 485 unsigned bit = id % IDA_BITMAP_BITS;
454 struct ida_bitmap *bitmap; 486 struct ida_bitmap *bitmap;
455 unsigned long *btmp; 487 unsigned long flags;
456 struct radix_tree_iter iter;
457 void __rcu **slot;
458 488
459 slot = radix_tree_iter_lookup(&ida->ida_rt, &iter, index); 489 BUG_ON((int)id < 0);
460 if (!slot) 490
461 goto err; 491 xas_lock_irqsave(&xas, flags);
492 bitmap = xas_load(&xas);
462 493
463 bitmap = rcu_dereference_raw(*slot); 494 if (xa_is_value(bitmap)) {
464 if (radix_tree_exception(bitmap)) { 495 unsigned long v = xa_to_value(bitmap);
465 btmp = (unsigned long *)slot; 496 if (bit >= BITS_PER_XA_VALUE)
466 offset += RADIX_TREE_EXCEPTIONAL_SHIFT;
467 if (offset >= BITS_PER_LONG)
468 goto err; 497 goto err;
498 if (!(v & (1UL << bit)))
499 goto err;
500 v &= ~(1UL << bit);
501 if (!v)
502 goto delete;
503 xas_store(&xas, xa_mk_value(v));
469 } else { 504 } else {
470 btmp = bitmap->bitmap; 505 if (!test_bit(bit, bitmap->bitmap))
471 } 506 goto err;
472 if (!test_bit(offset, btmp)) 507 __clear_bit(bit, bitmap->bitmap);
473 goto err; 508 xas_set_mark(&xas, XA_FREE_MARK);
474 509 if (bitmap_empty(bitmap->bitmap, IDA_BITMAP_BITS)) {
475 __clear_bit(offset, btmp); 510 kfree(bitmap);
476 radix_tree_iter_tag_set(&ida->ida_rt, &iter, IDR_FREE); 511delete:
477 if (radix_tree_exception(bitmap)) { 512 xas_store(&xas, NULL);
478 if (rcu_dereference_raw(*slot) == 513 }
479 (void *)RADIX_TREE_EXCEPTIONAL_ENTRY)
480 radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
481 } else if (bitmap_empty(btmp, IDA_BITMAP_BITS)) {
482 kfree(bitmap);
483 radix_tree_iter_delete(&ida->ida_rt, &iter, slot);
484 } 514 }
515 xas_unlock_irqrestore(&xas, flags);
485 return; 516 return;
486 err: 517 err:
518 xas_unlock_irqrestore(&xas, flags);
487 WARN(1, "ida_free called for id=%d which is not allocated.\n", id); 519 WARN(1, "ida_free called for id=%d which is not allocated.\n", id);
488} 520}
521EXPORT_SYMBOL(ida_free);
489 522
490/** 523/**
491 * ida_destroy() - Free all IDs. 524 * ida_destroy() - Free all IDs.
@@ -500,80 +533,60 @@ static void ida_remove(struct ida *ida, int id)
500 */ 533 */
501void ida_destroy(struct ida *ida) 534void ida_destroy(struct ida *ida)
502{ 535{
536 XA_STATE(xas, &ida->xa, 0);
537 struct ida_bitmap *bitmap;
503 unsigned long flags; 538 unsigned long flags;
504 struct radix_tree_iter iter;
505 void __rcu **slot;
506 539
507 xa_lock_irqsave(&ida->ida_rt, flags); 540 xas_lock_irqsave(&xas, flags);
508 radix_tree_for_each_slot(slot, &ida->ida_rt, &iter, 0) { 541 xas_for_each(&xas, bitmap, ULONG_MAX) {
509 struct ida_bitmap *bitmap = rcu_dereference_raw(*slot); 542 if (!xa_is_value(bitmap))
510 if (!radix_tree_exception(bitmap))
511 kfree(bitmap); 543 kfree(bitmap);
512 radix_tree_iter_delete(&ida->ida_rt, &iter, slot); 544 xas_store(&xas, NULL);
513 } 545 }
514 xa_unlock_irqrestore(&ida->ida_rt, flags); 546 xas_unlock_irqrestore(&xas, flags);
515} 547}
516EXPORT_SYMBOL(ida_destroy); 548EXPORT_SYMBOL(ida_destroy);
517 549
518/** 550#ifndef __KERNEL__
519 * ida_alloc_range() - Allocate an unused ID. 551extern void xa_dump_index(unsigned long index, unsigned int shift);
520 * @ida: IDA handle. 552#define IDA_CHUNK_SHIFT ilog2(IDA_BITMAP_BITS)
521 * @min: Lowest ID to allocate.
522 * @max: Highest ID to allocate.
523 * @gfp: Memory allocation flags.
524 *
525 * Allocate an ID between @min and @max, inclusive. The allocated ID will
526 * not exceed %INT_MAX, even if @max is larger.
527 *
528 * Context: Any context.
529 * Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
530 * or %-ENOSPC if there are no free IDs.
531 */
532int ida_alloc_range(struct ida *ida, unsigned int min, unsigned int max,
533 gfp_t gfp)
534{
535 int id = 0;
536 unsigned long flags;
537 553
538 if ((int)min < 0) 554static void ida_dump_entry(void *entry, unsigned long index)
539 return -ENOSPC; 555{
540 556 unsigned long i;
541 if ((int)max < 0) 557
542 max = INT_MAX; 558 if (!entry)
543 559 return;
544again: 560
545 xa_lock_irqsave(&ida->ida_rt, flags); 561 if (xa_is_node(entry)) {
546 id = ida_get_new_above(ida, min); 562 struct xa_node *node = xa_to_node(entry);
547 if (id > (int)max) { 563 unsigned int shift = node->shift + IDA_CHUNK_SHIFT +
548 ida_remove(ida, id); 564 XA_CHUNK_SHIFT;
549 id = -ENOSPC; 565
550 } 566 xa_dump_index(index * IDA_BITMAP_BITS, shift);
551 xa_unlock_irqrestore(&ida->ida_rt, flags); 567 xa_dump_node(node);
568 for (i = 0; i < XA_CHUNK_SIZE; i++)
569 ida_dump_entry(node->slots[i],
570 index | (i << node->shift));
571 } else if (xa_is_value(entry)) {
572 xa_dump_index(index * IDA_BITMAP_BITS, ilog2(BITS_PER_LONG));
573 pr_cont("value: data %lx [%px]\n", xa_to_value(entry), entry);
574 } else {
575 struct ida_bitmap *bitmap = entry;
552 576
553 if (unlikely(id == -EAGAIN)) { 577 xa_dump_index(index * IDA_BITMAP_BITS, IDA_CHUNK_SHIFT);
554 if (!ida_pre_get(ida, gfp)) 578 pr_cont("bitmap: %p data", bitmap);
555 return -ENOMEM; 579 for (i = 0; i < IDA_BITMAP_LONGS; i++)
556 goto again; 580 pr_cont(" %lx", bitmap->bitmap[i]);
581 pr_cont("\n");
557 } 582 }
558
559 return id;
560} 583}
561EXPORT_SYMBOL(ida_alloc_range);
562 584
563/** 585static void ida_dump(struct ida *ida)
564 * ida_free() - Release an allocated ID.
565 * @ida: IDA handle.
566 * @id: Previously allocated ID.
567 *
568 * Context: Any context.
569 */
570void ida_free(struct ida *ida, unsigned int id)
571{ 586{
572 unsigned long flags; 587 struct xarray *xa = &ida->xa;
573 588 pr_debug("ida: %p node %p free %d\n", ida, xa->xa_head,
574 BUG_ON((int)id < 0); 589 xa->xa_flags >> ROOT_TAG_SHIFT);
575 xa_lock_irqsave(&ida->ida_rt, flags); 590 ida_dump_entry(xa->xa_head, 0);
576 ida_remove(ida, id);
577 xa_unlock_irqrestore(&ida->ida_rt, flags);
578} 591}
579EXPORT_SYMBOL(ida_free); 592#endif
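[Editor's note: a minimal, hypothetical caller of the ida_alloc_range()/ida_free() pair reimplemented in the idr.c hunks above; as the new kernel-doc states, the IDA takes its own lock, so the caller needs no synchronisation. The IDA instance and helper names below are invented for illustration and are not part of the patch.]

/* Hypothetical usage sketch; not part of this patch. */
#include <linux/idr.h>

static DEFINE_IDA(example_ida);		/* example-only IDA instance */

static int example_get_id(void)
{
	/* Lowest free ID in [1, 99]; ida_alloc_range() locks internally. */
	return ida_alloc_range(&example_ida, 1, 99, GFP_KERNEL);
}

static void example_put_id(int id)
{
	if (id >= 0)
		ida_free(&example_ida, id);	/* return the ID to the pool */
}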
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index bc03ecc4dfd2..1106bb6aa01e 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -38,15 +38,13 @@
38#include <linux/rcupdate.h> 38#include <linux/rcupdate.h>
39#include <linux/slab.h> 39#include <linux/slab.h>
40#include <linux/string.h> 40#include <linux/string.h>
41#include <linux/xarray.h>
41 42
42 43
43/* Number of nodes in fully populated tree of given height */
44static unsigned long height_to_maxnodes[RADIX_TREE_MAX_PATH + 1] __read_mostly;
45
46/* 44/*
47 * Radix tree node cache. 45 * Radix tree node cache.
48 */ 46 */
49static struct kmem_cache *radix_tree_node_cachep; 47struct kmem_cache *radix_tree_node_cachep;
50 48
51/* 49/*
52 * The radix tree is variable-height, so an insert operation not only has 50 * The radix tree is variable-height, so an insert operation not only has
@@ -98,24 +96,7 @@ static inline void *node_to_entry(void *ptr)
98 return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE); 96 return (void *)((unsigned long)ptr | RADIX_TREE_INTERNAL_NODE);
99} 97}
100 98
101#define RADIX_TREE_RETRY node_to_entry(NULL) 99#define RADIX_TREE_RETRY XA_RETRY_ENTRY
102
103#ifdef CONFIG_RADIX_TREE_MULTIORDER
104/* Sibling slots point directly to another slot in the same node */
105static inline
106bool is_sibling_entry(const struct radix_tree_node *parent, void *node)
107{
108 void __rcu **ptr = node;
109 return (parent->slots <= ptr) &&
110 (ptr < parent->slots + RADIX_TREE_MAP_SIZE);
111}
112#else
113static inline
114bool is_sibling_entry(const struct radix_tree_node *parent, void *node)
115{
116 return false;
117}
118#endif
119 100
120static inline unsigned long 101static inline unsigned long
121get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot) 102get_slot_offset(const struct radix_tree_node *parent, void __rcu **slot)
@@ -129,24 +110,13 @@ static unsigned int radix_tree_descend(const struct radix_tree_node *parent,
129 unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK; 110 unsigned int offset = (index >> parent->shift) & RADIX_TREE_MAP_MASK;
130 void __rcu **entry = rcu_dereference_raw(parent->slots[offset]); 111 void __rcu **entry = rcu_dereference_raw(parent->slots[offset]);
131 112
132#ifdef CONFIG_RADIX_TREE_MULTIORDER
133 if (radix_tree_is_internal_node(entry)) {
134 if (is_sibling_entry(parent, entry)) {
135 void __rcu **sibentry;
136 sibentry = (void __rcu **) entry_to_node(entry);
137 offset = get_slot_offset(parent, sibentry);
138 entry = rcu_dereference_raw(*sibentry);
139 }
140 }
141#endif
142
143 *nodep = (void *)entry; 113 *nodep = (void *)entry;
144 return offset; 114 return offset;
145} 115}
146 116
147static inline gfp_t root_gfp_mask(const struct radix_tree_root *root) 117static inline gfp_t root_gfp_mask(const struct radix_tree_root *root)
148{ 118{
149 return root->gfp_mask & (__GFP_BITS_MASK & ~GFP_ZONEMASK); 119 return root->xa_flags & (__GFP_BITS_MASK & ~GFP_ZONEMASK);
150} 120}
151 121
152static inline void tag_set(struct radix_tree_node *node, unsigned int tag, 122static inline void tag_set(struct radix_tree_node *node, unsigned int tag,
@@ -169,32 +139,32 @@ static inline int tag_get(const struct radix_tree_node *node, unsigned int tag,
169 139
170static inline void root_tag_set(struct radix_tree_root *root, unsigned tag) 140static inline void root_tag_set(struct radix_tree_root *root, unsigned tag)
171{ 141{
172 root->gfp_mask |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT)); 142 root->xa_flags |= (__force gfp_t)(1 << (tag + ROOT_TAG_SHIFT));
173} 143}
174 144
175static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag) 145static inline void root_tag_clear(struct radix_tree_root *root, unsigned tag)
176{ 146{
177 root->gfp_mask &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT)); 147 root->xa_flags &= (__force gfp_t)~(1 << (tag + ROOT_TAG_SHIFT));
178} 148}
179 149
180static inline void root_tag_clear_all(struct radix_tree_root *root) 150static inline void root_tag_clear_all(struct radix_tree_root *root)
181{ 151{
182 root->gfp_mask &= (1 << ROOT_TAG_SHIFT) - 1; 152 root->xa_flags &= (__force gfp_t)((1 << ROOT_TAG_SHIFT) - 1);
183} 153}
184 154
185static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag) 155static inline int root_tag_get(const struct radix_tree_root *root, unsigned tag)
186{ 156{
187 return (__force int)root->gfp_mask & (1 << (tag + ROOT_TAG_SHIFT)); 157 return (__force int)root->xa_flags & (1 << (tag + ROOT_TAG_SHIFT));
188} 158}
189 159
190static inline unsigned root_tags_get(const struct radix_tree_root *root) 160static inline unsigned root_tags_get(const struct radix_tree_root *root)
191{ 161{
192 return (__force unsigned)root->gfp_mask >> ROOT_TAG_SHIFT; 162 return (__force unsigned)root->xa_flags >> ROOT_TAG_SHIFT;
193} 163}
194 164
195static inline bool is_idr(const struct radix_tree_root *root) 165static inline bool is_idr(const struct radix_tree_root *root)
196{ 166{
197 return !!(root->gfp_mask & ROOT_IS_IDR); 167 return !!(root->xa_flags & ROOT_IS_IDR);
198} 168}
199 169
200/* 170/*
@@ -254,7 +224,7 @@ radix_tree_find_next_bit(struct radix_tree_node *node, unsigned int tag,
254 224
255static unsigned int iter_offset(const struct radix_tree_iter *iter) 225static unsigned int iter_offset(const struct radix_tree_iter *iter)
256{ 226{
257 return (iter->index >> iter_shift(iter)) & RADIX_TREE_MAP_MASK; 227 return iter->index & RADIX_TREE_MAP_MASK;
258} 228}
259 229
260/* 230/*
@@ -277,99 +247,6 @@ static unsigned long next_index(unsigned long index,
277 return (index & ~node_maxindex(node)) + (offset << node->shift); 247 return (index & ~node_maxindex(node)) + (offset << node->shift);
278} 248}
279 249
280#ifndef __KERNEL__
281static void dump_node(struct radix_tree_node *node, unsigned long index)
282{
283 unsigned long i;
284
285 pr_debug("radix node: %p offset %d indices %lu-%lu parent %p tags %lx %lx %lx shift %d count %d exceptional %d\n",
286 node, node->offset, index, index | node_maxindex(node),
287 node->parent,
288 node->tags[0][0], node->tags[1][0], node->tags[2][0],
289 node->shift, node->count, node->exceptional);
290
291 for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) {
292 unsigned long first = index | (i << node->shift);
293 unsigned long last = first | ((1UL << node->shift) - 1);
294 void *entry = node->slots[i];
295 if (!entry)
296 continue;
297 if (entry == RADIX_TREE_RETRY) {
298 pr_debug("radix retry offset %ld indices %lu-%lu parent %p\n",
299 i, first, last, node);
300 } else if (!radix_tree_is_internal_node(entry)) {
301 pr_debug("radix entry %p offset %ld indices %lu-%lu parent %p\n",
302 entry, i, first, last, node);
303 } else if (is_sibling_entry(node, entry)) {
304 pr_debug("radix sblng %p offset %ld indices %lu-%lu parent %p val %p\n",
305 entry, i, first, last, node,
306 *(void **)entry_to_node(entry));
307 } else {
308 dump_node(entry_to_node(entry), first);
309 }
310 }
311}
312
313/* For debug */
314static void radix_tree_dump(struct radix_tree_root *root)
315{
316 pr_debug("radix root: %p rnode %p tags %x\n",
317 root, root->rnode,
318 root->gfp_mask >> ROOT_TAG_SHIFT);
319 if (!radix_tree_is_internal_node(root->rnode))
320 return;
321 dump_node(entry_to_node(root->rnode), 0);
322}
323
324static void dump_ida_node(void *entry, unsigned long index)
325{
326 unsigned long i;
327
328 if (!entry)
329 return;
330
331 if (radix_tree_is_internal_node(entry)) {
332 struct radix_tree_node *node = entry_to_node(entry);
333
334 pr_debug("ida node: %p offset %d indices %lu-%lu parent %p free %lx shift %d count %d\n",
335 node, node->offset, index * IDA_BITMAP_BITS,
336 ((index | node_maxindex(node)) + 1) *
337 IDA_BITMAP_BITS - 1,
338 node->parent, node->tags[0][0], node->shift,
339 node->count);
340 for (i = 0; i < RADIX_TREE_MAP_SIZE; i++)
341 dump_ida_node(node->slots[i],
342 index | (i << node->shift));
343 } else if (radix_tree_exceptional_entry(entry)) {
344 pr_debug("ida excp: %p offset %d indices %lu-%lu data %lx\n",
345 entry, (int)(index & RADIX_TREE_MAP_MASK),
346 index * IDA_BITMAP_BITS,
347 index * IDA_BITMAP_BITS + BITS_PER_LONG -
348 RADIX_TREE_EXCEPTIONAL_SHIFT,
349 (unsigned long)entry >>
350 RADIX_TREE_EXCEPTIONAL_SHIFT);
351 } else {
352 struct ida_bitmap *bitmap = entry;
353
354 pr_debug("ida btmp: %p offset %d indices %lu-%lu data", bitmap,
355 (int)(index & RADIX_TREE_MAP_MASK),
356 index * IDA_BITMAP_BITS,
357 (index + 1) * IDA_BITMAP_BITS - 1);
358 for (i = 0; i < IDA_BITMAP_LONGS; i++)
359 pr_cont(" %lx", bitmap->bitmap[i]);
360 pr_cont("\n");
361 }
362}
363
364static void ida_dump(struct ida *ida)
365{
366 struct radix_tree_root *root = &ida->ida_rt;
367 pr_debug("ida: %p node %p free %d\n", ida, root->rnode,
368 root->gfp_mask >> ROOT_TAG_SHIFT);
369 dump_ida_node(root->rnode, 0);
370}
371#endif
372
373/* 250/*
374 * This assumes that the caller has performed appropriate preallocation, and 251 * This assumes that the caller has performed appropriate preallocation, and
375 * that the caller has pinned this thread of control to the current CPU. 252 * that the caller has pinned this thread of control to the current CPU.
@@ -378,7 +255,7 @@ static struct radix_tree_node *
378radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent, 255radix_tree_node_alloc(gfp_t gfp_mask, struct radix_tree_node *parent,
379 struct radix_tree_root *root, 256 struct radix_tree_root *root,
380 unsigned int shift, unsigned int offset, 257 unsigned int shift, unsigned int offset,
381 unsigned int count, unsigned int exceptional) 258 unsigned int count, unsigned int nr_values)
382{ 259{
383 struct radix_tree_node *ret = NULL; 260 struct radix_tree_node *ret = NULL;
384 261
@@ -425,14 +302,14 @@ out:
425 ret->shift = shift; 302 ret->shift = shift;
426 ret->offset = offset; 303 ret->offset = offset;
427 ret->count = count; 304 ret->count = count;
428 ret->exceptional = exceptional; 305 ret->nr_values = nr_values;
429 ret->parent = parent; 306 ret->parent = parent;
430 ret->root = root; 307 ret->array = root;
431 } 308 }
432 return ret; 309 return ret;
433} 310}
434 311
435static void radix_tree_node_rcu_free(struct rcu_head *head) 312void radix_tree_node_rcu_free(struct rcu_head *head)
436{ 313{
437 struct radix_tree_node *node = 314 struct radix_tree_node *node =
438 container_of(head, struct radix_tree_node, rcu_head); 315 container_of(head, struct radix_tree_node, rcu_head);
@@ -530,77 +407,10 @@ int radix_tree_maybe_preload(gfp_t gfp_mask)
530} 407}
531EXPORT_SYMBOL(radix_tree_maybe_preload); 408EXPORT_SYMBOL(radix_tree_maybe_preload);
532 409
533#ifdef CONFIG_RADIX_TREE_MULTIORDER
534/*
535 * Preload with enough objects to ensure that we can split a single entry
536 * of order @old_order into many entries of size @new_order
537 */
538int radix_tree_split_preload(unsigned int old_order, unsigned int new_order,
539 gfp_t gfp_mask)
540{
541 unsigned top = 1 << (old_order % RADIX_TREE_MAP_SHIFT);
542 unsigned layers = (old_order / RADIX_TREE_MAP_SHIFT) -
543 (new_order / RADIX_TREE_MAP_SHIFT);
544 unsigned nr = 0;
545
546 WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
547 BUG_ON(new_order >= old_order);
548
549 while (layers--)
550 nr = nr * RADIX_TREE_MAP_SIZE + 1;
551 return __radix_tree_preload(gfp_mask, top * nr);
552}
553#endif
554
555/*
556 * The same as function above, but preload number of nodes required to insert
557 * (1 << order) continuous naturally-aligned elements.
558 */
559int radix_tree_maybe_preload_order(gfp_t gfp_mask, int order)
560{
561 unsigned long nr_subtrees;
562 int nr_nodes, subtree_height;
563
564 /* Preloading doesn't help anything with this gfp mask, skip it */
565 if (!gfpflags_allow_blocking(gfp_mask)) {
566 preempt_disable();
567 return 0;
568 }
569
570 /*
571 * Calculate number and height of fully populated subtrees it takes to
572 * store (1 << order) elements.
573 */
574 nr_subtrees = 1 << order;
575 for (subtree_height = 0; nr_subtrees > RADIX_TREE_MAP_SIZE;
576 subtree_height++)
577 nr_subtrees >>= RADIX_TREE_MAP_SHIFT;
578
579 /*
580 * The worst case is zero height tree with a single item at index 0 and
581 * then inserting items starting at ULONG_MAX - (1 << order).
582 *
583 * This requires RADIX_TREE_MAX_PATH nodes to build branch from root to
584 * 0-index item.
585 */
586 nr_nodes = RADIX_TREE_MAX_PATH;
587
588 /* Plus branch to fully populated subtrees. */
589 nr_nodes += RADIX_TREE_MAX_PATH - subtree_height;
590
591 /* Root node is shared. */
592 nr_nodes--;
593
594 /* Plus nodes required to build subtrees. */
595 nr_nodes += nr_subtrees * height_to_maxnodes[subtree_height];
596
597 return __radix_tree_preload(gfp_mask, nr_nodes);
598}
599
600static unsigned radix_tree_load_root(const struct radix_tree_root *root, 410static unsigned radix_tree_load_root(const struct radix_tree_root *root,
601 struct radix_tree_node **nodep, unsigned long *maxindex) 411 struct radix_tree_node **nodep, unsigned long *maxindex)
602{ 412{
603 struct radix_tree_node *node = rcu_dereference_raw(root->rnode); 413 struct radix_tree_node *node = rcu_dereference_raw(root->xa_head);
604 414
605 *nodep = node; 415 *nodep = node;
606 416
@@ -629,7 +439,7 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
629 while (index > shift_maxindex(maxshift)) 439 while (index > shift_maxindex(maxshift))
630 maxshift += RADIX_TREE_MAP_SHIFT; 440 maxshift += RADIX_TREE_MAP_SHIFT;
631 441
632 entry = rcu_dereference_raw(root->rnode); 442 entry = rcu_dereference_raw(root->xa_head);
633 if (!entry && (!is_idr(root) || root_tag_get(root, IDR_FREE))) 443 if (!entry && (!is_idr(root) || root_tag_get(root, IDR_FREE)))
634 goto out; 444 goto out;
635 445
@@ -656,9 +466,9 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
656 BUG_ON(shift > BITS_PER_LONG); 466 BUG_ON(shift > BITS_PER_LONG);
657 if (radix_tree_is_internal_node(entry)) { 467 if (radix_tree_is_internal_node(entry)) {
658 entry_to_node(entry)->parent = node; 468 entry_to_node(entry)->parent = node;
659 } else if (radix_tree_exceptional_entry(entry)) { 469 } else if (xa_is_value(entry)) {
660 /* Moving an exceptional root->rnode to a node */ 470 /* Moving a value entry root->xa_head to a node */
661 node->exceptional = 1; 471 node->nr_values = 1;
662 } 472 }
663 /* 473 /*
664 * entry was already in the radix tree, so we do not need 474 * entry was already in the radix tree, so we do not need
@@ -666,7 +476,7 @@ static int radix_tree_extend(struct radix_tree_root *root, gfp_t gfp,
666 */ 476 */
667 node->slots[0] = (void __rcu *)entry; 477 node->slots[0] = (void __rcu *)entry;
668 entry = node_to_entry(node); 478 entry = node_to_entry(node);
669 rcu_assign_pointer(root->rnode, entry); 479 rcu_assign_pointer(root->xa_head, entry);
670 shift += RADIX_TREE_MAP_SHIFT; 480 shift += RADIX_TREE_MAP_SHIFT;
671 } while (shift <= maxshift); 481 } while (shift <= maxshift);
672out: 482out:
@@ -677,13 +487,12 @@ out:
677 * radix_tree_shrink - shrink radix tree to minimum height 487 * radix_tree_shrink - shrink radix tree to minimum height
678 * @root radix tree root 488 * @root radix tree root
679 */ 489 */
680static inline bool radix_tree_shrink(struct radix_tree_root *root, 490static inline bool radix_tree_shrink(struct radix_tree_root *root)
681 radix_tree_update_node_t update_node)
682{ 491{
683 bool shrunk = false; 492 bool shrunk = false;
684 493
685 for (;;) { 494 for (;;) {
686 struct radix_tree_node *node = rcu_dereference_raw(root->rnode); 495 struct radix_tree_node *node = rcu_dereference_raw(root->xa_head);
687 struct radix_tree_node *child; 496 struct radix_tree_node *child;
688 497
689 if (!radix_tree_is_internal_node(node)) 498 if (!radix_tree_is_internal_node(node))
@@ -692,15 +501,20 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root,
692 501
693 /* 502 /*
694 * The candidate node has more than one child, or its child 503 * The candidate node has more than one child, or its child
695 * is not at the leftmost slot, or the child is a multiorder 504 * is not at the leftmost slot, we cannot shrink.
696 * entry, we cannot shrink.
697 */ 505 */
698 if (node->count != 1) 506 if (node->count != 1)
699 break; 507 break;
700 child = rcu_dereference_raw(node->slots[0]); 508 child = rcu_dereference_raw(node->slots[0]);
701 if (!child) 509 if (!child)
702 break; 510 break;
703 if (!radix_tree_is_internal_node(child) && node->shift) 511
512 /*
513 * For an IDR, we must not shrink entry 0 into the root in
514 * case somebody calls idr_replace() with a pointer that
515 * appears to be an internal entry
516 */
517 if (!node->shift && is_idr(root))
704 break; 518 break;
705 519
706 if (radix_tree_is_internal_node(child)) 520 if (radix_tree_is_internal_node(child))
@@ -711,9 +525,9 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root,
711 * moving the node from one part of the tree to another: if it 525 * moving the node from one part of the tree to another: if it
712 * was safe to dereference the old pointer to it 526 * was safe to dereference the old pointer to it
713 * (node->slots[0]), it will be safe to dereference the new 527 * (node->slots[0]), it will be safe to dereference the new
714 * one (root->rnode) as far as dependent read barriers go. 528 * one (root->xa_head) as far as dependent read barriers go.
715 */ 529 */
716 root->rnode = (void __rcu *)child; 530 root->xa_head = (void __rcu *)child;
717 if (is_idr(root) && !tag_get(node, IDR_FREE, 0)) 531 if (is_idr(root) && !tag_get(node, IDR_FREE, 0))
718 root_tag_clear(root, IDR_FREE); 532 root_tag_clear(root, IDR_FREE);
719 533
@@ -738,8 +552,6 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root,
738 node->count = 0; 552 node->count = 0;
739 if (!radix_tree_is_internal_node(child)) { 553 if (!radix_tree_is_internal_node(child)) {
740 node->slots[0] = (void __rcu *)RADIX_TREE_RETRY; 554 node->slots[0] = (void __rcu *)RADIX_TREE_RETRY;
741 if (update_node)
742 update_node(node);
743 } 555 }
744 556
745 WARN_ON_ONCE(!list_empty(&node->private_list)); 557 WARN_ON_ONCE(!list_empty(&node->private_list));
@@ -751,8 +563,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root,
751} 563}
752 564
753static bool delete_node(struct radix_tree_root *root, 565static bool delete_node(struct radix_tree_root *root,
754 struct radix_tree_node *node, 566 struct radix_tree_node *node)
755 radix_tree_update_node_t update_node)
756{ 567{
757 bool deleted = false; 568 bool deleted = false;
758 569
@@ -761,9 +572,8 @@ static bool delete_node(struct radix_tree_root *root,
761 572
762 if (node->count) { 573 if (node->count) {
763 if (node_to_entry(node) == 574 if (node_to_entry(node) ==
764 rcu_dereference_raw(root->rnode)) 575 rcu_dereference_raw(root->xa_head))
765 deleted |= radix_tree_shrink(root, 576 deleted |= radix_tree_shrink(root);
766 update_node);
767 return deleted; 577 return deleted;
768 } 578 }
769 579
@@ -778,7 +588,7 @@ static bool delete_node(struct radix_tree_root *root,
778 */ 588 */
779 if (!is_idr(root)) 589 if (!is_idr(root))
780 root_tag_clear_all(root); 590 root_tag_clear_all(root);
781 root->rnode = NULL; 591 root->xa_head = NULL;
782 } 592 }
783 593
784 WARN_ON_ONCE(!list_empty(&node->private_list)); 594 WARN_ON_ONCE(!list_empty(&node->private_list));
@@ -795,7 +605,6 @@ static bool delete_node(struct radix_tree_root *root,
795 * __radix_tree_create - create a slot in a radix tree 605 * __radix_tree_create - create a slot in a radix tree
796 * @root: radix tree root 606 * @root: radix tree root
797 * @index: index key 607 * @index: index key
798 * @order: index occupies 2^order aligned slots
799 * @nodep: returns node 608 * @nodep: returns node
800 * @slotp: returns slot 609 * @slotp: returns slot
801 * 610 *
@@ -803,36 +612,34 @@ static bool delete_node(struct radix_tree_root *root,
803 * at position @index in the radix tree @root. 612 * at position @index in the radix tree @root.
804 * 613 *
805 * Until there is more than one item in the tree, no nodes are 614 * Until there is more than one item in the tree, no nodes are
806 * allocated and @root->rnode is used as a direct slot instead of 615 * allocated and @root->xa_head is used as a direct slot instead of
807 * pointing to a node, in which case *@nodep will be NULL. 616 * pointing to a node, in which case *@nodep will be NULL.
808 * 617 *
809 * Returns -ENOMEM, or 0 for success. 618 * Returns -ENOMEM, or 0 for success.
810 */ 619 */
811int __radix_tree_create(struct radix_tree_root *root, unsigned long index, 620static int __radix_tree_create(struct radix_tree_root *root,
812 unsigned order, struct radix_tree_node **nodep, 621 unsigned long index, struct radix_tree_node **nodep,
813 void __rcu ***slotp) 622 void __rcu ***slotp)
814{ 623{
815 struct radix_tree_node *node = NULL, *child; 624 struct radix_tree_node *node = NULL, *child;
816 void __rcu **slot = (void __rcu **)&root->rnode; 625 void __rcu **slot = (void __rcu **)&root->xa_head;
817 unsigned long maxindex; 626 unsigned long maxindex;
818 unsigned int shift, offset = 0; 627 unsigned int shift, offset = 0;
819 unsigned long max = index | ((1UL << order) - 1); 628 unsigned long max = index;
820 gfp_t gfp = root_gfp_mask(root); 629 gfp_t gfp = root_gfp_mask(root);
821 630
822 shift = radix_tree_load_root(root, &child, &maxindex); 631 shift = radix_tree_load_root(root, &child, &maxindex);
823 632
824 /* Make sure the tree is high enough. */ 633 /* Make sure the tree is high enough. */
825 if (order > 0 && max == ((1UL << order) - 1))
826 max++;
827 if (max > maxindex) { 634 if (max > maxindex) {
828 int error = radix_tree_extend(root, gfp, max, shift); 635 int error = radix_tree_extend(root, gfp, max, shift);
829 if (error < 0) 636 if (error < 0)
830 return error; 637 return error;
831 shift = error; 638 shift = error;
832 child = rcu_dereference_raw(root->rnode); 639 child = rcu_dereference_raw(root->xa_head);
833 } 640 }
834 641
835 while (shift > order) { 642 while (shift > 0) {
836 shift -= RADIX_TREE_MAP_SHIFT; 643 shift -= RADIX_TREE_MAP_SHIFT;
837 if (child == NULL) { 644 if (child == NULL) {
838 /* Have to add a child node. */ 645 /* Have to add a child node. */
@@ -875,8 +682,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node)
875 682
876 for (;;) { 683 for (;;) {
877 void *entry = rcu_dereference_raw(child->slots[offset]); 684 void *entry = rcu_dereference_raw(child->slots[offset]);
878 if (radix_tree_is_internal_node(entry) && 685 if (xa_is_node(entry) && child->shift) {
879 !is_sibling_entry(child, entry)) {
880 child = entry_to_node(entry); 686 child = entry_to_node(entry);
881 offset = 0; 687 offset = 0;
882 continue; 688 continue;
@@ -894,96 +700,30 @@ static void radix_tree_free_nodes(struct radix_tree_node *node)
894 } 700 }
895} 701}
896 702
897#ifdef CONFIG_RADIX_TREE_MULTIORDER
898static inline int insert_entries(struct radix_tree_node *node, 703static inline int insert_entries(struct radix_tree_node *node,
899 void __rcu **slot, void *item, unsigned order, bool replace) 704 void __rcu **slot, void *item, bool replace)
900{
901 struct radix_tree_node *child;
902 unsigned i, n, tag, offset, tags = 0;
903
904 if (node) {
905 if (order > node->shift)
906 n = 1 << (order - node->shift);
907 else
908 n = 1;
909 offset = get_slot_offset(node, slot);
910 } else {
911 n = 1;
912 offset = 0;
913 }
914
915 if (n > 1) {
916 offset = offset & ~(n - 1);
917 slot = &node->slots[offset];
918 }
919 child = node_to_entry(slot);
920
921 for (i = 0; i < n; i++) {
922 if (slot[i]) {
923 if (replace) {
924 node->count--;
925 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
926 if (tag_get(node, tag, offset + i))
927 tags |= 1 << tag;
928 } else
929 return -EEXIST;
930 }
931 }
932
933 for (i = 0; i < n; i++) {
934 struct radix_tree_node *old = rcu_dereference_raw(slot[i]);
935 if (i) {
936 rcu_assign_pointer(slot[i], child);
937 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
938 if (tags & (1 << tag))
939 tag_clear(node, tag, offset + i);
940 } else {
941 rcu_assign_pointer(slot[i], item);
942 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
943 if (tags & (1 << tag))
944 tag_set(node, tag, offset);
945 }
946 if (radix_tree_is_internal_node(old) &&
947 !is_sibling_entry(node, old) &&
948 (old != RADIX_TREE_RETRY))
949 radix_tree_free_nodes(old);
950 if (radix_tree_exceptional_entry(old))
951 node->exceptional--;
952 }
953 if (node) {
954 node->count += n;
955 if (radix_tree_exceptional_entry(item))
956 node->exceptional += n;
957 }
958 return n;
959}
960#else
961static inline int insert_entries(struct radix_tree_node *node,
962 void __rcu **slot, void *item, unsigned order, bool replace)
963{ 705{
964 if (*slot) 706 if (*slot)
965 return -EEXIST; 707 return -EEXIST;
966 rcu_assign_pointer(*slot, item); 708 rcu_assign_pointer(*slot, item);
967 if (node) { 709 if (node) {
968 node->count++; 710 node->count++;
969 if (radix_tree_exceptional_entry(item)) 711 if (xa_is_value(item))
970 node->exceptional++; 712 node->nr_values++;
971 } 713 }
972 return 1; 714 return 1;
973} 715}
974#endif
975 716
976/** 717/**
977 * __radix_tree_insert - insert into a radix tree 718 * __radix_tree_insert - insert into a radix tree
978 * @root: radix tree root 719 * @root: radix tree root
979 * @index: index key 720 * @index: index key
980 * @order: key covers the 2^order indices around index
981 * @item: item to insert 721 * @item: item to insert
982 * 722 *
983 * Insert an item into the radix tree at position @index. 723 * Insert an item into the radix tree at position @index.
984 */ 724 */
985int __radix_tree_insert(struct radix_tree_root *root, unsigned long index, 725int radix_tree_insert(struct radix_tree_root *root, unsigned long index,
986 unsigned order, void *item) 726 void *item)
987{ 727{
988 struct radix_tree_node *node; 728 struct radix_tree_node *node;
989 void __rcu **slot; 729 void __rcu **slot;
@@ -991,11 +731,11 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
991 731
992 BUG_ON(radix_tree_is_internal_node(item)); 732 BUG_ON(radix_tree_is_internal_node(item));
993 733
994 error = __radix_tree_create(root, index, order, &node, &slot); 734 error = __radix_tree_create(root, index, &node, &slot);
995 if (error) 735 if (error)
996 return error; 736 return error;
997 737
998 error = insert_entries(node, slot, item, order, false); 738 error = insert_entries(node, slot, item, false);
999 if (error < 0) 739 if (error < 0)
1000 return error; 740 return error;
1001 741
@@ -1010,7 +750,7 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
1010 750
1011 return 0; 751 return 0;
1012} 752}
1013EXPORT_SYMBOL(__radix_tree_insert); 753EXPORT_SYMBOL(radix_tree_insert);
1014 754
1015/** 755/**
1016 * __radix_tree_lookup - lookup an item in a radix tree 756 * __radix_tree_lookup - lookup an item in a radix tree
@@ -1023,7 +763,7 @@ EXPORT_SYMBOL(__radix_tree_insert);
1023 * tree @root. 763 * tree @root.
1024 * 764 *
1025 * Until there is more than one item in the tree, no nodes are 765 * Until there is more than one item in the tree, no nodes are
1026 * allocated and @root->rnode is used as a direct slot instead of 766 * allocated and @root->xa_head is used as a direct slot instead of
1027 * pointing to a node, in which case *@nodep will be NULL. 767 * pointing to a node, in which case *@nodep will be NULL.
1028 */ 768 */
1029void *__radix_tree_lookup(const struct radix_tree_root *root, 769void *__radix_tree_lookup(const struct radix_tree_root *root,
@@ -1036,7 +776,7 @@ void *__radix_tree_lookup(const struct radix_tree_root *root,
1036 776
1037 restart: 777 restart:
1038 parent = NULL; 778 parent = NULL;
1039 slot = (void __rcu **)&root->rnode; 779 slot = (void __rcu **)&root->xa_head;
1040 radix_tree_load_root(root, &node, &maxindex); 780 radix_tree_load_root(root, &node, &maxindex);
1041 if (index > maxindex) 781 if (index > maxindex)
1042 return NULL; 782 return NULL;
@@ -1049,6 +789,8 @@ void *__radix_tree_lookup(const struct radix_tree_root *root,
1049 parent = entry_to_node(node); 789 parent = entry_to_node(node);
1050 offset = radix_tree_descend(parent, &node, index); 790 offset = radix_tree_descend(parent, &node, index);
1051 slot = parent->slots + offset; 791 slot = parent->slots + offset;
792 if (parent->shift == 0)
793 break;
1052 } 794 }
1053 795
1054 if (nodep) 796 if (nodep)
@@ -1100,36 +842,12 @@ void *radix_tree_lookup(const struct radix_tree_root *root, unsigned long index)
1100} 842}
1101EXPORT_SYMBOL(radix_tree_lookup); 843EXPORT_SYMBOL(radix_tree_lookup);
1102 844
1103static inline void replace_sibling_entries(struct radix_tree_node *node,
1104 void __rcu **slot, int count, int exceptional)
1105{
1106#ifdef CONFIG_RADIX_TREE_MULTIORDER
1107 void *ptr = node_to_entry(slot);
1108 unsigned offset = get_slot_offset(node, slot) + 1;
1109
1110 while (offset < RADIX_TREE_MAP_SIZE) {
1111 if (rcu_dereference_raw(node->slots[offset]) != ptr)
1112 break;
1113 if (count < 0) {
1114 node->slots[offset] = NULL;
1115 node->count--;
1116 }
1117 node->exceptional += exceptional;
1118 offset++;
1119 }
1120#endif
1121}
1122
1123static void replace_slot(void __rcu **slot, void *item, 845static void replace_slot(void __rcu **slot, void *item,
1124 struct radix_tree_node *node, int count, int exceptional) 846 struct radix_tree_node *node, int count, int values)
1125{ 847{
1126 if (WARN_ON_ONCE(radix_tree_is_internal_node(item))) 848 if (node && (count || values)) {
1127 return;
1128
1129 if (node && (count || exceptional)) {
1130 node->count += count; 849 node->count += count;
1131 node->exceptional += exceptional; 850 node->nr_values += values;
1132 replace_sibling_entries(node, slot, count, exceptional);
1133 } 851 }
1134 852
1135 rcu_assign_pointer(*slot, item); 853 rcu_assign_pointer(*slot, item);
@@ -1172,37 +890,31 @@ static int calculate_count(struct radix_tree_root *root,
1172 * @node: pointer to tree node 890 * @node: pointer to tree node
1173 * @slot: pointer to slot in @node 891 * @slot: pointer to slot in @node
1174 * @item: new item to store in the slot. 892 * @item: new item to store in the slot.
1175 * @update_node: callback for changing leaf nodes
1176 * 893 *
1177 * For use with __radix_tree_lookup(). Caller must hold tree write locked 894 * For use with __radix_tree_lookup(). Caller must hold tree write locked
1178 * across slot lookup and replacement. 895 * across slot lookup and replacement.
1179 */ 896 */
1180void __radix_tree_replace(struct radix_tree_root *root, 897void __radix_tree_replace(struct radix_tree_root *root,
1181 struct radix_tree_node *node, 898 struct radix_tree_node *node,
1182 void __rcu **slot, void *item, 899 void __rcu **slot, void *item)
1183 radix_tree_update_node_t update_node)
1184{ 900{
1185 void *old = rcu_dereference_raw(*slot); 901 void *old = rcu_dereference_raw(*slot);
1186 int exceptional = !!radix_tree_exceptional_entry(item) - 902 int values = !!xa_is_value(item) - !!xa_is_value(old);
1187 !!radix_tree_exceptional_entry(old);
1188 int count = calculate_count(root, node, slot, item, old); 903 int count = calculate_count(root, node, slot, item, old);
1189 904
1190 /* 905 /*
1191 * This function supports replacing exceptional entries and 906 * This function supports replacing value entries and
1192 * deleting entries, but that needs accounting against the 907 * deleting entries, but that needs accounting against the
1193 * node unless the slot is root->rnode. 908 * node unless the slot is root->xa_head.
1194 */ 909 */
1195 WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->rnode) && 910 WARN_ON_ONCE(!node && (slot != (void __rcu **)&root->xa_head) &&
1196 (count || exceptional)); 911 (count || values));
1197 replace_slot(slot, item, node, count, exceptional); 912 replace_slot(slot, item, node, count, values);
1198 913
1199 if (!node) 914 if (!node)
1200 return; 915 return;
1201 916
1202 if (update_node) 917 delete_node(root, node);
1203 update_node(node);
1204
1205 delete_node(root, node, update_node);
1206} 918}
1207 919
1208/** 920/**
@@ -1211,12 +923,12 @@ void __radix_tree_replace(struct radix_tree_root *root,
1211 * @slot: pointer to slot 923 * @slot: pointer to slot
1212 * @item: new item to store in the slot. 924 * @item: new item to store in the slot.
1213 * 925 *
1214 * For use with radix_tree_lookup_slot(), radix_tree_gang_lookup_slot(), 926 * For use with radix_tree_lookup_slot() and
1215 * radix_tree_gang_lookup_tag_slot(). Caller must hold tree write locked 927 * radix_tree_gang_lookup_tag_slot(). Caller must hold tree write locked
1216 * across slot lookup and replacement. 928 * across slot lookup and replacement.
1217 * 929 *
1218 * NOTE: This cannot be used to switch between non-entries (empty slots), 930 * NOTE: This cannot be used to switch between non-entries (empty slots),
1219 * regular entries, and exceptional entries, as that requires accounting 931 * regular entries, and value entries, as that requires accounting
1220 * inside the radix tree node. When switching from one type of entry or 932 * inside the radix tree node. When switching from one type of entry or
1221 * deleting, use __radix_tree_lookup() and __radix_tree_replace() or 933 * deleting, use __radix_tree_lookup() and __radix_tree_replace() or
1222 * radix_tree_iter_replace(). 934 * radix_tree_iter_replace().
@@ -1224,7 +936,7 @@ void __radix_tree_replace(struct radix_tree_root *root,
1224void radix_tree_replace_slot(struct radix_tree_root *root, 936void radix_tree_replace_slot(struct radix_tree_root *root,
1225 void __rcu **slot, void *item) 937 void __rcu **slot, void *item)
1226{ 938{
1227 __radix_tree_replace(root, NULL, slot, item, NULL); 939 __radix_tree_replace(root, NULL, slot, item);
1228} 940}
1229EXPORT_SYMBOL(radix_tree_replace_slot); 941EXPORT_SYMBOL(radix_tree_replace_slot);
1230 942
@@ -1234,162 +946,16 @@ EXPORT_SYMBOL(radix_tree_replace_slot);
1234 * @slot: pointer to slot 946 * @slot: pointer to slot
1235 * @item: new item to store in the slot. 947 * @item: new item to store in the slot.
1236 * 948 *
1237 * For use with radix_tree_split() and radix_tree_for_each_slot(). 949 * For use with radix_tree_for_each_slot().
1238 * Caller must hold tree write locked across split and replacement. 950 * Caller must hold tree write locked.
1239 */ 951 */
1240void radix_tree_iter_replace(struct radix_tree_root *root, 952void radix_tree_iter_replace(struct radix_tree_root *root,
1241 const struct radix_tree_iter *iter, 953 const struct radix_tree_iter *iter,
1242 void __rcu **slot, void *item) 954 void __rcu **slot, void *item)
1243{ 955{
1244 __radix_tree_replace(root, iter->node, slot, item, NULL); 956 __radix_tree_replace(root, iter->node, slot, item);
1245} 957}
1246 958
1247#ifdef CONFIG_RADIX_TREE_MULTIORDER
1248/**
1249 * radix_tree_join - replace multiple entries with one multiorder entry
1250 * @root: radix tree root
1251 * @index: an index inside the new entry
1252 * @order: order of the new entry
1253 * @item: new entry
1254 *
1255 * Call this function to replace several entries with one larger entry.
1256 * The existing entries are presumed to not need freeing as a result of
1257 * this call.
1258 *
1259 * The replacement entry will have all the tags set on it that were set
1260 * on any of the entries it is replacing.
1261 */
1262int radix_tree_join(struct radix_tree_root *root, unsigned long index,
1263 unsigned order, void *item)
1264{
1265 struct radix_tree_node *node;
1266 void __rcu **slot;
1267 int error;
1268
1269 BUG_ON(radix_tree_is_internal_node(item));
1270
1271 error = __radix_tree_create(root, index, order, &node, &slot);
1272 if (!error)
1273 error = insert_entries(node, slot, item, order, true);
1274 if (error > 0)
1275 error = 0;
1276
1277 return error;
1278}
1279
1280/**
1281 * radix_tree_split - Split an entry into smaller entries
1282 * @root: radix tree root
1283 * @index: An index within the large entry
1284 * @order: Order of new entries
1285 *
1286 * Call this function as the first step in replacing a multiorder entry
1287 * with several entries of lower order. After this function returns,
1288 * loop over the relevant portion of the tree using radix_tree_for_each_slot()
1289 * and call radix_tree_iter_replace() to set up each new entry.
1290 *
1291 * The tags from this entry are replicated to all the new entries.
1292 *
1293 * The radix tree should be locked against modification during the entire
1294 * replacement operation. Lock-free lookups will see RADIX_TREE_RETRY which
1295 * should prompt RCU walkers to restart the lookup from the root.
1296 */
1297int radix_tree_split(struct radix_tree_root *root, unsigned long index,
1298 unsigned order)
1299{
1300 struct radix_tree_node *parent, *node, *child;
1301 void __rcu **slot;
1302 unsigned int offset, end;
1303 unsigned n, tag, tags = 0;
1304 gfp_t gfp = root_gfp_mask(root);
1305
1306 if (!__radix_tree_lookup(root, index, &parent, &slot))
1307 return -ENOENT;
1308 if (!parent)
1309 return -ENOENT;
1310
1311 offset = get_slot_offset(parent, slot);
1312
1313 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
1314 if (tag_get(parent, tag, offset))
1315 tags |= 1 << tag;
1316
1317 for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) {
1318 if (!is_sibling_entry(parent,
1319 rcu_dereference_raw(parent->slots[end])))
1320 break;
1321 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
1322 if (tags & (1 << tag))
1323 tag_set(parent, tag, end);
1324 /* rcu_assign_pointer ensures tags are set before RETRY */
1325 rcu_assign_pointer(parent->slots[end], RADIX_TREE_RETRY);
1326 }
1327 rcu_assign_pointer(parent->slots[offset], RADIX_TREE_RETRY);
1328 parent->exceptional -= (end - offset);
1329
1330 if (order == parent->shift)
1331 return 0;
1332 if (order > parent->shift) {
1333 while (offset < end)
1334 offset += insert_entries(parent, &parent->slots[offset],
1335 RADIX_TREE_RETRY, order, true);
1336 return 0;
1337 }
1338
1339 node = parent;
1340
1341 for (;;) {
1342 if (node->shift > order) {
1343 child = radix_tree_node_alloc(gfp, node, root,
1344 node->shift - RADIX_TREE_MAP_SHIFT,
1345 offset, 0, 0);
1346 if (!child)
1347 goto nomem;
1348 if (node != parent) {
1349 node->count++;
1350 rcu_assign_pointer(node->slots[offset],
1351 node_to_entry(child));
1352 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
1353 if (tags & (1 << tag))
1354 tag_set(node, tag, offset);
1355 }
1356
1357 node = child;
1358 offset = 0;
1359 continue;
1360 }
1361
1362 n = insert_entries(node, &node->slots[offset],
1363 RADIX_TREE_RETRY, order, false);
1364 BUG_ON(n > RADIX_TREE_MAP_SIZE);
1365
1366 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
1367 if (tags & (1 << tag))
1368 tag_set(node, tag, offset);
1369 offset += n;
1370
1371 while (offset == RADIX_TREE_MAP_SIZE) {
1372 if (node == parent)
1373 break;
1374 offset = node->offset;
1375 child = node;
1376 node = node->parent;
1377 rcu_assign_pointer(node->slots[offset],
1378 node_to_entry(child));
1379 offset++;
1380 }
1381 if ((node == parent) && (offset == end))
1382 return 0;
1383 }
1384
1385 nomem:
1386 /* Shouldn't happen; did user forget to preload? */
1387 /* TODO: free all the allocated nodes */
1388 WARN_ON(1);
1389 return -ENOMEM;
1390}
1391#endif
1392
1393static void node_tag_set(struct radix_tree_root *root, 959static void node_tag_set(struct radix_tree_root *root,
1394 struct radix_tree_node *node, 960 struct radix_tree_node *node,
1395 unsigned int tag, unsigned int offset) 961 unsigned int tag, unsigned int offset)
@@ -1447,18 +1013,6 @@ void *radix_tree_tag_set(struct radix_tree_root *root,
1447} 1013}
1448EXPORT_SYMBOL(radix_tree_tag_set); 1014EXPORT_SYMBOL(radix_tree_tag_set);
1449 1015
1450/**
1451 * radix_tree_iter_tag_set - set a tag on the current iterator entry
1452 * @root: radix tree root
1453 * @iter: iterator state
1454 * @tag: tag to set
1455 */
1456void radix_tree_iter_tag_set(struct radix_tree_root *root,
1457 const struct radix_tree_iter *iter, unsigned int tag)
1458{
1459 node_tag_set(root, iter->node, tag, iter_offset(iter));
1460}
1461
1462static void node_tag_clear(struct radix_tree_root *root, 1016static void node_tag_clear(struct radix_tree_root *root,
1463 struct radix_tree_node *node, 1017 struct radix_tree_node *node,
1464 unsigned int tag, unsigned int offset) 1018 unsigned int tag, unsigned int offset)
@@ -1574,14 +1128,6 @@ int radix_tree_tag_get(const struct radix_tree_root *root,
1574} 1128}
1575EXPORT_SYMBOL(radix_tree_tag_get); 1129EXPORT_SYMBOL(radix_tree_tag_get);
1576 1130
1577static inline void __set_iter_shift(struct radix_tree_iter *iter,
1578 unsigned int shift)
1579{
1580#ifdef CONFIG_RADIX_TREE_MULTIORDER
1581 iter->shift = shift;
1582#endif
1583}
1584
1585/* Construct iter->tags bit-mask from node->tags[tag] array */ 1131/* Construct iter->tags bit-mask from node->tags[tag] array */
1586static void set_iter_tags(struct radix_tree_iter *iter, 1132static void set_iter_tags(struct radix_tree_iter *iter,
1587 struct radix_tree_node *node, unsigned offset, 1133 struct radix_tree_node *node, unsigned offset,
@@ -1608,92 +1154,11 @@ static void set_iter_tags(struct radix_tree_iter *iter,
1608 } 1154 }
1609} 1155}
1610 1156
1611#ifdef CONFIG_RADIX_TREE_MULTIORDER
1612static void __rcu **skip_siblings(struct radix_tree_node **nodep,
1613 void __rcu **slot, struct radix_tree_iter *iter)
1614{
1615 while (iter->index < iter->next_index) {
1616 *nodep = rcu_dereference_raw(*slot);
1617 if (*nodep && !is_sibling_entry(iter->node, *nodep))
1618 return slot;
1619 slot++;
1620 iter->index = __radix_tree_iter_add(iter, 1);
1621 iter->tags >>= 1;
1622 }
1623
1624 *nodep = NULL;
1625 return NULL;
1626}
1627
1628void __rcu **__radix_tree_next_slot(void __rcu **slot,
1629 struct radix_tree_iter *iter, unsigned flags)
1630{
1631 unsigned tag = flags & RADIX_TREE_ITER_TAG_MASK;
1632 struct radix_tree_node *node;
1633
1634 slot = skip_siblings(&node, slot, iter);
1635
1636 while (radix_tree_is_internal_node(node)) {
1637 unsigned offset;
1638 unsigned long next_index;
1639
1640 if (node == RADIX_TREE_RETRY)
1641 return slot;
1642 node = entry_to_node(node);
1643 iter->node = node;
1644 iter->shift = node->shift;
1645
1646 if (flags & RADIX_TREE_ITER_TAGGED) {
1647 offset = radix_tree_find_next_bit(node, tag, 0);
1648 if (offset == RADIX_TREE_MAP_SIZE)
1649 return NULL;
1650 slot = &node->slots[offset];
1651 iter->index = __radix_tree_iter_add(iter, offset);
1652 set_iter_tags(iter, node, offset, tag);
1653 node = rcu_dereference_raw(*slot);
1654 } else {
1655 offset = 0;
1656 slot = &node->slots[0];
1657 for (;;) {
1658 node = rcu_dereference_raw(*slot);
1659 if (node)
1660 break;
1661 slot++;
1662 offset++;
1663 if (offset == RADIX_TREE_MAP_SIZE)
1664 return NULL;
1665 }
1666 iter->index = __radix_tree_iter_add(iter, offset);
1667 }
1668 if ((flags & RADIX_TREE_ITER_CONTIG) && (offset > 0))
1669 goto none;
1670 next_index = (iter->index | shift_maxindex(iter->shift)) + 1;
1671 if (next_index < iter->next_index)
1672 iter->next_index = next_index;
1673 }
1674
1675 return slot;
1676 none:
1677 iter->next_index = 0;
1678 return NULL;
1679}
1680EXPORT_SYMBOL(__radix_tree_next_slot);
1681#else
1682static void __rcu **skip_siblings(struct radix_tree_node **nodep,
1683 void __rcu **slot, struct radix_tree_iter *iter)
1684{
1685 return slot;
1686}
1687#endif
1688
1689void __rcu **radix_tree_iter_resume(void __rcu **slot, 1157void __rcu **radix_tree_iter_resume(void __rcu **slot,
1690 struct radix_tree_iter *iter) 1158 struct radix_tree_iter *iter)
1691{ 1159{
1692 struct radix_tree_node *node;
1693
1694 slot++; 1160 slot++;
1695 iter->index = __radix_tree_iter_add(iter, 1); 1161 iter->index = __radix_tree_iter_add(iter, 1);
1696 skip_siblings(&node, slot, iter);
1697 iter->next_index = iter->index; 1162 iter->next_index = iter->index;
1698 iter->tags = 0; 1163 iter->tags = 0;
1699 return NULL; 1164 return NULL;
@@ -1744,8 +1209,7 @@ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
1744 iter->next_index = maxindex + 1; 1209 iter->next_index = maxindex + 1;
1745 iter->tags = 1; 1210 iter->tags = 1;
1746 iter->node = NULL; 1211 iter->node = NULL;
1747 __set_iter_shift(iter, 0); 1212 return (void __rcu **)&root->xa_head;
1748 return (void __rcu **)&root->rnode;
1749 } 1213 }
1750 1214
1751 do { 1215 do {
@@ -1765,8 +1229,6 @@ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
1765 while (++offset < RADIX_TREE_MAP_SIZE) { 1229 while (++offset < RADIX_TREE_MAP_SIZE) {
1766 void *slot = rcu_dereference_raw( 1230 void *slot = rcu_dereference_raw(
1767 node->slots[offset]); 1231 node->slots[offset]);
1768 if (is_sibling_entry(node, slot))
1769 continue;
1770 if (slot) 1232 if (slot)
1771 break; 1233 break;
1772 } 1234 }
@@ -1784,13 +1246,12 @@ void __rcu **radix_tree_next_chunk(const struct radix_tree_root *root,
1784 goto restart; 1246 goto restart;
1785 if (child == RADIX_TREE_RETRY) 1247 if (child == RADIX_TREE_RETRY)
1786 break; 1248 break;
1787 } while (radix_tree_is_internal_node(child)); 1249 } while (node->shift && radix_tree_is_internal_node(child));
1788 1250
1789 /* Update the iterator state */ 1251 /* Update the iterator state */
1790 iter->index = (index &~ node_maxindex(node)) | (offset << node->shift); 1252 iter->index = (index &~ node_maxindex(node)) | offset;
1791 iter->next_index = (index | node_maxindex(node)) + 1; 1253 iter->next_index = (index | node_maxindex(node)) + 1;
1792 iter->node = node; 1254 iter->node = node;
1793 __set_iter_shift(iter, node->shift);
1794 1255
1795 if (flags & RADIX_TREE_ITER_TAGGED) 1256 if (flags & RADIX_TREE_ITER_TAGGED)
1796 set_iter_tags(iter, node, offset, tag); 1257 set_iter_tags(iter, node, offset, tag);
@@ -1847,48 +1308,6 @@ radix_tree_gang_lookup(const struct radix_tree_root *root, void **results,
1847EXPORT_SYMBOL(radix_tree_gang_lookup); 1308EXPORT_SYMBOL(radix_tree_gang_lookup);
1848 1309
1849/** 1310/**
1850 * radix_tree_gang_lookup_slot - perform multiple slot lookup on radix tree
1851 * @root: radix tree root
1852 * @results: where the results of the lookup are placed
1853 * @indices: where their indices should be placed (but usually NULL)
1854 * @first_index: start the lookup from this key
1855 * @max_items: place up to this many items at *results
1856 *
1857 * Performs an index-ascending scan of the tree for present items. Places
1858 * their slots at *@results and returns the number of items which were
1859 * placed at *@results.
1860 *
1861 * The implementation is naive.
1862 *
1863 * Like radix_tree_gang_lookup as far as RCU and locking goes. Slots must
1864 * be dereferenced with radix_tree_deref_slot, and if using only RCU
1865 * protection, radix_tree_deref_slot may fail requiring a retry.
1866 */
1867unsigned int
1868radix_tree_gang_lookup_slot(const struct radix_tree_root *root,
1869 void __rcu ***results, unsigned long *indices,
1870 unsigned long first_index, unsigned int max_items)
1871{
1872 struct radix_tree_iter iter;
1873 void __rcu **slot;
1874 unsigned int ret = 0;
1875
1876 if (unlikely(!max_items))
1877 return 0;
1878
1879 radix_tree_for_each_slot(slot, root, &iter, first_index) {
1880 results[ret] = slot;
1881 if (indices)
1882 indices[ret] = iter.index;
1883 if (++ret == max_items)
1884 break;
1885 }
1886
1887 return ret;
1888}
1889EXPORT_SYMBOL(radix_tree_gang_lookup_slot);
1890
1891/**
1892 * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree 1311 * radix_tree_gang_lookup_tag - perform multiple lookup on a radix tree
1893 * based on a tag 1312 * based on a tag
1894 * @root: radix tree root 1313 * @root: radix tree root
@@ -1964,28 +1383,11 @@ radix_tree_gang_lookup_tag_slot(const struct radix_tree_root *root,
1964} 1383}
1965EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); 1384EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot);
1966 1385
1967/**
1968 * __radix_tree_delete_node - try to free node after clearing a slot
1969 * @root: radix tree root
1970 * @node: node containing @index
1971 * @update_node: callback for changing leaf nodes
1972 *
1973 * After clearing the slot at @index in @node from radix tree
1974 * rooted at @root, call this function to attempt freeing the
1975 * node and shrinking the tree.
1976 */
1977void __radix_tree_delete_node(struct radix_tree_root *root,
1978 struct radix_tree_node *node,
1979 radix_tree_update_node_t update_node)
1980{
1981 delete_node(root, node, update_node);
1982}
1983
1984static bool __radix_tree_delete(struct radix_tree_root *root, 1386static bool __radix_tree_delete(struct radix_tree_root *root,
1985 struct radix_tree_node *node, void __rcu **slot) 1387 struct radix_tree_node *node, void __rcu **slot)
1986{ 1388{
1987 void *old = rcu_dereference_raw(*slot); 1389 void *old = rcu_dereference_raw(*slot);
1988 int exceptional = radix_tree_exceptional_entry(old) ? -1 : 0; 1390 int values = xa_is_value(old) ? -1 : 0;
1989 unsigned offset = get_slot_offset(node, slot); 1391 unsigned offset = get_slot_offset(node, slot);
1990 int tag; 1392 int tag;
1991 1393
@@ -1995,8 +1397,8 @@ static bool __radix_tree_delete(struct radix_tree_root *root,
1995 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) 1397 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
1996 node_tag_clear(root, node, tag, offset); 1398 node_tag_clear(root, node, tag, offset);
1997 1399
1998 replace_slot(slot, NULL, node, -1, exceptional); 1400 replace_slot(slot, NULL, node, -1, values);
1999 return node && delete_node(root, node, NULL); 1401 return node && delete_node(root, node);
2000} 1402}
2001 1403
2002/** 1404/**
@@ -2068,19 +1470,6 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
2068} 1470}
2069EXPORT_SYMBOL(radix_tree_delete); 1471EXPORT_SYMBOL(radix_tree_delete);
2070 1472
2071void radix_tree_clear_tags(struct radix_tree_root *root,
2072 struct radix_tree_node *node,
2073 void __rcu **slot)
2074{
2075 if (node) {
2076 unsigned int tag, offset = get_slot_offset(node, slot);
2077 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
2078 node_tag_clear(root, node, tag, offset);
2079 } else {
2080 root_tag_clear_all(root);
2081 }
2082}
2083
2084/** 1473/**
2085 * radix_tree_tagged - test whether any items in the tree are tagged 1474 * radix_tree_tagged - test whether any items in the tree are tagged
2086 * @root: radix tree root 1475 * @root: radix tree root
@@ -2106,33 +1495,12 @@ void idr_preload(gfp_t gfp_mask)
2106} 1495}
2107EXPORT_SYMBOL(idr_preload); 1496EXPORT_SYMBOL(idr_preload);
2108 1497
2109int ida_pre_get(struct ida *ida, gfp_t gfp)
2110{
2111 /*
2112 * The IDA API has no preload_end() equivalent. Instead,
2113 * ida_get_new() can return -EAGAIN, prompting the caller
2114 * to return to the ida_pre_get() step.
2115 */
2116 if (!__radix_tree_preload(gfp, IDA_PRELOAD_SIZE))
2117 preempt_enable();
2118
2119 if (!this_cpu_read(ida_bitmap)) {
2120 struct ida_bitmap *bitmap = kzalloc(sizeof(*bitmap), gfp);
2121 if (!bitmap)
2122 return 0;
2123 if (this_cpu_cmpxchg(ida_bitmap, NULL, bitmap))
2124 kfree(bitmap);
2125 }
2126
2127 return 1;
2128}
2129
2130void __rcu **idr_get_free(struct radix_tree_root *root, 1498void __rcu **idr_get_free(struct radix_tree_root *root,
2131 struct radix_tree_iter *iter, gfp_t gfp, 1499 struct radix_tree_iter *iter, gfp_t gfp,
2132 unsigned long max) 1500 unsigned long max)
2133{ 1501{
2134 struct radix_tree_node *node = NULL, *child; 1502 struct radix_tree_node *node = NULL, *child;
2135 void __rcu **slot = (void __rcu **)&root->rnode; 1503 void __rcu **slot = (void __rcu **)&root->xa_head;
2136 unsigned long maxindex, start = iter->next_index; 1504 unsigned long maxindex, start = iter->next_index;
2137 unsigned int shift, offset = 0; 1505 unsigned int shift, offset = 0;
2138 1506
@@ -2148,8 +1516,10 @@ void __rcu **idr_get_free(struct radix_tree_root *root,
2148 if (error < 0) 1516 if (error < 0)
2149 return ERR_PTR(error); 1517 return ERR_PTR(error);
2150 shift = error; 1518 shift = error;
2151 child = rcu_dereference_raw(root->rnode); 1519 child = rcu_dereference_raw(root->xa_head);
2152 } 1520 }
1521 if (start == 0 && shift == 0)
1522 shift = RADIX_TREE_MAP_SHIFT;
2153 1523
2154 while (shift) { 1524 while (shift) {
2155 shift -= RADIX_TREE_MAP_SHIFT; 1525 shift -= RADIX_TREE_MAP_SHIFT;
@@ -2192,7 +1562,6 @@ void __rcu **idr_get_free(struct radix_tree_root *root,
2192 else 1562 else
2193 iter->next_index = 1; 1563 iter->next_index = 1;
2194 iter->node = node; 1564 iter->node = node;
2195 __set_iter_shift(iter, shift);
2196 set_iter_tags(iter, node, offset, IDR_FREE); 1565 set_iter_tags(iter, node, offset, IDR_FREE);
2197 1566
2198 return slot; 1567 return slot;
@@ -2211,10 +1580,10 @@ void __rcu **idr_get_free(struct radix_tree_root *root,
2211 */ 1580 */
2212void idr_destroy(struct idr *idr) 1581void idr_destroy(struct idr *idr)
2213{ 1582{
2214 struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.rnode); 1583 struct radix_tree_node *node = rcu_dereference_raw(idr->idr_rt.xa_head);
2215 if (radix_tree_is_internal_node(node)) 1584 if (radix_tree_is_internal_node(node))
2216 radix_tree_free_nodes(node); 1585 radix_tree_free_nodes(node);
2217 idr->idr_rt.rnode = NULL; 1586 idr->idr_rt.xa_head = NULL;
2218 root_tag_set(&idr->idr_rt, IDR_FREE); 1587 root_tag_set(&idr->idr_rt, IDR_FREE);
2219} 1588}
2220EXPORT_SYMBOL(idr_destroy); 1589EXPORT_SYMBOL(idr_destroy);
@@ -2228,31 +1597,6 @@ radix_tree_node_ctor(void *arg)
2228 INIT_LIST_HEAD(&node->private_list); 1597 INIT_LIST_HEAD(&node->private_list);
2229} 1598}
2230 1599
2231static __init unsigned long __maxindex(unsigned int height)
2232{
2233 unsigned int width = height * RADIX_TREE_MAP_SHIFT;
2234 int shift = RADIX_TREE_INDEX_BITS - width;
2235
2236 if (shift < 0)
2237 return ~0UL;
2238 if (shift >= BITS_PER_LONG)
2239 return 0UL;
2240 return ~0UL >> shift;
2241}
2242
2243static __init void radix_tree_init_maxnodes(void)
2244{
2245 unsigned long height_to_maxindex[RADIX_TREE_MAX_PATH + 1];
2246 unsigned int i, j;
2247
2248 for (i = 0; i < ARRAY_SIZE(height_to_maxindex); i++)
2249 height_to_maxindex[i] = __maxindex(i);
2250 for (i = 0; i < ARRAY_SIZE(height_to_maxnodes); i++) {
2251 for (j = i; j > 0; j--)
2252 height_to_maxnodes[i] += height_to_maxindex[j - 1] + 1;
2253 }
2254}
2255
2256static int radix_tree_cpu_dead(unsigned int cpu) 1600static int radix_tree_cpu_dead(unsigned int cpu)
2257{ 1601{
2258 struct radix_tree_preload *rtp; 1602 struct radix_tree_preload *rtp;
@@ -2266,8 +1610,6 @@ static int radix_tree_cpu_dead(unsigned int cpu)
2266 kmem_cache_free(radix_tree_node_cachep, node); 1610 kmem_cache_free(radix_tree_node_cachep, node);
2267 rtp->nr--; 1611 rtp->nr--;
2268 } 1612 }
2269 kfree(per_cpu(ida_bitmap, cpu));
2270 per_cpu(ida_bitmap, cpu) = NULL;
2271 return 0; 1613 return 0;
2272} 1614}
2273 1615
@@ -2277,11 +1619,11 @@ void __init radix_tree_init(void)
2277 1619
2278 BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32); 1620 BUILD_BUG_ON(RADIX_TREE_MAX_TAGS + __GFP_BITS_SHIFT > 32);
2279 BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK); 1621 BUILD_BUG_ON(ROOT_IS_IDR & ~GFP_ZONEMASK);
1622 BUILD_BUG_ON(XA_CHUNK_SIZE > 255);
2280 radix_tree_node_cachep = kmem_cache_create("radix_tree_node", 1623 radix_tree_node_cachep = kmem_cache_create("radix_tree_node",
2281 sizeof(struct radix_tree_node), 0, 1624 sizeof(struct radix_tree_node), 0,
2282 SLAB_PANIC | SLAB_RECLAIM_ACCOUNT, 1625 SLAB_PANIC | SLAB_RECLAIM_ACCOUNT,
2283 radix_tree_node_ctor); 1626 radix_tree_node_ctor);
2284 radix_tree_init_maxnodes();
2285 ret = cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead", 1627 ret = cpuhp_setup_state_nocalls(CPUHP_RADIX_DEAD, "lib/radix:dead",
2286 NULL, radix_tree_cpu_dead); 1628 NULL, radix_tree_cpu_dead);
2287 WARN_ON(ret < 0); 1629 WARN_ON(ret < 0);
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
new file mode 100644
index 000000000000..aa47754150ce
--- /dev/null
+++ b/lib/test_xarray.c
@@ -0,0 +1,1238 @@
1// SPDX-License-Identifier: GPL-2.0+
2/*
3 * test_xarray.c: Test the XArray API
4 * Copyright (c) 2017-2018 Microsoft Corporation
5 * Author: Matthew Wilcox <willy@infradead.org>
6 */
7
8#include <linux/xarray.h>
9#include <linux/module.h>
10
11static unsigned int tests_run;
12static unsigned int tests_passed;
13
14#ifndef XA_DEBUG
15# ifdef __KERNEL__
16void xa_dump(const struct xarray *xa) { }
17# endif
18#undef XA_BUG_ON
19#define XA_BUG_ON(xa, x) do { \
20 tests_run++; \
21 if (x) { \
22 printk("BUG at %s:%d\n", __func__, __LINE__); \
23 xa_dump(xa); \
24 dump_stack(); \
25 } else { \
26 tests_passed++; \
27 } \
28} while (0)
29#endif
30
31static void *xa_store_index(struct xarray *xa, unsigned long index, gfp_t gfp)
32{
33 return xa_store(xa, index, xa_mk_value(index & LONG_MAX), gfp);
34}
35
36static void xa_alloc_index(struct xarray *xa, unsigned long index, gfp_t gfp)
37{
38 u32 id = 0;
39
40 XA_BUG_ON(xa, xa_alloc(xa, &id, UINT_MAX, xa_mk_value(index & LONG_MAX),
41 gfp) != 0);
42 XA_BUG_ON(xa, id != index);
43}
44
45static void xa_erase_index(struct xarray *xa, unsigned long index)
46{
47 XA_BUG_ON(xa, xa_erase(xa, index) != xa_mk_value(index & LONG_MAX));
48 XA_BUG_ON(xa, xa_load(xa, index) != NULL);
49}
50
51/*
52 * If anyone needs this, please move it to xarray.c. We have no current
53 * users outside the test suite because all current multislot users want
54 * to use the advanced API.
55 */
56static void *xa_store_order(struct xarray *xa, unsigned long index,
57 unsigned order, void *entry, gfp_t gfp)
58{
59 XA_STATE_ORDER(xas, xa, index, order);
60 void *curr;
61
62 do {
63 xas_lock(&xas);
64 curr = xas_store(&xas, entry);
65 xas_unlock(&xas);
66 } while (xas_nomem(&xas, gfp));
67
68 return curr;
69}
70
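As a quick illustration (not part of the test suite): with CONFIG_XARRAY_MULTI enabled, a single call to the helper above covers a whole aligned power-of-two range, e.g.

	xa_store_order(xa, 64, 6, xa_mk_value(64), GFP_KERNEL);

after which xa_load() returns that same entry for every index from 64 to 127, while indices 63 and 128 stay NULL.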
71static noinline void check_xa_err(struct xarray *xa)
72{
73 XA_BUG_ON(xa, xa_err(xa_store_index(xa, 0, GFP_NOWAIT)) != 0);
74 XA_BUG_ON(xa, xa_err(xa_erase(xa, 0)) != 0);
75#ifndef __KERNEL__
76 /* The kernel does not fail GFP_NOWAIT allocations */
77 XA_BUG_ON(xa, xa_err(xa_store_index(xa, 1, GFP_NOWAIT)) != -ENOMEM);
78 XA_BUG_ON(xa, xa_err(xa_store_index(xa, 1, GFP_NOWAIT)) != -ENOMEM);
79#endif
80 XA_BUG_ON(xa, xa_err(xa_store_index(xa, 1, GFP_KERNEL)) != 0);
81 XA_BUG_ON(xa, xa_err(xa_store(xa, 1, xa_mk_value(0), GFP_KERNEL)) != 0);
82 XA_BUG_ON(xa, xa_err(xa_erase(xa, 1)) != 0);
83// kills the test-suite :-(
84// XA_BUG_ON(xa, xa_err(xa_store(xa, 0, xa_mk_internal(0), 0)) != -EINVAL);
85}
86
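Sketched for an ordinary caller (the function name is an illustrative assumption, not part of this patch), the pattern check_xa_err() exercises looks like:

	static int example_store_err(struct xarray *xa, unsigned long index,
				     void *item)
	{
		void *old = xa_store(xa, index, item, GFP_KERNEL);

		/* xa_err() is 0 for a normal previous entry, else a negative errno */
		return xa_err(old);
	}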
87static noinline void check_xas_retry(struct xarray *xa)
88{
89 XA_STATE(xas, xa, 0);
90 void *entry;
91
92 xa_store_index(xa, 0, GFP_KERNEL);
93 xa_store_index(xa, 1, GFP_KERNEL);
94
95 rcu_read_lock();
96 XA_BUG_ON(xa, xas_find(&xas, ULONG_MAX) != xa_mk_value(0));
97 xa_erase_index(xa, 1);
98 XA_BUG_ON(xa, !xa_is_retry(xas_reload(&xas)));
99 XA_BUG_ON(xa, xas_retry(&xas, NULL));
100 XA_BUG_ON(xa, xas_retry(&xas, xa_mk_value(0)));
101 xas_reset(&xas);
102 XA_BUG_ON(xa, xas.xa_node != XAS_RESTART);
103 XA_BUG_ON(xa, xas_next_entry(&xas, ULONG_MAX) != xa_mk_value(0));
104 XA_BUG_ON(xa, xas.xa_node != NULL);
105
106 XA_BUG_ON(xa, xa_store_index(xa, 1, GFP_KERNEL) != NULL);
107 XA_BUG_ON(xa, !xa_is_internal(xas_reload(&xas)));
108 xas.xa_node = XAS_RESTART;
109 XA_BUG_ON(xa, xas_next_entry(&xas, ULONG_MAX) != xa_mk_value(0));
110 rcu_read_unlock();
111
112 /* Make sure we can iterate through retry entries */
113 xas_lock(&xas);
114 xas_set(&xas, 0);
115 xas_store(&xas, XA_RETRY_ENTRY);
116 xas_set(&xas, 1);
117 xas_store(&xas, XA_RETRY_ENTRY);
118
119 xas_set(&xas, 0);
120 xas_for_each(&xas, entry, ULONG_MAX) {
121 xas_store(&xas, xa_mk_value(xas.xa_index));
122 }
123 xas_unlock(&xas);
124
125 xa_erase_index(xa, 0);
126 xa_erase_index(xa, 1);
127}
128
129static noinline void check_xa_load(struct xarray *xa)
130{
131 unsigned long i, j;
132
133 for (i = 0; i < 1024; i++) {
134 for (j = 0; j < 1024; j++) {
135 void *entry = xa_load(xa, j);
136 if (j < i)
137 XA_BUG_ON(xa, xa_to_value(entry) != j);
138 else
139 XA_BUG_ON(xa, entry);
140 }
141 XA_BUG_ON(xa, xa_store_index(xa, i, GFP_KERNEL) != NULL);
142 }
143
144 for (i = 0; i < 1024; i++) {
145 for (j = 0; j < 1024; j++) {
146 void *entry = xa_load(xa, j);
147 if (j >= i)
148 XA_BUG_ON(xa, xa_to_value(entry) != j);
149 else
150 XA_BUG_ON(xa, entry);
151 }
152 xa_erase_index(xa, i);
153 }
154 XA_BUG_ON(xa, !xa_empty(xa));
155}
156
157static noinline void check_xa_mark_1(struct xarray *xa, unsigned long index)
158{
159 unsigned int order;
160 unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 8 : 1;
161
162 /* NULL elements have no marks set */
163 XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0));
164 xa_set_mark(xa, index, XA_MARK_0);
165 XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0));
166
167 /* Storing a pointer will not make a mark appear */
168 XA_BUG_ON(xa, xa_store_index(xa, index, GFP_KERNEL) != NULL);
169 XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0));
170 xa_set_mark(xa, index, XA_MARK_0);
171 XA_BUG_ON(xa, !xa_get_mark(xa, index, XA_MARK_0));
172
173 /* Setting one mark will not set another mark */
174 XA_BUG_ON(xa, xa_get_mark(xa, index + 1, XA_MARK_0));
175 XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_1));
176
177 /* Storing NULL clears marks, and they can't be set again */
178 xa_erase_index(xa, index);
179 XA_BUG_ON(xa, !xa_empty(xa));
180 XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0));
181 xa_set_mark(xa, index, XA_MARK_0);
182 XA_BUG_ON(xa, xa_get_mark(xa, index, XA_MARK_0));
183
184 /*
185 * Storing a multi-index entry over entries with marks gives the
186 * entire entry the union of the marks
187 */
188 BUG_ON((index % 4) != 0);
189 for (order = 2; order < max_order; order++) {
190 unsigned long base = round_down(index, 1UL << order);
191 unsigned long next = base + (1UL << order);
192 unsigned long i;
193
194 XA_BUG_ON(xa, xa_store_index(xa, index + 1, GFP_KERNEL));
195 xa_set_mark(xa, index + 1, XA_MARK_0);
196 XA_BUG_ON(xa, xa_store_index(xa, index + 2, GFP_KERNEL));
197 xa_set_mark(xa, index + 2, XA_MARK_1);
198 XA_BUG_ON(xa, xa_store_index(xa, next, GFP_KERNEL));
199 xa_store_order(xa, index, order, xa_mk_value(index),
200 GFP_KERNEL);
201 for (i = base; i < next; i++) {
202 XA_STATE(xas, xa, i);
203 unsigned int seen = 0;
204 void *entry;
205
206 XA_BUG_ON(xa, !xa_get_mark(xa, i, XA_MARK_0));
207 XA_BUG_ON(xa, !xa_get_mark(xa, i, XA_MARK_1));
208 XA_BUG_ON(xa, xa_get_mark(xa, i, XA_MARK_2));
209
210 /* We should see two elements in the array */
211 xas_for_each(&xas, entry, ULONG_MAX)
212 seen++;
213 XA_BUG_ON(xa, seen != 2);
214
215 /* One of which is marked */
216 xas_set(&xas, 0);
217 seen = 0;
218 xas_for_each_marked(&xas, entry, ULONG_MAX, XA_MARK_0)
219 seen++;
220 XA_BUG_ON(xa, seen != 1);
221 }
222 XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_0));
223 XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_1));
224 XA_BUG_ON(xa, xa_get_mark(xa, next, XA_MARK_2));
225 xa_erase_index(xa, index);
226 xa_erase_index(xa, next);
227 XA_BUG_ON(xa, !xa_empty(xa));
228 }
229 XA_BUG_ON(xa, !xa_empty(xa));
230}
231
232static noinline void check_xa_mark_2(struct xarray *xa)
233{
234 XA_STATE(xas, xa, 0);
235 unsigned long index;
236 unsigned int count = 0;
237 void *entry;
238
239 xa_store_index(xa, 0, GFP_KERNEL);
240 xa_set_mark(xa, 0, XA_MARK_0);
241 xas_lock(&xas);
242 xas_load(&xas);
243 xas_init_marks(&xas);
244 xas_unlock(&xas);
245	XA_BUG_ON(xa, xa_get_mark(xa, 0, XA_MARK_0));
246
247 for (index = 3500; index < 4500; index++) {
248 xa_store_index(xa, index, GFP_KERNEL);
249 xa_set_mark(xa, index, XA_MARK_0);
250 }
251
252 xas_reset(&xas);
253 rcu_read_lock();
254 xas_for_each_marked(&xas, entry, ULONG_MAX, XA_MARK_0)
255 count++;
256 rcu_read_unlock();
257 XA_BUG_ON(xa, count != 1000);
258
259 xas_lock(&xas);
260 xas_for_each(&xas, entry, ULONG_MAX) {
261 xas_init_marks(&xas);
262 XA_BUG_ON(xa, !xa_get_mark(xa, xas.xa_index, XA_MARK_0));
263 XA_BUG_ON(xa, !xas_get_mark(&xas, XA_MARK_0));
264 }
265 xas_unlock(&xas);
266
267 xa_destroy(xa);
268}
269
270static noinline void check_xa_mark(struct xarray *xa)
271{
272 unsigned long index;
273
274 for (index = 0; index < 16384; index += 4)
275 check_xa_mark_1(xa, index);
276
277 check_xa_mark_2(xa);
278}
279
280static noinline void check_xa_shrink(struct xarray *xa)
281{
282 XA_STATE(xas, xa, 1);
283 struct xa_node *node;
284 unsigned int order;
285 unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 15 : 1;
286
287 XA_BUG_ON(xa, !xa_empty(xa));
288 XA_BUG_ON(xa, xa_store_index(xa, 0, GFP_KERNEL) != NULL);
289 XA_BUG_ON(xa, xa_store_index(xa, 1, GFP_KERNEL) != NULL);
290
291 /*
292 * Check that erasing the entry at 1 shrinks the tree and properly
293 * marks the node as being deleted.
294 */
295 xas_lock(&xas);
296 XA_BUG_ON(xa, xas_load(&xas) != xa_mk_value(1));
297 node = xas.xa_node;
298 XA_BUG_ON(xa, xa_entry_locked(xa, node, 0) != xa_mk_value(0));
299 XA_BUG_ON(xa, xas_store(&xas, NULL) != xa_mk_value(1));
300 XA_BUG_ON(xa, xa_load(xa, 1) != NULL);
301 XA_BUG_ON(xa, xas.xa_node != XAS_BOUNDS);
302 XA_BUG_ON(xa, xa_entry_locked(xa, node, 0) != XA_RETRY_ENTRY);
303 XA_BUG_ON(xa, xas_load(&xas) != NULL);
304 xas_unlock(&xas);
305 XA_BUG_ON(xa, xa_load(xa, 0) != xa_mk_value(0));
306 xa_erase_index(xa, 0);
307 XA_BUG_ON(xa, !xa_empty(xa));
308
309 for (order = 0; order < max_order; order++) {
310 unsigned long max = (1UL << order) - 1;
311 xa_store_order(xa, 0, order, xa_mk_value(0), GFP_KERNEL);
312 XA_BUG_ON(xa, xa_load(xa, max) != xa_mk_value(0));
313 XA_BUG_ON(xa, xa_load(xa, max + 1) != NULL);
314 rcu_read_lock();
315 node = xa_head(xa);
316 rcu_read_unlock();
317 XA_BUG_ON(xa, xa_store_index(xa, ULONG_MAX, GFP_KERNEL) !=
318 NULL);
319 rcu_read_lock();
320 XA_BUG_ON(xa, xa_head(xa) == node);
321 rcu_read_unlock();
322 XA_BUG_ON(xa, xa_load(xa, max + 1) != NULL);
323 xa_erase_index(xa, ULONG_MAX);
324 XA_BUG_ON(xa, xa->xa_head != node);
325 xa_erase_index(xa, 0);
326 }
327}
328
329static noinline void check_cmpxchg(struct xarray *xa)
330{
331 void *FIVE = xa_mk_value(5);
332 void *SIX = xa_mk_value(6);
333 void *LOTS = xa_mk_value(12345678);
334
335 XA_BUG_ON(xa, !xa_empty(xa));
336 XA_BUG_ON(xa, xa_store_index(xa, 12345678, GFP_KERNEL) != NULL);
337 XA_BUG_ON(xa, xa_insert(xa, 12345678, xa, GFP_KERNEL) != -EEXIST);
338 XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, SIX, FIVE, GFP_KERNEL) != LOTS);
339 XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, LOTS, FIVE, GFP_KERNEL) != LOTS);
340 XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, FIVE, LOTS, GFP_KERNEL) != FIVE);
341 XA_BUG_ON(xa, xa_cmpxchg(xa, 5, FIVE, NULL, GFP_KERNEL) != NULL);
342 XA_BUG_ON(xa, xa_cmpxchg(xa, 5, NULL, FIVE, GFP_KERNEL) != NULL);
343 xa_erase_index(xa, 12345678);
344 xa_erase_index(xa, 5);
345 XA_BUG_ON(xa, !xa_empty(xa));
346}
347
348static noinline void check_reserve(struct xarray *xa)
349{
350 void *entry;
351 unsigned long index = 0;
352
353 /* An array with a reserved entry is not empty */
354 XA_BUG_ON(xa, !xa_empty(xa));
355 xa_reserve(xa, 12345678, GFP_KERNEL);
356 XA_BUG_ON(xa, xa_empty(xa));
357 XA_BUG_ON(xa, xa_load(xa, 12345678));
358 xa_release(xa, 12345678);
359 XA_BUG_ON(xa, !xa_empty(xa));
360
361 /* Releasing a used entry does nothing */
362 xa_reserve(xa, 12345678, GFP_KERNEL);
363 XA_BUG_ON(xa, xa_store_index(xa, 12345678, GFP_NOWAIT) != NULL);
364 xa_release(xa, 12345678);
365 xa_erase_index(xa, 12345678);
366 XA_BUG_ON(xa, !xa_empty(xa));
367
368 /* cmpxchg sees a reserved entry as NULL */
369 xa_reserve(xa, 12345678, GFP_KERNEL);
370 XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, NULL, xa_mk_value(12345678),
371 GFP_NOWAIT) != NULL);
372 xa_release(xa, 12345678);
373 xa_erase_index(xa, 12345678);
374 XA_BUG_ON(xa, !xa_empty(xa));
375
376 /* Can iterate through a reserved entry */
377 xa_store_index(xa, 5, GFP_KERNEL);
378 xa_reserve(xa, 6, GFP_KERNEL);
379 xa_store_index(xa, 7, GFP_KERNEL);
380
381 xa_for_each(xa, entry, index, ULONG_MAX, XA_PRESENT) {
382 XA_BUG_ON(xa, index != 5 && index != 7);
383 }
384 xa_destroy(xa);
385}
386
387static noinline void check_xas_erase(struct xarray *xa)
388{
389 XA_STATE(xas, xa, 0);
390 void *entry;
391 unsigned long i, j;
392
393 for (i = 0; i < 200; i++) {
394 for (j = i; j < 2 * i + 17; j++) {
395 xas_set(&xas, j);
396 do {
397 xas_lock(&xas);
398 xas_store(&xas, xa_mk_value(j));
399 xas_unlock(&xas);
400 } while (xas_nomem(&xas, GFP_KERNEL));
401 }
402
403 xas_set(&xas, ULONG_MAX);
404 do {
405 xas_lock(&xas);
406 xas_store(&xas, xa_mk_value(0));
407 xas_unlock(&xas);
408 } while (xas_nomem(&xas, GFP_KERNEL));
409
410 xas_lock(&xas);
411 xas_store(&xas, NULL);
412
413 xas_set(&xas, 0);
414 j = i;
415 xas_for_each(&xas, entry, ULONG_MAX) {
416 XA_BUG_ON(xa, entry != xa_mk_value(j));
417 xas_store(&xas, NULL);
418 j++;
419 }
420 xas_unlock(&xas);
421 XA_BUG_ON(xa, !xa_empty(xa));
422 }
423}
424
425#ifdef CONFIG_XARRAY_MULTI
426static noinline void check_multi_store_1(struct xarray *xa, unsigned long index,
427 unsigned int order)
428{
429 XA_STATE(xas, xa, index);
430 unsigned long min = index & ~((1UL << order) - 1);
431 unsigned long max = min + (1UL << order);
432
433 xa_store_order(xa, index, order, xa_mk_value(index), GFP_KERNEL);
434 XA_BUG_ON(xa, xa_load(xa, min) != xa_mk_value(index));
435 XA_BUG_ON(xa, xa_load(xa, max - 1) != xa_mk_value(index));
436 XA_BUG_ON(xa, xa_load(xa, max) != NULL);
437 XA_BUG_ON(xa, xa_load(xa, min - 1) != NULL);
438
439 XA_BUG_ON(xa, xas_store(&xas, xa_mk_value(min)) != xa_mk_value(index));
440 XA_BUG_ON(xa, xa_load(xa, min) != xa_mk_value(min));
441 XA_BUG_ON(xa, xa_load(xa, max - 1) != xa_mk_value(min));
442 XA_BUG_ON(xa, xa_load(xa, max) != NULL);
443 XA_BUG_ON(xa, xa_load(xa, min - 1) != NULL);
444
445 xa_erase_index(xa, min);
446 XA_BUG_ON(xa, !xa_empty(xa));
447}
448
449static noinline void check_multi_store_2(struct xarray *xa, unsigned long index,
450 unsigned int order)
451{
452 XA_STATE(xas, xa, index);
453 xa_store_order(xa, index, order, xa_mk_value(0), GFP_KERNEL);
454
455 XA_BUG_ON(xa, xas_store(&xas, xa_mk_value(1)) != xa_mk_value(0));
456 XA_BUG_ON(xa, xas.xa_index != index);
457 XA_BUG_ON(xa, xas_store(&xas, NULL) != xa_mk_value(1));
458 XA_BUG_ON(xa, !xa_empty(xa));
459}
460#endif
461
462static noinline void check_multi_store(struct xarray *xa)
463{
464#ifdef CONFIG_XARRAY_MULTI
465 unsigned long i, j, k;
466 unsigned int max_order = (sizeof(long) == 4) ? 30 : 60;
467
468 /* Loading from any position returns the same value */
469 xa_store_order(xa, 0, 1, xa_mk_value(0), GFP_KERNEL);
470 XA_BUG_ON(xa, xa_load(xa, 0) != xa_mk_value(0));
471 XA_BUG_ON(xa, xa_load(xa, 1) != xa_mk_value(0));
472 XA_BUG_ON(xa, xa_load(xa, 2) != NULL);
473 rcu_read_lock();
474 XA_BUG_ON(xa, xa_to_node(xa_head(xa))->count != 2);
475 XA_BUG_ON(xa, xa_to_node(xa_head(xa))->nr_values != 2);
476 rcu_read_unlock();
477
478 /* Storing adjacent to the value does not alter the value */
479 xa_store(xa, 3, xa, GFP_KERNEL);
480 XA_BUG_ON(xa, xa_load(xa, 0) != xa_mk_value(0));
481 XA_BUG_ON(xa, xa_load(xa, 1) != xa_mk_value(0));
482 XA_BUG_ON(xa, xa_load(xa, 2) != NULL);
483 rcu_read_lock();
484 XA_BUG_ON(xa, xa_to_node(xa_head(xa))->count != 3);
485 XA_BUG_ON(xa, xa_to_node(xa_head(xa))->nr_values != 2);
486 rcu_read_unlock();
487
488 /* Overwriting multiple indexes works */
489 xa_store_order(xa, 0, 2, xa_mk_value(1), GFP_KERNEL);
490 XA_BUG_ON(xa, xa_load(xa, 0) != xa_mk_value(1));
491 XA_BUG_ON(xa, xa_load(xa, 1) != xa_mk_value(1));
492 XA_BUG_ON(xa, xa_load(xa, 2) != xa_mk_value(1));
493 XA_BUG_ON(xa, xa_load(xa, 3) != xa_mk_value(1));
494 XA_BUG_ON(xa, xa_load(xa, 4) != NULL);
495 rcu_read_lock();
496 XA_BUG_ON(xa, xa_to_node(xa_head(xa))->count != 4);
497 XA_BUG_ON(xa, xa_to_node(xa_head(xa))->nr_values != 4);
498 rcu_read_unlock();
499
500 /* We can erase multiple values with a single store */
501 xa_store_order(xa, 0, 63, NULL, GFP_KERNEL);
502 XA_BUG_ON(xa, !xa_empty(xa));
503
504 /* Even when the first slot is empty but the others aren't */
505 xa_store_index(xa, 1, GFP_KERNEL);
506 xa_store_index(xa, 2, GFP_KERNEL);
507 xa_store_order(xa, 0, 2, NULL, GFP_KERNEL);
508 XA_BUG_ON(xa, !xa_empty(xa));
509
510 for (i = 0; i < max_order; i++) {
511 for (j = 0; j < max_order; j++) {
512 xa_store_order(xa, 0, i, xa_mk_value(i), GFP_KERNEL);
513 xa_store_order(xa, 0, j, xa_mk_value(j), GFP_KERNEL);
514
515 for (k = 0; k < max_order; k++) {
516 void *entry = xa_load(xa, (1UL << k) - 1);
517 if ((i < k) && (j < k))
518 XA_BUG_ON(xa, entry != NULL);
519 else
520 XA_BUG_ON(xa, entry != xa_mk_value(j));
521 }
522
523 xa_erase(xa, 0);
524 XA_BUG_ON(xa, !xa_empty(xa));
525 }
526 }
527
528 for (i = 0; i < 20; i++) {
529 check_multi_store_1(xa, 200, i);
530 check_multi_store_1(xa, 0, i);
531 check_multi_store_1(xa, (1UL << i) + 1, i);
532 }
533 check_multi_store_2(xa, 4095, 9);
534#endif
535}
536
537static DEFINE_XARRAY_ALLOC(xa0);
538
539static noinline void check_xa_alloc(void)
540{
541 int i;
542 u32 id;
543
544 /* An empty array should assign 0 to the first alloc */
545 xa_alloc_index(&xa0, 0, GFP_KERNEL);
546
547 /* Erasing it should make the array empty again */
548 xa_erase_index(&xa0, 0);
549 XA_BUG_ON(&xa0, !xa_empty(&xa0));
550
551 /* And it should assign 0 again */
552 xa_alloc_index(&xa0, 0, GFP_KERNEL);
553
554 /* The next assigned ID should be 1 */
555 xa_alloc_index(&xa0, 1, GFP_KERNEL);
556 xa_erase_index(&xa0, 1);
557
558 /* Storing a value should mark it used */
559 xa_store_index(&xa0, 1, GFP_KERNEL);
560 xa_alloc_index(&xa0, 2, GFP_KERNEL);
561
562 /* If we then erase 0, it should be free */
563 xa_erase_index(&xa0, 0);
564 xa_alloc_index(&xa0, 0, GFP_KERNEL);
565
566 xa_erase_index(&xa0, 1);
567 xa_erase_index(&xa0, 2);
568
569 for (i = 1; i < 5000; i++) {
570 xa_alloc_index(&xa0, i, GFP_KERNEL);
571 }
572
573 xa_destroy(&xa0);
574
575 id = 0xfffffffeU;
576 XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_value(0),
577 GFP_KERNEL) != 0);
578 XA_BUG_ON(&xa0, id != 0xfffffffeU);
579 XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_value(0),
580 GFP_KERNEL) != 0);
581 XA_BUG_ON(&xa0, id != 0xffffffffU);
582 XA_BUG_ON(&xa0, xa_alloc(&xa0, &id, UINT_MAX, xa_mk_value(0),
583 GFP_KERNEL) != -ENOSPC);
584 XA_BUG_ON(&xa0, id != 0xffffffffU);
585 xa_destroy(&xa0);
586}
587
588static noinline void __check_store_iter(struct xarray *xa, unsigned long start,
589 unsigned int order, unsigned int present)
590{
591 XA_STATE_ORDER(xas, xa, start, order);
592 void *entry;
593 unsigned int count = 0;
594
595retry:
596 xas_lock(&xas);
597 xas_for_each_conflict(&xas, entry) {
598 XA_BUG_ON(xa, !xa_is_value(entry));
599 XA_BUG_ON(xa, entry < xa_mk_value(start));
600 XA_BUG_ON(xa, entry > xa_mk_value(start + (1UL << order) - 1));
601 count++;
602 }
603 xas_store(&xas, xa_mk_value(start));
604 xas_unlock(&xas);
605 if (xas_nomem(&xas, GFP_KERNEL)) {
606 count = 0;
607 goto retry;
608 }
609 XA_BUG_ON(xa, xas_error(&xas));
610 XA_BUG_ON(xa, count != present);
611 XA_BUG_ON(xa, xa_load(xa, start) != xa_mk_value(start));
612 XA_BUG_ON(xa, xa_load(xa, start + (1UL << order) - 1) !=
613 xa_mk_value(start));
614 xa_erase_index(xa, start);
615}
616
617static noinline void check_store_iter(struct xarray *xa)
618{
619 unsigned int i, j;
620 unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1;
621
622 for (i = 0; i < max_order; i++) {
623 unsigned int min = 1 << i;
624 unsigned int max = (2 << i) - 1;
625 __check_store_iter(xa, 0, i, 0);
626 XA_BUG_ON(xa, !xa_empty(xa));
627 __check_store_iter(xa, min, i, 0);
628 XA_BUG_ON(xa, !xa_empty(xa));
629
630 xa_store_index(xa, min, GFP_KERNEL);
631 __check_store_iter(xa, min, i, 1);
632 XA_BUG_ON(xa, !xa_empty(xa));
633 xa_store_index(xa, max, GFP_KERNEL);
634 __check_store_iter(xa, min, i, 1);
635 XA_BUG_ON(xa, !xa_empty(xa));
636
637 for (j = 0; j < min; j++)
638 xa_store_index(xa, j, GFP_KERNEL);
639 __check_store_iter(xa, 0, i, min);
640 XA_BUG_ON(xa, !xa_empty(xa));
641 for (j = 0; j < min; j++)
642 xa_store_index(xa, min + j, GFP_KERNEL);
643 __check_store_iter(xa, min, i, min);
644 XA_BUG_ON(xa, !xa_empty(xa));
645 }
646#ifdef CONFIG_XARRAY_MULTI
647 xa_store_index(xa, 63, GFP_KERNEL);
648 xa_store_index(xa, 65, GFP_KERNEL);
649 __check_store_iter(xa, 64, 2, 1);
650 xa_erase_index(xa, 63);
651#endif
652 XA_BUG_ON(xa, !xa_empty(xa));
653}
654
655static noinline void check_multi_find(struct xarray *xa)
656{
657#ifdef CONFIG_XARRAY_MULTI
658 unsigned long index;
659
660 xa_store_order(xa, 12, 2, xa_mk_value(12), GFP_KERNEL);
661 XA_BUG_ON(xa, xa_store_index(xa, 16, GFP_KERNEL) != NULL);
662
663 index = 0;
664 XA_BUG_ON(xa, xa_find(xa, &index, ULONG_MAX, XA_PRESENT) !=
665 xa_mk_value(12));
666 XA_BUG_ON(xa, index != 12);
667 index = 13;
668 XA_BUG_ON(xa, xa_find(xa, &index, ULONG_MAX, XA_PRESENT) !=
669 xa_mk_value(12));
670 XA_BUG_ON(xa, (index < 12) || (index >= 16));
671 XA_BUG_ON(xa, xa_find_after(xa, &index, ULONG_MAX, XA_PRESENT) !=
672 xa_mk_value(16));
673 XA_BUG_ON(xa, index != 16);
674
675 xa_erase_index(xa, 12);
676 xa_erase_index(xa, 16);
677 XA_BUG_ON(xa, !xa_empty(xa));
678#endif
679}
680
681static noinline void check_multi_find_2(struct xarray *xa)
682{
683 unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 10 : 1;
684 unsigned int i, j;
685 void *entry;
686
687 for (i = 0; i < max_order; i++) {
688 unsigned long index = 1UL << i;
689 for (j = 0; j < index; j++) {
690 XA_STATE(xas, xa, j + index);
691 xa_store_index(xa, index - 1, GFP_KERNEL);
692 xa_store_order(xa, index, i, xa_mk_value(index),
693 GFP_KERNEL);
694 rcu_read_lock();
695 xas_for_each(&xas, entry, ULONG_MAX) {
696 xa_erase_index(xa, index);
697 }
698 rcu_read_unlock();
699 xa_erase_index(xa, index - 1);
700 XA_BUG_ON(xa, !xa_empty(xa));
701 }
702 }
703}
704
705static noinline void check_find(struct xarray *xa)
706{
707 unsigned long i, j, k;
708
709 XA_BUG_ON(xa, !xa_empty(xa));
710
711 /*
712 * Check xa_find with all pairs between 0 and 99 inclusive,
713 * starting at every index between 0 and 99
714 */
715 for (i = 0; i < 100; i++) {
716 XA_BUG_ON(xa, xa_store_index(xa, i, GFP_KERNEL) != NULL);
717 xa_set_mark(xa, i, XA_MARK_0);
718 for (j = 0; j < i; j++) {
719 XA_BUG_ON(xa, xa_store_index(xa, j, GFP_KERNEL) !=
720 NULL);
721 xa_set_mark(xa, j, XA_MARK_0);
722 for (k = 0; k < 100; k++) {
723 unsigned long index = k;
724 void *entry = xa_find(xa, &index, ULONG_MAX,
725 XA_PRESENT);
726 if (k <= j)
727 XA_BUG_ON(xa, index != j);
728 else if (k <= i)
729 XA_BUG_ON(xa, index != i);
730 else
731 XA_BUG_ON(xa, entry != NULL);
732
733 index = k;
734 entry = xa_find(xa, &index, ULONG_MAX,
735 XA_MARK_0);
736 if (k <= j)
737 XA_BUG_ON(xa, index != j);
738 else if (k <= i)
739 XA_BUG_ON(xa, index != i);
740 else
741 XA_BUG_ON(xa, entry != NULL);
742 }
743 xa_erase_index(xa, j);
744 XA_BUG_ON(xa, xa_get_mark(xa, j, XA_MARK_0));
745 XA_BUG_ON(xa, !xa_get_mark(xa, i, XA_MARK_0));
746 }
747 xa_erase_index(xa, i);
748 XA_BUG_ON(xa, xa_get_mark(xa, i, XA_MARK_0));
749 }
750 XA_BUG_ON(xa, !xa_empty(xa));
751 check_multi_find(xa);
752 check_multi_find_2(xa);
753}
754
755/* See find_swap_entry() in mm/shmem.c */
756static noinline unsigned long xa_find_entry(struct xarray *xa, void *item)
757{
758 XA_STATE(xas, xa, 0);
759 unsigned int checked = 0;
760 void *entry;
761
762 rcu_read_lock();
763 xas_for_each(&xas, entry, ULONG_MAX) {
764 if (xas_retry(&xas, entry))
765 continue;
766 if (entry == item)
767 break;
768 checked++;
769 if ((checked % 4) != 0)
770 continue;
771 xas_pause(&xas);
772 }
773 rcu_read_unlock();
774
775 return entry ? xas.xa_index : -1;
776}
777
778static noinline void check_find_entry(struct xarray *xa)
779{
780#ifdef CONFIG_XARRAY_MULTI
781 unsigned int order;
782 unsigned long offset, index;
783
784 for (order = 0; order < 20; order++) {
785 for (offset = 0; offset < (1UL << (order + 3));
786 offset += (1UL << order)) {
787 for (index = 0; index < (1UL << (order + 5));
788 index += (1UL << order)) {
789 xa_store_order(xa, index, order,
790 xa_mk_value(index), GFP_KERNEL);
791 XA_BUG_ON(xa, xa_load(xa, index) !=
792 xa_mk_value(index));
793 XA_BUG_ON(xa, xa_find_entry(xa,
794 xa_mk_value(index)) != index);
795 }
796 XA_BUG_ON(xa, xa_find_entry(xa, xa) != -1);
797 xa_destroy(xa);
798 }
799 }
800#endif
801
802 XA_BUG_ON(xa, xa_find_entry(xa, xa) != -1);
803 xa_store_index(xa, ULONG_MAX, GFP_KERNEL);
804 XA_BUG_ON(xa, xa_find_entry(xa, xa) != -1);
805 XA_BUG_ON(xa, xa_find_entry(xa, xa_mk_value(LONG_MAX)) != -1);
806 xa_erase_index(xa, ULONG_MAX);
807 XA_BUG_ON(xa, !xa_empty(xa));
808}
809
810static noinline void check_move_small(struct xarray *xa, unsigned long idx)
811{
812 XA_STATE(xas, xa, 0);
813 unsigned long i;
814
815 xa_store_index(xa, 0, GFP_KERNEL);
816 xa_store_index(xa, idx, GFP_KERNEL);
817
818 rcu_read_lock();
819 for (i = 0; i < idx * 4; i++) {
820 void *entry = xas_next(&xas);
821 if (i <= idx)
822 XA_BUG_ON(xa, xas.xa_node == XAS_RESTART);
823 XA_BUG_ON(xa, xas.xa_index != i);
824 if (i == 0 || i == idx)
825 XA_BUG_ON(xa, entry != xa_mk_value(i));
826 else
827 XA_BUG_ON(xa, entry != NULL);
828 }
829 xas_next(&xas);
830 XA_BUG_ON(xa, xas.xa_index != i);
831
832 do {
833 void *entry = xas_prev(&xas);
834 i--;
835 if (i <= idx)
836 XA_BUG_ON(xa, xas.xa_node == XAS_RESTART);
837 XA_BUG_ON(xa, xas.xa_index != i);
838 if (i == 0 || i == idx)
839 XA_BUG_ON(xa, entry != xa_mk_value(i));
840 else
841 XA_BUG_ON(xa, entry != NULL);
842 } while (i > 0);
843
844 xas_set(&xas, ULONG_MAX);
845 XA_BUG_ON(xa, xas_next(&xas) != NULL);
846 XA_BUG_ON(xa, xas.xa_index != ULONG_MAX);
847 XA_BUG_ON(xa, xas_next(&xas) != xa_mk_value(0));
848 XA_BUG_ON(xa, xas.xa_index != 0);
849 XA_BUG_ON(xa, xas_prev(&xas) != NULL);
850 XA_BUG_ON(xa, xas.xa_index != ULONG_MAX);
851 rcu_read_unlock();
852
853 xa_erase_index(xa, 0);
854 xa_erase_index(xa, idx);
855 XA_BUG_ON(xa, !xa_empty(xa));
856}
857
858static noinline void check_move(struct xarray *xa)
859{
860 XA_STATE(xas, xa, (1 << 16) - 1);
861 unsigned long i;
862
863 for (i = 0; i < (1 << 16); i++)
864 XA_BUG_ON(xa, xa_store_index(xa, i, GFP_KERNEL) != NULL);
865
866 rcu_read_lock();
867 do {
868 void *entry = xas_prev(&xas);
869 i--;
870 XA_BUG_ON(xa, entry != xa_mk_value(i));
871 XA_BUG_ON(xa, i != xas.xa_index);
872 } while (i != 0);
873
874 XA_BUG_ON(xa, xas_prev(&xas) != NULL);
875 XA_BUG_ON(xa, xas.xa_index != ULONG_MAX);
876
877 do {
878 void *entry = xas_next(&xas);
879 XA_BUG_ON(xa, entry != xa_mk_value(i));
880 XA_BUG_ON(xa, i != xas.xa_index);
881 i++;
882 } while (i < (1 << 16));
883 rcu_read_unlock();
884
885 for (i = (1 << 8); i < (1 << 15); i++)
886 xa_erase_index(xa, i);
887
888 i = xas.xa_index;
889
890 rcu_read_lock();
891 do {
892 void *entry = xas_prev(&xas);
893 i--;
894 if ((i < (1 << 8)) || (i >= (1 << 15)))
895 XA_BUG_ON(xa, entry != xa_mk_value(i));
896 else
897 XA_BUG_ON(xa, entry != NULL);
898 XA_BUG_ON(xa, i != xas.xa_index);
899 } while (i != 0);
900
901 XA_BUG_ON(xa, xas_prev(&xas) != NULL);
902 XA_BUG_ON(xa, xas.xa_index != ULONG_MAX);
903
904 do {
905 void *entry = xas_next(&xas);
906 if ((i < (1 << 8)) || (i >= (1 << 15)))
907 XA_BUG_ON(xa, entry != xa_mk_value(i));
908 else
909 XA_BUG_ON(xa, entry != NULL);
910 XA_BUG_ON(xa, i != xas.xa_index);
911 i++;
912 } while (i < (1 << 16));
913 rcu_read_unlock();
914
915 xa_destroy(xa);
916
917 for (i = 0; i < 16; i++)
918 check_move_small(xa, 1UL << i);
919
920 for (i = 2; i < 16; i++)
921 check_move_small(xa, (1UL << i) - 1);
922}
923
924static noinline void xa_store_many_order(struct xarray *xa,
925 unsigned long index, unsigned order)
926{
927 XA_STATE_ORDER(xas, xa, index, order);
928 unsigned int i = 0;
929
930 do {
931 xas_lock(&xas);
932 XA_BUG_ON(xa, xas_find_conflict(&xas));
933 xas_create_range(&xas);
934 if (xas_error(&xas))
935 goto unlock;
936 for (i = 0; i < (1U << order); i++) {
937 XA_BUG_ON(xa, xas_store(&xas, xa_mk_value(index + i)));
938 xas_next(&xas);
939 }
940unlock:
941 xas_unlock(&xas);
942 } while (xas_nomem(&xas, GFP_KERNEL));
943
944 XA_BUG_ON(xa, xas_error(&xas));
945}
946
947static noinline void check_create_range_1(struct xarray *xa,
948 unsigned long index, unsigned order)
949{
950 unsigned long i;
951
952 xa_store_many_order(xa, index, order);
953 for (i = index; i < index + (1UL << order); i++)
954 xa_erase_index(xa, i);
955 XA_BUG_ON(xa, !xa_empty(xa));
956}
957
958static noinline void check_create_range_2(struct xarray *xa, unsigned order)
959{
960 unsigned long i;
961 unsigned long nr = 1UL << order;
962
963 for (i = 0; i < nr * nr; i += nr)
964 xa_store_many_order(xa, i, order);
965 for (i = 0; i < nr * nr; i++)
966 xa_erase_index(xa, i);
967 XA_BUG_ON(xa, !xa_empty(xa));
968}
969
970static noinline void check_create_range_3(void)
971{
972 XA_STATE(xas, NULL, 0);
973 xas_set_err(&xas, -EEXIST);
974 xas_create_range(&xas);
975 XA_BUG_ON(NULL, xas_error(&xas) != -EEXIST);
976}
977
978static noinline void check_create_range_4(struct xarray *xa,
979 unsigned long index, unsigned order)
980{
981 XA_STATE_ORDER(xas, xa, index, order);
982 unsigned long base = xas.xa_index;
983 unsigned long i = 0;
984
985 xa_store_index(xa, index, GFP_KERNEL);
986 do {
987 xas_lock(&xas);
988 xas_create_range(&xas);
989 if (xas_error(&xas))
990 goto unlock;
991 for (i = 0; i < (1UL << order); i++) {
992 void *old = xas_store(&xas, xa_mk_value(base + i));
993 if (xas.xa_index == index)
994 XA_BUG_ON(xa, old != xa_mk_value(base + i));
995 else
996 XA_BUG_ON(xa, old != NULL);
997 xas_next(&xas);
998 }
999unlock:
1000 xas_unlock(&xas);
1001 } while (xas_nomem(&xas, GFP_KERNEL));
1002
1003 XA_BUG_ON(xa, xas_error(&xas));
1004
1005 for (i = base; i < base + (1UL << order); i++)
1006 xa_erase_index(xa, i);
1007 XA_BUG_ON(xa, !xa_empty(xa));
1008}
1009
1010static noinline void check_create_range(struct xarray *xa)
1011{
1012 unsigned int order;
1013 unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 12 : 1;
1014
1015 for (order = 0; order < max_order; order++) {
1016 check_create_range_1(xa, 0, order);
1017 check_create_range_1(xa, 1U << order, order);
1018 check_create_range_1(xa, 2U << order, order);
1019 check_create_range_1(xa, 3U << order, order);
1020 check_create_range_1(xa, 1U << 24, order);
1021 if (order < 10)
1022 check_create_range_2(xa, order);
1023
1024 check_create_range_4(xa, 0, order);
1025 check_create_range_4(xa, 1U << order, order);
1026 check_create_range_4(xa, 2U << order, order);
1027 check_create_range_4(xa, 3U << order, order);
1028 check_create_range_4(xa, 1U << 24, order);
1029
1030 check_create_range_4(xa, 1, order);
1031 check_create_range_4(xa, (1U << order) + 1, order);
1032 check_create_range_4(xa, (2U << order) + 1, order);
1033 check_create_range_4(xa, (2U << order) - 1, order);
1034 check_create_range_4(xa, (3U << order) + 1, order);
1035 check_create_range_4(xa, (3U << order) - 1, order);
1036 check_create_range_4(xa, (1U << 24) + 1, order);
1037 }
1038
1039 check_create_range_3();
1040}
1041
1042static noinline void __check_store_range(struct xarray *xa, unsigned long first,
1043 unsigned long last)
1044{
1045#ifdef CONFIG_XARRAY_MULTI
1046 xa_store_range(xa, first, last, xa_mk_value(first), GFP_KERNEL);
1047
1048 XA_BUG_ON(xa, xa_load(xa, first) != xa_mk_value(first));
1049 XA_BUG_ON(xa, xa_load(xa, last) != xa_mk_value(first));
1050 XA_BUG_ON(xa, xa_load(xa, first - 1) != NULL);
1051 XA_BUG_ON(xa, xa_load(xa, last + 1) != NULL);
1052
1053 xa_store_range(xa, first, last, NULL, GFP_KERNEL);
1054#endif
1055
1056 XA_BUG_ON(xa, !xa_empty(xa));
1057}
1058
1059static noinline void check_store_range(struct xarray *xa)
1060{
1061 unsigned long i, j;
1062
1063 for (i = 0; i < 128; i++) {
1064 for (j = i; j < 128; j++) {
1065 __check_store_range(xa, i, j);
1066 __check_store_range(xa, 128 + i, 128 + j);
1067 __check_store_range(xa, 4095 + i, 4095 + j);
1068 __check_store_range(xa, 4096 + i, 4096 + j);
1069 __check_store_range(xa, 123456 + i, 123456 + j);
1070 __check_store_range(xa, UINT_MAX + i, UINT_MAX + j);
1071 }
1072 }
1073}
1074
1075static LIST_HEAD(shadow_nodes);
1076
1077static void test_update_node(struct xa_node *node)
1078{
1079 if (node->count && node->count == node->nr_values) {
1080 if (list_empty(&node->private_list))
1081 list_add(&shadow_nodes, &node->private_list);
1082 } else {
1083 if (!list_empty(&node->private_list))
1084 list_del_init(&node->private_list);
1085 }
1086}
1087
1088static noinline void shadow_remove(struct xarray *xa)
1089{
1090 struct xa_node *node;
1091
1092 xa_lock(xa);
1093 while ((node = list_first_entry_or_null(&shadow_nodes,
1094 struct xa_node, private_list))) {
1095 XA_STATE(xas, node->array, 0);
1096 XA_BUG_ON(xa, node->array != xa);
1097 list_del_init(&node->private_list);
1098 xas.xa_node = xa_parent_locked(node->array, node);
1099 xas.xa_offset = node->offset;
1100 xas.xa_shift = node->shift + XA_CHUNK_SHIFT;
1101 xas_set_update(&xas, test_update_node);
1102 xas_store(&xas, NULL);
1103 }
1104 xa_unlock(xa);
1105}
1106
1107static noinline void check_workingset(struct xarray *xa, unsigned long index)
1108{
1109 XA_STATE(xas, xa, index);
1110 xas_set_update(&xas, test_update_node);
1111
1112 do {
1113 xas_lock(&xas);
1114 xas_store(&xas, xa_mk_value(0));
1115 xas_next(&xas);
1116 xas_store(&xas, xa_mk_value(1));
1117 xas_unlock(&xas);
1118 } while (xas_nomem(&xas, GFP_KERNEL));
1119
1120 XA_BUG_ON(xa, list_empty(&shadow_nodes));
1121
1122 xas_lock(&xas);
1123 xas_next(&xas);
1124 xas_store(&xas, &xas);
1125 XA_BUG_ON(xa, !list_empty(&shadow_nodes));
1126
1127 xas_store(&xas, xa_mk_value(2));
1128 xas_unlock(&xas);
1129 XA_BUG_ON(xa, list_empty(&shadow_nodes));
1130
1131 shadow_remove(xa);
1132 XA_BUG_ON(xa, !list_empty(&shadow_nodes));
1133 XA_BUG_ON(xa, !xa_empty(xa));
1134}
1135
1136/*
1137 * Check that the pointer / value / sibling entries are accounted the
1138 * way we expect them to be.
1139 */
1140static noinline void check_account(struct xarray *xa)
1141{
1142#ifdef CONFIG_XARRAY_MULTI
1143 unsigned int order;
1144
1145 for (order = 1; order < 12; order++) {
1146 XA_STATE(xas, xa, 1 << order);
1147
1148 xa_store_order(xa, 0, order, xa, GFP_KERNEL);
1149 xas_load(&xas);
1150 XA_BUG_ON(xa, xas.xa_node->count == 0);
1151 XA_BUG_ON(xa, xas.xa_node->count > (1 << order));
1152 XA_BUG_ON(xa, xas.xa_node->nr_values != 0);
1153
1154 xa_store_order(xa, 1 << order, order, xa_mk_value(1 << order),
1155 GFP_KERNEL);
1156 XA_BUG_ON(xa, xas.xa_node->count != xas.xa_node->nr_values * 2);
1157
1158 xa_erase(xa, 1 << order);
1159 XA_BUG_ON(xa, xas.xa_node->nr_values != 0);
1160
1161 xa_erase(xa, 0);
1162 XA_BUG_ON(xa, !xa_empty(xa));
1163 }
1164#endif
1165}
1166
1167static noinline void check_destroy(struct xarray *xa)
1168{
1169 unsigned long index;
1170
1171 XA_BUG_ON(xa, !xa_empty(xa));
1172
1173 /* Destroying an empty array is a no-op */
1174 xa_destroy(xa);
1175 XA_BUG_ON(xa, !xa_empty(xa));
1176
1177 /* Destroying an array with a single entry */
1178 for (index = 0; index < 1000; index++) {
1179 xa_store_index(xa, index, GFP_KERNEL);
1180 XA_BUG_ON(xa, xa_empty(xa));
1181 xa_destroy(xa);
1182 XA_BUG_ON(xa, !xa_empty(xa));
1183 }
1184
1185 /* Destroying an array with a single entry at ULONG_MAX */
1186 xa_store(xa, ULONG_MAX, xa, GFP_KERNEL);
1187 XA_BUG_ON(xa, xa_empty(xa));
1188 xa_destroy(xa);
1189 XA_BUG_ON(xa, !xa_empty(xa));
1190
1191#ifdef CONFIG_XARRAY_MULTI
1192 /* Destroying an array with a multi-index entry */
1193 xa_store_order(xa, 1 << 11, 11, xa, GFP_KERNEL);
1194 XA_BUG_ON(xa, xa_empty(xa));
1195 xa_destroy(xa);
1196 XA_BUG_ON(xa, !xa_empty(xa));
1197#endif
1198}
1199
1200static DEFINE_XARRAY(array);
1201
1202static int xarray_checks(void)
1203{
1204 check_xa_err(&array);
1205 check_xas_retry(&array);
1206 check_xa_load(&array);
1207 check_xa_mark(&array);
1208 check_xa_shrink(&array);
1209 check_xas_erase(&array);
1210 check_cmpxchg(&array);
1211 check_reserve(&array);
1212 check_multi_store(&array);
1213 check_xa_alloc();
1214 check_find(&array);
1215 check_find_entry(&array);
1216 check_account(&array);
1217 check_destroy(&array);
1218 check_move(&array);
1219 check_create_range(&array);
1220 check_store_range(&array);
1221 check_store_iter(&array);
1222
1223 check_workingset(&array, 0);
1224 check_workingset(&array, 64);
1225 check_workingset(&array, 4096);
1226
1227 printk("XArray: %u of %u tests passed\n", tests_passed, tests_run);
1228 return (tests_run == tests_passed) ? 0 : -EINVAL;
1229}
1230
1231static void xarray_exit(void)
1232{
1233}
1234
1235module_init(xarray_checks);
1236module_exit(xarray_exit);
1237MODULE_AUTHOR("Matthew Wilcox <willy@infradead.org>");
1238MODULE_LICENSE("GPL");
diff --git a/lib/xarray.c b/lib/xarray.c
new file mode 100644
index 000000000000..8b176f009c08
--- /dev/null
+++ b/lib/xarray.c
@@ -0,0 +1,2036 @@
1// SPDX-License-Identifier: GPL-2.0+
2/*
3 * XArray implementation
4 * Copyright (c) 2017 Microsoft Corporation
5 * Author: Matthew Wilcox <willy@infradead.org>
6 */
7
8#include <linux/bitmap.h>
9#include <linux/export.h>
10#include <linux/list.h>
11#include <linux/slab.h>
12#include <linux/xarray.h>
13
14/*
15 * Coding conventions in this file:
16 *
17 * @xa is used to refer to the entire xarray.
18 * @xas is the 'xarray operation state'. It may be either a pointer to
19 * an xa_state, or an xa_state stored on the stack. This is an unfortunate
20 * ambiguity.
21 * @index is the index of the entry being operated on
22 * @mark is an xa_mark_t; a small number indicating one of the mark bits.
23 * @node refers to an xa_node; usually the primary one being operated on by
24 * this function.
25 * @offset is the index into the slots array inside an xa_node.
26 * @parent refers to the @xa_node closer to the head than @node.
27 * @entry refers to something stored in a slot in the xarray
28 */
29
30static inline unsigned int xa_lock_type(const struct xarray *xa)
31{
32 return (__force unsigned int)xa->xa_flags & 3;
33}
34
35static inline void xas_lock_type(struct xa_state *xas, unsigned int lock_type)
36{
37 if (lock_type == XA_LOCK_IRQ)
38 xas_lock_irq(xas);
39 else if (lock_type == XA_LOCK_BH)
40 xas_lock_bh(xas);
41 else
42 xas_lock(xas);
43}
44
45static inline void xas_unlock_type(struct xa_state *xas, unsigned int lock_type)
46{
47 if (lock_type == XA_LOCK_IRQ)
48 xas_unlock_irq(xas);
49 else if (lock_type == XA_LOCK_BH)
50 xas_unlock_bh(xas);
51 else
52 xas_unlock(xas);
53}
54
55static inline bool xa_track_free(const struct xarray *xa)
56{
57 return xa->xa_flags & XA_FLAGS_TRACK_FREE;
58}
59
60static inline void xa_mark_set(struct xarray *xa, xa_mark_t mark)
61{
62 if (!(xa->xa_flags & XA_FLAGS_MARK(mark)))
63 xa->xa_flags |= XA_FLAGS_MARK(mark);
64}
65
66static inline void xa_mark_clear(struct xarray *xa, xa_mark_t mark)
67{
68 if (xa->xa_flags & XA_FLAGS_MARK(mark))
69 xa->xa_flags &= ~(XA_FLAGS_MARK(mark));
70}
71
72static inline unsigned long *node_marks(struct xa_node *node, xa_mark_t mark)
73{
74 return node->marks[(__force unsigned)mark];
75}
76
77static inline bool node_get_mark(struct xa_node *node,
78 unsigned int offset, xa_mark_t mark)
79{
80 return test_bit(offset, node_marks(node, mark));
81}
82
83/* returns true if the bit was set */
84static inline bool node_set_mark(struct xa_node *node, unsigned int offset,
85 xa_mark_t mark)
86{
87 return __test_and_set_bit(offset, node_marks(node, mark));
88}
89
90/* returns true if the bit was set */
91static inline bool node_clear_mark(struct xa_node *node, unsigned int offset,
92 xa_mark_t mark)
93{
94 return __test_and_clear_bit(offset, node_marks(node, mark));
95}
96
97static inline bool node_any_mark(struct xa_node *node, xa_mark_t mark)
98{
99 return !bitmap_empty(node_marks(node, mark), XA_CHUNK_SIZE);
100}
101
102static inline void node_mark_all(struct xa_node *node, xa_mark_t mark)
103{
104 bitmap_fill(node_marks(node, mark), XA_CHUNK_SIZE);
105}
106
107#define mark_inc(mark) do { \
108 mark = (__force xa_mark_t)((__force unsigned)(mark) + 1); \
109} while (0)
110
111/*
112 * xas_squash_marks() - Merge all marks to the first entry
113 * @xas: Array operation state.
114 *
115 * Set a mark on the first entry if any entry has it set. Clear marks on
116 * all sibling entries.
117 */
118static void xas_squash_marks(const struct xa_state *xas)
119{
120 unsigned int mark = 0;
121 unsigned int limit = xas->xa_offset + xas->xa_sibs + 1;
122
123 if (!xas->xa_sibs)
124 return;
125
126 do {
127 unsigned long *marks = xas->xa_node->marks[mark];
128 if (find_next_bit(marks, limit, xas->xa_offset + 1) == limit)
129 continue;
130 __set_bit(xas->xa_offset, marks);
131 bitmap_clear(marks, xas->xa_offset + 1, xas->xa_sibs);
132 } while (mark++ != (__force unsigned)XA_MARK_MAX);
133}
134
135/* extracts the offset within this node from the index */
136static unsigned int get_offset(unsigned long index, struct xa_node *node)
137{
138 return (index >> node->shift) & XA_CHUNK_MASK;
139}
140
141static void xas_set_offset(struct xa_state *xas)
142{
143 xas->xa_offset = get_offset(xas->xa_index, xas->xa_node);
144}
145
146/* move the index either forwards (find) or backwards (sibling slot) */
147static void xas_move_index(struct xa_state *xas, unsigned long offset)
148{
149 unsigned int shift = xas->xa_node->shift;
150 xas->xa_index &= ~XA_CHUNK_MASK << shift;
151 xas->xa_index += offset << shift;
152}
153
154static void xas_advance(struct xa_state *xas)
155{
156 xas->xa_offset++;
157 xas_move_index(xas, xas->xa_offset);
158}
159
160static void *set_bounds(struct xa_state *xas)
161{
162 xas->xa_node = XAS_BOUNDS;
163 return NULL;
164}
165
166/*
167 * Starts a walk. If the @xas is already valid, we assume that it's on
168 * the right path and just return where we've got to. If we're in an
169 * error state, return NULL. If the index is outside the current scope
170 * of the xarray, return NULL without changing @xas->xa_node. Otherwise
171 * set @xas->xa_node to NULL and return the current head of the array.
172 */
173static void *xas_start(struct xa_state *xas)
174{
175 void *entry;
176
177 if (xas_valid(xas))
178 return xas_reload(xas);
179 if (xas_error(xas))
180 return NULL;
181
182 entry = xa_head(xas->xa);
183 if (!xa_is_node(entry)) {
184 if (xas->xa_index)
185 return set_bounds(xas);
186 } else {
187 if ((xas->xa_index >> xa_to_node(entry)->shift) > XA_CHUNK_MASK)
188 return set_bounds(xas);
189 }
190
191 xas->xa_node = NULL;
192 return entry;
193}
194
195static void *xas_descend(struct xa_state *xas, struct xa_node *node)
196{
197 unsigned int offset = get_offset(xas->xa_index, node);
198 void *entry = xa_entry(xas->xa, node, offset);
199
200 xas->xa_node = node;
201 if (xa_is_sibling(entry)) {
202 offset = xa_to_sibling(entry);
203 entry = xa_entry(xas->xa, node, offset);
204 }
205
206 xas->xa_offset = offset;
207 return entry;
208}
209
210/**
211 * xas_load() - Load an entry from the XArray (advanced).
212 * @xas: XArray operation state.
213 *
214 * Usually walks the @xas to the appropriate state to load the entry
215 * stored at xa_index. However, it will do nothing and return %NULL if
216 * @xas is in an error state. xas_load() will never expand the tree.
217 *
218 * If the xa_state is set up to operate on a multi-index entry, xas_load()
219 * may return %NULL or an internal entry, even if there are entries
220 * present within the range specified by @xas.
221 *
222 * Context: Any context. The caller should hold the xa_lock or the RCU lock.
223 * Return: Usually an entry in the XArray, but see description for exceptions.
224 */
225void *xas_load(struct xa_state *xas)
226{
227 void *entry = xas_start(xas);
228
229 while (xa_is_node(entry)) {
230 struct xa_node *node = xa_to_node(entry);
231
232 if (xas->xa_shift > node->shift)
233 break;
234 entry = xas_descend(xas, node);
235 }
236 return entry;
237}
238EXPORT_SYMBOL_GPL(xas_load);
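As an aside for readers new to the advanced API: the kernel-doc above translates into a small lookup helper along the following lines. This is a sketch for illustration only, not part of the patch; example_load() is an invented name, and xas_retry() is used to skip retry entries in the same way xa_load() later in this file does.

/* Minimal sketch: an RCU-protected lookup built on xas_load(). */
static void *example_load(struct xarray *xa, unsigned long index)
{
	XA_STATE(xas, xa, index);
	void *entry;

	rcu_read_lock();
	do {
		entry = xas_load(&xas);
	} while (xas_retry(&xas, entry));	/* raced with a node being freed */
	rcu_read_unlock();

	return entry;
}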
239
240/* Move the radix tree node cache here */
241extern struct kmem_cache *radix_tree_node_cachep;
242extern void radix_tree_node_rcu_free(struct rcu_head *head);
243
244#define XA_RCU_FREE ((struct xarray *)1)
245
246static void xa_node_free(struct xa_node *node)
247{
248 XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
249 node->array = XA_RCU_FREE;
250 call_rcu(&node->rcu_head, radix_tree_node_rcu_free);
251}
252
253/*
254 * xas_destroy() - Free any resources allocated during the XArray operation.
255 * @xas: XArray operation state.
256 *
 257 * This function is internal-only; callers should use xas_nomem() instead.
258 */
259static void xas_destroy(struct xa_state *xas)
260{
261 struct xa_node *node = xas->xa_alloc;
262
263 if (!node)
264 return;
265 XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
266 kmem_cache_free(radix_tree_node_cachep, node);
267 xas->xa_alloc = NULL;
268}
269
270/**
271 * xas_nomem() - Allocate memory if needed.
272 * @xas: XArray operation state.
273 * @gfp: Memory allocation flags.
274 *
275 * If we need to add new nodes to the XArray, we try to allocate memory
276 * with GFP_NOWAIT while holding the lock, which will usually succeed.
277 * If it fails, @xas is flagged as needing memory to continue. The caller
278 * should drop the lock and call xas_nomem(). If xas_nomem() succeeds,
279 * the caller should retry the operation.
280 *
281 * Forward progress is guaranteed as one node is allocated here and
282 * stored in the xa_state where it will be found by xas_alloc(). More
283 * nodes will likely be found in the slab allocator, but we do not tie
284 * them up here.
285 *
286 * Return: true if memory was needed, and was successfully allocated.
287 */
288bool xas_nomem(struct xa_state *xas, gfp_t gfp)
289{
290 if (xas->xa_node != XA_ERROR(-ENOMEM)) {
291 xas_destroy(xas);
292 return false;
293 }
294 xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
295 if (!xas->xa_alloc)
296 return false;
297 XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list));
298 xas->xa_node = XAS_RESTART;
299 return true;
300}
301EXPORT_SYMBOL_GPL(xas_nomem);
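The retry pattern described in the kernel-doc above looks like this in practice. It is a sketch only (example_store() is an invented name) and deliberately mirrors what xa_store() further down in this file does.

/* Minimal sketch of the xas_nomem() retry loop. */
static int example_store(struct xarray *xa, unsigned long index, void *item)
{
	XA_STATE(xas, xa, index);

	do {
		xas_lock(&xas);
		xas_store(&xas, item);		/* may record -ENOMEM in the xa_state */
		xas_unlock(&xas);
	} while (xas_nomem(&xas, GFP_KERNEL));	/* allocate outside the lock, retry */

	return xas_error(&xas);
}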
302
303/*
304 * __xas_nomem() - Drop locks and allocate memory if needed.
305 * @xas: XArray operation state.
306 * @gfp: Memory allocation flags.
307 *
308 * Internal variant of xas_nomem().
309 *
310 * Return: true if memory was needed, and was successfully allocated.
311 */
312static bool __xas_nomem(struct xa_state *xas, gfp_t gfp)
313 __must_hold(xas->xa->xa_lock)
314{
315 unsigned int lock_type = xa_lock_type(xas->xa);
316
317 if (xas->xa_node != XA_ERROR(-ENOMEM)) {
318 xas_destroy(xas);
319 return false;
320 }
321 if (gfpflags_allow_blocking(gfp)) {
322 xas_unlock_type(xas, lock_type);
323 xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
324 xas_lock_type(xas, lock_type);
325 } else {
326 xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
327 }
328 if (!xas->xa_alloc)
329 return false;
330 XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list));
331 xas->xa_node = XAS_RESTART;
332 return true;
333}
334
335static void xas_update(struct xa_state *xas, struct xa_node *node)
336{
337 if (xas->xa_update)
338 xas->xa_update(node);
339 else
340 XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
341}
342
343static void *xas_alloc(struct xa_state *xas, unsigned int shift)
344{
345 struct xa_node *parent = xas->xa_node;
346 struct xa_node *node = xas->xa_alloc;
347
348 if (xas_invalid(xas))
349 return NULL;
350
351 if (node) {
352 xas->xa_alloc = NULL;
353 } else {
354 node = kmem_cache_alloc(radix_tree_node_cachep,
355 GFP_NOWAIT | __GFP_NOWARN);
356 if (!node) {
357 xas_set_err(xas, -ENOMEM);
358 return NULL;
359 }
360 }
361
362 if (parent) {
363 node->offset = xas->xa_offset;
364 parent->count++;
365 XA_NODE_BUG_ON(node, parent->count > XA_CHUNK_SIZE);
366 xas_update(xas, parent);
367 }
368 XA_NODE_BUG_ON(node, shift > BITS_PER_LONG);
369 XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
370 node->shift = shift;
371 node->count = 0;
372 node->nr_values = 0;
373 RCU_INIT_POINTER(node->parent, xas->xa_node);
374 node->array = xas->xa;
375
376 return node;
377}
378
379#ifdef CONFIG_XARRAY_MULTI
380/* Returns the number of indices covered by a given xa_state */
381static unsigned long xas_size(const struct xa_state *xas)
382{
383 return (xas->xa_sibs + 1UL) << xas->xa_shift;
384}
385#endif
386
387/*
388 * Use this to calculate the maximum index that will need to be created
389 * in order to add the entry described by @xas. Because we cannot store a
390 * multiple-index entry at index 0, the calculation is a little more complex
391 * than you might expect.
392 */
393static unsigned long xas_max(struct xa_state *xas)
394{
395 unsigned long max = xas->xa_index;
396
397#ifdef CONFIG_XARRAY_MULTI
398 if (xas->xa_shift || xas->xa_sibs) {
399 unsigned long mask = xas_size(xas) - 1;
400 max |= mask;
401 if (mask == max)
402 max++;
403 }
404#endif
405
406 return max;
407}
408
409/* The maximum index that can be contained in the array without expanding it */
410static unsigned long max_index(void *entry)
411{
412 if (!xa_is_node(entry))
413 return 0;
414 return (XA_CHUNK_SIZE << xa_to_node(entry)->shift) - 1;
415}
416
417static void xas_shrink(struct xa_state *xas)
418{
419 struct xarray *xa = xas->xa;
420 struct xa_node *node = xas->xa_node;
421
422 for (;;) {
423 void *entry;
424
425 XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE);
426 if (node->count != 1)
427 break;
428 entry = xa_entry_locked(xa, node, 0);
429 if (!entry)
430 break;
431 if (!xa_is_node(entry) && node->shift)
432 break;
433 xas->xa_node = XAS_BOUNDS;
434
435 RCU_INIT_POINTER(xa->xa_head, entry);
436 if (xa_track_free(xa) && !node_get_mark(node, 0, XA_FREE_MARK))
437 xa_mark_clear(xa, XA_FREE_MARK);
438
439 node->count = 0;
440 node->nr_values = 0;
441 if (!xa_is_node(entry))
442 RCU_INIT_POINTER(node->slots[0], XA_RETRY_ENTRY);
443 xas_update(xas, node);
444 xa_node_free(node);
445 if (!xa_is_node(entry))
446 break;
447 node = xa_to_node(entry);
448 node->parent = NULL;
449 }
450}
451
452/*
453 * xas_delete_node() - Attempt to delete an xa_node
454 * @xas: Array operation state.
455 *
 456 * Attempts to delete the @xas->xa_node. This will fail if the node
 457 * still contains any entries (its count is non-zero).
458 */
459static void xas_delete_node(struct xa_state *xas)
460{
461 struct xa_node *node = xas->xa_node;
462
463 for (;;) {
464 struct xa_node *parent;
465
466 XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE);
467 if (node->count)
468 break;
469
470 parent = xa_parent_locked(xas->xa, node);
471 xas->xa_node = parent;
472 xas->xa_offset = node->offset;
473 xa_node_free(node);
474
475 if (!parent) {
476 xas->xa->xa_head = NULL;
477 xas->xa_node = XAS_BOUNDS;
478 return;
479 }
480
481 parent->slots[xas->xa_offset] = NULL;
482 parent->count--;
483 XA_NODE_BUG_ON(parent, parent->count > XA_CHUNK_SIZE);
484 node = parent;
485 xas_update(xas, node);
486 }
487
488 if (!node->parent)
489 xas_shrink(xas);
490}
491
492/**
493 * xas_free_nodes() - Free this node and all nodes that it references
494 * @xas: Array operation state.
495 * @top: Node to free
496 *
497 * This node has been removed from the tree. We must now free it and all
498 * of its subnodes. There may be RCU walkers with references into the tree,
499 * so we must replace all entries with retry markers.
500 */
501static void xas_free_nodes(struct xa_state *xas, struct xa_node *top)
502{
503 unsigned int offset = 0;
504 struct xa_node *node = top;
505
506 for (;;) {
507 void *entry = xa_entry_locked(xas->xa, node, offset);
508
509 if (xa_is_node(entry)) {
510 node = xa_to_node(entry);
511 offset = 0;
512 continue;
513 }
514 if (entry)
515 RCU_INIT_POINTER(node->slots[offset], XA_RETRY_ENTRY);
516 offset++;
517 while (offset == XA_CHUNK_SIZE) {
518 struct xa_node *parent;
519
520 parent = xa_parent_locked(xas->xa, node);
521 offset = node->offset + 1;
522 node->count = 0;
523 node->nr_values = 0;
524 xas_update(xas, node);
525 xa_node_free(node);
526 if (node == top)
527 return;
528 node = parent;
529 }
530 }
531}
532
533/*
534 * xas_expand adds nodes to the head of the tree until it has reached
535 * sufficient height to be able to contain @xas->xa_index
536 */
537static int xas_expand(struct xa_state *xas, void *head)
538{
539 struct xarray *xa = xas->xa;
540 struct xa_node *node = NULL;
541 unsigned int shift = 0;
542 unsigned long max = xas_max(xas);
543
544 if (!head) {
545 if (max == 0)
546 return 0;
547 while ((max >> shift) >= XA_CHUNK_SIZE)
548 shift += XA_CHUNK_SHIFT;
549 return shift + XA_CHUNK_SHIFT;
550 } else if (xa_is_node(head)) {
551 node = xa_to_node(head);
552 shift = node->shift + XA_CHUNK_SHIFT;
553 }
554 xas->xa_node = NULL;
555
556 while (max > max_index(head)) {
557 xa_mark_t mark = 0;
558
559 XA_NODE_BUG_ON(node, shift > BITS_PER_LONG);
560 node = xas_alloc(xas, shift);
561 if (!node)
562 return -ENOMEM;
563
564 node->count = 1;
565 if (xa_is_value(head))
566 node->nr_values = 1;
567 RCU_INIT_POINTER(node->slots[0], head);
568
569 /* Propagate the aggregated mark info to the new child */
570 for (;;) {
571 if (xa_track_free(xa) && mark == XA_FREE_MARK) {
572 node_mark_all(node, XA_FREE_MARK);
573 if (!xa_marked(xa, XA_FREE_MARK)) {
574 node_clear_mark(node, 0, XA_FREE_MARK);
575 xa_mark_set(xa, XA_FREE_MARK);
576 }
577 } else if (xa_marked(xa, mark)) {
578 node_set_mark(node, 0, mark);
579 }
580 if (mark == XA_MARK_MAX)
581 break;
582 mark_inc(mark);
583 }
584
585 /*
586 * Now that the new node is fully initialised, we can add
587 * it to the tree
588 */
589 if (xa_is_node(head)) {
590 xa_to_node(head)->offset = 0;
591 rcu_assign_pointer(xa_to_node(head)->parent, node);
592 }
593 head = xa_mk_node(node);
594 rcu_assign_pointer(xa->xa_head, head);
595 xas_update(xas, node);
596
597 shift += XA_CHUNK_SHIFT;
598 }
599
600 xas->xa_node = node;
601 return shift;
602}
603
604/*
605 * xas_create() - Create a slot to store an entry in.
606 * @xas: XArray operation state.
607 *
608 * Most users will not need to call this function directly, as it is called
609 * by xas_store(). It is useful for doing conditional store operations
610 * (see the xa_cmpxchg() implementation for an example).
611 *
612 * Return: If the slot already existed, returns the contents of this slot.
613 * If the slot was newly created, returns NULL. If it failed to create the
614 * slot, returns NULL and indicates the error in @xas.
615 */
616static void *xas_create(struct xa_state *xas)
617{
618 struct xarray *xa = xas->xa;
619 void *entry;
620 void __rcu **slot;
621 struct xa_node *node = xas->xa_node;
622 int shift;
623 unsigned int order = xas->xa_shift;
624
625 if (xas_top(node)) {
626 entry = xa_head_locked(xa);
627 xas->xa_node = NULL;
628 shift = xas_expand(xas, entry);
629 if (shift < 0)
630 return NULL;
631 entry = xa_head_locked(xa);
632 slot = &xa->xa_head;
633 } else if (xas_error(xas)) {
634 return NULL;
635 } else if (node) {
636 unsigned int offset = xas->xa_offset;
637
638 shift = node->shift;
639 entry = xa_entry_locked(xa, node, offset);
640 slot = &node->slots[offset];
641 } else {
642 shift = 0;
643 entry = xa_head_locked(xa);
644 slot = &xa->xa_head;
645 }
646
647 while (shift > order) {
648 shift -= XA_CHUNK_SHIFT;
649 if (!entry) {
650 node = xas_alloc(xas, shift);
651 if (!node)
652 break;
653 if (xa_track_free(xa))
654 node_mark_all(node, XA_FREE_MARK);
655 rcu_assign_pointer(*slot, xa_mk_node(node));
656 } else if (xa_is_node(entry)) {
657 node = xa_to_node(entry);
658 } else {
659 break;
660 }
661 entry = xas_descend(xas, node);
662 slot = &node->slots[xas->xa_offset];
663 }
664
665 return entry;
666}
667
668/**
669 * xas_create_range() - Ensure that stores to this range will succeed
670 * @xas: XArray operation state.
671 *
672 * Creates all of the slots in the range covered by @xas. Sets @xas to
673 * create single-index entries and positions it at the beginning of the
674 * range. This is for the benefit of users which have not yet been
675 * converted to use multi-index entries.
676 */
677void xas_create_range(struct xa_state *xas)
678{
679 unsigned long index = xas->xa_index;
680 unsigned char shift = xas->xa_shift;
681 unsigned char sibs = xas->xa_sibs;
682
683 xas->xa_index |= ((sibs + 1) << shift) - 1;
684 if (xas_is_node(xas) && xas->xa_node->shift == xas->xa_shift)
685 xas->xa_offset |= sibs;
686 xas->xa_shift = 0;
687 xas->xa_sibs = 0;
688
689 for (;;) {
690 xas_create(xas);
691 if (xas_error(xas))
692 goto restore;
693 if (xas->xa_index <= (index | XA_CHUNK_MASK))
694 goto success;
695 xas->xa_index -= XA_CHUNK_SIZE;
696
697 for (;;) {
698 struct xa_node *node = xas->xa_node;
699 xas->xa_node = xa_parent_locked(xas->xa, node);
700 xas->xa_offset = node->offset - 1;
701 if (node->offset != 0)
702 break;
703 }
704 }
705
706restore:
707 xas->xa_shift = shift;
708 xas->xa_sibs = sibs;
709 xas->xa_index = index;
710 return;
711success:
712 xas->xa_index = index;
713 if (xas->xa_node)
714 xas_set_offset(xas);
715}
716EXPORT_SYMBOL_GPL(xas_create_range);
717
718static void update_node(struct xa_state *xas, struct xa_node *node,
719 int count, int values)
720{
721 if (!node || (!count && !values))
722 return;
723
724 node->count += count;
725 node->nr_values += values;
726 XA_NODE_BUG_ON(node, node->count > XA_CHUNK_SIZE);
727 XA_NODE_BUG_ON(node, node->nr_values > XA_CHUNK_SIZE);
728 xas_update(xas, node);
729 if (count < 0)
730 xas_delete_node(xas);
731}
732
733/**
734 * xas_store() - Store this entry in the XArray.
735 * @xas: XArray operation state.
736 * @entry: New entry.
737 *
738 * If @xas is operating on a multi-index entry, the entry returned by this
739 * function is essentially meaningless (it may be an internal entry or it
740 * may be %NULL, even if there are non-NULL entries at some of the indices
741 * covered by the range). This is not a problem for any current users,
742 * and can be changed if needed.
743 *
744 * Return: The old entry at this index.
745 */
746void *xas_store(struct xa_state *xas, void *entry)
747{
748 struct xa_node *node;
749 void __rcu **slot = &xas->xa->xa_head;
750 unsigned int offset, max;
751 int count = 0;
752 int values = 0;
753 void *first, *next;
754 bool value = xa_is_value(entry);
755
756 if (entry)
757 first = xas_create(xas);
758 else
759 first = xas_load(xas);
760
761 if (xas_invalid(xas))
762 return first;
763 node = xas->xa_node;
764 if (node && (xas->xa_shift < node->shift))
765 xas->xa_sibs = 0;
766 if ((first == entry) && !xas->xa_sibs)
767 return first;
768
769 next = first;
770 offset = xas->xa_offset;
771 max = xas->xa_offset + xas->xa_sibs;
772 if (node) {
773 slot = &node->slots[offset];
774 if (xas->xa_sibs)
775 xas_squash_marks(xas);
776 }
777 if (!entry)
778 xas_init_marks(xas);
779
780 for (;;) {
781 /*
782 * Must clear the marks before setting the entry to NULL,
783 * otherwise xas_for_each_marked may find a NULL entry and
784 * stop early. rcu_assign_pointer contains a release barrier
785 * so the mark clearing will appear to happen before the
786 * entry is set to NULL.
787 */
788 rcu_assign_pointer(*slot, entry);
789 if (xa_is_node(next))
790 xas_free_nodes(xas, xa_to_node(next));
791 if (!node)
792 break;
793 count += !next - !entry;
794 values += !xa_is_value(first) - !value;
795 if (entry) {
796 if (offset == max)
797 break;
798 if (!xa_is_sibling(entry))
799 entry = xa_mk_sibling(xas->xa_offset);
800 } else {
801 if (offset == XA_CHUNK_MASK)
802 break;
803 }
804 next = xa_entry_locked(xas->xa, node, ++offset);
805 if (!xa_is_sibling(next)) {
806 if (!entry && (offset > max))
807 break;
808 first = next;
809 }
810 slot++;
811 }
812
813 update_node(xas, node, count, values);
814 return first;
815}
816EXPORT_SYMBOL_GPL(xas_store);
817
818/**
819 * xas_get_mark() - Returns the state of this mark.
820 * @xas: XArray operation state.
821 * @mark: Mark number.
822 *
823 * Return: true if the mark is set, false if the mark is clear or @xas
824 * is in an error state.
825 */
826bool xas_get_mark(const struct xa_state *xas, xa_mark_t mark)
827{
828 if (xas_invalid(xas))
829 return false;
830 if (!xas->xa_node)
831 return xa_marked(xas->xa, mark);
832 return node_get_mark(xas->xa_node, xas->xa_offset, mark);
833}
834EXPORT_SYMBOL_GPL(xas_get_mark);
835
836/**
837 * xas_set_mark() - Sets the mark on this entry and its parents.
838 * @xas: XArray operation state.
839 * @mark: Mark number.
840 *
841 * Sets the specified mark on this entry, and walks up the tree setting it
842 * on all the ancestor entries. Does nothing if @xas has not been walked to
843 * an entry, or is in an error state.
844 */
845void xas_set_mark(const struct xa_state *xas, xa_mark_t mark)
846{
847 struct xa_node *node = xas->xa_node;
848 unsigned int offset = xas->xa_offset;
849
850 if (xas_invalid(xas))
851 return;
852
853 while (node) {
854 if (node_set_mark(node, offset, mark))
855 return;
856 offset = node->offset;
857 node = xa_parent_locked(xas->xa, node);
858 }
859
860 if (!xa_marked(xas->xa, mark))
861 xa_mark_set(xas->xa, mark);
862}
863EXPORT_SYMBOL_GPL(xas_set_mark);
864
865/**
866 * xas_clear_mark() - Clears the mark on this entry and its parents.
867 * @xas: XArray operation state.
868 * @mark: Mark number.
869 *
870 * Clears the specified mark on this entry, and walks back to the head
871 * attempting to clear it on all the ancestor entries. Does nothing if
872 * @xas has not been walked to an entry, or is in an error state.
873 */
874void xas_clear_mark(const struct xa_state *xas, xa_mark_t mark)
875{
876 struct xa_node *node = xas->xa_node;
877 unsigned int offset = xas->xa_offset;
878
879 if (xas_invalid(xas))
880 return;
881
882 while (node) {
883 if (!node_clear_mark(node, offset, mark))
884 return;
885 if (node_any_mark(node, mark))
886 return;
887
888 offset = node->offset;
889 node = xa_parent_locked(xas->xa, node);
890 }
891
892 if (xa_marked(xas->xa, mark))
893 xa_mark_clear(xas->xa, mark);
894}
895EXPORT_SYMBOL_GPL(xas_clear_mark);
896
897/**
898 * xas_init_marks() - Initialise all marks for the entry
899 * @xas: Array operations state.
900 *
901 * Initialise all marks for the entry specified by @xas. If we're tracking
902 * free entries with a mark, we need to set it on all entries. All other
903 * marks are cleared.
904 *
905 * This implementation is not as efficient as it could be; we may walk
906 * up the tree multiple times.
907 */
908void xas_init_marks(const struct xa_state *xas)
909{
910 xa_mark_t mark = 0;
911
912 for (;;) {
913 if (xa_track_free(xas->xa) && mark == XA_FREE_MARK)
914 xas_set_mark(xas, mark);
915 else
916 xas_clear_mark(xas, mark);
917 if (mark == XA_MARK_MAX)
918 break;
919 mark_inc(mark);
920 }
921}
922EXPORT_SYMBOL_GPL(xas_init_marks);
923
924/**
925 * xas_pause() - Pause a walk to drop a lock.
926 * @xas: XArray operation state.
927 *
928 * Some users need to pause a walk and drop the lock they're holding in
929 * order to yield to a higher priority thread or carry out an operation
930 * on an entry. Those users should call this function before they drop
931 * the lock. It resets the @xas to be suitable for the next iteration
932 * of the loop after the user has reacquired the lock. If most entries
933 * found during a walk require you to call xas_pause(), the xa_for_each()
934 * iterator may be more appropriate.
935 *
936 * Note that xas_pause() only works for forward iteration. If a user needs
937 * to pause a reverse iteration, we will need a xas_pause_rev().
938 */
939void xas_pause(struct xa_state *xas)
940{
941 struct xa_node *node = xas->xa_node;
942
943 if (xas_invalid(xas))
944 return;
945
946 if (node) {
947 unsigned int offset = xas->xa_offset;
948 while (++offset < XA_CHUNK_SIZE) {
949 if (!xa_is_sibling(xa_entry(xas->xa, node, offset)))
950 break;
951 }
952 xas->xa_index += (offset - xas->xa_offset) << node->shift;
953 } else {
954 xas->xa_index++;
955 }
956 xas->xa_node = XAS_RESTART;
957}
958EXPORT_SYMBOL_GPL(xas_pause);
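A sketch of the intended usage, assuming cond_resched() is acceptable in the caller's context; example_walk() and the batch size of 64 are invented for illustration.

/* Minimal sketch: drop the RCU lock every 64 entries during a long walk. */
static void example_walk(struct xarray *xa, unsigned long max)
{
	XA_STATE(xas, xa, 0);
	void *entry;
	unsigned int seen = 0;

	rcu_read_lock();
	xas_for_each(&xas, entry, max) {
		if (xas_retry(&xas, entry))
			continue;
		if (++seen % 64)
			continue;
		xas_pause(&xas);	/* make the state safe to restart from */
		rcu_read_unlock();
		cond_resched();
		rcu_read_lock();
	}
	rcu_read_unlock();
}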
959
960/*
961 * __xas_prev() - Find the previous entry in the XArray.
962 * @xas: XArray operation state.
963 *
964 * Helper function for xas_prev() which handles all the complex cases
965 * out of line.
966 */
967void *__xas_prev(struct xa_state *xas)
968{
969 void *entry;
970
971 if (!xas_frozen(xas->xa_node))
972 xas->xa_index--;
973 if (xas_not_node(xas->xa_node))
974 return xas_load(xas);
975
976 if (xas->xa_offset != get_offset(xas->xa_index, xas->xa_node))
977 xas->xa_offset--;
978
979 while (xas->xa_offset == 255) {
980 xas->xa_offset = xas->xa_node->offset - 1;
981 xas->xa_node = xa_parent(xas->xa, xas->xa_node);
982 if (!xas->xa_node)
983 return set_bounds(xas);
984 }
985
986 for (;;) {
987 entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
988 if (!xa_is_node(entry))
989 return entry;
990
991 xas->xa_node = xa_to_node(entry);
992 xas_set_offset(xas);
993 }
994}
995EXPORT_SYMBOL_GPL(__xas_prev);
996
997/*
998 * __xas_next() - Find the next entry in the XArray.
999 * @xas: XArray operation state.
1000 *
1001 * Helper function for xas_next() which handles all the complex cases
1002 * out of line.
1003 */
1004void *__xas_next(struct xa_state *xas)
1005{
1006 void *entry;
1007
1008 if (!xas_frozen(xas->xa_node))
1009 xas->xa_index++;
1010 if (xas_not_node(xas->xa_node))
1011 return xas_load(xas);
1012
1013 if (xas->xa_offset != get_offset(xas->xa_index, xas->xa_node))
1014 xas->xa_offset++;
1015
1016 while (xas->xa_offset == XA_CHUNK_SIZE) {
1017 xas->xa_offset = xas->xa_node->offset + 1;
1018 xas->xa_node = xa_parent(xas->xa, xas->xa_node);
1019 if (!xas->xa_node)
1020 return set_bounds(xas);
1021 }
1022
1023 for (;;) {
1024 entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
1025 if (!xa_is_node(entry))
1026 return entry;
1027
1028 xas->xa_node = xa_to_node(entry);
1029 xas_set_offset(xas);
1030 }
1031}
1032EXPORT_SYMBOL_GPL(__xas_next);
1033
1034/**
1035 * xas_find() - Find the next present entry in the XArray.
1036 * @xas: XArray operation state.
1037 * @max: Highest index to return.
1038 *
1039 * If the @xas has not yet been walked to an entry, return the entry
1040 * which has an index >= xas.xa_index. If it has been walked, the entry
1041 * currently being pointed at has been processed, and so we move to the
1042 * next entry.
1043 *
1044 * If no entry is found and the array is smaller than @max, the iterator
1045 * is set to the smallest index not yet in the array. This allows @xas
1046 * to be immediately passed to xas_store().
1047 *
1048 * Return: The entry, if found, otherwise %NULL.
1049 */
1050void *xas_find(struct xa_state *xas, unsigned long max)
1051{
1052 void *entry;
1053
1054 if (xas_error(xas))
1055 return NULL;
1056
1057 if (!xas->xa_node) {
1058 xas->xa_index = 1;
1059 return set_bounds(xas);
1060 } else if (xas_top(xas->xa_node)) {
1061 entry = xas_load(xas);
1062 if (entry || xas_not_node(xas->xa_node))
1063 return entry;
1064 } else if (!xas->xa_node->shift &&
1065 xas->xa_offset != (xas->xa_index & XA_CHUNK_MASK)) {
1066 xas->xa_offset = ((xas->xa_index - 1) & XA_CHUNK_MASK) + 1;
1067 }
1068
1069 xas_advance(xas);
1070
1071 while (xas->xa_node && (xas->xa_index <= max)) {
1072 if (unlikely(xas->xa_offset == XA_CHUNK_SIZE)) {
1073 xas->xa_offset = xas->xa_node->offset + 1;
1074 xas->xa_node = xa_parent(xas->xa, xas->xa_node);
1075 continue;
1076 }
1077
1078 entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
1079 if (xa_is_node(entry)) {
1080 xas->xa_node = xa_to_node(entry);
1081 xas->xa_offset = 0;
1082 continue;
1083 }
1084 if (entry && !xa_is_sibling(entry))
1085 return entry;
1086
1087 xas_advance(xas);
1088 }
1089
1090 if (!xas->xa_node)
1091 xas->xa_node = XAS_BOUNDS;
1092 return NULL;
1093}
1094EXPORT_SYMBOL_GPL(xas_find);
1095
1096/**
1097 * xas_find_marked() - Find the next marked entry in the XArray.
1098 * @xas: XArray operation state.
1099 * @max: Highest index to return.
1100 * @mark: Mark number to search for.
1101 *
1102 * If the @xas has not yet been walked to an entry, return the marked entry
1103 * which has an index >= xas.xa_index. If it has been walked, the entry
1104 * currently being pointed at has been processed, and so we return the
1105 * first marked entry with an index > xas.xa_index.
1106 *
1107 * If no marked entry is found and the array is smaller than @max, @xas is
1108 * set to the bounds state and xas->xa_index is set to the smallest index
1109 * not yet in the array. This allows @xas to be immediately passed to
1110 * xas_store().
1111 *
1112 * If no entry is found before @max is reached, @xas is set to the restart
1113 * state.
1114 *
1115 * Return: The entry, if found, otherwise %NULL.
1116 */
1117void *xas_find_marked(struct xa_state *xas, unsigned long max, xa_mark_t mark)
1118{
1119 bool advance = true;
1120 unsigned int offset;
1121 void *entry;
1122
1123 if (xas_error(xas))
1124 return NULL;
1125
1126 if (!xas->xa_node) {
1127 xas->xa_index = 1;
1128 goto out;
1129 } else if (xas_top(xas->xa_node)) {
1130 advance = false;
1131 entry = xa_head(xas->xa);
1132 xas->xa_node = NULL;
1133 if (xas->xa_index > max_index(entry))
1134 goto bounds;
1135 if (!xa_is_node(entry)) {
1136 if (xa_marked(xas->xa, mark))
1137 return entry;
1138 xas->xa_index = 1;
1139 goto out;
1140 }
1141 xas->xa_node = xa_to_node(entry);
1142 xas->xa_offset = xas->xa_index >> xas->xa_node->shift;
1143 }
1144
1145 while (xas->xa_index <= max) {
1146 if (unlikely(xas->xa_offset == XA_CHUNK_SIZE)) {
1147 xas->xa_offset = xas->xa_node->offset + 1;
1148 xas->xa_node = xa_parent(xas->xa, xas->xa_node);
1149 if (!xas->xa_node)
1150 break;
1151 advance = false;
1152 continue;
1153 }
1154
1155 if (!advance) {
1156 entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
1157 if (xa_is_sibling(entry)) {
1158 xas->xa_offset = xa_to_sibling(entry);
1159 xas_move_index(xas, xas->xa_offset);
1160 }
1161 }
1162
1163 offset = xas_find_chunk(xas, advance, mark);
1164 if (offset > xas->xa_offset) {
1165 advance = false;
1166 xas_move_index(xas, offset);
1167 /* Mind the wrap */
1168 if ((xas->xa_index - 1) >= max)
1169 goto max;
1170 xas->xa_offset = offset;
1171 if (offset == XA_CHUNK_SIZE)
1172 continue;
1173 }
1174
1175 entry = xa_entry(xas->xa, xas->xa_node, xas->xa_offset);
1176 if (!xa_is_node(entry))
1177 return entry;
1178 xas->xa_node = xa_to_node(entry);
1179 xas_set_offset(xas);
1180 }
1181
1182out:
1183 if (!max)
1184 goto max;
1185bounds:
1186 xas->xa_node = XAS_BOUNDS;
1187 return NULL;
1188max:
1189 xas->xa_node = XAS_RESTART;
1190 return NULL;
1191}
1192EXPORT_SYMBOL_GPL(xas_find_marked);
1193
1194/**
1195 * xas_find_conflict() - Find the next present entry in a range.
1196 * @xas: XArray operation state.
1197 *
1198 * The @xas describes both a range and a position within that range.
1199 *
1200 * Context: Any context. Expects xa_lock to be held.
1201 * Return: The next entry in the range covered by @xas or %NULL.
1202 */
1203void *xas_find_conflict(struct xa_state *xas)
1204{
1205 void *curr;
1206
1207 if (xas_error(xas))
1208 return NULL;
1209
1210 if (!xas->xa_node)
1211 return NULL;
1212
1213 if (xas_top(xas->xa_node)) {
1214 curr = xas_start(xas);
1215 if (!curr)
1216 return NULL;
1217 while (xa_is_node(curr)) {
1218 struct xa_node *node = xa_to_node(curr);
1219 curr = xas_descend(xas, node);
1220 }
1221 if (curr)
1222 return curr;
1223 }
1224
1225 if (xas->xa_node->shift > xas->xa_shift)
1226 return NULL;
1227
1228 for (;;) {
1229 if (xas->xa_node->shift == xas->xa_shift) {
1230 if ((xas->xa_offset & xas->xa_sibs) == xas->xa_sibs)
1231 break;
1232 } else if (xas->xa_offset == XA_CHUNK_MASK) {
1233 xas->xa_offset = xas->xa_node->offset;
1234 xas->xa_node = xa_parent_locked(xas->xa, xas->xa_node);
1235 if (!xas->xa_node)
1236 break;
1237 continue;
1238 }
1239 curr = xa_entry_locked(xas->xa, xas->xa_node, ++xas->xa_offset);
1240 if (xa_is_sibling(curr))
1241 continue;
1242 while (xa_is_node(curr)) {
1243 xas->xa_node = xa_to_node(curr);
1244 xas->xa_offset = 0;
1245 curr = xa_entry_locked(xas->xa, xas->xa_node, 0);
1246 }
1247 if (curr)
1248 return curr;
1249 }
1250 xas->xa_offset -= xas->xa_sibs;
1251 return NULL;
1252}
1253EXPORT_SYMBOL_GPL(xas_find_conflict);
1254
1255/**
1256 * xa_init_flags() - Initialise an empty XArray with flags.
1257 * @xa: XArray.
1258 * @flags: XA_FLAG values.
1259 *
1260 * If you need to initialise an XArray with special flags (eg you need
1261 * to take the lock from interrupt context), use this function instead
1262 * of xa_init().
1263 *
1264 * Context: Any context.
1265 */
1266void xa_init_flags(struct xarray *xa, gfp_t flags)
1267{
1268 unsigned int lock_type;
1269 static struct lock_class_key xa_lock_irq;
1270 static struct lock_class_key xa_lock_bh;
1271
1272 spin_lock_init(&xa->xa_lock);
1273 xa->xa_flags = flags;
1274 xa->xa_head = NULL;
1275
1276 lock_type = xa_lock_type(xa);
1277 if (lock_type == XA_LOCK_IRQ)
1278 lockdep_set_class(&xa->xa_lock, &xa_lock_irq);
1279 else if (lock_type == XA_LOCK_BH)
1280 lockdep_set_class(&xa->xa_lock, &xa_lock_bh);
1281}
1282EXPORT_SYMBOL(xa_init_flags);
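For example, an array whose lock must be taken from interrupt context could be set up as below. This is a sketch; XA_FLAGS_LOCK_IRQ is assumed to be the flag provided for this purpose in <linux/xarray.h>, and the names are invented.

/* Minimal sketch: initialise an XArray whose lock is IRQ-safe. */
static struct xarray example_irq_array;

static void example_setup(void)
{
	xa_init_flags(&example_irq_array, XA_FLAGS_LOCK_IRQ);
}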
1283
1284/**
1285 * xa_load() - Load an entry from an XArray.
1286 * @xa: XArray.
1287 * @index: index into array.
1288 *
1289 * Context: Any context. Takes and releases the RCU lock.
1290 * Return: The entry at @index in @xa.
1291 */
1292void *xa_load(struct xarray *xa, unsigned long index)
1293{
1294 XA_STATE(xas, xa, index);
1295 void *entry;
1296
1297 rcu_read_lock();
1298 do {
1299 entry = xas_load(&xas);
1300 if (xa_is_zero(entry))
1301 entry = NULL;
1302 } while (xas_retry(&xas, entry));
1303 rcu_read_unlock();
1304
1305 return entry;
1306}
1307EXPORT_SYMBOL(xa_load);
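A small usage sketch (names invented): value entries created with xa_mk_value() come back from xa_load() and can be unpacked with xa_to_value().

/* Minimal sketch: store a small integer as a value entry and read it back. */
static void example_value(struct xarray *xa)
{
	void *entry;

	xa_store(xa, 3, xa_mk_value(42), GFP_KERNEL);
	entry = xa_load(xa, 3);
	if (xa_is_value(entry))
		pr_info("index 3 holds %lu\n", xa_to_value(entry));
}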
1308
1309static void *xas_result(struct xa_state *xas, void *curr)
1310{
1311 if (xa_is_zero(curr))
1312 return NULL;
1313 XA_NODE_BUG_ON(xas->xa_node, xa_is_internal(curr));
1314 if (xas_error(xas))
1315 curr = xas->xa_node;
1316 return curr;
1317}
1318
1319/**
1320 * __xa_erase() - Erase this entry from the XArray while locked.
1321 * @xa: XArray.
1322 * @index: Index into array.
1323 *
1324 * If the entry at this index is a multi-index entry then all indices will
1325 * be erased, and the entry will no longer be a multi-index entry.
 1326 * This function expects the xa_lock to be held on entry. It never
 1327 * needs to allocate memory, so the lock is not dropped.
 1328 *
 1329 * Context: Any context. Expects xa_lock to be held on entry.
1330 * Return: The old entry at this index.
1331 */
1332void *__xa_erase(struct xarray *xa, unsigned long index)
1333{
1334 XA_STATE(xas, xa, index);
1335 return xas_result(&xas, xas_store(&xas, NULL));
1336}
1337EXPORT_SYMBOL_GPL(__xa_erase);
1338
1339/**
1340 * xa_store() - Store this entry in the XArray.
1341 * @xa: XArray.
1342 * @index: Index into array.
1343 * @entry: New entry.
1344 * @gfp: Memory allocation flags.
1345 *
1346 * After this function returns, loads from this index will return @entry.
1347 * Storing into an existing multislot entry updates the entry of every index.
1348 * The marks associated with @index are unaffected unless @entry is %NULL.
1349 *
1350 * Context: Process context. Takes and releases the xa_lock. May sleep
1351 * if the @gfp flags permit.
1352 * Return: The old entry at this index on success, xa_err(-EINVAL) if @entry
1353 * cannot be stored in an XArray, or xa_err(-ENOMEM) if memory allocation
1354 * failed.
1355 */
1356void *xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp)
1357{
1358 XA_STATE(xas, xa, index);
1359 void *curr;
1360
1361 if (WARN_ON_ONCE(xa_is_internal(entry)))
1362 return XA_ERROR(-EINVAL);
1363
1364 do {
1365 xas_lock(&xas);
1366 curr = xas_store(&xas, entry);
1367 if (xa_track_free(xa) && entry)
1368 xas_clear_mark(&xas, XA_FREE_MARK);
1369 xas_unlock(&xas);
1370 } while (xas_nomem(&xas, gfp));
1371
1372 return xas_result(&xas, curr);
1373}
1374EXPORT_SYMBOL(xa_store);
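Because errors are encoded in the returned pointer, callers check them with xa_is_err()/xa_err(). A sketch, with example_insert() invented for illustration.

/* Minimal sketch: store an entry and convert an encoded error to an errno. */
static int example_insert(struct xarray *xa, unsigned long index, void *item)
{
	void *curr = xa_store(xa, index, item, GFP_KERNEL);

	if (xa_is_err(curr))
		return xa_err(curr);	/* -EINVAL or -ENOMEM */
	/* curr is whatever was previously stored at @index (NULL if nothing) */
	return 0;
}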
1375
1376/**
1377 * __xa_store() - Store this entry in the XArray.
1378 * @xa: XArray.
1379 * @index: Index into array.
1380 * @entry: New entry.
1381 * @gfp: Memory allocation flags.
1382 *
1383 * You must already be holding the xa_lock when calling this function.
1384 * It will drop the lock if needed to allocate memory, and then reacquire
1385 * it afterwards.
1386 *
1387 * Context: Any context. Expects xa_lock to be held on entry. May
1388 * release and reacquire xa_lock if @gfp flags permit.
1389 * Return: The old entry at this index or xa_err() if an error happened.
1390 */
1391void *__xa_store(struct xarray *xa, unsigned long index, void *entry, gfp_t gfp)
1392{
1393 XA_STATE(xas, xa, index);
1394 void *curr;
1395
1396 if (WARN_ON_ONCE(xa_is_internal(entry)))
1397 return XA_ERROR(-EINVAL);
1398
1399 do {
1400 curr = xas_store(&xas, entry);
1401 if (xa_track_free(xa) && entry)
1402 xas_clear_mark(&xas, XA_FREE_MARK);
1403 } while (__xas_nomem(&xas, gfp));
1404
1405 return xas_result(&xas, curr);
1406}
1407EXPORT_SYMBOL(__xa_store);
1408
1409/**
1410 * xa_cmpxchg() - Conditionally replace an entry in the XArray.
1411 * @xa: XArray.
1412 * @index: Index into array.
1413 * @old: Old value to test against.
1414 * @entry: New value to place in array.
1415 * @gfp: Memory allocation flags.
1416 *
1417 * If the entry at @index is the same as @old, replace it with @entry.
1418 * If the return value is equal to @old, then the exchange was successful.
1419 *
1420 * Context: Process context. Takes and releases the xa_lock. May sleep
1421 * if the @gfp flags permit.
1422 * Return: The old value at this index or xa_err() if an error happened.
1423 */
1424void *xa_cmpxchg(struct xarray *xa, unsigned long index,
1425 void *old, void *entry, gfp_t gfp)
1426{
1427 XA_STATE(xas, xa, index);
1428 void *curr;
1429
1430 if (WARN_ON_ONCE(xa_is_internal(entry)))
1431 return XA_ERROR(-EINVAL);
1432
1433 do {
1434 xas_lock(&xas);
1435 curr = xas_load(&xas);
1436 if (curr == XA_ZERO_ENTRY)
1437 curr = NULL;
1438 if (curr == old) {
1439 xas_store(&xas, entry);
1440 if (xa_track_free(xa) && entry)
1441 xas_clear_mark(&xas, XA_FREE_MARK);
1442 }
1443 xas_unlock(&xas);
1444 } while (xas_nomem(&xas, gfp));
1445
1446 return xas_result(&xas, curr);
1447}
1448EXPORT_SYMBOL(xa_cmpxchg);
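The classic use is insert-if-absent: compare against NULL so the store only happens when the slot is empty. A sketch; example_insert_once() is an invented name.

/* Minimal sketch: only insert if nothing is stored at @index yet. */
static int example_insert_once(struct xarray *xa, unsigned long index,
		void *item)
{
	void *curr = xa_cmpxchg(xa, index, NULL, item, GFP_KERNEL);

	if (xa_is_err(curr))
		return xa_err(curr);
	return curr ? -EBUSY : 0;	/* non-NULL means somebody beat us to it */
}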
1449
1450/**
 1451 * __xa_cmpxchg() - Conditionally replace an entry in the XArray while locked.
1452 * @xa: XArray.
1453 * @index: Index into array.
1454 * @old: Old value to test against.
1455 * @entry: New entry.
1456 * @gfp: Memory allocation flags.
1457 *
1458 * You must already be holding the xa_lock when calling this function.
1459 * It will drop the lock if needed to allocate memory, and then reacquire
1460 * it afterwards.
1461 *
1462 * Context: Any context. Expects xa_lock to be held on entry. May
1463 * release and reacquire xa_lock if @gfp flags permit.
1464 * Return: The old entry at this index or xa_err() if an error happened.
1465 */
1466void *__xa_cmpxchg(struct xarray *xa, unsigned long index,
1467 void *old, void *entry, gfp_t gfp)
1468{
1469 XA_STATE(xas, xa, index);
1470 void *curr;
1471
1472 if (WARN_ON_ONCE(xa_is_internal(entry)))
1473 return XA_ERROR(-EINVAL);
1474
1475 do {
1476 curr = xas_load(&xas);
1477 if (curr == XA_ZERO_ENTRY)
1478 curr = NULL;
1479 if (curr == old) {
1480 xas_store(&xas, entry);
1481 if (xa_track_free(xa) && entry)
1482 xas_clear_mark(&xas, XA_FREE_MARK);
1483 }
1484 } while (__xas_nomem(&xas, gfp));
1485
1486 return xas_result(&xas, curr);
1487}
1488EXPORT_SYMBOL(__xa_cmpxchg);
1489
1490/**
1491 * xa_reserve() - Reserve this index in the XArray.
1492 * @xa: XArray.
1493 * @index: Index into array.
1494 * @gfp: Memory allocation flags.
1495 *
1496 * Ensures there is somewhere to store an entry at @index in the array.
1497 * If there is already something stored at @index, this function does
1498 * nothing. If there was nothing there, the entry is marked as reserved.
1499 * Loads from @index will continue to see a %NULL pointer until a
1500 * subsequent store to @index.
1501 *
1502 * If you do not use the entry that you have reserved, call xa_release()
1503 * or xa_erase() to free any unnecessary memory.
1504 *
1505 * Context: Process context. Takes and releases the xa_lock, IRQ or BH safe
1506 * if specified in XArray flags. May sleep if the @gfp flags permit.
1507 * Return: 0 if the reservation succeeded or -ENOMEM if it failed.
1508 */
1509int xa_reserve(struct xarray *xa, unsigned long index, gfp_t gfp)
1510{
1511 XA_STATE(xas, xa, index);
1512 unsigned int lock_type = xa_lock_type(xa);
1513 void *curr;
1514
1515 do {
1516 xas_lock_type(&xas, lock_type);
1517 curr = xas_load(&xas);
1518 if (!curr)
1519 xas_store(&xas, XA_ZERO_ENTRY);
1520 xas_unlock_type(&xas, lock_type);
1521 } while (xas_nomem(&xas, gfp));
1522
1523 return xas_error(&xas);
1524}
1525EXPORT_SYMBOL(xa_reserve);
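A sketch of the intended pattern: reserve while it is still safe to sleep, then fill the slot later under the lock. The helper name and the use of GFP_NOWAIT for the locked store are illustrative assumptions, relying on the reservation meaning no further allocation is needed.

/* Minimal sketch: pre-allocate the slot, then store without sleeping. */
static int example_reserve(struct xarray *xa, unsigned long index, void *item)
{
	int err = xa_reserve(xa, index, GFP_KERNEL);

	if (err)
		return err;

	xa_lock(xa);
	__xa_store(xa, index, item, GFP_NOWAIT);	/* slot already exists */
	xa_unlock(xa);
	return 0;
}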
1526
1527#ifdef CONFIG_XARRAY_MULTI
1528static void xas_set_range(struct xa_state *xas, unsigned long first,
1529 unsigned long last)
1530{
1531 unsigned int shift = 0;
1532 unsigned long sibs = last - first;
1533 unsigned int offset = XA_CHUNK_MASK;
1534
1535 xas_set(xas, first);
1536
1537 while ((first & XA_CHUNK_MASK) == 0) {
1538 if (sibs < XA_CHUNK_MASK)
1539 break;
1540 if ((sibs == XA_CHUNK_MASK) && (offset < XA_CHUNK_MASK))
1541 break;
1542 shift += XA_CHUNK_SHIFT;
1543 if (offset == XA_CHUNK_MASK)
1544 offset = sibs & XA_CHUNK_MASK;
1545 sibs >>= XA_CHUNK_SHIFT;
1546 first >>= XA_CHUNK_SHIFT;
1547 }
1548
1549 offset = first & XA_CHUNK_MASK;
1550 if (offset + sibs > XA_CHUNK_MASK)
1551 sibs = XA_CHUNK_MASK - offset;
1552 if ((((first + sibs + 1) << shift) - 1) > last)
1553 sibs -= 1;
1554
1555 xas->xa_shift = shift;
1556 xas->xa_sibs = sibs;
1557}
1558
1559/**
1560 * xa_store_range() - Store this entry at a range of indices in the XArray.
1561 * @xa: XArray.
1562 * @first: First index to affect.
1563 * @last: Last index to affect.
1564 * @entry: New entry.
1565 * @gfp: Memory allocation flags.
1566 *
 1567 * After this function returns, loads from any index between @first and @last,
 1568 * inclusive, will return @entry.
 1569 * Storing into an existing multislot entry updates the entry of every index.
 1570 * The marks associated with these indices are unaffected unless @entry is %NULL.
1571 *
1572 * Context: Process context. Takes and releases the xa_lock. May sleep
1573 * if the @gfp flags permit.
1574 * Return: %NULL on success, xa_err(-EINVAL) if @entry cannot be stored in
1575 * an XArray, or xa_err(-ENOMEM) if memory allocation failed.
1576 */
1577void *xa_store_range(struct xarray *xa, unsigned long first,
1578 unsigned long last, void *entry, gfp_t gfp)
1579{
1580 XA_STATE(xas, xa, 0);
1581
1582 if (WARN_ON_ONCE(xa_is_internal(entry)))
1583 return XA_ERROR(-EINVAL);
1584 if (last < first)
1585 return XA_ERROR(-EINVAL);
1586
1587 do {
1588 xas_lock(&xas);
1589 if (entry) {
1590 unsigned int order = (last == ~0UL) ? 64 :
1591 ilog2(last + 1);
1592 xas_set_order(&xas, last, order);
1593 xas_create(&xas);
1594 if (xas_error(&xas))
1595 goto unlock;
1596 }
1597 do {
1598 xas_set_range(&xas, first, last);
1599 xas_store(&xas, entry);
1600 if (xas_error(&xas))
1601 goto unlock;
1602 first += xas_size(&xas);
1603 } while (first <= last);
1604unlock:
1605 xas_unlock(&xas);
1606 } while (xas_nomem(&xas, gfp));
1607
1608 return xas_result(&xas, NULL);
1609}
1610EXPORT_SYMBOL(xa_store_range);
1611#endif /* CONFIG_XARRAY_MULTI */
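With CONFIG_XARRAY_MULTI enabled, a range store makes a run of indices resolve to the same object; a sketch with invented names follows.

/* Minimal sketch: indices 0-127 all resolve to the same object. */
static void example_range(struct xarray *xa, void *item)
{
	xa_store_range(xa, 0, 127, item, GFP_KERNEL);
	WARN_ON(xa_load(xa, 100) != item);
}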
1612
1613/**
1614 * __xa_alloc() - Find somewhere to store this entry in the XArray.
1615 * @xa: XArray.
1616 * @id: Pointer to ID.
1617 * @max: Maximum ID to allocate (inclusive).
1618 * @entry: New entry.
1619 * @gfp: Memory allocation flags.
1620 *
 1621 * Finds an unused ID in the range specified by @id and @max and stores
 1622 * @entry at that index. Only then is the @id pointer updated, so a
 1623 * concurrent lookup will not see an uninitialised @id.
1624 *
1625 * Context: Any context. Expects xa_lock to be held on entry. May
1626 * release and reacquire xa_lock if @gfp flags permit.
1627 * Return: 0 on success, -ENOMEM if memory allocation fails or -ENOSPC if
1628 * there is no more space in the XArray.
1629 */
1630int __xa_alloc(struct xarray *xa, u32 *id, u32 max, void *entry, gfp_t gfp)
1631{
1632 XA_STATE(xas, xa, 0);
1633 int err;
1634
1635 if (WARN_ON_ONCE(xa_is_internal(entry)))
1636 return -EINVAL;
1637 if (WARN_ON_ONCE(!xa_track_free(xa)))
1638 return -EINVAL;
1639
1640 if (!entry)
1641 entry = XA_ZERO_ENTRY;
1642
1643 do {
1644 xas.xa_index = *id;
1645 xas_find_marked(&xas, max, XA_FREE_MARK);
1646 if (xas.xa_node == XAS_RESTART)
1647 xas_set_err(&xas, -ENOSPC);
1648 xas_store(&xas, entry);
1649 xas_clear_mark(&xas, XA_FREE_MARK);
1650 } while (__xas_nomem(&xas, gfp));
1651
1652 err = xas_error(&xas);
1653 if (!err)
1654 *id = xas.xa_index;
1655 return err;
1656}
1657EXPORT_SYMBOL(__xa_alloc);
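A sketch of ID allocation, assuming the array was initialised with the free-tracking flag (XA_FLAGS_ALLOC is assumed here); example_alloc() and the choice to start searching from 0 are illustrative.

/* Minimal sketch: allocate a free index for @item and return it in *@id. */
static int example_alloc(struct xarray *xa, void *item, u32 *id)
{
	int err;

	*id = 0;		/* start searching from the beginning */
	xa_lock(xa);
	err = __xa_alloc(xa, id, UINT_MAX, item, GFP_KERNEL);
	xa_unlock(xa);
	return err;
}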
1658
1659/**
1660 * __xa_set_mark() - Set this mark on this entry while locked.
1661 * @xa: XArray.
1662 * @index: Index of entry.
1663 * @mark: Mark number.
1664 *
1665 * Attempting to set a mark on a NULL entry does not succeed.
1666 *
1667 * Context: Any context. Expects xa_lock to be held on entry.
1668 */
1669void __xa_set_mark(struct xarray *xa, unsigned long index, xa_mark_t mark)
1670{
1671 XA_STATE(xas, xa, index);
1672 void *entry = xas_load(&xas);
1673
1674 if (entry)
1675 xas_set_mark(&xas, mark);
1676}
1677EXPORT_SYMBOL_GPL(__xa_set_mark);
1678
1679/**
1680 * __xa_clear_mark() - Clear this mark on this entry while locked.
1681 * @xa: XArray.
1682 * @index: Index of entry.
1683 * @mark: Mark number.
1684 *
1685 * Context: Any context. Expects xa_lock to be held on entry.
1686 */
1687void __xa_clear_mark(struct xarray *xa, unsigned long index, xa_mark_t mark)
1688{
1689 XA_STATE(xas, xa, index);
1690 void *entry = xas_load(&xas);
1691
1692 if (entry)
1693 xas_clear_mark(&xas, mark);
1694}
1695EXPORT_SYMBOL_GPL(__xa_clear_mark);
1696
1697/**
1698 * xa_get_mark() - Inquire whether this mark is set on this entry.
1699 * @xa: XArray.
1700 * @index: Index of entry.
1701 * @mark: Mark number.
1702 *
1703 * This function uses the RCU read lock, so the result may be out of date
1704 * by the time it returns. If you need the result to be stable, use a lock.
1705 *
1706 * Context: Any context. Takes and releases the RCU lock.
1707 * Return: True if the entry at @index has this mark set, false if it doesn't.
1708 */
1709bool xa_get_mark(struct xarray *xa, unsigned long index, xa_mark_t mark)
1710{
1711 XA_STATE(xas, xa, index);
1712 void *entry;
1713
1714 rcu_read_lock();
1715 entry = xas_start(&xas);
1716 while (xas_get_mark(&xas, mark)) {
1717 if (!xa_is_node(entry))
1718 goto found;
1719 entry = xas_descend(&xas, xa_to_node(entry));
1720 }
1721 rcu_read_unlock();
1722 return false;
1723 found:
1724 rcu_read_unlock();
1725 return true;
1726}
1727EXPORT_SYMBOL(xa_get_mark);
1728
1729/**
1730 * xa_set_mark() - Set this mark on this entry.
1731 * @xa: XArray.
1732 * @index: Index of entry.
1733 * @mark: Mark number.
1734 *
1735 * Attempting to set a mark on a NULL entry does not succeed.
1736 *
1737 * Context: Process context. Takes and releases the xa_lock.
1738 */
1739void xa_set_mark(struct xarray *xa, unsigned long index, xa_mark_t mark)
1740{
1741 xa_lock(xa);
1742 __xa_set_mark(xa, index, mark);
1743 xa_unlock(xa);
1744}
1745EXPORT_SYMBOL(xa_set_mark);
1746
1747/**
1748 * xa_clear_mark() - Clear this mark on this entry.
1749 * @xa: XArray.
1750 * @index: Index of entry.
1751 * @mark: Mark number.
1752 *
1753 * Clearing a mark always succeeds.
1754 *
1755 * Context: Process context. Takes and releases the xa_lock.
1756 */
1757void xa_clear_mark(struct xarray *xa, unsigned long index, xa_mark_t mark)
1758{
1759 xa_lock(xa);
1760 __xa_clear_mark(xa, index, mark);
1761 xa_unlock(xa);
1762}
1763EXPORT_SYMBOL(xa_clear_mark);
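Marks are typically used to tag entries for later batched processing. A sketch using XA_MARK_0, with invented names; it assumes an entry is already present at @index, since marking a NULL entry does nothing.

/* Minimal sketch: tag an entry, test the tag, then clear it. */
static void example_marks(struct xarray *xa, unsigned long index)
{
	xa_set_mark(xa, index, XA_MARK_0);
	if (xa_get_mark(xa, index, XA_MARK_0))
		pr_info("index %lu is marked\n", index);
	xa_clear_mark(xa, index, XA_MARK_0);
}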
1764
1765/**
1766 * xa_find() - Search the XArray for an entry.
1767 * @xa: XArray.
1768 * @indexp: Pointer to an index.
1769 * @max: Maximum index to search to.
1770 * @filter: Selection criterion.
1771 *
1772 * Finds the entry in @xa which matches the @filter, and has the lowest
1773 * index that is at least @indexp and no more than @max.
1774 * If an entry is found, @indexp is updated to be the index of the entry.
1775 * This function is protected by the RCU read lock, so it may not find
1776 * entries which are being simultaneously added. It will not return an
1777 * %XA_RETRY_ENTRY; if you need to see retry entries, use xas_find().
1778 *
1779 * Context: Any context. Takes and releases the RCU lock.
1780 * Return: The entry, if found, otherwise %NULL.
1781 */
1782void *xa_find(struct xarray *xa, unsigned long *indexp,
1783 unsigned long max, xa_mark_t filter)
1784{
1785 XA_STATE(xas, xa, *indexp);
1786 void *entry;
1787
1788 rcu_read_lock();
1789 do {
1790 if ((__force unsigned int)filter < XA_MAX_MARKS)
1791 entry = xas_find_marked(&xas, max, filter);
1792 else
1793 entry = xas_find(&xas, max);
1794 } while (xas_retry(&xas, entry));
1795 rcu_read_unlock();
1796
1797 if (entry)
1798 *indexp = xas.xa_index;
1799 return entry;
1800}
1801EXPORT_SYMBOL(xa_find);
1802
1803/**
1804 * xa_find_after() - Search the XArray for a present entry.
1805 * @xa: XArray.
1806 * @indexp: Pointer to an index.
1807 * @max: Maximum index to search to.
1808 * @filter: Selection criterion.
1809 *
1810 * Finds the entry in @xa which matches the @filter and has the lowest
1811 * index that is above @indexp and no more than @max.
1812 * If an entry is found, @indexp is updated to be the index of the entry.
1813 * This function is protected by the RCU read lock, so it may miss entries
1814 * which are being simultaneously added. It will not return an
1815 * %XA_RETRY_ENTRY; if you need to see retry entries, use xas_find().
1816 *
1817 * Context: Any context. Takes and releases the RCU lock.
1818 * Return: The pointer, if found, otherwise %NULL.
1819 */
1820void *xa_find_after(struct xarray *xa, unsigned long *indexp,
1821 unsigned long max, xa_mark_t filter)
1822{
1823 XA_STATE(xas, xa, *indexp + 1);
1824 void *entry;
1825
1826 rcu_read_lock();
1827 for (;;) {
1828 if ((__force unsigned int)filter < XA_MAX_MARKS)
1829 entry = xas_find_marked(&xas, max, filter);
1830 else
1831 entry = xas_find(&xas, max);
1832 if (xas.xa_shift) {
1833 if (xas.xa_index & ((1UL << xas.xa_shift) - 1))
1834 continue;
1835 } else {
1836 if (xas.xa_offset < (xas.xa_index & XA_CHUNK_MASK))
1837 continue;
1838 }
1839 if (!xas_retry(&xas, entry))
1840 break;
1841 }
1842 rcu_read_unlock();
1843
1844 if (entry)
1845 *indexp = xas.xa_index;
1846 return entry;
1847}
1848EXPORT_SYMBOL(xa_find_after);
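Together, xa_find() and xa_find_after() form a simple iteration loop (roughly what the xa_for_each() macro in the header expands to). A sketch with invented names:

/* Minimal sketch: visit every present entry with an index <= max. */
static void example_iterate(struct xarray *xa, unsigned long max)
{
	unsigned long index = 0;
	void *entry;

	entry = xa_find(xa, &index, max, XA_PRESENT);
	while (entry) {
		pr_info("index %lu: %px\n", index, entry);
		entry = xa_find_after(xa, &index, max, XA_PRESENT);
	}
}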
1849
1850static unsigned int xas_extract_present(struct xa_state *xas, void **dst,
1851 unsigned long max, unsigned int n)
1852{
1853 void *entry;
1854 unsigned int i = 0;
1855
1856 rcu_read_lock();
1857 xas_for_each(xas, entry, max) {
1858 if (xas_retry(xas, entry))
1859 continue;
1860 dst[i++] = entry;
1861 if (i == n)
1862 break;
1863 }
1864 rcu_read_unlock();
1865
1866 return i;
1867}
1868
1869static unsigned int xas_extract_marked(struct xa_state *xas, void **dst,
1870 unsigned long max, unsigned int n, xa_mark_t mark)
1871{
1872 void *entry;
1873 unsigned int i = 0;
1874
1875 rcu_read_lock();
1876 xas_for_each_marked(xas, entry, max, mark) {
1877 if (xas_retry(xas, entry))
1878 continue;
1879 dst[i++] = entry;
1880 if (i == n)
1881 break;
1882 }
1883 rcu_read_unlock();
1884
1885 return i;
1886}
1887
1888/**
1889 * xa_extract() - Copy selected entries from the XArray into a normal array.
1890 * @xa: The source XArray to copy from.
1891 * @dst: The buffer to copy entries into.
1892 * @start: The first index in the XArray eligible to be selected.
1893 * @max: The last index in the XArray eligible to be selected.
1894 * @n: The maximum number of entries to copy.
1895 * @filter: Selection criterion.
1896 *
1897 * Copies up to @n entries that match @filter from the XArray. The
1898 * copied entries will have indices between @start and @max, inclusive.
1899 *
1900 * The @filter may be an XArray mark value, in which case entries which are
1901 * marked with that mark will be copied. It may also be %XA_PRESENT, in
1902 * which case all entries which are not NULL will be copied.
1903 *
1904 * The entries returned may not represent a snapshot of the XArray at a
1905 * moment in time. For example, if another thread stores to index 5, then
1906 * index 10, calling xa_extract() may return the old contents of index 5
1907 * and the new contents of index 10. Indices not modified while this
1908 * function is running will not be skipped.
1909 *
1910 * If you need stronger guarantees, holding the xa_lock across calls to this
1911 * function will prevent concurrent modification.
1912 *
1913 * Context: Any context. Takes and releases the RCU lock.
1914 * Return: The number of entries copied.
1915 */
1916unsigned int xa_extract(struct xarray *xa, void **dst, unsigned long start,
1917 unsigned long max, unsigned int n, xa_mark_t filter)
1918{
1919 XA_STATE(xas, xa, start);
1920
1921 if (!n)
1922 return 0;
1923
1924 if ((__force unsigned int)filter < XA_MAX_MARKS)
1925 return xas_extract_marked(&xas, dst, max, n, filter);
1926 return xas_extract_present(&xas, dst, max, n);
1927}
1928EXPORT_SYMBOL(xa_extract);
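/*
 * Illustrative sketch, not part of lib/xarray.c: copying up to 16 present
 * entries from the range [0, 1023] into a local buffer with xa_extract().
 * As documented above, the result is not an atomic snapshot unless the
 * caller holds the xa_lock across the call.  The buffer size, index range
 * and the name "example_extract" are arbitrary assumptions for this sketch.
 */
static void example_extract(struct xarray *example_xa)
{
	void *batch[16];
	unsigned int i, count;

	count = xa_extract(example_xa, batch, 0, 1023, ARRAY_SIZE(batch),
			XA_PRESENT);
	for (i = 0; i < count; i++)
		pr_info("extracted entry %px\n", batch[i]);
}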
1929
1930/**
1931 * xa_destroy() - Free all internal data structures.
1932 * @xa: XArray.
1933 *
1934 * After calling this function, the XArray is empty and has freed all memory
1935 * allocated for its internal data structures. You are responsible for
1936 * freeing the objects referenced by the XArray.
1937 *
1938 * Context: Any context. Takes and releases the xa_lock, interrupt-safe.
1939 */
1940void xa_destroy(struct xarray *xa)
1941{
1942 XA_STATE(xas, xa, 0);
1943 unsigned long flags;
1944 void *entry;
1945
1946 xas.xa_node = NULL;
1947 xas_lock_irqsave(&xas, flags);
1948 entry = xa_head_locked(xa);
1949 RCU_INIT_POINTER(xa->xa_head, NULL);
1950 xas_init_marks(&xas);
1951 /* lockdep checks we're still holding the lock in xas_free_nodes() */
1952 if (xa_is_node(entry))
1953 xas_free_nodes(&xas, xa_to_node(entry));
1954 xas_unlock_irqrestore(&xas, flags);
1955}
1956EXPORT_SYMBOL(xa_destroy);
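/*
 * Illustrative sketch, not part of lib/xarray.c: xa_destroy() frees only the
 * XArray's internal nodes, so a typical single-threaded teardown frees the
 * objects the entries point to first.  This assumes the entries are
 * kmalloc()ed pointers and that no other thread is using the array;
 * "example_xa" and "example_teardown" are assumed names.
 */
static void example_teardown(struct xarray *example_xa)
{
	unsigned long index = 0;
	void *entry = xa_find(example_xa, &index, ULONG_MAX, XA_PRESENT);

	while (entry) {
		kfree(entry);	/* assumption: entries were kmalloc()ed */
		entry = xa_find_after(example_xa, &index, ULONG_MAX, XA_PRESENT);
	}
	xa_destroy(example_xa);
}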
1957
1958#ifdef XA_DEBUG
1959void xa_dump_node(const struct xa_node *node)
1960{
1961 unsigned i, j;
1962
1963 if (!node)
1964 return;
1965 if ((unsigned long)node & 3) {
1966 pr_cont("node %px\n", node);
1967 return;
1968 }
1969
1970 pr_cont("node %px %s %d parent %px shift %d count %d values %d "
1971 "array %px list %px %px marks",
1972 node, node->parent ? "offset" : "max", node->offset,
1973 node->parent, node->shift, node->count, node->nr_values,
1974 node->array, node->private_list.prev, node->private_list.next);
1975 for (i = 0; i < XA_MAX_MARKS; i++)
1976 for (j = 0; j < XA_MARK_LONGS; j++)
1977 pr_cont(" %lx", node->marks[i][j]);
1978 pr_cont("\n");
1979}
1980
1981void xa_dump_index(unsigned long index, unsigned int shift)
1982{
1983 if (!shift)
1984 pr_info("%lu: ", index);
1985 else if (shift >= BITS_PER_LONG)
1986 pr_info("0-%lu: ", ~0UL);
1987 else
1988 pr_info("%lu-%lu: ", index, index | ((1UL << shift) - 1));
1989}
1990
1991void xa_dump_entry(const void *entry, unsigned long index, unsigned long shift)
1992{
1993 if (!entry)
1994 return;
1995
1996 xa_dump_index(index, shift);
1997
1998 if (xa_is_node(entry)) {
1999 if (shift == 0) {
2000 pr_cont("%px\n", entry);
2001 } else {
2002 unsigned long i;
2003 struct xa_node *node = xa_to_node(entry);
2004 xa_dump_node(node);
2005 for (i = 0; i < XA_CHUNK_SIZE; i++)
2006 xa_dump_entry(node->slots[i],
2007 index + (i << node->shift), node->shift);
2008 }
2009 } else if (xa_is_value(entry))
2010 pr_cont("value %ld (0x%lx) [%px]\n", xa_to_value(entry),
2011 xa_to_value(entry), entry);
2012 else if (!xa_is_internal(entry))
2013 pr_cont("%px\n", entry);
2014 else if (xa_is_retry(entry))
2015 pr_cont("retry (%ld)\n", xa_to_internal(entry));
2016 else if (xa_is_sibling(entry))
2017 pr_cont("sibling (slot %ld)\n", xa_to_sibling(entry));
2018 else if (xa_is_zero(entry))
2019 pr_cont("zero (%ld)\n", xa_to_internal(entry));
2020 else
2021 pr_cont("UNKNOWN ENTRY (%px)\n", entry);
2022}
2023
2024void xa_dump(const struct xarray *xa)
2025{
2026 void *entry = xa->xa_head;
2027 unsigned int shift = 0;
2028
2029 pr_info("xarray: %px head %px flags %x marks %d %d %d\n", xa, entry,
2030 xa->xa_flags, xa_marked(xa, XA_MARK_0),
2031 xa_marked(xa, XA_MARK_1), xa_marked(xa, XA_MARK_2));
2032 if (xa_is_node(entry))
2033 shift = xa_to_node(entry)->shift + XA_CHUNK_SHIFT;
2034 xa_dump_entry(entry, 0, shift);
2035}
2036#endif
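
/*
 * Illustrative sketch, not part of lib/xarray.c: the dump helpers above are
 * only built when XA_DEBUG is defined, as the userspace radix-tree/XArray
 * test harness is expected to do; normal kernel builds omit them.  A
 * debugging call site might look like the following, where "example_xa" is
 * an assumed caller-owned XArray.
 */
#ifdef XA_DEBUG
static void example_dump(struct xarray *example_xa)
{
	/* Prints the head, flags and marks, then recursively every node. */
	xa_dump(example_xa);
}
#endif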