aboutsummaryrefslogtreecommitdiffstats
path: root/lib
diff options
context:
space:
mode:
authorMatthew Wilcox <willy@linux.intel.com>2016-03-17 17:21:54 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2016-03-17 18:09:34 -0400
commite61452365372570253b2b1de84bab0cdb2e62c64 (patch)
tree85a850683646afb15f7e25d3e459eb331f1811bf /lib
parent0070e28d97e72aeac2a85f538d8a452400dfe1c7 (diff)
radix_tree: add support for multi-order entries
With huge pages, it is convenient to have the radix tree be able to return an entry that covers multiple indices. Previous attempts to deal with the problem have involved inserting N duplicate entries, which is a waste of memory and leads to problems trying to handle aliased tags, or probing the tree multiple times to find alternative entries which might cover the requested index. This approach inserts one canonical entry into the tree for a given range of indices, and may also insert other entries in order to ensure that lookups find the canonical entry. This solution only tolerates inserting powers of two that are greater than the fanout of the tree. If we wish to expand the radix tree's abilities to support large-ish pages that is less than the fanout at the penultimate level of the tree, then we would need to add one more step in lookup to ensure that any sibling nodes in the final level of the tree are dereferenced and we return the canonical entry that they reference. Signed-off-by: Matthew Wilcox <willy@linux.intel.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Matthew Wilcox <willy@linux.intel.com> Cc: "Kirill A. Shutemov" <kirill.shutemov@linux.intel.com> Cc: Ross Zwisler <ross.zwisler@linux.intel.com> Cc: Hugh Dickins <hughd@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'lib')
-rw-r--r--lib/radix-tree.c109
1 files changed, 83 insertions, 26 deletions
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 9bb440f317c9..d907dca302d5 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -333,7 +333,8 @@ static inline unsigned long radix_tree_maxindex(unsigned int height)
333/* 333/*
334 * Extend a radix tree so it can store key @index. 334 * Extend a radix tree so it can store key @index.
335 */ 335 */
336static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) 336static int radix_tree_extend(struct radix_tree_root *root,
337 unsigned long index, unsigned order)
337{ 338{
338 struct radix_tree_node *node; 339 struct radix_tree_node *node;
339 struct radix_tree_node *slot; 340 struct radix_tree_node *slot;
@@ -345,7 +346,7 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
345 while (index > radix_tree_maxindex(height)) 346 while (index > radix_tree_maxindex(height))
346 height++; 347 height++;
347 348
348 if (root->rnode == NULL) { 349 if ((root->rnode == NULL) && (order == 0)) {
349 root->height = height; 350 root->height = height;
350 goto out; 351 goto out;
351 } 352 }
@@ -386,6 +387,7 @@ out:
386 * __radix_tree_create - create a slot in a radix tree 387 * __radix_tree_create - create a slot in a radix tree
387 * @root: radix tree root 388 * @root: radix tree root
388 * @index: index key 389 * @index: index key
390 * @order: index occupies 2^order aligned slots
389 * @nodep: returns node 391 * @nodep: returns node
390 * @slotp: returns slot 392 * @slotp: returns slot
391 * 393 *
@@ -399,26 +401,29 @@ out:
399 * Returns -ENOMEM, or 0 for success. 401 * Returns -ENOMEM, or 0 for success.
400 */ 402 */
401int __radix_tree_create(struct radix_tree_root *root, unsigned long index, 403int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
402 struct radix_tree_node **nodep, void ***slotp) 404 unsigned order, struct radix_tree_node **nodep,
405 void ***slotp)
403{ 406{
404 struct radix_tree_node *node = NULL, *slot; 407 struct radix_tree_node *node = NULL, *slot;
405 unsigned int height, shift, offset; 408 unsigned int height, shift, offset;
406 int error; 409 int error;
407 410
411 BUG_ON((0 < order) && (order < RADIX_TREE_MAP_SHIFT));
412
408 /* Make sure the tree is high enough. */ 413 /* Make sure the tree is high enough. */
409 if (index > radix_tree_maxindex(root->height)) { 414 if (index > radix_tree_maxindex(root->height)) {
410 error = radix_tree_extend(root, index); 415 error = radix_tree_extend(root, index, order);
411 if (error) 416 if (error)
412 return error; 417 return error;
413 } 418 }
414 419
415 slot = indirect_to_ptr(root->rnode); 420 slot = root->rnode;
416 421
417 height = root->height; 422 height = root->height;
418 shift = height * RADIX_TREE_MAP_SHIFT; 423 shift = height * RADIX_TREE_MAP_SHIFT;
419 424
420 offset = 0; /* uninitialised var warning */ 425 offset = 0; /* uninitialised var warning */
421 while (shift > 0) { 426 while (shift > order) {
422 if (slot == NULL) { 427 if (slot == NULL) {
423 /* Have to add a child node. */ 428 /* Have to add a child node. */
424 if (!(slot = radix_tree_node_alloc(root))) 429 if (!(slot = radix_tree_node_alloc(root)))
@@ -433,15 +438,31 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
433 } else 438 } else
434 rcu_assign_pointer(root->rnode, 439 rcu_assign_pointer(root->rnode,
435 ptr_to_indirect(slot)); 440 ptr_to_indirect(slot));
436 } 441 } else if (!radix_tree_is_indirect_ptr(slot))
442 break;
437 443
438 /* Go a level down */ 444 /* Go a level down */
445 height--;
439 shift -= RADIX_TREE_MAP_SHIFT; 446 shift -= RADIX_TREE_MAP_SHIFT;
440 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 447 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
441 node = slot; 448 node = indirect_to_ptr(slot);
442 slot = node->slots[offset]; 449 slot = node->slots[offset];
443 slot = indirect_to_ptr(slot); 450 }
444 height--; 451
452 /* Insert pointers to the canonical entry */
453 if ((shift - order) > 0) {
454 int i, n = 1 << (shift - order);
455 offset = offset & ~(n - 1);
456 slot = ptr_to_indirect(&node->slots[offset]);
457 for (i = 0; i < n; i++) {
458 if (node->slots[offset + i])
459 return -EEXIST;
460 }
461
462 for (i = 1; i < n; i++) {
463 rcu_assign_pointer(node->slots[offset + i], slot);
464 node->count++;
465 }
445 } 466 }
446 467
447 if (nodep) 468 if (nodep)
@@ -452,15 +473,16 @@ int __radix_tree_create(struct radix_tree_root *root, unsigned long index,
452} 473}
453 474
454/** 475/**
455 * radix_tree_insert - insert into a radix tree 476 * __radix_tree_insert - insert into a radix tree
456 * @root: radix tree root 477 * @root: radix tree root
457 * @index: index key 478 * @index: index key
479 * @order: key covers the 2^order indices around index
458 * @item: item to insert 480 * @item: item to insert
459 * 481 *
460 * Insert an item into the radix tree at position @index. 482 * Insert an item into the radix tree at position @index.
461 */ 483 */
462int radix_tree_insert(struct radix_tree_root *root, 484int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
463 unsigned long index, void *item) 485 unsigned order, void *item)
464{ 486{
465 struct radix_tree_node *node; 487 struct radix_tree_node *node;
466 void **slot; 488 void **slot;
@@ -468,7 +490,7 @@ int radix_tree_insert(struct radix_tree_root *root,
468 490
469 BUG_ON(radix_tree_is_indirect_ptr(item)); 491 BUG_ON(radix_tree_is_indirect_ptr(item));
470 492
471 error = __radix_tree_create(root, index, &node, &slot); 493 error = __radix_tree_create(root, index, order, &node, &slot);
472 if (error) 494 if (error)
473 return error; 495 return error;
474 if (*slot != NULL) 496 if (*slot != NULL)
@@ -486,7 +508,7 @@ int radix_tree_insert(struct radix_tree_root *root,
486 508
487 return 0; 509 return 0;
488} 510}
489EXPORT_SYMBOL(radix_tree_insert); 511EXPORT_SYMBOL(__radix_tree_insert);
490 512
491/** 513/**
492 * __radix_tree_lookup - lookup an item in a radix tree 514 * __radix_tree_lookup - lookup an item in a radix tree
@@ -537,6 +559,8 @@ void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index,
537 node = rcu_dereference_raw(*slot); 559 node = rcu_dereference_raw(*slot);
538 if (node == NULL) 560 if (node == NULL)
539 return NULL; 561 return NULL;
562 if (!radix_tree_is_indirect_ptr(node))
563 break;
540 node = indirect_to_ptr(node); 564 node = indirect_to_ptr(node);
541 565
542 shift -= RADIX_TREE_MAP_SHIFT; 566 shift -= RADIX_TREE_MAP_SHIFT;
@@ -624,6 +648,8 @@ void *radix_tree_tag_set(struct radix_tree_root *root,
624 tag_set(slot, tag, offset); 648 tag_set(slot, tag, offset);
625 slot = slot->slots[offset]; 649 slot = slot->slots[offset];
626 BUG_ON(slot == NULL); 650 BUG_ON(slot == NULL);
651 if (!radix_tree_is_indirect_ptr(slot))
652 break;
627 slot = indirect_to_ptr(slot); 653 slot = indirect_to_ptr(slot);
628 shift -= RADIX_TREE_MAP_SHIFT; 654 shift -= RADIX_TREE_MAP_SHIFT;
629 height--; 655 height--;
@@ -669,6 +695,8 @@ void *radix_tree_tag_clear(struct radix_tree_root *root,
669 while (shift) { 695 while (shift) {
670 if (slot == NULL) 696 if (slot == NULL)
671 goto out; 697 goto out;
698 if (!radix_tree_is_indirect_ptr(slot))
699 break;
672 slot = indirect_to_ptr(slot); 700 slot = indirect_to_ptr(slot);
673 701
674 shift -= RADIX_TREE_MAP_SHIFT; 702 shift -= RADIX_TREE_MAP_SHIFT;
@@ -753,6 +781,8 @@ int radix_tree_tag_get(struct radix_tree_root *root,
753 if (height == 1) 781 if (height == 1)
754 return 1; 782 return 1;
755 node = rcu_dereference_raw(node->slots[offset]); 783 node = rcu_dereference_raw(node->slots[offset]);
784 if (!radix_tree_is_indirect_ptr(node))
785 return 1;
756 shift -= RADIX_TREE_MAP_SHIFT; 786 shift -= RADIX_TREE_MAP_SHIFT;
757 height--; 787 height--;
758 } 788 }
@@ -813,6 +843,7 @@ restart:
813 843
814 node = rnode; 844 node = rnode;
815 while (1) { 845 while (1) {
846 struct radix_tree_node *slot;
816 if ((flags & RADIX_TREE_ITER_TAGGED) ? 847 if ((flags & RADIX_TREE_ITER_TAGGED) ?
817 !test_bit(offset, node->tags[tag]) : 848 !test_bit(offset, node->tags[tag]) :
818 !node->slots[offset]) { 849 !node->slots[offset]) {
@@ -843,10 +874,12 @@ restart:
843 if (!shift) 874 if (!shift)
844 break; 875 break;
845 876
846 node = rcu_dereference_raw(node->slots[offset]); 877 slot = rcu_dereference_raw(node->slots[offset]);
847 if (node == NULL) 878 if (slot == NULL)
848 goto restart; 879 goto restart;
849 node = indirect_to_ptr(node); 880 if (!radix_tree_is_indirect_ptr(slot))
881 break;
882 node = indirect_to_ptr(slot);
850 shift -= RADIX_TREE_MAP_SHIFT; 883 shift -= RADIX_TREE_MAP_SHIFT;
851 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 884 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
852 } 885 }
@@ -944,16 +977,20 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
944 if (!tag_get(slot, iftag, offset)) 977 if (!tag_get(slot, iftag, offset))
945 goto next; 978 goto next;
946 if (shift) { 979 if (shift) {
947 /* Go down one level */
948 shift -= RADIX_TREE_MAP_SHIFT;
949 node = slot; 980 node = slot;
950 slot = slot->slots[offset]; 981 slot = slot->slots[offset];
951 slot = indirect_to_ptr(slot); 982 if (radix_tree_is_indirect_ptr(slot)) {
952 continue; 983 slot = indirect_to_ptr(slot);
984 shift -= RADIX_TREE_MAP_SHIFT;
985 continue;
986 } else {
987 slot = node;
988 node = node->parent;
989 }
953 } 990 }
954 991
955 /* tag the leaf */ 992 /* tag the leaf */
956 tagged++; 993 tagged += 1 << shift;
957 tag_set(slot, settag, offset); 994 tag_set(slot, settag, offset);
958 995
959 /* walk back up the path tagging interior nodes */ 996 /* walk back up the path tagging interior nodes */
@@ -1201,11 +1238,20 @@ static unsigned long __locate(struct radix_tree_node *slot, void *item,
1201 goto out; 1238 goto out;
1202 } 1239 }
1203 1240
1204 shift -= RADIX_TREE_MAP_SHIFT;
1205 slot = rcu_dereference_raw(slot->slots[i]); 1241 slot = rcu_dereference_raw(slot->slots[i]);
1206 if (slot == NULL) 1242 if (slot == NULL)
1207 goto out; 1243 goto out;
1244 if (!radix_tree_is_indirect_ptr(slot)) {
1245 if (slot == item) {
1246 *found_index = index + i;
1247 index = 0;
1248 } else {
1249 index += shift;
1250 }
1251 goto out;
1252 }
1208 slot = indirect_to_ptr(slot); 1253 slot = indirect_to_ptr(slot);
1254 shift -= RADIX_TREE_MAP_SHIFT;
1209 } 1255 }
1210 1256
1211 /* Bottom level: check items */ 1257 /* Bottom level: check items */
@@ -1285,7 +1331,8 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
1285 1331
1286 /* 1332 /*
1287 * The candidate node has more than one child, or its child 1333 * The candidate node has more than one child, or its child
1288 * is not at the leftmost slot, we cannot shrink. 1334 * is not at the leftmost slot, or it is a multiorder entry,
1335 * we cannot shrink.
1289 */ 1336 */
1290 if (to_free->count != 1) 1337 if (to_free->count != 1)
1291 break; 1338 break;
@@ -1301,6 +1348,9 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
1301 * one (root->rnode) as far as dependent read barriers go. 1348 * one (root->rnode) as far as dependent read barriers go.
1302 */ 1349 */
1303 if (root->height > 1) { 1350 if (root->height > 1) {
1351 if (!radix_tree_is_indirect_ptr(slot))
1352 break;
1353
1304 slot = indirect_to_ptr(slot); 1354 slot = indirect_to_ptr(slot);
1305 slot->parent = NULL; 1355 slot->parent = NULL;
1306 slot = ptr_to_indirect(slot); 1356 slot = ptr_to_indirect(slot);
@@ -1399,7 +1449,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root,
1399 unsigned long index, void *item) 1449 unsigned long index, void *item)
1400{ 1450{
1401 struct radix_tree_node *node; 1451 struct radix_tree_node *node;
1402 unsigned int offset; 1452 unsigned int offset, i;
1403 void **slot; 1453 void **slot;
1404 void *entry; 1454 void *entry;
1405 int tag; 1455 int tag;
@@ -1428,6 +1478,13 @@ void *radix_tree_delete_item(struct radix_tree_root *root,
1428 radix_tree_tag_clear(root, index, tag); 1478 radix_tree_tag_clear(root, index, tag);
1429 } 1479 }
1430 1480
1481 /* Delete any sibling slots pointing to this slot */
1482 for (i = 1; offset + i < RADIX_TREE_MAP_SIZE; i++) {
1483 if (node->slots[offset + i] != ptr_to_indirect(slot))
1484 break;
1485 node->slots[offset + i] = NULL;
1486 node->count--;
1487 }
1431 node->slots[offset] = NULL; 1488 node->slots[offset] = NULL;
1432 node->count--; 1489 node->count--;
1433 1490