summaryrefslogtreecommitdiffstats
path: root/lib/radix-tree.c
diff options
context:
space:
mode:
authorJohannes Weiner <hannes@cmpxchg.org>2016-12-12 19:43:46 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2016-12-12 21:55:08 -0500
commitf4b109c6dad54257eca837f9dd16a23f2eeab832 (patch)
treeab2ceb69c33346aeaa8e1420d04fad8623118e26 /lib/radix-tree.c
parent6d75f366b9242f9b17ed7d0b0604d7460f818f21 (diff)
lib: radix-tree: add entry deletion support to __radix_tree_replace()
Page cache shadow entry handling will be a lot simpler when it can use a single generic replacement function for pages, shadow entries, and emptying slots. Make __radix_tree_replace() properly account insertions and deletions in node->count and garbage collect nodes as they become empty. Then re-implement radix_tree_delete() on top of it. Link: http://lkml.kernel.org/r/20161117193058.GC23430@cmpxchg.org Signed-off-by: Johannes Weiner <hannes@cmpxchg.org> Reviewed-by: Jan Kara <jack@suse.cz> Cc: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Hugh Dickins <hughd@google.com> Cc: Matthew Wilcox <mawilcox@linuxonhyperv.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'lib/radix-tree.c')
-rw-r--r--lib/radix-tree.c227
1 files changed, 116 insertions, 111 deletions
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index f91d5b0af654..5d8930f3b3d8 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -539,6 +539,107 @@ out:
539} 539}
540 540
541/** 541/**
542 * radix_tree_shrink - shrink radix tree to minimum height
543 * @root:	radix tree root
 *
 * Repeatedly collapses the topmost node while it has exactly one child
 * that sits in slot 0, making that child the new root.  Stops at the
 * first node that cannot be collapsed (multiple children, child not in
 * slot 0, or a data entry below a non-bottom-level node).
 *
 * Returns: true if at least one node was freed, false otherwise.
 *
 * NOTE(review): callers presumably hold the tree lock for writer
 * exclusion; concurrent RCU lookups are what the RADIX_TREE_RETRY
 * dance below is for — confirm against call sites.
544 */
545static inline bool radix_tree_shrink(struct radix_tree_root *root)
546{
547 bool shrunk = false;
548
549 for (;;) {
550 struct radix_tree_node *node = root->rnode;
551 struct radix_tree_node *child;
552
 /* A direct (non-internal) root entry cannot be shrunk further. */
553 if (!radix_tree_is_internal_node(node))
554 break;
555 node = entry_to_node(node);
556
557 /*
558 * If the candidate node has more than one child, or its child
559 * is not at the leftmost slot, or the child is a multiorder
560 * entry, we cannot shrink.
561 */
562 if (node->count != 1)
563 break;
564 child = node->slots[0];
565 if (!child)
566 break;
 /* A data entry may only replace the root at the bottom level. */
567 if (!radix_tree_is_internal_node(child) && node->shift)
568 break;
569
570 if (radix_tree_is_internal_node(child))
571 entry_to_node(child)->parent = NULL;
572
573 /*
574 * We don't need rcu_assign_pointer(), since we are simply
575 * moving the node from one part of the tree to another: if it
576 * was safe to dereference the old pointer to it
577 * (node->slots[0]), it will be safe to dereference the new
578 * one (root->rnode) as far as dependent read barriers go.
579 */
580 root->rnode = child;
581
582 /*
583 * We have a dilemma here. The node's slot[0] must not be
584 * NULLed in case there are concurrent lookups expecting to
585 * find the item. However if this was a bottom-level node,
586 * then it may be subject to the slot pointer being visible
587 * to callers dereferencing it. If item corresponding to
588 * slot[0] is subsequently deleted, these callers would expect
589 * their slot to become empty sooner or later.
590 *
591 * For example, lockless pagecache will look up a slot, deref
592 * the page pointer, and if the page has 0 refcount it means it
593 * was concurrently deleted from pagecache so try the deref
594 * again. Fortunately there is already a requirement for logic
595 * to retry the entire slot lookup -- the indirect pointer
596 * problem (replacing direct root node with an indirect pointer
597 * also results in a stale slot). So tag the slot as indirect
598 * to force callers to retry.
599 */
600 if (!radix_tree_is_internal_node(child))
601 node->slots[0] = RADIX_TREE_RETRY;
602
603 radix_tree_node_free(node);
604 shrunk = true;
605 }
606
607 return shrunk;
608}
609
/**
 * delete_node - free @node and any ancestors that become empty
 * @root:	radix tree root
 * @node:	node to start from
 *
 * If @node is empty (node->count == 0), unlinks it from its parent,
 * frees it, and walks upward repeating the process for each ancestor
 * that becomes empty in turn.  When it reaches a non-empty node that
 * is the tree's root node, it tries to reduce the tree's height via
 * radix_tree_shrink().
 *
 * Returns: true if at least one node was freed or the tree was shrunk.
 */
610static bool delete_node(struct radix_tree_root *root,
611 struct radix_tree_node *node)
612{
613 bool deleted = false;
614
615 do {
616 struct radix_tree_node *parent;
617
 /* Non-empty node: nothing to free; maybe the root can shrink. */
618 if (node->count) {
619 if (node == entry_to_node(root->rnode))
620 deleted |= radix_tree_shrink(root);
621 return deleted;
622 }
623
624 parent = node->parent;
625 if (parent) {
626 parent->slots[node->offset] = NULL;
627 parent->count--;
628 } else {
 /* @node was the root node: the tree is now entirely empty. */
629 root_tag_clear_all(root);
630 root->rnode = NULL;
631 }
632
633 radix_tree_node_free(node);
634 deleted = true;
635
 /* The parent may have become empty as well — continue upward. */
636 node = parent;
637 } while (node);
638
639 return deleted;
640}
641
642/**
542 * __radix_tree_create - create a slot in a radix tree 643 * __radix_tree_create - create a slot in a radix tree
543 * @root: radix tree root 644 * @root: radix tree root
544 * @index: index key 645 * @index: index key
@@ -759,18 +860,20 @@ static void replace_slot(struct radix_tree_root *root,
759 bool warn_typeswitch) 860 bool warn_typeswitch)
760{ 861{
761 void *old = rcu_dereference_raw(*slot); 862 void *old = rcu_dereference_raw(*slot);
762 int exceptional; 863 int count, exceptional;
763 864
764 WARN_ON_ONCE(radix_tree_is_internal_node(item)); 865 WARN_ON_ONCE(radix_tree_is_internal_node(item));
765 WARN_ON_ONCE(!!item - !!old);
766 866
867 count = !!item - !!old;
767 exceptional = !!radix_tree_exceptional_entry(item) - 868 exceptional = !!radix_tree_exceptional_entry(item) -
768 !!radix_tree_exceptional_entry(old); 869 !!radix_tree_exceptional_entry(old);
769 870
770 WARN_ON_ONCE(warn_typeswitch && exceptional); 871 WARN_ON_ONCE(warn_typeswitch && (count || exceptional));
771 872
772 if (node) 873 if (node) {
874 node->count += count;
773 node->exceptional += exceptional; 875 node->exceptional += exceptional;
876 }
774 877
775 rcu_assign_pointer(*slot, item); 878 rcu_assign_pointer(*slot, item);
776} 879}
@@ -790,12 +893,14 @@ void __radix_tree_replace(struct radix_tree_root *root,
790 void **slot, void *item) 893 void **slot, void *item)
791{ 894{
792 /* 895 /*
793 * This function supports replacing exceptional entries, but 896 * This function supports replacing exceptional entries and
794 * that needs accounting against the node unless the slot is 897 * deleting entries, but that needs accounting against the
795 * root->rnode. 898 * node unless the slot is root->rnode.
796 */ 899 */
797 replace_slot(root, node, slot, item, 900 replace_slot(root, node, slot, item,
798 !node && slot != (void **)&root->rnode); 901 !node && slot != (void **)&root->rnode);
902
903 delete_node(root, node);
799} 904}
800 905
801/** 906/**
@@ -810,8 +915,8 @@ void __radix_tree_replace(struct radix_tree_root *root,
810 * 915 *
811 * NOTE: This cannot be used to switch between non-entries (empty slots), 916 * NOTE: This cannot be used to switch between non-entries (empty slots),
812 * regular entries, and exceptional entries, as that requires accounting 917 * regular entries, and exceptional entries, as that requires accounting
813 * inside the radix tree node. When switching from one type of entry to 918 * inside the radix tree node. When switching from one type of entry or
814 * another, use __radix_tree_lookup() and __radix_tree_replace(). 919 * deleting, use __radix_tree_lookup() and __radix_tree_replace().
815 */ 920 */
816void radix_tree_replace_slot(struct radix_tree_root *root, 921void radix_tree_replace_slot(struct radix_tree_root *root,
817 void **slot, void *item) 922 void **slot, void *item)
@@ -1467,75 +1572,6 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item)
1467#endif /* CONFIG_SHMEM && CONFIG_SWAP */ 1572#endif /* CONFIG_SHMEM && CONFIG_SWAP */
1468 1573
1469/** 1574/**
1470 * radix_tree_shrink - shrink radix tree to minimum height
1471 * @root radix tree root
1472 */
1473static inline bool radix_tree_shrink(struct radix_tree_root *root)
1474{
1475 bool shrunk = false;
1476
1477 for (;;) {
1478 struct radix_tree_node *node = root->rnode;
1479 struct radix_tree_node *child;
1480
1481 if (!radix_tree_is_internal_node(node))
1482 break;
1483 node = entry_to_node(node);
1484
1485 /*
1486 * The candidate node has more than one child, or its child
1487 * is not at the leftmost slot, or the child is a multiorder
1488 * entry, we cannot shrink.
1489 */
1490 if (node->count != 1)
1491 break;
1492 child = node->slots[0];
1493 if (!child)
1494 break;
1495 if (!radix_tree_is_internal_node(child) && node->shift)
1496 break;
1497
1498 if (radix_tree_is_internal_node(child))
1499 entry_to_node(child)->parent = NULL;
1500
1501 /*
1502 * We don't need rcu_assign_pointer(), since we are simply
1503 * moving the node from one part of the tree to another: if it
1504 * was safe to dereference the old pointer to it
1505 * (node->slots[0]), it will be safe to dereference the new
1506 * one (root->rnode) as far as dependent read barriers go.
1507 */
1508 root->rnode = child;
1509
1510 /*
1511 * We have a dilemma here. The node's slot[0] must not be
1512 * NULLed in case there are concurrent lookups expecting to
1513 * find the item. However if this was a bottom-level node,
1514 * then it may be subject to the slot pointer being visible
1515 * to callers dereferencing it. If item corresponding to
1516 * slot[0] is subsequently deleted, these callers would expect
1517 * their slot to become empty sooner or later.
1518 *
1519 * For example, lockless pagecache will look up a slot, deref
1520 * the page pointer, and if the page has 0 refcount it means it
1521 * was concurrently deleted from pagecache so try the deref
1522 * again. Fortunately there is already a requirement for logic
1523 * to retry the entire slot lookup -- the indirect pointer
1524 * problem (replacing direct root node with an indirect pointer
1525 * also results in a stale slot). So tag the slot as indirect
1526 * to force callers to retry.
1527 */
1528 if (!radix_tree_is_internal_node(child))
1529 node->slots[0] = RADIX_TREE_RETRY;
1530
1531 radix_tree_node_free(node);
1532 shrunk = true;
1533 }
1534
1535 return shrunk;
1536}
1537
1538/**
1539 * __radix_tree_delete_node - try to free node after clearing a slot 1575 * __radix_tree_delete_node - try to free node after clearing a slot
1540 * @root: radix tree root 1576 * @root: radix tree root
1541 * @node: node containing @index 1577 * @node: node containing @index
@@ -1549,33 +1585,7 @@ static inline bool radix_tree_shrink(struct radix_tree_root *root)
1549bool __radix_tree_delete_node(struct radix_tree_root *root, 1585bool __radix_tree_delete_node(struct radix_tree_root *root,
1550 struct radix_tree_node *node) 1586 struct radix_tree_node *node)
1551{ 1587{
1552 bool deleted = false; 1588 return delete_node(root, node);
1553
1554 do {
1555 struct radix_tree_node *parent;
1556
1557 if (node->count) {
1558 if (node == entry_to_node(root->rnode))
1559 deleted |= radix_tree_shrink(root);
1560 return deleted;
1561 }
1562
1563 parent = node->parent;
1564 if (parent) {
1565 parent->slots[node->offset] = NULL;
1566 parent->count--;
1567 } else {
1568 root_tag_clear_all(root);
1569 root->rnode = NULL;
1570 }
1571
1572 radix_tree_node_free(node);
1573 deleted = true;
1574
1575 node = parent;
1576 } while (node);
1577
1578 return deleted;
1579} 1589}
1580 1590
1581static inline void delete_sibling_entries(struct radix_tree_node *node, 1591static inline void delete_sibling_entries(struct radix_tree_node *node,
@@ -1632,12 +1642,7 @@ void *radix_tree_delete_item(struct radix_tree_root *root,
1632 node_tag_clear(root, node, tag, offset); 1642 node_tag_clear(root, node, tag, offset);
1633 1643
1634 delete_sibling_entries(node, node_to_entry(slot), offset); 1644 delete_sibling_entries(node, node_to_entry(slot), offset);
1635 node->slots[offset] = NULL; 1645 __radix_tree_replace(root, node, slot, NULL);
1636 node->count--;
1637 if (radix_tree_exceptional_entry(entry))
1638 node->exceptional--;
1639
1640 __radix_tree_delete_node(root, node);
1641 1646
1642 return entry; 1647 return entry;
1643} 1648}