aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatthew Wilcox <willy@linux.intel.com>2016-12-14 18:09:01 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2016-12-14 19:04:10 -0500
commite157b555945fb16ddc6cce605a1eb6b4135ea5f1 (patch)
treef0b53b641059c010cfd38a32ae7f4f3b27b04303
parent175542f575723e43f897ddb09d0011c13f7cf0ec (diff)
radix-tree: add radix_tree_split
This new function splits a larger multiorder entry into smaller entries (potentially multi-order entries). These entries are initialised to RADIX_TREE_RETRY to ensure that RCU walkers who see this state aren't confused. The caller should then call radix_tree_for_each_slot() and radix_tree_replace_slot() in order to turn these retry entries into the intended new entries. Tags are replicated from the original multiorder entry into each new entry. Link: http://lkml.kernel.org/r/1480369871-5271-59-git-send-email-mawilcox@linuxonhyperv.com Signed-off-by: Matthew Wilcox <willy@linux.intel.com> Tested-by: Kirill A. Shutemov <kirill.shutemov@linux.intel.com> Cc: Konstantin Khlebnikov <koct9i@gmail.com> Cc: Ross Zwisler <ross.zwisler@linux.intel.com> Cc: Matthew Wilcox <mawilcox@microsoft.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--include/linux/radix-tree.h12
-rw-r--r--lib/radix-tree.c142
-rw-r--r--tools/testing/radix-tree/multiorder.c64
3 files changed, 214 insertions, 4 deletions
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 935293a24f7d..1f4b56120de8 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -80,6 +80,14 @@ static inline bool radix_tree_is_internal_node(void *ptr)
80#define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ 80#define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
81 RADIX_TREE_MAP_SHIFT)) 81 RADIX_TREE_MAP_SHIFT))
82 82
83/*
84 * @count is the count of every non-NULL element in the ->slots array
85 * whether that is an exceptional entry, a retry entry, a user pointer,
86 * a sibling entry or a pointer to the next level of the tree.
87 * @exceptional is the count of every element in ->slots which is
88 * either radix_tree_exceptional_entry() or is a sibling entry for an
89 * exceptional entry.
90 */
83struct radix_tree_node { 91struct radix_tree_node {
84 unsigned char shift; /* Bits remaining in each slot */ 92 unsigned char shift; /* Bits remaining in each slot */
85 unsigned char offset; /* Slot offset in parent */ 93 unsigned char offset; /* Slot offset in parent */
@@ -293,6 +301,8 @@ void __radix_tree_replace(struct radix_tree_root *root,
293 struct radix_tree_node *node, 301 struct radix_tree_node *node,
294 void **slot, void *item, 302 void **slot, void *item,
295 radix_tree_update_node_t update_node, void *private); 303 radix_tree_update_node_t update_node, void *private);
304void radix_tree_iter_replace(struct radix_tree_root *,
305 const struct radix_tree_iter *, void **slot, void *item);
296void radix_tree_replace_slot(struct radix_tree_root *root, 306void radix_tree_replace_slot(struct radix_tree_root *root,
297 void **slot, void *item); 307 void **slot, void *item);
298void __radix_tree_delete_node(struct radix_tree_root *root, 308void __radix_tree_delete_node(struct radix_tree_root *root,
@@ -335,6 +345,8 @@ static inline void radix_tree_preload_end(void)
335 preempt_enable(); 345 preempt_enable();
336} 346}
337 347
348int radix_tree_split(struct radix_tree_root *, unsigned long index,
349 unsigned new_order);
338int radix_tree_join(struct radix_tree_root *, unsigned long index, 350int radix_tree_join(struct radix_tree_root *, unsigned long index,
339 unsigned new_order, void *); 351 unsigned new_order, void *);
340 352
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 962cfb3a7671..ade2ed3f5190 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -22,6 +22,7 @@
22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 22 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 */ 23 */
24 24
25#include <linux/cpu.h>
25#include <linux/errno.h> 26#include <linux/errno.h>
26#include <linux/init.h> 27#include <linux/init.h>
27#include <linux/kernel.h> 28#include <linux/kernel.h>
@@ -758,7 +759,10 @@ static inline int insert_entries(struct radix_tree_node *node, void **slot,
758 unsigned i, n, tag, offset, tags = 0; 759 unsigned i, n, tag, offset, tags = 0;
759 760
760 if (node) { 761 if (node) {
761 n = 1 << (order - node->shift); 762 if (order > node->shift)
763 n = 1 << (order - node->shift);
764 else
765 n = 1;
762 offset = get_slot_offset(node, slot); 766 offset = get_slot_offset(node, slot);
763 } else { 767 } else {
764 n = 1; 768 n = 1;
@@ -797,7 +801,8 @@ static inline int insert_entries(struct radix_tree_node *node, void **slot,
797 tag_set(node, tag, offset); 801 tag_set(node, tag, offset);
798 } 802 }
799 if (radix_tree_is_internal_node(old) && 803 if (radix_tree_is_internal_node(old) &&
800 !is_sibling_entry(node, old)) 804 !is_sibling_entry(node, old) &&
805 (old != RADIX_TREE_RETRY))
801 radix_tree_free_nodes(old); 806 radix_tree_free_nodes(old);
802 if (radix_tree_exceptional_entry(old)) 807 if (radix_tree_exceptional_entry(old))
803 node->exceptional--; 808 node->exceptional--;
@@ -1021,7 +1026,8 @@ void __radix_tree_replace(struct radix_tree_root *root,
1021 * NOTE: This cannot be used to switch between non-entries (empty slots), 1026 * NOTE: This cannot be used to switch between non-entries (empty slots),
1022 * regular entries, and exceptional entries, as that requires accounting 1027 * regular entries, and exceptional entries, as that requires accounting
1023 * inside the radix tree node. When switching from one type of entry or 1028 * inside the radix tree node. When switching from one type of entry or
1024 * deleting, use __radix_tree_lookup() and __radix_tree_replace(). 1029 * deleting, use __radix_tree_lookup() and __radix_tree_replace() or
1030 * radix_tree_iter_replace().
1025 */ 1031 */
1026void radix_tree_replace_slot(struct radix_tree_root *root, 1032void radix_tree_replace_slot(struct radix_tree_root *root,
1027 void **slot, void *item) 1033 void **slot, void *item)
@@ -1029,6 +1035,21 @@ void radix_tree_replace_slot(struct radix_tree_root *root,
1029 replace_slot(root, NULL, slot, item, true); 1035 replace_slot(root, NULL, slot, item, true);
1030} 1036}
1031 1037
1038/**
1039 * radix_tree_iter_replace - replace item in a slot
1040 * @root: radix tree root
1041 * @slot: pointer to slot
1042 * @item: new item to store in the slot.
1043 *
1044 * For use with radix_tree_split() and radix_tree_for_each_slot().
1045 * Caller must hold tree write locked across split and replacement.
1046 */
1047void radix_tree_iter_replace(struct radix_tree_root *root,
1048 const struct radix_tree_iter *iter, void **slot, void *item)
1049{
1050 __radix_tree_replace(root, iter->node, slot, item, NULL, NULL);
1051}
1052
1032#ifdef CONFIG_RADIX_TREE_MULTIORDER 1053#ifdef CONFIG_RADIX_TREE_MULTIORDER
1033/** 1054/**
1034 * radix_tree_join - replace multiple entries with one multiorder entry 1055 * radix_tree_join - replace multiple entries with one multiorder entry
@@ -1061,6 +1082,117 @@ int radix_tree_join(struct radix_tree_root *root, unsigned long index,
1061 1082
1062 return error; 1083 return error;
1063} 1084}
1085
1086/**
1087 * radix_tree_split - Split an entry into smaller entries
1088 * @root: radix tree root
1089 * @index: An index within the large entry
1090 * @order: Order of new entries
1091 *
1092 * Call this function as the first step in replacing a multiorder entry
1093 * with several entries of lower order. After this function returns,
1094 * loop over the relevant portion of the tree using radix_tree_for_each_slot()
1095 * and call radix_tree_iter_replace() to set up each new entry.
1096 *
1097 * The tags from this entry are replicated to all the new entries.
1098 *
1099 * The radix tree should be locked against modification during the entire
1100 * replacement operation. Lock-free lookups will see RADIX_TREE_RETRY which
1101 * should prompt RCU walkers to restart the lookup from the root.
1102 */
1103int radix_tree_split(struct radix_tree_root *root, unsigned long index,
1104 unsigned order)
1105{
1106 struct radix_tree_node *parent, *node, *child;
1107 void **slot;
1108 unsigned int offset, end;
1109 unsigned n, tag, tags = 0;
1110
1111 if (!__radix_tree_lookup(root, index, &parent, &slot))
1112 return -ENOENT;
1113 if (!parent)
1114 return -ENOENT;
1115
1116 offset = get_slot_offset(parent, slot);
1117
1118 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
1119 if (tag_get(parent, tag, offset))
1120 tags |= 1 << tag;
1121
1122 for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) {
1123 if (!is_sibling_entry(parent, parent->slots[end]))
1124 break;
1125 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
1126 if (tags & (1 << tag))
1127 tag_set(parent, tag, end);
1128 /* rcu_assign_pointer ensures tags are set before RETRY */
1129 rcu_assign_pointer(parent->slots[end], RADIX_TREE_RETRY);
1130 }
1131 rcu_assign_pointer(parent->slots[offset], RADIX_TREE_RETRY);
1132 parent->exceptional -= (end - offset);
1133
1134 if (order == parent->shift)
1135 return 0;
1136 if (order > parent->shift) {
1137 while (offset < end)
1138 offset += insert_entries(parent, &parent->slots[offset],
1139 RADIX_TREE_RETRY, order, true);
1140 return 0;
1141 }
1142
1143 node = parent;
1144
1145 for (;;) {
1146 if (node->shift > order) {
1147 child = radix_tree_node_alloc(root);
1148 if (!child)
1149 goto nomem;
1150 child->shift = node->shift - RADIX_TREE_MAP_SHIFT;
1151 child->offset = offset;
1152 child->count = 0;
1153 child->parent = node;
1154 if (node != parent) {
1155 node->count++;
1156 node->slots[offset] = node_to_entry(child);
1157 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
1158 if (tags & (1 << tag))
1159 tag_set(node, tag, offset);
1160 }
1161
1162 node = child;
1163 offset = 0;
1164 continue;
1165 }
1166
1167 n = insert_entries(node, &node->slots[offset],
1168 RADIX_TREE_RETRY, order, false);
1169 BUG_ON(n > RADIX_TREE_MAP_SIZE);
1170
1171 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
1172 if (tags & (1 << tag))
1173 tag_set(node, tag, offset);
1174 offset += n;
1175
1176 while (offset == RADIX_TREE_MAP_SIZE) {
1177 if (node == parent)
1178 break;
1179 offset = node->offset;
1180 child = node;
1181 node = node->parent;
1182 rcu_assign_pointer(node->slots[offset],
1183 node_to_entry(child));
1184 offset++;
1185 }
1186 if ((node == parent) && (offset == end))
1187 return 0;
1188 }
1189
1190 nomem:
1191 /* Shouldn't happen; did user forget to preload? */
1192 /* TODO: free all the allocated nodes */
1193 WARN_ON(1);
1194 return -ENOMEM;
1195}
1064#endif 1196#endif
1065 1197
1066/** 1198/**
@@ -1441,8 +1573,10 @@ void **radix_tree_next_chunk(struct radix_tree_root *root,
1441 child = rcu_dereference_raw(node->slots[offset]); 1573 child = rcu_dereference_raw(node->slots[offset]);
1442 } 1574 }
1443 1575
1444 if ((child == NULL) || (child == RADIX_TREE_RETRY)) 1576 if (!child)
1445 goto restart; 1577 goto restart;
1578 if (child == RADIX_TREE_RETRY)
1579 break;
1446 } while (radix_tree_is_internal_node(child)); 1580 } while (radix_tree_is_internal_node(child));
1447 1581
1448 /* Update the iterator state */ 1582 /* Update the iterator state */
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index c9f656cf5f52..fa6effe997a3 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -389,6 +389,69 @@ static void multiorder_join(void)
389 } 389 }
390} 390}
391 391
392static void __multiorder_split(int old_order, int new_order)
393{
394 RADIX_TREE(tree, GFP_KERNEL);
395 void **slot;
396 struct radix_tree_iter iter;
397 struct radix_tree_node *node;
398 void *item;
399
400 item_insert_order(&tree, 0, old_order);
401 radix_tree_tag_set(&tree, 0, 2);
402 radix_tree_split(&tree, 0, new_order);
403 radix_tree_for_each_slot(slot, &tree, &iter, 0) {
404 radix_tree_iter_replace(&tree, &iter, slot,
405 item_create(iter.index, new_order));
406 }
407
408 item_kill_tree(&tree);
409
410 __radix_tree_insert(&tree, 0, old_order, (void *)0x12);
411
412 item = __radix_tree_lookup(&tree, 0, &node, NULL);
413 assert(item == (void *)0x12);
414 assert(node->exceptional > 0);
415
416 radix_tree_split(&tree, 0, new_order);
417 radix_tree_for_each_slot(slot, &tree, &iter, 0) {
418 radix_tree_iter_replace(&tree, &iter, slot,
419 item_create(iter.index, new_order));
420 }
421
422 item = __radix_tree_lookup(&tree, 0, &node, NULL);
423 assert(item != (void *)0x12);
424 assert(node->exceptional == 0);
425
426 item_kill_tree(&tree);
427
428 __radix_tree_insert(&tree, 0, old_order, (void *)0x12);
429
430 item = __radix_tree_lookup(&tree, 0, &node, NULL);
431 assert(item == (void *)0x12);
432 assert(node->exceptional > 0);
433
434 radix_tree_split(&tree, 0, new_order);
435 radix_tree_for_each_slot(slot, &tree, &iter, 0) {
436 radix_tree_iter_replace(&tree, &iter, slot, (void *)0x16);
437 }
438
439 item = __radix_tree_lookup(&tree, 0, &node, NULL);
440 assert(item == (void *)0x16);
441 assert(node->exceptional > 0);
442
443 item_kill_tree(&tree);
444}
445
446static void multiorder_split(void)
447{
448 int i, j;
449
450 for (i = 9; i < 19; i++)
451 for (j = 0; j < i; j++)
452 __multiorder_split(i, j);
453}
454
392void multiorder_checks(void) 455void multiorder_checks(void)
393{ 456{
394 int i; 457 int i;
@@ -407,4 +470,5 @@ void multiorder_checks(void)
407 multiorder_iteration(); 470 multiorder_iteration();
408 multiorder_tagged_iteration(); 471 multiorder_tagged_iteration();
409 multiorder_join(); 472 multiorder_join();
473 multiorder_split();
410} 474}