summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--include/linux/radix-tree.h6
-rw-r--r--lib/radix-tree.c171
-rw-r--r--tools/testing/radix-tree/benchmark.c91
-rw-r--r--tools/testing/radix-tree/multiorder.c247
4 files changed, 2 insertions, 513 deletions
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 7513d0df984b..44f9abdcb5ab 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -284,12 +284,6 @@ static inline void radix_tree_preload_end(void)
284 preempt_enable(); 284 preempt_enable();
285} 285}
286 286
287int radix_tree_split_preload(unsigned old_order, unsigned new_order, gfp_t);
288int radix_tree_split(struct radix_tree_root *, unsigned long index,
289 unsigned new_order);
290int radix_tree_join(struct radix_tree_root *, unsigned long index,
291 unsigned new_order, void *);
292
293void __rcu **idr_get_free(struct radix_tree_root *root, 287void __rcu **idr_get_free(struct radix_tree_root *root,
294 struct radix_tree_iter *iter, gfp_t gfp, 288 struct radix_tree_iter *iter, gfp_t gfp,
295 unsigned long max); 289 unsigned long max);
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index c4c252185734..c52e0bef5264 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -415,28 +415,6 @@ int radix_tree_maybe_preload(gfp_t gfp_mask)
415} 415}
416EXPORT_SYMBOL(radix_tree_maybe_preload); 416EXPORT_SYMBOL(radix_tree_maybe_preload);
417 417
418#ifdef CONFIG_RADIX_TREE_MULTIORDER
419/*
420 * Preload with enough objects to ensure that we can split a single entry
421 * of order @old_order into many entries of size @new_order
422 */
423int radix_tree_split_preload(unsigned int old_order, unsigned int new_order,
424 gfp_t gfp_mask)
425{
426 unsigned top = 1 << (old_order % RADIX_TREE_MAP_SHIFT);
427 unsigned layers = (old_order / RADIX_TREE_MAP_SHIFT) -
428 (new_order / RADIX_TREE_MAP_SHIFT);
429 unsigned nr = 0;
430
431 WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
432 BUG_ON(new_order >= old_order);
433
434 while (layers--)
435 nr = nr * RADIX_TREE_MAP_SIZE + 1;
436 return __radix_tree_preload(gfp_mask, top * nr);
437}
438#endif
439
440/* 418/*
441 * The same as function above, but preload number of nodes required to insert 419 * The same as function above, but preload number of nodes required to insert
442 * (1 << order) continuous naturally-aligned elements. 420 * (1 << order) continuous naturally-aligned elements.
@@ -1111,8 +1089,8 @@ EXPORT_SYMBOL(radix_tree_replace_slot);
1111 * @slot: pointer to slot 1089 * @slot: pointer to slot
1112 * @item: new item to store in the slot. 1090 * @item: new item to store in the slot.
1113 * 1091 *
1114 * For use with radix_tree_split() and radix_tree_for_each_slot(). 1092 * For use with radix_tree_for_each_slot().
1115 * Caller must hold tree write locked across split and replacement. 1093 * Caller must hold tree write locked.
1116 */ 1094 */
1117void radix_tree_iter_replace(struct radix_tree_root *root, 1095void radix_tree_iter_replace(struct radix_tree_root *root,
1118 const struct radix_tree_iter *iter, 1096 const struct radix_tree_iter *iter,
@@ -1121,151 +1099,6 @@ void radix_tree_iter_replace(struct radix_tree_root *root,
1121 __radix_tree_replace(root, iter->node, slot, item); 1099 __radix_tree_replace(root, iter->node, slot, item);
1122} 1100}
1123 1101
1124#ifdef CONFIG_RADIX_TREE_MULTIORDER
1125/**
1126 * radix_tree_join - replace multiple entries with one multiorder entry
1127 * @root: radix tree root
1128 * @index: an index inside the new entry
1129 * @order: order of the new entry
1130 * @item: new entry
1131 *
1132 * Call this function to replace several entries with one larger entry.
1133 * The existing entries are presumed to not need freeing as a result of
1134 * this call.
1135 *
1136 * The replacement entry will have all the tags set on it that were set
1137 * on any of the entries it is replacing.
1138 */
1139int radix_tree_join(struct radix_tree_root *root, unsigned long index,
1140 unsigned order, void *item)
1141{
1142 struct radix_tree_node *node;
1143 void __rcu **slot;
1144 int error;
1145
1146 BUG_ON(radix_tree_is_internal_node(item));
1147
1148 error = __radix_tree_create(root, index, order, &node, &slot);
1149 if (!error)
1150 error = insert_entries(node, slot, item, order, true);
1151 if (error > 0)
1152 error = 0;
1153
1154 return error;
1155}
1156
1157/**
1158 * radix_tree_split - Split an entry into smaller entries
1159 * @root: radix tree root
1160 * @index: An index within the large entry
1161 * @order: Order of new entries
1162 *
1163 * Call this function as the first step in replacing a multiorder entry
1164 * with several entries of lower order. After this function returns,
1165 * loop over the relevant portion of the tree using radix_tree_for_each_slot()
1166 * and call radix_tree_iter_replace() to set up each new entry.
1167 *
1168 * The tags from this entry are replicated to all the new entries.
1169 *
1170 * The radix tree should be locked against modification during the entire
1171 * replacement operation. Lock-free lookups will see RADIX_TREE_RETRY which
1172 * should prompt RCU walkers to restart the lookup from the root.
1173 */
1174int radix_tree_split(struct radix_tree_root *root, unsigned long index,
1175 unsigned order)
1176{
1177 struct radix_tree_node *parent, *node, *child;
1178 void __rcu **slot;
1179 unsigned int offset, end;
1180 unsigned n, tag, tags = 0;
1181 gfp_t gfp = root_gfp_mask(root);
1182
1183 if (!__radix_tree_lookup(root, index, &parent, &slot))
1184 return -ENOENT;
1185 if (!parent)
1186 return -ENOENT;
1187
1188 offset = get_slot_offset(parent, slot);
1189
1190 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
1191 if (tag_get(parent, tag, offset))
1192 tags |= 1 << tag;
1193
1194 for (end = offset + 1; end < RADIX_TREE_MAP_SIZE; end++) {
1195 if (!xa_is_sibling(rcu_dereference_raw(parent->slots[end])))
1196 break;
1197 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
1198 if (tags & (1 << tag))
1199 tag_set(parent, tag, end);
1200 /* rcu_assign_pointer ensures tags are set before RETRY */
1201 rcu_assign_pointer(parent->slots[end], RADIX_TREE_RETRY);
1202 }
1203 rcu_assign_pointer(parent->slots[offset], RADIX_TREE_RETRY);
1204 parent->nr_values -= (end - offset);
1205
1206 if (order == parent->shift)
1207 return 0;
1208 if (order > parent->shift) {
1209 while (offset < end)
1210 offset += insert_entries(parent, &parent->slots[offset],
1211 RADIX_TREE_RETRY, order, true);
1212 return 0;
1213 }
1214
1215 node = parent;
1216
1217 for (;;) {
1218 if (node->shift > order) {
1219 child = radix_tree_node_alloc(gfp, node, root,
1220 node->shift - RADIX_TREE_MAP_SHIFT,
1221 offset, 0, 0);
1222 if (!child)
1223 goto nomem;
1224 if (node != parent) {
1225 node->count++;
1226 rcu_assign_pointer(node->slots[offset],
1227 node_to_entry(child));
1228 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
1229 if (tags & (1 << tag))
1230 tag_set(node, tag, offset);
1231 }
1232
1233 node = child;
1234 offset = 0;
1235 continue;
1236 }
1237
1238 n = insert_entries(node, &node->slots[offset],
1239 RADIX_TREE_RETRY, order, false);
1240 BUG_ON(n > RADIX_TREE_MAP_SIZE);
1241
1242 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++)
1243 if (tags & (1 << tag))
1244 tag_set(node, tag, offset);
1245 offset += n;
1246
1247 while (offset == RADIX_TREE_MAP_SIZE) {
1248 if (node == parent)
1249 break;
1250 offset = node->offset;
1251 child = node;
1252 node = node->parent;
1253 rcu_assign_pointer(node->slots[offset],
1254 node_to_entry(child));
1255 offset++;
1256 }
1257 if ((node == parent) && (offset == end))
1258 return 0;
1259 }
1260
1261 nomem:
1262 /* Shouldn't happen; did user forget to preload? */
1263 /* TODO: free all the allocated nodes */
1264 WARN_ON(1);
1265 return -ENOMEM;
1266}
1267#endif
1268
1269static void node_tag_set(struct radix_tree_root *root, 1102static void node_tag_set(struct radix_tree_root *root,
1270 struct radix_tree_node *node, 1103 struct radix_tree_node *node,
1271 unsigned int tag, unsigned int offset) 1104 unsigned int tag, unsigned int offset)
diff --git a/tools/testing/radix-tree/benchmark.c b/tools/testing/radix-tree/benchmark.c
index 99c40f3ed133..35741b9c2a4a 100644
--- a/tools/testing/radix-tree/benchmark.c
+++ b/tools/testing/radix-tree/benchmark.c
@@ -146,90 +146,6 @@ static void benchmark_size(unsigned long size, unsigned long step, int order)
146 rcu_barrier(); 146 rcu_barrier();
147} 147}
148 148
149static long long __benchmark_split(unsigned long index,
150 int old_order, int new_order)
151{
152 struct timespec start, finish;
153 long long nsec;
154 RADIX_TREE(tree, GFP_ATOMIC);
155
156 item_insert_order(&tree, index, old_order);
157
158 clock_gettime(CLOCK_MONOTONIC, &start);
159 radix_tree_split(&tree, index, new_order);
160 clock_gettime(CLOCK_MONOTONIC, &finish);
161 nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
162 (finish.tv_nsec - start.tv_nsec);
163
164 item_kill_tree(&tree);
165
166 return nsec;
167
168}
169
170static void benchmark_split(unsigned long size, unsigned long step)
171{
172 int i, j, idx;
173 long long nsec = 0;
174
175
176 for (idx = 0; idx < size; idx += step) {
177 for (i = 3; i < 11; i++) {
178 for (j = 0; j < i; j++) {
179 nsec += __benchmark_split(idx, i, j);
180 }
181 }
182 }
183
184 printv(2, "Size %8ld, step %8ld, split time %10lld ns\n",
185 size, step, nsec);
186
187}
188
189static long long __benchmark_join(unsigned long index,
190 unsigned order1, unsigned order2)
191{
192 unsigned long loc;
193 struct timespec start, finish;
194 long long nsec;
195 void *item, *item2 = item_create(index + 1, order1);
196 RADIX_TREE(tree, GFP_KERNEL);
197
198 item_insert_order(&tree, index, order2);
199 item = radix_tree_lookup(&tree, index);
200
201 clock_gettime(CLOCK_MONOTONIC, &start);
202 radix_tree_join(&tree, index + 1, order1, item2);
203 clock_gettime(CLOCK_MONOTONIC, &finish);
204 nsec = (finish.tv_sec - start.tv_sec) * NSEC_PER_SEC +
205 (finish.tv_nsec - start.tv_nsec);
206
207 loc = find_item(&tree, item);
208 if (loc == -1)
209 free(item);
210
211 item_kill_tree(&tree);
212
213 return nsec;
214}
215
216static void benchmark_join(unsigned long step)
217{
218 int i, j, idx;
219 long long nsec = 0;
220
221 for (idx = 0; idx < 1 << 10; idx += step) {
222 for (i = 1; i < 15; i++) {
223 for (j = 0; j < i; j++) {
224 nsec += __benchmark_join(idx, i, j);
225 }
226 }
227 }
228
229 printv(2, "Size %8d, step %8ld, join time %10lld ns\n",
230 1 << 10, step, nsec);
231}
232
233void benchmark(void) 149void benchmark(void)
234{ 150{
235 unsigned long size[] = {1 << 10, 1 << 20, 0}; 151 unsigned long size[] = {1 << 10, 1 << 20, 0};
@@ -247,11 +163,4 @@ void benchmark(void)
247 for (c = 0; size[c]; c++) 163 for (c = 0; size[c]; c++)
248 for (s = 0; step[s]; s++) 164 for (s = 0; step[s]; s++)
249 benchmark_size(size[c], step[s] << 9, 9); 165 benchmark_size(size[c], step[s] << 9, 9);
250
251 for (c = 0; size[c]; c++)
252 for (s = 0; step[s]; s++)
253 benchmark_split(size[c], step[s]);
254
255 for (s = 0; step[s]; s++)
256 benchmark_join(step[s]);
257} 166}
diff --git a/tools/testing/radix-tree/multiorder.c b/tools/testing/radix-tree/multiorder.c
index 0e0ff26c9bcb..221e042d1b89 100644
--- a/tools/testing/radix-tree/multiorder.c
+++ b/tools/testing/radix-tree/multiorder.c
@@ -356,251 +356,6 @@ void multiorder_tagged_iteration(void)
356 item_kill_tree(&tree); 356 item_kill_tree(&tree);
357} 357}
358 358
359/*
360 * Basic join checks: make sure we can't find an entry in the tree after
361 * a larger entry has replaced it
362 */
363static void multiorder_join1(unsigned long index,
364 unsigned order1, unsigned order2)
365{
366 unsigned long loc;
367 void *item, *item2 = item_create(index + 1, order1);
368 RADIX_TREE(tree, GFP_KERNEL);
369
370 item_insert_order(&tree, index, order2);
371 item = radix_tree_lookup(&tree, index);
372 radix_tree_join(&tree, index + 1, order1, item2);
373 loc = find_item(&tree, item);
374 if (loc == -1)
375 free(item);
376 item = radix_tree_lookup(&tree, index + 1);
377 assert(item == item2);
378 item_kill_tree(&tree);
379}
380
381/*
382 * Check that the accounting of value entries is handled correctly
383 * by joining a value entry to a normal pointer.
384 */
385static void multiorder_join2(unsigned order1, unsigned order2)
386{
387 RADIX_TREE(tree, GFP_KERNEL);
388 struct radix_tree_node *node;
389 void *item1 = item_create(0, order1);
390 void *item2;
391
392 item_insert_order(&tree, 0, order2);
393 radix_tree_insert(&tree, 1 << order2, xa_mk_value(5));
394 item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL);
395 assert(item2 == xa_mk_value(5));
396 assert(node->nr_values == 1);
397
398 item2 = radix_tree_lookup(&tree, 0);
399 free(item2);
400
401 radix_tree_join(&tree, 0, order1, item1);
402 item2 = __radix_tree_lookup(&tree, 1 << order2, &node, NULL);
403 assert(item2 == item1);
404 assert(node->nr_values == 0);
405 item_kill_tree(&tree);
406}
407
408/*
409 * This test revealed an accounting bug for value entries at one point.
410 * Nodes were being freed back into the pool with an elevated exception count
411 * by radix_tree_join() and then radix_tree_split() was failing to zero the
412 * count of value entries.
413 */
414static void multiorder_join3(unsigned int order)
415{
416 RADIX_TREE(tree, GFP_KERNEL);
417 struct radix_tree_node *node;
418 void **slot;
419 struct radix_tree_iter iter;
420 unsigned long i;
421
422 for (i = 0; i < (1 << order); i++) {
423 radix_tree_insert(&tree, i, xa_mk_value(5));
424 }
425
426 radix_tree_join(&tree, 0, order, xa_mk_value(7));
427 rcu_barrier();
428
429 radix_tree_split(&tree, 0, 0);
430
431 radix_tree_for_each_slot(slot, &tree, &iter, 0) {
432 radix_tree_iter_replace(&tree, &iter, slot, xa_mk_value(5));
433 }
434
435 __radix_tree_lookup(&tree, 0, &node, NULL);
436 assert(node->nr_values == node->count);
437
438 item_kill_tree(&tree);
439}
440
441static void multiorder_join(void)
442{
443 int i, j, idx;
444
445 for (idx = 0; idx < 1024; idx = idx * 2 + 3) {
446 for (i = 1; i < 15; i++) {
447 for (j = 0; j < i; j++) {
448 multiorder_join1(idx, i, j);
449 }
450 }
451 }
452
453 for (i = 1; i < 15; i++) {
454 for (j = 0; j < i; j++) {
455 multiorder_join2(i, j);
456 }
457 }
458
459 for (i = 3; i < 10; i++) {
460 multiorder_join3(i);
461 }
462}
463
464static void check_mem(unsigned old_order, unsigned new_order, unsigned alloc)
465{
466 struct radix_tree_preload *rtp = &radix_tree_preloads;
467 if (rtp->nr != 0)
468 printv(2, "split(%u %u) remaining %u\n", old_order, new_order,
469 rtp->nr);
470 /*
471 * Can't check for equality here as some nodes may have been
472 * RCU-freed while we ran. But we should never finish with more
473 * nodes allocated since they should have all been preloaded.
474 */
475 if (nr_allocated > alloc)
476 printv(2, "split(%u %u) allocated %u %u\n", old_order, new_order,
477 alloc, nr_allocated);
478}
479
480static void __multiorder_split(int old_order, int new_order)
481{
482 RADIX_TREE(tree, GFP_ATOMIC);
483 void **slot;
484 struct radix_tree_iter iter;
485 unsigned alloc;
486 struct item *item;
487
488 radix_tree_preload(GFP_KERNEL);
489 assert(item_insert_order(&tree, 0, old_order) == 0);
490 radix_tree_preload_end();
491
492 /* Wipe out the preloaded cache or it'll confuse check_mem() */
493 radix_tree_cpu_dead(0);
494
495 item = radix_tree_tag_set(&tree, 0, 2);
496
497 radix_tree_split_preload(old_order, new_order, GFP_KERNEL);
498 alloc = nr_allocated;
499 radix_tree_split(&tree, 0, new_order);
500 check_mem(old_order, new_order, alloc);
501 radix_tree_for_each_slot(slot, &tree, &iter, 0) {
502 radix_tree_iter_replace(&tree, &iter, slot,
503 item_create(iter.index, new_order));
504 }
505 radix_tree_preload_end();
506
507 item_kill_tree(&tree);
508 free(item);
509}
510
511static void __multiorder_split2(int old_order, int new_order)
512{
513 RADIX_TREE(tree, GFP_KERNEL);
514 void **slot;
515 struct radix_tree_iter iter;
516 struct radix_tree_node *node;
517 void *item;
518
519 __radix_tree_insert(&tree, 0, old_order, xa_mk_value(5));
520
521 item = __radix_tree_lookup(&tree, 0, &node, NULL);
522 assert(item == xa_mk_value(5));
523 assert(node->nr_values > 0);
524
525 radix_tree_split(&tree, 0, new_order);
526 radix_tree_for_each_slot(slot, &tree, &iter, 0) {
527 radix_tree_iter_replace(&tree, &iter, slot,
528 item_create(iter.index, new_order));
529 }
530
531 item = __radix_tree_lookup(&tree, 0, &node, NULL);
532 assert(item != xa_mk_value(5));
533 assert(node->nr_values == 0);
534
535 item_kill_tree(&tree);
536}
537
538static void __multiorder_split3(int old_order, int new_order)
539{
540 RADIX_TREE(tree, GFP_KERNEL);
541 void **slot;
542 struct radix_tree_iter iter;
543 struct radix_tree_node *node;
544 void *item;
545
546 __radix_tree_insert(&tree, 0, old_order, xa_mk_value(5));
547
548 item = __radix_tree_lookup(&tree, 0, &node, NULL);
549 assert(item == xa_mk_value(5));
550 assert(node->nr_values > 0);
551
552 radix_tree_split(&tree, 0, new_order);
553 radix_tree_for_each_slot(slot, &tree, &iter, 0) {
554 radix_tree_iter_replace(&tree, &iter, slot, xa_mk_value(7));
555 }
556
557 item = __radix_tree_lookup(&tree, 0, &node, NULL);
558 assert(item == xa_mk_value(7));
559 assert(node->nr_values > 0);
560
561 item_kill_tree(&tree);
562
563 __radix_tree_insert(&tree, 0, old_order, xa_mk_value(5));
564
565 item = __radix_tree_lookup(&tree, 0, &node, NULL);
566 assert(item == xa_mk_value(5));
567 assert(node->nr_values > 0);
568
569 radix_tree_split(&tree, 0, new_order);
570 radix_tree_for_each_slot(slot, &tree, &iter, 0) {
571 if (iter.index == (1 << new_order))
572 radix_tree_iter_replace(&tree, &iter, slot,
573 xa_mk_value(7));
574 else
575 radix_tree_iter_replace(&tree, &iter, slot, NULL);
576 }
577
578 item = __radix_tree_lookup(&tree, 1 << new_order, &node, NULL);
579 assert(item == xa_mk_value(7));
580 assert(node->count == node->nr_values);
581 do {
582 node = node->parent;
583 if (!node)
584 break;
585 assert(node->count == 1);
586 assert(node->nr_values == 0);
587 } while (1);
588
589 item_kill_tree(&tree);
590}
591
592static void multiorder_split(void)
593{
594 int i, j;
595
596 for (i = 3; i < 11; i++)
597 for (j = 0; j < i; j++) {
598 __multiorder_split(i, j);
599 __multiorder_split2(i, j);
600 __multiorder_split3(i, j);
601 }
602}
603
604static void multiorder_account(void) 359static void multiorder_account(void)
605{ 360{
606 RADIX_TREE(tree, GFP_KERNEL); 361 RADIX_TREE(tree, GFP_KERNEL);
@@ -702,8 +457,6 @@ void multiorder_checks(void)
702 multiorder_tag_tests(); 457 multiorder_tag_tests();
703 multiorder_iteration(); 458 multiorder_iteration();
704 multiorder_tagged_iteration(); 459 multiorder_tagged_iteration();
705 multiorder_join();
706 multiorder_split();
707 multiorder_account(); 460 multiorder_account();
708 multiorder_iteration_race(); 461 multiorder_iteration_race();
709 462