aboutsummaryrefslogtreecommitdiffstats
path: root/lib/radix-tree.c
diff options
context:
space:
mode:
authorHugh Dickins <hughd@google.com>2012-01-12 20:20:41 -0500
committerLinus Torvalds <torvalds@linux-foundation.org>2012-01-12 23:13:12 -0500
commite2bdb933ab8b7db71c318a4ddcf78a9fffd61ecb (patch)
tree23f39b8a9996135b893442c41d6da05c7988a0db /lib/radix-tree.c
parent928da837aca77a9d3cb5076bf07b3224b1ba293b (diff)
radix_tree: take radix_tree_path off stack
Down, down in the deepest depths of GFP_NOIO page reclaim, we have shrink_page_list() calling __remove_mapping() calling __delete_from_ swap_cache() or __delete_from_page_cache(). You would not expect those to need much stack, but in fact they call radix_tree_delete(): which declares a 192-byte radix_tree_path array on its stack (to record the node,offsets it visits when descending, in case it needs to ascend to update them). And if any tag is still set [1], that calls radix_tree_tag_clear(), which declares a further such 192-byte radix_tree_path array on the stack. (At least we have interrupts disabled here, so won't then be pushing registers too.) That was probably a good choice when most users were 32-bit (array of half the size), and adding fields to radix_tree_node would have bloated it unnecessarily. But nowadays many are 64-bit, and each radix_tree_node contains a struct rcu_head, which is only used when freeing; whereas the radix_tree_path info is only used for updating the tree (deleting, clearing tags or setting tags if tagged) when a lock must be held, of no interest when accessing the tree locklessly. So add a parent pointer to the radix_tree_node, in union with the rcu_head, and remove all uses of the radix_tree_path. There would be space in that union to save the offset when descending as before (we can argue that a lock must already be held to exclude other users), but recalculating it when ascending is both easy (a constant shift and a constant mask) and uncommon, so it seems better just to do that. Two little optimizations: no need to decrement height when descending, adjusting shift is enough; and once radix_tree_tag_if_tagged() has set tag on a node and its ancestors, it need not ascend from that node again. perf on the radix tree test harness reports radix_tree_insert() as 2% slower (now having to set parent), but radix_tree_delete() 24% faster. Surely that's an exaggeration from rtth's artificially low map shift 3, but forcing it back to 6 still rates radix_tree_delete() 8% faster. [1] Can a pagecache tag (dirty, writeback or towrite) actually still be set at the time of radix_tree_delete()? Perhaps not if the filesystem is well-behaved. But although I've not tracked any stack overflow down to this cause, I have observed a curious case in which a dirty tag is set and left set on tmpfs: page migration's migrate_page_copy() happens to use __set_page_dirty_nobuffers() to set PageDirty on the newpage, and that sets PAGECACHE_TAG_DIRTY as a side-effect - harmless to a filesystem which doesn't use tags, except for this stack depth issue. Signed-off-by: Hugh Dickins <hughd@google.com> Cc: Jan Kara <jack@suse.cz> Cc: Dave Chinner <david@fromorbit.com> Cc: Mel Gorman <mgorman@suse.de> Cc: Nai Xia <nai.xia@gmail.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'lib/radix-tree.c')
-rw-r--r--lib/radix-tree.c154
1 files changed, 76 insertions, 78 deletions
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index d9df7454519c..dc63d0818394 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -48,16 +48,14 @@
48struct radix_tree_node { 48struct radix_tree_node {
49 unsigned int height; /* Height from the bottom */ 49 unsigned int height; /* Height from the bottom */
50 unsigned int count; 50 unsigned int count;
51 struct rcu_head rcu_head; 51 union {
52 struct radix_tree_node *parent; /* Used when ascending tree */
53 struct rcu_head rcu_head; /* Used when freeing node */
54 };
52 void __rcu *slots[RADIX_TREE_MAP_SIZE]; 55 void __rcu *slots[RADIX_TREE_MAP_SIZE];
53 unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; 56 unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS];
54}; 57};
55 58
56struct radix_tree_path {
57 struct radix_tree_node *node;
58 int offset;
59};
60
61#define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) 59#define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long))
62#define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ 60#define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \
63 RADIX_TREE_MAP_SHIFT)) 61 RADIX_TREE_MAP_SHIFT))
@@ -256,6 +254,7 @@ static inline unsigned long radix_tree_maxindex(unsigned int height)
256static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) 254static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
257{ 255{
258 struct radix_tree_node *node; 256 struct radix_tree_node *node;
257 struct radix_tree_node *slot;
259 unsigned int height; 258 unsigned int height;
260 int tag; 259 int tag;
261 260
@@ -274,18 +273,23 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index)
274 if (!(node = radix_tree_node_alloc(root))) 273 if (!(node = radix_tree_node_alloc(root)))
275 return -ENOMEM; 274 return -ENOMEM;
276 275
277 /* Increase the height. */
278 node->slots[0] = indirect_to_ptr(root->rnode);
279
280 /* Propagate the aggregated tag info into the new root */ 276 /* Propagate the aggregated tag info into the new root */
281 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 277 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
282 if (root_tag_get(root, tag)) 278 if (root_tag_get(root, tag))
283 tag_set(node, tag, 0); 279 tag_set(node, tag, 0);
284 } 280 }
285 281
282 /* Increase the height. */
286 newheight = root->height+1; 283 newheight = root->height+1;
287 node->height = newheight; 284 node->height = newheight;
288 node->count = 1; 285 node->count = 1;
286 node->parent = NULL;
287 slot = root->rnode;
288 if (newheight > 1) {
289 slot = indirect_to_ptr(slot);
290 slot->parent = node;
291 }
292 node->slots[0] = slot;
289 node = ptr_to_indirect(node); 293 node = ptr_to_indirect(node);
290 rcu_assign_pointer(root->rnode, node); 294 rcu_assign_pointer(root->rnode, node);
291 root->height = newheight; 295 root->height = newheight;
@@ -331,6 +335,7 @@ int radix_tree_insert(struct radix_tree_root *root,
331 if (!(slot = radix_tree_node_alloc(root))) 335 if (!(slot = radix_tree_node_alloc(root)))
332 return -ENOMEM; 336 return -ENOMEM;
333 slot->height = height; 337 slot->height = height;
338 slot->parent = node;
334 if (node) { 339 if (node) {
335 rcu_assign_pointer(node->slots[offset], slot); 340 rcu_assign_pointer(node->slots[offset], slot);
336 node->count++; 341 node->count++;
@@ -504,47 +509,41 @@ EXPORT_SYMBOL(radix_tree_tag_set);
504void *radix_tree_tag_clear(struct radix_tree_root *root, 509void *radix_tree_tag_clear(struct radix_tree_root *root,
505 unsigned long index, unsigned int tag) 510 unsigned long index, unsigned int tag)
506{ 511{
507 /* 512 struct radix_tree_node *node = NULL;
508 * The radix tree path needs to be one longer than the maximum path
509 * since the "list" is null terminated.
510 */
511 struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path;
512 struct radix_tree_node *slot = NULL; 513 struct radix_tree_node *slot = NULL;
513 unsigned int height, shift; 514 unsigned int height, shift;
515 int uninitialized_var(offset);
514 516
515 height = root->height; 517 height = root->height;
516 if (index > radix_tree_maxindex(height)) 518 if (index > radix_tree_maxindex(height))
517 goto out; 519 goto out;
518 520
519 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 521 shift = height * RADIX_TREE_MAP_SHIFT;
520 pathp->node = NULL;
521 slot = indirect_to_ptr(root->rnode); 522 slot = indirect_to_ptr(root->rnode);
522 523
523 while (height > 0) { 524 while (shift) {
524 int offset;
525
526 if (slot == NULL) 525 if (slot == NULL)
527 goto out; 526 goto out;
528 527
528 shift -= RADIX_TREE_MAP_SHIFT;
529 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 529 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
530 pathp[1].offset = offset; 530 node = slot;
531 pathp[1].node = slot;
532 slot = slot->slots[offset]; 531 slot = slot->slots[offset];
533 pathp++;
534 shift -= RADIX_TREE_MAP_SHIFT;
535 height--;
536 } 532 }
537 533
538 if (slot == NULL) 534 if (slot == NULL)
539 goto out; 535 goto out;
540 536
541 while (pathp->node) { 537 while (node) {
542 if (!tag_get(pathp->node, tag, pathp->offset)) 538 if (!tag_get(node, tag, offset))
543 goto out; 539 goto out;
544 tag_clear(pathp->node, tag, pathp->offset); 540 tag_clear(node, tag, offset);
545 if (any_tag_set(pathp->node, tag)) 541 if (any_tag_set(node, tag))
546 goto out; 542 goto out;
547 pathp--; 543
544 index >>= RADIX_TREE_MAP_SHIFT;
545 offset = index & RADIX_TREE_MAP_MASK;
546 node = node->parent;
548 } 547 }
549 548
550 /* clear the root's tag bit */ 549 /* clear the root's tag bit */
@@ -646,8 +645,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
646 unsigned int iftag, unsigned int settag) 645 unsigned int iftag, unsigned int settag)
647{ 646{
648 unsigned int height = root->height; 647 unsigned int height = root->height;
649 struct radix_tree_path path[height]; 648 struct radix_tree_node *node = NULL;
650 struct radix_tree_path *pathp = path;
651 struct radix_tree_node *slot; 649 struct radix_tree_node *slot;
652 unsigned int shift; 650 unsigned int shift;
653 unsigned long tagged = 0; 651 unsigned long tagged = 0;
@@ -671,14 +669,8 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
671 shift = (height - 1) * RADIX_TREE_MAP_SHIFT; 669 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
672 slot = indirect_to_ptr(root->rnode); 670 slot = indirect_to_ptr(root->rnode);
673 671
674 /*
675 * we fill the path from (root->height - 2) to 0, leaving the index at
676 * (root->height - 1) as a terminator. Zero the node in the terminator
677 * so that we can use this to end walk loops back up the path.
678 */
679 path[height - 1].node = NULL;
680
681 for (;;) { 672 for (;;) {
673 unsigned long upindex;
682 int offset; 674 int offset;
683 675
684 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 676 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
@@ -686,12 +678,10 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
686 goto next; 678 goto next;
687 if (!tag_get(slot, iftag, offset)) 679 if (!tag_get(slot, iftag, offset))
688 goto next; 680 goto next;
689 if (height > 1) { 681 if (shift) {
690 /* Go down one level */ 682 /* Go down one level */
691 height--;
692 shift -= RADIX_TREE_MAP_SHIFT; 683 shift -= RADIX_TREE_MAP_SHIFT;
693 path[height - 1].node = slot; 684 node = slot;
694 path[height - 1].offset = offset;
695 slot = slot->slots[offset]; 685 slot = slot->slots[offset];
696 continue; 686 continue;
697 } 687 }
@@ -701,15 +691,27 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
701 tag_set(slot, settag, offset); 691 tag_set(slot, settag, offset);
702 692
703 /* walk back up the path tagging interior nodes */ 693 /* walk back up the path tagging interior nodes */
704 pathp = &path[0]; 694 upindex = index;
705 while (pathp->node) { 695 while (node) {
696 upindex >>= RADIX_TREE_MAP_SHIFT;
697 offset = upindex & RADIX_TREE_MAP_MASK;
698
706 /* stop if we find a node with the tag already set */ 699 /* stop if we find a node with the tag already set */
707 if (tag_get(pathp->node, settag, pathp->offset)) 700 if (tag_get(node, settag, offset))
708 break; 701 break;
709 tag_set(pathp->node, settag, pathp->offset); 702 tag_set(node, settag, offset);
710 pathp++; 703 node = node->parent;
711 } 704 }
712 705
706 /*
707 * Small optimization: now clear that node pointer.
708 * Since all of this slot's ancestors now have the tag set
709 * from setting it above, we have no further need to walk
710 * back up the tree setting tags, until we update slot to
711 * point to another radix_tree_node.
712 */
713 node = NULL;
714
713next: 715next:
714 /* Go to next item at level determined by 'shift' */ 716 /* Go to next item at level determined by 'shift' */
715 index = ((index >> shift) + 1) << shift; 717 index = ((index >> shift) + 1) << shift;
@@ -724,8 +726,7 @@ next:
724 * last_index is guaranteed to be in the tree, what 726 * last_index is guaranteed to be in the tree, what
725 * we do below cannot wander astray. 727 * we do below cannot wander astray.
726 */ 728 */
727 slot = path[height - 1].node; 729 slot = slot->parent;
728 height++;
729 shift += RADIX_TREE_MAP_SHIFT; 730 shift += RADIX_TREE_MAP_SHIFT;
730 } 731 }
731 } 732 }
@@ -1299,7 +1300,7 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
1299 /* try to shrink tree height */ 1300 /* try to shrink tree height */
1300 while (root->height > 0) { 1301 while (root->height > 0) {
1301 struct radix_tree_node *to_free = root->rnode; 1302 struct radix_tree_node *to_free = root->rnode;
1302 void *newptr; 1303 struct radix_tree_node *slot;
1303 1304
1304 BUG_ON(!radix_tree_is_indirect_ptr(to_free)); 1305 BUG_ON(!radix_tree_is_indirect_ptr(to_free));
1305 to_free = indirect_to_ptr(to_free); 1306 to_free = indirect_to_ptr(to_free);
@@ -1320,10 +1321,12 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
1320 * (to_free->slots[0]), it will be safe to dereference the new 1321 * (to_free->slots[0]), it will be safe to dereference the new
1321 * one (root->rnode) as far as dependent read barriers go. 1322 * one (root->rnode) as far as dependent read barriers go.
1322 */ 1323 */
1323 newptr = to_free->slots[0]; 1324 slot = to_free->slots[0];
1324 if (root->height > 1) 1325 if (root->height > 1) {
1325 newptr = ptr_to_indirect(newptr); 1326 slot->parent = NULL;
1326 root->rnode = newptr; 1327 slot = ptr_to_indirect(slot);
1328 }
1329 root->rnode = slot;
1327 root->height--; 1330 root->height--;
1328 1331
1329 /* 1332 /*
@@ -1363,16 +1366,12 @@ static inline void radix_tree_shrink(struct radix_tree_root *root)
1363 */ 1366 */
1364void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) 1367void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
1365{ 1368{
1366 /* 1369 struct radix_tree_node *node = NULL;
1367 * The radix tree path needs to be one longer than the maximum path
1368 * since the "list" is null terminated.
1369 */
1370 struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path;
1371 struct radix_tree_node *slot = NULL; 1370 struct radix_tree_node *slot = NULL;
1372 struct radix_tree_node *to_free; 1371 struct radix_tree_node *to_free;
1373 unsigned int height, shift; 1372 unsigned int height, shift;
1374 int tag; 1373 int tag;
1375 int offset; 1374 int uninitialized_var(offset);
1376 1375
1377 height = root->height; 1376 height = root->height;
1378 if (index > radix_tree_maxindex(height)) 1377 if (index > radix_tree_maxindex(height))
@@ -1385,39 +1384,35 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
1385 goto out; 1384 goto out;
1386 } 1385 }
1387 slot = indirect_to_ptr(slot); 1386 slot = indirect_to_ptr(slot);
1388 1387 shift = height * RADIX_TREE_MAP_SHIFT;
1389 shift = (height - 1) * RADIX_TREE_MAP_SHIFT;
1390 pathp->node = NULL;
1391 1388
1392 do { 1389 do {
1393 if (slot == NULL) 1390 if (slot == NULL)
1394 goto out; 1391 goto out;
1395 1392
1396 pathp++; 1393 shift -= RADIX_TREE_MAP_SHIFT;
1397 offset = (index >> shift) & RADIX_TREE_MAP_MASK; 1394 offset = (index >> shift) & RADIX_TREE_MAP_MASK;
1398 pathp->offset = offset; 1395 node = slot;
1399 pathp->node = slot;
1400 slot = slot->slots[offset]; 1396 slot = slot->slots[offset];
1401 shift -= RADIX_TREE_MAP_SHIFT; 1397 } while (shift);
1402 height--;
1403 } while (height > 0);
1404 1398
1405 if (slot == NULL) 1399 if (slot == NULL)
1406 goto out; 1400 goto out;
1407 1401
1408 /* 1402 /*
1409 * Clear all tags associated with the just-deleted item 1403 * Clear all tags associated with the item to be deleted.
1404 * This way of doing it would be inefficient, but seldom is any set.
1410 */ 1405 */
1411 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { 1406 for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) {
1412 if (tag_get(pathp->node, tag, pathp->offset)) 1407 if (tag_get(node, tag, offset))
1413 radix_tree_tag_clear(root, index, tag); 1408 radix_tree_tag_clear(root, index, tag);
1414 } 1409 }
1415 1410
1416 to_free = NULL; 1411 to_free = NULL;
1417 /* Now free the nodes we do not need anymore */ 1412 /* Now free the nodes we do not need anymore */
1418 while (pathp->node) { 1413 while (node) {
1419 pathp->node->slots[pathp->offset] = NULL; 1414 node->slots[offset] = NULL;
1420 pathp->node->count--; 1415 node->count--;
1421 /* 1416 /*
1422 * Queue the node for deferred freeing after the 1417 * Queue the node for deferred freeing after the
1423 * last reference to it disappears (set NULL, above). 1418 * last reference to it disappears (set NULL, above).
@@ -1425,17 +1420,20 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index)
1425 if (to_free) 1420 if (to_free)
1426 radix_tree_node_free(to_free); 1421 radix_tree_node_free(to_free);
1427 1422
1428 if (pathp->node->count) { 1423 if (node->count) {
1429 if (pathp->node == indirect_to_ptr(root->rnode)) 1424 if (node == indirect_to_ptr(root->rnode))
1430 radix_tree_shrink(root); 1425 radix_tree_shrink(root);
1431 goto out; 1426 goto out;
1432 } 1427 }
1433 1428
1434 /* Node with zero slots in use so free it */ 1429 /* Node with zero slots in use so free it */
1435 to_free = pathp->node; 1430 to_free = node;
1436 pathp--;
1437 1431
1432 index >>= RADIX_TREE_MAP_SHIFT;
1433 offset = index & RADIX_TREE_MAP_MASK;
1434 node = node->parent;
1438 } 1435 }
1436
1439 root_tag_clear_all(root); 1437 root_tag_clear_all(root);
1440 root->height = 0; 1438 root->height = 0;
1441 root->rnode = NULL; 1439 root->rnode = NULL;