diff options
author | Hugh Dickins <hughd@google.com> | 2012-01-12 20:20:41 -0500 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-01-12 23:13:12 -0500 |
commit | e2bdb933ab8b7db71c318a4ddcf78a9fffd61ecb (patch) | |
tree | 23f39b8a9996135b893442c41d6da05c7988a0db /lib | |
parent | 928da837aca77a9d3cb5076bf07b3224b1ba293b (diff) |
radix_tree: take radix_tree_path off stack
Down, down in the deepest depths of GFP_NOIO page reclaim, we have
shrink_page_list() calling __remove_mapping() calling __delete_from_
swap_cache() or __delete_from_page_cache().
You would not expect those to need much stack, but in fact they call
radix_tree_delete(): which declares a 192-byte radix_tree_path array on
its stack (to record the node,offsets it visits when descending, in case
it needs to ascend to update them). And if any tag is still set [1],
that calls radix_tree_tag_clear(), which declares a further such
192-byte radix_tree_path array on the stack. (At least we have
interrupts disabled here, so won't then be pushing registers too.)
That was probably a good choice when most users were 32-bit (array of
half the size), and adding fields to radix_tree_node would have bloated
it unnecessarily. But nowadays many are 64-bit, and each
radix_tree_node contains a struct rcu_head, which is only used when
freeing; whereas the radix_tree_path info is only used for updating the
tree (deleting, clearing tags or setting tags if tagged) when a lock
must be held, of no interest when accessing the tree locklessly.
So add a parent pointer to the radix_tree_node, in union with the
rcu_head, and remove all uses of the radix_tree_path. There would be
space in that union to save the offset when descending as before (we can
argue that a lock must already be held to exclude other users), but
recalculating it when ascending is both easy (a constant shift and a
constant mask) and uncommon, so it seems better just to do that.
Two little optimizations: no need to decrement height when descending,
adjusting shift is enough; and once radix_tree_tag_if_tagged() has set
tag on a node and its ancestors, it need not ascend from that node
again.
perf on the radix tree test harness reports radix_tree_insert() as 2%
slower (now having to set parent), but radix_tree_delete() 24% faster.
Surely that's an exaggeration from rtth's artificially low map shift 3,
but forcing it back to 6 still rates radix_tree_delete() 8% faster.
[1] Can a pagecache tag (dirty, writeback or towrite) actually still be
set at the time of radix_tree_delete()? Perhaps not if the filesystem is
well-behaved. But although I've not tracked any stack overflow down to
this cause, I have observed a curious case in which a dirty tag is set
and left set on tmpfs: page migration's migrate_page_copy() happens to
use __set_page_dirty_nobuffers() to set PageDirty on the newpage, and
that sets PAGECACHE_TAG_DIRTY as a side-effect - harmless to a
filesystem which doesn't use tags, except for this stack depth issue.
Signed-off-by: Hugh Dickins <hughd@google.com>
Cc: Jan Kara <jack@suse.cz>
Cc: Dave Chinner <david@fromorbit.com>
Cc: Mel Gorman <mgorman@suse.de>
Cc: Nai Xia <nai.xia@gmail.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Diffstat (limited to 'lib')
-rw-r--r-- | lib/radix-tree.c | 154 |
1 files changed, 76 insertions, 78 deletions
diff --git a/lib/radix-tree.c b/lib/radix-tree.c index d9df7454519c..dc63d0818394 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c | |||
@@ -48,16 +48,14 @@ | |||
48 | struct radix_tree_node { | 48 | struct radix_tree_node { |
49 | unsigned int height; /* Height from the bottom */ | 49 | unsigned int height; /* Height from the bottom */ |
50 | unsigned int count; | 50 | unsigned int count; |
51 | struct rcu_head rcu_head; | 51 | union { |
52 | struct radix_tree_node *parent; /* Used when ascending tree */ | ||
53 | struct rcu_head rcu_head; /* Used when freeing node */ | ||
54 | }; | ||
52 | void __rcu *slots[RADIX_TREE_MAP_SIZE]; | 55 | void __rcu *slots[RADIX_TREE_MAP_SIZE]; |
53 | unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; | 56 | unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; |
54 | }; | 57 | }; |
55 | 58 | ||
56 | struct radix_tree_path { | ||
57 | struct radix_tree_node *node; | ||
58 | int offset; | ||
59 | }; | ||
60 | |||
61 | #define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) | 59 | #define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) |
62 | #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ | 60 | #define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ |
63 | RADIX_TREE_MAP_SHIFT)) | 61 | RADIX_TREE_MAP_SHIFT)) |
@@ -256,6 +254,7 @@ static inline unsigned long radix_tree_maxindex(unsigned int height) | |||
256 | static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) | 254 | static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) |
257 | { | 255 | { |
258 | struct radix_tree_node *node; | 256 | struct radix_tree_node *node; |
257 | struct radix_tree_node *slot; | ||
259 | unsigned int height; | 258 | unsigned int height; |
260 | int tag; | 259 | int tag; |
261 | 260 | ||
@@ -274,18 +273,23 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) | |||
274 | if (!(node = radix_tree_node_alloc(root))) | 273 | if (!(node = radix_tree_node_alloc(root))) |
275 | return -ENOMEM; | 274 | return -ENOMEM; |
276 | 275 | ||
277 | /* Increase the height. */ | ||
278 | node->slots[0] = indirect_to_ptr(root->rnode); | ||
279 | |||
280 | /* Propagate the aggregated tag info into the new root */ | 276 | /* Propagate the aggregated tag info into the new root */ |
281 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { | 277 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { |
282 | if (root_tag_get(root, tag)) | 278 | if (root_tag_get(root, tag)) |
283 | tag_set(node, tag, 0); | 279 | tag_set(node, tag, 0); |
284 | } | 280 | } |
285 | 281 | ||
282 | /* Increase the height. */ | ||
286 | newheight = root->height+1; | 283 | newheight = root->height+1; |
287 | node->height = newheight; | 284 | node->height = newheight; |
288 | node->count = 1; | 285 | node->count = 1; |
286 | node->parent = NULL; | ||
287 | slot = root->rnode; | ||
288 | if (newheight > 1) { | ||
289 | slot = indirect_to_ptr(slot); | ||
290 | slot->parent = node; | ||
291 | } | ||
292 | node->slots[0] = slot; | ||
289 | node = ptr_to_indirect(node); | 293 | node = ptr_to_indirect(node); |
290 | rcu_assign_pointer(root->rnode, node); | 294 | rcu_assign_pointer(root->rnode, node); |
291 | root->height = newheight; | 295 | root->height = newheight; |
@@ -331,6 +335,7 @@ int radix_tree_insert(struct radix_tree_root *root, | |||
331 | if (!(slot = radix_tree_node_alloc(root))) | 335 | if (!(slot = radix_tree_node_alloc(root))) |
332 | return -ENOMEM; | 336 | return -ENOMEM; |
333 | slot->height = height; | 337 | slot->height = height; |
338 | slot->parent = node; | ||
334 | if (node) { | 339 | if (node) { |
335 | rcu_assign_pointer(node->slots[offset], slot); | 340 | rcu_assign_pointer(node->slots[offset], slot); |
336 | node->count++; | 341 | node->count++; |
@@ -504,47 +509,41 @@ EXPORT_SYMBOL(radix_tree_tag_set); | |||
504 | void *radix_tree_tag_clear(struct radix_tree_root *root, | 509 | void *radix_tree_tag_clear(struct radix_tree_root *root, |
505 | unsigned long index, unsigned int tag) | 510 | unsigned long index, unsigned int tag) |
506 | { | 511 | { |
507 | /* | 512 | struct radix_tree_node *node = NULL; |
508 | * The radix tree path needs to be one longer than the maximum path | ||
509 | * since the "list" is null terminated. | ||
510 | */ | ||
511 | struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path; | ||
512 | struct radix_tree_node *slot = NULL; | 513 | struct radix_tree_node *slot = NULL; |
513 | unsigned int height, shift; | 514 | unsigned int height, shift; |
515 | int uninitialized_var(offset); | ||
514 | 516 | ||
515 | height = root->height; | 517 | height = root->height; |
516 | if (index > radix_tree_maxindex(height)) | 518 | if (index > radix_tree_maxindex(height)) |
517 | goto out; | 519 | goto out; |
518 | 520 | ||
519 | shift = (height - 1) * RADIX_TREE_MAP_SHIFT; | 521 | shift = height * RADIX_TREE_MAP_SHIFT; |
520 | pathp->node = NULL; | ||
521 | slot = indirect_to_ptr(root->rnode); | 522 | slot = indirect_to_ptr(root->rnode); |
522 | 523 | ||
523 | while (height > 0) { | 524 | while (shift) { |
524 | int offset; | ||
525 | |||
526 | if (slot == NULL) | 525 | if (slot == NULL) |
527 | goto out; | 526 | goto out; |
528 | 527 | ||
528 | shift -= RADIX_TREE_MAP_SHIFT; | ||
529 | offset = (index >> shift) & RADIX_TREE_MAP_MASK; | 529 | offset = (index >> shift) & RADIX_TREE_MAP_MASK; |
530 | pathp[1].offset = offset; | 530 | node = slot; |
531 | pathp[1].node = slot; | ||
532 | slot = slot->slots[offset]; | 531 | slot = slot->slots[offset]; |
533 | pathp++; | ||
534 | shift -= RADIX_TREE_MAP_SHIFT; | ||
535 | height--; | ||
536 | } | 532 | } |
537 | 533 | ||
538 | if (slot == NULL) | 534 | if (slot == NULL) |
539 | goto out; | 535 | goto out; |
540 | 536 | ||
541 | while (pathp->node) { | 537 | while (node) { |
542 | if (!tag_get(pathp->node, tag, pathp->offset)) | 538 | if (!tag_get(node, tag, offset)) |
543 | goto out; | 539 | goto out; |
544 | tag_clear(pathp->node, tag, pathp->offset); | 540 | tag_clear(node, tag, offset); |
545 | if (any_tag_set(pathp->node, tag)) | 541 | if (any_tag_set(node, tag)) |
546 | goto out; | 542 | goto out; |
547 | pathp--; | 543 | |
544 | index >>= RADIX_TREE_MAP_SHIFT; | ||
545 | offset = index & RADIX_TREE_MAP_MASK; | ||
546 | node = node->parent; | ||
548 | } | 547 | } |
549 | 548 | ||
550 | /* clear the root's tag bit */ | 549 | /* clear the root's tag bit */ |
@@ -646,8 +645,7 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, | |||
646 | unsigned int iftag, unsigned int settag) | 645 | unsigned int iftag, unsigned int settag) |
647 | { | 646 | { |
648 | unsigned int height = root->height; | 647 | unsigned int height = root->height; |
649 | struct radix_tree_path path[height]; | 648 | struct radix_tree_node *node = NULL; |
650 | struct radix_tree_path *pathp = path; | ||
651 | struct radix_tree_node *slot; | 649 | struct radix_tree_node *slot; |
652 | unsigned int shift; | 650 | unsigned int shift; |
653 | unsigned long tagged = 0; | 651 | unsigned long tagged = 0; |
@@ -671,14 +669,8 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, | |||
671 | shift = (height - 1) * RADIX_TREE_MAP_SHIFT; | 669 | shift = (height - 1) * RADIX_TREE_MAP_SHIFT; |
672 | slot = indirect_to_ptr(root->rnode); | 670 | slot = indirect_to_ptr(root->rnode); |
673 | 671 | ||
674 | /* | ||
675 | * we fill the path from (root->height - 2) to 0, leaving the index at | ||
676 | * (root->height - 1) as a terminator. Zero the node in the terminator | ||
677 | * so that we can use this to end walk loops back up the path. | ||
678 | */ | ||
679 | path[height - 1].node = NULL; | ||
680 | |||
681 | for (;;) { | 672 | for (;;) { |
673 | unsigned long upindex; | ||
682 | int offset; | 674 | int offset; |
683 | 675 | ||
684 | offset = (index >> shift) & RADIX_TREE_MAP_MASK; | 676 | offset = (index >> shift) & RADIX_TREE_MAP_MASK; |
@@ -686,12 +678,10 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, | |||
686 | goto next; | 678 | goto next; |
687 | if (!tag_get(slot, iftag, offset)) | 679 | if (!tag_get(slot, iftag, offset)) |
688 | goto next; | 680 | goto next; |
689 | if (height > 1) { | 681 | if (shift) { |
690 | /* Go down one level */ | 682 | /* Go down one level */ |
691 | height--; | ||
692 | shift -= RADIX_TREE_MAP_SHIFT; | 683 | shift -= RADIX_TREE_MAP_SHIFT; |
693 | path[height - 1].node = slot; | 684 | node = slot; |
694 | path[height - 1].offset = offset; | ||
695 | slot = slot->slots[offset]; | 685 | slot = slot->slots[offset]; |
696 | continue; | 686 | continue; |
697 | } | 687 | } |
@@ -701,15 +691,27 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root, | |||
701 | tag_set(slot, settag, offset); | 691 | tag_set(slot, settag, offset); |
702 | 692 | ||
703 | /* walk back up the path tagging interior nodes */ | 693 | /* walk back up the path tagging interior nodes */ |
704 | pathp = &path[0]; | 694 | upindex = index; |
705 | while (pathp->node) { | 695 | while (node) { |
696 | upindex >>= RADIX_TREE_MAP_SHIFT; | ||
697 | offset = upindex & RADIX_TREE_MAP_MASK; | ||
698 | |||
706 | /* stop if we find a node with the tag already set */ | 699 | /* stop if we find a node with the tag already set */ |
707 | if (tag_get(pathp->node, settag, pathp->offset)) | 700 | if (tag_get(node, settag, offset)) |
708 | break; | 701 | break; |
709 | tag_set(pathp->node, settag, pathp->offset); | 702 | tag_set(node, settag, offset); |
710 | pathp++; | 703 | node = node->parent; |
711 | } | 704 | } |
712 | 705 | ||
706 | /* | ||
707 | * Small optimization: now clear that node pointer. | ||
708 | * Since all of this slot's ancestors now have the tag set | ||
709 | * from setting it above, we have no further need to walk | ||
710 | * back up the tree setting tags, until we update slot to | ||
711 | * point to another radix_tree_node. | ||
712 | */ | ||
713 | node = NULL; | ||
714 | |||
713 | next: | 715 | next: |
714 | /* Go to next item at level determined by 'shift' */ | 716 | /* Go to next item at level determined by 'shift' */ |
715 | index = ((index >> shift) + 1) << shift; | 717 | index = ((index >> shift) + 1) << shift; |
@@ -724,8 +726,7 @@ next: | |||
724 | * last_index is guaranteed to be in the tree, what | 726 | * last_index is guaranteed to be in the tree, what |
725 | * we do below cannot wander astray. | 727 | * we do below cannot wander astray. |
726 | */ | 728 | */ |
727 | slot = path[height - 1].node; | 729 | slot = slot->parent; |
728 | height++; | ||
729 | shift += RADIX_TREE_MAP_SHIFT; | 730 | shift += RADIX_TREE_MAP_SHIFT; |
730 | } | 731 | } |
731 | } | 732 | } |
@@ -1299,7 +1300,7 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) | |||
1299 | /* try to shrink tree height */ | 1300 | /* try to shrink tree height */ |
1300 | while (root->height > 0) { | 1301 | while (root->height > 0) { |
1301 | struct radix_tree_node *to_free = root->rnode; | 1302 | struct radix_tree_node *to_free = root->rnode; |
1302 | void *newptr; | 1303 | struct radix_tree_node *slot; |
1303 | 1304 | ||
1304 | BUG_ON(!radix_tree_is_indirect_ptr(to_free)); | 1305 | BUG_ON(!radix_tree_is_indirect_ptr(to_free)); |
1305 | to_free = indirect_to_ptr(to_free); | 1306 | to_free = indirect_to_ptr(to_free); |
@@ -1320,10 +1321,12 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) | |||
1320 | * (to_free->slots[0]), it will be safe to dereference the new | 1321 | * (to_free->slots[0]), it will be safe to dereference the new |
1321 | * one (root->rnode) as far as dependent read barriers go. | 1322 | * one (root->rnode) as far as dependent read barriers go. |
1322 | */ | 1323 | */ |
1323 | newptr = to_free->slots[0]; | 1324 | slot = to_free->slots[0]; |
1324 | if (root->height > 1) | 1325 | if (root->height > 1) { |
1325 | newptr = ptr_to_indirect(newptr); | 1326 | slot->parent = NULL; |
1326 | root->rnode = newptr; | 1327 | slot = ptr_to_indirect(slot); |
1328 | } | ||
1329 | root->rnode = slot; | ||
1327 | root->height--; | 1330 | root->height--; |
1328 | 1331 | ||
1329 | /* | 1332 | /* |
@@ -1363,16 +1366,12 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) | |||
1363 | */ | 1366 | */ |
1364 | void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) | 1367 | void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) |
1365 | { | 1368 | { |
1366 | /* | 1369 | struct radix_tree_node *node = NULL; |
1367 | * The radix tree path needs to be one longer than the maximum path | ||
1368 | * since the "list" is null terminated. | ||
1369 | */ | ||
1370 | struct radix_tree_path path[RADIX_TREE_MAX_PATH + 1], *pathp = path; | ||
1371 | struct radix_tree_node *slot = NULL; | 1370 | struct radix_tree_node *slot = NULL; |
1372 | struct radix_tree_node *to_free; | 1371 | struct radix_tree_node *to_free; |
1373 | unsigned int height, shift; | 1372 | unsigned int height, shift; |
1374 | int tag; | 1373 | int tag; |
1375 | int offset; | 1374 | int uninitialized_var(offset); |
1376 | 1375 | ||
1377 | height = root->height; | 1376 | height = root->height; |
1378 | if (index > radix_tree_maxindex(height)) | 1377 | if (index > radix_tree_maxindex(height)) |
@@ -1385,39 +1384,35 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) | |||
1385 | goto out; | 1384 | goto out; |
1386 | } | 1385 | } |
1387 | slot = indirect_to_ptr(slot); | 1386 | slot = indirect_to_ptr(slot); |
1388 | 1387 | shift = height * RADIX_TREE_MAP_SHIFT; | |
1389 | shift = (height - 1) * RADIX_TREE_MAP_SHIFT; | ||
1390 | pathp->node = NULL; | ||
1391 | 1388 | ||
1392 | do { | 1389 | do { |
1393 | if (slot == NULL) | 1390 | if (slot == NULL) |
1394 | goto out; | 1391 | goto out; |
1395 | 1392 | ||
1396 | pathp++; | 1393 | shift -= RADIX_TREE_MAP_SHIFT; |
1397 | offset = (index >> shift) & RADIX_TREE_MAP_MASK; | 1394 | offset = (index >> shift) & RADIX_TREE_MAP_MASK; |
1398 | pathp->offset = offset; | 1395 | node = slot; |
1399 | pathp->node = slot; | ||
1400 | slot = slot->slots[offset]; | 1396 | slot = slot->slots[offset]; |
1401 | shift -= RADIX_TREE_MAP_SHIFT; | 1397 | } while (shift); |
1402 | height--; | ||
1403 | } while (height > 0); | ||
1404 | 1398 | ||
1405 | if (slot == NULL) | 1399 | if (slot == NULL) |
1406 | goto out; | 1400 | goto out; |
1407 | 1401 | ||
1408 | /* | 1402 | /* |
1409 | * Clear all tags associated with the just-deleted item | 1403 | * Clear all tags associated with the item to be deleted. |
1404 | * This way of doing it would be inefficient, but seldom is any set. | ||
1410 | */ | 1405 | */ |
1411 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { | 1406 | for (tag = 0; tag < RADIX_TREE_MAX_TAGS; tag++) { |
1412 | if (tag_get(pathp->node, tag, pathp->offset)) | 1407 | if (tag_get(node, tag, offset)) |
1413 | radix_tree_tag_clear(root, index, tag); | 1408 | radix_tree_tag_clear(root, index, tag); |
1414 | } | 1409 | } |
1415 | 1410 | ||
1416 | to_free = NULL; | 1411 | to_free = NULL; |
1417 | /* Now free the nodes we do not need anymore */ | 1412 | /* Now free the nodes we do not need anymore */ |
1418 | while (pathp->node) { | 1413 | while (node) { |
1419 | pathp->node->slots[pathp->offset] = NULL; | 1414 | node->slots[offset] = NULL; |
1420 | pathp->node->count--; | 1415 | node->count--; |
1421 | /* | 1416 | /* |
1422 | * Queue the node for deferred freeing after the | 1417 | * Queue the node for deferred freeing after the |
1423 | * last reference to it disappears (set NULL, above). | 1418 | * last reference to it disappears (set NULL, above). |
@@ -1425,17 +1420,20 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) | |||
1425 | if (to_free) | 1420 | if (to_free) |
1426 | radix_tree_node_free(to_free); | 1421 | radix_tree_node_free(to_free); |
1427 | 1422 | ||
1428 | if (pathp->node->count) { | 1423 | if (node->count) { |
1429 | if (pathp->node == indirect_to_ptr(root->rnode)) | 1424 | if (node == indirect_to_ptr(root->rnode)) |
1430 | radix_tree_shrink(root); | 1425 | radix_tree_shrink(root); |
1431 | goto out; | 1426 | goto out; |
1432 | } | 1427 | } |
1433 | 1428 | ||
1434 | /* Node with zero slots in use so free it */ | 1429 | /* Node with zero slots in use so free it */ |
1435 | to_free = pathp->node; | 1430 | to_free = node; |
1436 | pathp--; | ||
1437 | 1431 | ||
1432 | index >>= RADIX_TREE_MAP_SHIFT; | ||
1433 | offset = index & RADIX_TREE_MAP_MASK; | ||
1434 | node = node->parent; | ||
1438 | } | 1435 | } |
1436 | |||
1439 | root_tag_clear_all(root); | 1437 | root_tag_clear_all(root); |
1440 | root->height = 0; | 1438 | root->height = 0; |
1441 | root->rnode = NULL; | 1439 | root->rnode = NULL; |