author    Frederic Weisbecker <fweisbec@gmail.com>  2009-05-13 20:56:39 -0400
committer Frederic Weisbecker <fweisbec@gmail.com>  2009-09-14 01:18:22 -0400
commit    2ac626955ed62ee8596f00581f959cc86e6198d1 (patch)
tree      a5bd28e558b889ad7c026199f6032b2ff33f9103 /fs
parent    c63e3c0b2498adec921b06c670d12c8c74b85538 (diff)
kill-the-bkl/reiserfs: unlock only when needed in search_by_key
search_by_key() is the call site that most requires the lock. This is mostly because it is a very central function, and also because it releases and reacquires the write lock at least once each time it is called. Each release/reacquire creates a lot of contention here and also widens the window in which another thread can change the tree. When that happens, the current search path over the tree must be retried from the beginning (the root), which is a wasteful and time-consuming recovery.

This patch factorizes two release/reacquire sequences:

- reading leaf node blocks
- reading the current block

The latter immediately follows the former. The whole sequence is safe as a single unlocked section because we check just afterwards whether the tree changed during these operations.

Cc: Jeff Mahoney <jeffm@suse.com>
Cc: Chris Mason <chris.mason@oracle.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Alexander Beregalov <a.beregalov@gmail.com>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
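To make the factorization concrete, the shape the hot path converges on is roughly the following. This is a condensed sketch, not the patched function verbatim: get_generation() and fs_changed() are the reiserfs helpers used for the post-read revalidation, and the "research" retry label mirrors the one in search_by_key(), but the surrounding control flow is simplified here.

research:
        /* ... walk from the root down toward the target node ... */
        fs_gen = get_generation(sb);

        if (!buffer_uptodate(bh)) {
                /*
                 * A single unlocked section covers both the read-ahead of
                 * the leaf node blocks and the read of the current block,
                 * instead of one unlock/lock pair per blocking read.
                 */
                reiserfs_write_unlock(sb);
                ll_rw_block(READ, 1, &bh);
                wait_on_buffer(bh);
                reiserfs_write_lock(sb);
        }

        /*
         * The unlocked window is safe only because we revalidate here:
         * if another writer changed the tree meanwhile, retry the whole
         * search from the root.
         */
        if (fs_changed(fs_gen, sb))
                goto research;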
Diffstat (limited to 'fs')
-rw-r--r--  fs/reiserfs/stree.c  42
1 file changed, 34 insertions(+), 8 deletions(-)
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 960c9114f6d3..6b025a42d510 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -519,12 +519,22 @@ static int is_tree_node(struct buffer_head *bh, int level)
 
 #define SEARCH_BY_KEY_READA 16
 
-/* The function is NOT SCHEDULE-SAFE! */
-static void search_by_key_reada(struct super_block *s,
+/*
+ * The function is NOT SCHEDULE-SAFE!
+ * It might unlock the write lock if we need to wait for a block
+ * to be read. Note that in this case it won't reacquire the lock,
+ * to avoid the high contention that would result from too many lock
+ * requests, especially since the caller (search_by_key) performs
+ * other schedule-unsafe operations just after calling this function.
+ *
+ * @return true if we have unlocked
+ */
+static bool search_by_key_reada(struct super_block *s,
                         struct buffer_head **bh,
                         b_blocknr_t *b, int num)
 {
         int i, j;
+        bool unlocked = false;
 
         for (i = 0; i < num; i++) {
                 bh[i] = sb_getblk(s, b[i]);
@@ -536,16 +546,21 @@ static void search_by_key_reada(struct super_block *s,
          * the lock. But it's still fine because we check later
          * if the tree changed
          */
-        reiserfs_write_unlock(s);
         for (j = 0; j < i; j++) {
                 /*
                  * note, this needs attention if we are getting rid of the BKL
                  * you have to make sure the prepared bit isn't set on this buffer
                  */
-                if (!buffer_uptodate(bh[j]))
+                if (!buffer_uptodate(bh[j])) {
+                        if (!unlocked) {
+                                reiserfs_write_unlock(s);
+                                unlocked = true;
+                        }
                         ll_rw_block(READA, 1, bh + j);
+                }
                 brelse(bh[j]);
         }
+        return unlocked;
 }
 
 /**************************************************************************
@@ -633,15 +648,26 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, /* Key to s
                    have a pointer to it. */
                 if ((bh = last_element->pe_buffer =
                      sb_getblk(sb, block_number))) {
+                        bool unlocked = false;
+
                         if (!buffer_uptodate(bh) && reada_count > 1)
-                                /* will unlock the write lock */
-                                search_by_key_reada(sb, reada_bh,
+                                /* may unlock the write lock */
+                                unlocked = search_by_key_reada(sb, reada_bh,
                                                     reada_blocks, reada_count);
-                        else
+                        /*
+                         * If we haven't already unlocked the write lock,
+                         * then we need to do that here before reading
+                         * the current block.
+                         */
+                        if (!buffer_uptodate(bh) && !unlocked) {
                                 reiserfs_write_unlock(sb);
+                                unlocked = true;
+                        }
                         ll_rw_block(READ, 1, &bh);
                         wait_on_buffer(bh);
-                        reiserfs_write_lock(sb);
+
+                        if (unlocked)
+                                reiserfs_write_lock(sb);
                         if (!buffer_uptodate(bh))
                                 goto io_error;
                 } else {