aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
author: Jan Kara <jack@suse.cz>, 2014-05-26 11:56:53 -0400
committer: Theodore Ts'o <tytso@mit.edu>, 2014-05-26 11:56:53 -0400
commit: d745a8c20c1f864c10ca78d0f89219633861b7e9 (patch)
tree: c61f4f25b8bf4f25c30d8bc7933645b49387649f /fs/ext4
parent: cd2c080c33fdab4ecf5ad43c88be0d3b646d272b (diff)
ext4: reduce contention on s_orphan_lock
Shuffle code around in ext4_orphan_add() and ext4_orphan_del() so that we avoid taking global s_orphan_lock in some cases and hold it for shorter time in other cases.

Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4')
-rw-r--r-- fs/ext4/namei.c 109
1 files changed, 65 insertions, 44 deletions
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index eb61584ca5a5..3520ab8a6639 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2540,13 +2540,17 @@ static int empty_dir(struct inode *inode)
2540 return 1; 2540 return 1;
2541} 2541}
2542 2542
2543/* ext4_orphan_add() links an unlinked or truncated inode into a list of 2543/*
2544 * ext4_orphan_add() links an unlinked or truncated inode into a list of
2544 * such inodes, starting at the superblock, in case we crash before the 2545 * such inodes, starting at the superblock, in case we crash before the
2545 * file is closed/deleted, or in case the inode truncate spans multiple 2546 * file is closed/deleted, or in case the inode truncate spans multiple
2546 * transactions and the last transaction is not recovered after a crash. 2547 * transactions and the last transaction is not recovered after a crash.
2547 * 2548 *
2548 * At filesystem recovery time, we walk this list deleting unlinked 2549 * At filesystem recovery time, we walk this list deleting unlinked
2549 * inodes and truncating linked inodes in ext4_orphan_cleanup(). 2550 * inodes and truncating linked inodes in ext4_orphan_cleanup().
2551 *
2552 * Orphan list manipulation functions must be called under i_mutex unless
2553 * we are just creating the inode or deleting it.
2550 */ 2554 */
2551int ext4_orphan_add(handle_t *handle, struct inode *inode) 2555int ext4_orphan_add(handle_t *handle, struct inode *inode)
2552{ 2556{
@@ -2554,13 +2558,19 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
2554 struct ext4_sb_info *sbi = EXT4_SB(sb); 2558 struct ext4_sb_info *sbi = EXT4_SB(sb);
2555 struct ext4_iloc iloc; 2559 struct ext4_iloc iloc;
2556 int err = 0, rc; 2560 int err = 0, rc;
2561 bool dirty = false;
2557 2562
2558 if (!sbi->s_journal) 2563 if (!sbi->s_journal)
2559 return 0; 2564 return 0;
2560 2565
2561 mutex_lock(&sbi->s_orphan_lock); 2566 WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
2567 !mutex_is_locked(&inode->i_mutex));
2568 /*
2569 * Exit early if inode already is on orphan list. This is a big speedup
2570 * since we don't have to contend on the global s_orphan_lock.
2571 */
2562 if (!list_empty(&EXT4_I(inode)->i_orphan)) 2572 if (!list_empty(&EXT4_I(inode)->i_orphan))
2563 goto out_unlock; 2573 return 0;
2564 2574
2565 /* 2575 /*
2566 * Orphan handling is only valid for files with data blocks 2576 * Orphan handling is only valid for files with data blocks
@@ -2574,44 +2584,47 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
2574 BUFFER_TRACE(sbi->s_sbh, "get_write_access"); 2584 BUFFER_TRACE(sbi->s_sbh, "get_write_access");
2575 err = ext4_journal_get_write_access(handle, sbi->s_sbh); 2585 err = ext4_journal_get_write_access(handle, sbi->s_sbh);
2576 if (err) 2586 if (err)
2577 goto out_unlock; 2587 goto out;
2578 2588
2579 err = ext4_reserve_inode_write(handle, inode, &iloc); 2589 err = ext4_reserve_inode_write(handle, inode, &iloc);
2580 if (err) 2590 if (err)
2581 goto out_unlock; 2591 goto out;
2592
2593 mutex_lock(&sbi->s_orphan_lock);
2582 /* 2594 /*
2583 * Due to previous errors inode may be already a part of on-disk 2595 * Due to previous errors inode may be already a part of on-disk
2584 * orphan list. If so skip on-disk list modification. 2596 * orphan list. If so skip on-disk list modification.
2585 */ 2597 */
2586 if (NEXT_ORPHAN(inode) && NEXT_ORPHAN(inode) <= 2598 if (!NEXT_ORPHAN(inode) || NEXT_ORPHAN(inode) >
2587 (le32_to_cpu(sbi->s_es->s_inodes_count))) 2599 (le32_to_cpu(sbi->s_es->s_inodes_count))) {
2588 goto mem_insert; 2600 /* Insert this inode at the head of the on-disk orphan list */
2589 2601 NEXT_ORPHAN(inode) = le32_to_cpu(sbi->s_es->s_last_orphan);
2590 /* Insert this inode at the head of the on-disk orphan list... */ 2602 sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
2591 NEXT_ORPHAN(inode) = le32_to_cpu(sbi->s_es->s_last_orphan); 2603 dirty = true;
2592 sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); 2604 }
2593 err = ext4_handle_dirty_super(handle, sb); 2605 list_add(&EXT4_I(inode)->i_orphan, &sbi->s_orphan);
2594 rc = ext4_mark_iloc_dirty(handle, inode, &iloc); 2606 mutex_unlock(&sbi->s_orphan_lock);
2595 if (!err)
2596 err = rc;
2597
2598 /* Only add to the head of the in-memory list if all the
2599 * previous operations succeeded. If the orphan_add is going to
2600 * fail (possibly taking the journal offline), we can't risk
2601 * leaving the inode on the orphan list: stray orphan-list
2602 * entries can cause panics at unmount time.
2603 *
2604 * This is safe: on error we're going to ignore the orphan list
2605 * anyway on the next recovery. */
2606mem_insert:
2607 if (!err)
2608 list_add(&EXT4_I(inode)->i_orphan, &sbi->s_orphan);
2609 2607
2608 if (dirty) {
2609 err = ext4_handle_dirty_super(handle, sb);
2610 rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
2611 if (!err)
2612 err = rc;
2613 if (err) {
2614 /*
2615 * We have to remove inode from in-memory list if
2616 * addition to on disk orphan list failed. Stray orphan
2617 * list entries can cause panics at unmount time.
2618 */
2619 mutex_lock(&sbi->s_orphan_lock);
2620 list_del(&EXT4_I(inode)->i_orphan);
2621 mutex_unlock(&sbi->s_orphan_lock);
2622 }
2623 }
2610 jbd_debug(4, "superblock will point to %lu\n", inode->i_ino); 2624 jbd_debug(4, "superblock will point to %lu\n", inode->i_ino);
2611 jbd_debug(4, "orphan inode %lu will point to %d\n", 2625 jbd_debug(4, "orphan inode %lu will point to %d\n",
2612 inode->i_ino, NEXT_ORPHAN(inode)); 2626 inode->i_ino, NEXT_ORPHAN(inode));
2613out_unlock: 2627out:
2614 mutex_unlock(&sbi->s_orphan_lock);
2615 ext4_std_error(sb, err); 2628 ext4_std_error(sb, err);
2616 return err; 2629 return err;
2617} 2630}
@@ -2632,35 +2645,43 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
2632 if (!sbi->s_journal && !(sbi->s_mount_state & EXT4_ORPHAN_FS)) 2645 if (!sbi->s_journal && !(sbi->s_mount_state & EXT4_ORPHAN_FS))
2633 return 0; 2646 return 0;
2634 2647
2635 mutex_lock(&sbi->s_orphan_lock); 2648 WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
2649 !mutex_is_locked(&inode->i_mutex));
2650 /* Do this quick check before taking global s_orphan_lock. */
2636 if (list_empty(&ei->i_orphan)) 2651 if (list_empty(&ei->i_orphan))
2637 goto out; 2652 return 0;
2638 2653
2639 ino_next = NEXT_ORPHAN(inode); 2654 if (handle) {
2640 prev = ei->i_orphan.prev; 2655 /* Grab inode buffer early before taking global s_orphan_lock */
2656 err = ext4_reserve_inode_write(handle, inode, &iloc);
2657 }
2641 2658
2659 mutex_lock(&sbi->s_orphan_lock);
2642 jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino); 2660 jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino);
2643 2661
2662 prev = ei->i_orphan.prev;
2644 list_del_init(&ei->i_orphan); 2663 list_del_init(&ei->i_orphan);
2645 2664
2646 /* If we're on an error path, we may not have a valid 2665 /* If we're on an error path, we may not have a valid
2647 * transaction handle with which to update the orphan list on 2666 * transaction handle with which to update the orphan list on
2648 * disk, but we still need to remove the inode from the linked 2667 * disk, but we still need to remove the inode from the linked
2649 * list in memory. */ 2668 * list in memory. */
2650 if (!handle) 2669 if (!handle || err) {
2651 goto out; 2670 mutex_unlock(&sbi->s_orphan_lock);
2652
2653 err = ext4_reserve_inode_write(handle, inode, &iloc);
2654 if (err)
2655 goto out_err; 2671 goto out_err;
2672 }
2656 2673
2674 ino_next = NEXT_ORPHAN(inode);
2657 if (prev == &sbi->s_orphan) { 2675 if (prev == &sbi->s_orphan) {
2658 jbd_debug(4, "superblock will point to %u\n", ino_next); 2676 jbd_debug(4, "superblock will point to %u\n", ino_next);
2659 BUFFER_TRACE(sbi->s_sbh, "get_write_access"); 2677 BUFFER_TRACE(sbi->s_sbh, "get_write_access");
2660 err = ext4_journal_get_write_access(handle, sbi->s_sbh); 2678 err = ext4_journal_get_write_access(handle, sbi->s_sbh);
2661 if (err) 2679 if (err) {
2680 mutex_unlock(&sbi->s_orphan_lock);
2662 goto out_brelse; 2681 goto out_brelse;
2682 }
2663 sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); 2683 sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
2684 mutex_unlock(&sbi->s_orphan_lock);
2664 err = ext4_handle_dirty_super(handle, inode->i_sb); 2685 err = ext4_handle_dirty_super(handle, inode->i_sb);
2665 } else { 2686 } else {
2666 struct ext4_iloc iloc2; 2687 struct ext4_iloc iloc2;
@@ -2670,20 +2691,20 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
2670 jbd_debug(4, "orphan inode %lu will point to %u\n", 2691 jbd_debug(4, "orphan inode %lu will point to %u\n",
2671 i_prev->i_ino, ino_next); 2692 i_prev->i_ino, ino_next);
2672 err = ext4_reserve_inode_write(handle, i_prev, &iloc2); 2693 err = ext4_reserve_inode_write(handle, i_prev, &iloc2);
2673 if (err) 2694 if (err) {
2695 mutex_unlock(&sbi->s_orphan_lock);
2674 goto out_brelse; 2696 goto out_brelse;
2697 }
2675 NEXT_ORPHAN(i_prev) = ino_next; 2698 NEXT_ORPHAN(i_prev) = ino_next;
2676 err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2); 2699 err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2);
2700 mutex_unlock(&sbi->s_orphan_lock);
2677 } 2701 }
2678 if (err) 2702 if (err)
2679 goto out_brelse; 2703 goto out_brelse;
2680 NEXT_ORPHAN(inode) = 0; 2704 NEXT_ORPHAN(inode) = 0;
2681 err = ext4_mark_iloc_dirty(handle, inode, &iloc); 2705 err = ext4_mark_iloc_dirty(handle, inode, &iloc);
2682
2683out_err: 2706out_err:
2684 ext4_std_error(inode->i_sb, err); 2707 ext4_std_error(inode->i_sb, err);
2685out:
2686 mutex_unlock(&sbi->s_orphan_lock);
2687 return err; 2708 return err;
2688 2709
2689out_brelse: 2710out_brelse: