diff options
author | Jan Kara <jack@suse.cz> | 2014-05-26 11:56:53 -0400 |
---|---|---|
committer | Theodore Ts'o <tytso@mit.edu> | 2014-05-26 11:56:53 -0400 |
commit | d745a8c20c1f864c10ca78d0f89219633861b7e9 (patch) | |
tree | c61f4f25b8bf4f25c30d8bc7933645b49387649f /fs/ext4 | |
parent | cd2c080c33fdab4ecf5ad43c88be0d3b646d272b (diff) |
ext4: reduce contention on s_orphan_lock
Shuffle code around in ext4_orphan_add() and ext4_orphan_del() so that
we avoid taking the global s_orphan_lock in some cases and hold it for
a shorter time in other cases.
Signed-off-by: Jan Kara <jack@suse.cz>
Signed-off-by: "Theodore Ts'o" <tytso@mit.edu>
Diffstat (limited to 'fs/ext4')
-rw-r--r-- | fs/ext4/namei.c | 109 |
1 files changed, 65 insertions, 44 deletions
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index eb61584ca5a5..3520ab8a6639 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2540,13 +2540,17 @@ static int empty_dir(struct inode *inode) | |||
2540 | return 1; | 2540 | return 1; |
2541 | } | 2541 | } |
2542 | 2542 | ||
2543 | /* ext4_orphan_add() links an unlinked or truncated inode into a list of | 2543 | /* |
2544 | * ext4_orphan_add() links an unlinked or truncated inode into a list of | ||
2544 | * such inodes, starting at the superblock, in case we crash before the | 2545 | * such inodes, starting at the superblock, in case we crash before the |
2545 | * file is closed/deleted, or in case the inode truncate spans multiple | 2546 | * file is closed/deleted, or in case the inode truncate spans multiple |
2546 | * transactions and the last transaction is not recovered after a crash. | 2547 | * transactions and the last transaction is not recovered after a crash. |
2547 | * | 2548 | * |
2548 | * At filesystem recovery time, we walk this list deleting unlinked | 2549 | * At filesystem recovery time, we walk this list deleting unlinked |
2549 | * inodes and truncating linked inodes in ext4_orphan_cleanup(). | 2550 | * inodes and truncating linked inodes in ext4_orphan_cleanup(). |
2551 | * | ||
2552 | * Orphan list manipulation functions must be called under i_mutex unless | ||
2553 | * we are just creating the inode or deleting it. | ||
2550 | */ | 2554 | */ |
2551 | int ext4_orphan_add(handle_t *handle, struct inode *inode) | 2555 | int ext4_orphan_add(handle_t *handle, struct inode *inode) |
2552 | { | 2556 | { |
@@ -2554,13 +2558,19 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) | |||
2554 | struct ext4_sb_info *sbi = EXT4_SB(sb); | 2558 | struct ext4_sb_info *sbi = EXT4_SB(sb); |
2555 | struct ext4_iloc iloc; | 2559 | struct ext4_iloc iloc; |
2556 | int err = 0, rc; | 2560 | int err = 0, rc; |
2561 | bool dirty = false; | ||
2557 | 2562 | ||
2558 | if (!sbi->s_journal) | 2563 | if (!sbi->s_journal) |
2559 | return 0; | 2564 | return 0; |
2560 | 2565 | ||
2561 | mutex_lock(&sbi->s_orphan_lock); | 2566 | WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) && |
2567 | !mutex_is_locked(&inode->i_mutex)); | ||
2568 | /* | ||
2569 | * Exit early if inode already is on orphan list. This is a big speedup | ||
2570 | * since we don't have to contend on the global s_orphan_lock. | ||
2571 | */ | ||
2562 | if (!list_empty(&EXT4_I(inode)->i_orphan)) | 2572 | if (!list_empty(&EXT4_I(inode)->i_orphan)) |
2563 | goto out_unlock; | 2573 | return 0; |
2564 | 2574 | ||
2565 | /* | 2575 | /* |
2566 | * Orphan handling is only valid for files with data blocks | 2576 | * Orphan handling is only valid for files with data blocks |
@@ -2574,44 +2584,47 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode) | |||
2574 | BUFFER_TRACE(sbi->s_sbh, "get_write_access"); | 2584 | BUFFER_TRACE(sbi->s_sbh, "get_write_access"); |
2575 | err = ext4_journal_get_write_access(handle, sbi->s_sbh); | 2585 | err = ext4_journal_get_write_access(handle, sbi->s_sbh); |
2576 | if (err) | 2586 | if (err) |
2577 | goto out_unlock; | 2587 | goto out; |
2578 | 2588 | ||
2579 | err = ext4_reserve_inode_write(handle, inode, &iloc); | 2589 | err = ext4_reserve_inode_write(handle, inode, &iloc); |
2580 | if (err) | 2590 | if (err) |
2581 | goto out_unlock; | 2591 | goto out; |
2592 | |||
2593 | mutex_lock(&sbi->s_orphan_lock); | ||
2582 | /* | 2594 | /* |
2583 | * Due to previous errors inode may be already a part of on-disk | 2595 | * Due to previous errors inode may be already a part of on-disk |
2584 | * orphan list. If so skip on-disk list modification. | 2596 | * orphan list. If so skip on-disk list modification. |
2585 | */ | 2597 | */ |
2586 | if (NEXT_ORPHAN(inode) && NEXT_ORPHAN(inode) <= | 2598 | if (!NEXT_ORPHAN(inode) || NEXT_ORPHAN(inode) > |
2587 | (le32_to_cpu(sbi->s_es->s_inodes_count))) | 2599 | (le32_to_cpu(sbi->s_es->s_inodes_count))) { |
2588 | goto mem_insert; | 2600 | /* Insert this inode at the head of the on-disk orphan list */ |
2589 | 2601 | NEXT_ORPHAN(inode) = le32_to_cpu(sbi->s_es->s_last_orphan); | |
2590 | /* Insert this inode at the head of the on-disk orphan list... */ | 2602 | sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); |
2591 | NEXT_ORPHAN(inode) = le32_to_cpu(sbi->s_es->s_last_orphan); | 2603 | dirty = true; |
2592 | sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); | 2604 | } |
2593 | err = ext4_handle_dirty_super(handle, sb); | 2605 | list_add(&EXT4_I(inode)->i_orphan, &sbi->s_orphan); |
2594 | rc = ext4_mark_iloc_dirty(handle, inode, &iloc); | 2606 | mutex_unlock(&sbi->s_orphan_lock); |
2595 | if (!err) | ||
2596 | err = rc; | ||
2597 | |||
2598 | /* Only add to the head of the in-memory list if all the | ||
2599 | * previous operations succeeded. If the orphan_add is going to | ||
2600 | * fail (possibly taking the journal offline), we can't risk | ||
2601 | * leaving the inode on the orphan list: stray orphan-list | ||
2602 | * entries can cause panics at unmount time. | ||
2603 | * | ||
2604 | * This is safe: on error we're going to ignore the orphan list | ||
2605 | * anyway on the next recovery. */ | ||
2606 | mem_insert: | ||
2607 | if (!err) | ||
2608 | list_add(&EXT4_I(inode)->i_orphan, &sbi->s_orphan); | ||
2609 | 2607 | ||
2608 | if (dirty) { | ||
2609 | err = ext4_handle_dirty_super(handle, sb); | ||
2610 | rc = ext4_mark_iloc_dirty(handle, inode, &iloc); | ||
2611 | if (!err) | ||
2612 | err = rc; | ||
2613 | if (err) { | ||
2614 | /* | ||
2615 | * We have to remove inode from in-memory list if | ||
2616 | * addition to on disk orphan list failed. Stray orphan | ||
2617 | * list entries can cause panics at unmount time. | ||
2618 | */ | ||
2619 | mutex_lock(&sbi->s_orphan_lock); | ||
2620 | list_del(&EXT4_I(inode)->i_orphan); | ||
2621 | mutex_unlock(&sbi->s_orphan_lock); | ||
2622 | } | ||
2623 | } | ||
2610 | jbd_debug(4, "superblock will point to %lu\n", inode->i_ino); | 2624 | jbd_debug(4, "superblock will point to %lu\n", inode->i_ino); |
2611 | jbd_debug(4, "orphan inode %lu will point to %d\n", | 2625 | jbd_debug(4, "orphan inode %lu will point to %d\n", |
2612 | inode->i_ino, NEXT_ORPHAN(inode)); | 2626 | inode->i_ino, NEXT_ORPHAN(inode)); |
2613 | out_unlock: | 2627 | out: |
2614 | mutex_unlock(&sbi->s_orphan_lock); | ||
2615 | ext4_std_error(sb, err); | 2628 | ext4_std_error(sb, err); |
2616 | return err; | 2629 | return err; |
2617 | } | 2630 | } |
@@ -2632,35 +2645,43 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) | |||
2632 | if (!sbi->s_journal && !(sbi->s_mount_state & EXT4_ORPHAN_FS)) | 2645 | if (!sbi->s_journal && !(sbi->s_mount_state & EXT4_ORPHAN_FS)) |
2633 | return 0; | 2646 | return 0; |
2634 | 2647 | ||
2635 | mutex_lock(&sbi->s_orphan_lock); | 2648 | WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) && |
2649 | !mutex_is_locked(&inode->i_mutex)); | ||
2650 | /* Do this quick check before taking global s_orphan_lock. */ | ||
2636 | if (list_empty(&ei->i_orphan)) | 2651 | if (list_empty(&ei->i_orphan)) |
2637 | goto out; | 2652 | return 0; |
2638 | 2653 | ||
2639 | ino_next = NEXT_ORPHAN(inode); | 2654 | if (handle) { |
2640 | prev = ei->i_orphan.prev; | 2655 | /* Grab inode buffer early before taking global s_orphan_lock */ |
2656 | err = ext4_reserve_inode_write(handle, inode, &iloc); | ||
2657 | } | ||
2641 | 2658 | ||
2659 | mutex_lock(&sbi->s_orphan_lock); | ||
2642 | jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino); | 2660 | jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino); |
2643 | 2661 | ||
2662 | prev = ei->i_orphan.prev; | ||
2644 | list_del_init(&ei->i_orphan); | 2663 | list_del_init(&ei->i_orphan); |
2645 | 2664 | ||
2646 | /* If we're on an error path, we may not have a valid | 2665 | /* If we're on an error path, we may not have a valid |
2647 | * transaction handle with which to update the orphan list on | 2666 | * transaction handle with which to update the orphan list on |
2648 | * disk, but we still need to remove the inode from the linked | 2667 | * disk, but we still need to remove the inode from the linked |
2649 | * list in memory. */ | 2668 | * list in memory. */ |
2650 | if (!handle) | 2669 | if (!handle || err) { |
2651 | goto out; | 2670 | mutex_unlock(&sbi->s_orphan_lock); |
2652 | |||
2653 | err = ext4_reserve_inode_write(handle, inode, &iloc); | ||
2654 | if (err) | ||
2655 | goto out_err; | 2671 | goto out_err; |
2672 | } | ||
2656 | 2673 | ||
2674 | ino_next = NEXT_ORPHAN(inode); | ||
2657 | if (prev == &sbi->s_orphan) { | 2675 | if (prev == &sbi->s_orphan) { |
2658 | jbd_debug(4, "superblock will point to %u\n", ino_next); | 2676 | jbd_debug(4, "superblock will point to %u\n", ino_next); |
2659 | BUFFER_TRACE(sbi->s_sbh, "get_write_access"); | 2677 | BUFFER_TRACE(sbi->s_sbh, "get_write_access"); |
2660 | err = ext4_journal_get_write_access(handle, sbi->s_sbh); | 2678 | err = ext4_journal_get_write_access(handle, sbi->s_sbh); |
2661 | if (err) | 2679 | if (err) { |
2680 | mutex_unlock(&sbi->s_orphan_lock); | ||
2662 | goto out_brelse; | 2681 | goto out_brelse; |
2682 | } | ||
2663 | sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); | 2683 | sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); |
2684 | mutex_unlock(&sbi->s_orphan_lock); | ||
2664 | err = ext4_handle_dirty_super(handle, inode->i_sb); | 2685 | err = ext4_handle_dirty_super(handle, inode->i_sb); |
2665 | } else { | 2686 | } else { |
2666 | struct ext4_iloc iloc2; | 2687 | struct ext4_iloc iloc2; |
@@ -2670,20 +2691,20 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode) | |||
2670 | jbd_debug(4, "orphan inode %lu will point to %u\n", | 2691 | jbd_debug(4, "orphan inode %lu will point to %u\n", |
2671 | i_prev->i_ino, ino_next); | 2692 | i_prev->i_ino, ino_next); |
2672 | err = ext4_reserve_inode_write(handle, i_prev, &iloc2); | 2693 | err = ext4_reserve_inode_write(handle, i_prev, &iloc2); |
2673 | if (err) | 2694 | if (err) { |
2695 | mutex_unlock(&sbi->s_orphan_lock); | ||
2674 | goto out_brelse; | 2696 | goto out_brelse; |
2697 | } | ||
2675 | NEXT_ORPHAN(i_prev) = ino_next; | 2698 | NEXT_ORPHAN(i_prev) = ino_next; |
2676 | err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2); | 2699 | err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2); |
2700 | mutex_unlock(&sbi->s_orphan_lock); | ||
2677 | } | 2701 | } |
2678 | if (err) | 2702 | if (err) |
2679 | goto out_brelse; | 2703 | goto out_brelse; |
2680 | NEXT_ORPHAN(inode) = 0; | 2704 | NEXT_ORPHAN(inode) = 0; |
2681 | err = ext4_mark_iloc_dirty(handle, inode, &iloc); | 2705 | err = ext4_mark_iloc_dirty(handle, inode, &iloc); |
2682 | |||
2683 | out_err: | 2706 | out_err: |
2684 | ext4_std_error(inode->i_sb, err); | 2707 | ext4_std_error(inode->i_sb, err); |
2685 | out: | ||
2686 | mutex_unlock(&sbi->s_orphan_lock); | ||
2687 | return err; | 2708 | return err; |
2688 | 2709 | ||
2689 | out_brelse: | 2710 | out_brelse: |