diff options
Diffstat (limited to 'fs/ext4/inode.c')
-rw-r--r-- | fs/ext4/inode.c | 153 |
1 files changed, 78 insertions, 75 deletions
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index bb717cbb749c..7dd9b50d5ebc 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c | |||
@@ -429,16 +429,13 @@ static ext4_fsblk_t ext4_find_near(struct inode *inode, Indirect *ind) | |||
429 | * ext4_find_goal - find a preferred place for allocation. | 429 | * ext4_find_goal - find a preferred place for allocation. |
430 | * @inode: owner | 430 | * @inode: owner |
431 | * @block: block we want | 431 | * @block: block we want |
432 | * @chain: chain of indirect blocks | ||
433 | * @partial: pointer to the last triple within a chain | 432 | * @partial: pointer to the last triple within a chain |
434 | * @goal: place to store the result. | ||
435 | * | 433 | * |
436 | * Normally this function finds the preferred place for block allocation, | 434 | * Normally this function finds the preferred place for block allocation, |
437 | * stores it in *@goal and returns zero. | 435 | * returns it. |
438 | */ | 436 | */ |
439 | |||
440 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, | 437 | static ext4_fsblk_t ext4_find_goal(struct inode *inode, ext4_lblk_t block, |
441 | Indirect chain[4], Indirect *partial) | 438 | Indirect *partial) |
442 | { | 439 | { |
443 | struct ext4_block_alloc_info *block_i; | 440 | struct ext4_block_alloc_info *block_i; |
444 | 441 | ||
@@ -839,7 +836,7 @@ int ext4_get_blocks_handle(handle_t *handle, struct inode *inode, | |||
839 | if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) | 836 | if (S_ISREG(inode->i_mode) && (!ei->i_block_alloc_info)) |
840 | ext4_init_block_alloc_info(inode); | 837 | ext4_init_block_alloc_info(inode); |
841 | 838 | ||
842 | goal = ext4_find_goal(inode, iblock, chain, partial); | 839 | goal = ext4_find_goal(inode, iblock, partial); |
843 | 840 | ||
844 | /* the number of blocks need to allocate for [d,t]indirect blocks */ | 841 | /* the number of blocks need to allocate for [d,t]indirect blocks */ |
845 | indirect_blks = (chain + depth) - partial - 1; | 842 | indirect_blks = (chain + depth) - partial - 1; |
@@ -895,7 +892,16 @@ out: | |||
895 | return err; | 892 | return err; |
896 | } | 893 | } |
897 | 894 | ||
898 | #define DIO_CREDITS (EXT4_RESERVE_TRANS_BLOCKS + 32) | 895 | /* Maximum number of blocks we map for direct IO at once. */ |
896 | #define DIO_MAX_BLOCKS 4096 | ||
897 | /* | ||
898 | * Number of credits we need for writing DIO_MAX_BLOCKS: | ||
899 | * We need sb + group descriptor + bitmap + inode -> 4 | ||
900 | * For B blocks with A block pointers per block we need: | ||
901 | * 1 (triple ind.) + (B/A/A + 2) (doubly ind.) + (B/A + 2) (indirect). | ||
902 | * If we plug in 4096 for B and 256 for A (for 1KB block size), we get 25. | ||
903 | */ | ||
904 | #define DIO_CREDITS 25 | ||
899 | 905 | ||
900 | int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, | 906 | int ext4_get_blocks_wrap(handle_t *handle, struct inode *inode, sector_t block, |
901 | unsigned long max_blocks, struct buffer_head *bh, | 907 | unsigned long max_blocks, struct buffer_head *bh, |
@@ -942,49 +948,31 @@ static int ext4_get_block(struct inode *inode, sector_t iblock, | |||
942 | struct buffer_head *bh_result, int create) | 948 | struct buffer_head *bh_result, int create) |
943 | { | 949 | { |
944 | handle_t *handle = ext4_journal_current_handle(); | 950 | handle_t *handle = ext4_journal_current_handle(); |
945 | int ret = 0; | 951 | int ret = 0, started = 0; |
946 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; | 952 | unsigned max_blocks = bh_result->b_size >> inode->i_blkbits; |
947 | 953 | ||
948 | if (!create) | 954 | if (create && !handle) { |
949 | goto get_block; /* A read */ | 955 | /* Direct IO write... */ |
950 | 956 | if (max_blocks > DIO_MAX_BLOCKS) | |
951 | if (max_blocks == 1) | 957 | max_blocks = DIO_MAX_BLOCKS; |
952 | goto get_block; /* A single block get */ | 958 | handle = ext4_journal_start(inode, DIO_CREDITS + |
953 | 959 | 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb)); | |
954 | if (handle->h_transaction->t_state == T_LOCKED) { | 960 | if (IS_ERR(handle)) { |
955 | /* | ||
956 | * Huge direct-io writes can hold off commits for long | ||
957 | * periods of time. Let this commit run. | ||
958 | */ | ||
959 | ext4_journal_stop(handle); | ||
960 | handle = ext4_journal_start(inode, DIO_CREDITS); | ||
961 | if (IS_ERR(handle)) | ||
962 | ret = PTR_ERR(handle); | 961 | ret = PTR_ERR(handle); |
963 | goto get_block; | 962 | goto out; |
964 | } | ||
965 | |||
966 | if (handle->h_buffer_credits <= EXT4_RESERVE_TRANS_BLOCKS) { | ||
967 | /* | ||
968 | * Getting low on buffer credits... | ||
969 | */ | ||
970 | ret = ext4_journal_extend(handle, DIO_CREDITS); | ||
971 | if (ret > 0) { | ||
972 | /* | ||
973 | * Couldn't extend the transaction. Start a new one. | ||
974 | */ | ||
975 | ret = ext4_journal_restart(handle, DIO_CREDITS); | ||
976 | } | 963 | } |
964 | started = 1; | ||
977 | } | 965 | } |
978 | 966 | ||
979 | get_block: | 967 | ret = ext4_get_blocks_wrap(handle, inode, iblock, |
980 | if (ret == 0) { | ||
981 | ret = ext4_get_blocks_wrap(handle, inode, iblock, | ||
982 | max_blocks, bh_result, create, 0); | 968 | max_blocks, bh_result, create, 0); |
983 | if (ret > 0) { | 969 | if (ret > 0) { |
984 | bh_result->b_size = (ret << inode->i_blkbits); | 970 | bh_result->b_size = (ret << inode->i_blkbits); |
985 | ret = 0; | 971 | ret = 0; |
986 | } | ||
987 | } | 972 | } |
973 | if (started) | ||
974 | ext4_journal_stop(handle); | ||
975 | out: | ||
988 | return ret; | 976 | return ret; |
989 | } | 977 | } |
990 | 978 | ||
@@ -1674,7 +1662,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait) | |||
1674 | * if the machine crashes during the write. | 1662 | * if the machine crashes during the write. |
1675 | * | 1663 | * |
1676 | * If the O_DIRECT write is instantiating holes inside i_size and the machine | 1664 | * If the O_DIRECT write is instantiating holes inside i_size and the machine |
1677 | * crashes then stale disk data _may_ be exposed inside the file. | 1665 | * crashes then stale disk data _may_ be exposed inside the file. But current |
1666 | * VFS code falls back into buffered path in that case so we are safe. | ||
1678 | */ | 1667 | */ |
1679 | static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, | 1668 | static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, |
1680 | const struct iovec *iov, loff_t offset, | 1669 | const struct iovec *iov, loff_t offset, |
@@ -1683,7 +1672,7 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, | |||
1683 | struct file *file = iocb->ki_filp; | 1672 | struct file *file = iocb->ki_filp; |
1684 | struct inode *inode = file->f_mapping->host; | 1673 | struct inode *inode = file->f_mapping->host; |
1685 | struct ext4_inode_info *ei = EXT4_I(inode); | 1674 | struct ext4_inode_info *ei = EXT4_I(inode); |
1686 | handle_t *handle = NULL; | 1675 | handle_t *handle; |
1687 | ssize_t ret; | 1676 | ssize_t ret; |
1688 | int orphan = 0; | 1677 | int orphan = 0; |
1689 | size_t count = iov_length(iov, nr_segs); | 1678 | size_t count = iov_length(iov, nr_segs); |
@@ -1691,17 +1680,21 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, | |||
1691 | if (rw == WRITE) { | 1680 | if (rw == WRITE) { |
1692 | loff_t final_size = offset + count; | 1681 | loff_t final_size = offset + count; |
1693 | 1682 | ||
1694 | handle = ext4_journal_start(inode, DIO_CREDITS); | ||
1695 | if (IS_ERR(handle)) { | ||
1696 | ret = PTR_ERR(handle); | ||
1697 | goto out; | ||
1698 | } | ||
1699 | if (final_size > inode->i_size) { | 1683 | if (final_size > inode->i_size) { |
1684 | /* Credits for sb + inode write */ | ||
1685 | handle = ext4_journal_start(inode, 2); | ||
1686 | if (IS_ERR(handle)) { | ||
1687 | ret = PTR_ERR(handle); | ||
1688 | goto out; | ||
1689 | } | ||
1700 | ret = ext4_orphan_add(handle, inode); | 1690 | ret = ext4_orphan_add(handle, inode); |
1701 | if (ret) | 1691 | if (ret) { |
1702 | goto out_stop; | 1692 | ext4_journal_stop(handle); |
1693 | goto out; | ||
1694 | } | ||
1703 | orphan = 1; | 1695 | orphan = 1; |
1704 | ei->i_disksize = inode->i_size; | 1696 | ei->i_disksize = inode->i_size; |
1697 | ext4_journal_stop(handle); | ||
1705 | } | 1698 | } |
1706 | } | 1699 | } |
1707 | 1700 | ||
@@ -1709,18 +1702,21 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, | |||
1709 | offset, nr_segs, | 1702 | offset, nr_segs, |
1710 | ext4_get_block, NULL); | 1703 | ext4_get_block, NULL); |
1711 | 1704 | ||
1712 | /* | 1705 | if (orphan) { |
1713 | * Reacquire the handle: ext4_get_block() can restart the transaction | ||
1714 | */ | ||
1715 | handle = ext4_journal_current_handle(); | ||
1716 | |||
1717 | out_stop: | ||
1718 | if (handle) { | ||
1719 | int err; | 1706 | int err; |
1720 | 1707 | ||
1721 | if (orphan && inode->i_nlink) | 1708 | /* Credits for sb + inode write */ |
1709 | handle = ext4_journal_start(inode, 2); | ||
1710 | if (IS_ERR(handle)) { | ||
1711 | /* This is really bad luck. We've written the data | ||
1712 | * but cannot extend i_size. Bail out and pretend | ||
1713 | * the write failed... */ | ||
1714 | ret = PTR_ERR(handle); | ||
1715 | goto out; | ||
1716 | } | ||
1717 | if (inode->i_nlink) | ||
1722 | ext4_orphan_del(handle, inode); | 1718 | ext4_orphan_del(handle, inode); |
1723 | if (orphan && ret > 0) { | 1719 | if (ret > 0) { |
1724 | loff_t end = offset + ret; | 1720 | loff_t end = offset + ret; |
1725 | if (end > inode->i_size) { | 1721 | if (end > inode->i_size) { |
1726 | ei->i_disksize = end; | 1722 | ei->i_disksize = end; |
@@ -1840,7 +1836,7 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page, | |||
1840 | */ | 1836 | */ |
1841 | if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) && | 1837 | if (!page_has_buffers(page) && test_opt(inode->i_sb, NOBH) && |
1842 | ext4_should_writeback_data(inode) && PageUptodate(page)) { | 1838 | ext4_should_writeback_data(inode) && PageUptodate(page)) { |
1843 | zero_user_page(page, offset, length, KM_USER0); | 1839 | zero_user(page, offset, length); |
1844 | set_page_dirty(page); | 1840 | set_page_dirty(page); |
1845 | goto unlock; | 1841 | goto unlock; |
1846 | } | 1842 | } |
@@ -1893,7 +1889,7 @@ int ext4_block_truncate_page(handle_t *handle, struct page *page, | |||
1893 | goto unlock; | 1889 | goto unlock; |
1894 | } | 1890 | } |
1895 | 1891 | ||
1896 | zero_user_page(page, offset, length, KM_USER0); | 1892 | zero_user(page, offset, length); |
1897 | 1893 | ||
1898 | BUFFER_TRACE(bh, "zeroed end of block"); | 1894 | BUFFER_TRACE(bh, "zeroed end of block"); |
1899 | 1895 | ||
@@ -2683,21 +2679,31 @@ static blkcnt_t ext4_inode_blocks(struct ext4_inode *raw_inode, | |||
2683 | } | 2679 | } |
2684 | } | 2680 | } |
2685 | 2681 | ||
2686 | void ext4_read_inode(struct inode * inode) | 2682 | struct inode *ext4_iget(struct super_block *sb, unsigned long ino) |
2687 | { | 2683 | { |
2688 | struct ext4_iloc iloc; | 2684 | struct ext4_iloc iloc; |
2689 | struct ext4_inode *raw_inode; | 2685 | struct ext4_inode *raw_inode; |
2690 | struct ext4_inode_info *ei = EXT4_I(inode); | 2686 | struct ext4_inode_info *ei; |
2691 | struct buffer_head *bh; | 2687 | struct buffer_head *bh; |
2688 | struct inode *inode; | ||
2689 | long ret; | ||
2692 | int block; | 2690 | int block; |
2693 | 2691 | ||
2692 | inode = iget_locked(sb, ino); | ||
2693 | if (!inode) | ||
2694 | return ERR_PTR(-ENOMEM); | ||
2695 | if (!(inode->i_state & I_NEW)) | ||
2696 | return inode; | ||
2697 | |||
2698 | ei = EXT4_I(inode); | ||
2694 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL | 2699 | #ifdef CONFIG_EXT4DEV_FS_POSIX_ACL |
2695 | ei->i_acl = EXT4_ACL_NOT_CACHED; | 2700 | ei->i_acl = EXT4_ACL_NOT_CACHED; |
2696 | ei->i_default_acl = EXT4_ACL_NOT_CACHED; | 2701 | ei->i_default_acl = EXT4_ACL_NOT_CACHED; |
2697 | #endif | 2702 | #endif |
2698 | ei->i_block_alloc_info = NULL; | 2703 | ei->i_block_alloc_info = NULL; |
2699 | 2704 | ||
2700 | if (__ext4_get_inode_loc(inode, &iloc, 0)) | 2705 | ret = __ext4_get_inode_loc(inode, &iloc, 0); |
2706 | if (ret < 0) | ||
2701 | goto bad_inode; | 2707 | goto bad_inode; |
2702 | bh = iloc.bh; | 2708 | bh = iloc.bh; |
2703 | raw_inode = ext4_raw_inode(&iloc); | 2709 | raw_inode = ext4_raw_inode(&iloc); |
@@ -2723,6 +2729,7 @@ void ext4_read_inode(struct inode * inode) | |||
2723 | !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { | 2729 | !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { |
2724 | /* this inode is deleted */ | 2730 | /* this inode is deleted */ |
2725 | brelse (bh); | 2731 | brelse (bh); |
2732 | ret = -ESTALE; | ||
2726 | goto bad_inode; | 2733 | goto bad_inode; |
2727 | } | 2734 | } |
2728 | /* The only unlinked inodes we let through here have | 2735 | /* The only unlinked inodes we let through here have |
@@ -2750,17 +2757,12 @@ void ext4_read_inode(struct inode * inode) | |||
2750 | ei->i_data[block] = raw_inode->i_block[block]; | 2757 | ei->i_data[block] = raw_inode->i_block[block]; |
2751 | INIT_LIST_HEAD(&ei->i_orphan); | 2758 | INIT_LIST_HEAD(&ei->i_orphan); |
2752 | 2759 | ||
2753 | if (inode->i_ino >= EXT4_FIRST_INO(inode->i_sb) + 1 && | 2760 | if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { |
2754 | EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { | ||
2755 | /* | ||
2756 | * When mke2fs creates big inodes it does not zero out | ||
2757 | * the unused bytes above EXT4_GOOD_OLD_INODE_SIZE, | ||
2758 | * so ignore those first few inodes. | ||
2759 | */ | ||
2760 | ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); | 2761 | ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); |
2761 | if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > | 2762 | if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > |
2762 | EXT4_INODE_SIZE(inode->i_sb)) { | 2763 | EXT4_INODE_SIZE(inode->i_sb)) { |
2763 | brelse (bh); | 2764 | brelse (bh); |
2765 | ret = -EIO; | ||
2764 | goto bad_inode; | 2766 | goto bad_inode; |
2765 | } | 2767 | } |
2766 | if (ei->i_extra_isize == 0) { | 2768 | if (ei->i_extra_isize == 0) { |
@@ -2814,11 +2816,12 @@ void ext4_read_inode(struct inode * inode) | |||
2814 | } | 2816 | } |
2815 | brelse (iloc.bh); | 2817 | brelse (iloc.bh); |
2816 | ext4_set_inode_flags(inode); | 2818 | ext4_set_inode_flags(inode); |
2817 | return; | 2819 | unlock_new_inode(inode); |
2820 | return inode; | ||
2818 | 2821 | ||
2819 | bad_inode: | 2822 | bad_inode: |
2820 | make_bad_inode(inode); | 2823 | iget_failed(inode); |
2821 | return; | 2824 | return ERR_PTR(ret); |
2822 | } | 2825 | } |
2823 | 2826 | ||
2824 | static int ext4_inode_blocks_set(handle_t *handle, | 2827 | static int ext4_inode_blocks_set(handle_t *handle, |