author      Jiri Kosina <jkosina@suse.cz>   2011-04-26 04:22:15 -0400
committer   Jiri Kosina <jkosina@suse.cz>   2011-04-26 04:22:59 -0400
commit      07f9479a40cc778bc1462ada11f95b01360ae4ff (patch)
tree        0676cf38df3844004bb3ebfd99dfa67a4a8998f5 /fs/ext4
parent      9d5e6bdb3013acfb311ab407eeca0b6a6a3dedbf (diff)
parent      cd2e49e90f1cae7726c9a2c54488d881d7f1cd1c (diff)
Merge branch 'master' into for-next
Fast-forwarded to current state of Linus' tree as there are patches to be
applied for files that didn't exist on the old branch.
Diffstat (limited to 'fs/ext4')
-rw-r--r--   fs/ext4/acl.c          2
-rw-r--r--   fs/ext4/balloc.c       5
-rw-r--r--   fs/ext4/ext4.h        12
-rw-r--r--   fs/ext4/ext4_jbd2.h   11
-rw-r--r--   fs/ext4/extents.c    223
-rw-r--r--   fs/ext4/fsync.c       33
-rw-r--r--   fs/ext4/ialloc.c       8
-rw-r--r--   fs/ext4/inode.c      467
-rw-r--r--   fs/ext4/ioctl.c       15
-rw-r--r--   fs/ext4/mballoc.c     36
-rw-r--r--   fs/ext4/mballoc.h      2
-rw-r--r--   fs/ext4/migrate.c     12
-rw-r--r--   fs/ext4/namei.c       13
-rw-r--r--   fs/ext4/page-io.c     16
-rw-r--r--   fs/ext4/resize.c      12
-rw-r--r--   fs/ext4/super.c      126
-rw-r--r--   fs/ext4/xattr.c        4
17 files changed, 579 insertions, 418 deletions
diff --git a/fs/ext4/acl.c b/fs/ext4/acl.c
index e0270d1f8d82..21eacd7b7d79 100644
--- a/fs/ext4/acl.c
+++ b/fs/ext4/acl.c
@@ -433,7 +433,7 @@ ext4_xattr_set_acl(struct dentry *dentry, const char *name, const void *value,
 		return -EINVAL;
 	if (!test_opt(inode->i_sb, POSIX_ACL))
 		return -EOPNOTSUPP;
-	if (!is_owner_or_cap(inode))
+	if (!inode_owner_or_capable(inode))
 		return -EPERM;
 
 	if (value) {
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index adf96b822781..1c67139ad4b4 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -21,6 +21,8 @@
 #include "ext4_jbd2.h"
 #include "mballoc.h"
 
+#include <trace/events/ext4.h>
+
 /*
  * balloc.c contains the blocks allocation and deallocation routines
  */
@@ -342,6 +344,7 @@ ext4_read_block_bitmap(struct super_block *sb, ext4_group_t block_group)
 		 * We do it here so the bitmap uptodate bit
 		 * get set with buffer lock held.
 		 */
+		trace_ext4_read_block_bitmap_load(sb, block_group);
 		set_bitmap_uptodate(bh);
 		if (bh_submit_read(bh) < 0) {
 			put_bh(bh);
@@ -544,7 +547,7 @@ int ext4_claim_free_blocks(struct ext4_sb_info *sbi,
 *
 * ext4_should_retry_alloc() is called when ENOSPC is returned, and if
 * it is profitable to retry the operation, this function will wait
- * for the current or commiting transaction to complete, and then
+ * for the current or committing transaction to complete, and then
 * return TRUE.
 *
 * if the total number of retries exceed three times, return FALSE.
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 3aa0b72b3b94..4daaf2b753f4 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -923,14 +923,14 @@ struct ext4_inode_info {
 #define test_opt2(sb, opt)		(EXT4_SB(sb)->s_mount_opt2 & \
					 EXT4_MOUNT2_##opt)
 
-#define ext4_set_bit			ext2_set_bit
+#define ext4_set_bit			__test_and_set_bit_le
 #define ext4_set_bit_atomic		ext2_set_bit_atomic
-#define ext4_clear_bit			ext2_clear_bit
+#define ext4_clear_bit			__test_and_clear_bit_le
 #define ext4_clear_bit_atomic		ext2_clear_bit_atomic
-#define ext4_test_bit			ext2_test_bit
-#define ext4_find_first_zero_bit	ext2_find_first_zero_bit
-#define ext4_find_next_zero_bit		ext2_find_next_zero_bit
-#define ext4_find_next_bit		ext2_find_next_bit
+#define ext4_test_bit			test_bit_le
+#define ext4_find_first_zero_bit	find_first_zero_bit_le
+#define ext4_find_next_zero_bit		find_next_zero_bit_le
+#define ext4_find_next_bit		find_next_bit_le
 
 /*
  * Maximal mount counts between two filesystem checks
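These macros previously went through the ext2 compatibility wrappers; after this hunk they name the generic little-endian bit operations directly. On-disk block and inode bitmaps are byte-addressed, so bit n lives in byte n/8 at position n%8 regardless of host byte order. A minimal userspace sketch of those semantics (the helpers below are local stand-ins written for illustration, not the kernel's implementations):

#include <stdint.h>
#include <stdio.h>

/* Little-endian bitmap ops as used for on-disk bitmaps:
 * bit n lives in byte n/8, at position n%8, on any host. */
static int test_bit_le(const uint8_t *map, unsigned long nr)
{
	return (map[nr >> 3] >> (nr & 7)) & 1;
}

static int test_and_set_bit_le(uint8_t *map, unsigned long nr)
{
	int old = test_bit_le(map, nr);
	map[nr >> 3] |= 1U << (nr & 7);
	return old;
}

static unsigned long find_first_zero_bit_le(const uint8_t *map,
					    unsigned long size)
{
	unsigned long nr;
	for (nr = 0; nr < size; nr++)
		if (!test_bit_le(map, nr))
			return nr;
	return size;	/* no free bit */
}

int main(void)
{
	uint8_t bitmap[8] = { 0xff, 0x07 };	/* blocks 0-10 in use */
	unsigned long free_blk = find_first_zero_bit_le(bitmap, 64);

	if (free_blk < 64 && !test_and_set_bit_le(bitmap, free_blk))
		printf("allocated block %lu\n", free_blk);	/* prints 11 */
	return 0;
}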
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index d8b992e658c1..d0f53538a57f 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -86,8 +86,8 @@
 
 #ifdef CONFIG_QUOTA
 /* Amount of blocks needed for quota update - we know that the structure was
- * allocated so we need to update only inode+data */
-#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 2 : 0)
+ * allocated so we need to update only data block */
+#define EXT4_QUOTA_TRANS_BLOCKS(sb) (test_opt(sb, QUOTA) ? 1 : 0)
 /* Amount of blocks needed for quota insert/delete - we do some block writes
 * but inode, sb and group updates are done only once */
 #define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
@@ -202,13 +202,6 @@ static inline int ext4_handle_has_enough_credits(handle_t *handle, int needed)
 	return 1;
 }
 
-static inline void ext4_journal_release_buffer(handle_t *handle,
-					       struct buffer_head *bh)
-{
-	if (ext4_handle_valid(handle))
-		jbd2_journal_release_buffer(handle, bh);
-}
-
 static inline handle_t *ext4_journal_start(struct inode *inode, int nblocks)
 {
 	return ext4_journal_start_sb(inode->i_sb, nblocks);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 7516fb9c0bd5..4890d6f3ad15 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -44,6 +44,8 @@
 #include "ext4_jbd2.h"
 #include "ext4_extents.h"
 
+#include <trace/events/ext4.h>
+
 static int ext4_ext_truncate_extend_restart(handle_t *handle,
					    struct inode *inode,
					    int needed)
@@ -664,6 +666,8 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
 		if (unlikely(!bh))
 			goto err;
 		if (!bh_uptodate_or_lock(bh)) {
+			trace_ext4_ext_load_extent(inode, block,
+						path[ppos].p_block);
 			if (bh_submit_read(bh) < 0) {
 				put_bh(bh);
 				goto err;
@@ -1034,7 +1038,7 @@ cleanup:
 		for (i = 0; i < depth; i++) {
 			if (!ablocks[i])
 				continue;
-			ext4_free_blocks(handle, inode, 0, ablocks[i], 1,
+			ext4_free_blocks(handle, inode, NULL, ablocks[i], 1,
					 EXT4_FREE_BLOCKS_METADATA);
 		}
 	}
@@ -1725,7 +1729,7 @@ repeat:
 		BUG_ON(npath->p_depth != path->p_depth);
 		eh = npath[depth].p_hdr;
 		if (le16_to_cpu(eh->eh_entries) < le16_to_cpu(eh->eh_max)) {
-			ext_debug("next leaf isnt full(%d)\n",
+			ext_debug("next leaf isn't full(%d)\n",
				  le16_to_cpu(eh->eh_entries));
 			path = npath;
 			goto repeat;
@@ -2059,7 +2063,7 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
 	if (err)
 		return err;
 	ext_debug("index is empty, remove it, free block %llu\n", leaf);
-	ext4_free_blocks(handle, inode, 0, leaf, 1,
+	ext4_free_blocks(handle, inode, NULL, leaf, 1,
			 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
 	return err;
 }
@@ -2156,7 +2160,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
 		num = le32_to_cpu(ex->ee_block) + ee_len - from;
 		start = ext4_ext_pblock(ex) + ee_len - num;
 		ext_debug("free last %u blocks starting %llu\n", num, start);
-		ext4_free_blocks(handle, inode, 0, start, num, flags);
+		ext4_free_blocks(handle, inode, NULL, start, num, flags);
 	} else if (from == le32_to_cpu(ex->ee_block)
		   && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
 		printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
@@ -2529,7 +2533,7 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
 /*
 * This function is called by ext4_ext_map_blocks() if someone tries to write
 * to an uninitialized extent. It may result in splitting the uninitialized
- * extent into multiple extents (upto three - one initialized and two
+ * extent into multiple extents (up to three - one initialized and two
 * uninitialized).
 * There are three possibilities:
 *   a> There is no split required: Entire extent should be initialized
@@ -3108,14 +3112,13 @@ static int check_eofblocks_fl(handle_t *handle, struct inode *inode,
 {
 	int i, depth;
 	struct ext4_extent_header *eh;
-	struct ext4_extent *ex, *last_ex;
+	struct ext4_extent *last_ex;
 
 	if (!ext4_test_inode_flag(inode, EXT4_INODE_EOFBLOCKS))
 		return 0;
 
 	depth = ext_depth(inode);
 	eh = path[depth].p_hdr;
-	ex = path[depth].p_ext;
 
 	if (unlikely(!eh->eh_entries)) {
 		EXT4_ERROR_INODE(inode, "eh->eh_entries == 0 and "
@@ -3171,7 +3174,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
						path, flags);
 		/*
		 * Flag the inode(non aio case) or end_io struct (aio case)
-		 * that this IO needs to convertion to written when IO is
+		 * that this IO needs to conversion to written when IO is
		 * completed
		 */
 		if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
@@ -3295,9 +3298,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
			struct ext4_map_blocks *map, int flags)
 {
 	struct ext4_ext_path *path = NULL;
-	struct ext4_extent_header *eh;
 	struct ext4_extent newex, *ex;
-	ext4_fsblk_t newblock;
+	ext4_fsblk_t newblock = 0;
 	int err = 0, depth, ret;
 	unsigned int allocated = 0;
 	struct ext4_allocation_request ar;
@@ -3305,6 +3307,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 
 	ext_debug("blocks %u/%u requested for inode %lu\n",
		  map->m_lblk, map->m_len, inode->i_ino);
+	trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
 
 	/* check in cache */
 	if (ext4_ext_in_cache(inode, map->m_lblk, &newex)) {
@@ -3352,7 +3355,6 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 		err = -EIO;
 		goto out2;
 	}
-	eh = path[depth].p_hdr;
 
 	ex = path[depth].p_ext;
 	if (ex) {
@@ -3458,10 +3460,10 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 		ext4_ext_mark_uninitialized(&newex);
 		/*
		 * io_end structure was created for every IO write to an
-		 * uninitialized extent. To avoid unecessary conversion,
+		 * uninitialized extent. To avoid unnecessary conversion,
		 * here we flag the IO that really needs the conversion.
		 * For non asycn direct IO case, flag the inode state
-		 * that we need to perform convertion when IO is done.
+		 * that we need to perform conversion when IO is done.
		 */
 		if ((flags & EXT4_GET_BLOCKS_PRE_IO)) {
 			if (io && !(io->flag & EXT4_IO_END_UNWRITTEN)) {
@@ -3485,7 +3487,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
 		/* not a good idea to call discard here directly,
		 * but otherwise we'd need to call it every free() */
 		ext4_discard_preallocations(inode);
-		ext4_free_blocks(handle, inode, 0, ext4_ext_pblock(&newex),
+		ext4_free_blocks(handle, inode, NULL, ext4_ext_pblock(&newex),
				 ext4_ext_get_actual_len(&newex), 0);
 		goto out2;
 	}
@@ -3525,6 +3527,8 @@ out2:
 		ext4_ext_drop_refs(path);
 		kfree(path);
 	}
+	trace_ext4_ext_map_blocks_exit(inode, map->m_lblk,
+		newblock, map->m_len, err ? err : allocated);
 	return err ? err : allocated;
 }
 
@@ -3658,6 +3662,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
 		return -EOPNOTSUPP;
 
+	trace_ext4_fallocate_enter(inode, offset, len, mode);
 	map.m_lblk = offset >> blkbits;
 	/*
	 * We can't just convert len to max_blocks because
@@ -3673,6 +3678,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
 	ret = inode_newsize_ok(inode, (len + offset));
 	if (ret) {
 		mutex_unlock(&inode->i_mutex);
+		trace_ext4_fallocate_exit(inode, offset, max_blocks, ret);
 		return ret;
 	}
 retry:
@@ -3717,6 +3723,8 @@ retry:
 		goto retry;
 	}
 	mutex_unlock(&inode->i_mutex);
+	trace_ext4_fallocate_exit(inode, offset, max_blocks,
+				ret > 0 ? ret2 : ret);
 	return ret > 0 ? ret2 : ret;
 }
 
@@ -3775,6 +3783,7 @@ int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
 	}
 	return ret > 0 ? ret2 : ret;
 }
+
 /*
 * Callback function called for each extent to gather FIEMAP information.
 */
@@ -3782,38 +3791,162 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
		       struct ext4_ext_cache *newex, struct ext4_extent *ex,
		       void *data)
 {
-	struct fiemap_extent_info *fieinfo = data;
-	unsigned char blksize_bits = inode->i_sb->s_blocksize_bits;
 	__u64	logical;
 	__u64	physical;
 	__u64	length;
+	loff_t	size;
 	__u32	flags = 0;
-	int	error;
+	int	ret = 0;
+	struct fiemap_extent_info *fieinfo = data;
+	unsigned char blksize_bits;
 
-	logical = (__u64)newex->ec_block << blksize_bits;
+	blksize_bits = inode->i_sb->s_blocksize_bits;
+	logical = (__u64)newex->ec_block << blksize_bits;
 
 	if (newex->ec_start == 0) {
-		pgoff_t offset;
-		struct page *page;
+		/*
+		 * No extent in extent-tree contains block @newex->ec_start,
+		 * then the block may stay in 1)a hole or 2)delayed-extent.
+		 *
+		 * Holes or delayed-extents are processed as follows.
+		 * 1. lookup dirty pages with specified range in pagecache.
+		 *    If no page is got, then there is no delayed-extent and
+		 *    return with EXT_CONTINUE.
+		 * 2. find the 1st mapped buffer,
+		 * 3. check if the mapped buffer is both in the request range
+		 *    and a delayed buffer. If not, there is no delayed-extent,
+		 *    then return.
+		 * 4. a delayed-extent is found, the extent will be collected.
+		 */
+		ext4_lblk_t end = 0;
+		pgoff_t last_offset;
+		pgoff_t offset;
+		pgoff_t index;
+		struct page **pages = NULL;
 		struct buffer_head *bh = NULL;
+		struct buffer_head *head = NULL;
+		unsigned int nr_pages = PAGE_SIZE / sizeof(struct page *);
+
+		pages = kmalloc(PAGE_SIZE, GFP_KERNEL);
+		if (pages == NULL)
+			return -ENOMEM;
 
 		offset = logical >> PAGE_SHIFT;
-		page = find_get_page(inode->i_mapping, offset);
-		if (!page || !page_has_buffers(page))
-			return EXT_CONTINUE;
+repeat:
+		last_offset = offset;
+		head = NULL;
+		ret = find_get_pages_tag(inode->i_mapping, &offset,
+					PAGECACHE_TAG_DIRTY, nr_pages, pages);
+
+		if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
+			/* First time, try to find a mapped buffer. */
+			if (ret == 0) {
+out:
+				for (index = 0; index < ret; index++)
+					page_cache_release(pages[index]);
+				/* just a hole. */
+				kfree(pages);
+				return EXT_CONTINUE;
+			}
 
-		bh = page_buffers(page);
+			/* Try to find the 1st mapped buffer. */
+			end = ((__u64)pages[0]->index << PAGE_SHIFT) >>
+				  blksize_bits;
+			if (!page_has_buffers(pages[0]))
+				goto out;
+			head = page_buffers(pages[0]);
+			if (!head)
+				goto out;
 
-		if (!bh)
-			return EXT_CONTINUE;
+			bh = head;
+			do {
+				if (buffer_mapped(bh)) {
+					/* get the 1st mapped buffer. */
+					if (end > newex->ec_block +
+						newex->ec_len)
+						/* The buffer is out of
+						 * the request range.
+						 */
+						goto out;
+					goto found_mapped_buffer;
+				}
+				bh = bh->b_this_page;
+				end++;
+			} while (bh != head);
 
-		if (buffer_delay(bh)) {
-			flags |= FIEMAP_EXTENT_DELALLOC;
-			page_cache_release(page);
+			/* No mapped buffer found. */
+			goto out;
 		} else {
-			page_cache_release(page);
-			return EXT_CONTINUE;
+			/*Find contiguous delayed buffers. */
+			if (ret > 0 && pages[0]->index == last_offset)
+				head = page_buffers(pages[0]);
+			bh = head;
 		}
+
+found_mapped_buffer:
+		if (bh != NULL && buffer_delay(bh)) {
+			/* 1st or contiguous delayed buffer found. */
+			if (!(flags & FIEMAP_EXTENT_DELALLOC)) {
+				/*
+				 * 1st delayed buffer found, record
+				 * the start of extent.
+				 */
+				flags |= FIEMAP_EXTENT_DELALLOC;
+				newex->ec_block = end;
+				logical = (__u64)end << blksize_bits;
+			}
+			/* Find contiguous delayed buffers. */
+			do {
+				if (!buffer_delay(bh))
+					goto found_delayed_extent;
+				bh = bh->b_this_page;
+				end++;
+			} while (bh != head);
+
+			for (index = 1; index < ret; index++) {
+				if (!page_has_buffers(pages[index])) {
+					bh = NULL;
+					break;
+				}
+				head = page_buffers(pages[index]);
+				if (!head) {
+					bh = NULL;
+					break;
+				}
+				if (pages[index]->index !=
+					pages[0]->index + index) {
+					/* Blocks are not contiguous. */
+					bh = NULL;
+					break;
+				}
+				bh = head;
+				do {
+					if (!buffer_delay(bh))
+						/* Delayed-extent ends. */
+						goto found_delayed_extent;
+					bh = bh->b_this_page;
+					end++;
+				} while (bh != head);
+			}
+		} else if (!(flags & FIEMAP_EXTENT_DELALLOC))
+			/* a hole found. */
+			goto out;
+
+found_delayed_extent:
+		newex->ec_len = min(end - newex->ec_block,
+						(ext4_lblk_t)EXT_INIT_MAX_LEN);
+		if (ret == nr_pages && bh != NULL &&
+			newex->ec_len < EXT_INIT_MAX_LEN &&
+			buffer_delay(bh)) {
+			/* Have not collected an extent and continue. */
+			for (index = 0; index < ret; index++)
+				page_cache_release(pages[index]);
+			goto repeat;
+		}
+
+		for (index = 0; index < ret; index++)
+			page_cache_release(pages[index]);
+		kfree(pages);
 	}
 
 	physical = (__u64)newex->ec_start << blksize_bits;
@@ -3822,32 +3955,16 @@ static int ext4_ext_fiemap_cb(struct inode *inode, struct ext4_ext_path *path,
 	if (ex && ext4_ext_is_uninitialized(ex))
 		flags |= FIEMAP_EXTENT_UNWRITTEN;
 
-	/*
-	 * If this extent reaches EXT_MAX_BLOCK, it must be last.
-	 *
-	 * Or if ext4_ext_next_allocated_block is EXT_MAX_BLOCK,
-	 * this also indicates no more allocated blocks.
-	 *
-	 * XXX this might miss a single-block extent at EXT_MAX_BLOCK
-	 */
-	if (ext4_ext_next_allocated_block(path) == EXT_MAX_BLOCK ||
-	    newex->ec_block + newex->ec_len - 1 == EXT_MAX_BLOCK) {
-		loff_t size = i_size_read(inode);
-		loff_t bs = EXT4_BLOCK_SIZE(inode->i_sb);
-
+	size = i_size_read(inode);
+	if (logical + length >= size)
 		flags |= FIEMAP_EXTENT_LAST;
-		if ((flags & FIEMAP_EXTENT_DELALLOC) &&
-		    logical+length > size)
-			length = (size - logical + bs - 1) & ~(bs-1);
-	}
 
-	error = fiemap_fill_next_extent(fieinfo, logical, physical,
-					length, flags);
-	if (error < 0)
-		return error;
-	if (error == 1)
+	ret = fiemap_fill_next_extent(fieinfo, logical, physical,
					length, flags);
+	if (ret < 0)
+		return ret;
+	if (ret == 1)
 		return EXT_BREAK;
-
 	return EXT_CONTINUE;
 }
 
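The rewritten ext4_ext_fiemap_cb() above scans dirty pagecache pages so that delayed-allocation ranges are reported to fiemap instead of being treated as holes. A hedged userspace sketch of how such extents surface through the FIEMAP ioctl (the file path is hypothetical; querying a file with freshly written, not-yet-allocated data should show FIEMAP_EXTENT_DELALLOC):

#include <fcntl.h>
#include <linux/fiemap.h>
#include <linux/fs.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/ioctl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("/mnt/ext4/testfile", O_RDONLY);	/* hypothetical path */
	struct fiemap *fm;
	unsigned int i;

	if (fd < 0)
		return 1;
	/* room for 32 extent records after the fixed header */
	fm = calloc(1, sizeof(*fm) + 32 * sizeof(struct fiemap_extent));
	fm->fm_start = 0;
	fm->fm_length = FIEMAP_MAX_OFFSET;	/* map the whole file */
	fm->fm_extent_count = 32;

	if (ioctl(fd, FS_IOC_FIEMAP, fm) < 0)
		return 1;
	for (i = 0; i < fm->fm_mapped_extents; i++) {
		struct fiemap_extent *fe = &fm->fm_extents[i];
		printf("logical %llu len %llu%s%s\n",
		       (unsigned long long)fe->fe_logical,
		       (unsigned long long)fe->fe_length,
		       (fe->fe_flags & FIEMAP_EXTENT_DELALLOC) ?
				" [delalloc]" : "",
		       (fe->fe_flags & FIEMAP_EXTENT_LAST) ? " [last]" : "");
	}
	free(fm);
	close(fd);
	return 0;
}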
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 7829b287822a..e9473cbe80df 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -101,7 +101,7 @@ extern int ext4_flush_completed_IO(struct inode *inode)
	 * to the work-to-be schedule is freed.
	 *
	 * Thus we need to keep the io structure still valid here after
-	 * convertion finished. The io structure has a flag to
+	 * conversion finished. The io structure has a flag to
	 * avoid double converting from both fsync and background work
	 * queue work.
	 */
@@ -125,9 +125,11 @@ extern int ext4_flush_completed_IO(struct inode *inode)
 * the parent directory's parent as well, and so on recursively, if
 * they are also freshly created.
 */
-static void ext4_sync_parent(struct inode *inode)
+static int ext4_sync_parent(struct inode *inode)
 {
+	struct writeback_control wbc;
 	struct dentry *dentry = NULL;
+	int ret = 0;
 
 	while (inode && ext4_test_inode_state(inode, EXT4_STATE_NEWENTRY)) {
 		ext4_clear_inode_state(inode, EXT4_STATE_NEWENTRY);
@@ -136,8 +138,17 @@ static void ext4_sync_parent(struct inode *inode)
 		if (!dentry || !dentry->d_parent || !dentry->d_parent->d_inode)
 			break;
 		inode = dentry->d_parent->d_inode;
-		sync_mapping_buffers(inode->i_mapping);
+		ret = sync_mapping_buffers(inode->i_mapping);
+		if (ret)
+			break;
+		memset(&wbc, 0, sizeof(wbc));
+		wbc.sync_mode = WB_SYNC_ALL;
+		wbc.nr_to_write = 0;	/* only write out the inode */
+		ret = sync_inode(inode, &wbc);
+		if (ret)
+			break;
 	}
+	return ret;
 }
 
 /*
@@ -164,20 +175,20 @@ int ext4_sync_file(struct file *file, int datasync)
 
 	J_ASSERT(ext4_journal_current_handle() == NULL);
 
-	trace_ext4_sync_file(file, datasync);
+	trace_ext4_sync_file_enter(file, datasync);
 
 	if (inode->i_sb->s_flags & MS_RDONLY)
 		return 0;
 
 	ret = ext4_flush_completed_IO(inode);
 	if (ret < 0)
-		return ret;
+		goto out;
 
 	if (!journal) {
 		ret = generic_file_fsync(file, datasync);
 		if (!ret && !list_empty(&inode->i_dentry))
-			ext4_sync_parent(inode);
-		return ret;
+			ret = ext4_sync_parent(inode);
+		goto out;
 	}
 
@@ -194,8 +205,10 @@ int ext4_sync_file(struct file *file, int datasync)
	 * (they were dirtied by commit). But that's OK - the blocks are
	 * safe in-journal, which is all fsync() needs to ensure.
	 */
-	if (ext4_should_journal_data(inode))
-		return ext4_force_commit(inode->i_sb);
+	if (ext4_should_journal_data(inode)) {
+		ret = ext4_force_commit(inode->i_sb);
+		goto out;
+	}
 
 	commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
 	if (jbd2_log_start_commit(journal, commit_tid)) {
@@ -215,5 +228,7 @@ int ext4_sync_file(struct file *file, int datasync)
 		ret = jbd2_log_wait_commit(journal, commit_tid);
 	} else if (journal->j_flags & JBD2_BARRIER)
 		blkdev_issue_flush(inode->i_sb->s_bdev, GFP_KERNEL, NULL);
+ out:
+	trace_ext4_sync_file_exit(inode, ret);
 	return ret;
 }
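With ext4_sync_parent() now propagating errors and writing the parent directory inodes back with WB_SYNC_ALL, an fsync() of a freshly created file on a no-journal ext4 filesystem also persists the directory entries leading to it. The portable userspace idiom this automates, which careful applications still perform explicitly, looks roughly like the following sketch (paths are illustrative):

#include <fcntl.h>
#include <unistd.h>

int main(void)
{
	int fd = open("dir/newfile", O_CREAT | O_WRONLY | O_TRUNC, 0644);
	int dirfd;

	if (fd < 0 || write(fd, "data\n", 5) != 5)
		return 1;
	if (fsync(fd) < 0)	/* persist the file's data and inode */
		return 1;
	close(fd);

	/* A freshly created name also needs the directory flushed;
	 * the patched ext4_sync_parent() does this walk on no-journal
	 * filesystems, but the portable idiom is explicit: */
	dirfd = open("dir", O_RDONLY | O_DIRECTORY);
	if (dirfd < 0 || fsync(dirfd) < 0)
		return 1;
	close(dirfd);
	return 0;
}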
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 78b79e1bd7ed..21bb2f61e502 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -152,6 +152,7 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
		 * We do it here so the bitmap uptodate bit
		 * get set with buffer lock held.
		 */
+		trace_ext4_load_inode_bitmap(sb, block_group);
 		set_bitmap_uptodate(bh);
 		if (bh_submit_read(bh) < 0) {
 			put_bh(bh);
@@ -649,7 +650,7 @@ static int find_group_other(struct super_block *sb, struct inode *parent,
 		*group = parent_group + flex_size;
 		if (*group > ngroups)
 			*group = 0;
-		return find_group_orlov(sb, parent, group, mode, 0);
+		return find_group_orlov(sb, parent, group, mode, NULL);
 	}
 
 	/*
@@ -1054,6 +1055,11 @@ got:
 		}
 	}
 
+	if (ext4_handle_valid(handle)) {
+		ei->i_sync_tid = handle->h_transaction->t_tid;
+		ei->i_datasync_tid = handle->h_transaction->t_tid;
+	}
+
 	err = ext4_mark_inode_dirty(handle, inode);
 	if (err) {
 		ext4_std_error(sb, err);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9f7f9e49914f..f2fa5e8a582c 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -173,7 +173,7 @@ int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode,
 	BUG_ON(EXT4_JOURNAL(inode) == NULL);
 	jbd_debug(2, "restarting handle %p\n", handle);
 	up_write(&EXT4_I(inode)->i_data_sem);
-	ret = ext4_journal_restart(handle, blocks_for_truncate(inode));
+	ret = ext4_journal_restart(handle, nblocks);
 	down_write(&EXT4_I(inode)->i_data_sem);
 	ext4_discard_preallocations(inode);
 
@@ -720,7 +720,7 @@ allocated:
 	return ret;
 failed_out:
 	for (i = 0; i < index; i++)
-		ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
+		ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
 	return ret;
 }
 
@@ -823,20 +823,20 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
 	return err;
 failed:
 	/* Allocation failed, free what we already allocated */
-	ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0);
+	ext4_free_blocks(handle, inode, NULL, new_blocks[0], 1, 0);
 	for (i = 1; i <= n ; i++) {
 		/*
		 * branch[i].bh is newly allocated, so there is no
		 * need to revoke the block, which is why we don't
		 * need to set EXT4_FREE_BLOCKS_METADATA.
		 */
-		ext4_free_blocks(handle, inode, 0, new_blocks[i], 1,
+		ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1,
				 EXT4_FREE_BLOCKS_FORGET);
 	}
 	for (i = n+1; i < indirect_blks; i++)
-		ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
+		ext4_free_blocks(handle, inode, NULL, new_blocks[i], 1, 0);
 
-	ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0);
+	ext4_free_blocks(handle, inode, NULL, new_blocks[i], num, 0);
 
 	return err;
 }
@@ -924,7 +924,7 @@ err_out:
 		ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
				 EXT4_FREE_BLOCKS_FORGET);
 	}
-	ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key),
+	ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key),
			 blks, 0);
 
 	return err;
@@ -973,6 +973,7 @@ static int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
 	int count = 0;
 	ext4_fsblk_t first_block = 0;
 
+	trace_ext4_ind_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
 	J_ASSERT(!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)));
 	J_ASSERT(handle != NULL || (flags & EXT4_GET_BLOCKS_CREATE) == 0);
 	depth = ext4_block_to_path(inode, map->m_lblk, offsets,
@@ -1058,6 +1059,8 @@ cleanup:
 		partial--;
 	}
 out:
+	trace_ext4_ind_map_blocks_exit(inode, map->m_lblk,
+				map->m_pblk, map->m_len, err);
 	return err;
 }
 
@@ -2060,7 +2063,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 		if (nr_pages == 0)
 			break;
 		for (i = 0; i < nr_pages; i++) {
-			int commit_write = 0, redirty_page = 0;
+			int commit_write = 0, skip_page = 0;
 			struct page *page = pvec.pages[i];
 
 			index = page->index;
@@ -2086,14 +2089,12 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
			 * If the page does not have buffers (for
			 * whatever reason), try to create them using
			 * __block_write_begin.  If this fails,
-			 * redirty the page and move on.
+			 * skip the page and move on.
			 */
 			if (!page_has_buffers(page)) {
 				if (__block_write_begin(page, 0, len,
						noalloc_get_block_write)) {
-				redirty_page:
-					redirty_page_for_writepage(mpd->wbc,
-								   page);
+				skip_page:
 					unlock_page(page);
 					continue;
 				}
@@ -2104,7 +2105,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 			block_start = 0;
 			do {
 				if (!bh)
-					goto redirty_page;
+					goto skip_page;
 				if (map && (cur_logical >= map->m_lblk) &&
				    (cur_logical <= (map->m_lblk +
					(map->m_len - 1)))) {
@@ -2120,22 +2121,23 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 					clear_buffer_unwritten(bh);
 				}
 
-				/* redirty page if block allocation undone */
+				/* skip page if block allocation undone */
 				if (buffer_delay(bh) || buffer_unwritten(bh))
-					redirty_page = 1;
+					skip_page = 1;
 				bh = bh->b_this_page;
 				block_start += bh->b_size;
 				cur_logical++;
 				pblock++;
 			} while (bh != page_bufs);
 
-			if (redirty_page)
-				goto redirty_page;
+			if (skip_page)
+				goto skip_page;
 
 			if (commit_write)
 				/* mark the buffer_heads as dirty & uptodate */
 				block_commit_write(page, 0, len);
 
+			clear_page_dirty_for_io(page);
 			/*
			 * Delalloc doesn't support data journalling,
			 * but eventually maybe we'll lift this
@@ -2165,8 +2167,7 @@ static int mpage_da_submit_io(struct mpage_da_data *mpd,
 	return ret;
 }
 
-static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
-					sector_t logical, long blk_cnt)
+static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd)
 {
 	int nr_pages, i;
 	pgoff_t index, end;
@@ -2174,9 +2175,8 @@ static void ext4_da_block_invalidatepages(struct mpage_da_data *mpd,
 	struct inode *inode = mpd->inode;
 	struct address_space *mapping = inode->i_mapping;
 
-	index = logical >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	end   = (logical + blk_cnt - 1) >>
-				(PAGE_CACHE_SHIFT - inode->i_blkbits);
+	index = mpd->first_page;
+	end   = mpd->next_page - 1;
 	while (index <= end) {
 		nr_pages = pagevec_lookup(&pvec, mapping, index, PAGEVEC_SIZE);
 		if (nr_pages == 0)
@@ -2279,9 +2279,8 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
 		err = blks;
 		/*
		 * If get block returns EAGAIN or ENOSPC and there
-		 * appears to be free blocks we will call
-		 * ext4_writepage() for all of the pages which will
-		 * just redirty the pages.
+		 * appears to be free blocks we will just let
+		 * mpage_da_submit_io() unlock all of the pages.
		 */
 		if (err == -EAGAIN)
 			goto submit_io;
@@ -2312,8 +2311,10 @@ static void mpage_da_map_and_submit(struct mpage_da_data *mpd)
 			ext4_print_free_blocks(mpd->inode);
 		}
 		/* invalidate all the pages */
-		ext4_da_block_invalidatepages(mpd, next,
-				mpd->b_size >> mpd->inode->i_blkbits);
+		ext4_da_block_invalidatepages(mpd);
+
+		/* Mark this page range as having been completed */
+		mpd->io_done = 1;
 		return;
 	}
 	BUG_ON(blks == 0);
@@ -2438,102 +2439,6 @@ static int ext4_bh_delay_or_unwritten(handle_t *handle, struct buffer_head *bh)
 }
 
 /*
- * __mpage_da_writepage - finds extent of pages and blocks
- *
- * @page: page to consider
- * @wbc: not used, we just follow rules
- * @data: context
- *
- * The function finds extents of pages and scan them for all blocks.
- */
-static int __mpage_da_writepage(struct page *page,
-				struct writeback_control *wbc,
-				struct mpage_da_data *mpd)
-{
-	struct inode *inode = mpd->inode;
-	struct buffer_head *bh, *head;
-	sector_t logical;
-
-	/*
-	 * Can we merge this page to current extent?
-	 */
-	if (mpd->next_page != page->index) {
-		/*
-		 * Nope, we can't. So, we map non-allocated blocks
-		 * and start IO on them
-		 */
-		if (mpd->next_page != mpd->first_page) {
-			mpage_da_map_and_submit(mpd);
-			/*
-			 * skip rest of the page in the page_vec
-			 */
-			redirty_page_for_writepage(wbc, page);
-			unlock_page(page);
-			return MPAGE_DA_EXTENT_TAIL;
-		}
-
-		/*
-		 * Start next extent of pages ...
-		 */
-		mpd->first_page = page->index;
-
-		/*
-		 * ... and blocks
-		 */
-		mpd->b_size = 0;
-		mpd->b_state = 0;
-		mpd->b_blocknr = 0;
-	}
-
-	mpd->next_page = page->index + 1;
-	logical = (sector_t) page->index <<
-		  (PAGE_CACHE_SHIFT - inode->i_blkbits);
-
-	if (!page_has_buffers(page)) {
-		mpage_add_bh_to_extent(mpd, logical, PAGE_CACHE_SIZE,
-				       (1 << BH_Dirty) | (1 << BH_Uptodate));
-		if (mpd->io_done)
-			return MPAGE_DA_EXTENT_TAIL;
-	} else {
-		/*
-		 * Page with regular buffer heads, just add all dirty ones
-		 */
-		head = page_buffers(page);
-		bh = head;
-		do {
-			BUG_ON(buffer_locked(bh));
-			/*
-			 * We need to try to allocate
-			 * unmapped blocks in the same page.
-			 * Otherwise we won't make progress
-			 * with the page in ext4_writepage
-			 */
-			if (ext4_bh_delay_or_unwritten(NULL, bh)) {
-				mpage_add_bh_to_extent(mpd, logical,
-						       bh->b_size,
-						       bh->b_state);
-				if (mpd->io_done)
-					return MPAGE_DA_EXTENT_TAIL;
-			} else if (buffer_dirty(bh) && (buffer_mapped(bh))) {
-				/*
-				 * mapped dirty buffer. We need to update
-				 * the b_state because we look at
-				 * b_state in mpage_da_map_blocks. We don't
-				 * update b_size because if we find an
-				 * unmapped buffer_head later we need to
-				 * use the b_state flag of that buffer_head.
-				 */
-				if (mpd->b_size == 0)
-					mpd->b_state = bh->b_state & BH_FLAGS;
-			}
-			logical++;
-		} while ((bh = bh->b_this_page) != head);
-	}
-
-	return 0;
-}
-
-/*
 * This is a special get_blocks_t callback which is used by
 * ext4_da_write_begin().  It will either return mapped block or
 * reserve space for a single block.
@@ -2684,7 +2589,7 @@ static void ext4_end_io_buffer_write(struct buffer_head *bh, int uptodate);
 * because we should have holes filled from ext4_page_mkwrite(). We even don't
 * need to file the inode to the transaction's list in ordered mode because if
 * we are writing back data added by write(), the inode is already there and if
- * we are writing back data modified via mmap(), noone guarantees in which
+ * we are writing back data modified via mmap(), no one guarantees in which
 * transaction the data will hit the disk. In case we are journaling data, we
 * cannot start transaction directly because transaction start ranks above page
 * lock so we have to do some magic.
@@ -2786,7 +2691,7 @@ static int ext4_writepage(struct page *page,
 
 /*
 * This is called via ext4_da_writepages() to
- * calulate the total number of credits to reserve to fit
+ * calculate the total number of credits to reserve to fit
 * a single extent allocation into a single transaction,
 * ext4_da_writpeages() will loop calling this before
 * the block allocation.
@@ -2811,27 +2716,27 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
 
 /*
 * write_cache_pages_da - walk the list of dirty pages of the given
- * address space and call the callback function (which usually writes
- * the pages).
- *
- * This is a forked version of write_cache_pages().  Differences:
- *	Range cyclic is ignored.
- *	no_nrwrite_index_update is always presumed true
+ * address space and accumulate pages that need writing, and call
+ * mpage_da_map_and_submit to map a single contiguous memory region
+ * and then write them.
 */
 static int write_cache_pages_da(struct address_space *mapping,
				struct writeback_control *wbc,
				struct mpage_da_data *mpd,
				pgoff_t *done_index)
 {
-	int ret = 0;
-	int done = 0;
+	struct buffer_head *bh, *head;
+	struct inode *inode = mapping->host;
 	struct pagevec pvec;
-	unsigned nr_pages;
-	pgoff_t index;
-	pgoff_t end;		/* Inclusive */
+	unsigned int nr_pages;
+	sector_t logical;
+	pgoff_t index, end;
 	long nr_to_write = wbc->nr_to_write;
-	int tag;
+	int i, tag, ret = 0;
 
+	memset(mpd, 0, sizeof(struct mpage_da_data));
+	mpd->wbc = wbc;
+	mpd->inode = inode;
 	pagevec_init(&pvec, 0);
 	index = wbc->range_start >> PAGE_CACHE_SHIFT;
 	end = wbc->range_end >> PAGE_CACHE_SHIFT;
@@ -2842,13 +2747,11 @@ static int write_cache_pages_da(struct address_space *mapping,
 		tag = PAGECACHE_TAG_DIRTY;
 
 	*done_index = index;
-	while (!done && (index <= end)) {
-		int i;
-
+	while (index <= end) {
 		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
			      min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
 		if (nr_pages == 0)
-			break;
+			return 0;
 
 		for (i = 0; i < nr_pages; i++) {
 			struct page *page = pvec.pages[i];
@@ -2860,60 +2763,100 @@ static int write_cache_pages_da(struct address_space *mapping, | |||
2860 | * mapping. However, page->index will not change | 2763 | * mapping. However, page->index will not change |
2861 | * because we have a reference on the page. | 2764 | * because we have a reference on the page. |
2862 | */ | 2765 | */ |
2863 | if (page->index > end) { | 2766 | if (page->index > end) |
2864 | done = 1; | 2767 | goto out; |
2865 | break; | ||
2866 | } | ||
2867 | 2768 | ||
2868 | *done_index = page->index + 1; | 2769 | *done_index = page->index + 1; |
2869 | 2770 | ||
2771 | /* | ||
2772 | * If we can't merge this page, and we have | ||
2773 | * accumulated an contiguous region, write it | ||
2774 | */ | ||
2775 | if ((mpd->next_page != page->index) && | ||
2776 | (mpd->next_page != mpd->first_page)) { | ||
2777 | mpage_da_map_and_submit(mpd); | ||
2778 | goto ret_extent_tail; | ||
2779 | } | ||
2780 | |||
2870 | lock_page(page); | 2781 | lock_page(page); |
2871 | 2782 | ||
2872 | /* | 2783 | /* |
2873 | * Page truncated or invalidated. We can freely skip it | 2784 | * If the page is no longer dirty, or its |
2874 | * then, even for data integrity operations: the page | 2785 | * mapping no longer corresponds to inode we |
2875 | * has disappeared concurrently, so there could be no | 2786 | * are writing (which means it has been |
2876 | * real expectation of this data interity operation | 2787 | * truncated or invalidated), or the page is |
2877 | * even if there is now a new, dirty page at the same | 2788 | * already under writeback and we are not |
2878 | * pagecache address. | 2789 | * doing a data integrity writeback, skip the page |
2879 | */ | 2790 | */ |
2880 | if (unlikely(page->mapping != mapping)) { | 2791 | if (!PageDirty(page) || |
2881 | continue_unlock: | 2792 | (PageWriteback(page) && |
2793 | (wbc->sync_mode == WB_SYNC_NONE)) || | ||
2794 | unlikely(page->mapping != mapping)) { | ||
2882 | unlock_page(page); | 2795 | unlock_page(page); |
2883 | continue; | 2796 | continue; |
2884 | } | 2797 | } |
2885 | 2798 | ||
2886 | if (!PageDirty(page)) { | 2799 | if (PageWriteback(page)) |
2887 | /* someone wrote it for us */ | 2800 | wait_on_page_writeback(page); |
2888 | goto continue_unlock; | ||
2889 | } | ||
2890 | |||
2891 | if (PageWriteback(page)) { | ||
2892 | if (wbc->sync_mode != WB_SYNC_NONE) | ||
2893 | wait_on_page_writeback(page); | ||
2894 | else | ||
2895 | goto continue_unlock; | ||
2896 | } | ||
2897 | 2801 | ||
2898 | BUG_ON(PageWriteback(page)); | 2802 | BUG_ON(PageWriteback(page)); |
2899 | if (!clear_page_dirty_for_io(page)) | ||
2900 | goto continue_unlock; | ||
2901 | 2803 | ||
2902 | ret = __mpage_da_writepage(page, wbc, mpd); | 2804 | if (mpd->next_page != page->index) |
2903 | if (unlikely(ret)) { | 2805 | mpd->first_page = page->index; |
2904 | if (ret == AOP_WRITEPAGE_ACTIVATE) { | 2806 | mpd->next_page = page->index + 1; |
2905 | unlock_page(page); | 2807 | logical = (sector_t) page->index << |
2906 | ret = 0; | 2808 | (PAGE_CACHE_SHIFT - inode->i_blkbits); |
2907 | } else { | 2809 | |
2908 | done = 1; | 2810 | if (!page_has_buffers(page)) { |
2909 | break; | 2811 | mpage_add_bh_to_extent(mpd, logical, |
2910 | } | 2812 | PAGE_CACHE_SIZE, |
2813 | (1 << BH_Dirty) | (1 << BH_Uptodate)); | ||
2814 | if (mpd->io_done) | ||
2815 | goto ret_extent_tail; | ||
2816 | } else { | ||
2817 | /* | ||
2818 | * Page with regular buffer heads, | ||
2819 | * just add all dirty ones | ||
2820 | */ | ||
2821 | head = page_buffers(page); | ||
2822 | bh = head; | ||
2823 | do { | ||
2824 | BUG_ON(buffer_locked(bh)); | ||
2825 | /* | ||
2826 | * We need to try to allocate | ||
2827 | * unmapped blocks in the same page. | ||
2828 | * Otherwise we won't make progress | ||
2829 | * with the page in ext4_writepage | ||
2830 | */ | ||
2831 | if (ext4_bh_delay_or_unwritten(NULL, bh)) { | ||
2832 | mpage_add_bh_to_extent(mpd, logical, | ||
2833 | bh->b_size, | ||
2834 | bh->b_state); | ||
2835 | if (mpd->io_done) | ||
2836 | goto ret_extent_tail; | ||
2837 | } else if (buffer_dirty(bh) && (buffer_mapped(bh))) { | ||
2838 | /* | ||
2839 | * mapped dirty buffer. We need | ||
2840 | * to update the b_state | ||
2841 | * because we look at b_state | ||
2842 | * in mpage_da_map_blocks. We | ||
2843 | * don't update b_size because | ||
2844 | * if we find an unmapped | ||
2845 | * buffer_head later we need to | ||
2846 | * use the b_state flag of that | ||
2847 | * buffer_head. | ||
2848 | */ | ||
2849 | if (mpd->b_size == 0) | ||
2850 | mpd->b_state = bh->b_state & BH_FLAGS; | ||
2851 | } | ||
2852 | logical++; | ||
2853 | } while ((bh = bh->b_this_page) != head); | ||
2911 | } | 2854 | } |
2912 | 2855 | ||
2913 | if (nr_to_write > 0) { | 2856 | if (nr_to_write > 0) { |
2914 | nr_to_write--; | 2857 | nr_to_write--; |
2915 | if (nr_to_write == 0 && | 2858 | if (nr_to_write == 0 && |
2916 | wbc->sync_mode == WB_SYNC_NONE) { | 2859 | wbc->sync_mode == WB_SYNC_NONE) |
2917 | /* | 2860 | /* |
2918 | * We stop writing back only if we are | 2861 | * We stop writing back only if we are |
2919 | * not doing integrity sync. In case of | 2862 | * not doing integrity sync. In case of |
@@ -2924,14 +2867,18 @@ continue_unlock: | |||
2924 | * pages, but have not synced all of the | 2867 | * pages, but have not synced all of the |
2925 | * old dirty pages. | 2868 | * old dirty pages. |
2926 | */ | 2869 | */ |
2927 | done = 1; | 2870 | goto out; |
2928 | break; | ||
2929 | } | ||
2930 | } | 2871 | } |
2931 | } | 2872 | } |
2932 | pagevec_release(&pvec); | 2873 | pagevec_release(&pvec); |
2933 | cond_resched(); | 2874 | cond_resched(); |
2934 | } | 2875 | } |
2876 | return 0; | ||
2877 | ret_extent_tail: | ||
2878 | ret = MPAGE_DA_EXTENT_TAIL; | ||
2879 | out: | ||
2880 | pagevec_release(&pvec); | ||
2881 | cond_resched(); | ||
2935 | return ret; | 2882 | return ret; |
2936 | } | 2883 | } |
2937 | 2884 | ||
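Read as a whole, this hunk folds the old __mpage_da_writepage() callback into write_cache_pages_da() itself: pages are skipped early, accumulated into a contiguous extent, and a non-mergeable page flushes the extent via mpage_da_map_and_submit(). Below is a minimal userspace model of that accumulation logic; struct da_ctx and map_and_submit() are invented stand-ins for mpage_da_data and mpage_da_map_and_submit(), and the real loop returns MPAGE_DA_EXTENT_TAIL after a flush instead of continuing.

```c
#include <stdio.h>

/* Hypothetical stand-in for struct mpage_da_data. */
struct da_ctx {
	long first_page;	/* first index of the accumulated extent */
	long next_page;		/* index we expect to see next */
};

/* Stand-in for mpage_da_map_and_submit(): flush the current extent. */
static void map_and_submit(struct da_ctx *mpd)
{
	printf("submit extent [%ld, %ld)\n", mpd->first_page, mpd->next_page);
}

int main(void)
{
	/* Dirty page indices as pagevec_lookup_tag() might return them. */
	long dirty[] = { 4, 5, 6, 9, 10, 14 };
	struct da_ctx mpd = { 0, 0 };

	for (unsigned int i = 0; i < sizeof(dirty) / sizeof(dirty[0]); i++) {
		long index = dirty[i];

		/* The new early check: a page that cannot be merged flushes
		 * whatever extent has been accumulated so far (the kernel
		 * then returns MPAGE_DA_EXTENT_TAIL rather than looping). */
		if (mpd.next_page != index && mpd.next_page != mpd.first_page)
			map_and_submit(&mpd);

		/* Start a new extent or extend the current one. */
		if (mpd.next_page != index)
			mpd.first_page = index;
		mpd.next_page = index + 1;
	}
	if (mpd.next_page != mpd.first_page)
		map_and_submit(&mpd);	/* flush the trailing extent */
	return 0;
}
```

Run against the sample indices, this prints the three extents [4, 7), [9, 11) and [14, 15), mirroring how the patched loop coalesces runs of dirty pages.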
@@ -2945,7 +2892,6 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
2945 | struct mpage_da_data mpd; | 2892 | struct mpage_da_data mpd; |
2946 | struct inode *inode = mapping->host; | 2893 | struct inode *inode = mapping->host; |
2947 | int pages_written = 0; | 2894 | int pages_written = 0; |
2948 | long pages_skipped; | ||
2949 | unsigned int max_pages; | 2895 | unsigned int max_pages; |
2950 | int range_cyclic, cycled = 1, io_done = 0; | 2896 | int range_cyclic, cycled = 1, io_done = 0; |
2951 | int needed_blocks, ret = 0; | 2897 | int needed_blocks, ret = 0; |
@@ -3028,11 +2974,6 @@ static int ext4_da_writepages(struct address_space *mapping, | |||
3028 | wbc->nr_to_write = desired_nr_to_write; | 2974 | wbc->nr_to_write = desired_nr_to_write; |
3029 | } | 2975 | } |
3030 | 2976 | ||
3031 | mpd.wbc = wbc; | ||
3032 | mpd.inode = mapping->host; | ||
3033 | |||
3034 | pages_skipped = wbc->pages_skipped; | ||
3035 | |||
3036 | retry: | 2977 | retry: |
3037 | if (wbc->sync_mode == WB_SYNC_ALL) | 2978 | if (wbc->sync_mode == WB_SYNC_ALL) |
3038 | tag_pages_for_writeback(mapping, index, end); | 2979 | tag_pages_for_writeback(mapping, index, end); |
@@ -3059,22 +3000,10 @@ retry: | |||
3059 | } | 3000 | } |
3060 | 3001 | ||
3061 | /* | 3002 | /* |
3062 | * Now call __mpage_da_writepage to find the next | 3003 | * Now call write_cache_pages_da() to find the next |
3063 | * contiguous region of logical blocks that need | 3004 | * contiguous region of logical blocks that need |
3064 | * blocks to be allocated by ext4. We don't actually | 3005 | * blocks to be allocated by ext4 and submit them. |
3065 | * submit the blocks for I/O here, even though | ||
3066 | * write_cache_pages thinks it will, and will set the | ||
3067 | * pages as clean for write before calling | ||
3068 | * __mpage_da_writepage(). | ||
3069 | */ | 3006 | */ |
3070 | mpd.b_size = 0; | ||
3071 | mpd.b_state = 0; | ||
3072 | mpd.b_blocknr = 0; | ||
3073 | mpd.first_page = 0; | ||
3074 | mpd.next_page = 0; | ||
3075 | mpd.io_done = 0; | ||
3076 | mpd.pages_written = 0; | ||
3077 | mpd.retval = 0; | ||
3078 | ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index); | 3007 | ret = write_cache_pages_da(mapping, wbc, &mpd, &done_index); |
3079 | /* | 3008 | /* |
3080 | * If we have a contiguous extent of pages and we | 3009 | * If we have a contiguous extent of pages and we |
@@ -3096,7 +3025,6 @@ retry: | |||
3096 | * and try again | 3025 | * and try again |
3097 | */ | 3026 | */ |
3098 | jbd2_journal_force_commit_nested(sbi->s_journal); | 3027 | jbd2_journal_force_commit_nested(sbi->s_journal); |
3099 | wbc->pages_skipped = pages_skipped; | ||
3100 | ret = 0; | 3028 | ret = 0; |
3101 | } else if (ret == MPAGE_DA_EXTENT_TAIL) { | 3029 | } else if (ret == MPAGE_DA_EXTENT_TAIL) { |
3102 | /* | 3030 | /* |
@@ -3104,7 +3032,6 @@ retry: | |||
3104 | * rest of the pages | 3032 | * rest of the pages |
3105 | */ | 3033 | */ |
3106 | pages_written += mpd.pages_written; | 3034 | pages_written += mpd.pages_written; |
3107 | wbc->pages_skipped = pages_skipped; | ||
3108 | ret = 0; | 3035 | ret = 0; |
3109 | io_done = 1; | 3036 | io_done = 1; |
3110 | } else if (wbc->nr_to_write) | 3037 | } else if (wbc->nr_to_write) |
@@ -3122,11 +3049,6 @@ retry: | |||
3122 | wbc->range_end = mapping->writeback_index - 1; | 3049 | wbc->range_end = mapping->writeback_index - 1; |
3123 | goto retry; | 3050 | goto retry; |
3124 | } | 3051 | } |
3125 | if (pages_skipped != wbc->pages_skipped) | ||
3126 | ext4_msg(inode->i_sb, KERN_CRIT, | ||
3127 | "This should not happen leaving %s " | ||
3128 | "with nr_to_write = %ld ret = %d", | ||
3129 | __func__, wbc->nr_to_write, ret); | ||
3130 | 3052 | ||
3131 | /* Update index */ | 3053 | /* Update index */ |
3132 | wbc->range_cyclic = range_cyclic; | 3054 | wbc->range_cyclic = range_cyclic; |
@@ -3383,7 +3305,7 @@ int ext4_alloc_da_blocks(struct inode *inode) | |||
3383 | * the pages by calling redirty_page_for_writepage() but that | 3305 | * the pages by calling redirty_page_for_writepage() but that |
3384 | * would be ugly in the extreme. So instead we would need to | 3306 | * would be ugly in the extreme. So instead we would need to |
3385 | * replicate parts of the code in the above functions, | 3307 | * replicate parts of the code in the above functions, |
3386 | * simplifying them becuase we wouldn't actually intend to | 3308 | * simplifying them because we wouldn't actually intend to |
3387 | * write out the pages, but rather only collect contiguous | 3309 | * write out the pages, but rather only collect contiguous |
3388 | * logical block extents, call the multi-block allocator, and | 3310 | * logical block extents, call the multi-block allocator, and |
3389 | * then update the buffer heads with the block allocations. | 3311 | * then update the buffer heads with the block allocations. |
@@ -3460,6 +3382,7 @@ static sector_t ext4_bmap(struct address_space *mapping, sector_t block) | |||
3460 | 3382 | ||
3461 | static int ext4_readpage(struct file *file, struct page *page) | 3383 | static int ext4_readpage(struct file *file, struct page *page) |
3462 | { | 3384 | { |
3385 | trace_ext4_readpage(page); | ||
3463 | return mpage_readpage(page, ext4_get_block); | 3386 | return mpage_readpage(page, ext4_get_block); |
3464 | } | 3387 | } |
3465 | 3388 | ||
@@ -3494,6 +3417,8 @@ static void ext4_invalidatepage(struct page *page, unsigned long offset) | |||
3494 | { | 3417 | { |
3495 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); | 3418 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); |
3496 | 3419 | ||
3420 | trace_ext4_invalidatepage(page, offset); | ||
3421 | |||
3497 | /* | 3422 | /* |
3498 | * free any io_end structure allocated for buffers to be discarded | 3423 | * free any io_end structure allocated for buffers to be discarded |
3499 | */ | 3424 | */ |
@@ -3515,6 +3440,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait) | |||
3515 | { | 3440 | { |
3516 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); | 3441 | journal_t *journal = EXT4_JOURNAL(page->mapping->host); |
3517 | 3442 | ||
3443 | trace_ext4_releasepage(page); | ||
3444 | |||
3518 | WARN_ON(PageChecked(page)); | 3445 | WARN_ON(PageChecked(page)); |
3519 | if (!page_has_buffers(page)) | 3446 | if (!page_has_buffers(page)) |
3520 | return 0; | 3447 | return 0; |
@@ -3768,7 +3695,7 @@ retry: | |||
3768 | * | 3695 | * |
3769 | * The unwritten extents will be converted to written when DIO is completed. | 3696 |
3770 | * For async direct IO, since the IO may still be pending when we return, we | 3697 |
3771 | * set up an end_io call back function, which will do the convertion | 3698 | * set up an end_io call back function, which will do the conversion |
3772 | * when async direct IO completed. | 3699 | * when async direct IO completed. |
3773 | * | 3700 | * |
3774 | * If the O_DIRECT write will extend the file then add this inode to the | 3701 | * If the O_DIRECT write will extend the file then add this inode to the |
@@ -3791,7 +3718,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3791 | * We could direct write to holes and fallocate. | 3718 | * We could direct write to holes and fallocate. |
3792 | * | 3719 | * |
3793 | * Allocated blocks to fill the hole are marked as uninitialized | 3720 | * Allocated blocks to fill the hole are marked as uninitialized |
3794 | * to prevent paralel buffered read to expose the stale data | 3721 | * to prevent parallel buffered read to expose the stale data |
3795 | * before DIO complete the data IO. | 3722 | * before DIO complete the data IO. |
3796 | * | 3723 | * |
3797 | * As to previously fallocated extents, ext4 get_block | 3724 | * As to previously fallocated extents, ext4 get_block |
@@ -3852,7 +3779,7 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb, | |||
3852 | int err; | 3779 | int err; |
3853 | /* | 3780 | /* |
3854 | * for non AIO case, since the IO is already | 3781 | * for non AIO case, since the IO is already |
3855 | * completed, we could do the convertion right here | 3782 | * completed, we could do the conversion right here |
3856 | */ | 3783 | */ |
3857 | err = ext4_convert_unwritten_extents(inode, | 3784 | err = ext4_convert_unwritten_extents(inode, |
3858 | offset, ret); | 3785 | offset, ret); |
@@ -3873,11 +3800,16 @@ static ssize_t ext4_direct_IO(int rw, struct kiocb *iocb, | |||
3873 | { | 3800 | { |
3874 | struct file *file = iocb->ki_filp; | 3801 | struct file *file = iocb->ki_filp; |
3875 | struct inode *inode = file->f_mapping->host; | 3802 | struct inode *inode = file->f_mapping->host; |
3803 | ssize_t ret; | ||
3876 | 3804 | ||
3805 | trace_ext4_direct_IO_enter(inode, offset, iov_length(iov, nr_segs), rw); | ||
3877 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) | 3806 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) |
3878 | return ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); | 3807 | ret = ext4_ext_direct_IO(rw, iocb, iov, offset, nr_segs); |
3879 | 3808 | else | |
3880 | return ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); | 3809 | ret = ext4_ind_direct_IO(rw, iocb, iov, offset, nr_segs); |
3810 | trace_ext4_direct_IO_exit(inode, offset, | ||
3811 | iov_length(iov, nr_segs), rw, ret); | ||
3812 | return ret; | ||
3881 | } | 3813 | } |
3882 | 3814 | ||
3883 | /* | 3815 | /* |
@@ -3903,7 +3835,6 @@ static const struct address_space_operations ext4_ordered_aops = { | |||
3903 | .readpage = ext4_readpage, | 3835 | .readpage = ext4_readpage, |
3904 | .readpages = ext4_readpages, | 3836 | .readpages = ext4_readpages, |
3905 | .writepage = ext4_writepage, | 3837 | .writepage = ext4_writepage, |
3906 | .sync_page = block_sync_page, | ||
3907 | .write_begin = ext4_write_begin, | 3838 | .write_begin = ext4_write_begin, |
3908 | .write_end = ext4_ordered_write_end, | 3839 | .write_end = ext4_ordered_write_end, |
3909 | .bmap = ext4_bmap, | 3840 | .bmap = ext4_bmap, |
@@ -3919,7 +3850,6 @@ static const struct address_space_operations ext4_writeback_aops = { | |||
3919 | .readpage = ext4_readpage, | 3850 | .readpage = ext4_readpage, |
3920 | .readpages = ext4_readpages, | 3851 | .readpages = ext4_readpages, |
3921 | .writepage = ext4_writepage, | 3852 | .writepage = ext4_writepage, |
3922 | .sync_page = block_sync_page, | ||
3923 | .write_begin = ext4_write_begin, | 3853 | .write_begin = ext4_write_begin, |
3924 | .write_end = ext4_writeback_write_end, | 3854 | .write_end = ext4_writeback_write_end, |
3925 | .bmap = ext4_bmap, | 3855 | .bmap = ext4_bmap, |
@@ -3935,7 +3865,6 @@ static const struct address_space_operations ext4_journalled_aops = { | |||
3935 | .readpage = ext4_readpage, | 3865 | .readpage = ext4_readpage, |
3936 | .readpages = ext4_readpages, | 3866 | .readpages = ext4_readpages, |
3937 | .writepage = ext4_writepage, | 3867 | .writepage = ext4_writepage, |
3938 | .sync_page = block_sync_page, | ||
3939 | .write_begin = ext4_write_begin, | 3868 | .write_begin = ext4_write_begin, |
3940 | .write_end = ext4_journalled_write_end, | 3869 | .write_end = ext4_journalled_write_end, |
3941 | .set_page_dirty = ext4_journalled_set_page_dirty, | 3870 | .set_page_dirty = ext4_journalled_set_page_dirty, |
@@ -3951,7 +3880,6 @@ static const struct address_space_operations ext4_da_aops = { | |||
3951 | .readpages = ext4_readpages, | 3880 | .readpages = ext4_readpages, |
3952 | .writepage = ext4_writepage, | 3881 | .writepage = ext4_writepage, |
3953 | .writepages = ext4_da_writepages, | 3882 | .writepages = ext4_da_writepages, |
3954 | .sync_page = block_sync_page, | ||
3955 | .write_begin = ext4_da_write_begin, | 3883 | .write_begin = ext4_da_write_begin, |
3956 | .write_end = ext4_da_write_end, | 3884 | .write_end = ext4_da_write_end, |
3957 | .bmap = ext4_bmap, | 3885 | .bmap = ext4_bmap, |
@@ -4098,7 +4026,7 @@ static inline int all_zeroes(__le32 *p, __le32 *q) | |||
4098 | * | 4026 | * |
4099 | * When we do truncate() we may have to clean the ends of several | 4027 | * When we do truncate() we may have to clean the ends of several |
4100 | * indirect blocks but leave the blocks themselves alive. Block is | 4028 | * indirect blocks but leave the blocks themselves alive. Block is |
4101 | * partially truncated if some data below the new i_size is refered | 4029 | * partially truncated if some data below the new i_size is referred |
4102 | * from it (and it is on the path to the first completely truncated | 4030 | * from it (and it is on the path to the first completely truncated |
4103 | * data block, indeed). We have to free the top of that path along | 4031 | * data block, indeed). We have to free the top of that path along |
4104 | * with everything to the right of the path. Since no allocation | 4032 | * with everything to the right of the path. Since no allocation |
@@ -4177,6 +4105,9 @@ no_top: | |||
4177 | * | 4105 | * |
4178 | * We release `count' blocks on disk, but (last - first) may be greater | 4106 | * We release `count' blocks on disk, but (last - first) may be greater |
4179 | * than `count' because there can be holes in there. | 4107 | * than `count' because there can be holes in there. |
4108 | * | ||
4109 | * Return 0 on success, 1 on invalid block range | ||
4110 | * and < 0 on fatal error. | ||
4180 | */ | 4111 | */ |
4181 | static int ext4_clear_blocks(handle_t *handle, struct inode *inode, | 4112 | static int ext4_clear_blocks(handle_t *handle, struct inode *inode, |
4182 | struct buffer_head *bh, | 4113 | struct buffer_head *bh, |
@@ -4203,33 +4134,32 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode, | |||
4203 | if (bh) { | 4134 | if (bh) { |
4204 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); | 4135 | BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); |
4205 | err = ext4_handle_dirty_metadata(handle, inode, bh); | 4136 | err = ext4_handle_dirty_metadata(handle, inode, bh); |
4206 | if (unlikely(err)) { | 4137 | if (unlikely(err)) |
4207 | ext4_std_error(inode->i_sb, err); | 4138 | goto out_err; |
4208 | return 1; | ||
4209 | } | ||
4210 | } | 4139 | } |
4211 | err = ext4_mark_inode_dirty(handle, inode); | 4140 | err = ext4_mark_inode_dirty(handle, inode); |
4212 | if (unlikely(err)) { | 4141 | if (unlikely(err)) |
4213 | ext4_std_error(inode->i_sb, err); | 4142 | goto out_err; |
4214 | return 1; | ||
4215 | } | ||
4216 | err = ext4_truncate_restart_trans(handle, inode, | 4143 | err = ext4_truncate_restart_trans(handle, inode, |
4217 | blocks_for_truncate(inode)); | 4144 | blocks_for_truncate(inode)); |
4218 | if (unlikely(err)) { | 4145 | if (unlikely(err)) |
4219 | ext4_std_error(inode->i_sb, err); | 4146 | goto out_err; |
4220 | return 1; | ||
4221 | } | ||
4222 | if (bh) { | 4147 | if (bh) { |
4223 | BUFFER_TRACE(bh, "retaking write access"); | 4148 | BUFFER_TRACE(bh, "retaking write access"); |
4224 | ext4_journal_get_write_access(handle, bh); | 4149 | err = ext4_journal_get_write_access(handle, bh); |
4150 | if (unlikely(err)) | ||
4151 | goto out_err; | ||
4225 | } | 4152 | } |
4226 | } | 4153 | } |
4227 | 4154 | ||
4228 | for (p = first; p < last; p++) | 4155 | for (p = first; p < last; p++) |
4229 | *p = 0; | 4156 | *p = 0; |
4230 | 4157 | ||
4231 | ext4_free_blocks(handle, inode, 0, block_to_free, count, flags); | 4158 | ext4_free_blocks(handle, inode, NULL, block_to_free, count, flags); |
4232 | return 0; | 4159 | return 0; |
4160 | out_err: | ||
4161 | ext4_std_error(inode->i_sb, err); | ||
4162 | return err; | ||
4233 | } | 4163 | } |
4234 | 4164 | ||
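The new comment fixes the return contract at three values, and the reworked body funnels every failure through out_err. A toy caller honoring that contract might look like the sketch below; free_run() is a hypothetical stand-in for ext4_clear_blocks() and the values are made up.

```c
#include <stdio.h>

/* free_run() models the convention documented above:
 * 0 on success, 1 on an invalid block range, < 0 on fatal error. */
static int free_run(long start, long count)
{
	if (count <= 0)
		return 1;		/* invalid block range */
	printf("freed [%ld, %ld)\n", start, start + count);
	return 0;
}

int main(void)
{
	long runs[][2] = { { 100, 4 }, { 200, 0 }, { 300, 2 } };
	int err = 0;

	for (unsigned int i = 0; i < 3 && !err; i++)
		err = free_run(runs[i][0], runs[i][1]);
	if (err < 0)
		return 1;		/* fatal: caller bails out entirely */
	/* err == 1: stop freeing but still finish the bookkeeping,
	 * mirroring the tail of ext4_free_data() in the next hunk. */
	return 0;
}
```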
4235 | /** | 4165 | /** |
@@ -4240,7 +4170,7 @@ static int ext4_clear_blocks(handle_t *handle, struct inode *inode, | |||
4240 | * @first: array of block numbers | 4170 | * @first: array of block numbers |
4241 | * @last: points immediately past the end of array | 4171 | * @last: points immediately past the end of array |
4242 | * | 4172 | * |
4243 | * We are freeing all blocks refered from that array (numbers are stored as | 4173 | * We are freeing all blocks referred from that array (numbers are stored as |
4244 | * little-endian 32-bit) and updating @inode->i_blocks appropriately. | 4174 | * little-endian 32-bit) and updating @inode->i_blocks appropriately. |
4245 | * | 4175 | * |
4246 | * We accumulate contiguous runs of blocks to free. Conveniently, if these | 4176 | * We accumulate contiguous runs of blocks to free. Conveniently, if these |
@@ -4263,7 +4193,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
4263 | ext4_fsblk_t nr; /* Current block # */ | 4193 | ext4_fsblk_t nr; /* Current block # */ |
4264 | __le32 *p; /* Pointer into inode/ind | 4194 | __le32 *p; /* Pointer into inode/ind |
4265 | for current block */ | 4195 | for current block */ |
4266 | int err; | 4196 | int err = 0; |
4267 | 4197 | ||
4268 | if (this_bh) { /* For indirect block */ | 4198 | if (this_bh) { /* For indirect block */ |
4269 | BUFFER_TRACE(this_bh, "get_write_access"); | 4199 | BUFFER_TRACE(this_bh, "get_write_access"); |
@@ -4285,9 +4215,10 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
4285 | } else if (nr == block_to_free + count) { | 4215 | } else if (nr == block_to_free + count) { |
4286 | count++; | 4216 | count++; |
4287 | } else { | 4217 | } else { |
4288 | if (ext4_clear_blocks(handle, inode, this_bh, | 4218 | err = ext4_clear_blocks(handle, inode, this_bh, |
4289 | block_to_free, count, | 4219 | block_to_free, count, |
4290 | block_to_free_p, p)) | 4220 | block_to_free_p, p); |
4221 | if (err) | ||
4291 | break; | 4222 | break; |
4292 | block_to_free = nr; | 4223 | block_to_free = nr; |
4293 | block_to_free_p = p; | 4224 | block_to_free_p = p; |
@@ -4296,9 +4227,12 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
4296 | } | 4227 | } |
4297 | } | 4228 | } |
4298 | 4229 | ||
4299 | if (count > 0) | 4230 | if (!err && count > 0) |
4300 | ext4_clear_blocks(handle, inode, this_bh, block_to_free, | 4231 | err = ext4_clear_blocks(handle, inode, this_bh, block_to_free, |
4301 | count, block_to_free_p, p); | 4232 | count, block_to_free_p, p); |
4233 | if (err < 0) | ||
4234 | /* fatal error */ | ||
4235 | return; | ||
4302 | 4236 | ||
4303 | if (this_bh) { | 4237 | if (this_bh) { |
4304 | BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata"); | 4238 | BUFFER_TRACE(this_bh, "call ext4_handle_dirty_metadata"); |
@@ -4328,7 +4262,7 @@ static void ext4_free_data(handle_t *handle, struct inode *inode, | |||
4328 | * @last: pointer immediately past the end of array | 4262 | * @last: pointer immediately past the end of array |
4329 | * @depth: depth of the branches to free | 4263 | * @depth: depth of the branches to free |
4330 | * | 4264 | * |
4331 | * We are freeing all blocks refered from these branches (numbers are | 4265 | * We are freeing all blocks referred from these branches (numbers are |
4332 | * stored as little-endian 32-bit) and updating @inode->i_blocks | 4266 | * stored as little-endian 32-bit) and updating @inode->i_blocks |
4333 | * appropriately. | 4267 | * appropriately. |
4334 | */ | 4268 | */ |
@@ -4416,7 +4350,7 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode, | |||
4416 | * transaction where the data blocks are | 4350 | * transaction where the data blocks are |
4417 | * actually freed. | 4351 | * actually freed. |
4418 | */ | 4352 | */ |
4419 | ext4_free_blocks(handle, inode, 0, nr, 1, | 4353 | ext4_free_blocks(handle, inode, NULL, nr, 1, |
4420 | EXT4_FREE_BLOCKS_METADATA| | 4354 | EXT4_FREE_BLOCKS_METADATA| |
4421 | EXT4_FREE_BLOCKS_FORGET); | 4355 | EXT4_FREE_BLOCKS_FORGET); |
4422 | 4356 | ||
@@ -4496,10 +4430,12 @@ void ext4_truncate(struct inode *inode) | |||
4496 | Indirect chain[4]; | 4430 | Indirect chain[4]; |
4497 | Indirect *partial; | 4431 | Indirect *partial; |
4498 | __le32 nr = 0; | 4432 | __le32 nr = 0; |
4499 | int n; | 4433 | int n = 0; |
4500 | ext4_lblk_t last_block; | 4434 | ext4_lblk_t last_block, max_block; |
4501 | unsigned blocksize = inode->i_sb->s_blocksize; | 4435 | unsigned blocksize = inode->i_sb->s_blocksize; |
4502 | 4436 | ||
4437 | trace_ext4_truncate_enter(inode); | ||
4438 | |||
4503 | if (!ext4_can_truncate(inode)) | 4439 | if (!ext4_can_truncate(inode)) |
4504 | return; | 4440 | return; |
4505 | 4441 | ||
@@ -4510,6 +4446,7 @@ void ext4_truncate(struct inode *inode) | |||
4510 | 4446 | ||
4511 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { | 4447 | if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { |
4512 | ext4_ext_truncate(inode); | 4448 | ext4_ext_truncate(inode); |
4449 | trace_ext4_truncate_exit(inode); | ||
4513 | return; | 4450 | return; |
4514 | } | 4451 | } |
4515 | 4452 | ||
@@ -4519,14 +4456,18 @@ void ext4_truncate(struct inode *inode) | |||
4519 | 4456 | ||
4520 | last_block = (inode->i_size + blocksize-1) | 4457 | last_block = (inode->i_size + blocksize-1) |
4521 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); | 4458 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); |
4459 | max_block = (EXT4_SB(inode->i_sb)->s_bitmap_maxbytes + blocksize-1) | ||
4460 | >> EXT4_BLOCK_SIZE_BITS(inode->i_sb); | ||
4522 | 4461 | ||
4523 | if (inode->i_size & (blocksize - 1)) | 4462 | if (inode->i_size & (blocksize - 1)) |
4524 | if (ext4_block_truncate_page(handle, mapping, inode->i_size)) | 4463 | if (ext4_block_truncate_page(handle, mapping, inode->i_size)) |
4525 | goto out_stop; | 4464 | goto out_stop; |
4526 | 4465 | ||
4527 | n = ext4_block_to_path(inode, last_block, offsets, NULL); | 4466 | if (last_block != max_block) { |
4528 | if (n == 0) | 4467 | n = ext4_block_to_path(inode, last_block, offsets, NULL); |
4529 | goto out_stop; /* error */ | 4468 | if (n == 0) |
4469 | goto out_stop; /* error */ | ||
4470 | } | ||
4530 | 4471 | ||
4531 | /* | 4472 | /* |
4532 | * OK. This truncate is going to happen. We add the inode to the | 4473 | * OK. This truncate is going to happen. We add the inode to the |
@@ -4557,7 +4498,13 @@ void ext4_truncate(struct inode *inode) | |||
4557 | */ | 4498 | */ |
4558 | ei->i_disksize = inode->i_size; | 4499 | ei->i_disksize = inode->i_size; |
4559 | 4500 | ||
4560 | if (n == 1) { /* direct blocks */ | 4501 | if (last_block == max_block) { |
4502 | /* | ||
4503 | * It is unnecessary to free any data blocks if last_block is | ||
4504 | * equal to the indirect block limit. | ||
4505 | */ | ||
4506 | goto out_unlock; | ||
4507 | } else if (n == 1) { /* direct blocks */ | ||
4561 | ext4_free_data(handle, inode, NULL, i_data+offsets[0], | 4508 | ext4_free_data(handle, inode, NULL, i_data+offsets[0], |
4562 | i_data + EXT4_NDIR_BLOCKS); | 4509 | i_data + EXT4_NDIR_BLOCKS); |
4563 | goto do_indirects; | 4510 | goto do_indirects; |
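The new max_block is the first block index beyond what the indirect map can address, so a truncate whose last_block already sits at that limit has nothing to free and takes the new out_unlock exit. A rough worked check of the limit, assuming 4 KiB blocks with 1024 block pointers per indirect block (the exact value lives in s_bitmap_maxbytes):

```c
#include <stdio.h>

int main(void)
{
	/* Assumed geometry: 4 KiB blocks => 1024 pointers per block. */
	unsigned long long ptrs = 1024, blksz = 4096;
	unsigned long long max_blocks =
		12 + ptrs + ptrs * ptrs + ptrs * ptrs * ptrs;

	/* ~1.07e9 blocks, i.e. roughly 4 TiB addressable. */
	printf("%llu blocks = %llu bytes\n", max_blocks, max_blocks * blksz);
	return 0;
}
```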
@@ -4617,6 +4564,7 @@ do_indirects: | |||
4617 | ; | 4564 | ; |
4618 | } | 4565 | } |
4619 | 4566 | ||
4567 | out_unlock: | ||
4620 | up_write(&ei->i_data_sem); | 4568 | up_write(&ei->i_data_sem); |
4621 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); | 4569 | inode->i_mtime = inode->i_ctime = ext4_current_time(inode); |
4622 | ext4_mark_inode_dirty(handle, inode); | 4570 | ext4_mark_inode_dirty(handle, inode); |
@@ -4639,6 +4587,7 @@ out_stop: | |||
4639 | ext4_orphan_del(handle, inode); | 4587 | ext4_orphan_del(handle, inode); |
4640 | 4588 | ||
4641 | ext4_journal_stop(handle); | 4589 | ext4_journal_stop(handle); |
4590 | trace_ext4_truncate_exit(inode); | ||
4642 | } | 4591 | } |
4643 | 4592 | ||
4644 | /* | 4593 | /* |
@@ -4770,6 +4719,7 @@ make_io: | |||
4770 | * has in-inode xattrs, or we don't have this inode in memory. | 4719 | * has in-inode xattrs, or we don't have this inode in memory. |
4771 | * Read the block from disk. | 4720 | * Read the block from disk. |
4772 | */ | 4721 | */ |
4722 | trace_ext4_load_inode(inode); | ||
4773 | get_bh(bh); | 4723 | get_bh(bh); |
4774 | bh->b_end_io = end_buffer_read_sync; | 4724 | bh->b_end_io = end_buffer_read_sync; |
4775 | submit_bh(READ_META, bh); | 4725 | submit_bh(READ_META, bh); |
@@ -4875,7 +4825,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino) | |||
4875 | return inode; | 4825 | return inode; |
4876 | 4826 | ||
4877 | ei = EXT4_I(inode); | 4827 | ei = EXT4_I(inode); |
4878 | iloc.bh = 0; | 4828 | iloc.bh = NULL; |
4879 | 4829 | ||
4880 | ret = __ext4_get_inode_loc(inode, &iloc, 0); | 4830 | ret = __ext4_get_inode_loc(inode, &iloc, 0); |
4881 | if (ret < 0) | 4831 | if (ret < 0) |
@@ -5460,13 +5410,12 @@ static int ext4_indirect_trans_blocks(struct inode *inode, int nrblocks, | |||
5460 | /* if nrblocks are contiguous */ | 5410 | /* if nrblocks are contiguous */ |
5461 | if (chunk) { | 5411 | if (chunk) { |
5462 | /* | 5412 | /* |
5463 | * With N contiguous data blocks, it need at most | 5413 | * With N contiguous data blocks, we need at most |
5464 | * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) indirect blocks | 5414 | * N/EXT4_ADDR_PER_BLOCK(inode->i_sb) + 1 indirect blocks, |
5465 | * 2 dindirect blocks | 5415 | * 2 dindirect blocks, and 1 tindirect block |
5466 | * 1 tindirect block | ||
5467 | */ | 5416 | */ |
5468 | indirects = nrblocks / EXT4_ADDR_PER_BLOCK(inode->i_sb); | 5417 | return DIV_ROUND_UP(nrblocks, |
5469 | return indirects + 3; | 5418 | EXT4_ADDR_PER_BLOCK(inode->i_sb)) + 4; |
5470 | } | 5419 | } |
5471 | /* | 5420 | /* |
5472 | * if nrblocks are not contiguous, worst case, each block touch | 5421 |
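The rewritten contiguous-case bound collapses the old per-level list into DIV_ROUND_UP(nrblocks, EXT4_ADDR_PER_BLOCK(sb)) + 4, which per the comment above covers the indirect blocks the run spans plus the extra indirect, dindirect and tindirect slack. A quick numeric check, assuming 4 KiB blocks (1024 addresses per indirect block):

```c
#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

int main(void)
{
	int addr_per_block = 1024;	/* assumed: 4 KiB block size */

	/* 3000 contiguous blocks: ceil(3000/1024) = 3 indirect blocks,
	 * plus 4 for the extra indirect/dindirect/tindirect => 7. */
	printf("%d\n", DIV_ROUND_UP(3000, addr_per_block) + 4);
	return 0;
}
```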
@@ -5540,7 +5489,7 @@ static int ext4_meta_trans_blocks(struct inode *inode, int nrblocks, int chunk) | |||
5540 | } | 5489 | } |
5541 | 5490 | ||
5542 | /* | 5491 | /* |
5543 | * Calulate the total number of credits to reserve to fit | 5492 | * Calculate the total number of credits to reserve to fit |
5544 | * the modification of a single pages into a single transaction, | 5493 | * the modification of a single pages into a single transaction, |
5545 | * which may include multiple chunks of block allocations. | 5494 | * which may include multiple chunks of block allocations. |
5546 | * | 5495 | * |
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c index eb3bc2fe647e..808c554e773f 100644 --- a/fs/ext4/ioctl.c +++ b/fs/ext4/ioctl.c | |||
@@ -38,7 +38,7 @@ long ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg) | |||
38 | unsigned int oldflags; | 38 | unsigned int oldflags; |
39 | unsigned int jflag; | 39 | unsigned int jflag; |
40 | 40 | ||
41 | if (!is_owner_or_cap(inode)) | 41 | if (!inode_owner_or_capable(inode)) |
42 | return -EACCES; | 42 | return -EACCES; |
43 | 43 | ||
44 | if (get_user(flags, (int __user *) arg)) | 44 | if (get_user(flags, (int __user *) arg)) |
@@ -146,7 +146,7 @@ flags_out: | |||
146 | __u32 generation; | 146 | __u32 generation; |
147 | int err; | 147 | int err; |
148 | 148 | ||
149 | if (!is_owner_or_cap(inode)) | 149 | if (!inode_owner_or_capable(inode)) |
150 | return -EPERM; | 150 | return -EPERM; |
151 | 151 | ||
152 | err = mnt_want_write(filp->f_path.mnt); | 152 | err = mnt_want_write(filp->f_path.mnt); |
@@ -298,7 +298,7 @@ mext_out: | |||
298 | case EXT4_IOC_MIGRATE: | 298 | case EXT4_IOC_MIGRATE: |
299 | { | 299 | { |
300 | int err; | 300 | int err; |
301 | if (!is_owner_or_cap(inode)) | 301 | if (!inode_owner_or_capable(inode)) |
302 | return -EACCES; | 302 | return -EACCES; |
303 | 303 | ||
304 | err = mnt_want_write(filp->f_path.mnt); | 304 | err = mnt_want_write(filp->f_path.mnt); |
@@ -320,7 +320,7 @@ mext_out: | |||
320 | case EXT4_IOC_ALLOC_DA_BLKS: | 320 | case EXT4_IOC_ALLOC_DA_BLKS: |
321 | { | 321 | { |
322 | int err; | 322 | int err; |
323 | if (!is_owner_or_cap(inode)) | 323 | if (!inode_owner_or_capable(inode)) |
324 | return -EACCES; | 324 | return -EACCES; |
325 | 325 | ||
326 | err = mnt_want_write(filp->f_path.mnt); | 326 | err = mnt_want_write(filp->f_path.mnt); |
@@ -334,16 +334,22 @@ mext_out: | |||
334 | case FITRIM: | 334 | case FITRIM: |
335 | { | 335 | { |
336 | struct super_block *sb = inode->i_sb; | 336 | struct super_block *sb = inode->i_sb; |
337 | struct request_queue *q = bdev_get_queue(sb->s_bdev); | ||
337 | struct fstrim_range range; | 338 | struct fstrim_range range; |
338 | int ret = 0; | 339 | int ret = 0; |
339 | 340 | ||
340 | if (!capable(CAP_SYS_ADMIN)) | 341 | if (!capable(CAP_SYS_ADMIN)) |
341 | return -EPERM; | 342 | return -EPERM; |
342 | 343 | ||
344 | if (!blk_queue_discard(q)) | ||
345 | return -EOPNOTSUPP; | ||
346 | |||
343 | if (copy_from_user(&range, (struct fstrim_range *)arg, | 347 | if (copy_from_user(&range, (struct fstrim_range *)arg, |
344 | sizeof(range))) | 348 | sizeof(range))) |
345 | return -EFAULT; | 349 | return -EFAULT; |
346 | 350 | ||
351 | range.minlen = max((unsigned int)range.minlen, | ||
352 | q->limits.discard_granularity); | ||
347 | ret = ext4_trim_fs(sb, &range); | 353 | ret = ext4_trim_fs(sb, &range); |
348 | if (ret < 0) | 354 | if (ret < 0) |
349 | return ret; | 355 | return ret; |
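With the two additions above, FITRIM now fails fast with EOPNOTSUPP on non-discard devices and silently raises too-small minlen values to the queue's discard granularity. For context, this is roughly how userspace drives the ioctl (a sketch of what fstrim(8) does, with error handling pared down; FITRIM and struct fstrim_range come from <linux/fs.h>):

```c
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <limits.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/fs.h>		/* FITRIM, struct fstrim_range */

int main(int argc, char **argv)
{
	struct fstrim_range range;
	int fd = open(argc > 1 ? argv[1] : "/", O_RDONLY);

	if (fd < 0)
		return 1;
	memset(&range, 0, sizeof(range));
	range.len = ULLONG_MAX;		/* whole filesystem */
	range.minlen = 0;		/* kernel clamps to discard granularity */
	if (ioctl(fd, FITRIM, &range) < 0)
		perror("FITRIM");	/* EOPNOTSUPP if no discard support */
	else
		printf("trimmed %llu bytes\n",
		       (unsigned long long)range.len);
	close(fd);
	return 0;
}
```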
@@ -421,6 +427,7 @@ long ext4_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg) | |||
421 | return err; | 427 | return err; |
422 | } | 428 | } |
423 | case EXT4_IOC_MOVE_EXT: | 429 | case EXT4_IOC_MOVE_EXT: |
430 | case FITRIM: | ||
424 | break; | 431 | break; |
425 | default: | 432 | default: |
426 | return -ENOIOCTLCMD; | 433 | return -ENOIOCTLCMD; |
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index d1fe09aea73d..d8a16eecf1d5 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c | |||
@@ -92,7 +92,7 @@ | |||
92 | * between CPUs. It is possible to get scheduled at this point. | 92 | * between CPUs. It is possible to get scheduled at this point. |
93 | * | 93 | * |
94 | * The locality group prealloc space is used looking at whether we have | 94 | * The locality group prealloc space is used looking at whether we have |
95 | * enough free space (pa_free) withing the prealloc space. | 95 | * enough free space (pa_free) within the prealloc space. |
96 | * | 96 | * |
97 | * If we can't allocate blocks via inode prealloc or/and locality group | 97 | * If we can't allocate blocks via inode prealloc or/and locality group |
98 | * prealloc then we look at the buddy cache. The buddy cache is represented | 98 | * prealloc then we look at the buddy cache. The buddy cache is represented |
@@ -432,9 +432,10 @@ static void *mb_find_buddy(struct ext4_buddy *e4b, int order, int *max) | |||
432 | } | 432 | } |
433 | 433 | ||
434 | /* at order 0 we see each particular block */ | 434 | /* at order 0 we see each particular block */ |
435 | *max = 1 << (e4b->bd_blkbits + 3); | 435 | if (order == 0) { |
436 | if (order == 0) | 436 | *max = 1 << (e4b->bd_blkbits + 3); |
437 | return EXT4_MB_BITMAP(e4b); | 437 | return EXT4_MB_BITMAP(e4b); |
438 | } | ||
438 | 439 | ||
439 | bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order]; | 440 | bb = EXT4_MB_BUDDY(e4b) + EXT4_SB(e4b->bd_sb)->s_mb_offsets[order]; |
440 | *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order]; | 441 | *max = EXT4_SB(e4b->bd_sb)->s_mb_maxs[order]; |
@@ -616,7 +617,6 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, | |||
616 | MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments); | 617 | MB_CHECK_ASSERT(e4b->bd_info->bb_fragments == fragments); |
617 | 618 | ||
618 | grp = ext4_get_group_info(sb, e4b->bd_group); | 619 | grp = ext4_get_group_info(sb, e4b->bd_group); |
619 | buddy = mb_find_buddy(e4b, 0, &max); | ||
620 | list_for_each(cur, &grp->bb_prealloc_list) { | 620 | list_for_each(cur, &grp->bb_prealloc_list) { |
621 | ext4_group_t groupnr; | 621 | ext4_group_t groupnr; |
622 | struct ext4_prealloc_space *pa; | 622 | struct ext4_prealloc_space *pa; |
@@ -635,7 +635,12 @@ static int __mb_check_buddy(struct ext4_buddy *e4b, char *file, | |||
635 | #define mb_check_buddy(e4b) | 635 | #define mb_check_buddy(e4b) |
636 | #endif | 636 | #endif |
637 | 637 | ||
638 | /* FIXME!! need more doc */ | 638 | /* |
639 | * Divide the blocks starting at @first with length @len into | ||
640 | * smaller chunks of power-of-2 block counts. | ||
641 | * Clear the bits in the bitmap covered by the blocks of each chunk, | ||
642 | * then increase bb_counters[] for the corresponding chunk size. | ||
643 | */ | ||
639 | static void ext4_mb_mark_free_simple(struct super_block *sb, | 644 | static void ext4_mb_mark_free_simple(struct super_block *sb, |
640 | void *buddy, ext4_grpblk_t first, ext4_grpblk_t len, | 645 | void *buddy, ext4_grpblk_t first, ext4_grpblk_t len, |
641 | struct ext4_group_info *grp) | 646 | struct ext4_group_info *grp) |
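The new comment describes a buddy-style split: the free run [first, first + len) is carved greedily into power-of-two chunks, each aligned to its own size. Below is a standalone sketch of just that splitting rule; it is my simplification, and the real ext4_mb_mark_free_simple() additionally clears buddy bitmap bits and bumps bb_counters[order] per chunk.

```c
#include <stdio.h>

/* Greedy power-of-two split of [first, first + len), each chunk
 * aligned to its own size -- the decomposition the comment above
 * describes. This sketch only prints the chunks. */
static void mark_free_simple(unsigned int first, unsigned int len)
{
	while (len) {
		unsigned int chunk = 1;

		/* Largest power of two that fits and keeps alignment. */
		while (chunk * 2 <= len && (first & (chunk * 2 - 1)) == 0)
			chunk *= 2;
		printf("chunk at %u, size %u\n", first, chunk);
		first += chunk;
		len -= chunk;
	}
}

int main(void)
{
	mark_free_simple(5, 13);	/* -> 5/1, 6/2, 8/8, 16/2 */
	return 0;
}
```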
@@ -2381,7 +2386,7 @@ static int ext4_mb_init_backend(struct super_block *sb) | |||
2381 | /* An 8TB filesystem with 64-bit pointers requires a 4096 byte | 2386 | /* An 8TB filesystem with 64-bit pointers requires a 4096 byte |
2382 | * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. | 2387 | * kmalloc. A 128kb malloc should suffice for a 256TB filesystem. |
2383 | * So a two level scheme suffices for now. */ | 2388 | * So a two level scheme suffices for now. */ |
2384 | sbi->s_group_info = kmalloc(array_size, GFP_KERNEL); | 2389 | sbi->s_group_info = kzalloc(array_size, GFP_KERNEL); |
2385 | if (sbi->s_group_info == NULL) { | 2390 | if (sbi->s_group_info == NULL) { |
2386 | printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n"); | 2391 | printk(KERN_ERR "EXT4-fs: can't allocate buddy meta group\n"); |
2387 | return -ENOMEM; | 2392 | return -ENOMEM; |
@@ -3208,7 +3213,7 @@ ext4_mb_check_group_pa(ext4_fsblk_t goal_block, | |||
3208 | cur_distance = abs(goal_block - cpa->pa_pstart); | 3213 | cur_distance = abs(goal_block - cpa->pa_pstart); |
3209 | new_distance = abs(goal_block - pa->pa_pstart); | 3214 | new_distance = abs(goal_block - pa->pa_pstart); |
3210 | 3215 | ||
3211 | if (cur_distance < new_distance) | 3216 | if (cur_distance <= new_distance) |
3212 | return cpa; | 3217 | return cpa; |
3213 | 3218 | ||
3214 | /* drop the previous reference */ | 3219 | /* drop the previous reference */ |
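Worked numbers for the `<` to `<=` change (hypothetical values): with goal_block = 100, a current candidate at pa_pstart = 90 and a newly examined preallocation at pa_pstart = 110, both distances are 10. Previously the tie fell through and the new preallocation replaced the candidate; with `<=`, the preallocation found first wins and the needless swap of references is avoided.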
@@ -3907,7 +3912,8 @@ static void ext4_mb_show_ac(struct ext4_allocation_context *ac) | |||
3907 | struct super_block *sb = ac->ac_sb; | 3912 | struct super_block *sb = ac->ac_sb; |
3908 | ext4_group_t ngroups, i; | 3913 | ext4_group_t ngroups, i; |
3909 | 3914 | ||
3910 | if (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED) | 3915 | if (!mb_enable_debug || |
3916 | (EXT4_SB(sb)->s_mount_flags & EXT4_MF_FS_ABORTED)) | ||
3911 | return; | 3917 | return; |
3912 | 3918 | ||
3913 | printk(KERN_ERR "EXT4-fs: Can't allocate:" | 3919 | printk(KERN_ERR "EXT4-fs: Can't allocate:" |
@@ -4753,7 +4759,8 @@ static int ext4_trim_extent(struct super_block *sb, int start, int count, | |||
4753 | * bitmap. Then issue a TRIM command on this extent and free the extent in | 4759 | * bitmap. Then issue a TRIM command on this extent and free the extent in |
4754 | * the group buddy bitmap. This is done until whole group is scanned. | 4760 | * the group buddy bitmap. This is done until whole group is scanned. |
4755 | */ | 4761 | */ |
4756 | ext4_grpblk_t ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b, | 4762 | static ext4_grpblk_t |
4763 | ext4_trim_all_free(struct super_block *sb, struct ext4_buddy *e4b, | ||
4757 | ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks) | 4764 | ext4_grpblk_t start, ext4_grpblk_t max, ext4_grpblk_t minblocks) |
4758 | { | 4765 | { |
4759 | void *bitmap; | 4766 | void *bitmap; |
@@ -4863,10 +4870,15 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range) | |||
4863 | break; | 4870 | break; |
4864 | } | 4871 | } |
4865 | 4872 | ||
4866 | if (len >= EXT4_BLOCKS_PER_GROUP(sb)) | 4873 | /* |
4867 | len -= (EXT4_BLOCKS_PER_GROUP(sb) - first_block); | 4874 | * For all the groups except the last one, last block will |
4868 | else | 4875 | * always be EXT4_BLOCKS_PER_GROUP(sb), so we only need to |
4876 | * change it for the last group in which case start + | ||
4877 | * len < EXT4_BLOCKS_PER_GROUP(sb). | ||
4878 | */ | ||
4879 | if (first_block + len < EXT4_BLOCKS_PER_GROUP(sb)) | ||
4869 | last_block = first_block + len; | 4880 | last_block = first_block + len; |
4881 | len -= last_block - first_block; | ||
4870 | 4882 | ||
4871 | if (e4b.bd_info->bb_free >= minlen) { | 4883 | if (e4b.bd_info->bb_free >= minlen) { |
4872 | cnt = ext4_trim_all_free(sb, &e4b, first_block, | 4884 | cnt = ext4_trim_all_free(sb, &e4b, first_block, |
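The replacement bookkeeping keeps a running len and clamps last_block only in the final group, since every earlier group is trimmed all the way to EXT4_BLOCKS_PER_GROUP(sb). A compact model of that walk (per_group and the start/len values are made up; the kernel works in group-relative block units):

```c
#include <stdio.h>

int main(void)
{
	unsigned long per_group = 32768;	/* assumed group size */
	unsigned long first_block = 1000, len = 70000, last_block;

	for (int group = 0; len > 0; group++) {
		last_block = per_group;
		/* Only the last group ends short of the group boundary. */
		if (first_block + len < per_group)
			last_block = first_block + len;
		len -= last_block - first_block;
		printf("group %d: trim [%lu, %lu)\n",
		       group, first_block, last_block);
		first_block = 0;	/* later groups start at block 0 */
	}
	return 0;
}
```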
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index b619322c76f0..22bd4d7f289b 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h | |||
@@ -169,7 +169,7 @@ struct ext4_allocation_context { | |||
169 | /* original request */ | 169 | /* original request */ |
170 | struct ext4_free_extent ac_o_ex; | 170 | struct ext4_free_extent ac_o_ex; |
171 | 171 | ||
172 | /* goal request (after normalization) */ | 172 | /* goal request (normalized ac_o_ex) */ |
173 | struct ext4_free_extent ac_g_ex; | 173 | struct ext4_free_extent ac_g_ex; |
174 | 174 | ||
175 | /* the best found extent */ | 175 | /* the best found extent */ |
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c index b0a126f23c20..92816b4e0f16 100644 --- a/fs/ext4/migrate.c +++ b/fs/ext4/migrate.c | |||
@@ -263,7 +263,7 @@ static int free_dind_blocks(handle_t *handle, | |||
263 | for (i = 0; i < max_entries; i++) { | 263 | for (i = 0; i < max_entries; i++) { |
264 | if (tmp_idata[i]) { | 264 | if (tmp_idata[i]) { |
265 | extend_credit_for_blkdel(handle, inode); | 265 | extend_credit_for_blkdel(handle, inode); |
266 | ext4_free_blocks(handle, inode, 0, | 266 | ext4_free_blocks(handle, inode, NULL, |
267 | le32_to_cpu(tmp_idata[i]), 1, | 267 | le32_to_cpu(tmp_idata[i]), 1, |
268 | EXT4_FREE_BLOCKS_METADATA | | 268 | EXT4_FREE_BLOCKS_METADATA | |
269 | EXT4_FREE_BLOCKS_FORGET); | 269 | EXT4_FREE_BLOCKS_FORGET); |
@@ -271,7 +271,7 @@ static int free_dind_blocks(handle_t *handle, | |||
271 | } | 271 | } |
272 | put_bh(bh); | 272 | put_bh(bh); |
273 | extend_credit_for_blkdel(handle, inode); | 273 | extend_credit_for_blkdel(handle, inode); |
274 | ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1, | 274 | ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1, |
275 | EXT4_FREE_BLOCKS_METADATA | | 275 | EXT4_FREE_BLOCKS_METADATA | |
276 | EXT4_FREE_BLOCKS_FORGET); | 276 | EXT4_FREE_BLOCKS_FORGET); |
277 | return 0; | 277 | return 0; |
@@ -302,7 +302,7 @@ static int free_tind_blocks(handle_t *handle, | |||
302 | } | 302 | } |
303 | put_bh(bh); | 303 | put_bh(bh); |
304 | extend_credit_for_blkdel(handle, inode); | 304 | extend_credit_for_blkdel(handle, inode); |
305 | ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1, | 305 | ext4_free_blocks(handle, inode, NULL, le32_to_cpu(i_data), 1, |
306 | EXT4_FREE_BLOCKS_METADATA | | 306 | EXT4_FREE_BLOCKS_METADATA | |
307 | EXT4_FREE_BLOCKS_FORGET); | 307 | EXT4_FREE_BLOCKS_FORGET); |
308 | return 0; | 308 | return 0; |
@@ -315,7 +315,7 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data) | |||
315 | /* ei->i_data[EXT4_IND_BLOCK] */ | 315 | /* ei->i_data[EXT4_IND_BLOCK] */ |
316 | if (i_data[0]) { | 316 | if (i_data[0]) { |
317 | extend_credit_for_blkdel(handle, inode); | 317 | extend_credit_for_blkdel(handle, inode); |
318 | ext4_free_blocks(handle, inode, 0, | 318 | ext4_free_blocks(handle, inode, NULL, |
319 | le32_to_cpu(i_data[0]), 1, | 319 | le32_to_cpu(i_data[0]), 1, |
320 | EXT4_FREE_BLOCKS_METADATA | | 320 | EXT4_FREE_BLOCKS_METADATA | |
321 | EXT4_FREE_BLOCKS_FORGET); | 321 | EXT4_FREE_BLOCKS_FORGET); |
@@ -428,7 +428,7 @@ static int free_ext_idx(handle_t *handle, struct inode *inode, | |||
428 | } | 428 | } |
429 | put_bh(bh); | 429 | put_bh(bh); |
430 | extend_credit_for_blkdel(handle, inode); | 430 | extend_credit_for_blkdel(handle, inode); |
431 | ext4_free_blocks(handle, inode, 0, block, 1, | 431 | ext4_free_blocks(handle, inode, NULL, block, 1, |
432 | EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); | 432 | EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET); |
433 | return retval; | 433 | return retval; |
434 | } | 434 | } |
@@ -517,7 +517,7 @@ int ext4_ext_migrate(struct inode *inode) | |||
517 | * start with one credit accounted for | 517 | * start with one credit accounted for |
518 | * superblock modification. | 518 | * superblock modification. |
519 | * | 519 | * |
520 | * For the tmp_inode we already have commited the | 520 | * For the tmp_inode we already have committed the |
521 | * transaction that created the inode. Later as and | 521 |
522 | * when we add extents we extend the journal | 522 |
523 | */ | 523 | */ |
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index e781b7ea5630..67fd0b025858 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c | |||
@@ -40,6 +40,7 @@ | |||
40 | #include "xattr.h" | 40 | #include "xattr.h" |
41 | #include "acl.h" | 41 | #include "acl.h" |
42 | 42 | ||
43 | #include <trace/events/ext4.h> | ||
43 | /* | 44 | /* |
44 | * define how far ahead to read directories while searching them. | 45 | * define how far ahead to read directories while searching them. |
45 | */ | 46 | */ |
@@ -2183,6 +2184,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) | |||
2183 | struct ext4_dir_entry_2 *de; | 2184 | struct ext4_dir_entry_2 *de; |
2184 | handle_t *handle; | 2185 | handle_t *handle; |
2185 | 2186 | ||
2187 | trace_ext4_unlink_enter(dir, dentry); | ||
2186 | /* Initialize quotas before so that eventual writes go | 2188 | /* Initialize quotas before so that eventual writes go |
2187 | * in separate transaction */ | 2189 | * in separate transaction */ |
2188 | dquot_initialize(dir); | 2190 | dquot_initialize(dir); |
@@ -2228,6 +2230,7 @@ static int ext4_unlink(struct inode *dir, struct dentry *dentry) | |||
2228 | end_unlink: | 2230 | end_unlink: |
2229 | ext4_journal_stop(handle); | 2231 | ext4_journal_stop(handle); |
2230 | brelse(bh); | 2232 | brelse(bh); |
2233 | trace_ext4_unlink_exit(dentry, retval); | ||
2231 | return retval; | 2234 | return retval; |
2232 | } | 2235 | } |
2233 | 2236 | ||
@@ -2402,6 +2405,10 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
2402 | if (!new_inode && new_dir != old_dir && | 2405 | if (!new_inode && new_dir != old_dir && |
2403 | EXT4_DIR_LINK_MAX(new_dir)) | 2406 | EXT4_DIR_LINK_MAX(new_dir)) |
2404 | goto end_rename; | 2407 | goto end_rename; |
2408 | BUFFER_TRACE(dir_bh, "get_write_access"); | ||
2409 | retval = ext4_journal_get_write_access(handle, dir_bh); | ||
2410 | if (retval) | ||
2411 | goto end_rename; | ||
2405 | } | 2412 | } |
2406 | if (!new_bh) { | 2413 | if (!new_bh) { |
2407 | retval = ext4_add_entry(handle, new_dentry, old_inode); | 2414 | retval = ext4_add_entry(handle, new_dentry, old_inode); |
@@ -2409,7 +2416,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
2409 | goto end_rename; | 2416 | goto end_rename; |
2410 | } else { | 2417 | } else { |
2411 | BUFFER_TRACE(new_bh, "get write access"); | 2418 | BUFFER_TRACE(new_bh, "get write access"); |
2412 | ext4_journal_get_write_access(handle, new_bh); | 2419 | retval = ext4_journal_get_write_access(handle, new_bh); |
2420 | if (retval) | ||
2421 | goto end_rename; | ||
2413 | new_de->inode = cpu_to_le32(old_inode->i_ino); | 2422 | new_de->inode = cpu_to_le32(old_inode->i_ino); |
2414 | if (EXT4_HAS_INCOMPAT_FEATURE(new_dir->i_sb, | 2423 | if (EXT4_HAS_INCOMPAT_FEATURE(new_dir->i_sb, |
2415 | EXT4_FEATURE_INCOMPAT_FILETYPE)) | 2424 | EXT4_FEATURE_INCOMPAT_FILETYPE)) |
@@ -2470,8 +2479,6 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, | |||
2470 | old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir); | 2479 | old_dir->i_ctime = old_dir->i_mtime = ext4_current_time(old_dir); |
2471 | ext4_update_dx_flag(old_dir); | 2480 | ext4_update_dx_flag(old_dir); |
2472 | if (dir_bh) { | 2481 | if (dir_bh) { |
2473 | BUFFER_TRACE(dir_bh, "get_write_access"); | ||
2474 | ext4_journal_get_write_access(handle, dir_bh); | ||
2475 | PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) = | 2482 | PARENT_INO(dir_bh->b_data, new_dir->i_sb->s_blocksize) = |
2476 | cpu_to_le32(new_dir->i_ino); | 2483 | cpu_to_le32(new_dir->i_ino); |
2477 | BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); | 2484 | BUFFER_TRACE(dir_bh, "call ext4_handle_dirty_metadata"); |
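The rename fix above moves ext4_journal_get_write_access() to before the parent-pointer update and actually checks its result, restoring the canonical jbd2 ordering: declare write access on a buffer, then modify it, then mark it dirty in the handle. Here is a compilable skeleton of that pattern; all types and helpers are stubbed, standing in for handle_t, struct buffer_head, ext4_journal_get_write_access() and ext4_handle_dirty_metadata().

```c
#include <stdio.h>

typedef struct { int aborted; } handle_t;	/* stub */
struct buffer_head { char data[64]; };		/* stub */

static int get_write_access(handle_t *h, struct buffer_head *bh)
{
	(void)bh;
	return h->aborted ? -5 /* -EIO */ : 0;	/* stub */
}

static int dirty_metadata(handle_t *h, struct buffer_head *bh)
{
	(void)h; (void)bh;
	return 0;				/* stub */
}

static int update_parent(handle_t *h, struct buffer_head *dir_bh)
{
	int err = get_write_access(h, dir_bh);	/* 1: declare intent */

	if (err)
		return err;			/* bail before touching */
	dir_bh->data[0] = 1;			/* 2: modify the buffer */
	return dirty_metadata(h, dir_bh);	/* 3: mark dirty in handle */
}

int main(void)
{
	handle_t h = { 0 };
	struct buffer_head bh = { { 0 } };

	return update_parent(&h, &bh);
}
```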
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c index 955cc309142f..b6dbd056fcb1 100644 --- a/fs/ext4/page-io.c +++ b/fs/ext4/page-io.c | |||
@@ -259,6 +259,11 @@ static void ext4_end_bio(struct bio *bio, int error) | |||
259 | bi_sector >> (inode->i_blkbits - 9)); | 259 | bi_sector >> (inode->i_blkbits - 9)); |
260 | } | 260 | } |
261 | 261 | ||
262 | if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) { | ||
263 | ext4_free_io_end(io_end); | ||
264 | return; | ||
265 | } | ||
266 | |||
262 | /* Add the io_end to per-inode completed io list*/ | 267 | /* Add the io_end to per-inode completed io list*/ |
263 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); | 268 | spin_lock_irqsave(&EXT4_I(inode)->i_completed_io_lock, flags); |
264 | list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list); | 269 | list_add_tail(&io_end->list, &EXT4_I(inode)->i_completed_io_list); |
@@ -279,9 +284,9 @@ void ext4_io_submit(struct ext4_io_submit *io) | |||
279 | BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP)); | 284 | BUG_ON(bio_flagged(io->io_bio, BIO_EOPNOTSUPP)); |
280 | bio_put(io->io_bio); | 285 | bio_put(io->io_bio); |
281 | } | 286 | } |
282 | io->io_bio = 0; | 287 | io->io_bio = NULL; |
283 | io->io_op = 0; | 288 | io->io_op = 0; |
284 | io->io_end = 0; | 289 | io->io_end = NULL; |
285 | } | 290 | } |
286 | 291 | ||
287 | static int io_submit_init(struct ext4_io_submit *io, | 292 | static int io_submit_init(struct ext4_io_submit *io, |
@@ -310,8 +315,7 @@ static int io_submit_init(struct ext4_io_submit *io, | |||
310 | io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh); | 315 | io_end->offset = (page->index << PAGE_CACHE_SHIFT) + bh_offset(bh); |
311 | 316 | ||
312 | io->io_bio = bio; | 317 | io->io_bio = bio; |
313 | io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? | 318 | io->io_op = (wbc->sync_mode == WB_SYNC_ALL ? WRITE_SYNC : WRITE); |
314 | WRITE_SYNC_PLUG : WRITE); | ||
315 | io->io_next_block = bh->b_blocknr; | 319 | io->io_next_block = bh->b_blocknr; |
316 | return 0; | 320 | return 0; |
317 | } | 321 | } |
@@ -381,8 +385,6 @@ int ext4_bio_write_page(struct ext4_io_submit *io, | |||
381 | 385 | ||
382 | BUG_ON(!PageLocked(page)); | 386 | BUG_ON(!PageLocked(page)); |
383 | BUG_ON(PageWriteback(page)); | 387 | BUG_ON(PageWriteback(page)); |
384 | set_page_writeback(page); | ||
385 | ClearPageError(page); | ||
386 | 388 | ||
387 | io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS); | 389 | io_page = kmem_cache_alloc(io_page_cachep, GFP_NOFS); |
388 | if (!io_page) { | 390 | if (!io_page) { |
@@ -393,6 +395,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io, | |||
393 | io_page->p_page = page; | 395 | io_page->p_page = page; |
394 | atomic_set(&io_page->p_count, 1); | 396 | atomic_set(&io_page->p_count, 1); |
395 | get_page(page); | 397 | get_page(page); |
398 | set_page_writeback(page); | ||
399 | ClearPageError(page); | ||
396 | 400 | ||
397 | for (bh = head = page_buffers(page), block_start = 0; | 401 | for (bh = head = page_buffers(page), block_start = 0; |
398 | bh != head || !block_start; | 402 | bh != head || !block_start; |
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c index 3ecc6e45d2f9..80bbc9c60c24 100644 --- a/fs/ext4/resize.c +++ b/fs/ext4/resize.c | |||
@@ -230,7 +230,7 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
230 | } | 230 | } |
231 | 231 | ||
232 | /* Zero out all of the reserved backup group descriptor table blocks */ | 232 | /* Zero out all of the reserved backup group descriptor table blocks */ |
233 | ext4_debug("clear inode table blocks %#04llx -> %#04llx\n", | 233 | ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", |
234 | block, sbi->s_itb_per_group); | 234 | block, sbi->s_itb_per_group); |
235 | err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb, | 235 | err = sb_issue_zeroout(sb, gdblocks + start + 1, reserved_gdb, |
236 | GFP_NOFS); | 236 | GFP_NOFS); |
@@ -248,7 +248,7 @@ static int setup_new_group_blocks(struct super_block *sb, | |||
248 | 248 | ||
249 | /* Zero out all of the inode table blocks */ | 249 | /* Zero out all of the inode table blocks */ |
250 | block = input->inode_table; | 250 | block = input->inode_table; |
251 | ext4_debug("clear inode table blocks %#04llx -> %#04llx\n", | 251 | ext4_debug("clear inode table blocks %#04llx -> %#04lx\n", |
252 | block, sbi->s_itb_per_group); | 252 | block, sbi->s_itb_per_group); |
253 | err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); | 253 | err = sb_issue_zeroout(sb, block, sbi->s_itb_per_group, GFP_NOFS); |
254 | if (err) | 254 | if (err) |
@@ -499,12 +499,12 @@ static int add_new_gdb(handle_t *handle, struct inode *inode, | |||
499 | return err; | 499 | return err; |
500 | 500 | ||
501 | exit_inode: | 501 | exit_inode: |
502 | /* ext4_journal_release_buffer(handle, iloc.bh); */ | 502 | /* ext4_handle_release_buffer(handle, iloc.bh); */ |
503 | brelse(iloc.bh); | 503 | brelse(iloc.bh); |
504 | exit_dindj: | 504 | exit_dindj: |
505 | /* ext4_journal_release_buffer(handle, dind); */ | 505 | /* ext4_handle_release_buffer(handle, dind); */ |
506 | exit_sbh: | 506 | exit_sbh: |
507 | /* ext4_journal_release_buffer(handle, EXT4_SB(sb)->s_sbh); */ | 507 | /* ext4_handle_release_buffer(handle, EXT4_SB(sb)->s_sbh); */ |
508 | exit_dind: | 508 | exit_dind: |
509 | brelse(dind); | 509 | brelse(dind); |
510 | exit_bh: | 510 | exit_bh: |
@@ -586,7 +586,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode, | |||
586 | /* | 586 | /* |
587 | int j; | 587 | int j; |
588 | for (j = 0; j < i; j++) | 588 | for (j = 0; j < i; j++) |
589 | ext4_journal_release_buffer(handle, primary[j]); | 589 | ext4_handle_release_buffer(handle, primary[j]); |
590 | */ | 590 | */ |
591 | goto exit_bh; | 591 | goto exit_bh; |
592 | } | 592 | } |
diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 203f9e4a70be..8553dfb310af 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c | |||
@@ -54,9 +54,9 @@ | |||
54 | 54 | ||
55 | static struct proc_dir_entry *ext4_proc_root; | 55 | static struct proc_dir_entry *ext4_proc_root; |
56 | static struct kset *ext4_kset; | 56 | static struct kset *ext4_kset; |
57 | struct ext4_lazy_init *ext4_li_info; | 57 | static struct ext4_lazy_init *ext4_li_info; |
58 | struct mutex ext4_li_mtx; | 58 | static struct mutex ext4_li_mtx; |
59 | struct ext4_features *ext4_feat; | 59 | static struct ext4_features *ext4_feat; |
60 | 60 | ||
61 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, | 61 | static int ext4_load_journal(struct super_block *, struct ext4_super_block *, |
62 | unsigned long journal_devnum); | 62 | unsigned long journal_devnum); |
@@ -75,6 +75,7 @@ static void ext4_write_super(struct super_block *sb); | |||
75 | static int ext4_freeze(struct super_block *sb); | 75 | static int ext4_freeze(struct super_block *sb); |
76 | static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, | 76 | static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags, |
77 | const char *dev_name, void *data); | 77 | const char *dev_name, void *data); |
78 | static int ext4_feature_set_ok(struct super_block *sb, int readonly); | ||
78 | static void ext4_destroy_lazyinit_thread(void); | 79 | static void ext4_destroy_lazyinit_thread(void); |
79 | static void ext4_unregister_li_request(struct super_block *sb); | 80 | static void ext4_unregister_li_request(struct super_block *sb); |
80 | static void ext4_clear_request_list(void); | 81 | static void ext4_clear_request_list(void); |
@@ -241,27 +242,44 @@ static void ext4_put_nojournal(handle_t *handle) | |||
241 | * journal_end calls result in the superblock being marked dirty, so | 242 | * journal_end calls result in the superblock being marked dirty, so |
242 | * that sync() will call the filesystem's write_super callback if | 243 | * that sync() will call the filesystem's write_super callback if |
243 | * appropriate. | 244 | * appropriate. |
245 | * | ||
246 | * To avoid the j_barrier being held across a userspace freeze(), | ||
247 | * ext4 prevents a new handle from being started via s_frozen, which | ||
248 | * is maintained in an upper layer. | ||
244 | */ | 249 | */ |
245 | handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) | 250 | handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks) |
246 | { | 251 | { |
247 | journal_t *journal; | 252 | journal_t *journal; |
253 | handle_t *handle; | ||
248 | 254 | ||
249 | if (sb->s_flags & MS_RDONLY) | 255 | if (sb->s_flags & MS_RDONLY) |
250 | return ERR_PTR(-EROFS); | 256 | return ERR_PTR(-EROFS); |
251 | 257 | ||
252 | vfs_check_frozen(sb, SB_FREEZE_TRANS); | ||
253 | /* Special case here: if the journal has aborted behind our | ||
254 | * backs (eg. EIO in the commit thread), then we still need to | ||
255 | * take the FS itself readonly cleanly. */ | ||
256 | journal = EXT4_SB(sb)->s_journal; | 258 | journal = EXT4_SB(sb)->s_journal; |
257 | if (journal) { | 259 | handle = ext4_journal_current_handle(); |
258 | if (is_journal_aborted(journal)) { | 260 | |
259 | ext4_abort(sb, "Detected aborted journal"); | 261 | /* |
260 | return ERR_PTR(-EROFS); | 262 | * If a handle has been started, it should be allowed to |
261 | } | 263 | * finish, otherwise deadlock could happen between freeze |
262 | return jbd2_journal_start(journal, nblocks); | 264 | * and others (e.g. truncate) due to the restart of the |
265 | * journal handle if the filesystem is frozen and active | ||
266 | * handles are not stopped. | ||
267 | */ | ||
268 | if (!handle) | ||
269 | vfs_check_frozen(sb, SB_FREEZE_TRANS); | ||
270 | |||
271 | if (!journal) | ||
272 | return ext4_get_nojournal(); | ||
273 | /* | ||
274 | * Special case here: if the journal has aborted behind our | ||
275 | * backs (eg. EIO in the commit thread), then we still need to | ||
276 | * take the FS itself readonly cleanly. | ||
277 | */ | ||
278 | if (is_journal_aborted(journal)) { | ||
279 | ext4_abort(sb, "Detected aborted journal"); | ||
280 | return ERR_PTR(-EROFS); | ||
263 | } | 281 | } |
264 | return ext4_get_nojournal(); | 282 | return jbd2_journal_start(journal, nblocks); |
265 | } | 283 | } |
266 | 284 | ||
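Condensed, the reworked ext4_journal_start_sb() decision order is: block on a frozen filesystem only when no handle is already running (so a restarting handle can finish and freeze cannot deadlock against truncate), fall back to the nojournal path, refuse an aborted journal, and otherwise start a handle. A small model of just that ordering; the enum and parameters are invented stand-ins for ext4_journal_current_handle(), vfs_check_frozen() and friends.

```c
#include <stdio.h>

enum outcome { WAIT_FOR_THAW, NOJOURNAL, ABORTED, START };

static enum outcome journal_start(int have_handle, int frozen,
				  int have_journal, int aborted)
{
	if (!have_handle && frozen)
		return WAIT_FOR_THAW;	/* only new work blocks on freeze */
	if (!have_journal)
		return NOJOURNAL;	/* ext4_get_nojournal() path */
	if (aborted)
		return ABORTED;		/* -EROFS after ext4_abort() */
	return START;			/* jbd2_journal_start() */
}

int main(void)
{
	/* A restarting handle on a frozen fs no longer deadlocks: */
	printf("%d\n", journal_start(1, 1, 1, 0) == START);
	return 0;
}
```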
267 | /* | 285 | /* |
@@ -594,7 +612,7 @@ __acquires(bitlock) | |||
594 | 612 | ||
595 | vaf.fmt = fmt; | 613 | vaf.fmt = fmt; |
596 | vaf.va = &args; | 614 | vaf.va = &args; |
597 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u", | 615 | printk(KERN_CRIT "EXT4-fs error (device %s): %s:%d: group %u, ", |
598 | sb->s_id, function, line, grp); | 616 | sb->s_id, function, line, grp); |
599 | if (ino) | 617 | if (ino) |
600 | printk(KERN_CONT "inode %lu: ", ino); | 618 | printk(KERN_CONT "inode %lu: ", ino); |
@@ -616,7 +634,7 @@ __acquires(bitlock) | |||
616 | * filesystem will have already been marked read/only and the | 634 | * filesystem will have already been marked read/only and the |
617 | * journal has been aborted. We return 1 as a hint to callers | 635 | * journal has been aborted. We return 1 as a hint to callers |
618 | * who might what to use the return value from | 636 | * who might what to use the return value from |
619 | * ext4_grp_locked_error() to distinguish beween the | 637 | * ext4_grp_locked_error() to distinguish between the |
620 | * ERRORS_CONT and ERRORS_RO case, and perhaps return more | 638 | * ERRORS_CONT and ERRORS_RO case, and perhaps return more |
621 | * aggressively from the ext4 function in question, with a | 639 | * aggressively from the ext4 function in question, with a |
622 | * more appropriate error code. | 640 | * more appropriate error code. |
@@ -997,13 +1015,10 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
997 | if (test_opt(sb, OLDALLOC)) | 1015 | if (test_opt(sb, OLDALLOC)) |
998 | seq_puts(seq, ",oldalloc"); | 1016 | seq_puts(seq, ",oldalloc"); |
999 | #ifdef CONFIG_EXT4_FS_XATTR | 1017 | #ifdef CONFIG_EXT4_FS_XATTR |
1000 | if (test_opt(sb, XATTR_USER) && | 1018 | if (test_opt(sb, XATTR_USER)) |
1001 | !(def_mount_opts & EXT4_DEFM_XATTR_USER)) | ||
1002 | seq_puts(seq, ",user_xattr"); | 1019 | seq_puts(seq, ",user_xattr"); |
1003 | if (!test_opt(sb, XATTR_USER) && | 1020 | if (!test_opt(sb, XATTR_USER)) |
1004 | (def_mount_opts & EXT4_DEFM_XATTR_USER)) { | ||
1005 | seq_puts(seq, ",nouser_xattr"); | 1021 | seq_puts(seq, ",nouser_xattr"); |
1006 | } | ||
1007 | #endif | 1022 | #endif |
1008 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | 1023 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
1009 | if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) | 1024 | if (test_opt(sb, POSIX_ACL) && !(def_mount_opts & EXT4_DEFM_ACL)) |
@@ -1041,8 +1056,8 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs) | |||
1041 | !(def_mount_opts & EXT4_DEFM_NODELALLOC)) | 1056 | !(def_mount_opts & EXT4_DEFM_NODELALLOC)) |
1042 | seq_puts(seq, ",nodelalloc"); | 1057 | seq_puts(seq, ",nodelalloc"); |
1043 | 1058 | ||
1044 | if (test_opt(sb, MBLK_IO_SUBMIT)) | 1059 | if (!test_opt(sb, MBLK_IO_SUBMIT)) |
1045 | seq_puts(seq, ",mblk_io_submit"); | 1060 | seq_puts(seq, ",nomblk_io_submit"); |
1046 | if (sbi->s_stripe) | 1061 | if (sbi->s_stripe) |
1047 | seq_printf(seq, ",stripe=%lu", sbi->s_stripe); | 1062 | seq_printf(seq, ",stripe=%lu", sbi->s_stripe); |
1048 | /* | 1063 | /* |
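Both show_options changes above pair with the new defaults set later in ext4_fill_super(): user_xattr and mblk_io_submit become default-on, so the mount-option dump now reports whichever spelling describes the current state rather than comparing against the on-disk defaults. A small sketch of that reporting pattern (the flag names and the opts word are made up for illustration):

    #include <stdio.h>

    #define OPT_XATTR_USER 0x1
    #define OPT_MBLK_IO    0x2

    /* Emit the spelling that matches the live state; for a default-on
     * flag like mblk_io_submit, only the negative form is interesting. */
    static void show_options(FILE *seq, unsigned int opts)
    {
            fputs((opts & OPT_XATTR_USER) ? ",user_xattr" : ",nouser_xattr", seq);
            if (!(opts & OPT_MBLK_IO))
                    fputs(",nomblk_io_submit", seq);
    }

    int main(void)
    {
            show_options(stdout, OPT_XATTR_USER);  /* -> ",user_xattr,nomblk_io_submit" */
            return 0;
    }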
@@ -1451,7 +1466,7 @@ static int parse_options(char *options, struct super_block *sb, | |||
1451 | * Initialize args struct so we know whether arg was | 1466 | * Initialize args struct so we know whether arg was |
1452 | * found; some options take optional arguments. | 1467 | * found; some options take optional arguments. |
1453 | */ | 1468 | */ |
1454 | args[0].to = args[0].from = 0; | 1469 | args[0].to = args[0].from = NULL; |
1455 | token = match_token(p, tokens, args); | 1470 | token = match_token(p, tokens, args); |
1456 | switch (token) { | 1471 | switch (token) { |
1457 | case Opt_bsd_df: | 1472 | case Opt_bsd_df: |
@@ -1771,7 +1786,7 @@ set_qf_format: | |||
1771 | return 0; | 1786 | return 0; |
1772 | if (option < 0 || option > (1 << 30)) | 1787 | if (option < 0 || option > (1 << 30)) |
1773 | return 0; | 1788 | return 0; |
1774 | if (!is_power_of_2(option)) { | 1789 | if (option && !is_power_of_2(option)) { |
1775 | ext4_msg(sb, KERN_ERR, | 1790 | ext4_msg(sb, KERN_ERR, |
1776 | "EXT4-fs: inode_readahead_blks" | 1791 | "EXT4-fs: inode_readahead_blks" |
1777 | " must be a power of 2"); | 1792 | " must be a power of 2"); |
@@ -2120,6 +2135,13 @@ static void ext4_orphan_cleanup(struct super_block *sb, | |||
2120 | return; | 2135 | return; |
2121 | } | 2136 | } |
2122 | 2137 | ||
2138 | /* Check if feature set would not allow a r/w mount */ | ||
2139 | if (!ext4_feature_set_ok(sb, 0)) { | ||
2140 | ext4_msg(sb, KERN_INFO, "Skipping orphan cleanup due to " | ||
2141 | "unknown ROCOMPAT features"); | ||
2142 | return; | ||
2143 | } | ||
2144 | |||
2123 | if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { | 2145 | if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { |
2124 | if (es->s_last_orphan) | 2146 | if (es->s_last_orphan) |
2125 | jbd_debug(1, "Errors on filesystem, " | 2147 | jbd_debug(1, "Errors on filesystem, " |
@@ -2412,7 +2434,7 @@ static ssize_t inode_readahead_blks_store(struct ext4_attr *a, | |||
2412 | if (parse_strtoul(buf, 0x40000000, &t)) | 2434 | if (parse_strtoul(buf, 0x40000000, &t)) |
2413 | return -EINVAL; | 2435 | return -EINVAL; |
2414 | 2436 | ||
2415 | if (!is_power_of_2(t)) | 2437 | if (t && !is_power_of_2(t)) |
2416 | return -EINVAL; | 2438 | return -EINVAL; |
2417 | 2439 | ||
2418 | sbi->s_inode_readahead_blks = t; | 2440 | sbi->s_inode_readahead_blks = t; |
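This sysfs store and the mount-option parser earlier relax the same validation: inode_readahead_blks may now be zero (readahead disabled), while any non-zero value must still be a power of two. A standalone sketch of the combined predicate:

    #include <stdbool.h>

    /* Mirrors the kernel's is_power_of_2(): zero is not a power of two,
     * which is exactly why the extra "option &&" guard is needed. */
    static bool is_power_of_2(unsigned long n)
    {
            return n != 0 && (n & (n - 1)) == 0;
    }

    /* Accept 0 (readahead off) or any power of two up to 2^30. */
    static bool readahead_blks_valid(long option)
    {
            if (option < 0 || option > (1L << 30))
                    return false;
            return option == 0 || is_power_of_2((unsigned long)option);
    }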
@@ -2970,6 +2992,12 @@ static int ext4_register_li_request(struct super_block *sb, | |||
2970 | mutex_unlock(&ext4_li_info->li_list_mtx); | 2992 | mutex_unlock(&ext4_li_info->li_list_mtx); |
2971 | 2993 | ||
2972 | sbi->s_li_request = elr; | 2994 | sbi->s_li_request = elr; |
2995 | /* | ||
2996 | * Set elr to NULL here since it has been inserted into | ||
2997 | * the request_list; its removal and freeing are | ||
2998 | * handled by ext4_clear_request_list from now on. | ||
2999 | */ | ||
3000 | elr = NULL; | ||
2973 | 3001 | ||
2974 | if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) { | 3002 | if (!(ext4_li_info->li_state & EXT4_LAZYINIT_RUNNING)) { |
2975 | ret = ext4_run_lazyinit_thread(); | 3003 | ret = ext4_run_lazyinit_thread(); |
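Nulling elr immediately after the list insertion is an ownership hand-off: the request list (emptied by ext4_clear_request_list) owns the allocation from then on, so a later error path that frees elr unconditionally just frees NULL. A generic sketch of the idiom, with made-up names:

    #include <stdlib.h>

    struct request { struct request *next; };

    static struct request *request_list;

    static int start_worker(void) { return 0; }  /* stand-in; may fail */

    static int register_request(void)
    {
            struct request *elr = calloc(1, sizeof(*elr));
            int err;

            if (!elr)
                    return -1;

            elr->next = request_list;  /* the list takes ownership here... */
            request_list = elr;
            elr = NULL;                /* ...so drop the local alias */

            err = start_worker();      /* can fail after the insertion */
            free(elr);                 /* free(NULL) is a no-op; no double free */
            return err;
    }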
@@ -3095,14 +3123,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3095 | } | 3123 | } |
3096 | if (def_mount_opts & EXT4_DEFM_UID16) | 3124 | if (def_mount_opts & EXT4_DEFM_UID16) |
3097 | set_opt(sb, NO_UID32); | 3125 | set_opt(sb, NO_UID32); |
3126 | /* xattr user namespace & acls are now defaulted on */ | ||
3098 | #ifdef CONFIG_EXT4_FS_XATTR | 3127 | #ifdef CONFIG_EXT4_FS_XATTR |
3099 | if (def_mount_opts & EXT4_DEFM_XATTR_USER) | 3128 | set_opt(sb, XATTR_USER); |
3100 | set_opt(sb, XATTR_USER); | ||
3101 | #endif | 3129 | #endif |
3102 | #ifdef CONFIG_EXT4_FS_POSIX_ACL | 3130 | #ifdef CONFIG_EXT4_FS_POSIX_ACL |
3103 | if (def_mount_opts & EXT4_DEFM_ACL) | 3131 | set_opt(sb, POSIX_ACL); |
3104 | set_opt(sb, POSIX_ACL); | ||
3105 | #endif | 3132 | #endif |
3133 | set_opt(sb, MBLK_IO_SUBMIT); | ||
3106 | if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) | 3134 | if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) |
3107 | set_opt(sb, JOURNAL_DATA); | 3135 | set_opt(sb, JOURNAL_DATA); |
3108 | else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) | 3136 | else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) |
@@ -3380,6 +3408,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) | |||
3380 | get_random_bytes(&sbi->s_next_generation, sizeof(u32)); | 3408 | get_random_bytes(&sbi->s_next_generation, sizeof(u32)); |
3381 | spin_lock_init(&sbi->s_next_gen_lock); | 3409 | spin_lock_init(&sbi->s_next_gen_lock); |
3382 | 3410 | ||
3411 | init_timer(&sbi->s_err_report); | ||
3412 | sbi->s_err_report.function = print_daily_error_info; | ||
3413 | sbi->s_err_report.data = (unsigned long) sb; | ||
3414 | |||
3383 | err = percpu_counter_init(&sbi->s_freeblocks_counter, | 3415 | err = percpu_counter_init(&sbi->s_freeblocks_counter, |
3384 | ext4_count_free_blocks(sb)); | 3416 | ext4_count_free_blocks(sb)); |
3385 | if (!err) { | 3417 | if (!err) { |
@@ -3516,7 +3548,7 @@ no_journal: | |||
3516 | * concurrency isn't really necessary. Limit it to 1. | 3548 | * concurrency isn't really necessary. Limit it to 1. |
3517 | */ | 3549 | */ |
3518 | EXT4_SB(sb)->dio_unwritten_wq = | 3550 | EXT4_SB(sb)->dio_unwritten_wq = |
3519 | alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM, 1); | 3551 | alloc_workqueue("ext4-dio-unwritten", WQ_MEM_RECLAIM | WQ_UNBOUND, 1); |
3520 | if (!EXT4_SB(sb)->dio_unwritten_wq) { | 3552 | if (!EXT4_SB(sb)->dio_unwritten_wq) { |
3521 | printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); | 3553 | printk(KERN_ERR "EXT4-fs: failed to create DIO workqueue\n"); |
3522 | goto failed_mount_wq; | 3554 | goto failed_mount_wq; |
@@ -3531,17 +3563,16 @@ no_journal: | |||
3531 | if (IS_ERR(root)) { | 3563 | if (IS_ERR(root)) { |
3532 | ext4_msg(sb, KERN_ERR, "get root inode failed"); | 3564 | ext4_msg(sb, KERN_ERR, "get root inode failed"); |
3533 | ret = PTR_ERR(root); | 3565 | ret = PTR_ERR(root); |
3566 | root = NULL; | ||
3534 | goto failed_mount4; | 3567 | goto failed_mount4; |
3535 | } | 3568 | } |
3536 | if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { | 3569 | if (!S_ISDIR(root->i_mode) || !root->i_blocks || !root->i_size) { |
3537 | iput(root); | ||
3538 | ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck"); | 3570 | ext4_msg(sb, KERN_ERR, "corrupt root inode, run e2fsck"); |
3539 | goto failed_mount4; | 3571 | goto failed_mount4; |
3540 | } | 3572 | } |
3541 | sb->s_root = d_alloc_root(root); | 3573 | sb->s_root = d_alloc_root(root); |
3542 | if (!sb->s_root) { | 3574 | if (!sb->s_root) { |
3543 | ext4_msg(sb, KERN_ERR, "get root dentry failed"); | 3575 | ext4_msg(sb, KERN_ERR, "get root dentry failed"); |
3544 | iput(root); | ||
3545 | ret = -ENOMEM; | 3576 | ret = -ENOMEM; |
3546 | goto failed_mount4; | 3577 | goto failed_mount4; |
3547 | } | 3578 | } |
@@ -3642,9 +3673,6 @@ no_journal: | |||
3642 | "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts, | 3673 | "Opts: %s%s%s", descr, sbi->s_es->s_mount_opts, |
3643 | *sbi->s_es->s_mount_opts ? "; " : "", orig_data); | 3674 | *sbi->s_es->s_mount_opts ? "; " : "", orig_data); |
3644 | 3675 | ||
3645 | init_timer(&sbi->s_err_report); | ||
3646 | sbi->s_err_report.function = print_daily_error_info; | ||
3647 | sbi->s_err_report.data = (unsigned long) sb; | ||
3648 | if (es->s_error_count) | 3676 | if (es->s_error_count) |
3649 | mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ | 3677 | mod_timer(&sbi->s_err_report, jiffies + 300*HZ); /* 5 minutes */ |
3650 | 3678 | ||
@@ -3657,6 +3685,8 @@ cantfind_ext4: | |||
3657 | goto failed_mount; | 3685 | goto failed_mount; |
3658 | 3686 | ||
3659 | failed_mount4: | 3687 | failed_mount4: |
3688 | iput(root); | ||
3689 | sb->s_root = NULL; | ||
3660 | ext4_msg(sb, KERN_ERR, "mount failed"); | 3690 | ext4_msg(sb, KERN_ERR, "mount failed"); |
3661 | destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); | 3691 | destroy_workqueue(EXT4_SB(sb)->dio_unwritten_wq); |
3662 | failed_mount_wq: | 3692 | failed_mount_wq: |
@@ -3666,6 +3696,7 @@ failed_mount_wq: | |||
3666 | sbi->s_journal = NULL; | 3696 | sbi->s_journal = NULL; |
3667 | } | 3697 | } |
3668 | failed_mount3: | 3698 | failed_mount3: |
3699 | del_timer(&sbi->s_err_report); | ||
3669 | if (sbi->s_flex_groups) { | 3700 | if (sbi->s_flex_groups) { |
3670 | if (is_vmalloc_addr(sbi->s_flex_groups)) | 3701 | if (is_vmalloc_addr(sbi->s_flex_groups)) |
3671 | vfree(sbi->s_flex_groups); | 3702 | vfree(sbi->s_flex_groups); |
@@ -4132,6 +4163,11 @@ static int ext4_sync_fs(struct super_block *sb, int wait) | |||
4132 | /* | 4163 | /* |
4133 | * LVM calls this function before a (read-only) snapshot is created. This | 4164 | * LVM calls this function before a (read-only) snapshot is created. This |
4134 | * gives us a chance to flush the journal completely and mark the fs clean. | 4165 | * gives us a chance to flush the journal completely and mark the fs clean. |
4166 | * | ||
4167 | * Note that this function alone cannot bring the filesystem into a clean | ||
4168 | * state, because ext4 prevents a new handle from being started | ||
4169 | * by @sb->s_frozen, which lives in an upper layer. It thus needs help | ||
4170 | * from that upper layer. | ||
4135 | */ | 4171 | */ |
4136 | static int ext4_freeze(struct super_block *sb) | 4172 | static int ext4_freeze(struct super_block *sb) |
4137 | { | 4173 | { |
@@ -4608,17 +4644,30 @@ static int ext4_quota_on(struct super_block *sb, int type, int format_id, | |||
4608 | 4644 | ||
4609 | static int ext4_quota_off(struct super_block *sb, int type) | 4645 | static int ext4_quota_off(struct super_block *sb, int type) |
4610 | { | 4646 | { |
4647 | struct inode *inode = sb_dqopt(sb)->files[type]; | ||
4648 | handle_t *handle; | ||
4649 | |||
4611 | /* Force all delayed allocation blocks to be allocated. | 4650 | /* Force all delayed allocation blocks to be allocated. |
4612 | * Caller already holds s_umount sem */ | 4651 | * Caller already holds s_umount sem */ |
4613 | if (test_opt(sb, DELALLOC)) | 4652 | if (test_opt(sb, DELALLOC)) |
4614 | sync_filesystem(sb); | 4653 | sync_filesystem(sb); |
4615 | 4654 | ||
4655 | /* Update modification times of quota files when userspace can | ||
4656 | * start looking at them */ | ||
4657 | handle = ext4_journal_start(inode, 1); | ||
4658 | if (IS_ERR(handle)) | ||
4659 | goto out; | ||
4660 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
4661 | ext4_mark_inode_dirty(handle, inode); | ||
4662 | ext4_journal_stop(handle); | ||
4663 | |||
4664 | out: | ||
4616 | return dquot_quota_off(sb, type); | 4665 | return dquot_quota_off(sb, type); |
4617 | } | 4666 | } |
4618 | 4667 | ||
4619 | /* Read data from quotafile - avoid pagecache and such because we cannot afford | 4668 | /* Read data from quotafile - avoid pagecache and such because we cannot afford |
4620 | * acquiring the locks... As quota files are never truncated and quota code | 4669 | * acquiring the locks... As quota files are never truncated and quota code |
4621 | * itself serializes the operations (and noone else should touch the files) | 4670 | * itself serializes the operations (and no one else should touch the files) |
4622 | * we don't have to be afraid of races */ | 4671 | * we don't have to be afraid of races */ |
4623 | static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, | 4672 | static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data, |
4624 | size_t len, loff_t off) | 4673 | size_t len, loff_t off) |
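The ext4_quota_off() change above refreshes the quota file's mtime/ctime inside a one-credit journal handle before quotas are switched off, so userspace sees current timestamps; if the handle cannot be started, the update is skipped and quota-off proceeds anyway. A userspace-flavoured sketch of that shape (the struct fields and journal_* helpers are stand-ins for the jbd2 API):

    #include <stddef.h>
    #include <time.h>

    struct inode  { time_t mtime, ctime; };
    struct handle { struct inode *owner; };

    static struct handle *journal_start(struct inode *inode)
    {
            static struct handle h;    /* the real call can fail */
            h.owner = inode;
            return &h;
    }

    static void mark_inode_dirty(struct handle *h, struct inode *i) { (void)h; (void)i; }
    static void journal_stop(struct handle *h) { (void)h; }
    static int turn_quota_off(void) { return 0; }

    static int quota_off(struct inode *quota_file)
    {
            struct handle *h = journal_start(quota_file);

            if (h != NULL) {           /* on failure, fall through to quota-off */
                    quota_file->mtime = quota_file->ctime = time(NULL);
                    mark_inode_dirty(h, quota_file);
                    journal_stop(h);
            }
            return turn_quota_off();
    }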
@@ -4708,9 +4757,8 @@ out: | |||
4708 | if (inode->i_size < off + len) { | 4757 | if (inode->i_size < off + len) { |
4709 | i_size_write(inode, off + len); | 4758 | i_size_write(inode, off + len); |
4710 | EXT4_I(inode)->i_disksize = inode->i_size; | 4759 | EXT4_I(inode)->i_disksize = inode->i_size; |
4760 | ext4_mark_inode_dirty(handle, inode); | ||
4711 | } | 4761 | } |
4712 | inode->i_mtime = inode->i_ctime = CURRENT_TIME; | ||
4713 | ext4_mark_inode_dirty(handle, inode); | ||
4714 | mutex_unlock(&inode->i_mutex); | 4762 | mutex_unlock(&inode->i_mutex); |
4715 | return len; | 4763 | return len; |
4716 | } | 4764 | } |
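Taken together, the remaining super.c hunks keep mount-time setup and the failure labels symmetric: the error-report timer is armed early and disarmed at failed_mount3, and the root inode is released in exactly one place (failed_mount4) instead of at each call site. A compact sketch of the cascading-label idiom those hunks restore (the resources and labels here are generic stand-ins):

    #include <stdlib.h>

    static int finish_mount(void) { return 0; }  /* stand-in; may fail */

    static int mount_sketch(void)
    {
            void *timer = malloc(1);             /* armed early, like init_timer() */
            void *counters, *root;

            if (!timer)
                    return -1;

            counters = malloc(1);                /* percpu counters, workqueue, ... */
            if (!counters)
                    goto fail_timer;

            root = malloc(1);                    /* root inode + dentry */
            if (!root)
                    goto fail_counters;

            if (finish_mount() != 0)
                    goto fail_root;

            return 0;

            /* Labels cascade in reverse acquisition order: the root is
             * released exactly once, and the timer is always disarmed. */
    fail_root:
            free(root);                          /* iput(root) analogue */
    fail_counters:
            free(counters);
    fail_timer:
            free(timer);                         /* del_timer() analogue */
            return -1;
    }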
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index fc32176eee39..b545ca1c459c 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c | |||
@@ -735,7 +735,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, | |||
735 | int offset = (char *)s->here - bs->bh->b_data; | 735 | int offset = (char *)s->here - bs->bh->b_data; |
736 | 736 | ||
737 | unlock_buffer(bs->bh); | 737 | unlock_buffer(bs->bh); |
738 | jbd2_journal_release_buffer(handle, bs->bh); | 738 | ext4_handle_release_buffer(handle, bs->bh); |
739 | if (ce) { | 739 | if (ce) { |
740 | mb_cache_entry_release(ce); | 740 | mb_cache_entry_release(ce); |
741 | ce = NULL; | 741 | ce = NULL; |
@@ -833,7 +833,7 @@ inserted: | |||
833 | new_bh = sb_getblk(sb, block); | 833 | new_bh = sb_getblk(sb, block); |
834 | if (!new_bh) { | 834 | if (!new_bh) { |
835 | getblk_failed: | 835 | getblk_failed: |
836 | ext4_free_blocks(handle, inode, 0, block, 1, | 836 | ext4_free_blocks(handle, inode, NULL, block, 1, |
837 | EXT4_FREE_BLOCKS_METADATA); | 837 | EXT4_FREE_BLOCKS_METADATA); |
838 | error = -EIO; | 838 | error = -EIO; |
839 | goto cleanup; | 839 | goto cleanup; |