aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/Kconfig20
-rw-r--r--fs/ext4/balloc.c46
-rw-r--r--fs/ext4/block_validity.c3
-rw-r--r--fs/ext4/ext4.h25
-rw-r--r--fs/ext4/ext4_jbd2.c82
-rw-r--r--fs/ext4/ext4_jbd2.h44
-rw-r--r--fs/ext4/extents.c80
-rw-r--r--fs/ext4/fsync.c54
-rw-r--r--fs/ext4/inode.c253
-rw-r--r--fs/ext4/ioctl.c29
-rw-r--r--fs/ext4/mballoc.c103
-rw-r--r--fs/ext4/migrate.c27
-rw-r--r--fs/ext4/move_extent.c282
-rw-r--r--fs/ext4/namei.c46
-rw-r--r--fs/ext4/resize.c2
-rw-r--r--fs/ext4/super.c185
-rw-r--r--fs/ext4/xattr.c15
17 files changed, 704 insertions, 592 deletions
diff --git a/fs/ext4/Kconfig b/fs/ext4/Kconfig
index d5c0ea2e8f2d..9acf7e808139 100644
--- a/fs/ext4/Kconfig
+++ b/fs/ext4/Kconfig
@@ -26,19 +26,15 @@ config EXT4_FS
26 26
27 If unsure, say N. 27 If unsure, say N.
28 28
29config EXT4DEV_COMPAT 29config EXT4_USE_FOR_EXT23
30 bool "Enable ext4dev compatibility" 30 bool "Use ext4 for ext2/ext3 file systems"
31 depends on EXT4_FS 31 depends on EXT3_FS=n || EXT2_FS=n
32 default y
32 help 33 help
33 Starting with 2.6.28, the name of the ext4 filesystem was 34 Allow the ext4 file system driver code to be used for ext2 or
34 renamed from ext4dev to ext4. Unfortunately there are some 35 ext3 file system mounts. This allows users to reduce their
35 legacy userspace programs (such as klibc's fstype) have 36 compiled kernel size by using one file system driver for
36 "ext4dev" hardcoded. 37 ext2, ext3, and ext4 file systems.
37
38 To enable backwards compatibility so that systems that are
39 still expecting to mount ext4 filesystems using ext4dev,
40 choose Y here. This feature will go away by 2.6.31, so
41 please arrange to get your userspace programs fixed!
42 38
43config EXT4_FS_XATTR 39config EXT4_FS_XATTR
44 bool "Ext4 extended attributes" 40 bool "Ext4 extended attributes"
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 1d0418980f8d..22bc7435d913 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -499,44 +499,6 @@ error_return:
499} 499}
500 500
501/** 501/**
502 * ext4_free_blocks() -- Free given blocks and update quota
503 * @handle: handle for this transaction
504 * @inode: inode
505 * @block: start physical block to free
506 * @count: number of blocks to count
507 * @metadata: Are these metadata blocks
508 */
509void ext4_free_blocks(handle_t *handle, struct inode *inode,
510 ext4_fsblk_t block, unsigned long count,
511 int metadata)
512{
513 struct super_block *sb;
514 unsigned long dquot_freed_blocks;
515
516 /* this isn't the right place to decide whether block is metadata
517 * inode.c/extents.c knows better, but for safety ... */
518 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
519 metadata = 1;
520
521 /* We need to make sure we don't reuse
522 * block released untill the transaction commit.
523 * writeback mode have weak data consistency so
524 * don't force data as metadata when freeing block
525 * for writeback mode.
526 */
527 if (metadata == 0 && !ext4_should_writeback_data(inode))
528 metadata = 1;
529
530 sb = inode->i_sb;
531
532 ext4_mb_free_blocks(handle, inode, block, count,
533 metadata, &dquot_freed_blocks);
534 if (dquot_freed_blocks)
535 vfs_dq_free_block(inode, dquot_freed_blocks);
536 return;
537}
538
539/**
540 * ext4_has_free_blocks() 502 * ext4_has_free_blocks()
541 * @sbi: in-core super block structure. 503 * @sbi: in-core super block structure.
542 * @nblocks: number of needed blocks 504 * @nblocks: number of needed blocks
@@ -761,7 +723,13 @@ static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb,
761static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb, 723static unsigned long ext4_bg_num_gdb_nometa(struct super_block *sb,
762 ext4_group_t group) 724 ext4_group_t group)
763{ 725{
764 return ext4_bg_has_super(sb, group) ? EXT4_SB(sb)->s_gdb_count : 0; 726 if (!ext4_bg_has_super(sb, group))
727 return 0;
728
729 if (EXT4_HAS_INCOMPAT_FEATURE(sb,EXT4_FEATURE_INCOMPAT_META_BG))
730 return le32_to_cpu(EXT4_SB(sb)->s_es->s_first_meta_bg);
731 else
732 return EXT4_SB(sb)->s_gdb_count;
765} 733}
766 734
767/** 735/**
diff --git a/fs/ext4/block_validity.c b/fs/ext4/block_validity.c
index 50784ef07563..4df8621ec31c 100644
--- a/fs/ext4/block_validity.c
+++ b/fs/ext4/block_validity.c
@@ -160,7 +160,7 @@ int ext4_setup_system_zone(struct super_block *sb)
160 if (ext4_bg_has_super(sb, i) && 160 if (ext4_bg_has_super(sb, i) &&
161 ((i < 5) || ((i % flex_size) == 0))) 161 ((i < 5) || ((i % flex_size) == 0)))
162 add_system_zone(sbi, ext4_group_first_block_no(sb, i), 162 add_system_zone(sbi, ext4_group_first_block_no(sb, i),
163 sbi->s_gdb_count + 1); 163 ext4_bg_num_gdb(sb, i) + 1);
164 gdp = ext4_get_group_desc(sb, i, NULL); 164 gdp = ext4_get_group_desc(sb, i, NULL);
165 ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1); 165 ret = add_system_zone(sbi, ext4_block_bitmap(sb, gdp), 1);
166 if (ret) 166 if (ret)
@@ -228,6 +228,7 @@ int ext4_data_block_valid(struct ext4_sb_info *sbi, ext4_fsblk_t start_blk,
228 struct rb_node *n = sbi->system_blks.rb_node; 228 struct rb_node *n = sbi->system_blks.rb_node;
229 229
230 if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) || 230 if ((start_blk <= le32_to_cpu(sbi->s_es->s_first_data_block)) ||
231 (start_blk + count < start_blk) ||
231 (start_blk + count > ext4_blocks_count(sbi->s_es))) 232 (start_blk + count > ext4_blocks_count(sbi->s_es)))
232 return 0; 233 return 0;
233 while (n) { 234 while (n) {
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 984ca0cb38c3..ab31e65d46d0 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -322,6 +322,7 @@ static inline __u32 ext4_mask_flags(umode_t mode, __u32 flags)
322#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */ 322#define EXT4_STATE_NO_EXPAND 0x00000008 /* No space for expansion */
323#define EXT4_STATE_DA_ALLOC_CLOSE 0x00000010 /* Alloc DA blks on close */ 323#define EXT4_STATE_DA_ALLOC_CLOSE 0x00000010 /* Alloc DA blks on close */
324#define EXT4_STATE_EXT_MIGRATE 0x00000020 /* Inode is migrating */ 324#define EXT4_STATE_EXT_MIGRATE 0x00000020 /* Inode is migrating */
325#define EXT4_STATE_DIO_UNWRITTEN 0x00000040 /* need convert on dio done*/
325 326
326/* Used to pass group descriptor data when online resize is done */ 327/* Used to pass group descriptor data when online resize is done */
327struct ext4_new_group_input { 328struct ext4_new_group_input {
@@ -375,6 +376,12 @@ struct ext4_new_group_data {
375 EXT4_GET_BLOCKS_DIO_CREATE_EXT) 376 EXT4_GET_BLOCKS_DIO_CREATE_EXT)
376 377
377/* 378/*
379 * Flags used by ext4_free_blocks
380 */
381#define EXT4_FREE_BLOCKS_METADATA 0x0001
382#define EXT4_FREE_BLOCKS_FORGET 0x0002
383
384/*
378 * ioctl commands 385 * ioctl commands
379 */ 386 */
380#define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS 387#define EXT4_IOC_GETFLAGS FS_IOC_GETFLAGS
@@ -702,6 +709,13 @@ struct ext4_inode_info {
702 struct list_head i_aio_dio_complete_list; 709 struct list_head i_aio_dio_complete_list;
703 /* current io_end structure for async DIO write*/ 710 /* current io_end structure for async DIO write*/
704 ext4_io_end_t *cur_aio_dio; 711 ext4_io_end_t *cur_aio_dio;
712
713 /*
714 * Transactions that contain inode's metadata needed to complete
715 * fsync and fdatasync, respectively.
716 */
717 tid_t i_sync_tid;
718 tid_t i_datasync_tid;
705}; 719};
706 720
707/* 721/*
@@ -743,11 +757,13 @@ struct ext4_inode_info {
743#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */ 757#define EXT4_MOUNT_QUOTA 0x80000 /* Some quota option set */
744#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */ 758#define EXT4_MOUNT_USRQUOTA 0x100000 /* "old" user quota */
745#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */ 759#define EXT4_MOUNT_GRPQUOTA 0x200000 /* "old" group quota */
760#define EXT4_MOUNT_JOURNAL_CHECKSUM 0x800000 /* Journal checksums */
746#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */ 761#define EXT4_MOUNT_JOURNAL_ASYNC_COMMIT 0x1000000 /* Journal Async Commit */
747#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */ 762#define EXT4_MOUNT_I_VERSION 0x2000000 /* i_version support */
748#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */ 763#define EXT4_MOUNT_DELALLOC 0x8000000 /* Delalloc support */
749#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */ 764#define EXT4_MOUNT_DATA_ERR_ABORT 0x10000000 /* Abort on file data write */
750#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */ 765#define EXT4_MOUNT_BLOCK_VALIDITY 0x20000000 /* Block validity checking */
766#define EXT4_MOUNT_DISCARD 0x40000000 /* Issue DISCARD requests */
751 767
752#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt 768#define clear_opt(o, opt) o &= ~EXT4_MOUNT_##opt
753#define set_opt(o, opt) o |= EXT4_MOUNT_##opt 769#define set_opt(o, opt) o |= EXT4_MOUNT_##opt
@@ -1322,8 +1338,6 @@ extern ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
1322 ext4_fsblk_t goal, unsigned long *count, int *errp); 1338 ext4_fsblk_t goal, unsigned long *count, int *errp);
1323extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); 1339extern int ext4_claim_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
1324extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks); 1340extern int ext4_has_free_blocks(struct ext4_sb_info *sbi, s64 nblocks);
1325extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
1326 ext4_fsblk_t block, unsigned long count, int metadata);
1327extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb, 1341extern void ext4_add_groupblocks(handle_t *handle, struct super_block *sb,
1328 ext4_fsblk_t block, unsigned long count); 1342 ext4_fsblk_t block, unsigned long count);
1329extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *); 1343extern ext4_fsblk_t ext4_count_free_blocks(struct super_block *);
@@ -1382,16 +1396,15 @@ extern int ext4_mb_reserve_blocks(struct super_block *, int);
1382extern void ext4_discard_preallocations(struct inode *); 1396extern void ext4_discard_preallocations(struct inode *);
1383extern int __init init_ext4_mballoc(void); 1397extern int __init init_ext4_mballoc(void);
1384extern void exit_ext4_mballoc(void); 1398extern void exit_ext4_mballoc(void);
1385extern void ext4_mb_free_blocks(handle_t *, struct inode *, 1399extern void ext4_free_blocks(handle_t *handle, struct inode *inode,
1386 ext4_fsblk_t, unsigned long, int, unsigned long *); 1400 struct buffer_head *bh, ext4_fsblk_t block,
1401 unsigned long count, int flags);
1387extern int ext4_mb_add_groupinfo(struct super_block *sb, 1402extern int ext4_mb_add_groupinfo(struct super_block *sb,
1388 ext4_group_t i, struct ext4_group_desc *desc); 1403 ext4_group_t i, struct ext4_group_desc *desc);
1389extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t); 1404extern int ext4_mb_get_buddy_cache_lock(struct super_block *, ext4_group_t);
1390extern void ext4_mb_put_buddy_cache_lock(struct super_block *, 1405extern void ext4_mb_put_buddy_cache_lock(struct super_block *,
1391 ext4_group_t, int); 1406 ext4_group_t, int);
1392/* inode.c */ 1407/* inode.c */
1393int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
1394 struct buffer_head *bh, ext4_fsblk_t blocknr);
1395struct buffer_head *ext4_getblk(handle_t *, struct inode *, 1408struct buffer_head *ext4_getblk(handle_t *, struct inode *,
1396 ext4_lblk_t, int, int *); 1409 ext4_lblk_t, int, int *);
1397struct buffer_head *ext4_bread(handle_t *, struct inode *, 1410struct buffer_head *ext4_bread(handle_t *, struct inode *,
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 6a9409920dee..b57e5c711b6d 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -4,6 +4,8 @@
4 4
5#include "ext4_jbd2.h" 5#include "ext4_jbd2.h"
6 6
7#include <trace/events/ext4.h>
8
7int __ext4_journal_get_undo_access(const char *where, handle_t *handle, 9int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
8 struct buffer_head *bh) 10 struct buffer_head *bh)
9{ 11{
@@ -32,35 +34,69 @@ int __ext4_journal_get_write_access(const char *where, handle_t *handle,
32 return err; 34 return err;
33} 35}
34 36
35int __ext4_journal_forget(const char *where, handle_t *handle, 37/*
36 struct buffer_head *bh) 38 * The ext4 forget function must perform a revoke if we are freeing data
39 * which has been journaled. Metadata (eg. indirect blocks) must be
40 * revoked in all cases.
41 *
42 * "bh" may be NULL: a metadata block may have been freed from memory
43 * but there may still be a record of it in the journal, and that record
44 * still needs to be revoked.
45 *
46 * If the handle isn't valid we're not journaling, but we still need to
47 * call into ext4_journal_revoke() to put the buffer head.
48 */
49int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
50 struct inode *inode, struct buffer_head *bh,
51 ext4_fsblk_t blocknr)
37{ 52{
38 int err = 0; 53 int err;
39 54
40 if (ext4_handle_valid(handle)) { 55 might_sleep();
41 err = jbd2_journal_forget(handle, bh); 56
42 if (err) 57 trace_ext4_forget(inode, is_metadata, blocknr);
43 ext4_journal_abort_handle(where, __func__, bh, 58 BUFFER_TRACE(bh, "enter");
44 handle, err); 59
45 } 60 jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
46 else 61 "data mode %x\n",
62 bh, is_metadata, inode->i_mode,
63 test_opt(inode->i_sb, DATA_FLAGS));
64
65 /* In the no journal case, we can just do a bforget and return */
66 if (!ext4_handle_valid(handle)) {
47 bforget(bh); 67 bforget(bh);
48 return err; 68 return 0;
49} 69 }
50 70
51int __ext4_journal_revoke(const char *where, handle_t *handle, 71 /* Never use the revoke function if we are doing full data
52 ext4_fsblk_t blocknr, struct buffer_head *bh) 72 * journaling: there is no need to, and a V1 superblock won't
53{ 73 * support it. Otherwise, only skip the revoke on un-journaled
54 int err = 0; 74 * data blocks. */
55 75
56 if (ext4_handle_valid(handle)) { 76 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
57 err = jbd2_journal_revoke(handle, blocknr, bh); 77 (!is_metadata && !ext4_should_journal_data(inode))) {
58 if (err) 78 if (bh) {
59 ext4_journal_abort_handle(where, __func__, bh, 79 BUFFER_TRACE(bh, "call jbd2_journal_forget");
60 handle, err); 80 err = jbd2_journal_forget(handle, bh);
81 if (err)
82 ext4_journal_abort_handle(where, __func__, bh,
83 handle, err);
84 return err;
85 }
86 return 0;
61 } 87 }
62 else 88
63 bforget(bh); 89 /*
90 * data!=journal && (is_metadata || should_journal_data(inode))
91 */
92 BUFFER_TRACE(bh, "call jbd2_journal_revoke");
93 err = jbd2_journal_revoke(handle, blocknr, bh);
94 if (err) {
95 ext4_journal_abort_handle(where, __func__, bh, handle, err);
96 ext4_abort(inode->i_sb, __func__,
97 "error %d when attempting revoke", err);
98 }
99 BUFFER_TRACE(bh, "exit");
64 return err; 100 return err;
65} 101}
66 102
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index a2865980342f..05eca817d704 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -49,7 +49,7 @@
49 49
50#define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \ 50#define EXT4_DATA_TRANS_BLOCKS(sb) (EXT4_SINGLEDATA_TRANS_BLOCKS(sb) + \
51 EXT4_XATTR_TRANS_BLOCKS - 2 + \ 51 EXT4_XATTR_TRANS_BLOCKS - 2 + \
52 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) 52 EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
53 53
54/* 54/*
55 * Define the number of metadata blocks we need to account to modify data. 55 * Define the number of metadata blocks we need to account to modify data.
@@ -57,7 +57,7 @@
57 * This include super block, inode block, quota blocks and xattr blocks 57 * This include super block, inode block, quota blocks and xattr blocks
58 */ 58 */
59#define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \ 59#define EXT4_META_TRANS_BLOCKS(sb) (EXT4_XATTR_TRANS_BLOCKS + \
60 2*EXT4_QUOTA_TRANS_BLOCKS(sb)) 60 EXT4_MAXQUOTAS_TRANS_BLOCKS(sb))
61 61
62/* Delete operations potentially hit one directory's namespace plus an 62/* Delete operations potentially hit one directory's namespace plus an
63 * entire inode, plus arbitrary amounts of bitmap/indirection data. Be 63 * entire inode, plus arbitrary amounts of bitmap/indirection data. Be
@@ -92,6 +92,7 @@
92 * but inode, sb and group updates are done only once */ 92 * but inode, sb and group updates are done only once */
93#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\ 93#define EXT4_QUOTA_INIT_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_INIT_ALLOC*\
94 (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0) 94 (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_INIT_REWRITE) : 0)
95
95#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\ 96#define EXT4_QUOTA_DEL_BLOCKS(sb) (test_opt(sb, QUOTA) ? (DQUOT_DEL_ALLOC*\
96 (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0) 97 (EXT4_SINGLEDATA_TRANS_BLOCKS(sb)-3)+3+DQUOT_DEL_REWRITE) : 0)
97#else 98#else
@@ -99,6 +100,9 @@
99#define EXT4_QUOTA_INIT_BLOCKS(sb) 0 100#define EXT4_QUOTA_INIT_BLOCKS(sb) 0
100#define EXT4_QUOTA_DEL_BLOCKS(sb) 0 101#define EXT4_QUOTA_DEL_BLOCKS(sb) 0
101#endif 102#endif
103#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb))
104#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
105#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
102 106
103int 107int
104ext4_mark_iloc_dirty(handle_t *handle, 108ext4_mark_iloc_dirty(handle_t *handle,
@@ -116,12 +120,8 @@ int ext4_reserve_inode_write(handle_t *handle, struct inode *inode,
116int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode); 120int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);
117 121
118/* 122/*
119 * Wrapper functions with which ext4 calls into JBD. The intent here is 123 * Wrapper functions with which ext4 calls into JBD.
120 * to allow these to be turned into appropriate stubs so ext4 can control
121 * ext2 filesystems, so ext2+ext4 systems only nee one fs. This work hasn't
122 * been done yet.
123 */ 124 */
124
125void ext4_journal_abort_handle(const char *caller, const char *err_fn, 125void ext4_journal_abort_handle(const char *caller, const char *err_fn,
126 struct buffer_head *bh, handle_t *handle, int err); 126 struct buffer_head *bh, handle_t *handle, int err);
127 127
@@ -131,13 +131,9 @@ int __ext4_journal_get_undo_access(const char *where, handle_t *handle,
131int __ext4_journal_get_write_access(const char *where, handle_t *handle, 131int __ext4_journal_get_write_access(const char *where, handle_t *handle,
132 struct buffer_head *bh); 132 struct buffer_head *bh);
133 133
134/* When called with an invalid handle, this will still do a put on the BH */ 134int __ext4_forget(const char *where, handle_t *handle, int is_metadata,
135int __ext4_journal_forget(const char *where, handle_t *handle, 135 struct inode *inode, struct buffer_head *bh,
136 struct buffer_head *bh); 136 ext4_fsblk_t blocknr);
137
138/* When called with an invalid handle, this will still do a put on the BH */
139int __ext4_journal_revoke(const char *where, handle_t *handle,
140 ext4_fsblk_t blocknr, struct buffer_head *bh);
141 137
142int __ext4_journal_get_create_access(const char *where, 138int __ext4_journal_get_create_access(const char *where,
143 handle_t *handle, struct buffer_head *bh); 139 handle_t *handle, struct buffer_head *bh);
@@ -149,12 +145,11 @@ int __ext4_handle_dirty_metadata(const char *where, handle_t *handle,
149 __ext4_journal_get_undo_access(__func__, (handle), (bh)) 145 __ext4_journal_get_undo_access(__func__, (handle), (bh))
150#define ext4_journal_get_write_access(handle, bh) \ 146#define ext4_journal_get_write_access(handle, bh) \
151 __ext4_journal_get_write_access(__func__, (handle), (bh)) 147 __ext4_journal_get_write_access(__func__, (handle), (bh))
152#define ext4_journal_revoke(handle, blocknr, bh) \ 148#define ext4_forget(handle, is_metadata, inode, bh, block_nr) \
153 __ext4_journal_revoke(__func__, (handle), (blocknr), (bh)) 149 __ext4_forget(__func__, (handle), (is_metadata), (inode), (bh),\
150 (block_nr))
154#define ext4_journal_get_create_access(handle, bh) \ 151#define ext4_journal_get_create_access(handle, bh) \
155 __ext4_journal_get_create_access(__func__, (handle), (bh)) 152 __ext4_journal_get_create_access(__func__, (handle), (bh))
156#define ext4_journal_forget(handle, bh) \
157 __ext4_journal_forget(__func__, (handle), (bh))
158#define ext4_handle_dirty_metadata(handle, inode, bh) \ 153#define ext4_handle_dirty_metadata(handle, inode, bh) \
159 __ext4_handle_dirty_metadata(__func__, (handle), (inode), (bh)) 154 __ext4_handle_dirty_metadata(__func__, (handle), (inode), (bh))
160 155
@@ -254,6 +249,19 @@ static inline int ext4_jbd2_file_inode(handle_t *handle, struct inode *inode)
254 return 0; 249 return 0;
255} 250}
256 251
252static inline void ext4_update_inode_fsync_trans(handle_t *handle,
253 struct inode *inode,
254 int datasync)
255{
256 struct ext4_inode_info *ei = EXT4_I(inode);
257
258 if (ext4_handle_valid(handle)) {
259 ei->i_sync_tid = handle->h_transaction->t_tid;
260 if (datasync)
261 ei->i_datasync_tid = handle->h_transaction->t_tid;
262 }
263}
264
257/* super.c */ 265/* super.c */
258int ext4_force_commit(struct super_block *sb); 266int ext4_force_commit(struct super_block *sb);
259 267
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 10539e364283..3a7928f825e4 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1007,7 +1007,8 @@ cleanup:
1007 for (i = 0; i < depth; i++) { 1007 for (i = 0; i < depth; i++) {
1008 if (!ablocks[i]) 1008 if (!ablocks[i])
1009 continue; 1009 continue;
1010 ext4_free_blocks(handle, inode, ablocks[i], 1, 1); 1010 ext4_free_blocks(handle, inode, 0, ablocks[i], 1,
1011 EXT4_FREE_BLOCKS_METADATA);
1011 } 1012 }
1012 } 1013 }
1013 kfree(ablocks); 1014 kfree(ablocks);
@@ -1761,7 +1762,9 @@ int ext4_ext_walk_space(struct inode *inode, ext4_lblk_t block,
1761 while (block < last && block != EXT_MAX_BLOCK) { 1762 while (block < last && block != EXT_MAX_BLOCK) {
1762 num = last - block; 1763 num = last - block;
1763 /* find extent for this block */ 1764 /* find extent for this block */
1765 down_read(&EXT4_I(inode)->i_data_sem);
1764 path = ext4_ext_find_extent(inode, block, path); 1766 path = ext4_ext_find_extent(inode, block, path);
1767 up_read(&EXT4_I(inode)->i_data_sem);
1765 if (IS_ERR(path)) { 1768 if (IS_ERR(path)) {
1766 err = PTR_ERR(path); 1769 err = PTR_ERR(path);
1767 path = NULL; 1770 path = NULL;
@@ -1957,7 +1960,6 @@ errout:
1957static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode, 1960static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
1958 struct ext4_ext_path *path) 1961 struct ext4_ext_path *path)
1959{ 1962{
1960 struct buffer_head *bh;
1961 int err; 1963 int err;
1962 ext4_fsblk_t leaf; 1964 ext4_fsblk_t leaf;
1963 1965
@@ -1973,9 +1975,8 @@ static int ext4_ext_rm_idx(handle_t *handle, struct inode *inode,
1973 if (err) 1975 if (err)
1974 return err; 1976 return err;
1975 ext_debug("index is empty, remove it, free block %llu\n", leaf); 1977 ext_debug("index is empty, remove it, free block %llu\n", leaf);
1976 bh = sb_find_get_block(inode->i_sb, leaf); 1978 ext4_free_blocks(handle, inode, 0, leaf, 1,
1977 ext4_forget(handle, 1, inode, bh, leaf); 1979 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
1978 ext4_free_blocks(handle, inode, leaf, 1, 1);
1979 return err; 1980 return err;
1980} 1981}
1981 1982
@@ -2042,12 +2043,11 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2042 struct ext4_extent *ex, 2043 struct ext4_extent *ex,
2043 ext4_lblk_t from, ext4_lblk_t to) 2044 ext4_lblk_t from, ext4_lblk_t to)
2044{ 2045{
2045 struct buffer_head *bh;
2046 unsigned short ee_len = ext4_ext_get_actual_len(ex); 2046 unsigned short ee_len = ext4_ext_get_actual_len(ex);
2047 int i, metadata = 0; 2047 int flags = EXT4_FREE_BLOCKS_FORGET;
2048 2048
2049 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) 2049 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
2050 metadata = 1; 2050 flags |= EXT4_FREE_BLOCKS_METADATA;
2051#ifdef EXTENTS_STATS 2051#ifdef EXTENTS_STATS
2052 { 2052 {
2053 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 2053 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
@@ -2072,11 +2072,7 @@ static int ext4_remove_blocks(handle_t *handle, struct inode *inode,
2072 num = le32_to_cpu(ex->ee_block) + ee_len - from; 2072 num = le32_to_cpu(ex->ee_block) + ee_len - from;
2073 start = ext_pblock(ex) + ee_len - num; 2073 start = ext_pblock(ex) + ee_len - num;
2074 ext_debug("free last %u blocks starting %llu\n", num, start); 2074 ext_debug("free last %u blocks starting %llu\n", num, start);
2075 for (i = 0; i < num; i++) { 2075 ext4_free_blocks(handle, inode, 0, start, num, flags);
2076 bh = sb_find_get_block(inode->i_sb, start + i);
2077 ext4_forget(handle, 0, inode, bh, start + i);
2078 }
2079 ext4_free_blocks(handle, inode, start, num, metadata);
2080 } else if (from == le32_to_cpu(ex->ee_block) 2076 } else if (from == le32_to_cpu(ex->ee_block)
2081 && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) { 2077 && to <= le32_to_cpu(ex->ee_block) + ee_len - 1) {
2082 printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n", 2078 printk(KERN_INFO "strange request: removal %u-%u from %u:%u\n",
@@ -2167,7 +2163,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
2167 correct_index = 1; 2163 correct_index = 1;
2168 credits += (ext_depth(inode)) + 1; 2164 credits += (ext_depth(inode)) + 1;
2169 } 2165 }
2170 credits += 2 * EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); 2166 credits += EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
2171 2167
2172 err = ext4_ext_truncate_extend_restart(handle, inode, credits); 2168 err = ext4_ext_truncate_extend_restart(handle, inode, credits);
2173 if (err) 2169 if (err)
@@ -2807,6 +2803,8 @@ fix_extent_len:
2807 * into three uninitialized extent(at most). After IO complete, the part 2803 * into three uninitialized extent(at most). After IO complete, the part
2808 * being filled will be convert to initialized by the end_io callback function 2804 * being filled will be convert to initialized by the end_io callback function
2809 * via ext4_convert_unwritten_extents(). 2805 * via ext4_convert_unwritten_extents().
2806 *
2807 * Returns the size of uninitialized extent to be written on success.
2810 */ 2808 */
2811static int ext4_split_unwritten_extents(handle_t *handle, 2809static int ext4_split_unwritten_extents(handle_t *handle,
2812 struct inode *inode, 2810 struct inode *inode,
@@ -2824,7 +2822,6 @@ static int ext4_split_unwritten_extents(handle_t *handle,
2824 unsigned int allocated, ee_len, depth; 2822 unsigned int allocated, ee_len, depth;
2825 ext4_fsblk_t newblock; 2823 ext4_fsblk_t newblock;
2826 int err = 0; 2824 int err = 0;
2827 int ret = 0;
2828 2825
2829 ext_debug("ext4_split_unwritten_extents: inode %lu," 2826 ext_debug("ext4_split_unwritten_extents: inode %lu,"
2830 "iblock %llu, max_blocks %u\n", inode->i_ino, 2827 "iblock %llu, max_blocks %u\n", inode->i_ino,
@@ -2842,12 +2839,12 @@ static int ext4_split_unwritten_extents(handle_t *handle,
2842 ext4_ext_store_pblock(&orig_ex, ext_pblock(ex)); 2839 ext4_ext_store_pblock(&orig_ex, ext_pblock(ex));
2843 2840
2844 /* 2841 /*
2845 * if the entire unintialized extent length less than 2842 * If the uninitialized extent begins at the same logical
2846 * the size of extent to write, there is no need to split 2843 * block where the write begins, and the write completely
2847 * uninitialized extent 2844 * covers the extent, then we don't need to split it.
2848 */ 2845 */
2849 if (allocated <= max_blocks) 2846 if ((iblock == ee_block) && (allocated <= max_blocks))
2850 return ret; 2847 return allocated;
2851 2848
2852 err = ext4_ext_get_access(handle, inode, path + depth); 2849 err = ext4_ext_get_access(handle, inode, path + depth);
2853 if (err) 2850 if (err)
@@ -3048,15 +3045,23 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3048 ret = ext4_split_unwritten_extents(handle, 3045 ret = ext4_split_unwritten_extents(handle,
3049 inode, path, iblock, 3046 inode, path, iblock,
3050 max_blocks, flags); 3047 max_blocks, flags);
3051 /* flag the io_end struct that we need convert when IO done */ 3048 /*
3049 * Flag the inode(non aio case) or end_io struct (aio case)
3050 * that this IO needs to convertion to written when IO is
3051 * completed
3052 */
3052 if (io) 3053 if (io)
3053 io->flag = DIO_AIO_UNWRITTEN; 3054 io->flag = DIO_AIO_UNWRITTEN;
3055 else
3056 EXT4_I(inode)->i_state |= EXT4_STATE_DIO_UNWRITTEN;
3054 goto out; 3057 goto out;
3055 } 3058 }
3056 /* DIO end_io complete, convert the filled extent to written */ 3059 /* async DIO end_io complete, convert the filled extent to written */
3057 if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) { 3060 if (flags == EXT4_GET_BLOCKS_DIO_CONVERT_EXT) {
3058 ret = ext4_convert_unwritten_extents_dio(handle, inode, 3061 ret = ext4_convert_unwritten_extents_dio(handle, inode,
3059 path); 3062 path);
3063 if (ret >= 0)
3064 ext4_update_inode_fsync_trans(handle, inode, 1);
3060 goto out2; 3065 goto out2;
3061 } 3066 }
3062 /* buffered IO case */ 3067 /* buffered IO case */
@@ -3084,6 +3089,8 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3084 ret = ext4_ext_convert_to_initialized(handle, inode, 3089 ret = ext4_ext_convert_to_initialized(handle, inode,
3085 path, iblock, 3090 path, iblock,
3086 max_blocks); 3091 max_blocks);
3092 if (ret >= 0)
3093 ext4_update_inode_fsync_trans(handle, inode, 1);
3087out: 3094out:
3088 if (ret <= 0) { 3095 if (ret <= 0) {
3089 err = ret; 3096 err = ret;
@@ -3295,10 +3302,16 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
3295 * To avoid unecessary convertion for every aio dio rewrite 3302 * To avoid unecessary convertion for every aio dio rewrite
3296 * to the mid of file, here we flag the IO that is really 3303 * to the mid of file, here we flag the IO that is really
3297 * need the convertion. 3304 * need the convertion.
3298 * 3305 * For non asycn direct IO case, flag the inode state
3306 * that we need to perform convertion when IO is done.
3299 */ 3307 */
3300 if (io && flags == EXT4_GET_BLOCKS_DIO_CREATE_EXT) 3308 if (flags == EXT4_GET_BLOCKS_DIO_CREATE_EXT) {
3301 io->flag = DIO_AIO_UNWRITTEN; 3309 if (io)
3310 io->flag = DIO_AIO_UNWRITTEN;
3311 else
3312 EXT4_I(inode)->i_state |=
3313 EXT4_STATE_DIO_UNWRITTEN;;
3314 }
3302 } 3315 }
3303 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); 3316 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
3304 if (err) { 3317 if (err) {
@@ -3306,8 +3319,8 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
3306 /* not a good idea to call discard here directly, 3319 /* not a good idea to call discard here directly,
3307 * but otherwise we'd need to call it every free() */ 3320 * but otherwise we'd need to call it every free() */
3308 ext4_discard_preallocations(inode); 3321 ext4_discard_preallocations(inode);
3309 ext4_free_blocks(handle, inode, ext_pblock(&newex), 3322 ext4_free_blocks(handle, inode, 0, ext_pblock(&newex),
3310 ext4_ext_get_actual_len(&newex), 0); 3323 ext4_ext_get_actual_len(&newex), 0);
3311 goto out2; 3324 goto out2;
3312 } 3325 }
3313 3326
@@ -3316,10 +3329,16 @@ int ext4_ext_get_blocks(handle_t *handle, struct inode *inode,
3316 allocated = ext4_ext_get_actual_len(&newex); 3329 allocated = ext4_ext_get_actual_len(&newex);
3317 set_buffer_new(bh_result); 3330 set_buffer_new(bh_result);
3318 3331
3319 /* Cache only when it is _not_ an uninitialized extent */ 3332 /*
3320 if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) 3333 * Cache the extent and update transaction to commit on fdatasync only
3334 * when it is _not_ an uninitialized extent.
3335 */
3336 if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) {
3321 ext4_ext_put_in_cache(inode, iblock, allocated, newblock, 3337 ext4_ext_put_in_cache(inode, iblock, allocated, newblock,
3322 EXT4_EXT_CACHE_EXTENT); 3338 EXT4_EXT_CACHE_EXTENT);
3339 ext4_update_inode_fsync_trans(handle, inode, 1);
3340 } else
3341 ext4_update_inode_fsync_trans(handle, inode, 0);
3323out: 3342out:
3324 if (allocated > max_blocks) 3343 if (allocated > max_blocks)
3325 allocated = max_blocks; 3344 allocated = max_blocks;
@@ -3519,6 +3538,7 @@ retry:
3519 * 3538 *
3520 * This function is called from the direct IO end io call back 3539 * This function is called from the direct IO end io call back
3521 * function, to convert the fallocated extents after IO is completed. 3540 * function, to convert the fallocated extents after IO is completed.
3541 * Returns 0 on success.
3522 */ 3542 */
3523int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset, 3543int ext4_convert_unwritten_extents(struct inode *inode, loff_t offset,
3524 loff_t len) 3544 loff_t len)
@@ -3706,10 +3726,8 @@ int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
3706 * Walk the extent tree gathering extent information. 3726 * Walk the extent tree gathering extent information.
3707 * ext4_ext_fiemap_cb will push extents back to user. 3727 * ext4_ext_fiemap_cb will push extents back to user.
3708 */ 3728 */
3709 down_read(&EXT4_I(inode)->i_data_sem);
3710 error = ext4_ext_walk_space(inode, start_blk, len_blks, 3729 error = ext4_ext_walk_space(inode, start_blk, len_blks,
3711 ext4_ext_fiemap_cb, fieinfo); 3730 ext4_ext_fiemap_cb, fieinfo);
3712 up_read(&EXT4_I(inode)->i_data_sem);
3713 } 3731 }
3714 3732
3715 return error; 3733 return error;
diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c
index 2b1531266ee2..0b22497d92e1 100644
--- a/fs/ext4/fsync.c
+++ b/fs/ext4/fsync.c
@@ -51,25 +51,30 @@
51int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) 51int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
52{ 52{
53 struct inode *inode = dentry->d_inode; 53 struct inode *inode = dentry->d_inode;
54 struct ext4_inode_info *ei = EXT4_I(inode);
54 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; 55 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
55 int err, ret = 0; 56 int ret;
57 tid_t commit_tid;
56 58
57 J_ASSERT(ext4_journal_current_handle() == NULL); 59 J_ASSERT(ext4_journal_current_handle() == NULL);
58 60
59 trace_ext4_sync_file(file, dentry, datasync); 61 trace_ext4_sync_file(file, dentry, datasync);
60 62
63 if (inode->i_sb->s_flags & MS_RDONLY)
64 return 0;
65
61 ret = flush_aio_dio_completed_IO(inode); 66 ret = flush_aio_dio_completed_IO(inode);
62 if (ret < 0) 67 if (ret < 0)
63 goto out; 68 return ret;
69
70 if (!journal)
71 return simple_fsync(file, dentry, datasync);
72
64 /* 73 /*
65 * data=writeback: 74 * data=writeback,ordered:
66 * The caller's filemap_fdatawrite()/wait will sync the data. 75 * The caller's filemap_fdatawrite()/wait will sync the data.
67 * sync_inode() will sync the metadata 76 * Metadata is in the journal, we wait for proper transaction to
68 * 77 * commit here.
69 * data=ordered:
70 * The caller's filemap_fdatawrite() will write the data and
71 * sync_inode() will write the inode if it is dirty. Then the caller's
72 * filemap_fdatawait() will wait on the pages.
73 * 78 *
74 * data=journal: 79 * data=journal:
75 * filemap_fdatawrite won't do anything (the buffers are clean). 80 * filemap_fdatawrite won't do anything (the buffers are clean).
@@ -79,32 +84,13 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync)
79 * (they were dirtied by commit). But that's OK - the blocks are 84 * (they were dirtied by commit). But that's OK - the blocks are
80 * safe in-journal, which is all fsync() needs to ensure. 85 * safe in-journal, which is all fsync() needs to ensure.
81 */ 86 */
82 if (ext4_should_journal_data(inode)) { 87 if (ext4_should_journal_data(inode))
83 ret = ext4_force_commit(inode->i_sb); 88 return ext4_force_commit(inode->i_sb);
84 goto out;
85 }
86 89
87 if (!journal) 90 commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid;
88 ret = sync_mapping_buffers(inode->i_mapping); 91 if (jbd2_log_start_commit(journal, commit_tid))
89 92 jbd2_log_wait_commit(journal, commit_tid);
90 if (datasync && !(inode->i_state & I_DIRTY_DATASYNC)) 93 else if (journal->j_flags & JBD2_BARRIER)
91 goto out;
92
93 /*
94 * The VFS has written the file data. If the inode is unaltered
95 * then we need not start a commit.
96 */
97 if (inode->i_state & (I_DIRTY_SYNC|I_DIRTY_DATASYNC)) {
98 struct writeback_control wbc = {
99 .sync_mode = WB_SYNC_ALL,
100 .nr_to_write = 0, /* sys_fsync did this */
101 };
102 err = sync_inode(inode, &wbc);
103 if (ret == 0)
104 ret = err;
105 }
106out:
107 if (journal && (journal->j_flags & JBD2_BARRIER))
108 blkdev_issue_flush(inode->i_sb->s_bdev, NULL); 94 blkdev_issue_flush(inode->i_sb->s_bdev, NULL);
109 return ret; 95 return ret;
110} 96}
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ec367bce7215..5352db1a3086 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -71,58 +71,6 @@ static int ext4_inode_is_fast_symlink(struct inode *inode)
71} 71}
72 72
73/* 73/*
74 * The ext4 forget function must perform a revoke if we are freeing data
75 * which has been journaled. Metadata (eg. indirect blocks) must be
76 * revoked in all cases.
77 *
78 * "bh" may be NULL: a metadata block may have been freed from memory
79 * but there may still be a record of it in the journal, and that record
80 * still needs to be revoked.
81 *
82 * If the handle isn't valid we're not journaling, but we still need to
83 * call into ext4_journal_revoke() to put the buffer head.
84 */
85int ext4_forget(handle_t *handle, int is_metadata, struct inode *inode,
86 struct buffer_head *bh, ext4_fsblk_t blocknr)
87{
88 int err;
89
90 might_sleep();
91
92 BUFFER_TRACE(bh, "enter");
93
94 jbd_debug(4, "forgetting bh %p: is_metadata = %d, mode %o, "
95 "data mode %x\n",
96 bh, is_metadata, inode->i_mode,
97 test_opt(inode->i_sb, DATA_FLAGS));
98
99 /* Never use the revoke function if we are doing full data
100 * journaling: there is no need to, and a V1 superblock won't
101 * support it. Otherwise, only skip the revoke on un-journaled
102 * data blocks. */
103
104 if (test_opt(inode->i_sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA ||
105 (!is_metadata && !ext4_should_journal_data(inode))) {
106 if (bh) {
107 BUFFER_TRACE(bh, "call jbd2_journal_forget");
108 return ext4_journal_forget(handle, bh);
109 }
110 return 0;
111 }
112
113 /*
114 * data!=journal && (is_metadata || should_journal_data(inode))
115 */
116 BUFFER_TRACE(bh, "call ext4_journal_revoke");
117 err = ext4_journal_revoke(handle, blocknr, bh);
118 if (err)
119 ext4_abort(inode->i_sb, __func__,
120 "error %d when attempting revoke", err);
121 BUFFER_TRACE(bh, "exit");
122 return err;
123}
124
125/*
126 * Work out how many blocks we need to proceed with the next chunk of a 74 * Work out how many blocks we need to proceed with the next chunk of a
127 * truncate transaction. 75 * truncate transaction.
128 */ 76 */
@@ -193,7 +141,7 @@ static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
193 * so before we call here everything must be consistently dirtied against 141 * so before we call here everything must be consistently dirtied against
194 * this transaction. 142 * this transaction.
195 */ 143 */
196 int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode, 144int ext4_truncate_restart_trans(handle_t *handle, struct inode *inode,
197 int nblocks) 145 int nblocks)
198{ 146{
199 int ret; 147 int ret;
@@ -209,6 +157,7 @@ static int try_to_extend_transaction(handle_t *handle, struct inode *inode)
209 up_write(&EXT4_I(inode)->i_data_sem); 157 up_write(&EXT4_I(inode)->i_data_sem);
210 ret = ext4_journal_restart(handle, blocks_for_truncate(inode)); 158 ret = ext4_journal_restart(handle, blocks_for_truncate(inode));
211 down_write(&EXT4_I(inode)->i_data_sem); 159 down_write(&EXT4_I(inode)->i_data_sem);
160 ext4_discard_preallocations(inode);
212 161
213 return ret; 162 return ret;
214} 163}
@@ -720,7 +669,7 @@ allocated:
720 return ret; 669 return ret;
721failed_out: 670failed_out:
722 for (i = 0; i < index; i++) 671 for (i = 0; i < index; i++)
723 ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); 672 ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
724 return ret; 673 return ret;
725} 674}
726 675
@@ -816,14 +765,20 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
816 return err; 765 return err;
817failed: 766failed:
818 /* Allocation failed, free what we already allocated */ 767 /* Allocation failed, free what we already allocated */
768 ext4_free_blocks(handle, inode, 0, new_blocks[0], 1, 0);
819 for (i = 1; i <= n ; i++) { 769 for (i = 1; i <= n ; i++) {
820 BUFFER_TRACE(branch[i].bh, "call jbd2_journal_forget"); 770 /*
821 ext4_journal_forget(handle, branch[i].bh); 771 * branch[i].bh is newly allocated, so there is no
772 * need to revoke the block, which is why we don't
773 * need to set EXT4_FREE_BLOCKS_METADATA.
774 */
775 ext4_free_blocks(handle, inode, 0, new_blocks[i], 1,
776 EXT4_FREE_BLOCKS_FORGET);
822 } 777 }
823 for (i = 0; i < indirect_blks; i++) 778 for (i = n+1; i < indirect_blks; i++)
824 ext4_free_blocks(handle, inode, new_blocks[i], 1, 0); 779 ext4_free_blocks(handle, inode, 0, new_blocks[i], 1, 0);
825 780
826 ext4_free_blocks(handle, inode, new_blocks[i], num, 0); 781 ext4_free_blocks(handle, inode, 0, new_blocks[i], num, 0);
827 782
828 return err; 783 return err;
829} 784}
@@ -902,12 +857,16 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
902 857
903err_out: 858err_out:
904 for (i = 1; i <= num; i++) { 859 for (i = 1; i <= num; i++) {
905 BUFFER_TRACE(where[i].bh, "call jbd2_journal_forget"); 860 /*
906 ext4_journal_forget(handle, where[i].bh); 861 * branch[i].bh is newly allocated, so there is no
907 ext4_free_blocks(handle, inode, 862 * need to revoke the block, which is why we don't
908 le32_to_cpu(where[i-1].key), 1, 0); 863 * need to set EXT4_FREE_BLOCKS_METADATA.
864 */
865 ext4_free_blocks(handle, inode, where[i].bh, 0, 1,
866 EXT4_FREE_BLOCKS_FORGET);
909 } 867 }
910 ext4_free_blocks(handle, inode, le32_to_cpu(where[num].key), blks, 0); 868 ext4_free_blocks(handle, inode, 0, le32_to_cpu(where[num].key),
869 blks, 0);
911 870
912 return err; 871 return err;
913} 872}
@@ -1020,10 +979,12 @@ static int ext4_ind_get_blocks(handle_t *handle, struct inode *inode,
1020 if (!err) 979 if (!err)
1021 err = ext4_splice_branch(handle, inode, iblock, 980 err = ext4_splice_branch(handle, inode, iblock,
1022 partial, indirect_blks, count); 981 partial, indirect_blks, count);
1023 else 982 if (err)
1024 goto cleanup; 983 goto cleanup;
1025 984
1026 set_buffer_new(bh_result); 985 set_buffer_new(bh_result);
986
987 ext4_update_inode_fsync_trans(handle, inode, 1);
1027got_it: 988got_it:
1028 map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key)); 989 map_bh(bh_result, inode->i_sb, le32_to_cpu(chain[depth-1].key));
1029 if (count > blocks_to_boundary) 990 if (count > blocks_to_boundary)
@@ -1051,7 +1012,7 @@ qsize_t ext4_get_reserved_space(struct inode *inode)
1051 EXT4_I(inode)->i_reserved_meta_blocks; 1012 EXT4_I(inode)->i_reserved_meta_blocks;
1052 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 1013 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
1053 1014
1054 return total; 1015 return (total << inode->i_blkbits);
1055} 1016}
1056/* 1017/*
1057 * Calculate the number of metadata blocks need to reserve 1018 * Calculate the number of metadata blocks need to reserve
@@ -1146,8 +1107,8 @@ static int check_block_validity(struct inode *inode, const char *msg,
1146} 1107}
1147 1108
1148/* 1109/*
1149 * Return the number of dirty pages in the given inode starting at 1110 * Return the number of contiguous dirty pages in a given inode
1150 * page frame idx. 1111 * starting at page frame idx.
1151 */ 1112 */
1152static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx, 1113static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
1153 unsigned int max_pages) 1114 unsigned int max_pages)
@@ -1181,15 +1142,15 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
1181 unlock_page(page); 1142 unlock_page(page);
1182 break; 1143 break;
1183 } 1144 }
1184 head = page_buffers(page); 1145 if (page_has_buffers(page)) {
1185 bh = head; 1146 bh = head = page_buffers(page);
1186 do { 1147 do {
1187 if (!buffer_delay(bh) && 1148 if (!buffer_delay(bh) &&
1188 !buffer_unwritten(bh)) { 1149 !buffer_unwritten(bh))
1189 done = 1; 1150 done = 1;
1190 break; 1151 bh = bh->b_this_page;
1191 } 1152 } while (!done && (bh != head));
1192 } while ((bh = bh->b_this_page) != head); 1153 }
1193 unlock_page(page); 1154 unlock_page(page);
1194 if (done) 1155 if (done)
1195 break; 1156 break;
@@ -1533,6 +1494,16 @@ static int do_journal_get_write_access(handle_t *handle,
1533 return ext4_journal_get_write_access(handle, bh); 1494 return ext4_journal_get_write_access(handle, bh);
1534} 1495}
1535 1496
1497/*
1498 * Truncate blocks that were not used by write. We have to truncate the
1499 * pagecache as well so that corresponding buffers get properly unmapped.
1500 */
1501static void ext4_truncate_failed_write(struct inode *inode)
1502{
1503 truncate_inode_pages(inode->i_mapping, inode->i_size);
1504 ext4_truncate(inode);
1505}
1506
1536static int ext4_write_begin(struct file *file, struct address_space *mapping, 1507static int ext4_write_begin(struct file *file, struct address_space *mapping,
1537 loff_t pos, unsigned len, unsigned flags, 1508 loff_t pos, unsigned len, unsigned flags,
1538 struct page **pagep, void **fsdata) 1509 struct page **pagep, void **fsdata)
@@ -1598,7 +1569,7 @@ retry:
1598 1569
1599 ext4_journal_stop(handle); 1570 ext4_journal_stop(handle);
1600 if (pos + len > inode->i_size) { 1571 if (pos + len > inode->i_size) {
1601 ext4_truncate(inode); 1572 ext4_truncate_failed_write(inode);
1602 /* 1573 /*
1603 * If truncate failed early the inode might 1574 * If truncate failed early the inode might
1604 * still be on the orphan list; we need to 1575 * still be on the orphan list; we need to
@@ -1708,7 +1679,7 @@ static int ext4_ordered_write_end(struct file *file,
1708 ret = ret2; 1679 ret = ret2;
1709 1680
1710 if (pos + len > inode->i_size) { 1681 if (pos + len > inode->i_size) {
1711 ext4_truncate(inode); 1682 ext4_truncate_failed_write(inode);
1712 /* 1683 /*
1713 * If truncate failed early the inode might still be 1684 * If truncate failed early the inode might still be
1714 * on the orphan list; we need to make sure the inode 1685 * on the orphan list; we need to make sure the inode
@@ -1750,7 +1721,7 @@ static int ext4_writeback_write_end(struct file *file,
1750 ret = ret2; 1721 ret = ret2;
1751 1722
1752 if (pos + len > inode->i_size) { 1723 if (pos + len > inode->i_size) {
1753 ext4_truncate(inode); 1724 ext4_truncate_failed_write(inode);
1754 /* 1725 /*
1755 * If truncate failed early the inode might still be 1726 * If truncate failed early the inode might still be
1756 * on the orphan list; we need to make sure the inode 1727 * on the orphan list; we need to make sure the inode
@@ -1813,7 +1784,7 @@ static int ext4_journalled_write_end(struct file *file,
1813 if (!ret) 1784 if (!ret)
1814 ret = ret2; 1785 ret = ret2;
1815 if (pos + len > inode->i_size) { 1786 if (pos + len > inode->i_size) {
1816 ext4_truncate(inode); 1787 ext4_truncate_failed_write(inode);
1817 /* 1788 /*
1818 * If truncate failed early the inode might still be 1789 * If truncate failed early the inode might still be
1819 * on the orphan list; we need to make sure the inode 1790 * on the orphan list; we need to make sure the inode
@@ -2599,7 +2570,6 @@ static int bput_one(handle_t *handle, struct buffer_head *bh)
2599} 2570}
2600 2571
2601static int __ext4_journalled_writepage(struct page *page, 2572static int __ext4_journalled_writepage(struct page *page,
2602 struct writeback_control *wbc,
2603 unsigned int len) 2573 unsigned int len)
2604{ 2574{
2605 struct address_space *mapping = page->mapping; 2575 struct address_space *mapping = page->mapping;
@@ -2757,7 +2727,7 @@ static int ext4_writepage(struct page *page,
2757 * doesn't seem much point in redirtying the page here. 2727 * doesn't seem much point in redirtying the page here.
2758 */ 2728 */
2759 ClearPageChecked(page); 2729 ClearPageChecked(page);
2760 return __ext4_journalled_writepage(page, wbc, len); 2730 return __ext4_journalled_writepage(page, len);
2761 } 2731 }
2762 2732
2763 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode)) 2733 if (test_opt(inode->i_sb, NOBH) && ext4_should_writeback_data(inode))
@@ -2787,7 +2757,7 @@ static int ext4_da_writepages_trans_blocks(struct inode *inode)
2787 * number of contiguous block. So we will limit 2757 * number of contiguous block. So we will limit
2788 * number of contiguous block to a sane value 2758 * number of contiguous block to a sane value
2789 */ 2759 */
2790 if (!(inode->i_flags & EXT4_EXTENTS_FL) && 2760 if (!(EXT4_I(inode)->i_flags & EXT4_EXTENTS_FL) &&
2791 (max_blocks > EXT4_MAX_TRANS_DATA)) 2761 (max_blocks > EXT4_MAX_TRANS_DATA))
2792 max_blocks = EXT4_MAX_TRANS_DATA; 2762 max_blocks = EXT4_MAX_TRANS_DATA;
2793 2763
@@ -2932,7 +2902,7 @@ retry:
2932 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage, 2902 ret = write_cache_pages(mapping, wbc, __mpage_da_writepage,
2933 &mpd); 2903 &mpd);
2934 /* 2904 /*
2935 * If we have a contigous extent of pages and we 2905 * If we have a contiguous extent of pages and we
2936 * haven't done the I/O yet, map the blocks and submit 2906 * haven't done the I/O yet, map the blocks and submit
2937 * them for I/O. 2907 * them for I/O.
2938 */ 2908 */
@@ -3090,7 +3060,7 @@ retry:
3090 * i_size_read because we hold i_mutex. 3060 * i_size_read because we hold i_mutex.
3091 */ 3061 */
3092 if (pos + len > inode->i_size) 3062 if (pos + len > inode->i_size)
3093 ext4_truncate(inode); 3063 ext4_truncate_failed_write(inode);
3094 } 3064 }
3095 3065
3096 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 3066 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
@@ -3378,6 +3348,7 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
3378 ssize_t ret; 3348 ssize_t ret;
3379 int orphan = 0; 3349 int orphan = 0;
3380 size_t count = iov_length(iov, nr_segs); 3350 size_t count = iov_length(iov, nr_segs);
3351 int retries = 0;
3381 3352
3382 if (rw == WRITE) { 3353 if (rw == WRITE) {
3383 loff_t final_size = offset + count; 3354 loff_t final_size = offset + count;
@@ -3400,9 +3371,12 @@ static ssize_t ext4_ind_direct_IO(int rw, struct kiocb *iocb,
3400 } 3371 }
3401 } 3372 }
3402 3373
3374retry:
3403 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov, 3375 ret = blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
3404 offset, nr_segs, 3376 offset, nr_segs,
3405 ext4_get_block, NULL); 3377 ext4_get_block, NULL);
3378 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
3379 goto retry;
3406 3380
3407 if (orphan) { 3381 if (orphan) {
3408 int err; 3382 int err;
@@ -3441,8 +3415,6 @@ out:
3441 return ret; 3415 return ret;
3442} 3416}
3443 3417
3444/* Maximum number of blocks we map for direct IO at once. */
3445
3446static int ext4_get_block_dio_write(struct inode *inode, sector_t iblock, 3418static int ext4_get_block_dio_write(struct inode *inode, sector_t iblock,
3447 struct buffer_head *bh_result, int create) 3419 struct buffer_head *bh_result, int create)
3448{ 3420{
@@ -3650,13 +3622,14 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
3650 ext4_io_end_t *io_end = iocb->private; 3622 ext4_io_end_t *io_end = iocb->private;
3651 struct workqueue_struct *wq; 3623 struct workqueue_struct *wq;
3652 3624
3625 /* if not async direct IO or dio with 0 bytes write, just return */
3626 if (!io_end || !size)
3627 return;
3628
3653 ext_debug("ext4_end_io_dio(): io_end 0x%p" 3629 ext_debug("ext4_end_io_dio(): io_end 0x%p"
3654 "for inode %lu, iocb 0x%p, offset %llu, size %llu\n", 3630 "for inode %lu, iocb 0x%p, offset %llu, size %llu\n",
3655 iocb->private, io_end->inode->i_ino, iocb, offset, 3631 iocb->private, io_end->inode->i_ino, iocb, offset,
3656 size); 3632 size);
3657 /* if not async direct IO or dio with 0 bytes write, just return */
3658 if (!io_end || !size)
3659 return;
3660 3633
3661 /* if not aio dio with unwritten extents, just free io and return */ 3634 /* if not aio dio with unwritten extents, just free io and return */
3662 if (io_end->flag != DIO_AIO_UNWRITTEN){ 3635 if (io_end->flag != DIO_AIO_UNWRITTEN){
@@ -3767,13 +3740,19 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3767 if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) { 3740 if (ret != -EIOCBQUEUED && ret <= 0 && iocb->private) {
3768 ext4_free_io_end(iocb->private); 3741 ext4_free_io_end(iocb->private);
3769 iocb->private = NULL; 3742 iocb->private = NULL;
3770 } else if (ret > 0) 3743 } else if (ret > 0 && (EXT4_I(inode)->i_state &
3744 EXT4_STATE_DIO_UNWRITTEN)) {
3745 int err;
3771 /* 3746 /*
3772 * for non AIO case, since the IO is already 3747 * for non AIO case, since the IO is already
3773 * completed, we could do the convertion right here 3748 * completed, we could do the convertion right here
3774 */ 3749 */
3775 ret = ext4_convert_unwritten_extents(inode, 3750 err = ext4_convert_unwritten_extents(inode,
3776 offset, ret); 3751 offset, ret);
3752 if (err < 0)
3753 ret = err;
3754 EXT4_I(inode)->i_state &= ~EXT4_STATE_DIO_UNWRITTEN;
3755 }
3777 return ret; 3756 return ret;
3778 } 3757 }
3779 3758
@@ -4054,7 +4033,7 @@ static Indirect *ext4_find_shared(struct inode *inode, int depth,
4054 int k, err; 4033 int k, err;
4055 4034
4056 *top = 0; 4035 *top = 0;
4057 /* Make k index the deepest non-null offest + 1 */ 4036 /* Make k index the deepest non-null offset + 1 */
4058 for (k = depth; k > 1 && !offsets[k-1]; k--) 4037 for (k = depth; k > 1 && !offsets[k-1]; k--)
4059 ; 4038 ;
4060 partial = ext4_get_branch(inode, k, offsets, chain, &err); 4039 partial = ext4_get_branch(inode, k, offsets, chain, &err);
@@ -4110,6 +4089,11 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
4110 __le32 *last) 4089 __le32 *last)
4111{ 4090{
4112 __le32 *p; 4091 __le32 *p;
4092 int flags = EXT4_FREE_BLOCKS_FORGET;
4093
4094 if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
4095 flags |= EXT4_FREE_BLOCKS_METADATA;
4096
4113 if (try_to_extend_transaction(handle, inode)) { 4097 if (try_to_extend_transaction(handle, inode)) {
4114 if (bh) { 4098 if (bh) {
4115 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 4099 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
@@ -4124,27 +4108,10 @@ static void ext4_clear_blocks(handle_t *handle, struct inode *inode,
4124 } 4108 }
4125 } 4109 }
4126 4110
4127 /* 4111 for (p = first; p < last; p++)
4128 * Any buffers which are on the journal will be in memory. We 4112 *p = 0;
4129 * find them on the hash table so jbd2_journal_revoke() will
4130 * run jbd2_journal_forget() on them. We've already detached
4131 * each block from the file, so bforget() in
4132 * jbd2_journal_forget() should be safe.
4133 *
4134 * AKPM: turn on bforget in jbd2_journal_forget()!!!
4135 */
4136 for (p = first; p < last; p++) {
4137 u32 nr = le32_to_cpu(*p);
4138 if (nr) {
4139 struct buffer_head *tbh;
4140
4141 *p = 0;
4142 tbh = sb_find_get_block(inode->i_sb, nr);
4143 ext4_forget(handle, 0, inode, tbh, nr);
4144 }
4145 }
4146 4113
4147 ext4_free_blocks(handle, inode, block_to_free, count, 0); 4114 ext4_free_blocks(handle, inode, 0, block_to_free, count, flags);
4148} 4115}
4149 4116
4150/** 4117/**
@@ -4332,7 +4299,8 @@ static void ext4_free_branches(handle_t *handle, struct inode *inode,
4332 blocks_for_truncate(inode)); 4299 blocks_for_truncate(inode));
4333 } 4300 }
4334 4301
4335 ext4_free_blocks(handle, inode, nr, 1, 1); 4302 ext4_free_blocks(handle, inode, 0, nr, 1,
4303 EXT4_FREE_BLOCKS_METADATA);
4336 4304
4337 if (parent_bh) { 4305 if (parent_bh) {
4338 /* 4306 /*
@@ -4771,8 +4739,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4771 struct ext4_iloc iloc; 4739 struct ext4_iloc iloc;
4772 struct ext4_inode *raw_inode; 4740 struct ext4_inode *raw_inode;
4773 struct ext4_inode_info *ei; 4741 struct ext4_inode_info *ei;
4774 struct buffer_head *bh;
4775 struct inode *inode; 4742 struct inode *inode;
4743 journal_t *journal = EXT4_SB(sb)->s_journal;
4776 long ret; 4744 long ret;
4777 int block; 4745 int block;
4778 4746
@@ -4783,11 +4751,11 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4783 return inode; 4751 return inode;
4784 4752
4785 ei = EXT4_I(inode); 4753 ei = EXT4_I(inode);
4754 iloc.bh = 0;
4786 4755
4787 ret = __ext4_get_inode_loc(inode, &iloc, 0); 4756 ret = __ext4_get_inode_loc(inode, &iloc, 0);
4788 if (ret < 0) 4757 if (ret < 0)
4789 goto bad_inode; 4758 goto bad_inode;
4790 bh = iloc.bh;
4791 raw_inode = ext4_raw_inode(&iloc); 4759 raw_inode = ext4_raw_inode(&iloc);
4792 inode->i_mode = le16_to_cpu(raw_inode->i_mode); 4760 inode->i_mode = le16_to_cpu(raw_inode->i_mode);
4793 inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low); 4761 inode->i_uid = (uid_t)le16_to_cpu(raw_inode->i_uid_low);
@@ -4810,7 +4778,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4810 if (inode->i_mode == 0 || 4778 if (inode->i_mode == 0 ||
4811 !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) { 4779 !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS)) {
4812 /* this inode is deleted */ 4780 /* this inode is deleted */
4813 brelse(bh);
4814 ret = -ESTALE; 4781 ret = -ESTALE;
4815 goto bad_inode; 4782 goto bad_inode;
4816 } 4783 }
@@ -4838,11 +4805,35 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4838 ei->i_data[block] = raw_inode->i_block[block]; 4805 ei->i_data[block] = raw_inode->i_block[block];
4839 INIT_LIST_HEAD(&ei->i_orphan); 4806 INIT_LIST_HEAD(&ei->i_orphan);
4840 4807
4808 /*
4809 * Set transaction id's of transactions that have to be committed
4810 * to finish f[data]sync. We set them to currently running transaction
4811 * as we cannot be sure that the inode or some of its metadata isn't
4812 * part of the transaction - the inode could have been reclaimed and
4813 * now it is reread from disk.
4814 */
4815 if (journal) {
4816 transaction_t *transaction;
4817 tid_t tid;
4818
4819 spin_lock(&journal->j_state_lock);
4820 if (journal->j_running_transaction)
4821 transaction = journal->j_running_transaction;
4822 else
4823 transaction = journal->j_committing_transaction;
4824 if (transaction)
4825 tid = transaction->t_tid;
4826 else
4827 tid = journal->j_commit_sequence;
4828 spin_unlock(&journal->j_state_lock);
4829 ei->i_sync_tid = tid;
4830 ei->i_datasync_tid = tid;
4831 }
4832
4841 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) { 4833 if (EXT4_INODE_SIZE(inode->i_sb) > EXT4_GOOD_OLD_INODE_SIZE) {
4842 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize); 4834 ei->i_extra_isize = le16_to_cpu(raw_inode->i_extra_isize);
4843 if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize > 4835 if (EXT4_GOOD_OLD_INODE_SIZE + ei->i_extra_isize >
4844 EXT4_INODE_SIZE(inode->i_sb)) { 4836 EXT4_INODE_SIZE(inode->i_sb)) {
4845 brelse(bh);
4846 ret = -EIO; 4837 ret = -EIO;
4847 goto bad_inode; 4838 goto bad_inode;
4848 } 4839 }
@@ -4874,10 +4865,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4874 4865
4875 ret = 0; 4866 ret = 0;
4876 if (ei->i_file_acl && 4867 if (ei->i_file_acl &&
4877 ((ei->i_file_acl < 4868 !ext4_data_block_valid(EXT4_SB(sb), ei->i_file_acl, 1)) {
4878 (le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) +
4879 EXT4_SB(sb)->s_gdb_count)) ||
4880 (ei->i_file_acl >= ext4_blocks_count(EXT4_SB(sb)->s_es)))) {
4881 ext4_error(sb, __func__, 4869 ext4_error(sb, __func__,
4882 "bad extended attribute block %llu in inode #%lu", 4870 "bad extended attribute block %llu in inode #%lu",
4883 ei->i_file_acl, inode->i_ino); 4871 ei->i_file_acl, inode->i_ino);
@@ -4895,10 +4883,8 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4895 /* Validate block references which are part of inode */ 4883 /* Validate block references which are part of inode */
4896 ret = ext4_check_inode_blockref(inode); 4884 ret = ext4_check_inode_blockref(inode);
4897 } 4885 }
4898 if (ret) { 4886 if (ret)
4899 brelse(bh);
4900 goto bad_inode; 4887 goto bad_inode;
4901 }
4902 4888
4903 if (S_ISREG(inode->i_mode)) { 4889 if (S_ISREG(inode->i_mode)) {
4904 inode->i_op = &ext4_file_inode_operations; 4890 inode->i_op = &ext4_file_inode_operations;
@@ -4926,7 +4912,6 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4926 init_special_inode(inode, inode->i_mode, 4912 init_special_inode(inode, inode->i_mode,
4927 new_decode_dev(le32_to_cpu(raw_inode->i_block[1]))); 4913 new_decode_dev(le32_to_cpu(raw_inode->i_block[1])));
4928 } else { 4914 } else {
4929 brelse(bh);
4930 ret = -EIO; 4915 ret = -EIO;
4931 ext4_error(inode->i_sb, __func__, 4916 ext4_error(inode->i_sb, __func__,
4932 "bogus i_mode (%o) for inode=%lu", 4917 "bogus i_mode (%o) for inode=%lu",
@@ -4939,6 +4924,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
4939 return inode; 4924 return inode;
4940 4925
4941bad_inode: 4926bad_inode:
4927 brelse(iloc.bh);
4942 iget_failed(inode); 4928 iget_failed(inode);
4943 return ERR_PTR(ret); 4929 return ERR_PTR(ret);
4944} 4930}
@@ -5098,6 +5084,7 @@ static int ext4_do_update_inode(handle_t *handle,
5098 err = rc; 5084 err = rc;
5099 ei->i_state &= ~EXT4_STATE_NEW; 5085 ei->i_state &= ~EXT4_STATE_NEW;
5100 5086
5087 ext4_update_inode_fsync_trans(handle, inode, 0);
5101out_brelse: 5088out_brelse:
5102 brelse(bh); 5089 brelse(bh);
5103 ext4_std_error(inode->i_sb, err); 5090 ext4_std_error(inode->i_sb, err);
@@ -5217,8 +5204,8 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
5217 5204
5218 /* (user+group)*(old+new) structure, inode write (sb, 5205 /* (user+group)*(old+new) structure, inode write (sb,
5219 * inode block, ? - but truncate inode update has it) */ 5206 * inode block, ? - but truncate inode update has it) */
5220 handle = ext4_journal_start(inode, 2*(EXT4_QUOTA_INIT_BLOCKS(inode->i_sb)+ 5207 handle = ext4_journal_start(inode, (EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)+
5221 EXT4_QUOTA_DEL_BLOCKS(inode->i_sb))+3); 5208 EXT4_MAXQUOTAS_DEL_BLOCKS(inode->i_sb))+3);
5222 if (IS_ERR(handle)) { 5209 if (IS_ERR(handle)) {
5223 error = PTR_ERR(handle); 5210 error = PTR_ERR(handle);
5224 goto err_out; 5211 goto err_out;
@@ -5366,7 +5353,7 @@ static int ext4_index_trans_blocks(struct inode *inode, int nrblocks, int chunk)
5366 * worse case, the indexs blocks spread over different block groups 5353 * worse case, the indexs blocks spread over different block groups
5367 * 5354 *
5368 * If datablocks are discontiguous, they are possible to spread over 5355 * If datablocks are discontiguous, they are possible to spread over
5369 * different block groups too. If they are contiugous, with flexbg, 5356 * different block groups too. If they are contiuguous, with flexbg,
5370 * they could still across block group boundary. 5357 * they could still across block group boundary.
5371 * 5358 *
5372 * Also account for superblock, inode, quota and xattr blocks 5359 * Also account for superblock, inode, quota and xattr blocks
@@ -5442,7 +5429,7 @@ int ext4_writepage_trans_blocks(struct inode *inode)
5442 * Calculate the journal credits for a chunk of data modification. 5429 * Calculate the journal credits for a chunk of data modification.
5443 * 5430 *
5444 * This is called from DIO, fallocate or whoever calling 5431 * This is called from DIO, fallocate or whoever calling
5445 * ext4_get_blocks() to map/allocate a chunk of contigous disk blocks. 5432 * ext4_get_blocks() to map/allocate a chunk of contiguous disk blocks.
5446 * 5433 *
5447 * journal buffers for data blocks are not included here, as DIO 5434 * journal buffers for data blocks are not included here, as DIO
5448 * and fallocate do no need to journal data buffers. 5435 * and fallocate do no need to journal data buffers.
@@ -5612,14 +5599,12 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode)
5612 */ 5599 */
5613void ext4_dirty_inode(struct inode *inode) 5600void ext4_dirty_inode(struct inode *inode)
5614{ 5601{
5615 handle_t *current_handle = ext4_journal_current_handle();
5616 handle_t *handle; 5602 handle_t *handle;
5617 5603
5618 handle = ext4_journal_start(inode, 2); 5604 handle = ext4_journal_start(inode, 2);
5619 if (IS_ERR(handle)) 5605 if (IS_ERR(handle))
5620 goto out; 5606 goto out;
5621 5607
5622 jbd_debug(5, "marking dirty. outer handle=%p\n", current_handle);
5623 ext4_mark_inode_dirty(handle, inode); 5608 ext4_mark_inode_dirty(handle, inode);
5624 5609
5625 ext4_journal_stop(handle); 5610 ext4_journal_stop(handle);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index c1cdf613e725..b63d193126db 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -221,31 +221,38 @@ setversion_out:
221 struct file *donor_filp; 221 struct file *donor_filp;
222 int err; 222 int err;
223 223
224 if (!(filp->f_mode & FMODE_READ) ||
225 !(filp->f_mode & FMODE_WRITE))
226 return -EBADF;
227
224 if (copy_from_user(&me, 228 if (copy_from_user(&me,
225 (struct move_extent __user *)arg, sizeof(me))) 229 (struct move_extent __user *)arg, sizeof(me)))
226 return -EFAULT; 230 return -EFAULT;
231 me.moved_len = 0;
227 232
228 donor_filp = fget(me.donor_fd); 233 donor_filp = fget(me.donor_fd);
229 if (!donor_filp) 234 if (!donor_filp)
230 return -EBADF; 235 return -EBADF;
231 236
232 if (!capable(CAP_DAC_OVERRIDE)) { 237 if (!(donor_filp->f_mode & FMODE_WRITE)) {
233 if ((current->real_cred->fsuid != inode->i_uid) || 238 err = -EBADF;
234 !(inode->i_mode & S_IRUSR) || 239 goto mext_out;
235 !(donor_filp->f_dentry->d_inode->i_mode &
236 S_IRUSR)) {
237 fput(donor_filp);
238 return -EACCES;
239 }
240 } 240 }
241 241
242 err = mnt_want_write(filp->f_path.mnt);
243 if (err)
244 goto mext_out;
245
242 err = ext4_move_extents(filp, donor_filp, me.orig_start, 246 err = ext4_move_extents(filp, donor_filp, me.orig_start,
243 me.donor_start, me.len, &me.moved_len); 247 me.donor_start, me.len, &me.moved_len);
244 fput(donor_filp); 248 mnt_drop_write(filp->f_path.mnt);
249 if (me.moved_len > 0)
250 file_remove_suid(donor_filp);
245 251
246 if (copy_to_user((struct move_extent *)arg, &me, sizeof(me))) 252 if (copy_to_user((struct move_extent *)arg, &me, sizeof(me)))
247 return -EFAULT; 253 err = -EFAULT;
248 254mext_out:
255 fput(donor_filp);
249 return err; 256 return err;
250 } 257 }
251 258
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index d527fd384582..b1fd3daadc9c 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -142,7 +142,7 @@
142 * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The 142 * 2 blocks and the order of allocation is >= sbi->s_mb_order2_reqs. The
143 * value of s_mb_order2_reqs can be tuned via 143 * value of s_mb_order2_reqs can be tuned via
144 * /sys/fs/ext4/<partition>/mb_order2_req. If the request len is equal to 144 * /sys/fs/ext4/<partition>/mb_order2_req. If the request len is equal to
145 * stripe size (sbi->s_stripe), we try to search for contigous block in 145 * stripe size (sbi->s_stripe), we try to search for contiguous block in
146 * stripe size. This should result in better allocation on RAID setups. If 146 * stripe size. This should result in better allocation on RAID setups. If
147 * not, we search in the specific group using bitmap for best extents. The 147 * not, we search in the specific group using bitmap for best extents. The
148 * tunable min_to_scan and max_to_scan control the behaviour here. 148 * tunable min_to_scan and max_to_scan control the behaviour here.
@@ -2529,7 +2529,6 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2529 struct ext4_group_info *db; 2529 struct ext4_group_info *db;
2530 int err, count = 0, count2 = 0; 2530 int err, count = 0, count2 = 0;
2531 struct ext4_free_data *entry; 2531 struct ext4_free_data *entry;
2532 ext4_fsblk_t discard_block;
2533 struct list_head *l, *ltmp; 2532 struct list_head *l, *ltmp;
2534 2533
2535 list_for_each_safe(l, ltmp, &txn->t_private_list) { 2534 list_for_each_safe(l, ltmp, &txn->t_private_list) {
@@ -2559,13 +2558,19 @@ static void release_blocks_on_commit(journal_t *journal, transaction_t *txn)
2559 page_cache_release(e4b.bd_bitmap_page); 2558 page_cache_release(e4b.bd_bitmap_page);
2560 } 2559 }
2561 ext4_unlock_group(sb, entry->group); 2560 ext4_unlock_group(sb, entry->group);
2562 discard_block = (ext4_fsblk_t) entry->group * EXT4_BLOCKS_PER_GROUP(sb) 2561 if (test_opt(sb, DISCARD)) {
2563 + entry->start_blk 2562 ext4_fsblk_t discard_block;
2564 + le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block); 2563 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
2565 trace_ext4_discard_blocks(sb, (unsigned long long)discard_block, 2564
2566 entry->count); 2565 discard_block = (ext4_fsblk_t)entry->group *
2567 sb_issue_discard(sb, discard_block, entry->count); 2566 EXT4_BLOCKS_PER_GROUP(sb)
2568 2567 + entry->start_blk
2568 + le32_to_cpu(es->s_first_data_block);
2569 trace_ext4_discard_blocks(sb,
2570 (unsigned long long)discard_block,
2571 entry->count);
2572 sb_issue_discard(sb, discard_block, entry->count);
2573 }
2569 kmem_cache_free(ext4_free_ext_cachep, entry); 2574 kmem_cache_free(ext4_free_ext_cachep, entry);
2570 ext4_mb_release_desc(&e4b); 2575 ext4_mb_release_desc(&e4b);
2571 } 2576 }
@@ -3006,6 +3011,24 @@ static void ext4_mb_collect_stats(struct ext4_allocation_context *ac)
3006} 3011}
3007 3012
3008/* 3013/*
3014 * Called on failure; free up any blocks from the inode PA for this
3015 * context. We don't need this for MB_GROUP_PA because we only change
3016 * pa_free in ext4_mb_release_context(), but on failure, we've already
3017 * zeroed out ac->ac_b_ex.fe_len, so group_pa->pa_free is not changed.
3018 */
3019static void ext4_discard_allocated_blocks(struct ext4_allocation_context *ac)
3020{
3021 struct ext4_prealloc_space *pa = ac->ac_pa;
3022 int len;
3023
3024 if (pa && pa->pa_type == MB_INODE_PA) {
3025 len = ac->ac_b_ex.fe_len;
3026 pa->pa_free += len;
3027 }
3028
3029}
3030
3031/*
3009 * use blocks preallocated to inode 3032 * use blocks preallocated to inode
3010 */ 3033 */
3011static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac, 3034static void ext4_mb_use_inode_pa(struct ext4_allocation_context *ac,
@@ -4290,6 +4313,7 @@ repeat:
4290 ac->ac_status = AC_STATUS_CONTINUE; 4313 ac->ac_status = AC_STATUS_CONTINUE;
4291 goto repeat; 4314 goto repeat;
4292 } else if (*errp) { 4315 } else if (*errp) {
4316 ext4_discard_allocated_blocks(ac);
4293 ac->ac_b_ex.fe_len = 0; 4317 ac->ac_b_ex.fe_len = 0;
4294 ar->len = 0; 4318 ar->len = 0;
4295 ext4_mb_show_ac(ac); 4319 ext4_mb_show_ac(ac);
@@ -4422,18 +4446,24 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
4422 return 0; 4446 return 0;
4423} 4447}
4424 4448
4425/* 4449/**
4426 * Main entry point into mballoc to free blocks 4450 * ext4_free_blocks() -- Free given blocks and update quota
4451 * @handle: handle for this transaction
4452 * @inode: inode
4453 * @block: start physical block to free
4454 * @count: number of blocks to free
4455 * @flags: EXT4_FREE_BLOCKS_* flags (e.g. METADATA, FORGET) for this free
4427 */ 4456 */
4428void ext4_mb_free_blocks(handle_t *handle, struct inode *inode, 4457void ext4_free_blocks(handle_t *handle, struct inode *inode,
4429 ext4_fsblk_t block, unsigned long count, 4458 struct buffer_head *bh, ext4_fsblk_t block,
4430 int metadata, unsigned long *freed) 4459 unsigned long count, int flags)
4431{ 4460{
4432 struct buffer_head *bitmap_bh = NULL; 4461 struct buffer_head *bitmap_bh = NULL;
4433 struct super_block *sb = inode->i_sb; 4462 struct super_block *sb = inode->i_sb;
4434 struct ext4_allocation_context *ac = NULL; 4463 struct ext4_allocation_context *ac = NULL;
4435 struct ext4_group_desc *gdp; 4464 struct ext4_group_desc *gdp;
4436 struct ext4_super_block *es; 4465 struct ext4_super_block *es;
4466 unsigned long freed = 0;
4437 unsigned int overflow; 4467 unsigned int overflow;
4438 ext4_grpblk_t bit; 4468 ext4_grpblk_t bit;
4439 struct buffer_head *gd_bh; 4469 struct buffer_head *gd_bh;
@@ -4443,13 +4473,16 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4443 int err = 0; 4473 int err = 0;
4444 int ret; 4474 int ret;
4445 4475
4446 *freed = 0; 4476 if (bh) {
4477 if (block)
4478 BUG_ON(block != bh->b_blocknr);
4479 else
4480 block = bh->b_blocknr;
4481 }
4447 4482
4448 sbi = EXT4_SB(sb); 4483 sbi = EXT4_SB(sb);
4449 es = EXT4_SB(sb)->s_es; 4484 es = EXT4_SB(sb)->s_es;
4450 if (block < le32_to_cpu(es->s_first_data_block) || 4485 if (!ext4_data_block_valid(sbi, block, count)) {
4451 block + count < block ||
4452 block + count > ext4_blocks_count(es)) {
4453 ext4_error(sb, __func__, 4486 ext4_error(sb, __func__,
4454 "Freeing blocks not in datazone - " 4487 "Freeing blocks not in datazone - "
4455 "block = %llu, count = %lu", block, count); 4488 "block = %llu, count = %lu", block, count);
@@ -4457,7 +4490,32 @@ void ext4_mb_free_blocks(handle_t *handle, struct inode *inode,
4457 } 4490 }
4458 4491
4459 ext4_debug("freeing block %llu\n", block); 4492 ext4_debug("freeing block %llu\n", block);
4460 trace_ext4_free_blocks(inode, block, count, metadata); 4493 trace_ext4_free_blocks(inode, block, count, flags);
4494
4495 if (flags & EXT4_FREE_BLOCKS_FORGET) {
4496 struct buffer_head *tbh = bh;
4497 int i;
4498
4499 BUG_ON(bh && (count > 1));
4500
4501 for (i = 0; i < count; i++) {
4502 if (!bh)
4503 tbh = sb_find_get_block(inode->i_sb,
4504 block + i);
4505 ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
4506 inode, tbh, block + i);
4507 }
4508 }
4509
4510 /*
4511 * We need to make sure we don't reuse the freed block until
4512 * after the transaction is committed, which we can do by
4513 * treating the block as metadata, below. We make an
4514 * exception if the inode is to be written in writeback mode
4515 * since writeback mode has weak data consistency guarantees.
4516 */
4517 if (!ext4_should_writeback_data(inode))
4518 flags |= EXT4_FREE_BLOCKS_METADATA;
4461 4519
4462 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS); 4520 ac = kmem_cache_alloc(ext4_ac_cachep, GFP_NOFS);
4463 if (ac) { 4521 if (ac) {
@@ -4533,7 +4591,8 @@ do_more:
4533 err = ext4_mb_load_buddy(sb, block_group, &e4b); 4591 err = ext4_mb_load_buddy(sb, block_group, &e4b);
4534 if (err) 4592 if (err)
4535 goto error_return; 4593 goto error_return;
4536 if (metadata && ext4_handle_valid(handle)) { 4594
4595 if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
4537 struct ext4_free_data *new_entry; 4596 struct ext4_free_data *new_entry;
4538 /* 4597 /*
4539 * blocks being freed are metadata. these blocks shouldn't 4598 * blocks being freed are metadata. these blocks shouldn't
@@ -4572,7 +4631,7 @@ do_more:
4572 4631
4573 ext4_mb_release_desc(&e4b); 4632 ext4_mb_release_desc(&e4b);
4574 4633
4575 *freed += count; 4634 freed += count;
4576 4635
4577 /* We dirtied the bitmap block */ 4636 /* We dirtied the bitmap block */
4578 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block"); 4637 BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
@@ -4592,6 +4651,8 @@ do_more:
4592 } 4651 }
4593 sb->s_dirt = 1; 4652 sb->s_dirt = 1;
4594error_return: 4653error_return:
4654 if (freed)
4655 vfs_dq_free_block(inode, freed);
4595 brelse(bitmap_bh); 4656 brelse(bitmap_bh);
4596 ext4_std_error(sb, err); 4657 ext4_std_error(sb, err);
4597 if (ac) 4658 if (ac)
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index a93d5b80f3e2..81415814b00b 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -238,7 +238,7 @@ static int extend_credit_for_blkdel(handle_t *handle, struct inode *inode)
238 * So allocate a credit of 3. We may update 238 * So allocate a credit of 3. We may update
239 * quota (user and group). 239 * quota (user and group).
240 */ 240 */
241 needed = 3 + 2*EXT4_QUOTA_TRANS_BLOCKS(inode->i_sb); 241 needed = 3 + EXT4_MAXQUOTAS_TRANS_BLOCKS(inode->i_sb);
242 242
243 if (ext4_journal_extend(handle, needed) != 0) 243 if (ext4_journal_extend(handle, needed) != 0)
244 retval = ext4_journal_restart(handle, needed); 244 retval = ext4_journal_restart(handle, needed);
@@ -262,13 +262,17 @@ static int free_dind_blocks(handle_t *handle,
262 for (i = 0; i < max_entries; i++) { 262 for (i = 0; i < max_entries; i++) {
263 if (tmp_idata[i]) { 263 if (tmp_idata[i]) {
264 extend_credit_for_blkdel(handle, inode); 264 extend_credit_for_blkdel(handle, inode);
265 ext4_free_blocks(handle, inode, 265 ext4_free_blocks(handle, inode, 0,
266 le32_to_cpu(tmp_idata[i]), 1, 1); 266 le32_to_cpu(tmp_idata[i]), 1,
267 EXT4_FREE_BLOCKS_METADATA |
268 EXT4_FREE_BLOCKS_FORGET);
267 } 269 }
268 } 270 }
269 put_bh(bh); 271 put_bh(bh);
270 extend_credit_for_blkdel(handle, inode); 272 extend_credit_for_blkdel(handle, inode);
271 ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); 273 ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
274 EXT4_FREE_BLOCKS_METADATA |
275 EXT4_FREE_BLOCKS_FORGET);
272 return 0; 276 return 0;
273} 277}
274 278
@@ -297,7 +301,9 @@ static int free_tind_blocks(handle_t *handle,
297 } 301 }
298 put_bh(bh); 302 put_bh(bh);
299 extend_credit_for_blkdel(handle, inode); 303 extend_credit_for_blkdel(handle, inode);
300 ext4_free_blocks(handle, inode, le32_to_cpu(i_data), 1, 1); 304 ext4_free_blocks(handle, inode, 0, le32_to_cpu(i_data), 1,
305 EXT4_FREE_BLOCKS_METADATA |
306 EXT4_FREE_BLOCKS_FORGET);
301 return 0; 307 return 0;
302} 308}
303 309
@@ -308,8 +314,10 @@ static int free_ind_block(handle_t *handle, struct inode *inode, __le32 *i_data)
308 /* ei->i_data[EXT4_IND_BLOCK] */ 314 /* ei->i_data[EXT4_IND_BLOCK] */
309 if (i_data[0]) { 315 if (i_data[0]) {
310 extend_credit_for_blkdel(handle, inode); 316 extend_credit_for_blkdel(handle, inode);
311 ext4_free_blocks(handle, inode, 317 ext4_free_blocks(handle, inode, 0,
312 le32_to_cpu(i_data[0]), 1, 1); 318 le32_to_cpu(i_data[0]), 1,
319 EXT4_FREE_BLOCKS_METADATA |
320 EXT4_FREE_BLOCKS_FORGET);
313 } 321 }
314 322
315 /* ei->i_data[EXT4_DIND_BLOCK] */ 323 /* ei->i_data[EXT4_DIND_BLOCK] */
@@ -419,7 +427,8 @@ static int free_ext_idx(handle_t *handle, struct inode *inode,
419 } 427 }
420 put_bh(bh); 428 put_bh(bh);
421 extend_credit_for_blkdel(handle, inode); 429 extend_credit_for_blkdel(handle, inode);
422 ext4_free_blocks(handle, inode, block, 1, 1); 430 ext4_free_blocks(handle, inode, 0, block, 1,
431 EXT4_FREE_BLOCKS_METADATA | EXT4_FREE_BLOCKS_FORGET);
423 return retval; 432 return retval;
424} 433}
425 434
@@ -477,7 +486,7 @@ int ext4_ext_migrate(struct inode *inode)
477 handle = ext4_journal_start(inode, 486 handle = ext4_journal_start(inode,
478 EXT4_DATA_TRANS_BLOCKS(inode->i_sb) + 487 EXT4_DATA_TRANS_BLOCKS(inode->i_sb) +
479 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 488 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
480 2 * EXT4_QUOTA_INIT_BLOCKS(inode->i_sb) 489 EXT4_MAXQUOTAS_INIT_BLOCKS(inode->i_sb)
481 + 1); 490 + 1);
482 if (IS_ERR(handle)) { 491 if (IS_ERR(handle)) {
483 retval = PTR_ERR(handle); 492 retval = PTR_ERR(handle);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 25b6b1457360..82c415be87a4 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -77,12 +77,14 @@ static int
77mext_next_extent(struct inode *inode, struct ext4_ext_path *path, 77mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
78 struct ext4_extent **extent) 78 struct ext4_extent **extent)
79{ 79{
80 struct ext4_extent_header *eh;
80 int ppos, leaf_ppos = path->p_depth; 81 int ppos, leaf_ppos = path->p_depth;
81 82
82 ppos = leaf_ppos; 83 ppos = leaf_ppos;
83 if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) { 84 if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
84 /* leaf block */ 85 /* leaf block */
85 *extent = ++path[ppos].p_ext; 86 *extent = ++path[ppos].p_ext;
87 path[ppos].p_block = ext_pblock(path[ppos].p_ext);
86 return 0; 88 return 0;
87 } 89 }
88 90
@@ -119,9 +121,18 @@ mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
119 ext_block_hdr(path[cur_ppos+1].p_bh); 121 ext_block_hdr(path[cur_ppos+1].p_bh);
120 } 122 }
121 123
124 path[leaf_ppos].p_ext = *extent = NULL;
125
126 eh = path[leaf_ppos].p_hdr;
127 if (le16_to_cpu(eh->eh_entries) == 0)
128 /* empty leaf is found */
129 return -ENODATA;
130
122 /* leaf block */ 131 /* leaf block */
123 path[leaf_ppos].p_ext = *extent = 132 path[leaf_ppos].p_ext = *extent =
124 EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr); 133 EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
134 path[leaf_ppos].p_block =
135 ext_pblock(path[leaf_ppos].p_ext);
125 return 0; 136 return 0;
126 } 137 }
127 } 138 }
@@ -155,40 +166,15 @@ mext_check_null_inode(struct inode *inode1, struct inode *inode2,
155} 166}
156 167
157/** 168/**
158 * mext_double_down_read - Acquire two inodes' read semaphore 169 * double_down_write_data_sem - Acquire two inodes' write lock of i_data_sem
159 *
160 * @orig_inode: original inode structure
161 * @donor_inode: donor inode structure
162 * Acquire read semaphore of the two inodes (orig and donor) by i_ino order.
163 */
164static void
165mext_double_down_read(struct inode *orig_inode, struct inode *donor_inode)
166{
167 struct inode *first = orig_inode, *second = donor_inode;
168
169 /*
170 * Use the inode number to provide the stable locking order instead
171 * of its address, because the C language doesn't guarantee you can
172 * compare pointers that don't come from the same array.
173 */
174 if (donor_inode->i_ino < orig_inode->i_ino) {
175 first = donor_inode;
176 second = orig_inode;
177 }
178
179 down_read(&EXT4_I(first)->i_data_sem);
180 down_read(&EXT4_I(second)->i_data_sem);
181}
182
183/**
184 * mext_double_down_write - Acquire two inodes' write semaphore
185 * 170 *
186 * @orig_inode: original inode structure 171 * @orig_inode: original inode structure
187 * @donor_inode: donor inode structure 172 * @donor_inode: donor inode structure
188 * Acquire write semaphore of the two inodes (orig and donor) by i_ino order. 173 * Acquire write lock of i_data_sem of the two inodes (orig and donor) by
174 * i_ino order.
189 */ 175 */
190static void 176static void
191mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode) 177double_down_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
192{ 178{
193 struct inode *first = orig_inode, *second = donor_inode; 179 struct inode *first = orig_inode, *second = donor_inode;
194 180
@@ -203,32 +189,18 @@ mext_double_down_write(struct inode *orig_inode, struct inode *donor_inode)
203 } 189 }
204 190
205 down_write(&EXT4_I(first)->i_data_sem); 191 down_write(&EXT4_I(first)->i_data_sem);
206 down_write(&EXT4_I(second)->i_data_sem); 192 down_write_nested(&EXT4_I(second)->i_data_sem, SINGLE_DEPTH_NESTING);
207} 193}
208 194
209/** 195/**
210 * mext_double_up_read - Release two inodes' read semaphore 196 * double_up_write_data_sem - Release two inodes' write lock of i_data_sem
211 * 197 *
212 * @orig_inode: original inode structure to be released its lock first 198 * @orig_inode: original inode structure to be released its lock first
213 * @donor_inode: donor inode structure to be released its lock second 199 * @donor_inode: donor inode structure to be released its lock second
214 * Release read semaphore of two inodes (orig and donor). 200 * Release write lock of i_data_sem of two inodes (orig and donor).
215 */ 201 */
216static void 202static void
217mext_double_up_read(struct inode *orig_inode, struct inode *donor_inode) 203double_up_write_data_sem(struct inode *orig_inode, struct inode *donor_inode)
218{
219 up_read(&EXT4_I(orig_inode)->i_data_sem);
220 up_read(&EXT4_I(donor_inode)->i_data_sem);
221}
222
223/**
224 * mext_double_up_write - Release two inodes' write semaphore
225 *
226 * @orig_inode: original inode structure to be released its lock first
227 * @donor_inode: donor inode structure to be released its lock second
228 * Release write semaphore of two inodes (orig and donor).
229 */
230static void
231mext_double_up_write(struct inode *orig_inode, struct inode *donor_inode)
232{ 204{
233 up_write(&EXT4_I(orig_inode)->i_data_sem); 205 up_write(&EXT4_I(orig_inode)->i_data_sem);
234 up_write(&EXT4_I(donor_inode)->i_data_sem); 206 up_write(&EXT4_I(donor_inode)->i_data_sem);
@@ -596,7 +568,7 @@ out:
596 * @tmp_oext: the extent that will belong to the donor inode 568 * @tmp_oext: the extent that will belong to the donor inode
597 * @orig_off: block offset of original inode 569 * @orig_off: block offset of original inode
598 * @donor_off: block offset of donor inode 570 * @donor_off: block offset of donor inode
599 * @max_count: the maximun length of extents 571 * @max_count: the maximum length of extents
600 * 572 *
601 * Return 0 on success, or a negative error value on failure. 573 * Return 0 on success, or a negative error value on failure.
602 */ 574 */
@@ -661,6 +633,7 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
661 * @donor_inode: donor inode 633 * @donor_inode: donor inode
662 * @from: block offset of orig_inode 634 * @from: block offset of orig_inode
663 * @count: block count to be replaced 635 * @count: block count to be replaced
636 * @err: pointer to save return value
664 * 637 *
665 * Replace original inode extents and donor inode extents page by page. 638 * Replace original inode extents and donor inode extents page by page.
666 * We implement this replacement in the following three steps: 639 * We implement this replacement in the following three steps:
@@ -671,33 +644,33 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
671 * 3. Change the block information of donor inode to point at the saved 644 * 3. Change the block information of donor inode to point at the saved
672 * original inode blocks in the dummy extents. 645 * original inode blocks in the dummy extents.
673 * 646 *
674 * Return 0 on success, or a negative error value on failure. 647 * Return replaced block count.
675 */ 648 */
676static int 649static int
677mext_replace_branches(handle_t *handle, struct inode *orig_inode, 650mext_replace_branches(handle_t *handle, struct inode *orig_inode,
678 struct inode *donor_inode, ext4_lblk_t from, 651 struct inode *donor_inode, ext4_lblk_t from,
679 ext4_lblk_t count) 652 ext4_lblk_t count, int *err)
680{ 653{
681 struct ext4_ext_path *orig_path = NULL; 654 struct ext4_ext_path *orig_path = NULL;
682 struct ext4_ext_path *donor_path = NULL; 655 struct ext4_ext_path *donor_path = NULL;
683 struct ext4_extent *oext, *dext; 656 struct ext4_extent *oext, *dext;
684 struct ext4_extent tmp_dext, tmp_oext; 657 struct ext4_extent tmp_dext, tmp_oext;
685 ext4_lblk_t orig_off = from, donor_off = from; 658 ext4_lblk_t orig_off = from, donor_off = from;
686 int err = 0;
687 int depth; 659 int depth;
688 int replaced_count = 0; 660 int replaced_count = 0;
689 int dext_alen; 661 int dext_alen;
690 662
691 mext_double_down_write(orig_inode, donor_inode); 663 /* Protect extent trees against block allocations via delalloc */
664 double_down_write_data_sem(orig_inode, donor_inode);
692 665
693 /* Get the original extent for the block "orig_off" */ 666 /* Get the original extent for the block "orig_off" */
694 err = get_ext_path(orig_inode, orig_off, &orig_path); 667 *err = get_ext_path(orig_inode, orig_off, &orig_path);
695 if (err) 668 if (*err)
696 goto out; 669 goto out;
697 670
698 /* Get the donor extent for the head */ 671 /* Get the donor extent for the head */
699 err = get_ext_path(donor_inode, donor_off, &donor_path); 672 *err = get_ext_path(donor_inode, donor_off, &donor_path);
700 if (err) 673 if (*err)
701 goto out; 674 goto out;
702 depth = ext_depth(orig_inode); 675 depth = ext_depth(orig_inode);
703 oext = orig_path[depth].p_ext; 676 oext = orig_path[depth].p_ext;
@@ -707,9 +680,9 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
707 dext = donor_path[depth].p_ext; 680 dext = donor_path[depth].p_ext;
708 tmp_dext = *dext; 681 tmp_dext = *dext;
709 682
710 err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, 683 *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
711 donor_off, count); 684 donor_off, count);
712 if (err) 685 if (*err)
713 goto out; 686 goto out;
714 687
715 /* Loop for the donor extents */ 688 /* Loop for the donor extents */
@@ -718,7 +691,7 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
718 if (!dext) { 691 if (!dext) {
719 ext4_error(donor_inode->i_sb, __func__, 692 ext4_error(donor_inode->i_sb, __func__,
720 "The extent for donor must be found"); 693 "The extent for donor must be found");
721 err = -EIO; 694 *err = -EIO;
722 goto out; 695 goto out;
723 } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) { 696 } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) {
724 ext4_error(donor_inode->i_sb, __func__, 697 ext4_error(donor_inode->i_sb, __func__,
@@ -726,20 +699,20 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
726 "extent(%u) should be equal", 699 "extent(%u) should be equal",
727 donor_off, 700 donor_off,
728 le32_to_cpu(tmp_dext.ee_block)); 701 le32_to_cpu(tmp_dext.ee_block));
729 err = -EIO; 702 *err = -EIO;
730 goto out; 703 goto out;
731 } 704 }
732 705
733 /* Set donor extent to orig extent */ 706 /* Set donor extent to orig extent */
734 err = mext_leaf_block(handle, orig_inode, 707 *err = mext_leaf_block(handle, orig_inode,
735 orig_path, &tmp_dext, &orig_off); 708 orig_path, &tmp_dext, &orig_off);
736 if (err < 0) 709 if (*err)
737 goto out; 710 goto out;
738 711
739 /* Set orig extent to donor extent */ 712 /* Set orig extent to donor extent */
740 err = mext_leaf_block(handle, donor_inode, 713 *err = mext_leaf_block(handle, donor_inode,
741 donor_path, &tmp_oext, &donor_off); 714 donor_path, &tmp_oext, &donor_off);
742 if (err < 0) 715 if (*err)
743 goto out; 716 goto out;
744 717
745 dext_alen = ext4_ext_get_actual_len(&tmp_dext); 718 dext_alen = ext4_ext_get_actual_len(&tmp_dext);
@@ -753,35 +726,25 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
753 726
754 if (orig_path) 727 if (orig_path)
755 ext4_ext_drop_refs(orig_path); 728 ext4_ext_drop_refs(orig_path);
756 err = get_ext_path(orig_inode, orig_off, &orig_path); 729 *err = get_ext_path(orig_inode, orig_off, &orig_path);
757 if (err) 730 if (*err)
758 goto out; 731 goto out;
759 depth = ext_depth(orig_inode); 732 depth = ext_depth(orig_inode);
760 oext = orig_path[depth].p_ext; 733 oext = orig_path[depth].p_ext;
761 if (le32_to_cpu(oext->ee_block) +
762 ext4_ext_get_actual_len(oext) <= orig_off) {
763 err = 0;
764 goto out;
765 }
766 tmp_oext = *oext; 734 tmp_oext = *oext;
767 735
768 if (donor_path) 736 if (donor_path)
769 ext4_ext_drop_refs(donor_path); 737 ext4_ext_drop_refs(donor_path);
770 err = get_ext_path(donor_inode, donor_off, &donor_path); 738 *err = get_ext_path(donor_inode, donor_off, &donor_path);
771 if (err) 739 if (*err)
772 goto out; 740 goto out;
773 depth = ext_depth(donor_inode); 741 depth = ext_depth(donor_inode);
774 dext = donor_path[depth].p_ext; 742 dext = donor_path[depth].p_ext;
775 if (le32_to_cpu(dext->ee_block) +
776 ext4_ext_get_actual_len(dext) <= donor_off) {
777 err = 0;
778 goto out;
779 }
780 tmp_dext = *dext; 743 tmp_dext = *dext;
781 744
782 err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off, 745 *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
783 donor_off, count - replaced_count); 746 donor_off, count - replaced_count);
784 if (err) 747 if (*err)
785 goto out; 748 goto out;
786 } 749 }
787 750
@@ -795,8 +758,12 @@ out:
795 kfree(donor_path); 758 kfree(donor_path);
796 } 759 }
797 760
798 mext_double_up_write(orig_inode, donor_inode); 761 ext4_ext_invalidate_cache(orig_inode);
799 return err; 762 ext4_ext_invalidate_cache(donor_inode);
763
764 double_up_write_data_sem(orig_inode, donor_inode);
765
766 return replaced_count;
800} 767}
801 768
802/** 769/**
@@ -808,16 +775,17 @@ out:
808 * @data_offset_in_page: block index where data swapping starts 775 * @data_offset_in_page: block index where data swapping starts
809 * @block_len_in_page: the number of blocks to be swapped 776 * @block_len_in_page: the number of blocks to be swapped
810 * @uninit: orig extent is uninitialized or not 777 * @uninit: orig extent is uninitialized or not
778 * @err: pointer to save return value
811 * 779 *
812 * Save the data in original inode blocks and replace original inode extents 780 * Save the data in original inode blocks and replace original inode extents
813 * with donor inode extents by calling mext_replace_branches(). 781 * with donor inode extents by calling mext_replace_branches().
814 * Finally, write out the saved data in new original inode blocks. Return 0 782 * Finally, write out the saved data in new original inode blocks. Return
815 * on success, or a negative error value on failure. 783 * replaced block count.
816 */ 784 */
817static int 785static int
818move_extent_per_page(struct file *o_filp, struct inode *donor_inode, 786move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
819 pgoff_t orig_page_offset, int data_offset_in_page, 787 pgoff_t orig_page_offset, int data_offset_in_page,
820 int block_len_in_page, int uninit) 788 int block_len_in_page, int uninit, int *err)
821{ 789{
822 struct inode *orig_inode = o_filp->f_dentry->d_inode; 790 struct inode *orig_inode = o_filp->f_dentry->d_inode;
823 struct address_space *mapping = orig_inode->i_mapping; 791 struct address_space *mapping = orig_inode->i_mapping;
@@ -829,9 +797,11 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
829 long long offs = orig_page_offset << PAGE_CACHE_SHIFT; 797 long long offs = orig_page_offset << PAGE_CACHE_SHIFT;
830 unsigned long blocksize = orig_inode->i_sb->s_blocksize; 798 unsigned long blocksize = orig_inode->i_sb->s_blocksize;
831 unsigned int w_flags = 0; 799 unsigned int w_flags = 0;
832 unsigned int tmp_data_len, data_len; 800 unsigned int tmp_data_size, data_size, replaced_size;
833 void *fsdata; 801 void *fsdata;
834 int ret, i, jblocks; 802 int i, jblocks;
803 int err2 = 0;
804 int replaced_count = 0;
835 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; 805 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
836 806
837 /* 807 /*
@@ -841,8 +811,8 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
841 jblocks = ext4_writepage_trans_blocks(orig_inode) * 2; 811 jblocks = ext4_writepage_trans_blocks(orig_inode) * 2;
842 handle = ext4_journal_start(orig_inode, jblocks); 812 handle = ext4_journal_start(orig_inode, jblocks);
843 if (IS_ERR(handle)) { 813 if (IS_ERR(handle)) {
844 ret = PTR_ERR(handle); 814 *err = PTR_ERR(handle);
845 return ret; 815 return 0;
846 } 816 }
847 817
848 if (segment_eq(get_fs(), KERNEL_DS)) 818 if (segment_eq(get_fs(), KERNEL_DS))
@@ -858,39 +828,36 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
858 * Just swap data blocks between orig and donor. 828 * Just swap data blocks between orig and donor.
859 */ 829 */
860 if (uninit) { 830 if (uninit) {
861 ret = mext_replace_branches(handle, orig_inode, 831 replaced_count = mext_replace_branches(handle, orig_inode,
862 donor_inode, orig_blk_offset, 832 donor_inode, orig_blk_offset,
863 block_len_in_page); 833 block_len_in_page, err);
864
865 /* Clear the inode cache not to refer to the old data */
866 ext4_ext_invalidate_cache(orig_inode);
867 ext4_ext_invalidate_cache(donor_inode);
868 goto out2; 834 goto out2;
869 } 835 }
870 836
871 offs = (long long)orig_blk_offset << orig_inode->i_blkbits; 837 offs = (long long)orig_blk_offset << orig_inode->i_blkbits;
872 838
873 /* Calculate data_len */ 839 /* Calculate data_size */
874 if ((orig_blk_offset + block_len_in_page - 1) == 840 if ((orig_blk_offset + block_len_in_page - 1) ==
875 ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { 841 ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
876 /* Replace the last block */ 842 /* Replace the last block */
877 tmp_data_len = orig_inode->i_size & (blocksize - 1); 843 tmp_data_size = orig_inode->i_size & (blocksize - 1);
878 /* 844 /*
879 * If data_len equal zero, it shows data_len is multiples of 845 * If data_size equal zero, it shows data_size is multiples of
880 * blocksize. So we set appropriate value. 846 * blocksize. So we set appropriate value.
881 */ 847 */
882 if (tmp_data_len == 0) 848 if (tmp_data_size == 0)
883 tmp_data_len = blocksize; 849 tmp_data_size = blocksize;
884 850
885 data_len = tmp_data_len + 851 data_size = tmp_data_size +
886 ((block_len_in_page - 1) << orig_inode->i_blkbits); 852 ((block_len_in_page - 1) << orig_inode->i_blkbits);
887 } else { 853 } else
888 data_len = block_len_in_page << orig_inode->i_blkbits; 854 data_size = block_len_in_page << orig_inode->i_blkbits;
889 } 855
856 replaced_size = data_size;
890 857
891 ret = a_ops->write_begin(o_filp, mapping, offs, data_len, w_flags, 858 *err = a_ops->write_begin(o_filp, mapping, offs, data_size, w_flags,
892 &page, &fsdata); 859 &page, &fsdata);
893 if (unlikely(ret < 0)) 860 if (unlikely(*err < 0))
894 goto out; 861 goto out;
895 862
896 if (!PageUptodate(page)) { 863 if (!PageUptodate(page)) {
@@ -911,14 +878,17 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
911 /* Release old bh and drop refs */ 878 /* Release old bh and drop refs */
912 try_to_release_page(page, 0); 879 try_to_release_page(page, 0);
913 880
914 ret = mext_replace_branches(handle, orig_inode, donor_inode, 881 replaced_count = mext_replace_branches(handle, orig_inode, donor_inode,
915 orig_blk_offset, block_len_in_page); 882 orig_blk_offset, block_len_in_page,
916 if (ret < 0) 883 &err2);
917 goto out; 884 if (err2) {
918 885 if (replaced_count) {
919 /* Clear the inode cache not to refer to the old data */ 886 block_len_in_page = replaced_count;
920 ext4_ext_invalidate_cache(orig_inode); 887 replaced_size =
921 ext4_ext_invalidate_cache(donor_inode); 888 block_len_in_page << orig_inode->i_blkbits;
889 } else
890 goto out;
891 }
922 892
923 if (!page_has_buffers(page)) 893 if (!page_has_buffers(page))
924 create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0); 894 create_empty_buffers(page, 1 << orig_inode->i_blkbits, 0);
@@ -928,16 +898,16 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
928 bh = bh->b_this_page; 898 bh = bh->b_this_page;
929 899
930 for (i = 0; i < block_len_in_page; i++) { 900 for (i = 0; i < block_len_in_page; i++) {
931 ret = ext4_get_block(orig_inode, 901 *err = ext4_get_block(orig_inode,
932 (sector_t)(orig_blk_offset + i), bh, 0); 902 (sector_t)(orig_blk_offset + i), bh, 0);
933 if (ret < 0) 903 if (*err < 0)
934 goto out; 904 goto out;
935 905
936 if (bh->b_this_page != NULL) 906 if (bh->b_this_page != NULL)
937 bh = bh->b_this_page; 907 bh = bh->b_this_page;
938 } 908 }
939 909
940 ret = a_ops->write_end(o_filp, mapping, offs, data_len, data_len, 910 *err = a_ops->write_end(o_filp, mapping, offs, data_size, replaced_size,
941 page, fsdata); 911 page, fsdata);
942 page = NULL; 912 page = NULL;
943 913
@@ -951,7 +921,10 @@ out:
951out2: 921out2:
952 ext4_journal_stop(handle); 922 ext4_journal_stop(handle);
953 923
954 return ret < 0 ? ret : 0; 924 if (err2)
925 *err = err2;
926
927 return replaced_count;
955} 928}
956 929
957/** 930/**
@@ -962,7 +935,6 @@ out2:
962 * @orig_start: logical start offset in block for orig 935 * @orig_start: logical start offset in block for orig
963 * @donor_start: logical start offset in block for donor 936 * @donor_start: logical start offset in block for donor
964 * @len: the number of blocks to be moved 937 * @len: the number of blocks to be moved
965 * @moved_len: moved block length
966 * 938 *
967 * Check the arguments of ext4_move_extents() whether the files can be 939 * Check the arguments of ext4_move_extents() whether the files can be
968 * exchanged with each other. 940 * exchanged with each other.
@@ -970,8 +942,8 @@ out2:
970 */ 942 */
971static int 943static int
972mext_check_arguments(struct inode *orig_inode, 944mext_check_arguments(struct inode *orig_inode,
973 struct inode *donor_inode, __u64 orig_start, 945 struct inode *donor_inode, __u64 orig_start,
974 __u64 donor_start, __u64 *len, __u64 moved_len) 946 __u64 donor_start, __u64 *len)
975{ 947{
976 ext4_lblk_t orig_blocks, donor_blocks; 948 ext4_lblk_t orig_blocks, donor_blocks;
977 unsigned int blkbits = orig_inode->i_blkbits; 949 unsigned int blkbits = orig_inode->i_blkbits;
@@ -985,6 +957,13 @@ mext_check_arguments(struct inode *orig_inode,
985 return -EINVAL; 957 return -EINVAL;
986 } 958 }
987 959
960 if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
961 ext4_debug("ext4 move extent: suid or sgid is set"
962 " to donor file [ino:orig %lu, donor %lu]\n",
963 orig_inode->i_ino, donor_inode->i_ino);
964 return -EINVAL;
965 }
966
988 /* Ext4 move extent does not support swapfile */ 967 /* Ext4 move extent does not support swapfile */
989 if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) { 968 if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
990 ext4_debug("ext4 move extent: The argument files should " 969 ext4_debug("ext4 move extent: The argument files should "
@@ -1025,13 +1004,6 @@ mext_check_arguments(struct inode *orig_inode,
1025 return -EINVAL; 1004 return -EINVAL;
1026 } 1005 }
1027 1006
1028 if (moved_len) {
1029 ext4_debug("ext4 move extent: moved_len should be 0 "
1030 "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
1031 donor_inode->i_ino);
1032 return -EINVAL;
1033 }
1034
1035 if ((orig_start > EXT_MAX_BLOCK) || 1007 if ((orig_start > EXT_MAX_BLOCK) ||
1036 (donor_start > EXT_MAX_BLOCK) || 1008 (donor_start > EXT_MAX_BLOCK) ||
1037 (*len > EXT_MAX_BLOCK) || 1009 (*len > EXT_MAX_BLOCK) ||
@@ -1088,7 +1060,7 @@ mext_check_arguments(struct inode *orig_inode,
1088 } 1060 }
1089 1061
1090 if (!*len) { 1062 if (!*len) {
1091 ext4_debug("ext4 move extent: len shoudld not be 0 " 1063 ext4_debug("ext4 move extent: len should not be 0 "
1092 "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino, 1064 "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
1093 donor_inode->i_ino); 1065 donor_inode->i_ino);
1094 return -EINVAL; 1066 return -EINVAL;
@@ -1232,16 +1204,16 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1232 return -EINVAL; 1204 return -EINVAL;
1233 } 1205 }
1234 1206
1235 /* protect orig and donor against a truncate */ 1207 /* Protect orig and donor inodes against a truncate */
1236 ret1 = mext_inode_double_lock(orig_inode, donor_inode); 1208 ret1 = mext_inode_double_lock(orig_inode, donor_inode);
1237 if (ret1 < 0) 1209 if (ret1 < 0)
1238 return ret1; 1210 return ret1;
1239 1211
1240 mext_double_down_read(orig_inode, donor_inode); 1212 /* Protect extent tree against block allocations via delalloc */
1213 double_down_write_data_sem(orig_inode, donor_inode);
1241 /* Check the filesystem environment whether move_extent can be done */ 1214 /* Check the filesystem environment whether move_extent can be done */
1242 ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start, 1215 ret1 = mext_check_arguments(orig_inode, donor_inode, orig_start,
1243 donor_start, &len, *moved_len); 1216 donor_start, &len);
1244 mext_double_up_read(orig_inode, donor_inode);
1245 if (ret1) 1217 if (ret1)
1246 goto out; 1218 goto out;
1247 1219
@@ -1355,36 +1327,39 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1355 seq_start = le32_to_cpu(ext_cur->ee_block); 1327 seq_start = le32_to_cpu(ext_cur->ee_block);
1356 rest_blocks = seq_blocks; 1328 rest_blocks = seq_blocks;
1357 1329
1358 /* Discard preallocations of two inodes */ 1330 /*
1359 down_write(&EXT4_I(orig_inode)->i_data_sem); 1331 * Up semaphore to avoid following problems:
1360 ext4_discard_preallocations(orig_inode); 1332 * a. transaction deadlock among ext4_journal_start,
1361 up_write(&EXT4_I(orig_inode)->i_data_sem); 1333 * ->write_begin via pagefault, and jbd2_journal_commit
1362 1334 * b. racing with ->readpage, ->write_begin, and ext4_get_block
1363 down_write(&EXT4_I(donor_inode)->i_data_sem); 1335 * in move_extent_per_page
1364 ext4_discard_preallocations(donor_inode); 1336 */
1365 up_write(&EXT4_I(donor_inode)->i_data_sem); 1337 double_up_write_data_sem(orig_inode, donor_inode);
1366 1338
1367 while (orig_page_offset <= seq_end_page) { 1339 while (orig_page_offset <= seq_end_page) {
1368 1340
1369 /* Swap original branches with new branches */ 1341 /* Swap original branches with new branches */
1370 ret1 = move_extent_per_page(o_filp, donor_inode, 1342 block_len_in_page = move_extent_per_page(
1343 o_filp, donor_inode,
1371 orig_page_offset, 1344 orig_page_offset,
1372 data_offset_in_page, 1345 data_offset_in_page,
1373 block_len_in_page, uninit); 1346 block_len_in_page, uninit,
1374 if (ret1 < 0) 1347 &ret1);
1375 goto out; 1348
1376 orig_page_offset++;
1377 /* Count how many blocks we have exchanged */ 1349 /* Count how many blocks we have exchanged */
1378 *moved_len += block_len_in_page; 1350 *moved_len += block_len_in_page;
1351 if (ret1 < 0)
1352 break;
1379 if (*moved_len > len) { 1353 if (*moved_len > len) {
1380 ext4_error(orig_inode->i_sb, __func__, 1354 ext4_error(orig_inode->i_sb, __func__,
1381 "We replaced blocks too much! " 1355 "We replaced blocks too much! "
1382 "sum of replaced: %llu requested: %llu", 1356 "sum of replaced: %llu requested: %llu",
1383 *moved_len, len); 1357 *moved_len, len);
1384 ret1 = -EIO; 1358 ret1 = -EIO;
1385 goto out; 1359 break;
1386 } 1360 }
1387 1361
1362 orig_page_offset++;
1388 data_offset_in_page = 0; 1363 data_offset_in_page = 0;
1389 rest_blocks -= block_len_in_page; 1364 rest_blocks -= block_len_in_page;
1390 if (rest_blocks > blocks_per_page) 1365 if (rest_blocks > blocks_per_page)
@@ -1393,6 +1368,10 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1393 block_len_in_page = rest_blocks; 1368 block_len_in_page = rest_blocks;
1394 } 1369 }
1395 1370
1371 double_down_write_data_sem(orig_inode, donor_inode);
1372 if (ret1 < 0)
1373 break;
1374
1396 /* Decrease buffer counter */ 1375 /* Decrease buffer counter */
1397 if (holecheck_path) 1376 if (holecheck_path)
1398 ext4_ext_drop_refs(holecheck_path); 1377 ext4_ext_drop_refs(holecheck_path);
@@ -1414,6 +1393,11 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1414 1393
1415 } 1394 }
1416out: 1395out:
1396 if (*moved_len) {
1397 ext4_discard_preallocations(orig_inode);
1398 ext4_discard_preallocations(donor_inode);
1399 }
1400
1417 if (orig_path) { 1401 if (orig_path) {
1418 ext4_ext_drop_refs(orig_path); 1402 ext4_ext_drop_refs(orig_path);
1419 kfree(orig_path); 1403 kfree(orig_path);
@@ -1422,7 +1406,7 @@ out:
1422 ext4_ext_drop_refs(holecheck_path); 1406 ext4_ext_drop_refs(holecheck_path);
1423 kfree(holecheck_path); 1407 kfree(holecheck_path);
1424 } 1408 }
1425 1409 double_up_write_data_sem(orig_inode, donor_inode);
1426 ret2 = mext_inode_double_unlock(orig_inode, donor_inode); 1410 ret2 = mext_inode_double_unlock(orig_inode, donor_inode);
1427 1411
1428 if (ret1) 1412 if (ret1)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 7c8fe80bacdd..17a17e10dd60 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1292,9 +1292,6 @@ errout:
1292 * add_dirent_to_buf will attempt search the directory block for 1292 * add_dirent_to_buf will attempt search the directory block for
1293 * space. It will return -ENOSPC if no space is available, and -EIO 1293 * space. It will return -ENOSPC if no space is available, and -EIO
1294 * and -EEXIST if directory entry already exists. 1294 * and -EEXIST if directory entry already exists.
1295 *
1296 * NOTE! bh is NOT released in the case where ENOSPC is returned. In
1297 * all other cases bh is released.
1298 */ 1295 */
1299static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry, 1296static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1300 struct inode *inode, struct ext4_dir_entry_2 *de, 1297 struct inode *inode, struct ext4_dir_entry_2 *de,
@@ -1315,14 +1312,10 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1315 top = bh->b_data + blocksize - reclen; 1312 top = bh->b_data + blocksize - reclen;
1316 while ((char *) de <= top) { 1313 while ((char *) de <= top) {
1317 if (!ext4_check_dir_entry("ext4_add_entry", dir, de, 1314 if (!ext4_check_dir_entry("ext4_add_entry", dir, de,
1318 bh, offset)) { 1315 bh, offset))
1319 brelse(bh);
1320 return -EIO; 1316 return -EIO;
1321 } 1317 if (ext4_match(namelen, name, de))
1322 if (ext4_match(namelen, name, de)) {
1323 brelse(bh);
1324 return -EEXIST; 1318 return -EEXIST;
1325 }
1326 nlen = EXT4_DIR_REC_LEN(de->name_len); 1319 nlen = EXT4_DIR_REC_LEN(de->name_len);
1327 rlen = ext4_rec_len_from_disk(de->rec_len, blocksize); 1320 rlen = ext4_rec_len_from_disk(de->rec_len, blocksize);
1328 if ((de->inode? rlen - nlen: rlen) >= reclen) 1321 if ((de->inode? rlen - nlen: rlen) >= reclen)
@@ -1337,7 +1330,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1337 err = ext4_journal_get_write_access(handle, bh); 1330 err = ext4_journal_get_write_access(handle, bh);
1338 if (err) { 1331 if (err) {
1339 ext4_std_error(dir->i_sb, err); 1332 ext4_std_error(dir->i_sb, err);
1340 brelse(bh);
1341 return err; 1333 return err;
1342 } 1334 }
1343 1335
@@ -1377,7 +1369,6 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1377 err = ext4_handle_dirty_metadata(handle, dir, bh); 1369 err = ext4_handle_dirty_metadata(handle, dir, bh);
1378 if (err) 1370 if (err)
1379 ext4_std_error(dir->i_sb, err); 1371 ext4_std_error(dir->i_sb, err);
1380 brelse(bh);
1381 return 0; 1372 return 0;
1382} 1373}
1383 1374
@@ -1471,7 +1462,9 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1471 if (!(de)) 1462 if (!(de))
1472 return retval; 1463 return retval;
1473 1464
1474 return add_dirent_to_buf(handle, dentry, inode, de, bh); 1465 retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
1466 brelse(bh);
1467 return retval;
1475} 1468}
1476 1469
1477/* 1470/*
@@ -1514,16 +1507,14 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1514 if(!bh) 1507 if(!bh)
1515 return retval; 1508 return retval;
1516 retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh); 1509 retval = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
1517 if (retval != -ENOSPC) 1510 if (retval != -ENOSPC) {
1511 brelse(bh);
1518 return retval; 1512 return retval;
1513 }
1519 1514
1520 if (blocks == 1 && !dx_fallback && 1515 if (blocks == 1 && !dx_fallback &&
1521 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX)) { 1516 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_DIR_INDEX))
1522 retval = make_indexed_dir(handle, dentry, inode, bh); 1517 return make_indexed_dir(handle, dentry, inode, bh);
1523 if (retval == -ENOSPC)
1524 brelse(bh);
1525 return retval;
1526 }
1527 brelse(bh); 1518 brelse(bh);
1528 } 1519 }
1529 bh = ext4_append(handle, dir, &block, &retval); 1520 bh = ext4_append(handle, dir, &block, &retval);
@@ -1533,8 +1524,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1533 de->inode = 0; 1524 de->inode = 0;
1534 de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize); 1525 de->rec_len = ext4_rec_len_to_disk(blocksize, blocksize);
1535 retval = add_dirent_to_buf(handle, dentry, inode, de, bh); 1526 retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
1536 if (retval == -ENOSPC) 1527 brelse(bh);
1537 brelse(bh);
1538 return retval; 1528 return retval;
1539} 1529}
1540 1530
@@ -1568,10 +1558,8 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1568 goto journal_error; 1558 goto journal_error;
1569 1559
1570 err = add_dirent_to_buf(handle, dentry, inode, NULL, bh); 1560 err = add_dirent_to_buf(handle, dentry, inode, NULL, bh);
1571 if (err != -ENOSPC) { 1561 if (err != -ENOSPC)
1572 bh = NULL;
1573 goto cleanup; 1562 goto cleanup;
1574 }
1575 1563
1576 /* Block full, should compress but for now just split */ 1564 /* Block full, should compress but for now just split */
1577 dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n", 1565 dxtrace(printk(KERN_DEBUG "using %u of %u node entries\n",
@@ -1664,8 +1652,6 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1664 if (!de) 1652 if (!de)
1665 goto cleanup; 1653 goto cleanup;
1666 err = add_dirent_to_buf(handle, dentry, inode, de, bh); 1654 err = add_dirent_to_buf(handle, dentry, inode, de, bh);
1667 if (err != -ENOSPC)
1668 bh = NULL;
1669 goto cleanup; 1655 goto cleanup;
1670 1656
1671journal_error: 1657journal_error:
@@ -1783,7 +1769,7 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, int mode,
1783retry: 1769retry:
1784 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 1770 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
1785 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1771 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1786 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); 1772 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
1787 if (IS_ERR(handle)) 1773 if (IS_ERR(handle))
1788 return PTR_ERR(handle); 1774 return PTR_ERR(handle);
1789 1775
@@ -1817,7 +1803,7 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
1817retry: 1803retry:
1818 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 1804 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
1819 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1805 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1820 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); 1806 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
1821 if (IS_ERR(handle)) 1807 if (IS_ERR(handle))
1822 return PTR_ERR(handle); 1808 return PTR_ERR(handle);
1823 1809
@@ -1854,7 +1840,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1854retry: 1840retry:
1855 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 1841 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
1856 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 + 1842 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3 +
1857 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); 1843 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
1858 if (IS_ERR(handle)) 1844 if (IS_ERR(handle))
1859 return PTR_ERR(handle); 1845 return PTR_ERR(handle);
1860 1846
@@ -2267,7 +2253,7 @@ static int ext4_symlink(struct inode *dir,
2267retry: 2253retry:
2268 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) + 2254 handle = ext4_journal_start(dir, EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
2269 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 + 2255 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 5 +
2270 2*EXT4_QUOTA_INIT_BLOCKS(dir->i_sb)); 2256 EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb));
2271 if (IS_ERR(handle)) 2257 if (IS_ERR(handle))
2272 return PTR_ERR(handle); 2258 return PTR_ERR(handle);
2273 2259
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 3cfc343c41b5..3b2c5541d8a6 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -247,7 +247,7 @@ static int setup_new_group_blocks(struct super_block *sb,
247 goto exit_bh; 247 goto exit_bh;
248 248
249 if (IS_ERR(gdb = bclean(handle, sb, block))) { 249 if (IS_ERR(gdb = bclean(handle, sb, block))) {
250 err = PTR_ERR(bh); 250 err = PTR_ERR(gdb);
251 goto exit_bh; 251 goto exit_bh;
252 } 252 }
253 ext4_handle_dirty_metadata(handle, NULL, gdb); 253 ext4_handle_dirty_metadata(handle, NULL, gdb);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 12e726a7073f..768c111a77ec 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -603,10 +603,6 @@ static void ext4_put_super(struct super_block *sb)
603 if (sb->s_dirt) 603 if (sb->s_dirt)
604 ext4_commit_super(sb, 1); 604 ext4_commit_super(sb, 1);
605 605
606 ext4_release_system_zone(sb);
607 ext4_mb_release(sb);
608 ext4_ext_release(sb);
609 ext4_xattr_put_super(sb);
610 if (sbi->s_journal) { 606 if (sbi->s_journal) {
611 err = jbd2_journal_destroy(sbi->s_journal); 607 err = jbd2_journal_destroy(sbi->s_journal);
612 sbi->s_journal = NULL; 608 sbi->s_journal = NULL;
@@ -614,6 +610,12 @@ static void ext4_put_super(struct super_block *sb)
614 ext4_abort(sb, __func__, 610 ext4_abort(sb, __func__,
615 "Couldn't clean up the journal"); 611 "Couldn't clean up the journal");
616 } 612 }
613
614 ext4_release_system_zone(sb);
615 ext4_mb_release(sb);
616 ext4_ext_release(sb);
617 ext4_xattr_put_super(sb);
618
617 if (!(sb->s_flags & MS_RDONLY)) { 619 if (!(sb->s_flags & MS_RDONLY)) {
618 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 620 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
619 es->s_state = cpu_to_le16(sbi->s_mount_state); 621 es->s_state = cpu_to_le16(sbi->s_mount_state);
@@ -704,6 +706,8 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
704 spin_lock_init(&(ei->i_block_reservation_lock)); 706 spin_lock_init(&(ei->i_block_reservation_lock));
705 INIT_LIST_HEAD(&ei->i_aio_dio_complete_list); 707 INIT_LIST_HEAD(&ei->i_aio_dio_complete_list);
706 ei->cur_aio_dio = NULL; 708 ei->cur_aio_dio = NULL;
709 ei->i_sync_tid = 0;
710 ei->i_datasync_tid = 0;
707 711
708 return &ei->vfs_inode; 712 return &ei->vfs_inode;
709} 713}
@@ -765,9 +769,22 @@ static inline void ext4_show_quota_options(struct seq_file *seq,
765#if defined(CONFIG_QUOTA) 769#if defined(CONFIG_QUOTA)
766 struct ext4_sb_info *sbi = EXT4_SB(sb); 770 struct ext4_sb_info *sbi = EXT4_SB(sb);
767 771
768 if (sbi->s_jquota_fmt) 772 if (sbi->s_jquota_fmt) {
769 seq_printf(seq, ",jqfmt=%s", 773 char *fmtname = "";
770 (sbi->s_jquota_fmt == QFMT_VFS_OLD) ? "vfsold" : "vfsv0"); 774
775 switch (sbi->s_jquota_fmt) {
776 case QFMT_VFS_OLD:
777 fmtname = "vfsold";
778 break;
779 case QFMT_VFS_V0:
780 fmtname = "vfsv0";
781 break;
782 case QFMT_VFS_V1:
783 fmtname = "vfsv1";
784 break;
785 }
786 seq_printf(seq, ",jqfmt=%s", fmtname);
787 }
771 788
772 if (sbi->s_qf_names[USRQUOTA]) 789 if (sbi->s_qf_names[USRQUOTA])
773 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]); 790 seq_printf(seq, ",usrjquota=%s", sbi->s_qf_names[USRQUOTA]);
@@ -899,6 +916,12 @@ static int ext4_show_options(struct seq_file *seq, struct vfsmount *vfs)
899 if (test_opt(sb, NO_AUTO_DA_ALLOC)) 916 if (test_opt(sb, NO_AUTO_DA_ALLOC))
900 seq_puts(seq, ",noauto_da_alloc"); 917 seq_puts(seq, ",noauto_da_alloc");
901 918
919 if (test_opt(sb, DISCARD))
920 seq_puts(seq, ",discard");
921
922 if (test_opt(sb, NOLOAD))
923 seq_puts(seq, ",norecovery");
924
902 ext4_show_quota_options(seq, sb); 925 ext4_show_quota_options(seq, sb);
903 926
904 return 0; 927 return 0;
@@ -1074,12 +1097,13 @@ enum {
1074 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback, 1097 Opt_abort, Opt_data_journal, Opt_data_ordered, Opt_data_writeback,
1075 Opt_data_err_abort, Opt_data_err_ignore, 1098 Opt_data_err_abort, Opt_data_err_ignore,
1076 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota, 1099 Opt_usrjquota, Opt_grpjquota, Opt_offusrjquota, Opt_offgrpjquota,
1077 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_quota, Opt_noquota, 1100 Opt_jqfmt_vfsold, Opt_jqfmt_vfsv0, Opt_jqfmt_vfsv1, Opt_quota,
1078 Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err, Opt_resize, 1101 Opt_noquota, Opt_ignore, Opt_barrier, Opt_nobarrier, Opt_err,
1079 Opt_usrquota, Opt_grpquota, Opt_i_version, 1102 Opt_resize, Opt_usrquota, Opt_grpquota, Opt_i_version,
1080 Opt_stripe, Opt_delalloc, Opt_nodelalloc, 1103 Opt_stripe, Opt_delalloc, Opt_nodelalloc,
1081 Opt_block_validity, Opt_noblock_validity, 1104 Opt_block_validity, Opt_noblock_validity,
1082 Opt_inode_readahead_blks, Opt_journal_ioprio 1105 Opt_inode_readahead_blks, Opt_journal_ioprio,
1106 Opt_discard, Opt_nodiscard,
1083}; 1107};
1084 1108
1085static const match_table_t tokens = { 1109static const match_table_t tokens = {
@@ -1104,6 +1128,7 @@ static const match_table_t tokens = {
1104 {Opt_acl, "acl"}, 1128 {Opt_acl, "acl"},
1105 {Opt_noacl, "noacl"}, 1129 {Opt_noacl, "noacl"},
1106 {Opt_noload, "noload"}, 1130 {Opt_noload, "noload"},
1131 {Opt_noload, "norecovery"},
1107 {Opt_nobh, "nobh"}, 1132 {Opt_nobh, "nobh"},
1108 {Opt_bh, "bh"}, 1133 {Opt_bh, "bh"},
1109 {Opt_commit, "commit=%u"}, 1134 {Opt_commit, "commit=%u"},
@@ -1125,6 +1150,7 @@ static const match_table_t tokens = {
1125 {Opt_grpjquota, "grpjquota=%s"}, 1150 {Opt_grpjquota, "grpjquota=%s"},
1126 {Opt_jqfmt_vfsold, "jqfmt=vfsold"}, 1151 {Opt_jqfmt_vfsold, "jqfmt=vfsold"},
1127 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"}, 1152 {Opt_jqfmt_vfsv0, "jqfmt=vfsv0"},
1153 {Opt_jqfmt_vfsv1, "jqfmt=vfsv1"},
1128 {Opt_grpquota, "grpquota"}, 1154 {Opt_grpquota, "grpquota"},
1129 {Opt_noquota, "noquota"}, 1155 {Opt_noquota, "noquota"},
1130 {Opt_quota, "quota"}, 1156 {Opt_quota, "quota"},
@@ -1144,6 +1170,8 @@ static const match_table_t tokens = {
1144 {Opt_auto_da_alloc, "auto_da_alloc=%u"}, 1170 {Opt_auto_da_alloc, "auto_da_alloc=%u"},
1145 {Opt_auto_da_alloc, "auto_da_alloc"}, 1171 {Opt_auto_da_alloc, "auto_da_alloc"},
1146 {Opt_noauto_da_alloc, "noauto_da_alloc"}, 1172 {Opt_noauto_da_alloc, "noauto_da_alloc"},
1173 {Opt_discard, "discard"},
1174 {Opt_nodiscard, "nodiscard"},
1147 {Opt_err, NULL}, 1175 {Opt_err, NULL},
1148}; 1176};
1149 1177
@@ -1300,9 +1328,11 @@ static int parse_options(char *options, struct super_block *sb,
1300 *journal_devnum = option; 1328 *journal_devnum = option;
1301 break; 1329 break;
1302 case Opt_journal_checksum: 1330 case Opt_journal_checksum:
1303 break; /* Kept for backwards compatibility */ 1331 set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
1332 break;
1304 case Opt_journal_async_commit: 1333 case Opt_journal_async_commit:
1305 set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT); 1334 set_opt(sbi->s_mount_opt, JOURNAL_ASYNC_COMMIT);
1335 set_opt(sbi->s_mount_opt, JOURNAL_CHECKSUM);
1306 break; 1336 break;
1307 case Opt_noload: 1337 case Opt_noload:
1308 set_opt(sbi->s_mount_opt, NOLOAD); 1338 set_opt(sbi->s_mount_opt, NOLOAD);
@@ -1423,6 +1453,9 @@ clear_qf_name:
1423 goto set_qf_format; 1453 goto set_qf_format;
1424 case Opt_jqfmt_vfsv0: 1454 case Opt_jqfmt_vfsv0:
1425 qfmt = QFMT_VFS_V0; 1455 qfmt = QFMT_VFS_V0;
1456 goto set_qf_format;
1457 case Opt_jqfmt_vfsv1:
1458 qfmt = QFMT_VFS_V1;
1426set_qf_format: 1459set_qf_format:
1427 if (sb_any_quota_loaded(sb) && 1460 if (sb_any_quota_loaded(sb) &&
1428 sbi->s_jquota_fmt != qfmt) { 1461 sbi->s_jquota_fmt != qfmt) {
@@ -1465,6 +1498,7 @@ set_qf_format:
1465 case Opt_offgrpjquota: 1498 case Opt_offgrpjquota:
1466 case Opt_jqfmt_vfsold: 1499 case Opt_jqfmt_vfsold:
1467 case Opt_jqfmt_vfsv0: 1500 case Opt_jqfmt_vfsv0:
1501 case Opt_jqfmt_vfsv1:
1468 ext4_msg(sb, KERN_ERR, 1502 ext4_msg(sb, KERN_ERR,
1469 "journaled quota options not supported"); 1503 "journaled quota options not supported");
1470 break; 1504 break;
@@ -1563,6 +1597,12 @@ set_qf_format:
1563 else 1597 else
1564 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC); 1598 set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
1565 break; 1599 break;
1600 case Opt_discard:
1601 set_opt(sbi->s_mount_opt, DISCARD);
1602 break;
1603 case Opt_nodiscard:
1604 clear_opt(sbi->s_mount_opt, DISCARD);
1605 break;
1566 default: 1606 default:
1567 ext4_msg(sb, KERN_ERR, 1607 ext4_msg(sb, KERN_ERR,
1568 "Unrecognized mount option \"%s\" " 1608 "Unrecognized mount option \"%s\" "
@@ -1671,14 +1711,14 @@ static int ext4_fill_flex_info(struct super_block *sb)
1671 size_t size; 1711 size_t size;
1672 int i; 1712 int i;
1673 1713
1674 if (!sbi->s_es->s_log_groups_per_flex) { 1714 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
1715 groups_per_flex = 1 << sbi->s_log_groups_per_flex;
1716
1717 if (groups_per_flex < 2) {
1675 sbi->s_log_groups_per_flex = 0; 1718 sbi->s_log_groups_per_flex = 0;
1676 return 1; 1719 return 1;
1677 } 1720 }
1678 1721
1679 sbi->s_log_groups_per_flex = sbi->s_es->s_log_groups_per_flex;
1680 groups_per_flex = 1 << sbi->s_log_groups_per_flex;
1681
1682 /* We allocate both existing and potentially added groups */ 1722 /* We allocate both existing and potentially added groups */
1683 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) + 1723 flex_group_count = ((sbi->s_groups_count + groups_per_flex - 1) +
1684 ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) << 1724 ((le16_to_cpu(sbi->s_es->s_reserved_gdt_blocks) + 1) <<
@@ -2719,26 +2759,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2719 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 2759 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
2720 if (ext4_load_journal(sb, es, journal_devnum)) 2760 if (ext4_load_journal(sb, es, journal_devnum))
2721 goto failed_mount3; 2761 goto failed_mount3;
2722 if (!(sb->s_flags & MS_RDONLY) &&
2723 EXT4_SB(sb)->s_journal->j_failed_commit) {
2724 ext4_msg(sb, KERN_CRIT, "error: "
2725 "ext4_fill_super: Journal transaction "
2726 "%u is corrupt",
2727 EXT4_SB(sb)->s_journal->j_failed_commit);
2728 if (test_opt(sb, ERRORS_RO)) {
2729 ext4_msg(sb, KERN_CRIT,
2730 "Mounting filesystem read-only");
2731 sb->s_flags |= MS_RDONLY;
2732 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2733 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2734 }
2735 if (test_opt(sb, ERRORS_PANIC)) {
2736 EXT4_SB(sb)->s_mount_state |= EXT4_ERROR_FS;
2737 es->s_state |= cpu_to_le16(EXT4_ERROR_FS);
2738 ext4_commit_super(sb, 1);
2739 goto failed_mount4;
2740 }
2741 }
2742 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && 2762 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
2743 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 2763 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
2744 ext4_msg(sb, KERN_ERR, "required journal recovery " 2764 ext4_msg(sb, KERN_ERR, "required journal recovery "
@@ -2759,14 +2779,20 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
2759 goto failed_mount4; 2779 goto failed_mount4;
2760 } 2780 }
2761 2781
2762 jbd2_journal_set_features(sbi->s_journal, 2782 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
2763 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0); 2783 jbd2_journal_set_features(sbi->s_journal,
2764 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) 2784 JBD2_FEATURE_COMPAT_CHECKSUM, 0,
2765 jbd2_journal_set_features(sbi->s_journal, 0, 0,
2766 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 2785 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2767 else 2786 } else if (test_opt(sb, JOURNAL_CHECKSUM)) {
2787 jbd2_journal_set_features(sbi->s_journal,
2788 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 0);
2768 jbd2_journal_clear_features(sbi->s_journal, 0, 0, 2789 jbd2_journal_clear_features(sbi->s_journal, 0, 0,
2769 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 2790 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2791 } else {
2792 jbd2_journal_clear_features(sbi->s_journal,
2793 JBD2_FEATURE_COMPAT_CHECKSUM, 0,
2794 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
2795 }
2770 2796
2771 /* We have now updated the journal if required, so we can 2797 /* We have now updated the journal if required, so we can
2772 * validate the data journaling mode. */ 2798 * validate the data journaling mode. */
@@ -3660,13 +3686,11 @@ static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf)
3660 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last; 3686 buf->f_blocks = ext4_blocks_count(es) - sbi->s_overhead_last;
3661 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) - 3687 buf->f_bfree = percpu_counter_sum_positive(&sbi->s_freeblocks_counter) -
3662 percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter); 3688 percpu_counter_sum_positive(&sbi->s_dirtyblocks_counter);
3663 ext4_free_blocks_count_set(es, buf->f_bfree);
3664 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es); 3689 buf->f_bavail = buf->f_bfree - ext4_r_blocks_count(es);
3665 if (buf->f_bfree < ext4_r_blocks_count(es)) 3690 if (buf->f_bfree < ext4_r_blocks_count(es))
3666 buf->f_bavail = 0; 3691 buf->f_bavail = 0;
3667 buf->f_files = le32_to_cpu(es->s_inodes_count); 3692 buf->f_files = le32_to_cpu(es->s_inodes_count);
3668 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter); 3693 buf->f_ffree = percpu_counter_sum_positive(&sbi->s_freeinodes_counter);
3669 es->s_free_inodes_count = cpu_to_le32(buf->f_ffree);
3670 buf->f_namelen = EXT4_NAME_LEN; 3694 buf->f_namelen = EXT4_NAME_LEN;
3671 fsid = le64_to_cpup((void *)es->s_uuid) ^ 3695 fsid = le64_to_cpup((void *)es->s_uuid) ^
3672 le64_to_cpup((void *)es->s_uuid + sizeof(u64)); 3696 le64_to_cpup((void *)es->s_uuid + sizeof(u64));
@@ -3958,35 +3982,66 @@ static int ext4_get_sb(struct file_system_type *fs_type, int flags,
3958 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt); 3982 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
3959} 3983}
3960 3984
3961static struct file_system_type ext4_fs_type = { 3985#if !defined(CONFIG_EXT2_FS) && !defined(CONFIG_EXT2_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
3986static struct file_system_type ext2_fs_type = {
3962 .owner = THIS_MODULE, 3987 .owner = THIS_MODULE,
3963 .name = "ext4", 3988 .name = "ext2",
3964 .get_sb = ext4_get_sb, 3989 .get_sb = ext4_get_sb,
3965 .kill_sb = kill_block_super, 3990 .kill_sb = kill_block_super,
3966 .fs_flags = FS_REQUIRES_DEV, 3991 .fs_flags = FS_REQUIRES_DEV,
3967}; 3992};
3968 3993
3969#ifdef CONFIG_EXT4DEV_COMPAT 3994static inline void register_as_ext2(void)
3970static int ext4dev_get_sb(struct file_system_type *fs_type, int flags,
3971 const char *dev_name, void *data,struct vfsmount *mnt)
3972{ 3995{
3973 printk(KERN_WARNING "EXT4-fs (%s): Update your userspace programs " 3996 int err = register_filesystem(&ext2_fs_type);
3974 "to mount using ext4\n", dev_name); 3997 if (err)
3975 printk(KERN_WARNING "EXT4-fs (%s): ext4dev backwards compatibility " 3998 printk(KERN_WARNING
3976 "will go away by 2.6.31\n", dev_name); 3999 "EXT4-fs: Unable to register as ext2 (%d)\n", err);
3977 return get_sb_bdev(fs_type, flags, dev_name, data, ext4_fill_super,mnt);
3978} 4000}
3979 4001
3980static struct file_system_type ext4dev_fs_type = { 4002static inline void unregister_as_ext2(void)
4003{
4004 unregister_filesystem(&ext2_fs_type);
4005}
4006#else
4007static inline void register_as_ext2(void) { }
4008static inline void unregister_as_ext2(void) { }
4009#endif
4010
4011#if !defined(CONFIG_EXT3_FS) && !defined(CONFIG_EXT3_FS_MODULE) && defined(CONFIG_EXT4_USE_FOR_EXT23)
4012static struct file_system_type ext3_fs_type = {
3981 .owner = THIS_MODULE, 4013 .owner = THIS_MODULE,
3982 .name = "ext4dev", 4014 .name = "ext3",
3983 .get_sb = ext4dev_get_sb, 4015 .get_sb = ext4_get_sb,
3984 .kill_sb = kill_block_super, 4016 .kill_sb = kill_block_super,
3985 .fs_flags = FS_REQUIRES_DEV, 4017 .fs_flags = FS_REQUIRES_DEV,
3986}; 4018};
3987MODULE_ALIAS("ext4dev"); 4019
4020static inline void register_as_ext3(void)
4021{
4022 int err = register_filesystem(&ext3_fs_type);
4023 if (err)
4024 printk(KERN_WARNING
4025 "EXT4-fs: Unable to register as ext3 (%d)\n", err);
4026}
4027
4028static inline void unregister_as_ext3(void)
4029{
4030 unregister_filesystem(&ext3_fs_type);
4031}
4032#else
4033static inline void register_as_ext3(void) { }
4034static inline void unregister_as_ext3(void) { }
3988#endif 4035#endif
3989 4036
4037static struct file_system_type ext4_fs_type = {
4038 .owner = THIS_MODULE,
4039 .name = "ext4",
4040 .get_sb = ext4_get_sb,
4041 .kill_sb = kill_block_super,
4042 .fs_flags = FS_REQUIRES_DEV,
4043};
4044
3990static int __init init_ext4_fs(void) 4045static int __init init_ext4_fs(void)
3991{ 4046{
3992 int err; 4047 int err;
@@ -4008,18 +4063,15 @@ static int __init init_ext4_fs(void)
4008 err = init_inodecache(); 4063 err = init_inodecache();
4009 if (err) 4064 if (err)
4010 goto out1; 4065 goto out1;
4066 register_as_ext2();
4067 register_as_ext3();
4011 err = register_filesystem(&ext4_fs_type); 4068 err = register_filesystem(&ext4_fs_type);
4012 if (err) 4069 if (err)
4013 goto out; 4070 goto out;
4014#ifdef CONFIG_EXT4DEV_COMPAT
4015 err = register_filesystem(&ext4dev_fs_type);
4016 if (err) {
4017 unregister_filesystem(&ext4_fs_type);
4018 goto out;
4019 }
4020#endif
4021 return 0; 4071 return 0;
4022out: 4072out:
4073 unregister_as_ext2();
4074 unregister_as_ext3();
4023 destroy_inodecache(); 4075 destroy_inodecache();
4024out1: 4076out1:
4025 exit_ext4_xattr(); 4077 exit_ext4_xattr();
@@ -4035,10 +4087,9 @@ out4:
4035 4087
4036static void __exit exit_ext4_fs(void) 4088static void __exit exit_ext4_fs(void)
4037{ 4089{
4090 unregister_as_ext2();
4091 unregister_as_ext3();
4038 unregister_filesystem(&ext4_fs_type); 4092 unregister_filesystem(&ext4_fs_type);
4039#ifdef CONFIG_EXT4DEV_COMPAT
4040 unregister_filesystem(&ext4dev_fs_type);
4041#endif
4042 destroy_inodecache(); 4093 destroy_inodecache();
4043 exit_ext4_xattr(); 4094 exit_ext4_xattr();
4044 exit_ext4_mballoc(); 4095 exit_ext4_mballoc();
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index fed5b01d7a8d..910bf9a59cb3 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -482,9 +482,10 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
482 ea_bdebug(bh, "refcount now=0; freeing"); 482 ea_bdebug(bh, "refcount now=0; freeing");
483 if (ce) 483 if (ce)
484 mb_cache_entry_free(ce); 484 mb_cache_entry_free(ce);
485 ext4_free_blocks(handle, inode, bh->b_blocknr, 1, 1);
486 get_bh(bh); 485 get_bh(bh);
487 ext4_forget(handle, 1, inode, bh, bh->b_blocknr); 486 ext4_free_blocks(handle, inode, bh, 0, 1,
487 EXT4_FREE_BLOCKS_METADATA |
488 EXT4_FREE_BLOCKS_FORGET);
488 } else { 489 } else {
489 le32_add_cpu(&BHDR(bh)->h_refcount, -1); 490 le32_add_cpu(&BHDR(bh)->h_refcount, -1);
490 error = ext4_handle_dirty_metadata(handle, inode, bh); 491 error = ext4_handle_dirty_metadata(handle, inode, bh);
@@ -832,7 +833,8 @@ inserted:
832 new_bh = sb_getblk(sb, block); 833 new_bh = sb_getblk(sb, block);
833 if (!new_bh) { 834 if (!new_bh) {
834getblk_failed: 835getblk_failed:
835 ext4_free_blocks(handle, inode, block, 1, 1); 836 ext4_free_blocks(handle, inode, 0, block, 1,
837 EXT4_FREE_BLOCKS_METADATA);
836 error = -EIO; 838 error = -EIO;
837 goto cleanup; 839 goto cleanup;
838 } 840 }
@@ -988,6 +990,10 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
988 if (error) 990 if (error)
989 goto cleanup; 991 goto cleanup;
990 992
993 error = ext4_journal_get_write_access(handle, is.iloc.bh);
994 if (error)
995 goto cleanup;
996
991 if (EXT4_I(inode)->i_state & EXT4_STATE_NEW) { 997 if (EXT4_I(inode)->i_state & EXT4_STATE_NEW) {
992 struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc); 998 struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc);
993 memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); 999 memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
@@ -1013,9 +1019,6 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
1013 if (flags & XATTR_CREATE) 1019 if (flags & XATTR_CREATE)
1014 goto cleanup; 1020 goto cleanup;
1015 } 1021 }
1016 error = ext4_journal_get_write_access(handle, is.iloc.bh);
1017 if (error)
1018 goto cleanup;
1019 if (!value) { 1022 if (!value) {
1020 if (!is.s.not_found) 1023 if (!is.s.not_found)
1021 error = ext4_xattr_ibody_set(handle, inode, &i, &is); 1024 error = ext4_xattr_ibody_set(handle, inode, &i, &is);