path: root/fs/ext4
author		Linus Torvalds <torvalds@linux-foundation.org>	2014-06-08 16:03:35 -0400
committer	Linus Torvalds <torvalds@linux-foundation.org>	2014-06-08 16:03:35 -0400
commit		f8409abdc592e13cefbe4e4a24a84b3d5741e85f (patch)
tree		b016b7158eb080d59bf79605958cb746d545f56d /fs/ext4
parent		b20dcab9d4589ef9918a13c888c5493945adfc13 (diff)
parent		bd9db175dde14b606265e0d37e8319d96fe1a58f (diff)
Merge tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 updates from Ted Ts'o:
 "Clean ups and miscellaneous bug fixes, in particular for the new
  collapse_range and zero_range fallocate functions.  In addition,
  improve the scalability of adding and removing inodes from the
  orphan list"

* tag 'ext4_for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (25 commits)
  ext4: handle symlink properly with inline_data
  ext4: fix wrong assert in ext4_mb_normalize_request()
  ext4: fix zeroing of page during writeback
  ext4: remove unused local variable "stored" from ext4_readdir(...)
  ext4: fix ZERO_RANGE test failure in data journalling
  ext4: reduce contention on s_orphan_lock
  ext4: use sbi in ext4_orphan_{add|del}()
  ext4: use EXT_MAX_BLOCKS in ext4_es_can_be_merged()
  ext4: add missing BUFFER_TRACE before ext4_journal_get_write_access
  ext4: remove unnecessary double parentheses
  ext4: do not destroy ext4_groupinfo_caches if ext4_mb_init() fails
  ext4: make local functions static
  ext4: fix block bitmap validation when bigalloc, ^flex_bg
  ext4: fix block bitmap initialization under sparse_super2
  ext4: find the group descriptors on a 1k-block bigalloc,meta_bg filesystem
  ext4: avoid unneeded lookup when xattr name is invalid
  ext4: fix data integrity sync in ordered mode
  ext4: remove obsoleted check
  ext4: add a new spinlock i_raw_lock to protect the ext4's raw inode
  ext4: fix locking for O_APPEND writes
  ...
Diffstat (limited to 'fs/ext4')
-rw-r--r--  fs/ext4/balloc.c           66
-rw-r--r--  fs/ext4/dir.c               3
-rw-r--r--  fs/ext4/ext4.h             59
-rw-r--r--  fs/ext4/ext4_extents.h     22
-rw-r--r--  fs/ext4/ext4_jbd2.c         7
-rw-r--r--  fs/ext4/ext4_jbd2.h         4
-rw-r--r--  fs/ext4/extents.c         236
-rw-r--r--  fs/ext4/extents_status.c   10
-rw-r--r--  fs/ext4/file.c            153
-rw-r--r--  fs/ext4/inline.c           15
-rw-r--r--  fs/ext4/inode.c            90
-rw-r--r--  fs/ext4/mballoc.c           8
-rw-r--r--  fs/ext4/migrate.c           2
-rw-r--r--  fs/ext4/mmp.c               4
-rw-r--r--  fs/ext4/move_extent.c      39
-rw-r--r--  fs/ext4/namei.c           131
-rw-r--r--  fs/ext4/page-io.c          32
-rw-r--r--  fs/ext4/resize.c           13
-rw-r--r--  fs/ext4/super.c            20
-rw-r--r--  fs/ext4/xattr.c             9
20 files changed, 494 insertions, 429 deletions
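
Several of the bitmap fixes in this pull (e.g. "fix block bitmap validation when bigalloc, ^flex_bg", visible in the balloc.c hunks below) come down to converting a group-relative block offset into a cluster-bitmap bit before testing it; the kernel does this with the EXT4_B2C() macro used in the diff. A minimal user-space sketch of that conversion, not taken from the patches themselves (the helper name here is made up):

#include <stdio.h>

/* Toy model: with bigalloc, the block bitmap carries one bit per cluster of
 * 2^cluster_bits blocks, so a group-relative block offset must be shifted
 * down before it can be tested.  cluster_bits == 0 is the non-bigalloc case
 * (one bit per block), where old and new code behave identically. */
static unsigned long block_to_cluster(unsigned long offset, unsigned int cluster_bits)
{
	return offset >> cluster_bits;
}

int main(void)
{
	/* 16 blocks per cluster: block offset 37 within the group maps to bitmap bit 2 */
	printf("%lu\n", block_to_cluster(37, 4));
	return 0;
}
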
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 5c56785007e0..0762d143e252 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -83,9 +83,9 @@ static inline int ext4_block_in_group(struct super_block *sb,
 /* Return the number of clusters used for file system metadata; this
  * represents the overhead needed by the file system.
  */
-unsigned ext4_num_overhead_clusters(struct super_block *sb,
-				    ext4_group_t block_group,
-				    struct ext4_group_desc *gdp)
+static unsigned ext4_num_overhead_clusters(struct super_block *sb,
+					   ext4_group_t block_group,
+					   struct ext4_group_desc *gdp)
 {
 	unsigned num_clusters;
 	int block_cluster = -1, inode_cluster = -1, itbl_cluster = -1, i, c;
@@ -176,9 +176,10 @@ static unsigned int num_clusters_in_group(struct super_block *sb,
 }
 
 /* Initializes an uninitialized block bitmap */
-void ext4_init_block_bitmap(struct super_block *sb, struct buffer_head *bh,
-			    ext4_group_t block_group,
-			    struct ext4_group_desc *gdp)
+static void ext4_init_block_bitmap(struct super_block *sb,
+				   struct buffer_head *bh,
+				   ext4_group_t block_group,
+				   struct ext4_group_desc *gdp)
 {
 	unsigned int bit, bit_max;
 	struct ext4_sb_info *sbi = EXT4_SB(sb);
@@ -307,6 +308,7 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
 					    ext4_group_t block_group,
 					    struct buffer_head *bh)
 {
+	struct ext4_sb_info *sbi = EXT4_SB(sb);
 	ext4_grpblk_t offset;
 	ext4_grpblk_t next_zero_bit;
 	ext4_fsblk_t blk;
@@ -326,14 +328,14 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
 	/* check whether block bitmap block number is set */
 	blk = ext4_block_bitmap(sb, desc);
 	offset = blk - group_first_block;
-	if (!ext4_test_bit(offset, bh->b_data))
+	if (!ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data))
 		/* bad block bitmap */
 		return blk;
 
 	/* check whether the inode bitmap block number is set */
 	blk = ext4_inode_bitmap(sb, desc);
 	offset = blk - group_first_block;
-	if (!ext4_test_bit(offset, bh->b_data))
+	if (!ext4_test_bit(EXT4_B2C(sbi, offset), bh->b_data))
 		/* bad block bitmap */
 		return blk;
 
@@ -341,18 +343,19 @@ static ext4_fsblk_t ext4_valid_block_bitmap(struct super_block *sb,
 	blk = ext4_inode_table(sb, desc);
 	offset = blk - group_first_block;
 	next_zero_bit = ext4_find_next_zero_bit(bh->b_data,
-			offset + EXT4_SB(sb)->s_itb_per_group,
-			offset);
-	if (next_zero_bit < offset + EXT4_SB(sb)->s_itb_per_group)
+			EXT4_B2C(sbi, offset + EXT4_SB(sb)->s_itb_per_group),
+			EXT4_B2C(sbi, offset));
+	if (next_zero_bit <
+	    EXT4_B2C(sbi, offset + EXT4_SB(sb)->s_itb_per_group))
 		/* bad bitmap for inode tables */
 		return blk;
 	return 0;
 }
 
-void ext4_validate_block_bitmap(struct super_block *sb,
-				struct ext4_group_desc *desc,
-				ext4_group_t block_group,
-				struct buffer_head *bh)
+static void ext4_validate_block_bitmap(struct super_block *sb,
+				       struct ext4_group_desc *desc,
+				       ext4_group_t block_group,
+				       struct buffer_head *bh)
 {
 	ext4_fsblk_t blk;
 	struct ext4_group_info *grp = ext4_get_group_info(sb, block_group);
@@ -708,16 +711,6 @@ static inline int test_root(ext4_group_t a, int b)
 	}
 }
 
-static int ext4_group_sparse(ext4_group_t group)
-{
-	if (group <= 1)
-		return 1;
-	if (!(group & 1))
-		return 0;
-	return (test_root(group, 7) || test_root(group, 5) ||
-		test_root(group, 3));
-}
-
 /**
  *	ext4_bg_has_super - number of blocks used by the superblock in group
  *	@sb: superblock for filesystem
@@ -728,11 +721,26 @@ static int ext4_group_sparse(ext4_group_t group)
  */
 int ext4_bg_has_super(struct super_block *sb, ext4_group_t group)
 {
-	if (EXT4_HAS_RO_COMPAT_FEATURE(sb,
-				EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER) &&
-			!ext4_group_sparse(group))
+	struct ext4_super_block *es = EXT4_SB(sb)->s_es;
+
+	if (group == 0)
+		return 1;
+	if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_SPARSE_SUPER2)) {
+		if (group == le32_to_cpu(es->s_backup_bgs[0]) ||
+		    group == le32_to_cpu(es->s_backup_bgs[1]))
+			return 1;
 		return 0;
-	return 1;
+	}
+	if ((group <= 1) || !EXT4_HAS_RO_COMPAT_FEATURE(sb,
+					EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER))
+		return 1;
+	if (!(group & 1))
+		return 0;
+	if (test_root(group, 3) || (test_root(group, 5)) ||
+	    test_root(group, 7))
+		return 1;
+
+	return 0;
 }
 
 static unsigned long ext4_bg_num_gdb_meta(struct super_block *sb,
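
For readers following the ext4_bg_has_super() rewrite above: under the classic sparse_super feature, superblock backups live only in groups 0 and 1 and in groups that are powers of 3, 5 or 7, while the new sparse_super2 compat feature instead names at most two backup groups in s_backup_bgs[]. A small stand-alone sketch of the classic rule (illustrative only, not the kernel code; the helper names are invented):

#include <stdio.h>

static int is_power_of(unsigned long group, unsigned long base)
{
	while (group % base == 0)
		group /= base;
	return group == 1;
}

/* Mirrors the sparse_super placement rule applied by ext4_bg_has_super() above. */
static int group_has_super(unsigned long group)
{
	if (group <= 1)
		return 1;
	if (!(group & 1))	/* powers of 3, 5 and 7 are all odd */
		return 0;
	return is_power_of(group, 3) || is_power_of(group, 5) ||
	       is_power_of(group, 7);
}

int main(void)
{
	/* prints: 0 1 3 5 7 9 25 27 49 81 125 243 343 625 729 */
	for (unsigned long g = 0; g < 1000; g++)
		if (group_has_super(g))
			printf("%lu ", g);
	printf("\n");
	return 0;
}
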
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index d638c57e996e..ef1bed66c14f 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -105,7 +105,7 @@ int __ext4_check_dir_entry(const char *function, unsigned int line,
 static int ext4_readdir(struct file *file, struct dir_context *ctx)
 {
 	unsigned int offset;
-	int i, stored;
+	int i;
 	struct ext4_dir_entry_2 *de;
 	int err;
 	struct inode *inode = file_inode(file);
@@ -133,7 +133,6 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
 		return ret;
 	}
 
-	stored = 0;
 	offset = ctx->pos & (sb->s_blocksize - 1);
 
 	while (ctx->pos < inode->i_size) {
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 66946aa62127..1479e2ae00d2 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -158,7 +158,6 @@ struct ext4_allocation_request {
 #define EXT4_MAP_MAPPED		(1 << BH_Mapped)
 #define EXT4_MAP_UNWRITTEN	(1 << BH_Unwritten)
 #define EXT4_MAP_BOUNDARY	(1 << BH_Boundary)
-#define EXT4_MAP_UNINIT		(1 << BH_Uninit)
 /* Sometimes (in the bigalloc case, from ext4_da_get_block_prep) the caller of
  * ext4_map_blocks wants to know whether or not the underlying cluster has
  * already been accounted for. EXT4_MAP_FROM_CLUSTER conveys to the caller that
@@ -169,7 +168,7 @@ struct ext4_allocation_request {
 #define EXT4_MAP_FROM_CLUSTER	(1 << BH_AllocFromCluster)
 #define EXT4_MAP_FLAGS		(EXT4_MAP_NEW | EXT4_MAP_MAPPED |\
				 EXT4_MAP_UNWRITTEN | EXT4_MAP_BOUNDARY |\
-				 EXT4_MAP_UNINIT | EXT4_MAP_FROM_CLUSTER)
+				 EXT4_MAP_FROM_CLUSTER)
 
 struct ext4_map_blocks {
 	ext4_fsblk_t m_pblk;
@@ -184,7 +183,7 @@ struct ext4_map_blocks {
 #define EXT4_IO_END_UNWRITTEN	0x0001
 
 /*
- * For converting uninitialized extents on a work queue. 'handle' is used for
+ * For converting unwritten extents on a work queue. 'handle' is used for
  * buffered writeback.
  */
 typedef struct ext4_io_end {
@@ -537,26 +536,26 @@ enum {
 /*
  * Flags used by ext4_map_blocks()
  */
-	/* Allocate any needed blocks and/or convert an unitialized
+	/* Allocate any needed blocks and/or convert an unwritten
	   extent to be an initialized ext4 */
 #define EXT4_GET_BLOCKS_CREATE			0x0001
-	/* Request the creation of an unitialized extent */
-#define EXT4_GET_BLOCKS_UNINIT_EXT		0x0002
-#define EXT4_GET_BLOCKS_CREATE_UNINIT_EXT	(EXT4_GET_BLOCKS_UNINIT_EXT|\
+	/* Request the creation of an unwritten extent */
+#define EXT4_GET_BLOCKS_UNWRIT_EXT		0x0002
+#define EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT	(EXT4_GET_BLOCKS_UNWRIT_EXT|\
						 EXT4_GET_BLOCKS_CREATE)
	/* Caller is from the delayed allocation writeout path
	 * finally doing the actual allocation of delayed blocks */
 #define EXT4_GET_BLOCKS_DELALLOC_RESERVE	0x0004
	/* caller is from the direct IO path, request to creation of an
-	   unitialized extents if not allocated, split the uninitialized
+	   unwritten extents if not allocated, split the unwritten
	   extent if blocks has been preallocated already*/
 #define EXT4_GET_BLOCKS_PRE_IO			0x0008
 #define EXT4_GET_BLOCKS_CONVERT			0x0010
 #define EXT4_GET_BLOCKS_IO_CREATE_EXT		(EXT4_GET_BLOCKS_PRE_IO|\
-					 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
+					 EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT)
	/* Convert extent to initialized after IO complete */
 #define EXT4_GET_BLOCKS_IO_CONVERT_EXT	(EXT4_GET_BLOCKS_CONVERT|\
-					 EXT4_GET_BLOCKS_CREATE_UNINIT_EXT)
+					 EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT)
	/* Eventual metadata allocation (due to growing extent tree)
	 * should not fail, so try to use reserved blocks for that.*/
 #define EXT4_GET_BLOCKS_METADATA_NOFAIL	0x0020
@@ -876,6 +875,8 @@ struct ext4_inode_info {
 	struct inode vfs_inode;
 	struct jbd2_inode *jinode;
 
+	spinlock_t i_raw_lock;	/* protects updates to the raw inode */
+
 	/*
 	 * File creation time. Its function is same as that of
 	 * struct timespec i_{a,c,m}time in the generic inode.
@@ -1159,7 +1160,8 @@ struct ext4_super_block {
 	__le32	s_usr_quota_inum;	/* inode for tracking user quota */
 	__le32	s_grp_quota_inum;	/* inode for tracking group quota */
 	__le32	s_overhead_clusters;	/* overhead blocks/clusters in fs */
-	__le32	s_reserved[108];	/* Padding to the end of the block */
+	__le32	s_backup_bgs[2];	/* groups with sparse_super2 SBs */
+	__le32	s_reserved[106];	/* Padding to the end of the block */
 	__le32	s_checksum;		/* crc32c(superblock) */
 };
 
@@ -1505,6 +1507,7 @@ static inline void ext4_clear_state_flags(struct ext4_inode_info *ei)
 #define EXT4_FEATURE_COMPAT_EXT_ATTR		0x0008
 #define EXT4_FEATURE_COMPAT_RESIZE_INODE	0x0010
 #define EXT4_FEATURE_COMPAT_DIR_INDEX		0x0020
+#define EXT4_FEATURE_COMPAT_SPARSE_SUPER2	0x0200
 
 #define EXT4_FEATURE_RO_COMPAT_SPARSE_SUPER	0x0001
 #define EXT4_FEATURE_RO_COMPAT_LARGE_FILE	0x0002
@@ -1953,10 +1956,6 @@ extern void ext4_get_group_no_and_offset(struct super_block *sb,
 extern ext4_group_t ext4_get_group_number(struct super_block *sb,
					   ext4_fsblk_t block);
 
-extern void ext4_validate_block_bitmap(struct super_block *sb,
-				       struct ext4_group_desc *desc,
-				       ext4_group_t block_group,
-				       struct buffer_head *bh);
 extern unsigned int ext4_block_group(struct super_block *sb,
			ext4_fsblk_t blocknr);
 extern ext4_grpblk_t ext4_block_group_offset(struct super_block *sb,
@@ -1985,16 +1984,9 @@ extern int ext4_wait_block_bitmap(struct super_block *sb,
 				      struct buffer_head *bh);
 extern struct buffer_head *ext4_read_block_bitmap(struct super_block *sb,
						ext4_group_t block_group);
-extern void ext4_init_block_bitmap(struct super_block *sb,
-				   struct buffer_head *bh,
-				   ext4_group_t group,
-				   struct ext4_group_desc *desc);
 extern unsigned ext4_free_clusters_after_init(struct super_block *sb,
					      ext4_group_t block_group,
					      struct ext4_group_desc *gdp);
-extern unsigned ext4_num_overhead_clusters(struct super_block *sb,
-					   ext4_group_t block_group,
-					   struct ext4_group_desc *gdp);
 ext4_fsblk_t ext4_inode_to_goal_block(struct inode *);
 
 /* dir.c */
@@ -2137,8 +2129,6 @@ extern int ext4_alloc_da_blocks(struct inode *inode);
 extern void ext4_set_aops(struct inode *inode);
 extern int ext4_writepage_trans_blocks(struct inode *);
 extern int ext4_chunk_trans_blocks(struct inode *, int nrblocks);
-extern int ext4_block_truncate_page(handle_t *handle,
-		struct address_space *mapping, loff_t from);
 extern int ext4_zero_partial_blocks(handle_t *handle, struct inode *inode,
			     loff_t lstart, loff_t lend);
 extern int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
@@ -2198,8 +2188,6 @@ extern int ext4_resize_fs(struct super_block *sb, ext4_fsblk_t n_blocks_count);
 
 /* super.c */
 extern int ext4_calculate_overhead(struct super_block *sb);
-extern int ext4_superblock_csum_verify(struct super_block *sb,
-				       struct ext4_super_block *es);
 extern void ext4_superblock_csum_set(struct super_block *sb);
 extern void *ext4_kvmalloc(size_t size, gfp_t flags);
 extern void *ext4_kvzalloc(size_t size, gfp_t flags);
@@ -2571,19 +2559,11 @@ extern const struct file_operations ext4_dir_operations;
 extern const struct inode_operations ext4_file_inode_operations;
 extern const struct file_operations ext4_file_operations;
 extern loff_t ext4_llseek(struct file *file, loff_t offset, int origin);
-extern void ext4_unwritten_wait(struct inode *inode);
 
 /* inline.c */
 extern int ext4_has_inline_data(struct inode *inode);
-extern int ext4_get_inline_size(struct inode *inode);
 extern int ext4_get_max_inline_size(struct inode *inode);
 extern int ext4_find_inline_data_nolock(struct inode *inode);
-extern void ext4_write_inline_data(struct inode *inode,
-				   struct ext4_iloc *iloc,
-				   void *buffer, loff_t pos,
-				   unsigned int len);
-extern int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
-				    unsigned int len);
 extern int ext4_init_inline_data(handle_t *handle, struct inode *inode,
				 unsigned int len);
 extern int ext4_destroy_inline_data(handle_t *handle, struct inode *inode);
@@ -2771,23 +2751,20 @@ extern void ext4_io_submit(struct ext4_io_submit *io);
 extern int ext4_bio_write_page(struct ext4_io_submit *io,
			       struct page *page,
			       int len,
-			       struct writeback_control *wbc);
+			       struct writeback_control *wbc,
+			       bool keep_towrite);
 
 /* mmp.c */
 extern int ext4_multi_mount_protect(struct super_block *, ext4_fsblk_t);
-extern void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp);
-extern int ext4_mmp_csum_verify(struct super_block *sb,
-				struct mmp_struct *mmp);
 
 /*
  * Note that these flags will never ever appear in a buffer_head's state flag.
  * See EXT4_MAP_... to see where this is used.
  */
 enum ext4_state_bits {
-	BH_Uninit	/* blocks are allocated but uninitialized on disk */
-	  = BH_JBDPrivateStart,
-	BH_AllocFromCluster,	/* allocated blocks were part of already
+	BH_AllocFromCluster	/* allocated blocks were part of already
				 * allocated cluster. */
+	= BH_JBDPrivateStart
 };
 
 /*
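
One detail worth noting about the s_backup_bgs[] change above: the two new __le32 fields are carved out of the existing s_reserved[] padding (108 -> 106 entries), so the on-disk superblock stays the same size. A toy compile-time check of that bookkeeping (these structs are illustrative, not the kernel's ext4_super_block):

#include <stdint.h>

struct old_tail {
	uint32_t s_reserved[108];	/* padding to the end of the block */
	uint32_t s_checksum;
};

struct new_tail {
	uint32_t s_backup_bgs[2];	/* groups with sparse_super2 SBs */
	uint32_t s_reserved[106];
	uint32_t s_checksum;
};

_Static_assert(sizeof(struct old_tail) == sizeof(struct new_tail),
	       "carving s_backup_bgs out of the padding must not change the size");
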
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 5074fe23f19e..a867f5ca9991 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -137,21 +137,21 @@ struct ext4_ext_path {
  * EXT_INIT_MAX_LEN is the maximum number of blocks we can have in an
  * initialized extent. This is 2^15 and not (2^16 - 1), since we use the
  * MSB of ee_len field in the extent datastructure to signify if this
- * particular extent is an initialized extent or an uninitialized (i.e.
+ * particular extent is an initialized extent or an unwritten (i.e.
  * preallocated).
- * EXT_UNINIT_MAX_LEN is the maximum number of blocks we can have in an
- * uninitialized extent.
+ * EXT_UNWRITTEN_MAX_LEN is the maximum number of blocks we can have in an
+ * unwritten extent.
  * If ee_len is <= 0x8000, it is an initialized extent. Otherwise, it is an
- * uninitialized one. In other words, if MSB of ee_len is set, it is an
- * uninitialized extent with only one special scenario when ee_len = 0x8000.
- * In this case we can not have an uninitialized extent of zero length and
+ * unwritten one. In other words, if MSB of ee_len is set, it is an
+ * unwritten extent with only one special scenario when ee_len = 0x8000.
+ * In this case we can not have an unwritten extent of zero length and
  * thus we make it as a special case of initialized extent with 0x8000 length.
  * This way we get better extent-to-group alignment for initialized extents.
  * Hence, the maximum number of blocks we can have in an *initialized*
- * extent is 2^15 (32768) and in an *uninitialized* extent is 2^15-1 (32767).
+ * extent is 2^15 (32768) and in an *unwritten* extent is 2^15-1 (32767).
  */
 #define EXT_INIT_MAX_LEN	(1UL << 15)
-#define EXT_UNINIT_MAX_LEN	(EXT_INIT_MAX_LEN - 1)
+#define EXT_UNWRITTEN_MAX_LEN	(EXT_INIT_MAX_LEN - 1)
 
 
 #define EXT_FIRST_EXTENT(__hdr__) \
@@ -187,14 +187,14 @@ static inline unsigned short ext_depth(struct inode *inode)
 	return le16_to_cpu(ext_inode_hdr(inode)->eh_depth);
 }
 
-static inline void ext4_ext_mark_uninitialized(struct ext4_extent *ext)
+static inline void ext4_ext_mark_unwritten(struct ext4_extent *ext)
 {
-	/* We can not have an uninitialized extent of zero length! */
+	/* We can not have an unwritten extent of zero length! */
 	BUG_ON((le16_to_cpu(ext->ee_len) & ~EXT_INIT_MAX_LEN) == 0);
 	ext->ee_len |= cpu_to_le16(EXT_INIT_MAX_LEN);
 }
 
-static inline int ext4_ext_is_uninitialized(struct ext4_extent *ext)
+static inline int ext4_ext_is_unwritten(struct ext4_extent *ext)
 {
 	/* Extent with ee_len of 0x8000 is treated as an initialized extent */
 	return (le16_to_cpu(ext->ee_len) > EXT_INIT_MAX_LEN);
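
The comment block above describes how a single 16-bit ee_len field encodes both the extent length and its written/unwritten state. A short user-space sketch of that decoding (not kernel code; it just restates what ext4_ext_is_unwritten() and the length macros imply):

#include <stdint.h>
#include <stdio.h>

#define EXT_INIT_MAX_LEN	(1UL << 15)

static void decode_ee_len(uint16_t ee_len)
{
	/* MSB set (and ee_len != 0x8000) means unwritten/preallocated;
	 * 0x8000 itself is the special case: an initialized extent of
	 * maximum length (32768 blocks). */
	int unwritten = ee_len > EXT_INIT_MAX_LEN;
	unsigned int len = unwritten ? ee_len - EXT_INIT_MAX_LEN : ee_len;

	printf("ee_len=0x%04x -> %s, %u blocks\n",
	       (unsigned)ee_len, unwritten ? "unwritten" : "initialized", len);
}

int main(void)
{
	decode_ee_len(100);		/* initialized, 100 blocks */
	decode_ee_len(0x8000);		/* initialized, 32768 blocks */
	decode_ee_len(0x8000 + 100);	/* unwritten, 100 blocks */
	return 0;
}
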
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index c3fb607413ed..0074e0d23d6e 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -122,9 +122,10 @@ handle_t *__ext4_journal_start_reserved(handle_t *handle, unsigned int line,
 	return handle;
 }
 
-void ext4_journal_abort_handle(const char *caller, unsigned int line,
-			       const char *err_fn, struct buffer_head *bh,
-			       handle_t *handle, int err)
+static void ext4_journal_abort_handle(const char *caller, unsigned int line,
+				      const char *err_fn,
+				      struct buffer_head *bh,
+				      handle_t *handle, int err)
 {
 	char nbuf[16];
 	const char *errstr = ext4_decode_error(NULL, err, nbuf);
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 81cfefa9dc0c..17c00ff202f2 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -231,10 +231,6 @@ int ext4_mark_inode_dirty(handle_t *handle, struct inode *inode);
 /*
  * Wrapper functions with which ext4 calls into JBD.
  */
-void ext4_journal_abort_handle(const char *caller, unsigned int line,
-			       const char *err_fn,
-			       struct buffer_head *bh, handle_t *handle, int err);
-
 int __ext4_journal_get_write_access(const char *where, unsigned int line,
				    handle_t *handle, struct buffer_head *bh);
 
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 01b0c208f625..4da228a0e6d0 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -50,8 +50,8 @@
  */
 #define EXT4_EXT_MAY_ZEROOUT	0x1  /* safe to zeroout if split fails \
					due to ENOSPC */
-#define EXT4_EXT_MARK_UNINIT1	0x2  /* mark first half uninitialized */
-#define EXT4_EXT_MARK_UNINIT2	0x4  /* mark second half uninitialized */
+#define EXT4_EXT_MARK_UNWRIT1	0x2  /* mark first half unwritten */
+#define EXT4_EXT_MARK_UNWRIT2	0x4  /* mark second half unwritten */
 
 #define EXT4_EXT_DATA_VALID1	0x8  /* first half contains valid data */
 #define EXT4_EXT_DATA_VALID2	0x10 /* second half contains valid data */
@@ -143,6 +143,7 @@ static int ext4_ext_get_access(handle_t *handle, struct inode *inode,
 {
 	if (path->p_bh) {
 		/* path points to block */
+		BUFFER_TRACE(path->p_bh, "get_write_access");
 		return ext4_journal_get_write_access(handle, path->p_bh);
 	}
 	/* path points to leaf/index in inode body */
@@ -524,7 +525,7 @@ __read_extent_tree_block(const char *function, unsigned int line,
 						lblk - prev, ~0,
 						EXTENT_STATUS_HOLE);
 
-			if (ext4_ext_is_uninitialized(ex))
+			if (ext4_ext_is_unwritten(ex))
 				status = EXTENT_STATUS_UNWRITTEN;
 			ext4_es_cache_extent(inode, lblk, len,
 					     ext4_ext_pblock(ex), status);
@@ -620,7 +621,7 @@ static void ext4_ext_show_path(struct inode *inode, struct ext4_ext_path *path)
 	} else if (path->p_ext) {
 		ext_debug("  %d:[%d]%d:%llu ",
 			  le32_to_cpu(path->p_ext->ee_block),
-			  ext4_ext_is_uninitialized(path->p_ext),
+			  ext4_ext_is_unwritten(path->p_ext),
 			  ext4_ext_get_actual_len(path->p_ext),
 			  ext4_ext_pblock(path->p_ext));
 	} else
@@ -646,7 +647,7 @@ static void ext4_ext_show_leaf(struct inode *inode, struct ext4_ext_path *path)
 
 	for (i = 0; i < le16_to_cpu(eh->eh_entries); i++, ex++) {
 		ext_debug("%d:[%d]%d:%llu ", le32_to_cpu(ex->ee_block),
-			  ext4_ext_is_uninitialized(ex),
+			  ext4_ext_is_unwritten(ex),
 			  ext4_ext_get_actual_len(ex), ext4_ext_pblock(ex));
 	}
 	ext_debug("\n");
@@ -677,7 +678,7 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
 		ext_debug("move %d:%llu:[%d]%d in new leaf %llu\n",
 				le32_to_cpu(ex->ee_block),
 				ext4_ext_pblock(ex),
-				ext4_ext_is_uninitialized(ex),
+				ext4_ext_is_unwritten(ex),
 				ext4_ext_get_actual_len(ex),
 				newblock);
 		ex++;
@@ -802,7 +803,7 @@ ext4_ext_binsearch(struct inode *inode,
 	ext_debug("  -> %d:%llu:[%d]%d ",
 			le32_to_cpu(path->p_ext->ee_block),
 			ext4_ext_pblock(path->p_ext),
-			ext4_ext_is_uninitialized(path->p_ext),
+			ext4_ext_is_unwritten(path->p_ext),
 			ext4_ext_get_actual_len(path->p_ext));
 
 #ifdef CHECK_BINSEARCH
@@ -1686,11 +1687,11 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
 
 	/*
 	 * Make sure that both extents are initialized. We don't merge
-	 * uninitialized extents so that we can be sure that end_io code has
+	 * unwritten extents so that we can be sure that end_io code has
 	 * the extent that was written properly split out and conversion to
 	 * initialized is trivial.
 	 */
-	if (ext4_ext_is_uninitialized(ex1) != ext4_ext_is_uninitialized(ex2))
+	if (ext4_ext_is_unwritten(ex1) != ext4_ext_is_unwritten(ex2))
 		return 0;
 
 	ext1_ee_len = ext4_ext_get_actual_len(ex1);
@@ -1707,10 +1708,10 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
 	 */
 	if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
 		return 0;
-	if (ext4_ext_is_uninitialized(ex1) &&
+	if (ext4_ext_is_unwritten(ex1) &&
 	    (ext4_test_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN) ||
 	     atomic_read(&EXT4_I(inode)->i_unwritten) ||
-	     (ext1_ee_len + ext2_ee_len > EXT_UNINIT_MAX_LEN)))
+	     (ext1_ee_len + ext2_ee_len > EXT_UNWRITTEN_MAX_LEN)))
 		return 0;
 #ifdef AGGRESSIVE_TEST
 	if (ext1_ee_len >= 4)
@@ -1735,7 +1736,7 @@ static int ext4_ext_try_to_merge_right(struct inode *inode,
 {
 	struct ext4_extent_header *eh;
 	unsigned int depth, len;
-	int merge_done = 0, uninit;
+	int merge_done = 0, unwritten;
 
 	depth = ext_depth(inode);
 	BUG_ON(path[depth].p_hdr == NULL);
@@ -1745,11 +1746,11 @@ static int ext4_ext_try_to_merge_right(struct inode *inode,
 		if (!ext4_can_extents_be_merged(inode, ex, ex + 1))
 			break;
 		/* merge with next extent! */
-		uninit = ext4_ext_is_uninitialized(ex);
+		unwritten = ext4_ext_is_unwritten(ex);
 		ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
 				+ ext4_ext_get_actual_len(ex + 1));
-		if (uninit)
-			ext4_ext_mark_uninitialized(ex);
+		if (unwritten)
+			ext4_ext_mark_unwritten(ex);
 
 		if (ex + 1 < EXT_LAST_EXTENT(eh)) {
 			len = (EXT_LAST_EXTENT(eh) - ex - 1)
@@ -1903,7 +1904,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 	struct ext4_ext_path *npath = NULL;
 	int depth, len, err;
 	ext4_lblk_t next;
-	int mb_flags = 0, uninit;
+	int mb_flags = 0, unwritten;
 
 	if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
 		EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
@@ -1943,21 +1944,21 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
 		if (ext4_can_extents_be_merged(inode, ex, newext)) {
 			ext_debug("append [%d]%d block to %u:[%d]%d"
 				  "(from %llu)\n",
-				  ext4_ext_is_uninitialized(newext),
+				  ext4_ext_is_unwritten(newext),
 				  ext4_ext_get_actual_len(newext),
 				  le32_to_cpu(ex->ee_block),
-				  ext4_ext_is_uninitialized(ex),
+				  ext4_ext_is_unwritten(ex),
 				  ext4_ext_get_actual_len(ex),
 				  ext4_ext_pblock(ex));
 			err = ext4_ext_get_access(handle, inode,
						  path + depth);
 			if (err)
 				return err;
-			uninit = ext4_ext_is_uninitialized(ex);
+			unwritten = ext4_ext_is_unwritten(ex);
 			ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
 					+ ext4_ext_get_actual_len(newext));
-			if (uninit)
-				ext4_ext_mark_uninitialized(ex);
+			if (unwritten)
+				ext4_ext_mark_unwritten(ex);
 			eh = path[depth].p_hdr;
 			nearex = ex;
 			goto merge;
@@ -1969,10 +1970,10 @@ prepend:
 			ext_debug("prepend %u[%d]%d block to %u:[%d]%d"
 				  "(from %llu)\n",
 				  le32_to_cpu(newext->ee_block),
-				  ext4_ext_is_uninitialized(newext),
+				  ext4_ext_is_unwritten(newext),
 				  ext4_ext_get_actual_len(newext),
 				  le32_to_cpu(ex->ee_block),
-				  ext4_ext_is_uninitialized(ex),
+				  ext4_ext_is_unwritten(ex),
 				  ext4_ext_get_actual_len(ex),
 				  ext4_ext_pblock(ex));
 			err = ext4_ext_get_access(handle, inode,
@@ -1980,13 +1981,13 @@ prepend:
 			if (err)
 				return err;
 
-			uninit = ext4_ext_is_uninitialized(ex);
+			unwritten = ext4_ext_is_unwritten(ex);
 			ex->ee_block = newext->ee_block;
 			ext4_ext_store_pblock(ex, ext4_ext_pblock(newext));
 			ex->ee_len = cpu_to_le16(ext4_ext_get_actual_len(ex)
 					+ ext4_ext_get_actual_len(newext));
-			if (uninit)
-				ext4_ext_mark_uninitialized(ex);
+			if (unwritten)
+				ext4_ext_mark_unwritten(ex);
 			eh = path[depth].p_hdr;
 			nearex = ex;
 			goto merge;
@@ -2046,7 +2047,7 @@ has_space:
 		ext_debug("first extent in the leaf: %u:%llu:[%d]%d\n",
 				le32_to_cpu(newext->ee_block),
 				ext4_ext_pblock(newext),
-				ext4_ext_is_uninitialized(newext),
+				ext4_ext_is_unwritten(newext),
 				ext4_ext_get_actual_len(newext));
 		nearex = EXT_FIRST_EXTENT(eh);
 	} else {
@@ -2057,7 +2058,7 @@ has_space:
2057 "nearest %p\n", 2058 "nearest %p\n",
2058 le32_to_cpu(newext->ee_block), 2059 le32_to_cpu(newext->ee_block),
2059 ext4_ext_pblock(newext), 2060 ext4_ext_pblock(newext),
2060 ext4_ext_is_uninitialized(newext), 2061 ext4_ext_is_unwritten(newext),
2061 ext4_ext_get_actual_len(newext), 2062 ext4_ext_get_actual_len(newext),
2062 nearex); 2063 nearex);
2063 nearex++; 2064 nearex++;
@@ -2068,7 +2069,7 @@ has_space:
2068 "nearest %p\n", 2069 "nearest %p\n",
2069 le32_to_cpu(newext->ee_block), 2070 le32_to_cpu(newext->ee_block),
2070 ext4_ext_pblock(newext), 2071 ext4_ext_pblock(newext),
2071 ext4_ext_is_uninitialized(newext), 2072 ext4_ext_is_unwritten(newext),
2072 ext4_ext_get_actual_len(newext), 2073 ext4_ext_get_actual_len(newext),
2073 nearex); 2074 nearex);
2074 } 2075 }
@@ -2078,7 +2079,7 @@ has_space:
2078 "move %d extents from 0x%p to 0x%p\n", 2079 "move %d extents from 0x%p to 0x%p\n",
2079 le32_to_cpu(newext->ee_block), 2080 le32_to_cpu(newext->ee_block),
2080 ext4_ext_pblock(newext), 2081 ext4_ext_pblock(newext),
2081 ext4_ext_is_uninitialized(newext), 2082 ext4_ext_is_unwritten(newext),
2082 ext4_ext_get_actual_len(newext), 2083 ext4_ext_get_actual_len(newext),
2083 len, nearex, nearex + 1); 2084 len, nearex, nearex + 1);
2084 memmove(nearex + 1, nearex, 2085 memmove(nearex + 1, nearex,
@@ -2200,7 +2201,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
 			es.es_lblk = le32_to_cpu(ex->ee_block);
 			es.es_len = ext4_ext_get_actual_len(ex);
 			es.es_pblk = ext4_ext_pblock(ex);
-			if (ext4_ext_is_uninitialized(ex))
+			if (ext4_ext_is_unwritten(ex))
 				flags |= FIEMAP_EXTENT_UNWRITTEN;
 		}
 
@@ -2576,7 +2577,7 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 	unsigned num;
 	ext4_lblk_t ex_ee_block;
 	unsigned short ex_ee_len;
-	unsigned uninitialized = 0;
+	unsigned unwritten = 0;
 	struct ext4_extent *ex;
 	ext4_fsblk_t pblk;
 
@@ -2623,13 +2624,13 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 	while (ex >= EXT_FIRST_EXTENT(eh) &&
 			ex_ee_block + ex_ee_len > start) {
 
-		if (ext4_ext_is_uninitialized(ex))
-			uninitialized = 1;
+		if (ext4_ext_is_unwritten(ex))
+			unwritten = 1;
 		else
-			uninitialized = 0;
+			unwritten = 0;
 
 		ext_debug("remove ext %u:[%d]%d\n", ex_ee_block,
-			  uninitialized, ex_ee_len);
+			  unwritten, ex_ee_len);
 		path[depth].p_ext = ex;
 
 		a = ex_ee_block > start ? ex_ee_block : start;
@@ -2701,11 +2702,11 @@ ext4_ext_rm_leaf(handle_t *handle, struct inode *inode,
 
 		ex->ee_len = cpu_to_le16(num);
 		/*
-		 * Do not mark uninitialized if all the blocks in the
+		 * Do not mark unwritten if all the blocks in the
 		 * extent have been removed.
 		 */
-		if (uninitialized && num)
-			ext4_ext_mark_uninitialized(ex);
+		if (unwritten && num)
+			ext4_ext_mark_unwritten(ex);
 		/*
 		 * If the extent was completely released,
 		 * we need to remove it from the leaf
@@ -2854,9 +2855,9 @@ again:
 	    end < ee_block + ext4_ext_get_actual_len(ex) - 1) {
 		int split_flag = 0;
 
-		if (ext4_ext_is_uninitialized(ex))
-			split_flag = EXT4_EXT_MARK_UNINIT1 |
-				     EXT4_EXT_MARK_UNINIT2;
+		if (ext4_ext_is_unwritten(ex))
+			split_flag = EXT4_EXT_MARK_UNWRIT1 |
+				     EXT4_EXT_MARK_UNWRIT2;
 
 		/*
 		 * Split the extent in two so that 'end' is the last
@@ -3113,7 +3114,7 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
  * @path: the path to the extent
  * @split: the logical block where the extent is splitted.
  * @split_flags: indicates if the extent could be zeroout if split fails, and
- *		 the states(init or uninit) of new extents.
+ *		 the states(init or unwritten) of new extents.
  * @flags: flags used to insert new extent to extent tree.
  *
  *
@@ -3155,10 +3156,10 @@ static int ext4_split_extent_at(handle_t *handle,
 	newblock = split - ee_block + ext4_ext_pblock(ex);
 
 	BUG_ON(split < ee_block || split >= (ee_block + ee_len));
-	BUG_ON(!ext4_ext_is_uninitialized(ex) &&
+	BUG_ON(!ext4_ext_is_unwritten(ex) &&
 	       split_flag & (EXT4_EXT_MAY_ZEROOUT |
-			     EXT4_EXT_MARK_UNINIT1 |
-			     EXT4_EXT_MARK_UNINIT2));
+			     EXT4_EXT_MARK_UNWRIT1 |
+			     EXT4_EXT_MARK_UNWRIT2));
 
 	err = ext4_ext_get_access(handle, inode, path + depth);
 	if (err)
@@ -3170,8 +3171,8 @@ static int ext4_split_extent_at(handle_t *handle,
 	 * then we just change the state of the extent, and splitting
 	 * is not needed.
 	 */
-	if (split_flag & EXT4_EXT_MARK_UNINIT2)
-		ext4_ext_mark_uninitialized(ex);
+	if (split_flag & EXT4_EXT_MARK_UNWRIT2)
+		ext4_ext_mark_unwritten(ex);
 	else
 		ext4_ext_mark_initialized(ex);
 
@@ -3185,8 +3186,8 @@ static int ext4_split_extent_at(handle_t *handle,
 	/* case a */
 	memcpy(&orig_ex, ex, sizeof(orig_ex));
 	ex->ee_len = cpu_to_le16(split - ee_block);
-	if (split_flag & EXT4_EXT_MARK_UNINIT1)
-		ext4_ext_mark_uninitialized(ex);
+	if (split_flag & EXT4_EXT_MARK_UNWRIT1)
+		ext4_ext_mark_unwritten(ex);
 
 	/*
 	 * path may lead to new leaf, not to original leaf any more
@@ -3200,8 +3201,8 @@ static int ext4_split_extent_at(handle_t *handle,
3200 ex2->ee_block = cpu_to_le32(split); 3201 ex2->ee_block = cpu_to_le32(split);
3201 ex2->ee_len = cpu_to_le16(ee_len - (split - ee_block)); 3202 ex2->ee_len = cpu_to_le16(ee_len - (split - ee_block));
3202 ext4_ext_store_pblock(ex2, newblock); 3203 ext4_ext_store_pblock(ex2, newblock);
3203 if (split_flag & EXT4_EXT_MARK_UNINIT2) 3204 if (split_flag & EXT4_EXT_MARK_UNWRIT2)
3204 ext4_ext_mark_uninitialized(ex2); 3205 ext4_ext_mark_unwritten(ex2);
3205 3206
3206 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); 3207 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
3207 if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { 3208 if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
@@ -3278,7 +3279,7 @@ static int ext4_split_extent(handle_t *handle,
 	struct ext4_extent *ex;
 	unsigned int ee_len, depth;
 	int err = 0;
-	int uninitialized;
+	int unwritten;
 	int split_flag1, flags1;
 	int allocated = map->m_len;
 
@@ -3286,14 +3287,14 @@ static int ext4_split_extent(handle_t *handle,
 	ex = path[depth].p_ext;
 	ee_block = le32_to_cpu(ex->ee_block);
 	ee_len = ext4_ext_get_actual_len(ex);
-	uninitialized = ext4_ext_is_uninitialized(ex);
+	unwritten = ext4_ext_is_unwritten(ex);
 
 	if (map->m_lblk + map->m_len < ee_block + ee_len) {
 		split_flag1 = split_flag & EXT4_EXT_MAY_ZEROOUT;
 		flags1 = flags | EXT4_GET_BLOCKS_PRE_IO;
-		if (uninitialized)
-			split_flag1 |= EXT4_EXT_MARK_UNINIT1 |
-				       EXT4_EXT_MARK_UNINIT2;
+		if (unwritten)
+			split_flag1 |= EXT4_EXT_MARK_UNWRIT1 |
+				       EXT4_EXT_MARK_UNWRIT2;
 		if (split_flag & EXT4_EXT_DATA_VALID2)
 			split_flag1 |= EXT4_EXT_DATA_VALID1;
 		err = ext4_split_extent_at(handle, inode, path,
@@ -3318,15 +3319,15 @@ static int ext4_split_extent(handle_t *handle,
 						 (unsigned long) map->m_lblk);
 			return -EIO;
 		}
-		uninitialized = ext4_ext_is_uninitialized(ex);
+		unwritten = ext4_ext_is_unwritten(ex);
 		split_flag1 = 0;
 
 		if (map->m_lblk >= ee_block) {
 			split_flag1 = split_flag & EXT4_EXT_DATA_VALID2;
-			if (uninitialized) {
-				split_flag1 |= EXT4_EXT_MARK_UNINIT1;
+			if (unwritten) {
+				split_flag1 |= EXT4_EXT_MARK_UNWRIT1;
 				split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT |
-							     EXT4_EXT_MARK_UNINIT2);
+							     EXT4_EXT_MARK_UNWRIT2);
 			}
 			err = ext4_split_extent_at(handle, inode, path,
 					map->m_lblk, split_flag1, flags);
@@ -3341,16 +3342,16 @@ out:
 
 /*
  * This function is called by ext4_ext_map_blocks() if someone tries to write
- * to an uninitialized extent. It may result in splitting the uninitialized
+ * to an unwritten extent. It may result in splitting the unwritten
  * extent into multiple extents (up to three - one initialized and two
- * uninitialized).
+ * unwritten).
  * There are three possibilities:
  *   a> There is no split required: Entire extent should be initialized
  *   b> Splits in two extents: Write is happening at either end of the extent
  *   c> Splits in three extents: Somone is writing in middle of the extent
  *
  * Pre-conditions:
- *  - The extent pointed to by 'path' is uninitialized.
+ *  - The extent pointed to by 'path' is unwritten.
  *  - The extent pointed to by 'path' contains a superset
  *    of the logical span [map->m_lblk, map->m_lblk + map->m_len).
  *
@@ -3396,12 +3397,12 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
 
 	/* Pre-conditions */
-	BUG_ON(!ext4_ext_is_uninitialized(ex));
+	BUG_ON(!ext4_ext_is_unwritten(ex));
 	BUG_ON(!in_range(map->m_lblk, ee_block, ee_len));
 
 	/*
 	 * Attempt to transfer newly initialized blocks from the currently
-	 * uninitialized extent to its neighbor. This is much cheaper
+	 * unwritten extent to its neighbor. This is much cheaper
 	 * than an insertion followed by a merge as those involve costly
 	 * memmove() calls. Transferring to the left is the common case in
 	 * steady state for workloads doing fallocate(FALLOC_FL_KEEP_SIZE)
@@ -3437,7 +3438,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	 *   - C4: abut_ex can receive the additional blocks without
 	 *     overflowing the (initialized) length limit.
 	 */
-	if ((!ext4_ext_is_uninitialized(abut_ex)) &&		/*C1*/
+	if ((!ext4_ext_is_unwritten(abut_ex)) &&		/*C1*/
 	    ((prev_lblk + prev_len) == ee_block) &&		/*C2*/
 	    ((prev_pblk + prev_len) == ee_pblk) &&		/*C3*/
 	    (prev_len < (EXT_INIT_MAX_LEN - map_len))) {	/*C4*/
@@ -3452,7 +3453,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3452 ex->ee_block = cpu_to_le32(ee_block + map_len); 3453 ex->ee_block = cpu_to_le32(ee_block + map_len);
3453 ext4_ext_store_pblock(ex, ee_pblk + map_len); 3454 ext4_ext_store_pblock(ex, ee_pblk + map_len);
3454 ex->ee_len = cpu_to_le16(ee_len - map_len); 3455 ex->ee_len = cpu_to_le16(ee_len - map_len);
3455 ext4_ext_mark_uninitialized(ex); /* Restore the flag */ 3456 ext4_ext_mark_unwritten(ex); /* Restore the flag */
3456 3457
3457 /* Extend abut_ex by 'map_len' blocks */ 3458 /* Extend abut_ex by 'map_len' blocks */
3458 abut_ex->ee_len = cpu_to_le16(prev_len + map_len); 3459 abut_ex->ee_len = cpu_to_le16(prev_len + map_len);
@@ -3483,7 +3484,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
 	 *   - C4: abut_ex can receive the additional blocks without
 	 *     overflowing the (initialized) length limit.
 	 */
-	if ((!ext4_ext_is_uninitialized(abut_ex)) &&		/*C1*/
+	if ((!ext4_ext_is_unwritten(abut_ex)) &&		/*C1*/
 	    ((map->m_lblk + map_len) == next_lblk) &&		/*C2*/
 	    ((ee_pblk + ee_len) == next_pblk) &&		/*C3*/
 	    (next_len < (EXT_INIT_MAX_LEN - map_len))) {	/*C4*/
@@ -3498,7 +3499,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3498 abut_ex->ee_block = cpu_to_le32(next_lblk - map_len); 3499 abut_ex->ee_block = cpu_to_le32(next_lblk - map_len);
3499 ext4_ext_store_pblock(abut_ex, next_pblk - map_len); 3500 ext4_ext_store_pblock(abut_ex, next_pblk - map_len);
3500 ex->ee_len = cpu_to_le16(ee_len - map_len); 3501 ex->ee_len = cpu_to_le16(ee_len - map_len);
3501 ext4_ext_mark_uninitialized(ex); /* Restore the flag */ 3502 ext4_ext_mark_unwritten(ex); /* Restore the flag */
3502 3503
3503 /* Extend abut_ex by 'map_len' blocks */ 3504 /* Extend abut_ex by 'map_len' blocks */
3504 abut_ex->ee_len = cpu_to_le16(next_len + map_len); 3505 abut_ex->ee_len = cpu_to_le16(next_len + map_len);
@@ -3603,26 +3604,26 @@ out:
 /*
  * This function is called by ext4_ext_map_blocks() from
  * ext4_get_blocks_dio_write() when DIO to write
- * to an uninitialized extent.
+ * to an unwritten extent.
  *
- * Writing to an uninitialized extent may result in splitting the uninitialized
- * extent into multiple initialized/uninitialized extents (up to three)
+ * Writing to an unwritten extent may result in splitting the unwritten
+ * extent into multiple initialized/unwritten extents (up to three)
  * There are three possibilities:
- *   a> There is no split required: Entire extent should be uninitialized
+ *   a> There is no split required: Entire extent should be unwritten
  *   b> Splits in two extents: Write is happening at either end of the extent
  *   c> Splits in three extents: Somone is writing in middle of the extent
  *
  * This works the same way in the case of initialized -> unwritten conversion.
  *
  * One of more index blocks maybe needed if the extent tree grow after
- * the uninitialized extent split. To prevent ENOSPC occur at the IO
- * complete, we need to split the uninitialized extent before DIO submit
- * the IO. The uninitialized extent called at this time will be split
- * into three uninitialized extent(at most). After IO complete, the part
+ * the unwritten extent split. To prevent ENOSPC occur at the IO
+ * complete, we need to split the unwritten extent before DIO submit
+ * the IO. The unwritten extent called at this time will be split
+ * into three unwritten extent(at most). After IO complete, the part
  * being filled will be convert to initialized by the end_io callback function
  * via ext4_convert_unwritten_extents().
  *
- * Returns the size of uninitialized extent to be written on success.
+ * Returns the size of unwritten extent to be written on success.
  */
 static int ext4_split_convert_extents(handle_t *handle,
					struct inode *inode,
@@ -3660,7 +3661,7 @@ static int ext4_split_convert_extents(handle_t *handle,
3660 } else if (flags & EXT4_GET_BLOCKS_CONVERT) { 3661 } else if (flags & EXT4_GET_BLOCKS_CONVERT) {
3661 split_flag |= ee_block + ee_len <= eof_block ? 3662 split_flag |= ee_block + ee_len <= eof_block ?
3662 EXT4_EXT_MAY_ZEROOUT : 0; 3663 EXT4_EXT_MAY_ZEROOUT : 0;
3663 split_flag |= (EXT4_EXT_MARK_UNINIT2 | EXT4_EXT_DATA_VALID2); 3664 split_flag |= (EXT4_EXT_MARK_UNWRIT2 | EXT4_EXT_DATA_VALID2);
3664 } 3665 }
3665 flags |= EXT4_GET_BLOCKS_PRE_IO; 3666 flags |= EXT4_GET_BLOCKS_PRE_IO;
3666 return ext4_split_extent(handle, inode, path, map, split_flag, flags); 3667 return ext4_split_extent(handle, inode, path, map, split_flag, flags);
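To make the comment above concrete, here is a minimal illustration of case c> (editor's sketch, not part of the patch; all names are hypothetical): a write that lands in the middle of an unwritten extent yields at most three pieces, an unwritten head, the written middle that end_io later converts, and an unwritten tail.

	/* Editor's sketch: piece sizes for a write fully contained in the
	 * unwritten extent [ee_block, ee_block + ee_len). */
	static void example_three_way_split(unsigned int ee_block, unsigned int ee_len,
					    unsigned int lblk, unsigned int len,
					    unsigned int *head, unsigned int *mid,
					    unsigned int *tail)
	{
		*head = lblk - ee_block;                     /* stays unwritten */
		*mid  = len;                                 /* written, converted at end_io */
		*tail = (ee_block + ee_len) - (lblk + len);  /* stays unwritten */
	}
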
@@ -3710,8 +3711,8 @@ static int ext4_convert_initialized_extents(handle_t *handle,
3710 err = ext4_ext_get_access(handle, inode, path + depth); 3711 err = ext4_ext_get_access(handle, inode, path + depth);
3711 if (err) 3712 if (err)
3712 goto out; 3713 goto out;
3713 /* first mark the extent as uninitialized */ 3714 /* first mark the extent as unwritten */
3714 ext4_ext_mark_uninitialized(ex); 3715 ext4_ext_mark_unwritten(ex);
3715 3716
3716 /* note: ext4_ext_correct_indexes() isn't needed here because 3717 /* note: ext4_ext_correct_indexes() isn't needed here because
3717 * borders are not changed 3718 * borders are not changed
@@ -3971,10 +3972,10 @@ ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode,
3971 3972
3972 /* 3973 /*
3973 * Make sure that the extent is no bigger than we support with 3974 * Make sure that the extent is no bigger than we support with
3974 * uninitialized extent 3975 * unwritten extent
3975 */ 3976 */
3976 if (map->m_len > EXT_UNINIT_MAX_LEN) 3977 if (map->m_len > EXT_UNWRITTEN_MAX_LEN)
3977 map->m_len = EXT_UNINIT_MAX_LEN / 2; 3978 map->m_len = EXT_UNWRITTEN_MAX_LEN / 2;
3978 3979
3979 ret = ext4_convert_initialized_extents(handle, inode, map, 3980 ret = ext4_convert_initialized_extents(handle, inode, map,
3980 path); 3981 path);
@@ -3993,7 +3994,7 @@ ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode,
3993} 3994}
3994 3995
3995static int 3996static int
3996ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode, 3997ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
3997 struct ext4_map_blocks *map, 3998 struct ext4_map_blocks *map,
3998 struct ext4_ext_path *path, int flags, 3999 struct ext4_ext_path *path, int flags,
3999 unsigned int allocated, ext4_fsblk_t newblock) 4000 unsigned int allocated, ext4_fsblk_t newblock)
@@ -4002,23 +4003,23 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
4002 int err = 0; 4003 int err = 0;
4003 ext4_io_end_t *io = ext4_inode_aio(inode); 4004 ext4_io_end_t *io = ext4_inode_aio(inode);
4004 4005
4005 ext_debug("ext4_ext_handle_uninitialized_extents: inode %lu, logical " 4006 ext_debug("ext4_ext_handle_unwritten_extents: inode %lu, logical "
4006 "block %llu, max_blocks %u, flags %x, allocated %u\n", 4007 "block %llu, max_blocks %u, flags %x, allocated %u\n",
4007 inode->i_ino, (unsigned long long)map->m_lblk, map->m_len, 4008 inode->i_ino, (unsigned long long)map->m_lblk, map->m_len,
4008 flags, allocated); 4009 flags, allocated);
4009 ext4_ext_show_leaf(inode, path); 4010 ext4_ext_show_leaf(inode, path);
4010 4011
4011 /* 4012 /*
4012 * When writing into uninitialized space, we should not fail to 4013 * When writing into unwritten space, we should not fail to
4013 * allocate metadata blocks for the new extent block if needed. 4014 * allocate metadata blocks for the new extent block if needed.
4014 */ 4015 */
4015 flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL; 4016 flags |= EXT4_GET_BLOCKS_METADATA_NOFAIL;
4016 4017
4017 trace_ext4_ext_handle_uninitialized_extents(inode, map, flags, 4018 trace_ext4_ext_handle_unwritten_extents(inode, map, flags,
4018 allocated, newblock); 4019 allocated, newblock);
4019 4020
4020 /* get_block() before submit the IO, split the extent */ 4021 /* get_block() before submit the IO, split the extent */
4021 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) { 4022 if (flags & EXT4_GET_BLOCKS_PRE_IO) {
4022 ret = ext4_split_convert_extents(handle, inode, map, 4023 ret = ext4_split_convert_extents(handle, inode, map,
4023 path, flags | EXT4_GET_BLOCKS_CONVERT); 4024 path, flags | EXT4_GET_BLOCKS_CONVERT);
4024 if (ret <= 0) 4025 if (ret <= 0)
@@ -4033,12 +4034,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
4033 else 4034 else
4034 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN); 4035 ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
4035 map->m_flags |= EXT4_MAP_UNWRITTEN; 4036 map->m_flags |= EXT4_MAP_UNWRITTEN;
4036 if (ext4_should_dioread_nolock(inode))
4037 map->m_flags |= EXT4_MAP_UNINIT;
4038 goto out; 4037 goto out;
4039 } 4038 }
4040 /* IO end_io complete, convert the filled extent to written */ 4039 /* IO end_io complete, convert the filled extent to written */
4041 if ((flags & EXT4_GET_BLOCKS_CONVERT)) { 4040 if (flags & EXT4_GET_BLOCKS_CONVERT) {
4042 ret = ext4_convert_unwritten_extents_endio(handle, inode, map, 4041 ret = ext4_convert_unwritten_extents_endio(handle, inode, map,
4043 path); 4042 path);
4044 if (ret >= 0) { 4043 if (ret >= 0) {
@@ -4059,7 +4058,7 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
4059 * repeat fallocate creation request 4058 * repeat fallocate creation request
4060 * we already have an unwritten extent 4059 * we already have an unwritten extent
4061 */ 4060 */
4062 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT) { 4061 if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT) {
4063 map->m_flags |= EXT4_MAP_UNWRITTEN; 4062 map->m_flags |= EXT4_MAP_UNWRITTEN;
4064 goto map_out; 4063 goto map_out;
4065 } 4064 }
@@ -4310,7 +4309,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4310 4309
4311 4310
4312 /* 4311 /*
4313 * Uninitialized extents are treated as holes, except that 4312 * unwritten extents are treated as holes, except that
4314 * we split out initialized portions during a write. 4313 * we split out initialized portions during a write.
4315 */ 4314 */
4316 ee_len = ext4_ext_get_actual_len(ex); 4315 ee_len = ext4_ext_get_actual_len(ex);
@@ -4329,16 +4328,16 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4329 * If the extent is initialized check whether the 4328 * If the extent is initialized check whether the
4330 * caller wants to convert it to unwritten. 4329 * caller wants to convert it to unwritten.
4331 */ 4330 */
4332 if ((!ext4_ext_is_uninitialized(ex)) && 4331 if ((!ext4_ext_is_unwritten(ex)) &&
4333 (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) { 4332 (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
4334 allocated = ext4_ext_convert_initialized_extent( 4333 allocated = ext4_ext_convert_initialized_extent(
4335 handle, inode, map, path, flags, 4334 handle, inode, map, path, flags,
4336 allocated, newblock); 4335 allocated, newblock);
4337 goto out2; 4336 goto out2;
4338 } else if (!ext4_ext_is_uninitialized(ex)) 4337 } else if (!ext4_ext_is_unwritten(ex))
4339 goto out; 4338 goto out;
4340 4339
4341 ret = ext4_ext_handle_uninitialized_extents( 4340 ret = ext4_ext_handle_unwritten_extents(
4342 handle, inode, map, path, flags, 4341 handle, inode, map, path, flags,
4343 allocated, newblock); 4342 allocated, newblock);
4344 if (ret < 0) 4343 if (ret < 0)
@@ -4410,15 +4409,15 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4410 /* 4409 /*
4411 * See if request is beyond maximum number of blocks we can have in 4410 * See if request is beyond maximum number of blocks we can have in
4412 * a single extent. For an initialized extent this limit is 4411 * a single extent. For an initialized extent this limit is
4413 * EXT_INIT_MAX_LEN and for an uninitialized extent this limit is 4412 * EXT_INIT_MAX_LEN and for an unwritten extent this limit is
4414 * EXT_UNINIT_MAX_LEN. 4413 * EXT_UNWRITTEN_MAX_LEN.
4415 */ 4414 */
4416 if (map->m_len > EXT_INIT_MAX_LEN && 4415 if (map->m_len > EXT_INIT_MAX_LEN &&
4417 !(flags & EXT4_GET_BLOCKS_UNINIT_EXT)) 4416 !(flags & EXT4_GET_BLOCKS_UNWRIT_EXT))
4418 map->m_len = EXT_INIT_MAX_LEN; 4417 map->m_len = EXT_INIT_MAX_LEN;
4419 else if (map->m_len > EXT_UNINIT_MAX_LEN && 4418 else if (map->m_len > EXT_UNWRITTEN_MAX_LEN &&
4420 (flags & EXT4_GET_BLOCKS_UNINIT_EXT)) 4419 (flags & EXT4_GET_BLOCKS_UNWRIT_EXT))
4421 map->m_len = EXT_UNINIT_MAX_LEN; 4420 map->m_len = EXT_UNWRITTEN_MAX_LEN;
4422 4421
4423 /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */ 4422 /* Check if we can really insert (m_lblk)::(m_lblk + m_len) extent */
4424 newex.ee_len = cpu_to_le16(map->m_len); 4423 newex.ee_len = cpu_to_le16(map->m_len);
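The clamp above exists because the extent length is a 16-bit on-disk field; as I read ext4_extents.h (an assumption worth checking there), an initialized extent may span 32768 blocks while an unwritten extent gives up the top bit as a marker and tops out at 32767. A standalone sketch of the same clamp, with hypothetical names:

	/* Editor's sketch; the EXAMPLE_* constants mirror EXT_INIT_MAX_LEN and
	 * EXT_UNWRITTEN_MAX_LEN under the assumption stated above. */
	#define EXAMPLE_INIT_MAX_LEN      (1U << 15)               /* 32768 blocks */
	#define EXAMPLE_UNWRITTEN_MAX_LEN (EXAMPLE_INIT_MAX_LEN - 1)

	static unsigned int example_clamp_extent_len(unsigned int len, int unwritten)
	{
		unsigned int max = unwritten ? EXAMPLE_UNWRITTEN_MAX_LEN
					     : EXAMPLE_INIT_MAX_LEN;

		return len > max ? max : len;
	}
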
@@ -4466,21 +4465,19 @@ got_allocated_blocks:
4466 /* try to insert new extent into found leaf and return */ 4465 /* try to insert new extent into found leaf and return */
4467 ext4_ext_store_pblock(&newex, newblock + offset); 4466 ext4_ext_store_pblock(&newex, newblock + offset);
4468 newex.ee_len = cpu_to_le16(ar.len); 4467 newex.ee_len = cpu_to_le16(ar.len);
4469 /* Mark uninitialized */ 4468 /* Mark unwritten */
4470 if (flags & EXT4_GET_BLOCKS_UNINIT_EXT){ 4469 if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT){
4471 ext4_ext_mark_uninitialized(&newex); 4470 ext4_ext_mark_unwritten(&newex);
4472 map->m_flags |= EXT4_MAP_UNWRITTEN; 4471 map->m_flags |= EXT4_MAP_UNWRITTEN;
4473 /* 4472 /*
4474 * io_end structure was created for every IO write to an 4473 * io_end structure was created for every IO write to an
4475 * uninitialized extent. To avoid unnecessary conversion, 4474 * unwritten extent. To avoid unnecessary conversion,
4476 * here we flag the IO that really needs the conversion. 4475 * here we flag the IO that really needs the conversion.
 4477 * For the non-async direct IO case, flag the inode state 4476 * For the non-async direct IO case, flag the inode state
 4478 * that we need to perform the conversion when the IO is done. 4477 * that we need to perform the conversion when the IO is done.
4479 */ 4478 */
4480 if ((flags & EXT4_GET_BLOCKS_PRE_IO)) 4479 if (flags & EXT4_GET_BLOCKS_PRE_IO)
4481 set_unwritten = 1; 4480 set_unwritten = 1;
4482 if (ext4_should_dioread_nolock(inode))
4483 map->m_flags |= EXT4_MAP_UNINIT;
4484 } 4481 }
4485 4482
4486 err = 0; 4483 err = 0;
@@ -4607,9 +4604,9 @@ got_allocated_blocks:
4607 4604
4608 /* 4605 /*
4609 * Cache the extent and update transaction to commit on fdatasync only 4606 * Cache the extent and update transaction to commit on fdatasync only
4610 * when it is _not_ an uninitialized extent. 4607 * when it is _not_ an unwritten extent.
4611 */ 4608 */
4612 if ((flags & EXT4_GET_BLOCKS_UNINIT_EXT) == 0) 4609 if ((flags & EXT4_GET_BLOCKS_UNWRIT_EXT) == 0)
4613 ext4_update_inode_fsync_trans(handle, inode, 1); 4610 ext4_update_inode_fsync_trans(handle, inode, 1);
4614 else 4611 else
4615 ext4_update_inode_fsync_trans(handle, inode, 0); 4612 ext4_update_inode_fsync_trans(handle, inode, 0);
@@ -4683,7 +4680,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
4683 * that it doesn't get unnecessarily split into multiple 4680 * that it doesn't get unnecessarily split into multiple
4684 * extents. 4681 * extents.
4685 */ 4682 */
4686 if (len <= EXT_UNINIT_MAX_LEN) 4683 if (len <= EXT_UNWRITTEN_MAX_LEN)
4687 flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; 4684 flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
4688 4685
4689 /* 4686 /*
@@ -4744,6 +4741,13 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4744 if (!S_ISREG(inode->i_mode)) 4741 if (!S_ISREG(inode->i_mode))
4745 return -EINVAL; 4742 return -EINVAL;
4746 4743
4744 /* Call ext4_force_commit to flush all data in case of data=journal. */
4745 if (ext4_should_journal_data(inode)) {
4746 ret = ext4_force_commit(inode->i_sb);
4747 if (ret)
4748 return ret;
4749 }
4750
4747 /* 4751 /*
4748 * Write out all dirty pages to avoid race conditions 4752 * Write out all dirty pages to avoid race conditions
4749 * Then release them. 4753 * Then release them.
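A condensed view of the ordering this hunk enforces (editor's sketch; the wrapper name is hypothetical, and the assumption that the "write out all dirty pages" step uses filemap_write_and_wait_range() is mine): in data=journal mode dirty data sits in the journal, so a commit is forced before the regular writeback described in the comment just above.

	/* Editor's sketch, not from the patch. */
	static int example_flush_before_zero_range(struct inode *inode,
						   loff_t start, loff_t end)
	{
		int ret;

		if (ext4_should_journal_data(inode)) {
			ret = ext4_force_commit(inode->i_sb);
			if (ret)
				return ret;
		}
		/* Now the usual "write out all dirty pages" step can proceed. */
		return filemap_write_and_wait_range(inode->i_mapping, start, end);
	}
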
@@ -4775,7 +4779,7 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4775 else 4779 else
4776 max_blocks -= lblk; 4780 max_blocks -= lblk;
4777 4781
4778 flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT | 4782 flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT |
4779 EXT4_GET_BLOCKS_CONVERT_UNWRITTEN; 4783 EXT4_GET_BLOCKS_CONVERT_UNWRITTEN;
4780 if (mode & FALLOC_FL_KEEP_SIZE) 4784 if (mode & FALLOC_FL_KEEP_SIZE)
4781 flags |= EXT4_GET_BLOCKS_KEEP_SIZE; 4785 flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
@@ -4918,7 +4922,7 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4918 max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits) 4922 max_blocks = (EXT4_BLOCK_ALIGN(len + offset, blkbits) >> blkbits)
4919 - lblk; 4923 - lblk;
4920 4924
4921 flags = EXT4_GET_BLOCKS_CREATE_UNINIT_EXT; 4925 flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT;
4922 if (mode & FALLOC_FL_KEEP_SIZE) 4926 if (mode & FALLOC_FL_KEEP_SIZE)
4923 flags |= EXT4_GET_BLOCKS_KEEP_SIZE; 4927 flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
4924 4928
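Both fallocate paths patched above request EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT, and FALLOC_FL_KEEP_SIZE maps to EXT4_GET_BLOCKS_KEEP_SIZE. For reference, a small user-space sketch (editor's addition, standard fallocate(2) usage, nothing ext4-specific) that reaches these paths; FALLOC_FL_ZERO_RANGE reaches ext4_zero_range() in the same way.

	/* Editor's sketch: user-space calls that exercise the code above. */
	#define _GNU_SOURCE
	#include <fcntl.h>

	static int example_preallocate(int fd)
	{
		/* Plain preallocation: unwritten extents, i_size grows to 1 MiB. */
		if (fallocate(fd, 0, 0, 1 << 20))
			return -1;

		/* Preallocate a further 1 MiB without changing i_size. */
		return fallocate(fd, FALLOC_FL_KEEP_SIZE, 1 << 20, 1 << 20);
	}
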
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 0ebc21204b51..3f5c188953a4 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -344,8 +344,14 @@ static int ext4_es_can_be_merged(struct extent_status *es1,
344 if (ext4_es_status(es1) != ext4_es_status(es2)) 344 if (ext4_es_status(es1) != ext4_es_status(es2))
345 return 0; 345 return 0;
346 346
347 if (((__u64) es1->es_len) + es2->es_len > 0xFFFFFFFFULL) 347 if (((__u64) es1->es_len) + es2->es_len > EXT_MAX_BLOCKS) {
348 pr_warn("ES assertion failed when merging extents. "
349 "The sum of lengths of es1 (%d) and es2 (%d) "
350 "is bigger than allowed file size (%d)\n",
351 es1->es_len, es2->es_len, EXT_MAX_BLOCKS);
352 WARN_ON(1);
348 return 0; 353 return 0;
354 }
349 355
350 if (((__u64) es1->es_lblk) + es1->es_len != es2->es_lblk) 356 if (((__u64) es1->es_lblk) + es1->es_len != es2->es_lblk)
351 return 0; 357 return 0;
@@ -433,7 +439,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
433 ee_start = ext4_ext_pblock(ex); 439 ee_start = ext4_ext_pblock(ex);
434 ee_len = ext4_ext_get_actual_len(ex); 440 ee_len = ext4_ext_get_actual_len(ex);
435 441
436 ee_status = ext4_ext_is_uninitialized(ex) ? 1 : 0; 442 ee_status = ext4_ext_is_unwritten(ex) ? 1 : 0;
437 es_status = ext4_es_is_unwritten(es) ? 1 : 0; 443 es_status = ext4_es_is_unwritten(es) ? 1 : 0;
438 444
439 /* 445 /*
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 063fc1538355..4e8bc284ec0e 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -57,7 +57,7 @@ static int ext4_release_file(struct inode *inode, struct file *filp)
57 return 0; 57 return 0;
58} 58}
59 59
60void ext4_unwritten_wait(struct inode *inode) 60static void ext4_unwritten_wait(struct inode *inode)
61{ 61{
62 wait_queue_head_t *wq = ext4_ioend_wq(inode); 62 wait_queue_head_t *wq = ext4_ioend_wq(inode);
63 63
@@ -92,58 +92,91 @@ ext4_unaligned_aio(struct inode *inode, const struct iovec *iov,
92} 92}
93 93
94static ssize_t 94static ssize_t
95ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov, 95ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
96 unsigned long nr_segs, loff_t pos) 96 unsigned long nr_segs, loff_t pos)
97{ 97{
98 struct file *file = iocb->ki_filp; 98 struct file *file = iocb->ki_filp;
99 struct inode *inode = file->f_mapping->host; 99 struct inode *inode = file_inode(iocb->ki_filp);
100 struct mutex *aio_mutex = NULL;
100 struct blk_plug plug; 101 struct blk_plug plug;
101 int unaligned_aio = 0; 102 int o_direct = file->f_flags & O_DIRECT;
102 ssize_t ret;
103 int overwrite = 0; 103 int overwrite = 0;
104 size_t length = iov_length(iov, nr_segs); 104 size_t length = iov_length(iov, nr_segs);
105 ssize_t ret;
105 106
106 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) && 107 BUG_ON(iocb->ki_pos != pos);
107 !is_sync_kiocb(iocb))
108 unaligned_aio = ext4_unaligned_aio(inode, iov, nr_segs, pos);
109 108
110 /* Unaligned direct AIO must be serialized; see comment above */ 109 /*
111 if (unaligned_aio) { 110 * Unaligned direct AIO must be serialized; see comment above
112 mutex_lock(ext4_aio_mutex(inode)); 111 * In the case of O_APPEND, assume that we must always serialize
112 */
113 if (o_direct &&
114 ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
115 !is_sync_kiocb(iocb) &&
116 (file->f_flags & O_APPEND ||
117 ext4_unaligned_aio(inode, iov, nr_segs, pos))) {
118 aio_mutex = ext4_aio_mutex(inode);
119 mutex_lock(aio_mutex);
113 ext4_unwritten_wait(inode); 120 ext4_unwritten_wait(inode);
114 } 121 }
115 122
116 BUG_ON(iocb->ki_pos != pos);
117
118 mutex_lock(&inode->i_mutex); 123 mutex_lock(&inode->i_mutex);
119 blk_start_plug(&plug); 124 if (file->f_flags & O_APPEND)
125 iocb->ki_pos = pos = i_size_read(inode);
126
127 /*
128 * If we have encountered a bitmap-format file, the size limit
129 * is smaller than s_maxbytes, which is for extent-mapped files.
130 */
131 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
132 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
120 133
121 iocb->private = &overwrite; 134 if ((pos > sbi->s_bitmap_maxbytes) ||
135 (pos == sbi->s_bitmap_maxbytes && length > 0)) {
136 mutex_unlock(&inode->i_mutex);
137 ret = -EFBIG;
138 goto errout;
139 }
122 140
123 /* check whether we do a DIO overwrite or not */ 141 if (pos + length > sbi->s_bitmap_maxbytes) {
124 if (ext4_should_dioread_nolock(inode) && !unaligned_aio && 142 nr_segs = iov_shorten((struct iovec *)iov, nr_segs,
125 !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) { 143 sbi->s_bitmap_maxbytes - pos);
126 struct ext4_map_blocks map; 144 }
127 unsigned int blkbits = inode->i_blkbits; 145 }
128 int err, len;
129 146
130 map.m_lblk = pos >> blkbits; 147 if (o_direct) {
131 map.m_len = (EXT4_BLOCK_ALIGN(pos + length, blkbits) >> blkbits) 148 blk_start_plug(&plug);
132 - map.m_lblk;
133 len = map.m_len;
134 149
135 err = ext4_map_blocks(NULL, inode, &map, 0); 150 iocb->private = &overwrite;
136 /* 151
137 * 'err==len' means that all of blocks has been preallocated no 152 /* check whether we do a DIO overwrite or not */
138 * matter they are initialized or not. For excluding 153 if (ext4_should_dioread_nolock(inode) && !aio_mutex &&
139 * uninitialized extents, we need to check m_flags. There are 154 !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) {
140 * two conditions that indicate for initialized extents. 155 struct ext4_map_blocks map;
141 * 1) If we hit extent cache, EXT4_MAP_MAPPED flag is returned; 156 unsigned int blkbits = inode->i_blkbits;
142 * 2) If we do a real lookup, non-flags are returned. 157 int err, len;
143 * So we should check these two conditions. 158
144 */ 159 map.m_lblk = pos >> blkbits;
145 if (err == len && (map.m_flags & EXT4_MAP_MAPPED)) 160 map.m_len = (EXT4_BLOCK_ALIGN(pos + length, blkbits) >> blkbits)
146 overwrite = 1; 161 - map.m_lblk;
162 len = map.m_len;
163
164 err = ext4_map_blocks(NULL, inode, &map, 0);
165 /*
 166 * 'err == len' means that all of the blocks have
 167 * been preallocated, no matter whether they are
 168 * initialized or not. To exclude
 169 * unwritten extents, we need to check
 170 * m_flags. There are two conditions that
 171 * indicate initialized extents: 1) if we
 172 * hit the extent cache, the EXT4_MAP_MAPPED flag is
 173 * returned; 2) if we do a real lookup,
 174 * non-flags are returned. So we should check
 175 * these two conditions.
176 */
177 if (err == len && (map.m_flags & EXT4_MAP_MAPPED))
178 overwrite = 1;
179 }
147 } 180 }
148 181
149 ret = __generic_file_aio_write(iocb, iov, nr_segs); 182 ret = __generic_file_aio_write(iocb, iov, nr_segs);
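For readers skimming the reorganized function: the overwrite probe above asks ext4_map_blocks() for the target range without creating anything, and treats the write as a pure overwrite only if every block came back mapped and initialized. A standalone restatement (editor's sketch; the helper name is hypothetical, the ext4 calls are the ones used in the hunk):

	/* Editor's sketch, not from the patch: returns 1 when a DIO write to
	 * [pos, pos + count) cannot trigger allocation or extent conversion. */
	static int example_dio_is_overwrite(struct inode *inode, loff_t pos, size_t count)
	{
		struct ext4_map_blocks map;
		unsigned int blkbits = inode->i_blkbits;
		int err, len;

		map.m_lblk = pos >> blkbits;
		map.m_len = (EXT4_BLOCK_ALIGN(pos + count, blkbits) >> blkbits)
			- map.m_lblk;
		len = map.m_len;

		/* Lookup only: no journal handle, no allocation flags. */
		err = ext4_map_blocks(NULL, inode, &map, 0);

		/* err == len means every block is already allocated; MAPPED (as
		 * opposed to UNWRITTEN) means it is initialized as well. */
		return err == len && (map.m_flags & EXT4_MAP_MAPPED);
	}
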
@@ -156,45 +189,12 @@ ext4_file_dio_write(struct kiocb *iocb, const struct iovec *iov,
156 if (err < 0) 189 if (err < 0)
157 ret = err; 190 ret = err;
158 } 191 }
159 blk_finish_plug(&plug); 192 if (o_direct)
160 193 blk_finish_plug(&plug);
161 if (unaligned_aio)
162 mutex_unlock(ext4_aio_mutex(inode));
163
164 return ret;
165}
166
167static ssize_t
168ext4_file_write(struct kiocb *iocb, const struct iovec *iov,
169 unsigned long nr_segs, loff_t pos)
170{
171 struct inode *inode = file_inode(iocb->ki_filp);
172 ssize_t ret;
173
174 /*
175 * If we have encountered a bitmap-format file, the size limit
176 * is smaller than s_maxbytes, which is for extent-mapped files.
177 */
178
179 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
180 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
181 size_t length = iov_length(iov, nr_segs);
182
183 if ((pos > sbi->s_bitmap_maxbytes ||
184 (pos == sbi->s_bitmap_maxbytes && length > 0)))
185 return -EFBIG;
186
187 if (pos + length > sbi->s_bitmap_maxbytes) {
188 nr_segs = iov_shorten((struct iovec *)iov, nr_segs,
189 sbi->s_bitmap_maxbytes - pos);
190 }
191 }
192
193 if (unlikely(iocb->ki_filp->f_flags & O_DIRECT))
194 ret = ext4_file_dio_write(iocb, iov, nr_segs, pos);
195 else
196 ret = generic_file_aio_write(iocb, iov, nr_segs, pos);
197 194
195errout:
196 if (aio_mutex)
197 mutex_unlock(aio_mutex);
198 return ret; 198 return ret;
199} 199}
200 200
@@ -244,6 +244,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
244 handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1); 244 handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);
245 if (IS_ERR(handle)) 245 if (IS_ERR(handle))
246 return PTR_ERR(handle); 246 return PTR_ERR(handle);
247 BUFFER_TRACE(sbi->s_sbh, "get_write_access");
247 err = ext4_journal_get_write_access(handle, sbi->s_sbh); 248 err = ext4_journal_get_write_access(handle, sbi->s_sbh);
248 if (err) { 249 if (err) {
249 ext4_journal_stop(handle); 250 ext4_journal_stop(handle);
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index 82edf5b93352..645205d8ada6 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -22,7 +22,7 @@
22#define EXT4_INLINE_DOTDOT_OFFSET 2 22#define EXT4_INLINE_DOTDOT_OFFSET 2
23#define EXT4_INLINE_DOTDOT_SIZE 4 23#define EXT4_INLINE_DOTDOT_SIZE 4
24 24
25int ext4_get_inline_size(struct inode *inode) 25static int ext4_get_inline_size(struct inode *inode)
26{ 26{
27 if (EXT4_I(inode)->i_inline_off) 27 if (EXT4_I(inode)->i_inline_off)
28 return EXT4_I(inode)->i_inline_size; 28 return EXT4_I(inode)->i_inline_size;
@@ -211,8 +211,8 @@ out:
211 * value since it is already handled by ext4_xattr_ibody_inline_set. 211 * value since it is already handled by ext4_xattr_ibody_inline_set.
212 * That saves us one memcpy. 212 * That saves us one memcpy.
213 */ 213 */
214void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc, 214static void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc,
215 void *buffer, loff_t pos, unsigned int len) 215 void *buffer, loff_t pos, unsigned int len)
216{ 216{
217 struct ext4_xattr_entry *entry; 217 struct ext4_xattr_entry *entry;
218 struct ext4_xattr_ibody_header *header; 218 struct ext4_xattr_ibody_header *header;
@@ -264,6 +264,7 @@ static int ext4_create_inline_data(handle_t *handle,
264 if (error) 264 if (error)
265 return error; 265 return error;
266 266
267 BUFFER_TRACE(is.iloc.bh, "get_write_access");
267 error = ext4_journal_get_write_access(handle, is.iloc.bh); 268 error = ext4_journal_get_write_access(handle, is.iloc.bh);
268 if (error) 269 if (error)
269 goto out; 270 goto out;
@@ -347,6 +348,7 @@ static int ext4_update_inline_data(handle_t *handle, struct inode *inode,
347 if (error == -ENODATA) 348 if (error == -ENODATA)
348 goto out; 349 goto out;
349 350
351 BUFFER_TRACE(is.iloc.bh, "get_write_access");
350 error = ext4_journal_get_write_access(handle, is.iloc.bh); 352 error = ext4_journal_get_write_access(handle, is.iloc.bh);
351 if (error) 353 if (error)
352 goto out; 354 goto out;
@@ -373,8 +375,8 @@ out:
373 return error; 375 return error;
374} 376}
375 377
376int ext4_prepare_inline_data(handle_t *handle, struct inode *inode, 378static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
377 unsigned int len) 379 unsigned int len)
378{ 380{
379 int ret, size; 381 int ret, size;
380 struct ext4_inode_info *ei = EXT4_I(inode); 382 struct ext4_inode_info *ei = EXT4_I(inode);
@@ -424,6 +426,7 @@ static int ext4_destroy_inline_data_nolock(handle_t *handle,
424 if (error) 426 if (error)
425 goto out; 427 goto out;
426 428
429 BUFFER_TRACE(is.iloc.bh, "get_write_access");
427 error = ext4_journal_get_write_access(handle, is.iloc.bh); 430 error = ext4_journal_get_write_access(handle, is.iloc.bh);
428 if (error) 431 if (error)
429 goto out; 432 goto out;
@@ -1007,6 +1010,7 @@ static int ext4_add_dirent_to_inline(handle_t *handle,
1007 if (err) 1010 if (err)
1008 return err; 1011 return err;
1009 1012
1013 BUFFER_TRACE(iloc->bh, "get_write_access");
1010 err = ext4_journal_get_write_access(handle, iloc->bh); 1014 err = ext4_journal_get_write_access(handle, iloc->bh);
1011 if (err) 1015 if (err)
1012 return err; 1016 return err;
@@ -1669,6 +1673,7 @@ int ext4_delete_inline_entry(handle_t *handle,
1669 EXT4_MIN_INLINE_DATA_SIZE; 1673 EXT4_MIN_INLINE_DATA_SIZE;
1670 } 1674 }
1671 1675
1676 BUFFER_TRACE(bh, "get_write_access");
1672 err = ext4_journal_get_write_access(handle, bh); 1677 err = ext4_journal_get_write_access(handle, bh);
1673 if (err) 1678 if (err)
1674 goto out; 1679 goto out;
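Several hunks in this series, in file.c and inline.c above and in namei.c, mballoc.c and move_extent.c below, add the same annotation immediately before ext4_journal_get_write_access(). As I read it (an assumption; BUFFER_TRACE compiles away unless jbd2 buffer tracing is enabled), the point is purely diagnostic: record which call site asked the journal for write access. The recurring shape, as an editor's sketch with a hypothetical wrapper:

	/* Editor's sketch of the pattern the series applies everywhere. */
	static int example_get_write_access(handle_t *handle, struct buffer_head *bh)
	{
		BUFFER_TRACE(bh, "get_write_access");             /* annotate first */
		return ext4_journal_get_write_access(handle, bh); /* then ask jbd2 */
	}
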
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index d7b7462a0e13..7fcd68ee9155 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -148,6 +148,9 @@ static int ext4_inode_is_fast_symlink(struct inode *inode)
148 int ea_blocks = EXT4_I(inode)->i_file_acl ? 148 int ea_blocks = EXT4_I(inode)->i_file_acl ?
149 EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0; 149 EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0;
150 150
151 if (ext4_has_inline_data(inode))
152 return 0;
153
151 return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0); 154 return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
152} 155}
153 156
@@ -443,7 +446,7 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
443 * could be converted. 446 * could be converted.
444 */ 447 */
445 if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) 448 if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
446 down_read((&EXT4_I(inode)->i_data_sem)); 449 down_read(&EXT4_I(inode)->i_data_sem);
447 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 450 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
448 retval = ext4_ext_map_blocks(handle, inode, map, flags & 451 retval = ext4_ext_map_blocks(handle, inode, map, flags &
449 EXT4_GET_BLOCKS_KEEP_SIZE); 452 EXT4_GET_BLOCKS_KEEP_SIZE);
@@ -489,8 +492,8 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
489 * Otherwise, call with ext4_ind_map_blocks() to handle indirect mapping 492 * Otherwise, call with ext4_ind_map_blocks() to handle indirect mapping
490 * based files 493 * based files
491 * 494 *
492 * On success, it returns the number of blocks being mapped or allocate. 495 * On success, it returns the number of blocks being mapped or allocated.
493 * if create==0 and the blocks are pre-allocated and uninitialized block, 496 * if create==0 and the blocks are pre-allocated and unwritten block,
494 * the result buffer head is unmapped. If the create ==1, it will make sure 497 * the result buffer head is unmapped. If the create ==1, it will make sure
495 * the buffer head is mapped. 498 * the buffer head is mapped.
496 * 499 *
@@ -555,7 +558,7 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
555 * file system block. 558 * file system block.
556 */ 559 */
557 if (!(flags & EXT4_GET_BLOCKS_NO_LOCK)) 560 if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
558 down_read((&EXT4_I(inode)->i_data_sem)); 561 down_read(&EXT4_I(inode)->i_data_sem);
559 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 562 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
560 retval = ext4_ext_map_blocks(handle, inode, map, flags & 563 retval = ext4_ext_map_blocks(handle, inode, map, flags &
561 EXT4_GET_BLOCKS_KEEP_SIZE); 564 EXT4_GET_BLOCKS_KEEP_SIZE);
@@ -622,12 +625,12 @@ found:
622 map->m_flags &= ~EXT4_MAP_FLAGS; 625 map->m_flags &= ~EXT4_MAP_FLAGS;
623 626
624 /* 627 /*
625 * New blocks allocate and/or writing to uninitialized extent 628 * New blocks allocate and/or writing to unwritten extent
626 * will possibly result in updating i_data, so we take 629 * will possibly result in updating i_data, so we take
627 * the write lock of i_data_sem, and call get_blocks() 630 * the write lock of i_data_sem, and call get_blocks()
628 * with create == 1 flag. 631 * with create == 1 flag.
629 */ 632 */
630 down_write((&EXT4_I(inode)->i_data_sem)); 633 down_write(&EXT4_I(inode)->i_data_sem);
631 634
632 /* 635 /*
633 * if the caller is from delayed allocation writeout path 636 * if the caller is from delayed allocation writeout path
@@ -922,6 +925,7 @@ int do_journal_get_write_access(handle_t *handle,
922 */ 925 */
923 if (dirty) 926 if (dirty)
924 clear_buffer_dirty(bh); 927 clear_buffer_dirty(bh);
928 BUFFER_TRACE(bh, "get write access");
925 ret = ext4_journal_get_write_access(handle, bh); 929 ret = ext4_journal_get_write_access(handle, bh);
926 if (!ret && dirty) 930 if (!ret && dirty)
927 ret = ext4_handle_dirty_metadata(handle, NULL, bh); 931 ret = ext4_handle_dirty_metadata(handle, NULL, bh);
@@ -1540,7 +1544,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1540 ext4_es_lru_add(inode); 1544 ext4_es_lru_add(inode);
1541 if (ext4_es_is_hole(&es)) { 1545 if (ext4_es_is_hole(&es)) {
1542 retval = 0; 1546 retval = 0;
1543 down_read((&EXT4_I(inode)->i_data_sem)); 1547 down_read(&EXT4_I(inode)->i_data_sem);
1544 goto add_delayed; 1548 goto add_delayed;
1545 } 1549 }
1546 1550
@@ -1577,7 +1581,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1577 * Try to see if we can get the block without requesting a new 1581 * Try to see if we can get the block without requesting a new
1578 * file system block. 1582 * file system block.
1579 */ 1583 */
1580 down_read((&EXT4_I(inode)->i_data_sem)); 1584 down_read(&EXT4_I(inode)->i_data_sem);
1581 if (ext4_has_inline_data(inode)) { 1585 if (ext4_has_inline_data(inode)) {
1582 /* 1586 /*
1583 * We will soon create blocks for this page, and let 1587 * We will soon create blocks for this page, and let
@@ -1769,6 +1773,7 @@ static int __ext4_journalled_writepage(struct page *page,
1769 BUG_ON(!ext4_handle_valid(handle)); 1773 BUG_ON(!ext4_handle_valid(handle));
1770 1774
1771 if (inline_data) { 1775 if (inline_data) {
1776 BUFFER_TRACE(inode_bh, "get write access");
1772 ret = ext4_journal_get_write_access(handle, inode_bh); 1777 ret = ext4_journal_get_write_access(handle, inode_bh);
1773 1778
1774 err = ext4_handle_dirty_metadata(handle, inode, inode_bh); 1779 err = ext4_handle_dirty_metadata(handle, inode, inode_bh);
@@ -1846,6 +1851,7 @@ static int ext4_writepage(struct page *page,
1846 struct buffer_head *page_bufs = NULL; 1851 struct buffer_head *page_bufs = NULL;
1847 struct inode *inode = page->mapping->host; 1852 struct inode *inode = page->mapping->host;
1848 struct ext4_io_submit io_submit; 1853 struct ext4_io_submit io_submit;
1854 bool keep_towrite = false;
1849 1855
1850 trace_ext4_writepage(page); 1856 trace_ext4_writepage(page);
1851 size = i_size_read(inode); 1857 size = i_size_read(inode);
@@ -1876,6 +1882,7 @@ static int ext4_writepage(struct page *page,
1876 unlock_page(page); 1882 unlock_page(page);
1877 return 0; 1883 return 0;
1878 } 1884 }
1885 keep_towrite = true;
1879 } 1886 }
1880 1887
1881 if (PageChecked(page) && ext4_should_journal_data(inode)) 1888 if (PageChecked(page) && ext4_should_journal_data(inode))
@@ -1892,7 +1899,7 @@ static int ext4_writepage(struct page *page,
1892 unlock_page(page); 1899 unlock_page(page);
1893 return -ENOMEM; 1900 return -ENOMEM;
1894 } 1901 }
1895 ret = ext4_bio_write_page(&io_submit, page, len, wbc); 1902 ret = ext4_bio_write_page(&io_submit, page, len, wbc, keep_towrite);
1896 ext4_io_submit(&io_submit); 1903 ext4_io_submit(&io_submit);
1897 /* Drop io_end reference we got from init */ 1904 /* Drop io_end reference we got from init */
1898 ext4_put_io_end_defer(io_submit.io_end); 1905 ext4_put_io_end_defer(io_submit.io_end);
@@ -1911,7 +1918,7 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
1911 else 1918 else
1912 len = PAGE_CACHE_SIZE; 1919 len = PAGE_CACHE_SIZE;
1913 clear_page_dirty_for_io(page); 1920 clear_page_dirty_for_io(page);
1914 err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc); 1921 err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false);
1915 if (!err) 1922 if (!err)
1916 mpd->wbc->nr_to_write--; 1923 mpd->wbc->nr_to_write--;
1917 mpd->first_page++; 1924 mpd->first_page++;
@@ -2032,7 +2039,7 @@ static int mpage_process_page_bufs(struct mpage_da_data *mpd,
2032 * Scan buffers corresponding to changed extent (we expect corresponding pages 2039 * Scan buffers corresponding to changed extent (we expect corresponding pages
2033 * to be already locked) and update buffer state according to new extent state. 2040 * to be already locked) and update buffer state according to new extent state.
2034 * We map delalloc buffers to their physical location, clear unwritten bits, 2041 * We map delalloc buffers to their physical location, clear unwritten bits,
2035 * and mark buffers as uninit when we perform writes to uninitialized extents 2042 * and mark buffers as uninit when we perform writes to unwritten extents
2036 * and do extent conversion after IO is finished. If the last page is not fully 2043 * and do extent conversion after IO is finished. If the last page is not fully
2037 * mapped, we update @map to the next extent in the last page that needs 2044 * mapped, we update @map to the next extent in the last page that needs
2038 * mapping. Otherwise we submit the page for IO. 2045 * mapping. Otherwise we submit the page for IO.
@@ -2126,12 +2133,12 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
2126 struct inode *inode = mpd->inode; 2133 struct inode *inode = mpd->inode;
2127 struct ext4_map_blocks *map = &mpd->map; 2134 struct ext4_map_blocks *map = &mpd->map;
2128 int get_blocks_flags; 2135 int get_blocks_flags;
2129 int err; 2136 int err, dioread_nolock;
2130 2137
2131 trace_ext4_da_write_pages_extent(inode, map); 2138 trace_ext4_da_write_pages_extent(inode, map);
2132 /* 2139 /*
2133 * Call ext4_map_blocks() to allocate any delayed allocation blocks, or 2140 * Call ext4_map_blocks() to allocate any delayed allocation blocks, or
2134 * to convert an uninitialized extent to be initialized (in the case 2141 * to convert an unwritten extent to be initialized (in the case
2135 * where we have written into one or more preallocated blocks). It is 2142 * where we have written into one or more preallocated blocks). It is
2136 * possible that we're going to need more metadata blocks than 2143 * possible that we're going to need more metadata blocks than
2137 * previously reserved. However we must not fail because we're in 2144 * previously reserved. However we must not fail because we're in
@@ -2148,7 +2155,8 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
2148 */ 2155 */
2149 get_blocks_flags = EXT4_GET_BLOCKS_CREATE | 2156 get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
2150 EXT4_GET_BLOCKS_METADATA_NOFAIL; 2157 EXT4_GET_BLOCKS_METADATA_NOFAIL;
2151 if (ext4_should_dioread_nolock(inode)) 2158 dioread_nolock = ext4_should_dioread_nolock(inode);
2159 if (dioread_nolock)
2152 get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT; 2160 get_blocks_flags |= EXT4_GET_BLOCKS_IO_CREATE_EXT;
2153 if (map->m_flags & (1 << BH_Delay)) 2161 if (map->m_flags & (1 << BH_Delay))
2154 get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE; 2162 get_blocks_flags |= EXT4_GET_BLOCKS_DELALLOC_RESERVE;
@@ -2156,7 +2164,7 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
2156 err = ext4_map_blocks(handle, inode, map, get_blocks_flags); 2164 err = ext4_map_blocks(handle, inode, map, get_blocks_flags);
2157 if (err < 0) 2165 if (err < 0)
2158 return err; 2166 return err;
2159 if (map->m_flags & EXT4_MAP_UNINIT) { 2167 if (dioread_nolock && (map->m_flags & EXT4_MAP_UNWRITTEN)) {
2160 if (!mpd->io_submit.io_end->handle && 2168 if (!mpd->io_submit.io_end->handle &&
2161 ext4_handle_valid(handle)) { 2169 ext4_handle_valid(handle)) {
2162 mpd->io_submit.io_end->handle = handle->h_rsv_handle; 2170 mpd->io_submit.io_end->handle = handle->h_rsv_handle;
@@ -3070,9 +3078,9 @@ static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
 3070 * preallocated extents, and those writes extend the file, no need to 3078 * preallocated extents, and those writes extend the file, no need to
3071 * fall back to buffered IO. 3079 * fall back to buffered IO.
3072 * 3080 *
3073 * For holes, we fallocate those blocks, mark them as uninitialized 3081 * For holes, we fallocate those blocks, mark them as unwritten
 3074 * If those blocks were preallocated, we make sure they are split, but 3082 * If those blocks were preallocated, we make sure they are split, but
3075 * still keep the range to write as uninitialized. 3083 * still keep the range to write as unwritten.
3076 * 3084 *
3077 * The unwritten extents will be converted to written when DIO is completed. 3085 * The unwritten extents will be converted to written when DIO is completed.
 3078 * For async direct IO, since the IO may still be pending when we return, we 3086 * For async direct IO, since the IO may still be pending when we return, we
@@ -3124,12 +3132,12 @@ static ssize_t ext4_ext_direct_IO(int rw, struct kiocb *iocb,
3124 * We could direct write to holes and fallocate. 3132 * We could direct write to holes and fallocate.
3125 * 3133 *
3126 * Allocated blocks to fill the hole are marked as 3134 * Allocated blocks to fill the hole are marked as
 3127 * uninitialized to prevent a parallel buffered read from exposing 3135 * unwritten to prevent a parallel buffered read from exposing
 3128 * the stale data before the DIO completes. 3136 * the stale data before the DIO completes.
3129 * 3137 *
3130 * As to previously fallocated extents, ext4 get_block will 3138 * As to previously fallocated extents, ext4 get_block will
3131 * just simply mark the buffer mapped but still keep the 3139 * just simply mark the buffer mapped but still keep the
3132 * extents uninitialized. 3140 * extents unwritten.
3133 * 3141 *
3134 * For non AIO case, we will convert those unwritten extents 3142 * For non AIO case, we will convert those unwritten extents
3135 * to written after return back from blockdev_direct_IO. 3143 * to written after return back from blockdev_direct_IO.
@@ -3440,7 +3448,7 @@ unlock:
 3440 * This is required during truncate. We need to physically zero the tail end 3448 * This is required during truncate. We need to physically zero the tail end
3441 * of that block so it doesn't yield old data if the file is later grown. 3449 * of that block so it doesn't yield old data if the file is later grown.
3442 */ 3450 */
3443int ext4_block_truncate_page(handle_t *handle, 3451static int ext4_block_truncate_page(handle_t *handle,
3444 struct address_space *mapping, loff_t from) 3452 struct address_space *mapping, loff_t from)
3445{ 3453{
3446 unsigned offset = from & (PAGE_CACHE_SIZE-1); 3454 unsigned offset = from & (PAGE_CACHE_SIZE-1);
@@ -4304,12 +4312,15 @@ static int ext4_do_update_inode(handle_t *handle,
4304 struct ext4_inode *raw_inode = ext4_raw_inode(iloc); 4312 struct ext4_inode *raw_inode = ext4_raw_inode(iloc);
4305 struct ext4_inode_info *ei = EXT4_I(inode); 4313 struct ext4_inode_info *ei = EXT4_I(inode);
4306 struct buffer_head *bh = iloc->bh; 4314 struct buffer_head *bh = iloc->bh;
4315 struct super_block *sb = inode->i_sb;
4307 int err = 0, rc, block; 4316 int err = 0, rc, block;
4308 int need_datasync = 0; 4317 int need_datasync = 0, set_large_file = 0;
4309 uid_t i_uid; 4318 uid_t i_uid;
4310 gid_t i_gid; 4319 gid_t i_gid;
4311 4320
4312 /* For fields not not tracking in the in-memory inode, 4321 spin_lock(&ei->i_raw_lock);
4322
4323 /* For fields not tracked in the in-memory inode,
4313 * initialise them to zero for new inodes. */ 4324 * initialise them to zero for new inodes. */
4314 if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) 4325 if (ext4_test_inode_state(inode, EXT4_STATE_NEW))
4315 memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size); 4326 memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
@@ -4347,8 +4358,10 @@ static int ext4_do_update_inode(handle_t *handle,
4347 EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); 4358 EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
4348 EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); 4359 EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);
4349 4360
4350 if (ext4_inode_blocks_set(handle, raw_inode, ei)) 4361 if (ext4_inode_blocks_set(handle, raw_inode, ei)) {
4362 spin_unlock(&ei->i_raw_lock);
4351 goto out_brelse; 4363 goto out_brelse;
4364 }
4352 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime); 4365 raw_inode->i_dtime = cpu_to_le32(ei->i_dtime);
4353 raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF); 4366 raw_inode->i_flags = cpu_to_le32(ei->i_flags & 0xFFFFFFFF);
4354 if (likely(!test_opt2(inode->i_sb, HURD_COMPAT))) 4367 if (likely(!test_opt2(inode->i_sb, HURD_COMPAT)))
@@ -4360,24 +4373,11 @@ static int ext4_do_update_inode(handle_t *handle,
4360 need_datasync = 1; 4373 need_datasync = 1;
4361 } 4374 }
4362 if (ei->i_disksize > 0x7fffffffULL) { 4375 if (ei->i_disksize > 0x7fffffffULL) {
4363 struct super_block *sb = inode->i_sb;
4364 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 4376 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
4365 EXT4_FEATURE_RO_COMPAT_LARGE_FILE) || 4377 EXT4_FEATURE_RO_COMPAT_LARGE_FILE) ||
4366 EXT4_SB(sb)->s_es->s_rev_level == 4378 EXT4_SB(sb)->s_es->s_rev_level ==
4367 cpu_to_le32(EXT4_GOOD_OLD_REV)) { 4379 cpu_to_le32(EXT4_GOOD_OLD_REV))
4368 /* If this is the first large file 4380 set_large_file = 1;
4369 * created, add a flag to the superblock.
4370 */
4371 err = ext4_journal_get_write_access(handle,
4372 EXT4_SB(sb)->s_sbh);
4373 if (err)
4374 goto out_brelse;
4375 ext4_update_dynamic_rev(sb);
4376 EXT4_SET_RO_COMPAT_FEATURE(sb,
4377 EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
4378 ext4_handle_sync(handle);
4379 err = ext4_handle_dirty_super(handle, sb);
4380 }
4381 } 4381 }
4382 raw_inode->i_generation = cpu_to_le32(inode->i_generation); 4382 raw_inode->i_generation = cpu_to_le32(inode->i_generation);
4383 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) { 4383 if (S_ISCHR(inode->i_mode) || S_ISBLK(inode->i_mode)) {
@@ -4409,12 +4409,24 @@ static int ext4_do_update_inode(handle_t *handle,
4409 4409
4410 ext4_inode_csum_set(inode, raw_inode, ei); 4410 ext4_inode_csum_set(inode, raw_inode, ei);
4411 4411
4412 spin_unlock(&ei->i_raw_lock);
4413
4412 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 4414 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
4413 rc = ext4_handle_dirty_metadata(handle, NULL, bh); 4415 rc = ext4_handle_dirty_metadata(handle, NULL, bh);
4414 if (!err) 4416 if (!err)
4415 err = rc; 4417 err = rc;
4416 ext4_clear_inode_state(inode, EXT4_STATE_NEW); 4418 ext4_clear_inode_state(inode, EXT4_STATE_NEW);
4417 4419 if (set_large_file) {
4420 BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get write access");
4421 err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
4422 if (err)
4423 goto out_brelse;
4424 ext4_update_dynamic_rev(sb);
4425 EXT4_SET_RO_COMPAT_FEATURE(sb,
4426 EXT4_FEATURE_RO_COMPAT_LARGE_FILE);
4427 ext4_handle_sync(handle);
4428 err = ext4_handle_dirty_super(handle, sb);
4429 }
4418 ext4_update_inode_fsync_trans(handle, inode, need_datasync); 4430 ext4_update_inode_fsync_trans(handle, inode, need_datasync);
4419out_brelse: 4431out_brelse:
4420 brelse(bh); 4432 brelse(bh);
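The inode.c hunks above introduce i_raw_lock and move the LARGE_FILE superblock update out of the locked region, since ext4_journal_get_write_access() may sleep and must not run under a spinlock. A minimal sketch of that shape with hypothetical types (editor's illustration, not the patch itself):

	#include <linux/spinlock.h>

	/* Editor's sketch: fill the raw inode under the spinlock, remember any
	 * work that may sleep, and do it only after unlocking. */
	struct example_inode {
		spinlock_t		raw_lock;
		unsigned long long	disksize;
	};

	static void example_update_raw_inode(struct example_inode *ei)
	{
		int set_large_file = 0;

		spin_lock(&ei->raw_lock);
		/* ... copy in-memory fields into the on-disk inode image ... */
		if (ei->disksize > 0x7fffffffULL)
			set_large_file = 1;	/* note it, do not sleep here */
		spin_unlock(&ei->raw_lock);

		if (set_large_file) {
			/* Safe to sleep now: journal the superblock and set the
			 * RO_COMPAT_LARGE_FILE feature, as the hunk above does. */
		}
	}
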
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index afe8a133e3d1..59e31622cc6e 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2619,7 +2619,7 @@ int ext4_mb_init(struct super_block *sb)
2619 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group); 2619 sbi->s_locality_groups = alloc_percpu(struct ext4_locality_group);
2620 if (sbi->s_locality_groups == NULL) { 2620 if (sbi->s_locality_groups == NULL) {
2621 ret = -ENOMEM; 2621 ret = -ENOMEM;
2622 goto out_free_groupinfo_slab; 2622 goto out;
2623 } 2623 }
2624 for_each_possible_cpu(i) { 2624 for_each_possible_cpu(i) {
2625 struct ext4_locality_group *lg; 2625 struct ext4_locality_group *lg;
@@ -2644,8 +2644,6 @@ int ext4_mb_init(struct super_block *sb)
2644out_free_locality_groups: 2644out_free_locality_groups:
2645 free_percpu(sbi->s_locality_groups); 2645 free_percpu(sbi->s_locality_groups);
2646 sbi->s_locality_groups = NULL; 2646 sbi->s_locality_groups = NULL;
2647out_free_groupinfo_slab:
2648 ext4_groupinfo_destroy_slabs();
2649out: 2647out:
2650 kfree(sbi->s_mb_offsets); 2648 kfree(sbi->s_mb_offsets);
2651 sbi->s_mb_offsets = NULL; 2649 sbi->s_mb_offsets = NULL;
@@ -2878,6 +2876,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2878 if (!bitmap_bh) 2876 if (!bitmap_bh)
2879 goto out_err; 2877 goto out_err;
2880 2878
2879 BUFFER_TRACE(bitmap_bh, "getting write access");
2881 err = ext4_journal_get_write_access(handle, bitmap_bh); 2880 err = ext4_journal_get_write_access(handle, bitmap_bh);
2882 if (err) 2881 if (err)
2883 goto out_err; 2882 goto out_err;
@@ -2890,6 +2889,7 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2890 ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group, 2889 ext4_debug("using block group %u(%d)\n", ac->ac_b_ex.fe_group,
2891 ext4_free_group_clusters(sb, gdp)); 2890 ext4_free_group_clusters(sb, gdp));
2892 2891
2892 BUFFER_TRACE(gdp_bh, "get_write_access");
2893 err = ext4_journal_get_write_access(handle, gdp_bh); 2893 err = ext4_journal_get_write_access(handle, gdp_bh);
2894 if (err) 2894 if (err)
2895 goto out_err; 2895 goto out_err;
@@ -3147,7 +3147,7 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3147 } 3147 }
3148 BUG_ON(start + size <= ac->ac_o_ex.fe_logical && 3148 BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
3149 start > ac->ac_o_ex.fe_logical); 3149 start > ac->ac_o_ex.fe_logical);
3150 BUG_ON(size <= 0 || size > EXT4_CLUSTERS_PER_GROUP(ac->ac_sb)); 3150 BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
3151 3151
3152 /* now prepare goal request */ 3152 /* now prepare goal request */
3153 3153
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index 2ae73a80c19b..ec092437d3e0 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -505,7 +505,7 @@ int ext4_ext_migrate(struct inode *inode)
505 * with i_data_sem held to prevent racing with block 505 * with i_data_sem held to prevent racing with block
506 * allocation. 506 * allocation.
507 */ 507 */
508 down_read((&EXT4_I(inode)->i_data_sem)); 508 down_read(&EXT4_I(inode)->i_data_sem);
509 ext4_set_inode_state(inode, EXT4_STATE_EXT_MIGRATE); 509 ext4_set_inode_state(inode, EXT4_STATE_EXT_MIGRATE);
510 up_read((&EXT4_I(inode)->i_data_sem)); 510 up_read((&EXT4_I(inode)->i_data_sem));
511 511
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 04434ad3e8e0..32bce844c2e1 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -18,7 +18,7 @@ static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)
18 return cpu_to_le32(csum); 18 return cpu_to_le32(csum);
19} 19}
20 20
21int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp) 21static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
22{ 22{
23 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 23 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
24 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 24 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
@@ -27,7 +27,7 @@ int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
27 return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp); 27 return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp);
28} 28}
29 29
30void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp) 30static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
31{ 31{
32 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 32 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
33 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 33 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 58ee7dc87669..2484c7ec6a72 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -57,8 +57,8 @@ get_ext_path(struct inode *inode, ext4_lblk_t lblock,
57static void 57static void
58copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest) 58copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest)
59{ 59{
60 if (ext4_ext_is_uninitialized(src)) 60 if (ext4_ext_is_unwritten(src))
61 ext4_ext_mark_uninitialized(dest); 61 ext4_ext_mark_unwritten(dest);
62 else 62 else
63 dest->ee_len = cpu_to_le16(ext4_ext_get_actual_len(dest)); 63 dest->ee_len = cpu_to_le16(ext4_ext_get_actual_len(dest));
64} 64}
@@ -391,6 +391,7 @@ mext_insert_extents(handle_t *handle, struct inode *orig_inode,
391 391
392 if (depth) { 392 if (depth) {
393 /* Register to journal */ 393 /* Register to journal */
394 BUFFER_TRACE(orig_path->p_bh, "get_write_access");
394 ret = ext4_journal_get_write_access(handle, orig_path->p_bh); 395 ret = ext4_journal_get_write_access(handle, orig_path->p_bh);
395 if (ret) 396 if (ret)
396 return ret; 397 return ret;
@@ -593,14 +594,14 @@ mext_calc_swap_extents(struct ext4_extent *tmp_dext,
593 * @inode: inode in question 594 * @inode: inode in question
594 * @from: block offset of inode 595 * @from: block offset of inode
595 * @count: block count to be checked 596 * @count: block count to be checked
596 * @uninit: extents expected to be uninitialized 597 * @unwritten: extents expected to be unwritten
597 * @err: pointer to save error value 598 * @err: pointer to save error value
598 * 599 *
 599 * Return 1 if all extents in the range have the expected type, and zero otherwise. 600 * Return 1 if all extents in the range have the expected type, and zero otherwise.
600 */ 601 */
601static int 602static int
602mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count, 603mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count,
603 int uninit, int *err) 604 int unwritten, int *err)
604{ 605{
605 struct ext4_ext_path *path = NULL; 606 struct ext4_ext_path *path = NULL;
606 struct ext4_extent *ext; 607 struct ext4_extent *ext;
@@ -611,7 +612,7 @@ mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count,
611 if (*err) 612 if (*err)
612 goto out; 613 goto out;
613 ext = path[ext_depth(inode)].p_ext; 614 ext = path[ext_depth(inode)].p_ext;
614 if (uninit != ext4_ext_is_uninitialized(ext)) 615 if (unwritten != ext4_ext_is_unwritten(ext))
615 goto out; 616 goto out;
616 from += ext4_ext_get_actual_len(ext); 617 from += ext4_ext_get_actual_len(ext);
617 ext4_ext_drop_refs(path); 618 ext4_ext_drop_refs(path);
@@ -894,7 +895,7 @@ out:
894 * @orig_page_offset: page index on original file 895 * @orig_page_offset: page index on original file
895 * @data_offset_in_page: block index where data swapping starts 896 * @data_offset_in_page: block index where data swapping starts
896 * @block_len_in_page: the number of blocks to be swapped 897 * @block_len_in_page: the number of blocks to be swapped
897 * @uninit: orig extent is uninitialized or not 898 * @unwritten: orig extent is unwritten or not
898 * @err: pointer to save return value 899 * @err: pointer to save return value
899 * 900 *
900 * Save the data in original inode blocks and replace original inode extents 901 * Save the data in original inode blocks and replace original inode extents
@@ -905,7 +906,7 @@ out:
905static int 906static int
906move_extent_per_page(struct file *o_filp, struct inode *donor_inode, 907move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
907 pgoff_t orig_page_offset, int data_offset_in_page, 908 pgoff_t orig_page_offset, int data_offset_in_page,
908 int block_len_in_page, int uninit, int *err) 909 int block_len_in_page, int unwritten, int *err)
909{ 910{
910 struct inode *orig_inode = file_inode(o_filp); 911 struct inode *orig_inode = file_inode(o_filp);
911 struct page *pagep[2] = {NULL, NULL}; 912 struct page *pagep[2] = {NULL, NULL};
@@ -962,27 +963,27 @@ again:
962 if (unlikely(*err < 0)) 963 if (unlikely(*err < 0))
963 goto stop_journal; 964 goto stop_journal;
964 /* 965 /*
965 * If orig extent was uninitialized it can become initialized 966 * If orig extent was unwritten it can become initialized
 966 * at any time after i_data_sem was dropped, so in order to 967 * at any time after i_data_sem was dropped, so in order to
 967 * serialize with delalloc we recheck the extent while we 968 * serialize with delalloc we recheck the extent while we
 968 * hold the page's lock. If that is still the case, a data copy is not 969 * hold the page's lock. If that is still the case, a data copy is not
 969 * necessary; just swap data blocks between orig and donor. 970 * necessary; just swap data blocks between orig and donor.
970 */ 971 */
971 if (uninit) { 972 if (unwritten) {
972 ext4_double_down_write_data_sem(orig_inode, donor_inode); 973 ext4_double_down_write_data_sem(orig_inode, donor_inode);
973 /* If any of extents in range became initialized we have to 974 /* If any of extents in range became initialized we have to
974 * fallback to data copying */ 975 * fallback to data copying */
975 uninit = mext_check_coverage(orig_inode, orig_blk_offset, 976 unwritten = mext_check_coverage(orig_inode, orig_blk_offset,
976 block_len_in_page, 1, err); 977 block_len_in_page, 1, err);
977 if (*err) 978 if (*err)
978 goto drop_data_sem; 979 goto drop_data_sem;
979 980
980 uninit &= mext_check_coverage(donor_inode, orig_blk_offset, 981 unwritten &= mext_check_coverage(donor_inode, orig_blk_offset,
981 block_len_in_page, 1, err); 982 block_len_in_page, 1, err);
982 if (*err) 983 if (*err)
983 goto drop_data_sem; 984 goto drop_data_sem;
984 985
985 if (!uninit) { 986 if (!unwritten) {
986 ext4_double_up_write_data_sem(orig_inode, donor_inode); 987 ext4_double_up_write_data_sem(orig_inode, donor_inode);
987 goto data_copy; 988 goto data_copy;
988 } 989 }
@@ -1259,7 +1260,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1259 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; 1260 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
1260 int data_offset_in_page; 1261 int data_offset_in_page;
1261 int block_len_in_page; 1262 int block_len_in_page;
1262 int uninit; 1263 int unwritten;
1263 1264
1264 if (orig_inode->i_sb != donor_inode->i_sb) { 1265 if (orig_inode->i_sb != donor_inode->i_sb) {
1265 ext4_debug("ext4 move extent: The argument files " 1266 ext4_debug("ext4 move extent: The argument files "
@@ -1391,8 +1392,8 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1391 !last_extent) 1392 !last_extent)
1392 continue; 1393 continue;
1393 1394
 1394 /* Is the original extent uninitialized? */ 1395 /* Is the original extent unwritten? */
1395 uninit = ext4_ext_is_uninitialized(ext_prev); 1396 unwritten = ext4_ext_is_unwritten(ext_prev);
1396 1397
1397 data_offset_in_page = seq_start % blocks_per_page; 1398 data_offset_in_page = seq_start % blocks_per_page;
1398 1399
@@ -1432,8 +1433,8 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1432 o_filp, donor_inode, 1433 o_filp, donor_inode,
1433 orig_page_offset, 1434 orig_page_offset,
1434 data_offset_in_page, 1435 data_offset_in_page,
1435 block_len_in_page, uninit, 1436 block_len_in_page,
1436 &ret); 1437 unwritten, &ret);
1437 1438
1438 /* Count how many blocks we have exchanged */ 1439 /* Count how many blocks we have exchanged */
1439 *moved_len += block_len_in_page; 1440 *moved_len += block_len_in_page;
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 1cb84f78909e..3520ab8a6639 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -67,6 +67,7 @@ static struct buffer_head *ext4_append(handle_t *handle,
67 return ERR_PTR(err); 67 return ERR_PTR(err);
68 inode->i_size += inode->i_sb->s_blocksize; 68 inode->i_size += inode->i_sb->s_blocksize;
69 EXT4_I(inode)->i_disksize = inode->i_size; 69 EXT4_I(inode)->i_disksize = inode->i_size;
70 BUFFER_TRACE(bh, "get_write_access");
70 err = ext4_journal_get_write_access(handle, bh); 71 err = ext4_journal_get_write_access(handle, bh);
71 if (err) { 72 if (err) {
72 brelse(bh); 73 brelse(bh);
@@ -1778,6 +1779,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1778 1779
1779 blocksize = dir->i_sb->s_blocksize; 1780 blocksize = dir->i_sb->s_blocksize;
1780 dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino)); 1781 dxtrace(printk(KERN_DEBUG "Creating index: inode %lu\n", dir->i_ino));
1782 BUFFER_TRACE(bh, "get_write_access");
1781 retval = ext4_journal_get_write_access(handle, bh); 1783 retval = ext4_journal_get_write_access(handle, bh);
1782 if (retval) { 1784 if (retval) {
1783 ext4_std_error(dir->i_sb, retval); 1785 ext4_std_error(dir->i_sb, retval);
@@ -2510,8 +2512,7 @@ static int empty_dir(struct inode *inode)
2510 ext4_rec_len_from_disk(de1->rec_len, sb->s_blocksize); 2512 ext4_rec_len_from_disk(de1->rec_len, sb->s_blocksize);
2511 de = ext4_next_entry(de1, sb->s_blocksize); 2513 de = ext4_next_entry(de1, sb->s_blocksize);
2512 while (offset < inode->i_size) { 2514 while (offset < inode->i_size) {
2513 if (!bh || 2515 if ((void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
2514 (void *) de >= (void *) (bh->b_data+sb->s_blocksize)) {
2515 unsigned int lblock; 2516 unsigned int lblock;
2516 err = 0; 2517 err = 0;
2517 brelse(bh); 2518 brelse(bh);
@@ -2539,26 +2540,37 @@ static int empty_dir(struct inode *inode)
2539 return 1; 2540 return 1;
2540} 2541}
2541 2542
2542/* ext4_orphan_add() links an unlinked or truncated inode into a list of 2543/*
2544 * ext4_orphan_add() links an unlinked or truncated inode into a list of
2543 * such inodes, starting at the superblock, in case we crash before the 2545 * such inodes, starting at the superblock, in case we crash before the
2544 * file is closed/deleted, or in case the inode truncate spans multiple 2546 * file is closed/deleted, or in case the inode truncate spans multiple
2545 * transactions and the last transaction is not recovered after a crash. 2547 * transactions and the last transaction is not recovered after a crash.
2546 * 2548 *
2547 * At filesystem recovery time, we walk this list deleting unlinked 2549 * At filesystem recovery time, we walk this list deleting unlinked
2548 * inodes and truncating linked inodes in ext4_orphan_cleanup(). 2550 * inodes and truncating linked inodes in ext4_orphan_cleanup().
2551 *
2552 * Orphan list manipulation functions must be called under i_mutex unless
2553 * we are just creating the inode or deleting it.
2549 */ 2554 */
2550int ext4_orphan_add(handle_t *handle, struct inode *inode) 2555int ext4_orphan_add(handle_t *handle, struct inode *inode)
2551{ 2556{
2552 struct super_block *sb = inode->i_sb; 2557 struct super_block *sb = inode->i_sb;
2558 struct ext4_sb_info *sbi = EXT4_SB(sb);
2553 struct ext4_iloc iloc; 2559 struct ext4_iloc iloc;
2554 int err = 0, rc; 2560 int err = 0, rc;
2561 bool dirty = false;
2555 2562
2556 if (!EXT4_SB(sb)->s_journal) 2563 if (!sbi->s_journal)
2557 return 0; 2564 return 0;
2558 2565
2559 mutex_lock(&EXT4_SB(sb)->s_orphan_lock); 2566 WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
2567 !mutex_is_locked(&inode->i_mutex));
2568 /*
2569 * Exit early if inode already is on orphan list. This is a big speedup
2570 * since we don't have to contend on the global s_orphan_lock.
2571 */
2560 if (!list_empty(&EXT4_I(inode)->i_orphan)) 2572 if (!list_empty(&EXT4_I(inode)->i_orphan))
2561 goto out_unlock; 2573 return 0;
2562 2574
2563 /* 2575 /*
2564 * Orphan handling is only valid for files with data blocks 2576 * Orphan handling is only valid for files with data blocks
@@ -2569,48 +2581,51 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
2569 J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || 2581 J_ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
2570 S_ISLNK(inode->i_mode)) || inode->i_nlink == 0); 2582 S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
2571 2583
2572 BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access"); 2584 BUFFER_TRACE(sbi->s_sbh, "get_write_access");
2573 err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); 2585 err = ext4_journal_get_write_access(handle, sbi->s_sbh);
2574 if (err) 2586 if (err)
2575 goto out_unlock; 2587 goto out;
2576 2588
2577 err = ext4_reserve_inode_write(handle, inode, &iloc); 2589 err = ext4_reserve_inode_write(handle, inode, &iloc);
2578 if (err) 2590 if (err)
2579 goto out_unlock; 2591 goto out;
2592
2593 mutex_lock(&sbi->s_orphan_lock);
2580 /* 2594 /*
2581 * Due to previous errors inode may be already a part of on-disk 2595 * Due to previous errors inode may be already a part of on-disk
2582 * orphan list. If so skip on-disk list modification. 2596 * orphan list. If so skip on-disk list modification.
2583 */ 2597 */
2584 if (NEXT_ORPHAN(inode) && NEXT_ORPHAN(inode) <= 2598 if (!NEXT_ORPHAN(inode) || NEXT_ORPHAN(inode) >
2585 (le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count))) 2599 (le32_to_cpu(sbi->s_es->s_inodes_count))) {
2586 goto mem_insert; 2600 /* Insert this inode at the head of the on-disk orphan list */
2587 2601 NEXT_ORPHAN(inode) = le32_to_cpu(sbi->s_es->s_last_orphan);
2588 /* Insert this inode at the head of the on-disk orphan list... */ 2602 sbi->s_es->s_last_orphan = cpu_to_le32(inode->i_ino);
2589 NEXT_ORPHAN(inode) = le32_to_cpu(EXT4_SB(sb)->s_es->s_last_orphan); 2603 dirty = true;
2590 EXT4_SB(sb)->s_es->s_last_orphan = cpu_to_le32(inode->i_ino); 2604 }
2591 err = ext4_handle_dirty_super(handle, sb); 2605 list_add(&EXT4_I(inode)->i_orphan, &sbi->s_orphan);
2592 rc = ext4_mark_iloc_dirty(handle, inode, &iloc); 2606 mutex_unlock(&sbi->s_orphan_lock);
2593 if (!err) 2607
2594 err = rc; 2608 if (dirty) {
2595 2609 err = ext4_handle_dirty_super(handle, sb);
2596 /* Only add to the head of the in-memory list if all the 2610 rc = ext4_mark_iloc_dirty(handle, inode, &iloc);
2597 * previous operations succeeded. If the orphan_add is going to 2611 if (!err)
2598 * fail (possibly taking the journal offline), we can't risk 2612 err = rc;
2599 * leaving the inode on the orphan list: stray orphan-list 2613 if (err) {
2600 * entries can cause panics at unmount time. 2614 /*
2601 * 2615 * We have to remove inode from in-memory list if
2602 * This is safe: on error we're going to ignore the orphan list 2616 * addition to on disk orphan list failed. Stray orphan
2603 * anyway on the next recovery. */ 2617 * list entries can cause panics at unmount time.
2604mem_insert: 2618 */
2605 if (!err) 2619 mutex_lock(&sbi->s_orphan_lock);
2606 list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan); 2620 list_del(&EXT4_I(inode)->i_orphan);
2607 2621 mutex_unlock(&sbi->s_orphan_lock);
2622 }
2623 }
2608 jbd_debug(4, "superblock will point to %lu\n", inode->i_ino); 2624 jbd_debug(4, "superblock will point to %lu\n", inode->i_ino);
2609 jbd_debug(4, "orphan inode %lu will point to %d\n", 2625 jbd_debug(4, "orphan inode %lu will point to %d\n",
2610 inode->i_ino, NEXT_ORPHAN(inode)); 2626 inode->i_ino, NEXT_ORPHAN(inode));
2611out_unlock: 2627out:
2612 mutex_unlock(&EXT4_SB(sb)->s_orphan_lock); 2628 ext4_std_error(sb, err);
2613 ext4_std_error(inode->i_sb, err);
2614 return err; 2629 return err;
2615} 2630}
2616 2631
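In the rewritten ext4_orphan_add(), the global s_orphan_lock is taken later and held for less: the function returns straight away when the inode is already on the in-memory list, journal write access to the superblock and the inode write reservation are obtained before the lock, the on-disk pointer is only rewritten (and the superblock only dirtied) when it actually changed, and on failure the in-memory entry is removed again under the lock so stray entries cannot survive to unmount. A minimal sketch of the cheap-check-before-the-global-lock idiom, with a pthread mutex and a plain list standing in for the kernel structures:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Userspace model only: "node" plays the role of an inode's i_orphan entry. */
struct node { struct node *next; bool on_list; };

static pthread_mutex_t orphan_lock = PTHREAD_MUTEX_INITIALIZER;
static struct node *orphan_head;

static int orphan_add(struct node *n)
{
	/* Fast path: skip the global lock entirely if we are already queued.
	 * Safe as long as the caller serializes on a per-object lock
	 * (i_mutex in the ext4 case). */
	if (n->on_list)
		return 0;

	/* Expensive preparation (journal access in ext4) happens before the lock. */

	pthread_mutex_lock(&orphan_lock);
	n->next = orphan_head;
	orphan_head = n;
	n->on_list = true;
	pthread_mutex_unlock(&orphan_lock);
	return 0;
}

int main(void)
{
	struct node a = { 0 };
	orphan_add(&a);
	orphan_add(&a);            /* second call takes the fast path */
	printf("on_list=%d\n", a.on_list);
	return 0;
}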
@@ -2622,45 +2637,51 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
2622{ 2637{
2623 struct list_head *prev; 2638 struct list_head *prev;
2624 struct ext4_inode_info *ei = EXT4_I(inode); 2639 struct ext4_inode_info *ei = EXT4_I(inode);
2625 struct ext4_sb_info *sbi; 2640 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
2626 __u32 ino_next; 2641 __u32 ino_next;
2627 struct ext4_iloc iloc; 2642 struct ext4_iloc iloc;
2628 int err = 0; 2643 int err = 0;
2629 2644
2630 if ((!EXT4_SB(inode->i_sb)->s_journal) && 2645 if (!sbi->s_journal && !(sbi->s_mount_state & EXT4_ORPHAN_FS))
2631 !(EXT4_SB(inode->i_sb)->s_mount_state & EXT4_ORPHAN_FS))
2632 return 0; 2646 return 0;
2633 2647
2634 mutex_lock(&EXT4_SB(inode->i_sb)->s_orphan_lock); 2648 WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
2649 !mutex_is_locked(&inode->i_mutex));
2650 /* Do this quick check before taking global s_orphan_lock. */
2635 if (list_empty(&ei->i_orphan)) 2651 if (list_empty(&ei->i_orphan))
2636 goto out; 2652 return 0;
2637 2653
2638 ino_next = NEXT_ORPHAN(inode); 2654 if (handle) {
2639 prev = ei->i_orphan.prev; 2655 /* Grab inode buffer early before taking global s_orphan_lock */
2640 sbi = EXT4_SB(inode->i_sb); 2656 err = ext4_reserve_inode_write(handle, inode, &iloc);
2657 }
2641 2658
2659 mutex_lock(&sbi->s_orphan_lock);
2642 jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino); 2660 jbd_debug(4, "remove inode %lu from orphan list\n", inode->i_ino);
2643 2661
2662 prev = ei->i_orphan.prev;
2644 list_del_init(&ei->i_orphan); 2663 list_del_init(&ei->i_orphan);
2645 2664
2646 /* If we're on an error path, we may not have a valid 2665 /* If we're on an error path, we may not have a valid
2647 * transaction handle with which to update the orphan list on 2666 * transaction handle with which to update the orphan list on
2648 * disk, but we still need to remove the inode from the linked 2667 * disk, but we still need to remove the inode from the linked
2649 * list in memory. */ 2668 * list in memory. */
2650 if (!handle) 2669 if (!handle || err) {
2651 goto out; 2670 mutex_unlock(&sbi->s_orphan_lock);
2652
2653 err = ext4_reserve_inode_write(handle, inode, &iloc);
2654 if (err)
2655 goto out_err; 2671 goto out_err;
2672 }
2656 2673
2674 ino_next = NEXT_ORPHAN(inode);
2657 if (prev == &sbi->s_orphan) { 2675 if (prev == &sbi->s_orphan) {
2658 jbd_debug(4, "superblock will point to %u\n", ino_next); 2676 jbd_debug(4, "superblock will point to %u\n", ino_next);
2659 BUFFER_TRACE(sbi->s_sbh, "get_write_access"); 2677 BUFFER_TRACE(sbi->s_sbh, "get_write_access");
2660 err = ext4_journal_get_write_access(handle, sbi->s_sbh); 2678 err = ext4_journal_get_write_access(handle, sbi->s_sbh);
2661 if (err) 2679 if (err) {
2680 mutex_unlock(&sbi->s_orphan_lock);
2662 goto out_brelse; 2681 goto out_brelse;
2682 }
2663 sbi->s_es->s_last_orphan = cpu_to_le32(ino_next); 2683 sbi->s_es->s_last_orphan = cpu_to_le32(ino_next);
2684 mutex_unlock(&sbi->s_orphan_lock);
2664 err = ext4_handle_dirty_super(handle, inode->i_sb); 2685 err = ext4_handle_dirty_super(handle, inode->i_sb);
2665 } else { 2686 } else {
2666 struct ext4_iloc iloc2; 2687 struct ext4_iloc iloc2;
@@ -2670,20 +2691,20 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
2670 jbd_debug(4, "orphan inode %lu will point to %u\n", 2691 jbd_debug(4, "orphan inode %lu will point to %u\n",
2671 i_prev->i_ino, ino_next); 2692 i_prev->i_ino, ino_next);
2672 err = ext4_reserve_inode_write(handle, i_prev, &iloc2); 2693 err = ext4_reserve_inode_write(handle, i_prev, &iloc2);
2673 if (err) 2694 if (err) {
2695 mutex_unlock(&sbi->s_orphan_lock);
2674 goto out_brelse; 2696 goto out_brelse;
2697 }
2675 NEXT_ORPHAN(i_prev) = ino_next; 2698 NEXT_ORPHAN(i_prev) = ino_next;
2676 err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2); 2699 err = ext4_mark_iloc_dirty(handle, i_prev, &iloc2);
2700 mutex_unlock(&sbi->s_orphan_lock);
2677 } 2701 }
2678 if (err) 2702 if (err)
2679 goto out_brelse; 2703 goto out_brelse;
2680 NEXT_ORPHAN(inode) = 0; 2704 NEXT_ORPHAN(inode) = 0;
2681 err = ext4_mark_iloc_dirty(handle, inode, &iloc); 2705 err = ext4_mark_iloc_dirty(handle, inode, &iloc);
2682
2683out_err: 2706out_err:
2684 ext4_std_error(inode->i_sb, err); 2707 ext4_std_error(inode->i_sb, err);
2685out:
2686 mutex_unlock(&EXT4_SB(inode->i_sb)->s_orphan_lock);
2687 return err; 2708 return err;
2688 2709
2689out_brelse: 2710out_brelse:
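ext4_orphan_del() gets the same treatment as the add path above: the list_empty() test runs before the global s_orphan_lock, ext4_reserve_inode_write() is called ahead of the lock when a handle is available, and the lock is dropped before the superblock is dirtied, so the heavier journalling work stays outside the critical section. A compact, self-contained model of that shape (prepare outside the lock, unlink under it, journal afterwards), with a pthread mutex and a toy list in place of the real structures:

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Userspace model only; "entry" stands in for an inode's orphan linkage. */
struct entry { struct entry *prev, *next; };

static pthread_mutex_t orphan_lock = PTHREAD_MUTEX_INITIALIZER;
static struct entry head = { &head, &head };

static bool on_list(struct entry *e) { return e->next != e; }

static int orphan_del(struct entry *e, bool have_handle)
{
	if (!on_list(e))
		return 0;                 /* cheap check, no global lock */

	/* ext4 reserves the inode buffer here, before taking the lock. */

	pthread_mutex_lock(&orphan_lock);
	e->prev->next = e->next;          /* unlink under the lock */
	e->next->prev = e->prev;
	e->next = e->prev = e;
	pthread_mutex_unlock(&orphan_lock);

	if (!have_handle)
		return 0;                 /* error path: memory-only removal */

	/* Heavier journalling work (dirtying the superblock) runs after unlock. */
	printf("journal on-disk orphan pointer update\n");
	return 0;
}

int main(void)
{
	struct entry e = { &head, &head };
	head.next = head.prev = &e;       /* put e on the list */
	orphan_del(&e, true);
	printf("still on list: %d\n", on_list(&e));
	return 0;
}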
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 1a64e7a52b84..b24a2541a9ba 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -401,7 +401,8 @@ submit_and_retry:
401int ext4_bio_write_page(struct ext4_io_submit *io, 401int ext4_bio_write_page(struct ext4_io_submit *io,
402 struct page *page, 402 struct page *page,
403 int len, 403 int len,
404 struct writeback_control *wbc) 404 struct writeback_control *wbc,
405 bool keep_towrite)
405{ 406{
406 struct inode *inode = page->mapping->host; 407 struct inode *inode = page->mapping->host;
407 unsigned block_start, blocksize; 408 unsigned block_start, blocksize;
@@ -414,10 +415,24 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
414 BUG_ON(!PageLocked(page)); 415 BUG_ON(!PageLocked(page));
415 BUG_ON(PageWriteback(page)); 416 BUG_ON(PageWriteback(page));
416 417
417 set_page_writeback(page); 418 if (keep_towrite)
419 set_page_writeback_keepwrite(page);
420 else
421 set_page_writeback(page);
418 ClearPageError(page); 422 ClearPageError(page);
419 423
420 /* 424 /*
425 * Comments copied from block_write_full_page:
426 *
427 * The page straddles i_size. It must be zeroed out on each and every
428 * writepage invocation because it may be mmapped. "A file is mapped
429 * in multiples of the page size. For a file that is not a multiple of
430 * the page size, the remaining memory is zeroed when mapped, and
431 * writes to that region are not written out to the file."
432 */
433 if (len < PAGE_CACHE_SIZE)
434 zero_user_segment(page, len, PAGE_CACHE_SIZE);
435 /*
421 * In the first loop we prepare and mark buffers to submit. We have to 436 * In the first loop we prepare and mark buffers to submit. We have to
422 * mark all buffers in the page before submitting so that 437 * mark all buffers in the page before submitting so that
423 * end_page_writeback() cannot be called from ext4_bio_end_io() when IO 438 * end_page_writeback() cannot be called from ext4_bio_end_io() when IO
@@ -428,19 +443,6 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
428 do { 443 do {
429 block_start = bh_offset(bh); 444 block_start = bh_offset(bh);
430 if (block_start >= len) { 445 if (block_start >= len) {
431 /*
432 * Comments copied from block_write_full_page:
433 *
434 * The page straddles i_size. It must be zeroed out on
435 * each and every writepage invocation because it may
436 * be mmapped. "A file is mapped in multiples of the
437 * page size. For a file that is not a multiple of
438 * the page size, the remaining memory is zeroed when
439 * mapped, and writes to that region are not written
440 * out to the file."
441 */
442 zero_user_segment(page, block_start,
443 block_start + blocksize);
444 clear_buffer_dirty(bh); 446 clear_buffer_dirty(bh);
445 set_buffer_uptodate(bh); 447 set_buffer_uptodate(bh);
446 continue; 448 continue;
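Two things change in ext4_bio_write_page(). First, when keep_towrite is set the page is marked writeback with set_page_writeback_keepwrite(), which (as I read it) leaves the page's TOWRITE tag in place so a tagged data-integrity writeback pass can still find it. Second, zeroing of the region past i_size moves out of the per-buffer loop: the old code only cleared buffers whose block_start was at or beyond len, leaving the tail of a buffer that straddles len untouched, while the new code clears everything from len to the end of the page in one zero_user_segment() call. A small userspace model of that difference; the sizes are illustrative only:

#include <stdio.h>
#include <string.h>

#define PAGE_SIZE 4096
#define BLOCKSIZE 1024

static int count_stale(const unsigned char *page, int len)
{
	int stale = 0;
	for (int i = len; i < PAGE_SIZE; i++)
		if (page[i])
			stale++;
	return stale;
}

int main(void)
{
	unsigned char old_way[PAGE_SIZE], new_way[PAGE_SIZE];
	int len = 1500;                       /* i_size falls inside a buffer */

	memset(old_way, 0xaa, sizeof(old_way));
	memset(new_way, 0xaa, sizeof(new_way));

	/* Old loop: zero only buffers that start at or after len. */
	for (int bs = 0; bs < PAGE_SIZE; bs += BLOCKSIZE)
		if (bs >= len)
			memset(old_way + bs, 0, BLOCKSIZE);

	/* New code: zero the whole tail of the page past len, once. */
	memset(new_way + len, 0, PAGE_SIZE - len);

	printf("stale bytes past i_size: old=%d new=%d\n",
	       count_stale(old_way, len), count_stale(new_way, len));
	return 0;
}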
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 08b3c116915b..bb0e80f03e2e 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -348,6 +348,7 @@ static struct buffer_head *bclean(handle_t *handle, struct super_block *sb,
348 bh = sb_getblk(sb, blk); 348 bh = sb_getblk(sb, blk);
349 if (unlikely(!bh)) 349 if (unlikely(!bh))
350 return ERR_PTR(-ENOMEM); 350 return ERR_PTR(-ENOMEM);
351 BUFFER_TRACE(bh, "get_write_access");
351 if ((err = ext4_journal_get_write_access(handle, bh))) { 352 if ((err = ext4_journal_get_write_access(handle, bh))) {
352 brelse(bh); 353 brelse(bh);
353 bh = ERR_PTR(err); 354 bh = ERR_PTR(err);
@@ -426,6 +427,7 @@ static int set_flexbg_block_bitmap(struct super_block *sb, handle_t *handle,
426 if (unlikely(!bh)) 427 if (unlikely(!bh))
427 return -ENOMEM; 428 return -ENOMEM;
428 429
430 BUFFER_TRACE(bh, "get_write_access");
429 err = ext4_journal_get_write_access(handle, bh); 431 err = ext4_journal_get_write_access(handle, bh);
430 if (err) 432 if (err)
431 return err; 433 return err;
@@ -518,6 +520,7 @@ static int setup_new_flex_group_blocks(struct super_block *sb,
518 goto out; 520 goto out;
519 } 521 }
520 522
523 BUFFER_TRACE(gdb, "get_write_access");
521 err = ext4_journal_get_write_access(handle, gdb); 524 err = ext4_journal_get_write_access(handle, gdb);
522 if (err) { 525 if (err) {
523 brelse(gdb); 526 brelse(gdb);
@@ -790,14 +793,17 @@ static int add_new_gdb(handle_t *handle, struct inode *inode,
790 goto exit_dind; 793 goto exit_dind;
791 } 794 }
792 795
796 BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
793 err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); 797 err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
794 if (unlikely(err)) 798 if (unlikely(err))
795 goto exit_dind; 799 goto exit_dind;
796 800
801 BUFFER_TRACE(gdb_bh, "get_write_access");
797 err = ext4_journal_get_write_access(handle, gdb_bh); 802 err = ext4_journal_get_write_access(handle, gdb_bh);
798 if (unlikely(err)) 803 if (unlikely(err))
799 goto exit_dind; 804 goto exit_dind;
800 805
806 BUFFER_TRACE(dind, "get_write_access");
801 err = ext4_journal_get_write_access(handle, dind); 807 err = ext4_journal_get_write_access(handle, dind);
802 if (unlikely(err)) 808 if (unlikely(err))
803 ext4_std_error(sb, err); 809 ext4_std_error(sb, err);
@@ -902,6 +908,7 @@ static int add_new_gdb_meta_bg(struct super_block *sb,
902 EXT4_SB(sb)->s_group_desc = n_group_desc; 908 EXT4_SB(sb)->s_group_desc = n_group_desc;
903 EXT4_SB(sb)->s_gdb_count++; 909 EXT4_SB(sb)->s_gdb_count++;
904 ext4_kvfree(o_group_desc); 910 ext4_kvfree(o_group_desc);
911 BUFFER_TRACE(gdb_bh, "get_write_access");
905 err = ext4_journal_get_write_access(handle, gdb_bh); 912 err = ext4_journal_get_write_access(handle, gdb_bh);
906 if (unlikely(err)) 913 if (unlikely(err))
907 brelse(gdb_bh); 914 brelse(gdb_bh);
@@ -977,6 +984,7 @@ static int reserve_backup_gdb(handle_t *handle, struct inode *inode,
977 } 984 }
978 985
979 for (i = 0; i < reserved_gdb; i++) { 986 for (i = 0; i < reserved_gdb; i++) {
987 BUFFER_TRACE(primary[i], "get_write_access");
980 if ((err = ext4_journal_get_write_access(handle, primary[i]))) 988 if ((err = ext4_journal_get_write_access(handle, primary[i])))
981 goto exit_bh; 989 goto exit_bh;
982 } 990 }
@@ -1084,6 +1092,7 @@ static void update_backups(struct super_block *sb, int blk_off, char *data,
1084 ext4_debug("update metadata backup %llu(+%llu)\n", 1092 ext4_debug("update metadata backup %llu(+%llu)\n",
1085 backup_block, backup_block - 1093 backup_block, backup_block -
1086 ext4_group_first_block_no(sb, group)); 1094 ext4_group_first_block_no(sb, group));
1095 BUFFER_TRACE(bh, "get_write_access");
1087 if ((err = ext4_journal_get_write_access(handle, bh))) 1096 if ((err = ext4_journal_get_write_access(handle, bh)))
1088 break; 1097 break;
1089 lock_buffer(bh); 1098 lock_buffer(bh);
@@ -1163,6 +1172,7 @@ static int ext4_add_new_descs(handle_t *handle, struct super_block *sb,
1163 */ 1172 */
1164 if (gdb_off) { 1173 if (gdb_off) {
1165 gdb_bh = sbi->s_group_desc[gdb_num]; 1174 gdb_bh = sbi->s_group_desc[gdb_num];
1175 BUFFER_TRACE(gdb_bh, "get_write_access");
1166 err = ext4_journal_get_write_access(handle, gdb_bh); 1176 err = ext4_journal_get_write_access(handle, gdb_bh);
1167 1177
1168 if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group)) 1178 if (!err && reserved_gdb && ext4_bg_num_gdb(sb, group))
@@ -1433,6 +1443,7 @@ static int ext4_flex_group_add(struct super_block *sb,
1433 goto exit; 1443 goto exit;
1434 } 1444 }
1435 1445
1446 BUFFER_TRACE(sbi->s_sbh, "get_write_access");
1436 err = ext4_journal_get_write_access(handle, sbi->s_sbh); 1447 err = ext4_journal_get_write_access(handle, sbi->s_sbh);
1437 if (err) 1448 if (err)
1438 goto exit_journal; 1449 goto exit_journal;
@@ -1645,6 +1656,7 @@ static int ext4_group_extend_no_check(struct super_block *sb,
1645 return err; 1656 return err;
1646 } 1657 }
1647 1658
1659 BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
1648 err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh); 1660 err = ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh);
1649 if (err) { 1661 if (err) {
1650 ext4_warning(sb, "error %d on journal write access", err); 1662 ext4_warning(sb, "error %d on journal write access", err);
@@ -1804,6 +1816,7 @@ static int ext4_convert_meta_bg(struct super_block *sb, struct inode *inode)
1804 if (IS_ERR(handle)) 1816 if (IS_ERR(handle))
1805 return PTR_ERR(handle); 1817 return PTR_ERR(handle);
1806 1818
1819 BUFFER_TRACE(sbi->s_sbh, "get_write_access");
1807 err = ext4_journal_get_write_access(handle, sbi->s_sbh); 1820 err = ext4_journal_get_write_access(handle, sbi->s_sbh);
1808 if (err) 1821 if (err)
1809 goto errout; 1822 goto errout;
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 6f9e6fadac04..b9b9aabfb4d2 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -138,8 +138,8 @@ static __le32 ext4_superblock_csum(struct super_block *sb,
138 return cpu_to_le32(csum); 138 return cpu_to_le32(csum);
139} 139}
140 140
141int ext4_superblock_csum_verify(struct super_block *sb, 141static int ext4_superblock_csum_verify(struct super_block *sb,
142 struct ext4_super_block *es) 142 struct ext4_super_block *es)
143{ 143{
144 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 144 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb,
145 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) 145 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
@@ -879,6 +879,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
879 return NULL; 879 return NULL;
880 880
881 ei->vfs_inode.i_version = 1; 881 ei->vfs_inode.i_version = 1;
882 spin_lock_init(&ei->i_raw_lock);
882 INIT_LIST_HEAD(&ei->i_prealloc_list); 883 INIT_LIST_HEAD(&ei->i_prealloc_list);
883 spin_lock_init(&ei->i_prealloc_lock); 884 spin_lock_init(&ei->i_prealloc_lock);
884 ext4_es_init_tree(&ei->i_es_tree); 885 ext4_es_init_tree(&ei->i_es_tree);
@@ -1903,7 +1904,7 @@ static int ext4_setup_super(struct super_block *sb, struct ext4_super_block *es,
1903 if (!(sbi->s_mount_state & EXT4_VALID_FS)) 1904 if (!(sbi->s_mount_state & EXT4_VALID_FS))
1904 ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, " 1905 ext4_msg(sb, KERN_WARNING, "warning: mounting unchecked fs, "
1905 "running e2fsck is recommended"); 1906 "running e2fsck is recommended");
1906 else if ((sbi->s_mount_state & EXT4_ERROR_FS)) 1907 else if (sbi->s_mount_state & EXT4_ERROR_FS)
1907 ext4_msg(sb, KERN_WARNING, 1908 ext4_msg(sb, KERN_WARNING,
1908 "warning: mounting fs with errors, " 1909 "warning: mounting fs with errors, "
1909 "running e2fsck is recommended"); 1910 "running e2fsck is recommended");
@@ -2404,6 +2405,16 @@ static ext4_fsblk_t descriptor_loc(struct super_block *sb,
2404 if (ext4_bg_has_super(sb, bg)) 2405 if (ext4_bg_has_super(sb, bg))
2405 has_super = 1; 2406 has_super = 1;
2406 2407
2408 /*
2409 * If we have a meta_bg fs with 1k blocks, group 0's GDT is at
2410 * block 2, not 1. If s_first_data_block == 0 (bigalloc is enabled
2411 * on modern mke2fs or blksize > 1k on older mke2fs) then we must
2412 * compensate.
2413 */
2414 if (sb->s_blocksize == 1024 && nr == 0 &&
2415 le32_to_cpu(EXT4_SB(sb)->s_es->s_first_data_block) == 0)
2416 has_super++;
2417
2407 return (has_super + ext4_group_first_block_no(sb, bg)); 2418 return (has_super + ext4_group_first_block_no(sb, bg));
2408} 2419}
2409 2420
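The new block in descriptor_loc() covers the case its comment spells out: with 1 KiB blocks the 1024-byte superblock occupies block 1, so when s_first_data_block is 0 the group-0 descriptors really start at block 2, one block later than has_super + group_first_block alone would give. A small standalone model of just that arithmetic; the names mirror the ext4 fields, but nothing here reads a real filesystem:

#include <stdio.h>

static unsigned long long gdt_block(unsigned blocksize,
				    unsigned long long first_data_block,
				    int has_super)
{
	unsigned long long group_first = first_data_block;   /* group 0 */

	/* The superblock lives at byte offset 1024.  With 1 KiB blocks and
	 * first_data_block == 0 it occupies block 1, so the descriptors
	 * start one block later than has_super alone implies. */
	if (blocksize == 1024 && first_data_block == 0)
		has_super++;

	return has_super + group_first;
}

int main(void)
{
	printf("1k blocks, first_data_block=1: GDT at block %llu\n",
	       gdt_block(1024, 1, 1));   /* 2, already correct before the fix */
	printf("1k blocks, first_data_block=0: GDT at block %llu\n",
	       gdt_block(1024, 0, 1));   /* 2 with the fix, 1 without it */
	return 0;
}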
@@ -3337,7 +3348,7 @@ static ext4_fsblk_t ext4_calculate_resv_clusters(struct super_block *sb)
3337 * By default we reserve 2% or 4096 clusters, whichever is smaller. 3348 * By default we reserve 2% or 4096 clusters, whichever is smaller.
3338 * This should cover the situations where we can not afford to run 3349 * This should cover the situations where we can not afford to run
3339 * out of space like for example punch hole, or converting 3350 * out of space like for example punch hole, or converting
3340 * uninitialized extents in delalloc path. In most cases such 3351 * unwritten extents in delalloc path. In most cases such
3341 * allocation would require 1, or 2 blocks, higher numbers are 3352 * allocation would require 1, or 2 blocks, higher numbers are
3342 * very rare. 3353 * very rare.
3343 */ 3354 */
@@ -5370,6 +5381,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
5370 bh = ext4_bread(handle, inode, blk, 1, &err); 5381 bh = ext4_bread(handle, inode, blk, 1, &err);
5371 if (!bh) 5382 if (!bh)
5372 goto out; 5383 goto out;
5384 BUFFER_TRACE(bh, "get write access");
5373 err = ext4_journal_get_write_access(handle, bh); 5385 err = ext4_journal_get_write_access(handle, bh);
5374 if (err) { 5386 if (err) {
5375 brelse(bh); 5387 brelse(bh);
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 4eec399ec807..e7387337060c 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -369,6 +369,9 @@ ext4_xattr_get(struct inode *inode, int name_index, const char *name,
369{ 369{
370 int error; 370 int error;
371 371
372 if (strlen(name) > 255)
373 return -ERANGE;
374
372 down_read(&EXT4_I(inode)->xattr_sem); 375 down_read(&EXT4_I(inode)->xattr_sem);
373 error = ext4_xattr_ibody_get(inode, name_index, name, buffer, 376 error = ext4_xattr_ibody_get(inode, name_index, name, buffer,
374 buffer_size); 377 buffer_size);
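The strlen(name) > 255 guard in ext4_xattr_get() short-circuits lookups that cannot succeed: the on-disk entry (struct ext4_xattr_entry) stores the name length in a single byte, so no stored attribute can have a longer name, and -ERANGE is returned before the in-inode or block xattr storage is searched. Userspace already sees the same 255-byte ceiling through the VFS (XATTR_NAME_MAX). The snippet below only demonstrates the limit from user space; ./testfile is a placeholder path, not anything from this patch:

#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <sys/xattr.h>

int main(void)
{
	char name[300];

	/* Build a "user." attribute name well past the 255-byte limit. */
	memset(name, 'a', sizeof(name) - 1);
	name[sizeof(name) - 1] = '\0';
	memcpy(name, "user.", 5);

	/* On an existing file this fails with ERANGE before any attribute
	 * lookup; if ./testfile does not exist you get ENOENT instead. */
	if (getxattr("./testfile", name, NULL, 0) < 0)
		printf("getxattr failed: %s\n", strerror(errno));
	return 0;
}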
@@ -513,6 +516,7 @@ static void ext4_xattr_update_super_block(handle_t *handle,
513 if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR)) 516 if (EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR))
514 return; 517 return;
515 518
519 BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
516 if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) { 520 if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) {
517 EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR); 521 EXT4_SET_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_EXT_ATTR);
518 ext4_handle_dirty_super(handle, sb); 522 ext4_handle_dirty_super(handle, sb);
@@ -532,6 +536,7 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
532 struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode); 536 struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
533 537
534 ce = mb_cache_entry_get(ext4_mb_cache, bh->b_bdev, bh->b_blocknr); 538 ce = mb_cache_entry_get(ext4_mb_cache, bh->b_bdev, bh->b_blocknr);
539 BUFFER_TRACE(bh, "get_write_access");
535 error = ext4_journal_get_write_access(handle, bh); 540 error = ext4_journal_get_write_access(handle, bh);
536 if (error) 541 if (error)
537 goto out; 542 goto out;
@@ -774,6 +779,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
774 if (s->base) { 779 if (s->base) {
775 ce = mb_cache_entry_get(ext4_mb_cache, bs->bh->b_bdev, 780 ce = mb_cache_entry_get(ext4_mb_cache, bs->bh->b_bdev,
776 bs->bh->b_blocknr); 781 bs->bh->b_blocknr);
782 BUFFER_TRACE(bs->bh, "get_write_access");
777 error = ext4_journal_get_write_access(handle, bs->bh); 783 error = ext4_journal_get_write_access(handle, bs->bh);
778 if (error) 784 if (error)
779 goto cleanup; 785 goto cleanup;
@@ -859,6 +865,7 @@ inserted:
859 EXT4_C2B(EXT4_SB(sb), 1)); 865 EXT4_C2B(EXT4_SB(sb), 1));
860 if (error) 866 if (error)
861 goto cleanup; 867 goto cleanup;
868 BUFFER_TRACE(new_bh, "get_write_access");
862 error = ext4_journal_get_write_access(handle, 869 error = ext4_journal_get_write_access(handle,
863 new_bh); 870 new_bh);
864 if (error) 871 if (error)
@@ -896,7 +903,7 @@ inserted:
896 * take i_data_sem because we will test 903 * take i_data_sem because we will test
897 * i_delalloc_reserved_flag in ext4_mb_new_blocks 904 * i_delalloc_reserved_flag in ext4_mb_new_blocks
898 */ 905 */
899 down_read((&EXT4_I(inode)->i_data_sem)); 906 down_read(&EXT4_I(inode)->i_data_sem);
900 block = ext4_new_meta_blocks(handle, inode, goal, 0, 907 block = ext4_new_meta_blocks(handle, inode, goal, 0,
901 NULL, &error); 908 NULL, &error);
902 up_read((&EXT4_I(inode)->i_data_sem)); 909 up_read((&EXT4_I(inode)->i_data_sem));