aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/balloc.c15
-rw-r--r--fs/ext4/bitmap.c12
-rw-r--r--fs/ext4/dir.c8
-rw-r--r--fs/ext4/ext4.h50
-rw-r--r--fs/ext4/ext4_extents.h1
-rw-r--r--fs/ext4/ext4_jbd2.c4
-rw-r--r--fs/ext4/ext4_jbd2.h6
-rw-r--r--fs/ext4/extents.c626
-rw-r--r--fs/ext4/extents_status.c200
-rw-r--r--fs/ext4/extents_status.h13
-rw-r--r--fs/ext4/file.c2
-rw-r--r--fs/ext4/ialloc.c7
-rw-r--r--fs/ext4/indirect.c86
-rw-r--r--fs/ext4/inline.c7
-rw-r--r--fs/ext4/inode.c140
-rw-r--r--fs/ext4/ioctl.c13
-rw-r--r--fs/ext4/mballoc.c15
-rw-r--r--fs/ext4/migrate.c11
-rw-r--r--fs/ext4/mmp.c6
-rw-r--r--fs/ext4/move_extent.c1068
-rw-r--r--fs/ext4/namei.c361
-rw-r--r--fs/ext4/resize.c5
-rw-r--r--fs/ext4/super.c262
-rw-r--r--fs/ext4/xattr.c44
24 files changed, 1285 insertions, 1677 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 581ef40fbe90..83a6f497c4e0 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -176,7 +176,7 @@ static unsigned int num_clusters_in_group(struct super_block *sb,
176} 176}
177 177
178/* Initializes an uninitialized block bitmap */ 178/* Initializes an uninitialized block bitmap */
179static void ext4_init_block_bitmap(struct super_block *sb, 179static int ext4_init_block_bitmap(struct super_block *sb,
180 struct buffer_head *bh, 180 struct buffer_head *bh,
181 ext4_group_t block_group, 181 ext4_group_t block_group,
182 struct ext4_group_desc *gdp) 182 struct ext4_group_desc *gdp)
@@ -192,7 +192,6 @@ static void ext4_init_block_bitmap(struct super_block *sb,
192 /* If checksum is bad mark all blocks used to prevent allocation 192 /* If checksum is bad mark all blocks used to prevent allocation
193 * essentially implementing a per-group read-only flag. */ 193 * essentially implementing a per-group read-only flag. */
194 if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) { 194 if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
195 ext4_error(sb, "Checksum bad for group %u", block_group);
196 grp = ext4_get_group_info(sb, block_group); 195 grp = ext4_get_group_info(sb, block_group);
197 if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp)) 196 if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
198 percpu_counter_sub(&sbi->s_freeclusters_counter, 197 percpu_counter_sub(&sbi->s_freeclusters_counter,
@@ -205,7 +204,7 @@ static void ext4_init_block_bitmap(struct super_block *sb,
205 count); 204 count);
206 } 205 }
207 set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state); 206 set_bit(EXT4_GROUP_INFO_IBITMAP_CORRUPT_BIT, &grp->bb_state);
208 return; 207 return -EIO;
209 } 208 }
210 memset(bh->b_data, 0, sb->s_blocksize); 209 memset(bh->b_data, 0, sb->s_blocksize);
211 210
@@ -243,6 +242,7 @@ static void ext4_init_block_bitmap(struct super_block *sb,
243 sb->s_blocksize * 8, bh->b_data); 242 sb->s_blocksize * 8, bh->b_data);
244 ext4_block_bitmap_csum_set(sb, block_group, gdp, bh); 243 ext4_block_bitmap_csum_set(sb, block_group, gdp, bh);
245 ext4_group_desc_csum_set(sb, block_group, gdp); 244 ext4_group_desc_csum_set(sb, block_group, gdp);
245 return 0;
246} 246}
247 247
248/* Return the number of free blocks in a block group. It is used when 248/* Return the number of free blocks in a block group. It is used when
@@ -438,11 +438,15 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
438 } 438 }
439 ext4_lock_group(sb, block_group); 439 ext4_lock_group(sb, block_group);
440 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) { 440 if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
441 ext4_init_block_bitmap(sb, bh, block_group, desc); 441 int err;
442
443 err = ext4_init_block_bitmap(sb, bh, block_group, desc);
442 set_bitmap_uptodate(bh); 444 set_bitmap_uptodate(bh);
443 set_buffer_uptodate(bh); 445 set_buffer_uptodate(bh);
444 ext4_unlock_group(sb, block_group); 446 ext4_unlock_group(sb, block_group);
445 unlock_buffer(bh); 447 unlock_buffer(bh);
448 if (err)
449 ext4_error(sb, "Checksum bad for grp %u", block_group);
446 return bh; 450 return bh;
447 } 451 }
448 ext4_unlock_group(sb, block_group); 452 ext4_unlock_group(sb, block_group);
@@ -636,8 +640,7 @@ ext4_fsblk_t ext4_new_meta_blocks(handle_t *handle, struct inode *inode,
636 * Account for the allocated meta blocks. We will never 640 * Account for the allocated meta blocks. We will never
637 * fail EDQUOT for metdata, but we do account for it. 641 * fail EDQUOT for metdata, but we do account for it.
638 */ 642 */
639 if (!(*errp) && 643 if (!(*errp) && (flags & EXT4_MB_DELALLOC_RESERVED)) {
640 ext4_test_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED)) {
641 spin_lock(&EXT4_I(inode)->i_block_reservation_lock); 644 spin_lock(&EXT4_I(inode)->i_block_reservation_lock);
642 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); 645 spin_unlock(&EXT4_I(inode)->i_block_reservation_lock);
643 dquot_alloc_block_nofail(inode, 646 dquot_alloc_block_nofail(inode,
diff --git a/fs/ext4/bitmap.c b/fs/ext4/bitmap.c
index 3285aa5a706a..b610779a958c 100644
--- a/fs/ext4/bitmap.c
+++ b/fs/ext4/bitmap.c
@@ -24,8 +24,7 @@ int ext4_inode_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
24 __u32 provided, calculated; 24 __u32 provided, calculated;
25 struct ext4_sb_info *sbi = EXT4_SB(sb); 25 struct ext4_sb_info *sbi = EXT4_SB(sb);
26 26
27 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 27 if (!ext4_has_metadata_csum(sb))
28 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
29 return 1; 28 return 1;
30 29
31 provided = le16_to_cpu(gdp->bg_inode_bitmap_csum_lo); 30 provided = le16_to_cpu(gdp->bg_inode_bitmap_csum_lo);
@@ -46,8 +45,7 @@ void ext4_inode_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
46 __u32 csum; 45 __u32 csum;
47 struct ext4_sb_info *sbi = EXT4_SB(sb); 46 struct ext4_sb_info *sbi = EXT4_SB(sb);
48 47
49 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 48 if (!ext4_has_metadata_csum(sb))
50 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
51 return; 49 return;
52 50
53 csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); 51 csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
@@ -65,8 +63,7 @@ int ext4_block_bitmap_csum_verify(struct super_block *sb, ext4_group_t group,
65 struct ext4_sb_info *sbi = EXT4_SB(sb); 63 struct ext4_sb_info *sbi = EXT4_SB(sb);
66 int sz = EXT4_CLUSTERS_PER_GROUP(sb) / 8; 64 int sz = EXT4_CLUSTERS_PER_GROUP(sb) / 8;
67 65
68 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 66 if (!ext4_has_metadata_csum(sb))
69 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
70 return 1; 67 return 1;
71 68
72 provided = le16_to_cpu(gdp->bg_block_bitmap_csum_lo); 69 provided = le16_to_cpu(gdp->bg_block_bitmap_csum_lo);
@@ -91,8 +88,7 @@ void ext4_block_bitmap_csum_set(struct super_block *sb, ext4_group_t group,
91 __u32 csum; 88 __u32 csum;
92 struct ext4_sb_info *sbi = EXT4_SB(sb); 89 struct ext4_sb_info *sbi = EXT4_SB(sb);
93 90
94 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 91 if (!ext4_has_metadata_csum(sb))
95 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
96 return; 92 return;
97 93
98 csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz); 94 csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)bh->b_data, sz);
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 0bb3f9ea0832..c24143ea9c08 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -151,13 +151,11 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
151 &file->f_ra, file, 151 &file->f_ra, file,
152 index, 1); 152 index, 1);
153 file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT; 153 file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
154 bh = ext4_bread(NULL, inode, map.m_lblk, 0, &err); 154 bh = ext4_bread(NULL, inode, map.m_lblk, 0);
155 if (IS_ERR(bh))
156 return PTR_ERR(bh);
155 } 157 }
156 158
157 /*
158 * We ignore I/O errors on directories so users have a chance
159 * of recovering data when there's a bad sector
160 */
161 if (!bh) { 159 if (!bh) {
162 if (!dir_has_error) { 160 if (!dir_has_error) {
163 EXT4_ERROR_FILE(file, 0, 161 EXT4_ERROR_FILE(file, 0,
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b0c225cdb52c..c55a1faaed58 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -572,15 +572,15 @@ enum {
572 572
573/* 573/*
574 * The bit position of these flags must not overlap with any of the 574 * The bit position of these flags must not overlap with any of the
575 * EXT4_GET_BLOCKS_*. They are used by ext4_ext_find_extent(), 575 * EXT4_GET_BLOCKS_*. They are used by ext4_find_extent(),
576 * read_extent_tree_block(), ext4_split_extent_at(), 576 * read_extent_tree_block(), ext4_split_extent_at(),
577 * ext4_ext_insert_extent(), and ext4_ext_create_new_leaf(). 577 * ext4_ext_insert_extent(), and ext4_ext_create_new_leaf().
578 * EXT4_EX_NOCACHE is used to indicate that the we shouldn't be 578 * EXT4_EX_NOCACHE is used to indicate that the we shouldn't be
579 * caching the extents when reading from the extent tree while a 579 * caching the extents when reading from the extent tree while a
580 * truncate or punch hole operation is in progress. 580 * truncate or punch hole operation is in progress.
581 */ 581 */
582#define EXT4_EX_NOCACHE 0x0400 582#define EXT4_EX_NOCACHE 0x40000000
583#define EXT4_EX_FORCE_CACHE 0x0800 583#define EXT4_EX_FORCE_CACHE 0x20000000
584 584
585/* 585/*
586 * Flags used by ext4_free_blocks 586 * Flags used by ext4_free_blocks
@@ -890,6 +890,7 @@ struct ext4_inode_info {
890 struct ext4_es_tree i_es_tree; 890 struct ext4_es_tree i_es_tree;
891 rwlock_t i_es_lock; 891 rwlock_t i_es_lock;
892 struct list_head i_es_lru; 892 struct list_head i_es_lru;
893 unsigned int i_es_all_nr; /* protected by i_es_lock */
893 unsigned int i_es_lru_nr; /* protected by i_es_lock */ 894 unsigned int i_es_lru_nr; /* protected by i_es_lock */
894 unsigned long i_touch_when; /* jiffies of last accessing */ 895 unsigned long i_touch_when; /* jiffies of last accessing */
895 896
@@ -1174,6 +1175,9 @@ struct ext4_super_block {
1174#define EXT4_MF_MNTDIR_SAMPLED 0x0001 1175#define EXT4_MF_MNTDIR_SAMPLED 0x0001
1175#define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */ 1176#define EXT4_MF_FS_ABORTED 0x0002 /* Fatal error detected */
1176 1177
1178/* Number of quota types we support */
1179#define EXT4_MAXQUOTAS 2
1180
1177/* 1181/*
1178 * fourth extended-fs super-block data in memory 1182 * fourth extended-fs super-block data in memory
1179 */ 1183 */
@@ -1237,7 +1241,7 @@ struct ext4_sb_info {
1237 u32 s_min_batch_time; 1241 u32 s_min_batch_time;
1238 struct block_device *journal_bdev; 1242 struct block_device *journal_bdev;
1239#ifdef CONFIG_QUOTA 1243#ifdef CONFIG_QUOTA
1240 char *s_qf_names[MAXQUOTAS]; /* Names of quota files with journalled quota */ 1244 char *s_qf_names[EXT4_MAXQUOTAS]; /* Names of quota files with journalled quota */
1241 int s_jquota_fmt; /* Format of quota to use */ 1245 int s_jquota_fmt; /* Format of quota to use */
1242#endif 1246#endif
1243 unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */ 1247 unsigned int s_want_extra_isize; /* New inodes should reserve # bytes */
@@ -1330,8 +1334,7 @@ struct ext4_sb_info {
1330 /* Reclaim extents from extent status tree */ 1334 /* Reclaim extents from extent status tree */
1331 struct shrinker s_es_shrinker; 1335 struct shrinker s_es_shrinker;
1332 struct list_head s_es_lru; 1336 struct list_head s_es_lru;
1333 unsigned long s_es_last_sorted; 1337 struct ext4_es_stats s_es_stats;
1334 struct percpu_counter s_extent_cache_cnt;
1335 struct mb_cache *s_mb_cache; 1338 struct mb_cache *s_mb_cache;
1336 spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp; 1339 spinlock_t s_es_lru_lock ____cacheline_aligned_in_smp;
1337 1340
@@ -1399,7 +1402,6 @@ enum {
1399 EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */ 1402 EXT4_STATE_EXT_MIGRATE, /* Inode is migrating */
1400 EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/ 1403 EXT4_STATE_DIO_UNWRITTEN, /* need convert on dio done*/
1401 EXT4_STATE_NEWENTRY, /* File just added to dir */ 1404 EXT4_STATE_NEWENTRY, /* File just added to dir */
1402 EXT4_STATE_DELALLOC_RESERVED, /* blks already reserved for delalloc */
1403 EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read 1405 EXT4_STATE_DIOREAD_LOCK, /* Disable support for dio read
1404 nolocking */ 1406 nolocking */
1405 EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */ 1407 EXT4_STATE_MAY_INLINE_DATA, /* may have in-inode data */
@@ -2086,10 +2088,8 @@ extern int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
2086extern int ext4_trim_fs(struct super_block *, struct fstrim_range *); 2088extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
2087 2089
2088/* inode.c */ 2090/* inode.c */
2089struct buffer_head *ext4_getblk(handle_t *, struct inode *, 2091struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
2090 ext4_lblk_t, int, int *); 2092struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
2091struct buffer_head *ext4_bread(handle_t *, struct inode *,
2092 ext4_lblk_t, int, int *);
2093int ext4_get_block_write(struct inode *inode, sector_t iblock, 2093int ext4_get_block_write(struct inode *inode, sector_t iblock,
2094 struct buffer_head *bh_result, int create); 2094 struct buffer_head *bh_result, int create);
2095int ext4_get_block(struct inode *inode, sector_t iblock, 2095int ext4_get_block(struct inode *inode, sector_t iblock,
@@ -2109,6 +2109,7 @@ int do_journal_get_write_access(handle_t *handle,
2109#define CONVERT_INLINE_DATA 2 2109#define CONVERT_INLINE_DATA 2
2110 2110
2111extern struct inode *ext4_iget(struct super_block *, unsigned long); 2111extern struct inode *ext4_iget(struct super_block *, unsigned long);
2112extern struct inode *ext4_iget_normal(struct super_block *, unsigned long);
2112extern int ext4_write_inode(struct inode *, struct writeback_control *); 2113extern int ext4_write_inode(struct inode *, struct writeback_control *);
2113extern int ext4_setattr(struct dentry *, struct iattr *); 2114extern int ext4_setattr(struct dentry *, struct iattr *);
2114extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, 2115extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry,
@@ -2332,10 +2333,18 @@ extern int ext4_register_li_request(struct super_block *sb,
2332static inline int ext4_has_group_desc_csum(struct super_block *sb) 2333static inline int ext4_has_group_desc_csum(struct super_block *sb)
2333{ 2334{
2334 return EXT4_HAS_RO_COMPAT_FEATURE(sb, 2335 return EXT4_HAS_RO_COMPAT_FEATURE(sb,
2335 EXT4_FEATURE_RO_COMPAT_GDT_CSUM | 2336 EXT4_FEATURE_RO_COMPAT_GDT_CSUM) ||
2336 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM); 2337 (EXT4_SB(sb)->s_chksum_driver != NULL);
2337} 2338}
2338 2339
2340static inline int ext4_has_metadata_csum(struct super_block *sb)
2341{
2342 WARN_ON_ONCE(EXT4_HAS_RO_COMPAT_FEATURE(sb,
2343 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
2344 !EXT4_SB(sb)->s_chksum_driver);
2345
2346 return (EXT4_SB(sb)->s_chksum_driver != NULL);
2347}
2339static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es) 2348static inline ext4_fsblk_t ext4_blocks_count(struct ext4_super_block *es)
2340{ 2349{
2341 return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) | 2350 return ((ext4_fsblk_t)le32_to_cpu(es->s_blocks_count_hi) << 32) |
@@ -2731,21 +2740,26 @@ extern int ext4_can_extents_be_merged(struct inode *inode,
2731 struct ext4_extent *ex1, 2740 struct ext4_extent *ex1,
2732 struct ext4_extent *ex2); 2741 struct ext4_extent *ex2);
2733extern int ext4_ext_insert_extent(handle_t *, struct inode *, 2742extern int ext4_ext_insert_extent(handle_t *, struct inode *,
2734 struct ext4_ext_path *, 2743 struct ext4_ext_path **,
2735 struct ext4_extent *, int); 2744 struct ext4_extent *, int);
2736extern struct ext4_ext_path *ext4_ext_find_extent(struct inode *, ext4_lblk_t, 2745extern struct ext4_ext_path *ext4_find_extent(struct inode *, ext4_lblk_t,
2737 struct ext4_ext_path *, 2746 struct ext4_ext_path **,
2738 int flags); 2747 int flags);
2739extern void ext4_ext_drop_refs(struct ext4_ext_path *); 2748extern void ext4_ext_drop_refs(struct ext4_ext_path *);
2740extern int ext4_ext_check_inode(struct inode *inode); 2749extern int ext4_ext_check_inode(struct inode *inode);
2741extern int ext4_find_delalloc_range(struct inode *inode, 2750extern int ext4_find_delalloc_range(struct inode *inode,
2742 ext4_lblk_t lblk_start, 2751 ext4_lblk_t lblk_start,
2743 ext4_lblk_t lblk_end); 2752 ext4_lblk_t lblk_end);
2744extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk); 2753extern int ext4_find_delalloc_cluster(struct inode *inode, ext4_lblk_t lblk);
2754extern ext4_lblk_t ext4_ext_next_allocated_block(struct ext4_ext_path *path);
2745extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 2755extern int ext4_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
2746 __u64 start, __u64 len); 2756 __u64 start, __u64 len);
2747extern int ext4_ext_precache(struct inode *inode); 2757extern int ext4_ext_precache(struct inode *inode);
2748extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len); 2758extern int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len);
2759extern int ext4_swap_extents(handle_t *handle, struct inode *inode1,
2760 struct inode *inode2, ext4_lblk_t lblk1,
2761 ext4_lblk_t lblk2, ext4_lblk_t count,
2762 int mark_unwritten,int *err);
2749 2763
2750/* move_extent.c */ 2764/* move_extent.c */
2751extern void ext4_double_down_write_data_sem(struct inode *first, 2765extern void ext4_double_down_write_data_sem(struct inode *first,
@@ -2755,8 +2769,6 @@ extern void ext4_double_up_write_data_sem(struct inode *orig_inode,
2755extern int ext4_move_extents(struct file *o_filp, struct file *d_filp, 2769extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
2756 __u64 start_orig, __u64 start_donor, 2770 __u64 start_orig, __u64 start_donor,
2757 __u64 len, __u64 *moved_len); 2771 __u64 len, __u64 *moved_len);
2758extern int mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
2759 struct ext4_extent **extent);
2760 2772
2761/* page-io.c */ 2773/* page-io.c */
2762extern int __init ext4_init_pageio(void); 2774extern int __init ext4_init_pageio(void);
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index a867f5ca9991..3c9381547094 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -123,6 +123,7 @@ find_ext4_extent_tail(struct ext4_extent_header *eh)
123struct ext4_ext_path { 123struct ext4_ext_path {
124 ext4_fsblk_t p_block; 124 ext4_fsblk_t p_block;
125 __u16 p_depth; 125 __u16 p_depth;
126 __u16 p_maxdepth;
126 struct ext4_extent *p_ext; 127 struct ext4_extent *p_ext;
127 struct ext4_extent_idx *p_idx; 128 struct ext4_extent_idx *p_idx;
128 struct ext4_extent_header *p_hdr; 129 struct ext4_extent_header *p_hdr;
diff --git a/fs/ext4/ext4_jbd2.c b/fs/ext4/ext4_jbd2.c
index 0074e0d23d6e..3445035c7e01 100644
--- a/fs/ext4/ext4_jbd2.c
+++ b/fs/ext4/ext4_jbd2.c
@@ -256,8 +256,8 @@ int __ext4_handle_dirty_metadata(const char *where, unsigned int line,
256 set_buffer_prio(bh); 256 set_buffer_prio(bh);
257 if (ext4_handle_valid(handle)) { 257 if (ext4_handle_valid(handle)) {
258 err = jbd2_journal_dirty_metadata(handle, bh); 258 err = jbd2_journal_dirty_metadata(handle, bh);
259 /* Errors can only happen if there is a bug */ 259 /* Errors can only happen due to aborted journal or a nasty bug */
260 if (WARN_ON_ONCE(err)) { 260 if (!is_handle_aborted(handle) && WARN_ON_ONCE(err)) {
261 ext4_journal_abort_handle(where, line, __func__, bh, 261 ext4_journal_abort_handle(where, line, __func__, bh,
262 handle, err); 262 handle, err);
263 if (inode == NULL) { 263 if (inode == NULL) {
diff --git a/fs/ext4/ext4_jbd2.h b/fs/ext4/ext4_jbd2.h
index 17c00ff202f2..9c5b49fb281e 100644
--- a/fs/ext4/ext4_jbd2.h
+++ b/fs/ext4/ext4_jbd2.h
@@ -102,9 +102,9 @@
102#define EXT4_QUOTA_INIT_BLOCKS(sb) 0 102#define EXT4_QUOTA_INIT_BLOCKS(sb) 0
103#define EXT4_QUOTA_DEL_BLOCKS(sb) 0 103#define EXT4_QUOTA_DEL_BLOCKS(sb) 0
104#endif 104#endif
105#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb)) 105#define EXT4_MAXQUOTAS_TRANS_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_TRANS_BLOCKS(sb))
106#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb)) 106#define EXT4_MAXQUOTAS_INIT_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_INIT_BLOCKS(sb))
107#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb)) 107#define EXT4_MAXQUOTAS_DEL_BLOCKS(sb) (EXT4_MAXQUOTAS*EXT4_QUOTA_DEL_BLOCKS(sb))
108 108
109static inline int ext4_jbd2_credits_xattr(struct inode *inode) 109static inline int ext4_jbd2_credits_xattr(struct inode *inode)
110{ 110{
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 74292a71b384..0b16fb4c06d3 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -73,8 +73,7 @@ static int ext4_extent_block_csum_verify(struct inode *inode,
73{ 73{
74 struct ext4_extent_tail *et; 74 struct ext4_extent_tail *et;
75 75
76 if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 76 if (!ext4_has_metadata_csum(inode->i_sb))
77 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
78 return 1; 77 return 1;
79 78
80 et = find_ext4_extent_tail(eh); 79 et = find_ext4_extent_tail(eh);
@@ -88,8 +87,7 @@ static void ext4_extent_block_csum_set(struct inode *inode,
88{ 87{
89 struct ext4_extent_tail *et; 88 struct ext4_extent_tail *et;
90 89
91 if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 90 if (!ext4_has_metadata_csum(inode->i_sb))
92 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
93 return; 91 return;
94 92
95 et = find_ext4_extent_tail(eh); 93 et = find_ext4_extent_tail(eh);
@@ -98,14 +96,14 @@ static void ext4_extent_block_csum_set(struct inode *inode,
98 96
99static int ext4_split_extent(handle_t *handle, 97static int ext4_split_extent(handle_t *handle,
100 struct inode *inode, 98 struct inode *inode,
101 struct ext4_ext_path *path, 99 struct ext4_ext_path **ppath,
102 struct ext4_map_blocks *map, 100 struct ext4_map_blocks *map,
103 int split_flag, 101 int split_flag,
104 int flags); 102 int flags);
105 103
106static int ext4_split_extent_at(handle_t *handle, 104static int ext4_split_extent_at(handle_t *handle,
107 struct inode *inode, 105 struct inode *inode,
108 struct ext4_ext_path *path, 106 struct ext4_ext_path **ppath,
109 ext4_lblk_t split, 107 ext4_lblk_t split,
110 int split_flag, 108 int split_flag,
111 int flags); 109 int flags);
@@ -291,6 +289,20 @@ static inline int ext4_ext_space_root_idx(struct inode *inode, int check)
291 return size; 289 return size;
292} 290}
293 291
292static inline int
293ext4_force_split_extent_at(handle_t *handle, struct inode *inode,
294 struct ext4_ext_path **ppath, ext4_lblk_t lblk,
295 int nofail)
296{
297 struct ext4_ext_path *path = *ppath;
298 int unwritten = ext4_ext_is_unwritten(path[path->p_depth].p_ext);
299
300 return ext4_split_extent_at(handle, inode, ppath, lblk, unwritten ?
301 EXT4_EXT_MARK_UNWRIT1|EXT4_EXT_MARK_UNWRIT2 : 0,
302 EXT4_EX_NOCACHE | EXT4_GET_BLOCKS_PRE_IO |
303 (nofail ? EXT4_GET_BLOCKS_METADATA_NOFAIL:0));
304}
305
294/* 306/*
295 * Calculate the number of metadata blocks needed 307 * Calculate the number of metadata blocks needed
296 * to allocate @blocks 308 * to allocate @blocks
@@ -695,9 +707,11 @@ static void ext4_ext_show_move(struct inode *inode, struct ext4_ext_path *path,
695 707
696void ext4_ext_drop_refs(struct ext4_ext_path *path) 708void ext4_ext_drop_refs(struct ext4_ext_path *path)
697{ 709{
698 int depth = path->p_depth; 710 int depth, i;
699 int i;
700 711
712 if (!path)
713 return;
714 depth = path->p_depth;
701 for (i = 0; i <= depth; i++, path++) 715 for (i = 0; i <= depth; i++, path++)
702 if (path->p_bh) { 716 if (path->p_bh) {
703 brelse(path->p_bh); 717 brelse(path->p_bh);
@@ -841,24 +855,32 @@ int ext4_ext_tree_init(handle_t *handle, struct inode *inode)
841} 855}
842 856
843struct ext4_ext_path * 857struct ext4_ext_path *
844ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block, 858ext4_find_extent(struct inode *inode, ext4_lblk_t block,
845 struct ext4_ext_path *path, int flags) 859 struct ext4_ext_path **orig_path, int flags)
846{ 860{
847 struct ext4_extent_header *eh; 861 struct ext4_extent_header *eh;
848 struct buffer_head *bh; 862 struct buffer_head *bh;
849 short int depth, i, ppos = 0, alloc = 0; 863 struct ext4_ext_path *path = orig_path ? *orig_path : NULL;
864 short int depth, i, ppos = 0;
850 int ret; 865 int ret;
851 866
852 eh = ext_inode_hdr(inode); 867 eh = ext_inode_hdr(inode);
853 depth = ext_depth(inode); 868 depth = ext_depth(inode);
854 869
855 /* account possible depth increase */ 870 if (path) {
871 ext4_ext_drop_refs(path);
872 if (depth > path[0].p_maxdepth) {
873 kfree(path);
874 *orig_path = path = NULL;
875 }
876 }
856 if (!path) { 877 if (!path) {
878 /* account possible depth increase */
857 path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2), 879 path = kzalloc(sizeof(struct ext4_ext_path) * (depth + 2),
858 GFP_NOFS); 880 GFP_NOFS);
859 if (!path) 881 if (unlikely(!path))
860 return ERR_PTR(-ENOMEM); 882 return ERR_PTR(-ENOMEM);
861 alloc = 1; 883 path[0].p_maxdepth = depth + 1;
862 } 884 }
863 path[0].p_hdr = eh; 885 path[0].p_hdr = eh;
864 path[0].p_bh = NULL; 886 path[0].p_bh = NULL;
@@ -876,7 +898,7 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
876 898
877 bh = read_extent_tree_block(inode, path[ppos].p_block, --i, 899 bh = read_extent_tree_block(inode, path[ppos].p_block, --i,
878 flags); 900 flags);
879 if (IS_ERR(bh)) { 901 if (unlikely(IS_ERR(bh))) {
880 ret = PTR_ERR(bh); 902 ret = PTR_ERR(bh);
881 goto err; 903 goto err;
882 } 904 }
@@ -910,8 +932,9 @@ ext4_ext_find_extent(struct inode *inode, ext4_lblk_t block,
910 932
911err: 933err:
912 ext4_ext_drop_refs(path); 934 ext4_ext_drop_refs(path);
913 if (alloc) 935 kfree(path);
914 kfree(path); 936 if (orig_path)
937 *orig_path = NULL;
915 return ERR_PTR(ret); 938 return ERR_PTR(ret);
916} 939}
917 940
@@ -1238,16 +1261,24 @@ cleanup:
1238 * just created block 1261 * just created block
1239 */ 1262 */
1240static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode, 1263static int ext4_ext_grow_indepth(handle_t *handle, struct inode *inode,
1241 unsigned int flags, 1264 unsigned int flags)
1242 struct ext4_extent *newext)
1243{ 1265{
1244 struct ext4_extent_header *neh; 1266 struct ext4_extent_header *neh;
1245 struct buffer_head *bh; 1267 struct buffer_head *bh;
1246 ext4_fsblk_t newblock; 1268 ext4_fsblk_t newblock, goal = 0;
1269 struct ext4_super_block *es = EXT4_SB(inode->i_sb)->s_es;
1247 int err = 0; 1270 int err = 0;
1248 1271
1249 newblock = ext4_ext_new_meta_block(handle, inode, NULL, 1272 /* Try to prepend new index to old one */
1250 newext, &err, flags); 1273 if (ext_depth(inode))
1274 goal = ext4_idx_pblock(EXT_FIRST_INDEX(ext_inode_hdr(inode)));
1275 if (goal > le32_to_cpu(es->s_first_data_block)) {
1276 flags |= EXT4_MB_HINT_TRY_GOAL;
1277 goal--;
1278 } else
1279 goal = ext4_inode_to_goal_block(inode);
1280 newblock = ext4_new_meta_blocks(handle, inode, goal, flags,
1281 NULL, &err);
1251 if (newblock == 0) 1282 if (newblock == 0)
1252 return err; 1283 return err;
1253 1284
@@ -1314,9 +1345,10 @@ out:
1314static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode, 1345static int ext4_ext_create_new_leaf(handle_t *handle, struct inode *inode,
1315 unsigned int mb_flags, 1346 unsigned int mb_flags,
1316 unsigned int gb_flags, 1347 unsigned int gb_flags,
1317 struct ext4_ext_path *path, 1348 struct ext4_ext_path **ppath,
1318 struct ext4_extent *newext) 1349 struct ext4_extent *newext)
1319{ 1350{
1351 struct ext4_ext_path *path = *ppath;
1320 struct ext4_ext_path *curp; 1352 struct ext4_ext_path *curp;
1321 int depth, i, err = 0; 1353 int depth, i, err = 0;
1322 1354
@@ -1340,23 +1372,21 @@ repeat:
1340 goto out; 1372 goto out;
1341 1373
1342 /* refill path */ 1374 /* refill path */
1343 ext4_ext_drop_refs(path); 1375 path = ext4_find_extent(inode,
1344 path = ext4_ext_find_extent(inode,
1345 (ext4_lblk_t)le32_to_cpu(newext->ee_block), 1376 (ext4_lblk_t)le32_to_cpu(newext->ee_block),
1346 path, gb_flags); 1377 ppath, gb_flags);
1347 if (IS_ERR(path)) 1378 if (IS_ERR(path))
1348 err = PTR_ERR(path); 1379 err = PTR_ERR(path);
1349 } else { 1380 } else {
1350 /* tree is full, time to grow in depth */ 1381 /* tree is full, time to grow in depth */
1351 err = ext4_ext_grow_indepth(handle, inode, mb_flags, newext); 1382 err = ext4_ext_grow_indepth(handle, inode, mb_flags);
1352 if (err) 1383 if (err)
1353 goto out; 1384 goto out;
1354 1385
1355 /* refill path */ 1386 /* refill path */
1356 ext4_ext_drop_refs(path); 1387 path = ext4_find_extent(inode,
1357 path = ext4_ext_find_extent(inode,
1358 (ext4_lblk_t)le32_to_cpu(newext->ee_block), 1388 (ext4_lblk_t)le32_to_cpu(newext->ee_block),
1359 path, gb_flags); 1389 ppath, gb_flags);
1360 if (IS_ERR(path)) { 1390 if (IS_ERR(path)) {
1361 err = PTR_ERR(path); 1391 err = PTR_ERR(path);
1362 goto out; 1392 goto out;
@@ -1559,7 +1589,7 @@ found_extent:
1559 * allocated block. Thus, index entries have to be consistent 1589 * allocated block. Thus, index entries have to be consistent
1560 * with leaves. 1590 * with leaves.
1561 */ 1591 */
1562static ext4_lblk_t 1592ext4_lblk_t
1563ext4_ext_next_allocated_block(struct ext4_ext_path *path) 1593ext4_ext_next_allocated_block(struct ext4_ext_path *path)
1564{ 1594{
1565 int depth; 1595 int depth;
@@ -1802,6 +1832,7 @@ static void ext4_ext_try_to_merge_up(handle_t *handle,
1802 sizeof(struct ext4_extent_idx); 1832 sizeof(struct ext4_extent_idx);
1803 s += sizeof(struct ext4_extent_header); 1833 s += sizeof(struct ext4_extent_header);
1804 1834
1835 path[1].p_maxdepth = path[0].p_maxdepth;
1805 memcpy(path[0].p_hdr, path[1].p_hdr, s); 1836 memcpy(path[0].p_hdr, path[1].p_hdr, s);
1806 path[0].p_depth = 0; 1837 path[0].p_depth = 0;
1807 path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) + 1838 path[0].p_ext = EXT_FIRST_EXTENT(path[0].p_hdr) +
@@ -1896,9 +1927,10 @@ out:
1896 * creating new leaf in the no-space case. 1927 * creating new leaf in the no-space case.
1897 */ 1928 */
1898int ext4_ext_insert_extent(handle_t *handle, struct inode *inode, 1929int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1899 struct ext4_ext_path *path, 1930 struct ext4_ext_path **ppath,
1900 struct ext4_extent *newext, int gb_flags) 1931 struct ext4_extent *newext, int gb_flags)
1901{ 1932{
1933 struct ext4_ext_path *path = *ppath;
1902 struct ext4_extent_header *eh; 1934 struct ext4_extent_header *eh;
1903 struct ext4_extent *ex, *fex; 1935 struct ext4_extent *ex, *fex;
1904 struct ext4_extent *nearex; /* nearest extent */ 1936 struct ext4_extent *nearex; /* nearest extent */
@@ -1907,6 +1939,8 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1907 ext4_lblk_t next; 1939 ext4_lblk_t next;
1908 int mb_flags = 0, unwritten; 1940 int mb_flags = 0, unwritten;
1909 1941
1942 if (gb_flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
1943 mb_flags |= EXT4_MB_DELALLOC_RESERVED;
1910 if (unlikely(ext4_ext_get_actual_len(newext) == 0)) { 1944 if (unlikely(ext4_ext_get_actual_len(newext) == 0)) {
1911 EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0"); 1945 EXT4_ERROR_INODE(inode, "ext4_ext_get_actual_len(newext) == 0");
1912 return -EIO; 1946 return -EIO;
@@ -1925,7 +1959,7 @@ int ext4_ext_insert_extent(handle_t *handle, struct inode *inode,
1925 /* 1959 /*
1926 * Try to see whether we should rather test the extent on 1960 * Try to see whether we should rather test the extent on
1927 * right from ex, or from the left of ex. This is because 1961 * right from ex, or from the left of ex. This is because
1928 * ext4_ext_find_extent() can return either extent on the 1962 * ext4_find_extent() can return either extent on the
1929 * left, or on the right from the searched position. This 1963 * left, or on the right from the searched position. This
1930 * will make merging more effective. 1964 * will make merging more effective.
1931 */ 1965 */
@@ -2008,7 +2042,7 @@ prepend:
2008 if (next != EXT_MAX_BLOCKS) { 2042 if (next != EXT_MAX_BLOCKS) {
2009 ext_debug("next leaf block - %u\n", next); 2043 ext_debug("next leaf block - %u\n", next);
2010 BUG_ON(npath != NULL); 2044 BUG_ON(npath != NULL);
2011 npath = ext4_ext_find_extent(inode, next, NULL, 0); 2045 npath = ext4_find_extent(inode, next, NULL, 0);
2012 if (IS_ERR(npath)) 2046 if (IS_ERR(npath))
2013 return PTR_ERR(npath); 2047 return PTR_ERR(npath);
2014 BUG_ON(npath->p_depth != path->p_depth); 2048 BUG_ON(npath->p_depth != path->p_depth);
@@ -2028,9 +2062,9 @@ prepend:
2028 * We're gonna add a new leaf in the tree. 2062 * We're gonna add a new leaf in the tree.
2029 */ 2063 */
2030 if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL) 2064 if (gb_flags & EXT4_GET_BLOCKS_METADATA_NOFAIL)
2031 mb_flags = EXT4_MB_USE_RESERVED; 2065 mb_flags |= EXT4_MB_USE_RESERVED;
2032 err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags, 2066 err = ext4_ext_create_new_leaf(handle, inode, mb_flags, gb_flags,
2033 path, newext); 2067 ppath, newext);
2034 if (err) 2068 if (err)
2035 goto cleanup; 2069 goto cleanup;
2036 depth = ext_depth(inode); 2070 depth = ext_depth(inode);
@@ -2108,10 +2142,8 @@ merge:
2108 err = ext4_ext_dirty(handle, inode, path + path->p_depth); 2142 err = ext4_ext_dirty(handle, inode, path + path->p_depth);
2109 2143
2110cleanup: 2144cleanup:
2111 if (npath) { 2145 ext4_ext_drop_refs(npath);
2112 ext4_ext_drop_refs(npath); 2146 kfree(npath);
2113 kfree(npath);
2114 }
2115 return err; 2147 return err;
2116} 2148}
2117 2149
@@ -2133,13 +2165,7 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
2133 /* find extent for this block */ 2165 /* find extent for this block */
2134 down_read(&EXT4_I(inode)->i_data_sem); 2166 down_read(&EXT4_I(inode)->i_data_sem);
2135 2167
2136 if (path && ext_depth(inode) != depth) { 2168 path = ext4_find_extent(inode, block, &path, 0);
2137 /* depth was changed. we have to realloc path */
2138 kfree(path);
2139 path = NULL;
2140 }
2141
2142 path = ext4_ext_find_extent(inode, block, path, 0);
2143 if (IS_ERR(path)) { 2169 if (IS_ERR(path)) {
2144 up_read(&EXT4_I(inode)->i_data_sem); 2170 up_read(&EXT4_I(inode)->i_data_sem);
2145 err = PTR_ERR(path); 2171 err = PTR_ERR(path);
@@ -2156,7 +2182,6 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
2156 } 2182 }
2157 ex = path[depth].p_ext; 2183 ex = path[depth].p_ext;
2158 next = ext4_ext_next_allocated_block(path); 2184 next = ext4_ext_next_allocated_block(path);
2159 ext4_ext_drop_refs(path);
2160 2185
2161 flags = 0; 2186 flags = 0;
2162 exists = 0; 2187 exists = 0;
@@ -2266,11 +2291,8 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
2266 block = es.es_lblk + es.es_len; 2291 block = es.es_lblk + es.es_len;
2267 } 2292 }
2268 2293
2269 if (path) { 2294 ext4_ext_drop_refs(path);
2270 ext4_ext_drop_refs(path); 2295 kfree(path);
2271 kfree(path);
2272 }
2273
2274 return err; 2296 return err;
2275} 2297}
2276 2298
@@ -2826,7 +2848,7 @@ again:
2826 ext4_lblk_t ee_block; 2848 ext4_lblk_t ee_block;
2827 2849
2828 /* find extent for this block */ 2850 /* find extent for this block */
2829 path = ext4_ext_find_extent(inode, end, NULL, EXT4_EX_NOCACHE); 2851 path = ext4_find_extent(inode, end, NULL, EXT4_EX_NOCACHE);
2830 if (IS_ERR(path)) { 2852 if (IS_ERR(path)) {
2831 ext4_journal_stop(handle); 2853 ext4_journal_stop(handle);
2832 return PTR_ERR(path); 2854 return PTR_ERR(path);
@@ -2854,24 +2876,14 @@ again:
2854 */ 2876 */
2855 if (end >= ee_block && 2877 if (end >= ee_block &&
2856 end < ee_block + ext4_ext_get_actual_len(ex) - 1) { 2878 end < ee_block + ext4_ext_get_actual_len(ex) - 1) {
2857 int split_flag = 0;
2858
2859 if (ext4_ext_is_unwritten(ex))
2860 split_flag = EXT4_EXT_MARK_UNWRIT1 |
2861 EXT4_EXT_MARK_UNWRIT2;
2862
2863 /* 2879 /*
2864 * Split the extent in two so that 'end' is the last 2880 * Split the extent in two so that 'end' is the last
2865 * block in the first new extent. Also we should not 2881 * block in the first new extent. Also we should not
2866 * fail removing space due to ENOSPC so try to use 2882 * fail removing space due to ENOSPC so try to use
2867 * reserved block if that happens. 2883 * reserved block if that happens.
2868 */ 2884 */
2869 err = ext4_split_extent_at(handle, inode, path, 2885 err = ext4_force_split_extent_at(handle, inode, &path,
2870 end + 1, split_flag, 2886 end + 1, 1);
2871 EXT4_EX_NOCACHE |
2872 EXT4_GET_BLOCKS_PRE_IO |
2873 EXT4_GET_BLOCKS_METADATA_NOFAIL);
2874
2875 if (err < 0) 2887 if (err < 0)
2876 goto out; 2888 goto out;
2877 } 2889 }
@@ -2893,7 +2905,7 @@ again:
2893 ext4_journal_stop(handle); 2905 ext4_journal_stop(handle);
2894 return -ENOMEM; 2906 return -ENOMEM;
2895 } 2907 }
2896 path[0].p_depth = depth; 2908 path[0].p_maxdepth = path[0].p_depth = depth;
2897 path[0].p_hdr = ext_inode_hdr(inode); 2909 path[0].p_hdr = ext_inode_hdr(inode);
2898 i = 0; 2910 i = 0;
2899 2911
@@ -3013,10 +3025,9 @@ again:
3013out: 3025out:
3014 ext4_ext_drop_refs(path); 3026 ext4_ext_drop_refs(path);
3015 kfree(path); 3027 kfree(path);
3016 if (err == -EAGAIN) { 3028 path = NULL;
3017 path = NULL; 3029 if (err == -EAGAIN)
3018 goto again; 3030 goto again;
3019 }
3020 ext4_journal_stop(handle); 3031 ext4_journal_stop(handle);
3021 3032
3022 return err; 3033 return err;
@@ -3130,11 +3141,12 @@ static int ext4_ext_zeroout(struct inode *inode, struct ext4_extent *ex)
3130 */ 3141 */
3131static int ext4_split_extent_at(handle_t *handle, 3142static int ext4_split_extent_at(handle_t *handle,
3132 struct inode *inode, 3143 struct inode *inode,
3133 struct ext4_ext_path *path, 3144 struct ext4_ext_path **ppath,
3134 ext4_lblk_t split, 3145 ext4_lblk_t split,
3135 int split_flag, 3146 int split_flag,
3136 int flags) 3147 int flags)
3137{ 3148{
3149 struct ext4_ext_path *path = *ppath;
3138 ext4_fsblk_t newblock; 3150 ext4_fsblk_t newblock;
3139 ext4_lblk_t ee_block; 3151 ext4_lblk_t ee_block;
3140 struct ext4_extent *ex, newex, orig_ex, zero_ex; 3152 struct ext4_extent *ex, newex, orig_ex, zero_ex;
@@ -3205,7 +3217,7 @@ static int ext4_split_extent_at(handle_t *handle,
3205 if (split_flag & EXT4_EXT_MARK_UNWRIT2) 3217 if (split_flag & EXT4_EXT_MARK_UNWRIT2)
3206 ext4_ext_mark_unwritten(ex2); 3218 ext4_ext_mark_unwritten(ex2);
3207 3219
3208 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); 3220 err = ext4_ext_insert_extent(handle, inode, ppath, &newex, flags);
3209 if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { 3221 if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
3210 if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { 3222 if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
3211 if (split_flag & EXT4_EXT_DATA_VALID1) { 3223 if (split_flag & EXT4_EXT_DATA_VALID1) {
@@ -3271,11 +3283,12 @@ fix_extent_len:
3271 */ 3283 */
3272static int ext4_split_extent(handle_t *handle, 3284static int ext4_split_extent(handle_t *handle,
3273 struct inode *inode, 3285 struct inode *inode,
3274 struct ext4_ext_path *path, 3286 struct ext4_ext_path **ppath,
3275 struct ext4_map_blocks *map, 3287 struct ext4_map_blocks *map,
3276 int split_flag, 3288 int split_flag,
3277 int flags) 3289 int flags)
3278{ 3290{
3291 struct ext4_ext_path *path = *ppath;
3279 ext4_lblk_t ee_block; 3292 ext4_lblk_t ee_block;
3280 struct ext4_extent *ex; 3293 struct ext4_extent *ex;
3281 unsigned int ee_len, depth; 3294 unsigned int ee_len, depth;
@@ -3298,7 +3311,7 @@ static int ext4_split_extent(handle_t *handle,
3298 EXT4_EXT_MARK_UNWRIT2; 3311 EXT4_EXT_MARK_UNWRIT2;
3299 if (split_flag & EXT4_EXT_DATA_VALID2) 3312 if (split_flag & EXT4_EXT_DATA_VALID2)
3300 split_flag1 |= EXT4_EXT_DATA_VALID1; 3313 split_flag1 |= EXT4_EXT_DATA_VALID1;
3301 err = ext4_split_extent_at(handle, inode, path, 3314 err = ext4_split_extent_at(handle, inode, ppath,
3302 map->m_lblk + map->m_len, split_flag1, flags1); 3315 map->m_lblk + map->m_len, split_flag1, flags1);
3303 if (err) 3316 if (err)
3304 goto out; 3317 goto out;
@@ -3309,8 +3322,7 @@ static int ext4_split_extent(handle_t *handle,
3309 * Update path is required because previous ext4_split_extent_at() may 3322 * Update path is required because previous ext4_split_extent_at() may
3310 * result in split of original leaf or extent zeroout. 3323 * result in split of original leaf or extent zeroout.
3311 */ 3324 */
3312 ext4_ext_drop_refs(path); 3325 path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
3313 path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
3314 if (IS_ERR(path)) 3326 if (IS_ERR(path))
3315 return PTR_ERR(path); 3327 return PTR_ERR(path);
3316 depth = ext_depth(inode); 3328 depth = ext_depth(inode);
@@ -3330,7 +3342,7 @@ static int ext4_split_extent(handle_t *handle,
3330 split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT | 3342 split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT |
3331 EXT4_EXT_MARK_UNWRIT2); 3343 EXT4_EXT_MARK_UNWRIT2);
3332 } 3344 }
3333 err = ext4_split_extent_at(handle, inode, path, 3345 err = ext4_split_extent_at(handle, inode, ppath,
3334 map->m_lblk, split_flag1, flags); 3346 map->m_lblk, split_flag1, flags);
3335 if (err) 3347 if (err)
3336 goto out; 3348 goto out;
@@ -3364,9 +3376,10 @@ out:
3364static int ext4_ext_convert_to_initialized(handle_t *handle, 3376static int ext4_ext_convert_to_initialized(handle_t *handle,
3365 struct inode *inode, 3377 struct inode *inode,
3366 struct ext4_map_blocks *map, 3378 struct ext4_map_blocks *map,
3367 struct ext4_ext_path *path, 3379 struct ext4_ext_path **ppath,
3368 int flags) 3380 int flags)
3369{ 3381{
3382 struct ext4_ext_path *path = *ppath;
3370 struct ext4_sb_info *sbi; 3383 struct ext4_sb_info *sbi;
3371 struct ext4_extent_header *eh; 3384 struct ext4_extent_header *eh;
3372 struct ext4_map_blocks split_map; 3385 struct ext4_map_blocks split_map;
@@ -3590,11 +3603,10 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3590 } 3603 }
3591 } 3604 }
3592 3605
3593 allocated = ext4_split_extent(handle, inode, path, 3606 err = ext4_split_extent(handle, inode, ppath, &split_map, split_flag,
3594 &split_map, split_flag, flags); 3607 flags);
3595 if (allocated < 0) 3608 if (err > 0)
3596 err = allocated; 3609 err = 0;
3597
3598out: 3610out:
3599 /* If we have gotten a failure, don't zero out status tree */ 3611 /* If we have gotten a failure, don't zero out status tree */
3600 if (!err) 3612 if (!err)
@@ -3629,9 +3641,10 @@ out:
3629static int ext4_split_convert_extents(handle_t *handle, 3641static int ext4_split_convert_extents(handle_t *handle,
3630 struct inode *inode, 3642 struct inode *inode,
3631 struct ext4_map_blocks *map, 3643 struct ext4_map_blocks *map,
3632 struct ext4_ext_path *path, 3644 struct ext4_ext_path **ppath,
3633 int flags) 3645 int flags)
3634{ 3646{
3647 struct ext4_ext_path *path = *ppath;
3635 ext4_lblk_t eof_block; 3648 ext4_lblk_t eof_block;
3636 ext4_lblk_t ee_block; 3649 ext4_lblk_t ee_block;
3637 struct ext4_extent *ex; 3650 struct ext4_extent *ex;
@@ -3665,74 +3678,15 @@ static int ext4_split_convert_extents(handle_t *handle,
3665 split_flag |= (EXT4_EXT_MARK_UNWRIT2 | EXT4_EXT_DATA_VALID2); 3678 split_flag |= (EXT4_EXT_MARK_UNWRIT2 | EXT4_EXT_DATA_VALID2);
3666 } 3679 }
3667 flags |= EXT4_GET_BLOCKS_PRE_IO; 3680 flags |= EXT4_GET_BLOCKS_PRE_IO;
3668 return ext4_split_extent(handle, inode, path, map, split_flag, flags); 3681 return ext4_split_extent(handle, inode, ppath, map, split_flag, flags);
3669} 3682}
3670 3683
3671static int ext4_convert_initialized_extents(handle_t *handle,
3672 struct inode *inode,
3673 struct ext4_map_blocks *map,
3674 struct ext4_ext_path *path)
3675{
3676 struct ext4_extent *ex;
3677 ext4_lblk_t ee_block;
3678 unsigned int ee_len;
3679 int depth;
3680 int err = 0;
3681
3682 depth = ext_depth(inode);
3683 ex = path[depth].p_ext;
3684 ee_block = le32_to_cpu(ex->ee_block);
3685 ee_len = ext4_ext_get_actual_len(ex);
3686
3687 ext_debug("%s: inode %lu, logical"
3688 "block %llu, max_blocks %u\n", __func__, inode->i_ino,
3689 (unsigned long long)ee_block, ee_len);
3690
3691 if (ee_block != map->m_lblk || ee_len > map->m_len) {
3692 err = ext4_split_convert_extents(handle, inode, map, path,
3693 EXT4_GET_BLOCKS_CONVERT_UNWRITTEN);
3694 if (err < 0)
3695 goto out;
3696 ext4_ext_drop_refs(path);
3697 path = ext4_ext_find_extent(inode, map->m_lblk, path, 0);
3698 if (IS_ERR(path)) {
3699 err = PTR_ERR(path);
3700 goto out;
3701 }
3702 depth = ext_depth(inode);
3703 ex = path[depth].p_ext;
3704 if (!ex) {
3705 EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
3706 (unsigned long) map->m_lblk);
3707 err = -EIO;
3708 goto out;
3709 }
3710 }
3711
3712 err = ext4_ext_get_access(handle, inode, path + depth);
3713 if (err)
3714 goto out;
3715 /* first mark the extent as unwritten */
3716 ext4_ext_mark_unwritten(ex);
3717
3718 /* note: ext4_ext_correct_indexes() isn't needed here because
3719 * borders are not changed
3720 */
3721 ext4_ext_try_to_merge(handle, inode, path, ex);
3722
3723 /* Mark modified extent as dirty */
3724 err = ext4_ext_dirty(handle, inode, path + path->p_depth);
3725out:
3726 ext4_ext_show_leaf(inode, path);
3727 return err;
3728}
3729
3730
3731static int ext4_convert_unwritten_extents_endio(handle_t *handle, 3684static int ext4_convert_unwritten_extents_endio(handle_t *handle,
3732 struct inode *inode, 3685 struct inode *inode,
3733 struct ext4_map_blocks *map, 3686 struct ext4_map_blocks *map,
3734 struct ext4_ext_path *path) 3687 struct ext4_ext_path **ppath)
3735{ 3688{
3689 struct ext4_ext_path *path = *ppath;
3736 struct ext4_extent *ex; 3690 struct ext4_extent *ex;
3737 ext4_lblk_t ee_block; 3691 ext4_lblk_t ee_block;
3738 unsigned int ee_len; 3692 unsigned int ee_len;
@@ -3761,16 +3715,13 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
3761 inode->i_ino, (unsigned long long)ee_block, ee_len, 3715 inode->i_ino, (unsigned long long)ee_block, ee_len,
3762 (unsigned long long)map->m_lblk, map->m_len); 3716 (unsigned long long)map->m_lblk, map->m_len);
3763#endif 3717#endif
3764 err = ext4_split_convert_extents(handle, inode, map, path, 3718 err = ext4_split_convert_extents(handle, inode, map, ppath,
3765 EXT4_GET_BLOCKS_CONVERT); 3719 EXT4_GET_BLOCKS_CONVERT);
3766 if (err < 0) 3720 if (err < 0)
3767 goto out; 3721 return err;
3768 ext4_ext_drop_refs(path); 3722 path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
3769 path = ext4_ext_find_extent(inode, map->m_lblk, path, 0); 3723 if (IS_ERR(path))
3770 if (IS_ERR(path)) { 3724 return PTR_ERR(path);
3771 err = PTR_ERR(path);
3772 goto out;
3773 }
3774 depth = ext_depth(inode); 3725 depth = ext_depth(inode);
3775 ex = path[depth].p_ext; 3726 ex = path[depth].p_ext;
3776 } 3727 }
@@ -3963,12 +3914,16 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
3963} 3914}
3964 3915
3965static int 3916static int
3966ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode, 3917convert_initialized_extent(handle_t *handle, struct inode *inode,
3967 struct ext4_map_blocks *map, 3918 struct ext4_map_blocks *map,
3968 struct ext4_ext_path *path, int flags, 3919 struct ext4_ext_path **ppath, int flags,
3969 unsigned int allocated, ext4_fsblk_t newblock) 3920 unsigned int allocated, ext4_fsblk_t newblock)
3970{ 3921{
3971 int ret = 0; 3922 struct ext4_ext_path *path = *ppath;
3923 struct ext4_extent *ex;
3924 ext4_lblk_t ee_block;
3925 unsigned int ee_len;
3926 int depth;
3972 int err = 0; 3927 int err = 0;
3973 3928
3974 /* 3929 /*
@@ -3978,28 +3933,67 @@ ext4_ext_convert_initialized_extent(handle_t *handle, struct inode *inode,
3978 if (map->m_len > EXT_UNWRITTEN_MAX_LEN) 3933 if (map->m_len > EXT_UNWRITTEN_MAX_LEN)
3979 map->m_len = EXT_UNWRITTEN_MAX_LEN / 2; 3934 map->m_len = EXT_UNWRITTEN_MAX_LEN / 2;
3980 3935
3981 ret = ext4_convert_initialized_extents(handle, inode, map, 3936 depth = ext_depth(inode);
3982 path); 3937 ex = path[depth].p_ext;
3983 if (ret >= 0) { 3938 ee_block = le32_to_cpu(ex->ee_block);
3984 ext4_update_inode_fsync_trans(handle, inode, 1); 3939 ee_len = ext4_ext_get_actual_len(ex);
3985 err = check_eofblocks_fl(handle, inode, map->m_lblk, 3940
3986 path, map->m_len); 3941 ext_debug("%s: inode %lu, logical"
3987 } else 3942 "block %llu, max_blocks %u\n", __func__, inode->i_ino,
3988 err = ret; 3943 (unsigned long long)ee_block, ee_len);
3944
3945 if (ee_block != map->m_lblk || ee_len > map->m_len) {
3946 err = ext4_split_convert_extents(handle, inode, map, ppath,
3947 EXT4_GET_BLOCKS_CONVERT_UNWRITTEN);
3948 if (err < 0)
3949 return err;
3950 path = ext4_find_extent(inode, map->m_lblk, ppath, 0);
3951 if (IS_ERR(path))
3952 return PTR_ERR(path);
3953 depth = ext_depth(inode);
3954 ex = path[depth].p_ext;
3955 if (!ex) {
3956 EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
3957 (unsigned long) map->m_lblk);
3958 return -EIO;
3959 }
3960 }
3961
3962 err = ext4_ext_get_access(handle, inode, path + depth);
3963 if (err)
3964 return err;
3965 /* first mark the extent as unwritten */
3966 ext4_ext_mark_unwritten(ex);
3967
3968 /* note: ext4_ext_correct_indexes() isn't needed here because
3969 * borders are not changed
3970 */
3971 ext4_ext_try_to_merge(handle, inode, path, ex);
3972
3973 /* Mark modified extent as dirty */
3974 err = ext4_ext_dirty(handle, inode, path + path->p_depth);
3975 if (err)
3976 return err;
3977 ext4_ext_show_leaf(inode, path);
3978
3979 ext4_update_inode_fsync_trans(handle, inode, 1);
3980 err = check_eofblocks_fl(handle, inode, map->m_lblk, path, map->m_len);
3981 if (err)
3982 return err;
3989 map->m_flags |= EXT4_MAP_UNWRITTEN; 3983 map->m_flags |= EXT4_MAP_UNWRITTEN;
3990 if (allocated > map->m_len) 3984 if (allocated > map->m_len)
3991 allocated = map->m_len; 3985 allocated = map->m_len;
3992 map->m_len = allocated; 3986 map->m_len = allocated;
3993 3987 return allocated;
3994 return err ? err : allocated;
3995} 3988}
3996 3989
3997static int 3990static int
3998ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode, 3991ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
3999 struct ext4_map_blocks *map, 3992 struct ext4_map_blocks *map,
4000 struct ext4_ext_path *path, int flags, 3993 struct ext4_ext_path **ppath, int flags,
4001 unsigned int allocated, ext4_fsblk_t newblock) 3994 unsigned int allocated, ext4_fsblk_t newblock)
4002{ 3995{
3996 struct ext4_ext_path *path = *ppath;
4003 int ret = 0; 3997 int ret = 0;
4004 int err = 0; 3998 int err = 0;
4005 ext4_io_end_t *io = ext4_inode_aio(inode); 3999 ext4_io_end_t *io = ext4_inode_aio(inode);
@@ -4021,8 +4015,8 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
4021 4015
4022 /* get_block() before submit the IO, split the extent */ 4016 /* get_block() before submit the IO, split the extent */
4023 if (flags & EXT4_GET_BLOCKS_PRE_IO) { 4017 if (flags & EXT4_GET_BLOCKS_PRE_IO) {
4024 ret = ext4_split_convert_extents(handle, inode, map, 4018 ret = ext4_split_convert_extents(handle, inode, map, ppath,
4025 path, flags | EXT4_GET_BLOCKS_CONVERT); 4019 flags | EXT4_GET_BLOCKS_CONVERT);
4026 if (ret <= 0) 4020 if (ret <= 0)
4027 goto out; 4021 goto out;
4028 /* 4022 /*
@@ -4040,7 +4034,7 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
4040 /* IO end_io complete, convert the filled extent to written */ 4034 /* IO end_io complete, convert the filled extent to written */
4041 if (flags & EXT4_GET_BLOCKS_CONVERT) { 4035 if (flags & EXT4_GET_BLOCKS_CONVERT) {
4042 ret = ext4_convert_unwritten_extents_endio(handle, inode, map, 4036 ret = ext4_convert_unwritten_extents_endio(handle, inode, map,
4043 path); 4037 ppath);
4044 if (ret >= 0) { 4038 if (ret >= 0) {
4045 ext4_update_inode_fsync_trans(handle, inode, 1); 4039 ext4_update_inode_fsync_trans(handle, inode, 1);
4046 err = check_eofblocks_fl(handle, inode, map->m_lblk, 4040 err = check_eofblocks_fl(handle, inode, map->m_lblk,
@@ -4078,7 +4072,7 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
4078 } 4072 }
4079 4073
4080 /* buffered write, writepage time, convert*/ 4074 /* buffered write, writepage time, convert*/
4081 ret = ext4_ext_convert_to_initialized(handle, inode, map, path, flags); 4075 ret = ext4_ext_convert_to_initialized(handle, inode, map, ppath, flags);
4082 if (ret >= 0) 4076 if (ret >= 0)
4083 ext4_update_inode_fsync_trans(handle, inode, 1); 4077 ext4_update_inode_fsync_trans(handle, inode, 1);
4084out: 4078out:
@@ -4279,7 +4273,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4279 trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags); 4273 trace_ext4_ext_map_blocks_enter(inode, map->m_lblk, map->m_len, flags);
4280 4274
4281 /* find extent for this block */ 4275 /* find extent for this block */
4282 path = ext4_ext_find_extent(inode, map->m_lblk, NULL, 0); 4276 path = ext4_find_extent(inode, map->m_lblk, NULL, 0);
4283 if (IS_ERR(path)) { 4277 if (IS_ERR(path)) {
4284 err = PTR_ERR(path); 4278 err = PTR_ERR(path);
4285 path = NULL; 4279 path = NULL;
@@ -4291,7 +4285,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4291 /* 4285 /*
4292 * consistent leaf must not be empty; 4286 * consistent leaf must not be empty;
4293 * this situation is possible, though, _during_ tree modification; 4287 * this situation is possible, though, _during_ tree modification;
4294 * this is why assert can't be put in ext4_ext_find_extent() 4288 * this is why assert can't be put in ext4_find_extent()
4295 */ 4289 */
4296 if (unlikely(path[depth].p_ext == NULL && depth != 0)) { 4290 if (unlikely(path[depth].p_ext == NULL && depth != 0)) {
4297 EXT4_ERROR_INODE(inode, "bad extent address " 4291 EXT4_ERROR_INODE(inode, "bad extent address "
@@ -4331,15 +4325,15 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4331 */ 4325 */
4332 if ((!ext4_ext_is_unwritten(ex)) && 4326 if ((!ext4_ext_is_unwritten(ex)) &&
4333 (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) { 4327 (flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
4334 allocated = ext4_ext_convert_initialized_extent( 4328 allocated = convert_initialized_extent(
4335 handle, inode, map, path, flags, 4329 handle, inode, map, &path,
4336 allocated, newblock); 4330 flags, allocated, newblock);
4337 goto out2; 4331 goto out2;
4338 } else if (!ext4_ext_is_unwritten(ex)) 4332 } else if (!ext4_ext_is_unwritten(ex))
4339 goto out; 4333 goto out;
4340 4334
4341 ret = ext4_ext_handle_unwritten_extents( 4335 ret = ext4_ext_handle_unwritten_extents(
4342 handle, inode, map, path, flags, 4336 handle, inode, map, &path, flags,
4343 allocated, newblock); 4337 allocated, newblock);
4344 if (ret < 0) 4338 if (ret < 0)
4345 err = ret; 4339 err = ret;
@@ -4376,7 +4370,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4376 4370
4377 /* 4371 /*
4378 * If we are doing bigalloc, check to see if the extent returned 4372 * If we are doing bigalloc, check to see if the extent returned
4379 * by ext4_ext_find_extent() implies a cluster we can use. 4373 * by ext4_find_extent() implies a cluster we can use.
4380 */ 4374 */
4381 if (cluster_offset && ex && 4375 if (cluster_offset && ex &&
4382 get_implied_cluster_alloc(inode->i_sb, map, ex, path)) { 4376 get_implied_cluster_alloc(inode->i_sb, map, ex, path)) {
@@ -4451,6 +4445,8 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
4451 ar.flags = 0; 4445 ar.flags = 0;
4452 if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE) 4446 if (flags & EXT4_GET_BLOCKS_NO_NORMALIZE)
4453 ar.flags |= EXT4_MB_HINT_NOPREALLOC; 4447 ar.flags |= EXT4_MB_HINT_NOPREALLOC;
4448 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
4449 ar.flags |= EXT4_MB_DELALLOC_RESERVED;
4454 newblock = ext4_mb_new_blocks(handle, &ar, &err); 4450 newblock = ext4_mb_new_blocks(handle, &ar, &err);
4455 if (!newblock) 4451 if (!newblock)
4456 goto out2; 4452 goto out2;
@@ -4486,7 +4482,7 @@ got_allocated_blocks:
4486 err = check_eofblocks_fl(handle, inode, map->m_lblk, 4482 err = check_eofblocks_fl(handle, inode, map->m_lblk,
4487 path, ar.len); 4483 path, ar.len);
4488 if (!err) 4484 if (!err)
4489 err = ext4_ext_insert_extent(handle, inode, path, 4485 err = ext4_ext_insert_extent(handle, inode, &path,
4490 &newex, flags); 4486 &newex, flags);
4491 4487
4492 if (!err && set_unwritten) { 4488 if (!err && set_unwritten) {
@@ -4619,10 +4615,8 @@ out:
4619 map->m_pblk = newblock; 4615 map->m_pblk = newblock;
4620 map->m_len = allocated; 4616 map->m_len = allocated;
4621out2: 4617out2:
4622 if (path) { 4618 ext4_ext_drop_refs(path);
4623 ext4_ext_drop_refs(path); 4619 kfree(path);
4624 kfree(path);
4625 }
4626 4620
4627 trace_ext4_ext_map_blocks_exit(inode, flags, map, 4621 trace_ext4_ext_map_blocks_exit(inode, flags, map,
4628 err ? err : allocated); 4622 err ? err : allocated);
@@ -4799,7 +4793,8 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4799 max_blocks -= lblk; 4793 max_blocks -= lblk;
4800 4794
4801 flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT | 4795 flags = EXT4_GET_BLOCKS_CREATE_UNWRIT_EXT |
4802 EXT4_GET_BLOCKS_CONVERT_UNWRITTEN; 4796 EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
4797 EXT4_EX_NOCACHE;
4803 if (mode & FALLOC_FL_KEEP_SIZE) 4798 if (mode & FALLOC_FL_KEEP_SIZE)
4804 flags |= EXT4_GET_BLOCKS_KEEP_SIZE; 4799 flags |= EXT4_GET_BLOCKS_KEEP_SIZE;
4805 4800
@@ -4837,15 +4832,21 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4837 ext4_inode_block_unlocked_dio(inode); 4832 ext4_inode_block_unlocked_dio(inode);
4838 inode_dio_wait(inode); 4833 inode_dio_wait(inode);
4839 4834
4835 ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
4836 flags, mode);
4837 if (ret)
4838 goto out_dio;
4840 /* 4839 /*
4841 * Remove entire range from the extent status tree. 4840 * Remove entire range from the extent status tree.
4841 *
4842 * ext4_es_remove_extent(inode, lblk, max_blocks) is
4843 * NOT sufficient. I'm not sure why this is the case,
4844 * but let's be conservative and remove the extent
4845 * status tree for the entire inode. There should be
4846 * no outstanding delalloc extents thanks to the
4847 * filemap_write_and_wait_range() call above.
4842 */ 4848 */
4843 ret = ext4_es_remove_extent(inode, lblk, max_blocks); 4849 ret = ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS);
4844 if (ret)
4845 goto out_dio;
4846
4847 ret = ext4_alloc_file_blocks(file, lblk, max_blocks, new_size,
4848 flags, mode);
4849 if (ret) 4850 if (ret)
4850 goto out_dio; 4851 goto out_dio;
4851 } 4852 }
@@ -5304,36 +5305,31 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
5304 struct ext4_ext_path *path; 5305 struct ext4_ext_path *path;
5305 int ret = 0, depth; 5306 int ret = 0, depth;
5306 struct ext4_extent *extent; 5307 struct ext4_extent *extent;
5307 ext4_lblk_t stop_block, current_block; 5308 ext4_lblk_t stop_block;
5308 ext4_lblk_t ex_start, ex_end; 5309 ext4_lblk_t ex_start, ex_end;
5309 5310
5310 /* Let path point to the last extent */ 5311 /* Let path point to the last extent */
5311 path = ext4_ext_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0); 5312 path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
5312 if (IS_ERR(path)) 5313 if (IS_ERR(path))
5313 return PTR_ERR(path); 5314 return PTR_ERR(path);
5314 5315
5315 depth = path->p_depth; 5316 depth = path->p_depth;
5316 extent = path[depth].p_ext; 5317 extent = path[depth].p_ext;
5317 if (!extent) { 5318 if (!extent)
5318 ext4_ext_drop_refs(path); 5319 goto out;
5319 kfree(path);
5320 return ret;
5321 }
5322 5320
5323 stop_block = le32_to_cpu(extent->ee_block) + 5321 stop_block = le32_to_cpu(extent->ee_block) +
5324 ext4_ext_get_actual_len(extent); 5322 ext4_ext_get_actual_len(extent);
5325 ext4_ext_drop_refs(path);
5326 kfree(path);
5327 5323
5328 /* Nothing to shift, if hole is at the end of file */ 5324 /* Nothing to shift, if hole is at the end of file */
5329 if (start >= stop_block) 5325 if (start >= stop_block)
5330 return ret; 5326 goto out;
5331 5327
5332 /* 5328 /*
5333 * Don't start shifting extents until we make sure the hole is big 5329 * Don't start shifting extents until we make sure the hole is big
5334 * enough to accomodate the shift. 5330 * enough to accomodate the shift.
5335 */ 5331 */
5336 path = ext4_ext_find_extent(inode, start - 1, NULL, 0); 5332 path = ext4_find_extent(inode, start - 1, &path, 0);
5337 if (IS_ERR(path)) 5333 if (IS_ERR(path))
5338 return PTR_ERR(path); 5334 return PTR_ERR(path);
5339 depth = path->p_depth; 5335 depth = path->p_depth;
@@ -5346,8 +5342,6 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
5346 ex_start = 0; 5342 ex_start = 0;
5347 ex_end = 0; 5343 ex_end = 0;
5348 } 5344 }
5349 ext4_ext_drop_refs(path);
5350 kfree(path);
5351 5345
5352 if ((start == ex_start && shift > ex_start) || 5346 if ((start == ex_start && shift > ex_start) ||
5353 (shift > start - ex_end)) 5347 (shift > start - ex_end))
@@ -5355,7 +5349,7 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
5355 5349
5356 /* Its safe to start updating extents */ 5350 /* Its safe to start updating extents */
5357 while (start < stop_block) { 5351 while (start < stop_block) {
5358 path = ext4_ext_find_extent(inode, start, NULL, 0); 5352 path = ext4_find_extent(inode, start, &path, 0);
5359 if (IS_ERR(path)) 5353 if (IS_ERR(path))
5360 return PTR_ERR(path); 5354 return PTR_ERR(path);
5361 depth = path->p_depth; 5355 depth = path->p_depth;
@@ -5365,27 +5359,23 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
5365 (unsigned long) start); 5359 (unsigned long) start);
5366 return -EIO; 5360 return -EIO;
5367 } 5361 }
5368 5362 if (start > le32_to_cpu(extent->ee_block)) {
5369 current_block = le32_to_cpu(extent->ee_block);
5370 if (start > current_block) {
5371 /* Hole, move to the next extent */ 5363 /* Hole, move to the next extent */
5372 ret = mext_next_extent(inode, path, &extent); 5364 if (extent < EXT_LAST_EXTENT(path[depth].p_hdr)) {
5373 if (ret != 0) { 5365 path[depth].p_ext++;
5374 ext4_ext_drop_refs(path); 5366 } else {
5375 kfree(path); 5367 start = ext4_ext_next_allocated_block(path);
5376 if (ret == 1) 5368 continue;
5377 ret = 0;
5378 break;
5379 } 5369 }
5380 } 5370 }
5381 ret = ext4_ext_shift_path_extents(path, shift, inode, 5371 ret = ext4_ext_shift_path_extents(path, shift, inode,
5382 handle, &start); 5372 handle, &start);
5383 ext4_ext_drop_refs(path);
5384 kfree(path);
5385 if (ret) 5373 if (ret)
5386 break; 5374 break;
5387 } 5375 }
5388 5376out:
5377 ext4_ext_drop_refs(path);
5378 kfree(path);
5389 return ret; 5379 return ret;
5390} 5380}
5391 5381
@@ -5508,3 +5498,199 @@ out_mutex:
5508 mutex_unlock(&inode->i_mutex); 5498 mutex_unlock(&inode->i_mutex);
5509 return ret; 5499 return ret;
5510} 5500}
5501
5502/**
5503 * ext4_swap_extents - Swap extents between two inodes
5504 *
5505 * @inode1: First inode
5506 * @inode2: Second inode
5507 * @lblk1: Start block for first inode
5508 * @lblk2: Start block for second inode
5509 * @count: Number of blocks to swap
5510 * @mark_unwritten: Mark second inode's extents as unwritten after swap
5511 * @erp: Pointer to save error value
5512 *
5513 * This helper routine does exactly what is promise "swap extents". All other
5514 * stuff such as page-cache locking consistency, bh mapping consistency or
5515 * extent's data copying must be performed by caller.
5516 * Locking:
5517 * i_mutex is held for both inodes
5518 * i_data_sem is locked for write for both inodes
5519 * Assumptions:
5520 * All pages from requested range are locked for both inodes
5521 */
5522int
5523ext4_swap_extents(handle_t *handle, struct inode *inode1,
5524 struct inode *inode2, ext4_lblk_t lblk1, ext4_lblk_t lblk2,
5525 ext4_lblk_t count, int unwritten, int *erp)
5526{
5527 struct ext4_ext_path *path1 = NULL;
5528 struct ext4_ext_path *path2 = NULL;
5529 int replaced_count = 0;
5530
5531 BUG_ON(!rwsem_is_locked(&EXT4_I(inode1)->i_data_sem));
5532 BUG_ON(!rwsem_is_locked(&EXT4_I(inode2)->i_data_sem));
5533 BUG_ON(!mutex_is_locked(&inode1->i_mutex));
5534 BUG_ON(!mutex_is_locked(&inode1->i_mutex));
5535
5536 *erp = ext4_es_remove_extent(inode1, lblk1, count);
5537 if (unlikely(*erp))
5538 return 0;
5539 *erp = ext4_es_remove_extent(inode2, lblk2, count);
5540 if (unlikely(*erp))
5541 return 0;
5542
5543 while (count) {
5544 struct ext4_extent *ex1, *ex2, tmp_ex;
5545 ext4_lblk_t e1_blk, e2_blk;
5546 int e1_len, e2_len, len;
5547 int split = 0;
5548
5549 path1 = ext4_find_extent(inode1, lblk1, NULL, EXT4_EX_NOCACHE);
5550 if (unlikely(IS_ERR(path1))) {
5551 *erp = PTR_ERR(path1);
5552 path1 = NULL;
5553 finish:
5554 count = 0;
5555 goto repeat;
5556 }
5557 path2 = ext4_find_extent(inode2, lblk2, NULL, EXT4_EX_NOCACHE);
5558 if (unlikely(IS_ERR(path2))) {
5559 *erp = PTR_ERR(path2);
5560 path2 = NULL;
5561 goto finish;
5562 }
5563 ex1 = path1[path1->p_depth].p_ext;
5564 ex2 = path2[path2->p_depth].p_ext;
5565 /* Do we have somthing to swap ? */
5566 if (unlikely(!ex2 || !ex1))
5567 goto finish;
5568
5569 e1_blk = le32_to_cpu(ex1->ee_block);
5570 e2_blk = le32_to_cpu(ex2->ee_block);
5571 e1_len = ext4_ext_get_actual_len(ex1);
5572 e2_len = ext4_ext_get_actual_len(ex2);
5573
5574 /* Hole handling */
5575 if (!in_range(lblk1, e1_blk, e1_len) ||
5576 !in_range(lblk2, e2_blk, e2_len)) {
5577 ext4_lblk_t next1, next2;
5578
5579 /* if hole after extent, then go to next extent */
5580 next1 = ext4_ext_next_allocated_block(path1);
5581 next2 = ext4_ext_next_allocated_block(path2);
5582 /* If hole before extent, then shift to that extent */
5583 if (e1_blk > lblk1)
5584 next1 = e1_blk;
5585 if (e2_blk > lblk2)
5586 next2 = e1_blk;
5587 /* Do we have something to swap */
5588 if (next1 == EXT_MAX_BLOCKS || next2 == EXT_MAX_BLOCKS)
5589 goto finish;
5590 /* Move to the rightest boundary */
5591 len = next1 - lblk1;
5592 if (len < next2 - lblk2)
5593 len = next2 - lblk2;
5594 if (len > count)
5595 len = count;
5596 lblk1 += len;
5597 lblk2 += len;
5598 count -= len;
5599 goto repeat;
5600 }
5601
5602 /* Prepare left boundary */
5603 if (e1_blk < lblk1) {
5604 split = 1;
5605 *erp = ext4_force_split_extent_at(handle, inode1,
5606 &path1, lblk1, 0);
5607 if (unlikely(*erp))
5608 goto finish;
5609 }
5610 if (e2_blk < lblk2) {
5611 split = 1;
5612 *erp = ext4_force_split_extent_at(handle, inode2,
5613 &path2, lblk2, 0);
5614 if (unlikely(*erp))
5615 goto finish;
5616 }
5617 /* ext4_split_extent_at() may result in leaf extent split,
5618 * path must to be revalidated. */
5619 if (split)
5620 goto repeat;
5621
5622 /* Prepare right boundary */
5623 len = count;
5624 if (len > e1_blk + e1_len - lblk1)
5625 len = e1_blk + e1_len - lblk1;
5626 if (len > e2_blk + e2_len - lblk2)
5627 len = e2_blk + e2_len - lblk2;
5628
5629 if (len != e1_len) {
5630 split = 1;
5631 *erp = ext4_force_split_extent_at(handle, inode1,
5632 &path1, lblk1 + len, 0);
5633 if (unlikely(*erp))
5634 goto finish;
5635 }
5636 if (len != e2_len) {
5637 split = 1;
5638 *erp = ext4_force_split_extent_at(handle, inode2,
5639 &path2, lblk2 + len, 0);
5640 if (*erp)
5641 goto finish;
5642 }
5643 /* ext4_split_extent_at() may result in leaf extent split,
5644 * path must to be revalidated. */
5645 if (split)
5646 goto repeat;
5647
5648 BUG_ON(e2_len != e1_len);
5649 *erp = ext4_ext_get_access(handle, inode1, path1 + path1->p_depth);
5650 if (unlikely(*erp))
5651 goto finish;
5652 *erp = ext4_ext_get_access(handle, inode2, path2 + path2->p_depth);
5653 if (unlikely(*erp))
5654 goto finish;
5655
5656 /* Both extents are fully inside boundaries. Swap it now */
5657 tmp_ex = *ex1;
5658 ext4_ext_store_pblock(ex1, ext4_ext_pblock(ex2));
5659 ext4_ext_store_pblock(ex2, ext4_ext_pblock(&tmp_ex));
5660 ex1->ee_len = cpu_to_le16(e2_len);
5661 ex2->ee_len = cpu_to_le16(e1_len);
5662 if (unwritten)
5663 ext4_ext_mark_unwritten(ex2);
5664 if (ext4_ext_is_unwritten(&tmp_ex))
5665 ext4_ext_mark_unwritten(ex1);
5666
5667 ext4_ext_try_to_merge(handle, inode2, path2, ex2);
5668 ext4_ext_try_to_merge(handle, inode1, path1, ex1);
5669 *erp = ext4_ext_dirty(handle, inode2, path2 +
5670 path2->p_depth);
5671 if (unlikely(*erp))
5672 goto finish;
5673 *erp = ext4_ext_dirty(handle, inode1, path1 +
5674 path1->p_depth);
5675 /*
5676 * Looks scarry ah..? second inode already points to new blocks,
5677 * and it was successfully dirtied. But luckily error may happen
5678 * only due to journal error, so full transaction will be
5679 * aborted anyway.
5680 */
5681 if (unlikely(*erp))
5682 goto finish;
5683 lblk1 += len;
5684 lblk2 += len;
5685 replaced_count += len;
5686 count -= len;
5687
5688 repeat:
5689 ext4_ext_drop_refs(path1);
5690 kfree(path1);
5691 ext4_ext_drop_refs(path2);
5692 kfree(path2);
5693 path1 = path2 = NULL;
5694 }
5695 return replaced_count;
5696}
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 0b7e28e7eaa4..94e7855ae71b 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -11,6 +11,8 @@
11 */ 11 */
12#include <linux/rbtree.h> 12#include <linux/rbtree.h>
13#include <linux/list_sort.h> 13#include <linux/list_sort.h>
14#include <linux/proc_fs.h>
15#include <linux/seq_file.h>
14#include "ext4.h" 16#include "ext4.h"
15#include "extents_status.h" 17#include "extents_status.h"
16 18
@@ -313,19 +315,27 @@ ext4_es_alloc_extent(struct inode *inode, ext4_lblk_t lblk, ext4_lblk_t len,
313 */ 315 */
314 if (!ext4_es_is_delayed(es)) { 316 if (!ext4_es_is_delayed(es)) {
315 EXT4_I(inode)->i_es_lru_nr++; 317 EXT4_I(inode)->i_es_lru_nr++;
316 percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt); 318 percpu_counter_inc(&EXT4_SB(inode->i_sb)->
319 s_es_stats.es_stats_lru_cnt);
317 } 320 }
318 321
322 EXT4_I(inode)->i_es_all_nr++;
323 percpu_counter_inc(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
324
319 return es; 325 return es;
320} 326}
321 327
322static void ext4_es_free_extent(struct inode *inode, struct extent_status *es) 328static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
323{ 329{
330 EXT4_I(inode)->i_es_all_nr--;
331 percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_es_stats.es_stats_all_cnt);
332
324 /* Decrease the lru counter when this es is not delayed */ 333 /* Decrease the lru counter when this es is not delayed */
325 if (!ext4_es_is_delayed(es)) { 334 if (!ext4_es_is_delayed(es)) {
326 BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0); 335 BUG_ON(EXT4_I(inode)->i_es_lru_nr == 0);
327 EXT4_I(inode)->i_es_lru_nr--; 336 EXT4_I(inode)->i_es_lru_nr--;
328 percpu_counter_dec(&EXT4_SB(inode->i_sb)->s_extent_cache_cnt); 337 percpu_counter_dec(&EXT4_SB(inode->i_sb)->
338 s_es_stats.es_stats_lru_cnt);
329 } 339 }
330 340
331 kmem_cache_free(ext4_es_cachep, es); 341 kmem_cache_free(ext4_es_cachep, es);
@@ -426,7 +436,7 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
426 unsigned short ee_len; 436 unsigned short ee_len;
427 int depth, ee_status, es_status; 437 int depth, ee_status, es_status;
428 438
429 path = ext4_ext_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE); 439 path = ext4_find_extent(inode, es->es_lblk, NULL, EXT4_EX_NOCACHE);
430 if (IS_ERR(path)) 440 if (IS_ERR(path))
431 return; 441 return;
432 442
@@ -499,10 +509,8 @@ static void ext4_es_insert_extent_ext_check(struct inode *inode,
499 } 509 }
500 } 510 }
501out: 511out:
502 if (path) { 512 ext4_ext_drop_refs(path);
503 ext4_ext_drop_refs(path); 513 kfree(path);
504 kfree(path);
505 }
506} 514}
507 515
508static void ext4_es_insert_extent_ind_check(struct inode *inode, 516static void ext4_es_insert_extent_ind_check(struct inode *inode,
@@ -731,6 +739,7 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
731 struct extent_status *es) 739 struct extent_status *es)
732{ 740{
733 struct ext4_es_tree *tree; 741 struct ext4_es_tree *tree;
742 struct ext4_es_stats *stats;
734 struct extent_status *es1 = NULL; 743 struct extent_status *es1 = NULL;
735 struct rb_node *node; 744 struct rb_node *node;
736 int found = 0; 745 int found = 0;
@@ -767,11 +776,15 @@ int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
767 } 776 }
768 777
769out: 778out:
779 stats = &EXT4_SB(inode->i_sb)->s_es_stats;
770 if (found) { 780 if (found) {
771 BUG_ON(!es1); 781 BUG_ON(!es1);
772 es->es_lblk = es1->es_lblk; 782 es->es_lblk = es1->es_lblk;
773 es->es_len = es1->es_len; 783 es->es_len = es1->es_len;
774 es->es_pblk = es1->es_pblk; 784 es->es_pblk = es1->es_pblk;
785 stats->es_stats_cache_hits++;
786 } else {
787 stats->es_stats_cache_misses++;
775 } 788 }
776 789
777 read_unlock(&EXT4_I(inode)->i_es_lock); 790 read_unlock(&EXT4_I(inode)->i_es_lock);
@@ -933,11 +946,16 @@ static int __ext4_es_shrink(struct ext4_sb_info *sbi, int nr_to_scan,
933 struct ext4_inode_info *locked_ei) 946 struct ext4_inode_info *locked_ei)
934{ 947{
935 struct ext4_inode_info *ei; 948 struct ext4_inode_info *ei;
949 struct ext4_es_stats *es_stats;
936 struct list_head *cur, *tmp; 950 struct list_head *cur, *tmp;
937 LIST_HEAD(skipped); 951 LIST_HEAD(skipped);
952 ktime_t start_time;
953 u64 scan_time;
938 int nr_shrunk = 0; 954 int nr_shrunk = 0;
939 int retried = 0, skip_precached = 1, nr_skipped = 0; 955 int retried = 0, skip_precached = 1, nr_skipped = 0;
940 956
957 es_stats = &sbi->s_es_stats;
958 start_time = ktime_get();
941 spin_lock(&sbi->s_es_lru_lock); 959 spin_lock(&sbi->s_es_lru_lock);
942 960
943retry: 961retry:
@@ -948,7 +966,8 @@ retry:
948 * If we have already reclaimed all extents from extent 966 * If we have already reclaimed all extents from extent
949 * status tree, just stop the loop immediately. 967 * status tree, just stop the loop immediately.
950 */ 968 */
951 if (percpu_counter_read_positive(&sbi->s_extent_cache_cnt) == 0) 969 if (percpu_counter_read_positive(
970 &es_stats->es_stats_lru_cnt) == 0)
952 break; 971 break;
953 972
954 ei = list_entry(cur, struct ext4_inode_info, i_es_lru); 973 ei = list_entry(cur, struct ext4_inode_info, i_es_lru);
@@ -958,7 +977,7 @@ retry:
958 * time. Normally we try hard to avoid shrinking 977 * time. Normally we try hard to avoid shrinking
959 * precached inodes, but we will as a last resort. 978 * precached inodes, but we will as a last resort.
960 */ 979 */
961 if ((sbi->s_es_last_sorted < ei->i_touch_when) || 980 if ((es_stats->es_stats_last_sorted < ei->i_touch_when) ||
962 (skip_precached && ext4_test_inode_state(&ei->vfs_inode, 981 (skip_precached && ext4_test_inode_state(&ei->vfs_inode,
963 EXT4_STATE_EXT_PRECACHED))) { 982 EXT4_STATE_EXT_PRECACHED))) {
964 nr_skipped++; 983 nr_skipped++;
@@ -992,7 +1011,7 @@ retry:
992 if ((nr_shrunk == 0) && nr_skipped && !retried) { 1011 if ((nr_shrunk == 0) && nr_skipped && !retried) {
993 retried++; 1012 retried++;
994 list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp); 1013 list_sort(NULL, &sbi->s_es_lru, ext4_inode_touch_time_cmp);
995 sbi->s_es_last_sorted = jiffies; 1014 es_stats->es_stats_last_sorted = jiffies;
996 ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info, 1015 ei = list_first_entry(&sbi->s_es_lru, struct ext4_inode_info,
997 i_es_lru); 1016 i_es_lru);
998 /* 1017 /*
@@ -1010,6 +1029,22 @@ retry:
1010 if (locked_ei && nr_shrunk == 0) 1029 if (locked_ei && nr_shrunk == 0)
1011 nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan); 1030 nr_shrunk = __es_try_to_reclaim_extents(locked_ei, nr_to_scan);
1012 1031
1032 scan_time = ktime_to_ns(ktime_sub(ktime_get(), start_time));
1033 if (likely(es_stats->es_stats_scan_time))
1034 es_stats->es_stats_scan_time = (scan_time +
1035 es_stats->es_stats_scan_time*3) / 4;
1036 else
1037 es_stats->es_stats_scan_time = scan_time;
1038 if (scan_time > es_stats->es_stats_max_scan_time)
1039 es_stats->es_stats_max_scan_time = scan_time;
1040 if (likely(es_stats->es_stats_shrunk))
1041 es_stats->es_stats_shrunk = (nr_shrunk +
1042 es_stats->es_stats_shrunk*3) / 4;
1043 else
1044 es_stats->es_stats_shrunk = nr_shrunk;
1045
1046 trace_ext4_es_shrink(sbi->s_sb, nr_shrunk, scan_time, skip_precached,
1047 nr_skipped, retried);
1013 return nr_shrunk; 1048 return nr_shrunk;
1014} 1049}
1015 1050
@@ -1020,8 +1055,8 @@ static unsigned long ext4_es_count(struct shrinker *shrink,
1020 struct ext4_sb_info *sbi; 1055 struct ext4_sb_info *sbi;
1021 1056
1022 sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker); 1057 sbi = container_of(shrink, struct ext4_sb_info, s_es_shrinker);
1023 nr = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); 1058 nr = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt);
1024 trace_ext4_es_shrink_enter(sbi->s_sb, sc->nr_to_scan, nr); 1059 trace_ext4_es_shrink_count(sbi->s_sb, sc->nr_to_scan, nr);
1025 return nr; 1060 return nr;
1026} 1061}
1027 1062
@@ -1033,31 +1068,160 @@ static unsigned long ext4_es_scan(struct shrinker *shrink,
1033 int nr_to_scan = sc->nr_to_scan; 1068 int nr_to_scan = sc->nr_to_scan;
1034 int ret, nr_shrunk; 1069 int ret, nr_shrunk;
1035 1070
1036 ret = percpu_counter_read_positive(&sbi->s_extent_cache_cnt); 1071 ret = percpu_counter_read_positive(&sbi->s_es_stats.es_stats_lru_cnt);
1037 trace_ext4_es_shrink_enter(sbi->s_sb, nr_to_scan, ret); 1072 trace_ext4_es_shrink_scan_enter(sbi->s_sb, nr_to_scan, ret);
1038 1073
1039 if (!nr_to_scan) 1074 if (!nr_to_scan)
1040 return ret; 1075 return ret;
1041 1076
1042 nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL); 1077 nr_shrunk = __ext4_es_shrink(sbi, nr_to_scan, NULL);
1043 1078
1044 trace_ext4_es_shrink_exit(sbi->s_sb, nr_shrunk, ret); 1079 trace_ext4_es_shrink_scan_exit(sbi->s_sb, nr_shrunk, ret);
1045 return nr_shrunk; 1080 return nr_shrunk;
1046} 1081}
1047 1082
1048void ext4_es_register_shrinker(struct ext4_sb_info *sbi) 1083static void *ext4_es_seq_shrinker_info_start(struct seq_file *seq, loff_t *pos)
1049{ 1084{
1085 return *pos ? NULL : SEQ_START_TOKEN;
1086}
1087
1088static void *
1089ext4_es_seq_shrinker_info_next(struct seq_file *seq, void *v, loff_t *pos)
1090{
1091 return NULL;
1092}
1093
1094static int ext4_es_seq_shrinker_info_show(struct seq_file *seq, void *v)
1095{
1096 struct ext4_sb_info *sbi = seq->private;
1097 struct ext4_es_stats *es_stats = &sbi->s_es_stats;
1098 struct ext4_inode_info *ei, *max = NULL;
1099 unsigned int inode_cnt = 0;
1100
1101 if (v != SEQ_START_TOKEN)
1102 return 0;
1103
1104 /* here we just find an inode that has the max nr. of objects */
1105 spin_lock(&sbi->s_es_lru_lock);
1106 list_for_each_entry(ei, &sbi->s_es_lru, i_es_lru) {
1107 inode_cnt++;
1108 if (max && max->i_es_all_nr < ei->i_es_all_nr)
1109 max = ei;
1110 else if (!max)
1111 max = ei;
1112 }
1113 spin_unlock(&sbi->s_es_lru_lock);
1114
1115 seq_printf(seq, "stats:\n %lld objects\n %lld reclaimable objects\n",
1116 percpu_counter_sum_positive(&es_stats->es_stats_all_cnt),
1117 percpu_counter_sum_positive(&es_stats->es_stats_lru_cnt));
1118 seq_printf(seq, " %lu/%lu cache hits/misses\n",
1119 es_stats->es_stats_cache_hits,
1120 es_stats->es_stats_cache_misses);
1121 if (es_stats->es_stats_last_sorted != 0)
1122 seq_printf(seq, " %u ms last sorted interval\n",
1123 jiffies_to_msecs(jiffies -
1124 es_stats->es_stats_last_sorted));
1125 if (inode_cnt)
1126 seq_printf(seq, " %d inodes on lru list\n", inode_cnt);
1127
1128 seq_printf(seq, "average:\n %llu us scan time\n",
1129 div_u64(es_stats->es_stats_scan_time, 1000));
1130 seq_printf(seq, " %lu shrunk objects\n", es_stats->es_stats_shrunk);
1131 if (inode_cnt)
1132 seq_printf(seq,
1133 "maximum:\n %lu inode (%u objects, %u reclaimable)\n"
1134 " %llu us max scan time\n",
1135 max->vfs_inode.i_ino, max->i_es_all_nr, max->i_es_lru_nr,
1136 div_u64(es_stats->es_stats_max_scan_time, 1000));
1137
1138 return 0;
1139}
1140
/* ->stop hook: nothing to clean up, all output happens in ->show(). */
static void ext4_es_seq_shrinker_info_stop(struct seq_file *seq, void *v)
{
}
1144
1145static const struct seq_operations ext4_es_seq_shrinker_info_ops = {
1146 .start = ext4_es_seq_shrinker_info_start,
1147 .next = ext4_es_seq_shrinker_info_next,
1148 .stop = ext4_es_seq_shrinker_info_stop,
1149 .show = ext4_es_seq_shrinker_info_show,
1150};
1151
1152static int
1153ext4_es_seq_shrinker_info_open(struct inode *inode, struct file *file)
1154{
1155 int ret;
1156
1157 ret = seq_open(file, &ext4_es_seq_shrinker_info_ops);
1158 if (!ret) {
1159 struct seq_file *m = file->private_data;
1160 m->private = PDE_DATA(inode);
1161 }
1162
1163 return ret;
1164}
1165
/* Release hook: no private state beyond the seq_file itself. */
static int
ext4_es_seq_shrinker_info_release(struct inode *inode, struct file *file)
{
	return seq_release(inode, file);
}
1171
1172static const struct file_operations ext4_es_seq_shrinker_info_fops = {
1173 .owner = THIS_MODULE,
1174 .open = ext4_es_seq_shrinker_info_open,
1175 .read = seq_read,
1176 .llseek = seq_lseek,
1177 .release = ext4_es_seq_shrinker_info_release,
1178};
1179
1180int ext4_es_register_shrinker(struct ext4_sb_info *sbi)
1181{
1182 int err;
1183
1050 INIT_LIST_HEAD(&sbi->s_es_lru); 1184 INIT_LIST_HEAD(&sbi->s_es_lru);
1051 spin_lock_init(&sbi->s_es_lru_lock); 1185 spin_lock_init(&sbi->s_es_lru_lock);
1052 sbi->s_es_last_sorted = 0; 1186 sbi->s_es_stats.es_stats_last_sorted = 0;
1187 sbi->s_es_stats.es_stats_shrunk = 0;
1188 sbi->s_es_stats.es_stats_cache_hits = 0;
1189 sbi->s_es_stats.es_stats_cache_misses = 0;
1190 sbi->s_es_stats.es_stats_scan_time = 0;
1191 sbi->s_es_stats.es_stats_max_scan_time = 0;
1192 err = percpu_counter_init(&sbi->s_es_stats.es_stats_all_cnt, 0, GFP_KERNEL);
1193 if (err)
1194 return err;
1195 err = percpu_counter_init(&sbi->s_es_stats.es_stats_lru_cnt, 0, GFP_KERNEL);
1196 if (err)
1197 goto err1;
1198
1053 sbi->s_es_shrinker.scan_objects = ext4_es_scan; 1199 sbi->s_es_shrinker.scan_objects = ext4_es_scan;
1054 sbi->s_es_shrinker.count_objects = ext4_es_count; 1200 sbi->s_es_shrinker.count_objects = ext4_es_count;
1055 sbi->s_es_shrinker.seeks = DEFAULT_SEEKS; 1201 sbi->s_es_shrinker.seeks = DEFAULT_SEEKS;
1056 register_shrinker(&sbi->s_es_shrinker); 1202 err = register_shrinker(&sbi->s_es_shrinker);
1203 if (err)
1204 goto err2;
1205
1206 if (sbi->s_proc)
1207 proc_create_data("es_shrinker_info", S_IRUGO, sbi->s_proc,
1208 &ext4_es_seq_shrinker_info_fops, sbi);
1209
1210 return 0;
1211
1212err2:
1213 percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt);
1214err1:
1215 percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
1216 return err;
1057} 1217}
1058 1218
1059void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi) 1219void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi)
1060{ 1220{
1221 if (sbi->s_proc)
1222 remove_proc_entry("es_shrinker_info", sbi->s_proc);
1223 percpu_counter_destroy(&sbi->s_es_stats.es_stats_all_cnt);
1224 percpu_counter_destroy(&sbi->s_es_stats.es_stats_lru_cnt);
1061 unregister_shrinker(&sbi->s_es_shrinker); 1225 unregister_shrinker(&sbi->s_es_shrinker);
1062} 1226}
1063 1227
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index f1b62a419920..efd5f970b501 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -64,6 +64,17 @@ struct ext4_es_tree {
64 struct extent_status *cache_es; /* recently accessed extent */ 64 struct extent_status *cache_es; /* recently accessed extent */
65}; 65};
66 66
/* Per-superblock statistics for the extent-status cache and its shrinker. */
struct ext4_es_stats {
	/* jiffies timestamp of the last LRU list_sort() in the shrinker */
	unsigned long es_stats_last_sorted;
	/* running average of objects reclaimed per shrinker pass */
	unsigned long es_stats_shrunk;
	/* ext4_es_lookup_extent() hit/miss counters */
	unsigned long es_stats_cache_hits;
	unsigned long es_stats_cache_misses;
	/* running-average and maximum shrinker scan time, in ns */
	u64 es_stats_scan_time;
	u64 es_stats_max_scan_time;
	/* all cached extent-status objects */
	struct percpu_counter es_stats_all_cnt;
	/* reclaimable (non-delayed) objects on the LRU */
	struct percpu_counter es_stats_lru_cnt;
};
77
67extern int __init ext4_init_es(void); 78extern int __init ext4_init_es(void);
68extern void ext4_exit_es(void); 79extern void ext4_exit_es(void);
69extern void ext4_es_init_tree(struct ext4_es_tree *tree); 80extern void ext4_es_init_tree(struct ext4_es_tree *tree);
@@ -138,7 +149,7 @@ static inline void ext4_es_store_pblock_status(struct extent_status *es,
138 (pb & ~ES_MASK)); 149 (pb & ~ES_MASK));
139} 150}
140 151
141extern void ext4_es_register_shrinker(struct ext4_sb_info *sbi); 152extern int ext4_es_register_shrinker(struct ext4_sb_info *sbi);
142extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi); 153extern void ext4_es_unregister_shrinker(struct ext4_sb_info *sbi);
143extern void ext4_es_lru_add(struct inode *inode); 154extern void ext4_es_lru_add(struct inode *inode);
144extern void ext4_es_lru_del(struct inode *inode); 155extern void ext4_es_lru_del(struct inode *inode);
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index aca7b24a4432..8131be8c0af3 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -137,10 +137,10 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
137 iov_iter_truncate(from, sbi->s_bitmap_maxbytes - pos); 137 iov_iter_truncate(from, sbi->s_bitmap_maxbytes - pos);
138 } 138 }
139 139
140 iocb->private = &overwrite;
140 if (o_direct) { 141 if (o_direct) {
141 blk_start_plug(&plug); 142 blk_start_plug(&plug);
142 143
143 iocb->private = &overwrite;
144 144
145 /* check whether we do a DIO overwrite or not */ 145 /* check whether we do a DIO overwrite or not */
146 if (ext4_should_dioread_nolock(inode) && !aio_mutex && 146 if (ext4_should_dioread_nolock(inode) && !aio_mutex &&
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 5b87fc36aab8..ac644c31ca67 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -887,6 +887,10 @@ got:
887 struct buffer_head *block_bitmap_bh; 887 struct buffer_head *block_bitmap_bh;
888 888
889 block_bitmap_bh = ext4_read_block_bitmap(sb, group); 889 block_bitmap_bh = ext4_read_block_bitmap(sb, group);
890 if (!block_bitmap_bh) {
891 err = -EIO;
892 goto out;
893 }
890 BUFFER_TRACE(block_bitmap_bh, "get block bitmap access"); 894 BUFFER_TRACE(block_bitmap_bh, "get block bitmap access");
891 err = ext4_journal_get_write_access(handle, block_bitmap_bh); 895 err = ext4_journal_get_write_access(handle, block_bitmap_bh);
892 if (err) { 896 if (err) {
@@ -1011,8 +1015,7 @@ got:
1011 spin_unlock(&sbi->s_next_gen_lock); 1015 spin_unlock(&sbi->s_next_gen_lock);
1012 1016
1013 /* Precompute checksum seed for inode metadata */ 1017 /* Precompute checksum seed for inode metadata */
1014 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 1018 if (ext4_has_metadata_csum(sb)) {
1015 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
1016 __u32 csum; 1019 __u32 csum;
1017 __le32 inum = cpu_to_le32(inode->i_ino); 1020 __le32 inum = cpu_to_le32(inode->i_ino);
1018 __le32 gen = cpu_to_le32(inode->i_generation); 1021 __le32 gen = cpu_to_le32(inode->i_generation);
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index e75f840000a0..36b369697a13 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -318,34 +318,24 @@ static int ext4_blks_to_allocate(Indirect *branch, int k, unsigned int blks,
318 * ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain 318 * ext4_alloc_block() (normally -ENOSPC). Otherwise we set the chain
319 * as described above and return 0. 319 * as described above and return 0.
320 */ 320 */
321static int ext4_alloc_branch(handle_t *handle, struct inode *inode, 321static int ext4_alloc_branch(handle_t *handle,
322 ext4_lblk_t iblock, int indirect_blks, 322 struct ext4_allocation_request *ar,
323 int *blks, ext4_fsblk_t goal, 323 int indirect_blks, ext4_lblk_t *offsets,
324 ext4_lblk_t *offsets, Indirect *branch) 324 Indirect *branch)
325{ 325{
326 struct ext4_allocation_request ar;
327 struct buffer_head * bh; 326 struct buffer_head * bh;
328 ext4_fsblk_t b, new_blocks[4]; 327 ext4_fsblk_t b, new_blocks[4];
329 __le32 *p; 328 __le32 *p;
330 int i, j, err, len = 1; 329 int i, j, err, len = 1;
331 330
332 /*
333 * Set up for the direct block allocation
334 */
335 memset(&ar, 0, sizeof(ar));
336 ar.inode = inode;
337 ar.len = *blks;
338 ar.logical = iblock;
339 if (S_ISREG(inode->i_mode))
340 ar.flags = EXT4_MB_HINT_DATA;
341
342 for (i = 0; i <= indirect_blks; i++) { 331 for (i = 0; i <= indirect_blks; i++) {
343 if (i == indirect_blks) { 332 if (i == indirect_blks) {
344 ar.goal = goal; 333 new_blocks[i] = ext4_mb_new_blocks(handle, ar, &err);
345 new_blocks[i] = ext4_mb_new_blocks(handle, &ar, &err);
346 } else 334 } else
347 goal = new_blocks[i] = ext4_new_meta_blocks(handle, inode, 335 ar->goal = new_blocks[i] = ext4_new_meta_blocks(handle,
348 goal, 0, NULL, &err); 336 ar->inode, ar->goal,
337 ar->flags & EXT4_MB_DELALLOC_RESERVED,
338 NULL, &err);
349 if (err) { 339 if (err) {
350 i--; 340 i--;
351 goto failed; 341 goto failed;
@@ -354,7 +344,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
354 if (i == 0) 344 if (i == 0)
355 continue; 345 continue;
356 346
357 bh = branch[i].bh = sb_getblk(inode->i_sb, new_blocks[i-1]); 347 bh = branch[i].bh = sb_getblk(ar->inode->i_sb, new_blocks[i-1]);
358 if (unlikely(!bh)) { 348 if (unlikely(!bh)) {
359 err = -ENOMEM; 349 err = -ENOMEM;
360 goto failed; 350 goto failed;
@@ -372,7 +362,7 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
372 b = new_blocks[i]; 362 b = new_blocks[i];
373 363
374 if (i == indirect_blks) 364 if (i == indirect_blks)
375 len = ar.len; 365 len = ar->len;
376 for (j = 0; j < len; j++) 366 for (j = 0; j < len; j++)
377 *p++ = cpu_to_le32(b++); 367 *p++ = cpu_to_le32(b++);
378 368
@@ -381,11 +371,10 @@ static int ext4_alloc_branch(handle_t *handle, struct inode *inode,
381 unlock_buffer(bh); 371 unlock_buffer(bh);
382 372
383 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 373 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
384 err = ext4_handle_dirty_metadata(handle, inode, bh); 374 err = ext4_handle_dirty_metadata(handle, ar->inode, bh);
385 if (err) 375 if (err)
386 goto failed; 376 goto failed;
387 } 377 }
388 *blks = ar.len;
389 return 0; 378 return 0;
390failed: 379failed:
391 for (; i >= 0; i--) { 380 for (; i >= 0; i--) {
@@ -396,10 +385,10 @@ failed:
396 * existing before ext4_alloc_branch() was called. 385 * existing before ext4_alloc_branch() was called.
397 */ 386 */
398 if (i > 0 && i != indirect_blks && branch[i].bh) 387 if (i > 0 && i != indirect_blks && branch[i].bh)
399 ext4_forget(handle, 1, inode, branch[i].bh, 388 ext4_forget(handle, 1, ar->inode, branch[i].bh,
400 branch[i].bh->b_blocknr); 389 branch[i].bh->b_blocknr);
401 ext4_free_blocks(handle, inode, NULL, new_blocks[i], 390 ext4_free_blocks(handle, ar->inode, NULL, new_blocks[i],
402 (i == indirect_blks) ? ar.len : 1, 0); 391 (i == indirect_blks) ? ar->len : 1, 0);
403 } 392 }
404 return err; 393 return err;
405} 394}
@@ -419,9 +408,9 @@ failed:
419 * inode (->i_blocks, etc.). In case of success we end up with the full 408 * inode (->i_blocks, etc.). In case of success we end up with the full
420 * chain to new block and return 0. 409 * chain to new block and return 0.
421 */ 410 */
422static int ext4_splice_branch(handle_t *handle, struct inode *inode, 411static int ext4_splice_branch(handle_t *handle,
423 ext4_lblk_t block, Indirect *where, int num, 412 struct ext4_allocation_request *ar,
424 int blks) 413 Indirect *where, int num)
425{ 414{
426 int i; 415 int i;
427 int err = 0; 416 int err = 0;
@@ -446,9 +435,9 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
446 * Update the host buffer_head or inode to point to more just allocated 435 * Update the host buffer_head or inode to point to more just allocated
447 * direct blocks blocks 436 * direct blocks blocks
448 */ 437 */
449 if (num == 0 && blks > 1) { 438 if (num == 0 && ar->len > 1) {
450 current_block = le32_to_cpu(where->key) + 1; 439 current_block = le32_to_cpu(where->key) + 1;
451 for (i = 1; i < blks; i++) 440 for (i = 1; i < ar->len; i++)
452 *(where->p + i) = cpu_to_le32(current_block++); 441 *(where->p + i) = cpu_to_le32(current_block++);
453 } 442 }
454 443
@@ -465,14 +454,14 @@ static int ext4_splice_branch(handle_t *handle, struct inode *inode,
465 */ 454 */
466 jbd_debug(5, "splicing indirect only\n"); 455 jbd_debug(5, "splicing indirect only\n");
467 BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata"); 456 BUFFER_TRACE(where->bh, "call ext4_handle_dirty_metadata");
468 err = ext4_handle_dirty_metadata(handle, inode, where->bh); 457 err = ext4_handle_dirty_metadata(handle, ar->inode, where->bh);
469 if (err) 458 if (err)
470 goto err_out; 459 goto err_out;
471 } else { 460 } else {
472 /* 461 /*
473 * OK, we spliced it into the inode itself on a direct block. 462 * OK, we spliced it into the inode itself on a direct block.
474 */ 463 */
475 ext4_mark_inode_dirty(handle, inode); 464 ext4_mark_inode_dirty(handle, ar->inode);
476 jbd_debug(5, "splicing direct\n"); 465 jbd_debug(5, "splicing direct\n");
477 } 466 }
478 return err; 467 return err;
@@ -484,11 +473,11 @@ err_out:
484 * need to revoke the block, which is why we don't 473 * need to revoke the block, which is why we don't
485 * need to set EXT4_FREE_BLOCKS_METADATA. 474 * need to set EXT4_FREE_BLOCKS_METADATA.
486 */ 475 */
487 ext4_free_blocks(handle, inode, where[i].bh, 0, 1, 476 ext4_free_blocks(handle, ar->inode, where[i].bh, 0, 1,
488 EXT4_FREE_BLOCKS_FORGET); 477 EXT4_FREE_BLOCKS_FORGET);
489 } 478 }
490 ext4_free_blocks(handle, inode, NULL, le32_to_cpu(where[num].key), 479 ext4_free_blocks(handle, ar->inode, NULL, le32_to_cpu(where[num].key),
491 blks, 0); 480 ar->len, 0);
492 481
493 return err; 482 return err;
494} 483}
@@ -525,11 +514,11 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
525 struct ext4_map_blocks *map, 514 struct ext4_map_blocks *map,
526 int flags) 515 int flags)
527{ 516{
517 struct ext4_allocation_request ar;
528 int err = -EIO; 518 int err = -EIO;
529 ext4_lblk_t offsets[4]; 519 ext4_lblk_t offsets[4];
530 Indirect chain[4]; 520 Indirect chain[4];
531 Indirect *partial; 521 Indirect *partial;
532 ext4_fsblk_t goal;
533 int indirect_blks; 522 int indirect_blks;
534 int blocks_to_boundary = 0; 523 int blocks_to_boundary = 0;
535 int depth; 524 int depth;
@@ -579,7 +568,16 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
579 return -ENOSPC; 568 return -ENOSPC;
580 } 569 }
581 570
582 goal = ext4_find_goal(inode, map->m_lblk, partial); 571 /* Set up for the direct block allocation */
572 memset(&ar, 0, sizeof(ar));
573 ar.inode = inode;
574 ar.logical = map->m_lblk;
575 if (S_ISREG(inode->i_mode))
576 ar.flags = EXT4_MB_HINT_DATA;
577 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
578 ar.flags |= EXT4_MB_DELALLOC_RESERVED;
579
580 ar.goal = ext4_find_goal(inode, map->m_lblk, partial);
583 581
584 /* the number of blocks need to allocate for [d,t]indirect blocks */ 582 /* the number of blocks need to allocate for [d,t]indirect blocks */
585 indirect_blks = (chain + depth) - partial - 1; 583 indirect_blks = (chain + depth) - partial - 1;
@@ -588,13 +586,13 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
588 * Next look up the indirect map to count the totoal number of 586 * Next look up the indirect map to count the totoal number of
589 * direct blocks to allocate for this branch. 587 * direct blocks to allocate for this branch.
590 */ 588 */
591 count = ext4_blks_to_allocate(partial, indirect_blks, 589 ar.len = ext4_blks_to_allocate(partial, indirect_blks,
592 map->m_len, blocks_to_boundary); 590 map->m_len, blocks_to_boundary);
591
593 /* 592 /*
594 * Block out ext4_truncate while we alter the tree 593 * Block out ext4_truncate while we alter the tree
595 */ 594 */
596 err = ext4_alloc_branch(handle, inode, map->m_lblk, indirect_blks, 595 err = ext4_alloc_branch(handle, &ar, indirect_blks,
597 &count, goal,
598 offsets + (partial - chain), partial); 596 offsets + (partial - chain), partial);
599 597
600 /* 598 /*
@@ -605,14 +603,14 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
605 * may need to return -EAGAIN upwards in the worst case. --sct 603 * may need to return -EAGAIN upwards in the worst case. --sct
606 */ 604 */
607 if (!err) 605 if (!err)
608 err = ext4_splice_branch(handle, inode, map->m_lblk, 606 err = ext4_splice_branch(handle, &ar, partial, indirect_blks);
609 partial, indirect_blks, count);
610 if (err) 607 if (err)
611 goto cleanup; 608 goto cleanup;
612 609
613 map->m_flags |= EXT4_MAP_NEW; 610 map->m_flags |= EXT4_MAP_NEW;
614 611
615 ext4_update_inode_fsync_trans(handle, inode, 1); 612 ext4_update_inode_fsync_trans(handle, inode, 1);
613 count = ar.len;
616got_it: 614got_it:
617 map->m_flags |= EXT4_MAP_MAPPED; 615 map->m_flags |= EXT4_MAP_MAPPED;
618 map->m_pblk = le32_to_cpu(chain[depth-1].key); 616 map->m_pblk = le32_to_cpu(chain[depth-1].key);
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index bea662bd0ca6..3ea62695abce 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -594,6 +594,7 @@ retry:
594 if (ret) { 594 if (ret) {
595 unlock_page(page); 595 unlock_page(page);
596 page_cache_release(page); 596 page_cache_release(page);
597 page = NULL;
597 ext4_orphan_add(handle, inode); 598 ext4_orphan_add(handle, inode);
598 up_write(&EXT4_I(inode)->xattr_sem); 599 up_write(&EXT4_I(inode)->xattr_sem);
599 sem_held = 0; 600 sem_held = 0;
@@ -613,7 +614,8 @@ retry:
613 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 614 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
614 goto retry; 615 goto retry;
615 616
616 block_commit_write(page, from, to); 617 if (page)
618 block_commit_write(page, from, to);
617out: 619out:
618 if (page) { 620 if (page) {
619 unlock_page(page); 621 unlock_page(page);
@@ -1126,8 +1128,7 @@ static int ext4_finish_convert_inline_dir(handle_t *handle,
1126 memcpy((void *)de, buf + EXT4_INLINE_DOTDOT_SIZE, 1128 memcpy((void *)de, buf + EXT4_INLINE_DOTDOT_SIZE,
1127 inline_size - EXT4_INLINE_DOTDOT_SIZE); 1129 inline_size - EXT4_INLINE_DOTDOT_SIZE);
1128 1130
1129 if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 1131 if (ext4_has_metadata_csum(inode->i_sb))
1130 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
1131 csum_size = sizeof(struct ext4_dir_entry_tail); 1132 csum_size = sizeof(struct ext4_dir_entry_tail);
1132 1133
1133 inode->i_size = inode->i_sb->s_blocksize; 1134 inode->i_size = inode->i_sb->s_blocksize;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 3aa26e9117c4..3356ab5395f4 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -83,8 +83,7 @@ static int ext4_inode_csum_verify(struct inode *inode, struct ext4_inode *raw,
83 83
84 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != 84 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
85 cpu_to_le32(EXT4_OS_LINUX) || 85 cpu_to_le32(EXT4_OS_LINUX) ||
86 !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 86 !ext4_has_metadata_csum(inode->i_sb))
87 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
88 return 1; 87 return 1;
89 88
90 provided = le16_to_cpu(raw->i_checksum_lo); 89 provided = le16_to_cpu(raw->i_checksum_lo);
@@ -105,8 +104,7 @@ static void ext4_inode_csum_set(struct inode *inode, struct ext4_inode *raw,
105 104
106 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os != 105 if (EXT4_SB(inode->i_sb)->s_es->s_creator_os !=
107 cpu_to_le32(EXT4_OS_LINUX) || 106 cpu_to_le32(EXT4_OS_LINUX) ||
108 !EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 107 !ext4_has_metadata_csum(inode->i_sb))
109 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
110 return; 108 return;
111 109
112 csum = ext4_inode_csum(inode, raw, ei); 110 csum = ext4_inode_csum(inode, raw, ei);
@@ -224,16 +222,15 @@ void ext4_evict_inode(struct inode *inode)
224 goto no_delete; 222 goto no_delete;
225 } 223 }
226 224
227 if (!is_bad_inode(inode)) 225 if (is_bad_inode(inode))
228 dquot_initialize(inode); 226 goto no_delete;
227 dquot_initialize(inode);
229 228
230 if (ext4_should_order_data(inode)) 229 if (ext4_should_order_data(inode))
231 ext4_begin_ordered_truncate(inode, 0); 230 ext4_begin_ordered_truncate(inode, 0);
232 truncate_inode_pages_final(&inode->i_data); 231 truncate_inode_pages_final(&inode->i_data);
233 232
234 WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count)); 233 WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count));
235 if (is_bad_inode(inode))
236 goto no_delete;
237 234
238 /* 235 /*
239 * Protect us against freezing - iput() caller didn't have to have any 236 * Protect us against freezing - iput() caller didn't have to have any
@@ -590,20 +587,12 @@ found:
590 /* 587 /*
591 * New blocks allocate and/or writing to unwritten extent 588 * New blocks allocate and/or writing to unwritten extent
592 * will possibly result in updating i_data, so we take 589 * will possibly result in updating i_data, so we take
593 * the write lock of i_data_sem, and call get_blocks() 590 * the write lock of i_data_sem, and call get_block()
594 * with create == 1 flag. 591 * with create == 1 flag.
595 */ 592 */
596 down_write(&EXT4_I(inode)->i_data_sem); 593 down_write(&EXT4_I(inode)->i_data_sem);
597 594
598 /* 595 /*
599 * if the caller is from delayed allocation writeout path
600 * we have already reserved fs blocks for allocation
601 * let the underlying get_block() function know to
602 * avoid double accounting
603 */
604 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
605 ext4_set_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
606 /*
607 * We need to check for EXT4 here because migrate 596 * We need to check for EXT4 here because migrate
608 * could have changed the inode type in between 597 * could have changed the inode type in between
609 */ 598 */
@@ -631,8 +620,6 @@ found:
631 (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)) 620 (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE))
632 ext4_da_update_reserve_space(inode, retval, 1); 621 ext4_da_update_reserve_space(inode, retval, 1);
633 } 622 }
634 if (flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE)
635 ext4_clear_inode_state(inode, EXT4_STATE_DELALLOC_RESERVED);
636 623
637 if (retval > 0) { 624 if (retval > 0) {
638 unsigned int status; 625 unsigned int status;
@@ -734,11 +721,11 @@ int ext4_get_block(struct inode *inode, sector_t iblock,
734 * `handle' can be NULL if create is zero 721 * `handle' can be NULL if create is zero
735 */ 722 */
736struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode, 723struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
737 ext4_lblk_t block, int create, int *errp) 724 ext4_lblk_t block, int create)
738{ 725{
739 struct ext4_map_blocks map; 726 struct ext4_map_blocks map;
740 struct buffer_head *bh; 727 struct buffer_head *bh;
741 int fatal = 0, err; 728 int err;
742 729
743 J_ASSERT(handle != NULL || create == 0); 730 J_ASSERT(handle != NULL || create == 0);
744 731
@@ -747,21 +734,14 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
747 err = ext4_map_blocks(handle, inode, &map, 734 err = ext4_map_blocks(handle, inode, &map,
748 create ? EXT4_GET_BLOCKS_CREATE : 0); 735 create ? EXT4_GET_BLOCKS_CREATE : 0);
749 736
750 /* ensure we send some value back into *errp */ 737 if (err == 0)
751 *errp = 0; 738 return create ? ERR_PTR(-ENOSPC) : NULL;
752
753 if (create && err == 0)
754 err = -ENOSPC; /* should never happen */
755 if (err < 0) 739 if (err < 0)
756 *errp = err; 740 return ERR_PTR(err);
757 if (err <= 0)
758 return NULL;
759 741
760 bh = sb_getblk(inode->i_sb, map.m_pblk); 742 bh = sb_getblk(inode->i_sb, map.m_pblk);
761 if (unlikely(!bh)) { 743 if (unlikely(!bh))
762 *errp = -ENOMEM; 744 return ERR_PTR(-ENOMEM);
763 return NULL;
764 }
765 if (map.m_flags & EXT4_MAP_NEW) { 745 if (map.m_flags & EXT4_MAP_NEW) {
766 J_ASSERT(create != 0); 746 J_ASSERT(create != 0);
767 J_ASSERT(handle != NULL); 747 J_ASSERT(handle != NULL);
@@ -775,44 +755,44 @@ struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
775 */ 755 */
776 lock_buffer(bh); 756 lock_buffer(bh);
777 BUFFER_TRACE(bh, "call get_create_access"); 757 BUFFER_TRACE(bh, "call get_create_access");
778 fatal = ext4_journal_get_create_access(handle, bh); 758 err = ext4_journal_get_create_access(handle, bh);
779 if (!fatal && !buffer_uptodate(bh)) { 759 if (unlikely(err)) {
760 unlock_buffer(bh);
761 goto errout;
762 }
763 if (!buffer_uptodate(bh)) {
780 memset(bh->b_data, 0, inode->i_sb->s_blocksize); 764 memset(bh->b_data, 0, inode->i_sb->s_blocksize);
781 set_buffer_uptodate(bh); 765 set_buffer_uptodate(bh);
782 } 766 }
783 unlock_buffer(bh); 767 unlock_buffer(bh);
784 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata"); 768 BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
785 err = ext4_handle_dirty_metadata(handle, inode, bh); 769 err = ext4_handle_dirty_metadata(handle, inode, bh);
786 if (!fatal) 770 if (unlikely(err))
787 fatal = err; 771 goto errout;
788 } else { 772 } else
789 BUFFER_TRACE(bh, "not a new buffer"); 773 BUFFER_TRACE(bh, "not a new buffer");
790 }
791 if (fatal) {
792 *errp = fatal;
793 brelse(bh);
794 bh = NULL;
795 }
796 return bh; 774 return bh;
775errout:
776 brelse(bh);
777 return ERR_PTR(err);
797} 778}
798 779
799struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode, 780struct buffer_head *ext4_bread(handle_t *handle, struct inode *inode,
800 ext4_lblk_t block, int create, int *err) 781 ext4_lblk_t block, int create)
801{ 782{
802 struct buffer_head *bh; 783 struct buffer_head *bh;
803 784
804 bh = ext4_getblk(handle, inode, block, create, err); 785 bh = ext4_getblk(handle, inode, block, create);
805 if (!bh) 786 if (IS_ERR(bh))
806 return bh; 787 return bh;
807 if (buffer_uptodate(bh)) 788 if (!bh || buffer_uptodate(bh))
808 return bh; 789 return bh;
809 ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh); 790 ll_rw_block(READ | REQ_META | REQ_PRIO, 1, &bh);
810 wait_on_buffer(bh); 791 wait_on_buffer(bh);
811 if (buffer_uptodate(bh)) 792 if (buffer_uptodate(bh))
812 return bh; 793 return bh;
813 put_bh(bh); 794 put_bh(bh);
814 *err = -EIO; 795 return ERR_PTR(-EIO);
815 return NULL;
816} 796}
817 797
818int ext4_walk_page_buffers(handle_t *handle, 798int ext4_walk_page_buffers(handle_t *handle,
@@ -1536,7 +1516,7 @@ out_unlock:
1536} 1516}
1537 1517
1538/* 1518/*
1539 * This is a special get_blocks_t callback which is used by 1519 * This is a special get_block_t callback which is used by
1540 * ext4_da_write_begin(). It will either return mapped block or 1520 * ext4_da_write_begin(). It will either return mapped block or
1541 * reserve space for a single block. 1521 * reserve space for a single block.
1542 * 1522 *
@@ -2011,12 +1991,10 @@ static int mpage_map_one_extent(handle_t *handle, struct mpage_da_data *mpd)
2011 * in data loss. So use reserved blocks to allocate metadata if 1991 * in data loss. So use reserved blocks to allocate metadata if
2012 * possible. 1992 * possible.
2013 * 1993 *
2014 * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if the blocks 1994 * We pass in the magic EXT4_GET_BLOCKS_DELALLOC_RESERVE if
2015 * in question are delalloc blocks. This affects functions in many 1995 * the blocks in question are delalloc blocks. This indicates
2016 * different parts of the allocation call path. This flag exists 1996 * that the blocks and quotas has already been checked when
2017 * primarily because we don't want to change *many* call functions, so 1997 * the data was copied into the page cache.
2018 * ext4_map_blocks() will set the EXT4_STATE_DELALLOC_RESERVED flag
2019 * once the inode's allocation semaphore is taken.
2020 */ 1998 */
2021 get_blocks_flags = EXT4_GET_BLOCKS_CREATE | 1999 get_blocks_flags = EXT4_GET_BLOCKS_CREATE |
2022 EXT4_GET_BLOCKS_METADATA_NOFAIL; 2000 EXT4_GET_BLOCKS_METADATA_NOFAIL;
@@ -2515,6 +2493,20 @@ static int ext4_nonda_switch(struct super_block *sb)
2515 return 0; 2493 return 0;
2516} 2494}
2517 2495
2496/* We always reserve for an inode update; the superblock could be there too */
2497static int ext4_da_write_credits(struct inode *inode, loff_t pos, unsigned len)
2498{
2499 if (likely(EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb,
2500 EXT4_FEATURE_RO_COMPAT_LARGE_FILE)))
2501 return 1;
2502
2503 if (pos + len <= 0x7fffffffULL)
2504 return 1;
2505
2506 /* We might need to update the superblock to set LARGE_FILE */
2507 return 2;
2508}
2509
2518static int ext4_da_write_begin(struct file *file, struct address_space *mapping, 2510static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
2519 loff_t pos, unsigned len, unsigned flags, 2511 loff_t pos, unsigned len, unsigned flags,
2520 struct page **pagep, void **fsdata) 2512 struct page **pagep, void **fsdata)
@@ -2565,7 +2557,8 @@ retry_grab:
2565 * of file which has an already mapped buffer. 2557 * of file which has an already mapped buffer.
2566 */ 2558 */
2567retry_journal: 2559retry_journal:
2568 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, 1); 2560 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
2561 ext4_da_write_credits(inode, pos, len));
2569 if (IS_ERR(handle)) { 2562 if (IS_ERR(handle)) {
2570 page_cache_release(page); 2563 page_cache_release(page);
2571 return PTR_ERR(handle); 2564 return PTR_ERR(handle);
@@ -2658,10 +2651,7 @@ static int ext4_da_write_end(struct file *file,
2658 if (copied && new_i_size > EXT4_I(inode)->i_disksize) { 2651 if (copied && new_i_size > EXT4_I(inode)->i_disksize) {
2659 if (ext4_has_inline_data(inode) || 2652 if (ext4_has_inline_data(inode) ||
2660 ext4_da_should_update_i_disksize(page, end)) { 2653 ext4_da_should_update_i_disksize(page, end)) {
2661 down_write(&EXT4_I(inode)->i_data_sem); 2654 ext4_update_i_disksize(inode, new_i_size);
2662 if (new_i_size > EXT4_I(inode)->i_disksize)
2663 EXT4_I(inode)->i_disksize = new_i_size;
2664 up_write(&EXT4_I(inode)->i_data_sem);
2665 /* We need to mark inode dirty even if 2655 /* We need to mark inode dirty even if
2666 * new_i_size is less that inode->i_size 2656 * new_i_size is less that inode->i_size
2667 * bu greater than i_disksize.(hint delalloc) 2657 * bu greater than i_disksize.(hint delalloc)
@@ -3936,8 +3926,7 @@ struct inode *ext4_iget(struct super_block *sb, unsigned long ino)
3936 ei->i_extra_isize = 0; 3926 ei->i_extra_isize = 0;
3937 3927
3938 /* Precompute checksum seed for inode metadata */ 3928 /* Precompute checksum seed for inode metadata */
3939 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 3929 if (ext4_has_metadata_csum(sb)) {
3940 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
3941 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb); 3930 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
3942 __u32 csum; 3931 __u32 csum;
3943 __le32 inum = cpu_to_le32(inode->i_ino); 3932 __le32 inum = cpu_to_le32(inode->i_ino);
@@ -4127,6 +4116,13 @@ bad_inode:
4127 return ERR_PTR(ret); 4116 return ERR_PTR(ret);
4128} 4117}
4129 4118
4119struct inode *ext4_iget_normal(struct super_block *sb, unsigned long ino)
4120{
4121 if (ino < EXT4_FIRST_INO(sb) && ino != EXT4_ROOT_INO)
4122 return ERR_PTR(-EIO);
4123 return ext4_iget(sb, ino);
4124}
4125
4130static int ext4_inode_blocks_set(handle_t *handle, 4126static int ext4_inode_blocks_set(handle_t *handle,
4131 struct ext4_inode *raw_inode, 4127 struct ext4_inode *raw_inode,
4132 struct ext4_inode_info *ei) 4128 struct ext4_inode_info *ei)
@@ -4226,7 +4222,8 @@ static int ext4_do_update_inode(handle_t *handle,
4226 EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode); 4222 EXT4_INODE_SET_XTIME(i_atime, inode, raw_inode);
4227 EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode); 4223 EXT4_EINODE_SET_XTIME(i_crtime, ei, raw_inode);
4228 4224
4229 if (ext4_inode_blocks_set(handle, raw_inode, ei)) { 4225 err = ext4_inode_blocks_set(handle, raw_inode, ei);
4226 if (err) {
4230 spin_unlock(&ei->i_raw_lock); 4227 spin_unlock(&ei->i_raw_lock);
4231 goto out_brelse; 4228 goto out_brelse;
4232 } 4229 }
@@ -4536,8 +4533,12 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
4536 ext4_orphan_del(NULL, inode); 4533 ext4_orphan_del(NULL, inode);
4537 goto err_out; 4534 goto err_out;
4538 } 4535 }
4539 } else 4536 } else {
4537 loff_t oldsize = inode->i_size;
4538
4540 i_size_write(inode, attr->ia_size); 4539 i_size_write(inode, attr->ia_size);
4540 pagecache_isize_extended(inode, oldsize, inode->i_size);
4541 }
4541 4542
4542 /* 4543 /*
4543 * Blocks are going to be removed from the inode. Wait 4544 * Blocks are going to be removed from the inode. Wait
@@ -4958,7 +4959,12 @@ int ext4_change_inode_journal_flag(struct inode *inode, int val)
4958 if (val) 4959 if (val)
4959 ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); 4960 ext4_set_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
4960 else { 4961 else {
4961 jbd2_journal_flush(journal); 4962 err = jbd2_journal_flush(journal);
4963 if (err < 0) {
4964 jbd2_journal_unlock_updates(journal);
4965 ext4_inode_resume_unlocked_dio(inode);
4966 return err;
4967 }
4962 ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA); 4968 ext4_clear_inode_flag(inode, EXT4_INODE_JOURNAL_DATA);
4963 } 4969 }
4964 ext4_set_aops(inode); 4970 ext4_set_aops(inode);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 0f2252ec274d..bfda18a15592 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -331,8 +331,7 @@ flags_out:
331 if (!inode_owner_or_capable(inode)) 331 if (!inode_owner_or_capable(inode))
332 return -EPERM; 332 return -EPERM;
333 333
334 if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 334 if (ext4_has_metadata_csum(inode->i_sb)) {
335 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
336 ext4_warning(sb, "Setting inode version is not " 335 ext4_warning(sb, "Setting inode version is not "
337 "supported with metadata_csum enabled."); 336 "supported with metadata_csum enabled.");
338 return -ENOTTY; 337 return -ENOTTY;
@@ -532,9 +531,17 @@ group_add_out:
532 } 531 }
533 532
534 case EXT4_IOC_SWAP_BOOT: 533 case EXT4_IOC_SWAP_BOOT:
534 {
535 int err;
535 if (!(filp->f_mode & FMODE_WRITE)) 536 if (!(filp->f_mode & FMODE_WRITE))
536 return -EBADF; 537 return -EBADF;
537 return swap_inode_boot_loader(sb, inode); 538 err = mnt_want_write_file(filp);
539 if (err)
540 return err;
541 err = swap_inode_boot_loader(sb, inode);
542 mnt_drop_write_file(filp);
543 return err;
544 }
538 545
539 case EXT4_IOC_RESIZE_FS: { 546 case EXT4_IOC_RESIZE_FS: {
540 ext4_fsblk_t n_blocks_count; 547 ext4_fsblk_t n_blocks_count;
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 748c9136a60a..dbfe15c2533c 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -3155,9 +3155,8 @@ ext4_mb_normalize_request(struct ext4_allocation_context *ac,
3155 "start %lu, size %lu, fe_logical %lu", 3155 "start %lu, size %lu, fe_logical %lu",
3156 (unsigned long) start, (unsigned long) size, 3156 (unsigned long) start, (unsigned long) size,
3157 (unsigned long) ac->ac_o_ex.fe_logical); 3157 (unsigned long) ac->ac_o_ex.fe_logical);
3158 BUG();
3158 } 3159 }
3159 BUG_ON(start + size <= ac->ac_o_ex.fe_logical &&
3160 start > ac->ac_o_ex.fe_logical);
3161 BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb)); 3160 BUG_ON(size <= 0 || size > EXT4_BLOCKS_PER_GROUP(ac->ac_sb));
3162 3161
3163 /* now prepare goal request */ 3162 /* now prepare goal request */
@@ -4410,14 +4409,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4410 if (IS_NOQUOTA(ar->inode)) 4409 if (IS_NOQUOTA(ar->inode))
4411 ar->flags |= EXT4_MB_USE_ROOT_BLOCKS; 4410 ar->flags |= EXT4_MB_USE_ROOT_BLOCKS;
4412 4411
4413 /* 4412 if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0) {
4414 * For delayed allocation, we could skip the ENOSPC and
4415 * EDQUOT check, as blocks and quotas have been already
4416 * reserved when data being copied into pagecache.
4417 */
4418 if (ext4_test_inode_state(ar->inode, EXT4_STATE_DELALLOC_RESERVED))
4419 ar->flags |= EXT4_MB_DELALLOC_RESERVED;
4420 else {
4421 /* Without delayed allocation we need to verify 4413 /* Without delayed allocation we need to verify
4422 * there is enough free blocks to do block allocation 4414 * there is enough free blocks to do block allocation
4423 * and verify allocation doesn't exceed the quota limits. 4415 * and verify allocation doesn't exceed the quota limits.
@@ -4528,8 +4520,7 @@ out:
4528 if (inquota && ar->len < inquota) 4520 if (inquota && ar->len < inquota)
4529 dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len)); 4521 dquot_free_block(ar->inode, EXT4_C2B(sbi, inquota - ar->len));
4530 if (!ar->len) { 4522 if (!ar->len) {
4531 if (!ext4_test_inode_state(ar->inode, 4523 if ((ar->flags & EXT4_MB_DELALLOC_RESERVED) == 0)
4532 EXT4_STATE_DELALLOC_RESERVED))
4533 /* release all the reserved blocks if non delalloc */ 4524 /* release all the reserved blocks if non delalloc */
4534 percpu_counter_sub(&sbi->s_dirtyclusters_counter, 4525 percpu_counter_sub(&sbi->s_dirtyclusters_counter,
4535 reserv_clstrs); 4526 reserv_clstrs);
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index d3567f27bae7..a432634f2e6a 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -41,8 +41,7 @@ static int finish_range(handle_t *handle, struct inode *inode,
41 ext4_ext_store_pblock(&newext, lb->first_pblock); 41 ext4_ext_store_pblock(&newext, lb->first_pblock);
42 /* Locking only for convinience since we are operating on temp inode */ 42 /* Locking only for convinience since we are operating on temp inode */
43 down_write(&EXT4_I(inode)->i_data_sem); 43 down_write(&EXT4_I(inode)->i_data_sem);
44 path = ext4_ext_find_extent(inode, lb->first_block, NULL, 0); 44 path = ext4_find_extent(inode, lb->first_block, NULL, 0);
45
46 if (IS_ERR(path)) { 45 if (IS_ERR(path)) {
47 retval = PTR_ERR(path); 46 retval = PTR_ERR(path);
48 path = NULL; 47 path = NULL;
@@ -81,13 +80,11 @@ static int finish_range(handle_t *handle, struct inode *inode,
81 goto err_out; 80 goto err_out;
82 } 81 }
83 } 82 }
84 retval = ext4_ext_insert_extent(handle, inode, path, &newext, 0); 83 retval = ext4_ext_insert_extent(handle, inode, &path, &newext, 0);
85err_out: 84err_out:
86 up_write((&EXT4_I(inode)->i_data_sem)); 85 up_write((&EXT4_I(inode)->i_data_sem));
87 if (path) { 86 ext4_ext_drop_refs(path);
88 ext4_ext_drop_refs(path); 87 kfree(path);
89 kfree(path);
90 }
91 lb->first_pblock = 0; 88 lb->first_pblock = 0;
92 return retval; 89 return retval;
93} 90}
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 32bce844c2e1..8313ca3324ec 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -20,8 +20,7 @@ static __le32 ext4_mmp_csum(struct super_block *sb, struct mmp_struct *mmp)
20 20
21static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp) 21static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
22{ 22{
23 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 23 if (!ext4_has_metadata_csum(sb))
24 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
25 return 1; 24 return 1;
26 25
27 return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp); 26 return mmp->mmp_checksum == ext4_mmp_csum(sb, mmp);
@@ -29,8 +28,7 @@ static int ext4_mmp_csum_verify(struct super_block *sb, struct mmp_struct *mmp)
29 28
30static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp) 29static void ext4_mmp_csum_set(struct super_block *sb, struct mmp_struct *mmp)
31{ 30{
32 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 31 if (!ext4_has_metadata_csum(sb))
33 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
34 return; 32 return;
35 33
36 mmp->mmp_checksum = ext4_mmp_csum(sb, mmp); 34 mmp->mmp_checksum = ext4_mmp_csum(sb, mmp);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 671a74b14fd7..9f2311bc9c4f 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -27,120 +27,26 @@
27 * @lblock: logical block number to find an extent path 27 * @lblock: logical block number to find an extent path
28 * @path: pointer to an extent path pointer (for output) 28 * @path: pointer to an extent path pointer (for output)
29 * 29 *
30 * ext4_ext_find_extent wrapper. Return 0 on success, or a negative error value 30 * ext4_find_extent wrapper. Return 0 on success, or a negative error value
31 * on failure. 31 * on failure.
32 */ 32 */
33static inline int 33static inline int
34get_ext_path(struct inode *inode, ext4_lblk_t lblock, 34get_ext_path(struct inode *inode, ext4_lblk_t lblock,
35 struct ext4_ext_path **orig_path) 35 struct ext4_ext_path **ppath)
36{ 36{
37 int ret = 0;
38 struct ext4_ext_path *path; 37 struct ext4_ext_path *path;
39 38
40 path = ext4_ext_find_extent(inode, lblock, *orig_path, EXT4_EX_NOCACHE); 39 path = ext4_find_extent(inode, lblock, ppath, EXT4_EX_NOCACHE);
41 if (IS_ERR(path)) 40 if (IS_ERR(path))
42 ret = PTR_ERR(path); 41 return PTR_ERR(path);
43 else if (path[ext_depth(inode)].p_ext == NULL) 42 if (path[ext_depth(inode)].p_ext == NULL) {
44 ret = -ENODATA; 43 ext4_ext_drop_refs(path);
45 else 44 kfree(path);
46 *orig_path = path; 45 *ppath = NULL;
47 46 return -ENODATA;
48 return ret;
49}
50
51/**
52 * copy_extent_status - Copy the extent's initialization status
53 *
54 * @src: an extent for getting initialize status
55 * @dest: an extent to be set the status
56 */
57static void
58copy_extent_status(struct ext4_extent *src, struct ext4_extent *dest)
59{
60 if (ext4_ext_is_unwritten(src))
61 ext4_ext_mark_unwritten(dest);
62 else
63 dest->ee_len = cpu_to_le16(ext4_ext_get_actual_len(dest));
64}
65
66/**
67 * mext_next_extent - Search for the next extent and set it to "extent"
68 *
69 * @inode: inode which is searched
70 * @path: this will obtain data for the next extent
71 * @extent: pointer to the next extent we have just gotten
72 *
73 * Search the next extent in the array of ext4_ext_path structure (@path)
74 * and set it to ext4_extent structure (@extent). In addition, the member of
75 * @path (->p_ext) also points the next extent. Return 0 on success, 1 if
76 * ext4_ext_path structure refers to the last extent, or a negative error
77 * value on failure.
78 */
79int
80mext_next_extent(struct inode *inode, struct ext4_ext_path *path,
81 struct ext4_extent **extent)
82{
83 struct ext4_extent_header *eh;
84 int ppos, leaf_ppos = path->p_depth;
85
86 ppos = leaf_ppos;
87 if (EXT_LAST_EXTENT(path[ppos].p_hdr) > path[ppos].p_ext) {
88 /* leaf block */
89 *extent = ++path[ppos].p_ext;
90 path[ppos].p_block = ext4_ext_pblock(path[ppos].p_ext);
91 return 0;
92 }
93
94 while (--ppos >= 0) {
95 if (EXT_LAST_INDEX(path[ppos].p_hdr) >
96 path[ppos].p_idx) {
97 int cur_ppos = ppos;
98
99 /* index block */
100 path[ppos].p_idx++;
101 path[ppos].p_block = ext4_idx_pblock(path[ppos].p_idx);
102 if (path[ppos+1].p_bh)
103 brelse(path[ppos+1].p_bh);
104 path[ppos+1].p_bh =
105 sb_bread(inode->i_sb, path[ppos].p_block);
106 if (!path[ppos+1].p_bh)
107 return -EIO;
108 path[ppos+1].p_hdr =
109 ext_block_hdr(path[ppos+1].p_bh);
110
111 /* Halfway index block */
112 while (++cur_ppos < leaf_ppos) {
113 path[cur_ppos].p_idx =
114 EXT_FIRST_INDEX(path[cur_ppos].p_hdr);
115 path[cur_ppos].p_block =
116 ext4_idx_pblock(path[cur_ppos].p_idx);
117 if (path[cur_ppos+1].p_bh)
118 brelse(path[cur_ppos+1].p_bh);
119 path[cur_ppos+1].p_bh = sb_bread(inode->i_sb,
120 path[cur_ppos].p_block);
121 if (!path[cur_ppos+1].p_bh)
122 return -EIO;
123 path[cur_ppos+1].p_hdr =
124 ext_block_hdr(path[cur_ppos+1].p_bh);
125 }
126
127 path[leaf_ppos].p_ext = *extent = NULL;
128
129 eh = path[leaf_ppos].p_hdr;
130 if (le16_to_cpu(eh->eh_entries) == 0)
131 /* empty leaf is found */
132 return -ENODATA;
133
134 /* leaf block */
135 path[leaf_ppos].p_ext = *extent =
136 EXT_FIRST_EXTENT(path[leaf_ppos].p_hdr);
137 path[leaf_ppos].p_block =
138 ext4_ext_pblock(path[leaf_ppos].p_ext);
139 return 0;
140 }
141 } 47 }
142 /* We found the last extent */ 48 *ppath = path;
143 return 1; 49 return 0;
144} 50}
145 51
146/** 52/**
@@ -178,417 +84,6 @@ ext4_double_up_write_data_sem(struct inode *orig_inode,
178} 84}
179 85
180/** 86/**
181 * mext_insert_across_blocks - Insert extents across leaf block
182 *
183 * @handle: journal handle
184 * @orig_inode: original inode
185 * @o_start: first original extent to be changed
186 * @o_end: last original extent to be changed
187 * @start_ext: first new extent to be inserted
188 * @new_ext: middle of new extent to be inserted
189 * @end_ext: last new extent to be inserted
190 *
191 * Allocate a new leaf block and insert extents into it. Return 0 on success,
192 * or a negative error value on failure.
193 */
194static int
195mext_insert_across_blocks(handle_t *handle, struct inode *orig_inode,
196 struct ext4_extent *o_start, struct ext4_extent *o_end,
197 struct ext4_extent *start_ext, struct ext4_extent *new_ext,
198 struct ext4_extent *end_ext)
199{
200 struct ext4_ext_path *orig_path = NULL;
201 ext4_lblk_t eblock = 0;
202 int new_flag = 0;
203 int end_flag = 0;
204 int err = 0;
205
206 if (start_ext->ee_len && new_ext->ee_len && end_ext->ee_len) {
207 if (o_start == o_end) {
208
209 /* start_ext new_ext end_ext
210 * donor |---------|-----------|--------|
211 * orig |------------------------------|
212 */
213 end_flag = 1;
214 } else {
215
216 /* start_ext new_ext end_ext
217 * donor |---------|----------|---------|
218 * orig |---------------|--------------|
219 */
220 o_end->ee_block = end_ext->ee_block;
221 o_end->ee_len = end_ext->ee_len;
222 ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
223 }
224
225 o_start->ee_len = start_ext->ee_len;
226 eblock = le32_to_cpu(start_ext->ee_block);
227 new_flag = 1;
228
229 } else if (start_ext->ee_len && new_ext->ee_len &&
230 !end_ext->ee_len && o_start == o_end) {
231
232 /* start_ext new_ext
233 * donor |--------------|---------------|
234 * orig |------------------------------|
235 */
236 o_start->ee_len = start_ext->ee_len;
237 eblock = le32_to_cpu(start_ext->ee_block);
238 new_flag = 1;
239
240 } else if (!start_ext->ee_len && new_ext->ee_len &&
241 end_ext->ee_len && o_start == o_end) {
242
243 /* new_ext end_ext
244 * donor |--------------|---------------|
245 * orig |------------------------------|
246 */
247 o_end->ee_block = end_ext->ee_block;
248 o_end->ee_len = end_ext->ee_len;
249 ext4_ext_store_pblock(o_end, ext4_ext_pblock(end_ext));
250
251 /*
252 * Set 0 to the extent block if new_ext was
253 * the first block.
254 */
255 if (new_ext->ee_block)
256 eblock = le32_to_cpu(new_ext->ee_block);
257
258 new_flag = 1;
259 } else {
260 ext4_debug("ext4 move extent: Unexpected insert case\n");
261 return -EIO;
262 }
263
264 if (new_flag) {
265 err = get_ext_path(orig_inode, eblock, &orig_path);
266 if (err)
267 goto out;
268
269 if (ext4_ext_insert_extent(handle, orig_inode,
270 orig_path, new_ext, 0))
271 goto out;
272 }
273
274 if (end_flag) {
275 err = get_ext_path(orig_inode,
276 le32_to_cpu(end_ext->ee_block) - 1, &orig_path);
277 if (err)
278 goto out;
279
280 if (ext4_ext_insert_extent(handle, orig_inode,
281 orig_path, end_ext, 0))
282 goto out;
283 }
284out:
285 if (orig_path) {
286 ext4_ext_drop_refs(orig_path);
287 kfree(orig_path);
288 }
289
290 return err;
291
292}
293
294/**
295 * mext_insert_inside_block - Insert new extent to the extent block
296 *
297 * @o_start: first original extent to be moved
298 * @o_end: last original extent to be moved
299 * @start_ext: first new extent to be inserted
300 * @new_ext: middle of new extent to be inserted
301 * @end_ext: last new extent to be inserted
302 * @eh: extent header of target leaf block
303 * @range_to_move: used to decide how to insert extent
304 *
305 * Insert extents into the leaf block. The extent (@o_start) is overwritten
306 * by inserted extents.
307 */
308static void
309mext_insert_inside_block(struct ext4_extent *o_start,
310 struct ext4_extent *o_end,
311 struct ext4_extent *start_ext,
312 struct ext4_extent *new_ext,
313 struct ext4_extent *end_ext,
314 struct ext4_extent_header *eh,
315 int range_to_move)
316{
317 int i = 0;
318 unsigned long len;
319
320 /* Move the existing extents */
321 if (range_to_move && o_end < EXT_LAST_EXTENT(eh)) {
322 len = (unsigned long)(EXT_LAST_EXTENT(eh) + 1) -
323 (unsigned long)(o_end + 1);
324 memmove(o_end + 1 + range_to_move, o_end + 1, len);
325 }
326
327 /* Insert start entry */
328 if (start_ext->ee_len)
329 o_start[i++].ee_len = start_ext->ee_len;
330
331 /* Insert new entry */
332 if (new_ext->ee_len) {
333 o_start[i] = *new_ext;
334 ext4_ext_store_pblock(&o_start[i++], ext4_ext_pblock(new_ext));
335 }
336
337 /* Insert end entry */
338 if (end_ext->ee_len)
339 o_start[i] = *end_ext;
340
341 /* Increment the total entries counter on the extent block */
342 le16_add_cpu(&eh->eh_entries, range_to_move);
343}
344
345/**
346 * mext_insert_extents - Insert new extent
347 *
348 * @handle: journal handle
349 * @orig_inode: original inode
350 * @orig_path: path indicates first extent to be changed
351 * @o_start: first original extent to be changed
352 * @o_end: last original extent to be changed
353 * @start_ext: first new extent to be inserted
354 * @new_ext: middle of new extent to be inserted
355 * @end_ext: last new extent to be inserted
356 *
357 * Call the function to insert extents. If we cannot add more extents into
358 * the leaf block, we call mext_insert_across_blocks() to create a
359 * new leaf block. Otherwise call mext_insert_inside_block(). Return 0
360 * on success, or a negative error value on failure.
361 */
362static int
363mext_insert_extents(handle_t *handle, struct inode *orig_inode,
364 struct ext4_ext_path *orig_path,
365 struct ext4_extent *o_start,
366 struct ext4_extent *o_end,
367 struct ext4_extent *start_ext,
368 struct ext4_extent *new_ext,
369 struct ext4_extent *end_ext)
370{
371 struct ext4_extent_header *eh;
372 unsigned long need_slots, slots_range;
373 int range_to_move, depth, ret;
374
375 /*
376 * The extents need to be inserted
377 * start_extent + new_extent + end_extent.
378 */
379 need_slots = (start_ext->ee_len ? 1 : 0) + (end_ext->ee_len ? 1 : 0) +
380 (new_ext->ee_len ? 1 : 0);
381
382 /* The number of slots between start and end */
383 slots_range = ((unsigned long)(o_end + 1) - (unsigned long)o_start + 1)
384 / sizeof(struct ext4_extent);
385
386 /* Range to move the end of extent */
387 range_to_move = need_slots - slots_range;
388 depth = orig_path->p_depth;
389 orig_path += depth;
390 eh = orig_path->p_hdr;
391
392 if (depth) {
393 /* Register to journal */
394 BUFFER_TRACE(orig_path->p_bh, "get_write_access");
395 ret = ext4_journal_get_write_access(handle, orig_path->p_bh);
396 if (ret)
397 return ret;
398 }
399
400 /* Expansion */
401 if (range_to_move > 0 &&
402 (range_to_move > le16_to_cpu(eh->eh_max)
403 - le16_to_cpu(eh->eh_entries))) {
404
405 ret = mext_insert_across_blocks(handle, orig_inode, o_start,
406 o_end, start_ext, new_ext, end_ext);
407 if (ret < 0)
408 return ret;
409 } else
410 mext_insert_inside_block(o_start, o_end, start_ext, new_ext,
411 end_ext, eh, range_to_move);
412
413 return ext4_ext_dirty(handle, orig_inode, orig_path);
414}
415
416/**
417 * mext_leaf_block - Move one leaf extent block into the inode.
418 *
419 * @handle: journal handle
420 * @orig_inode: original inode
421 * @orig_path: path indicates first extent to be changed
422 * @dext: donor extent
423 * @from: start offset on the target file
424 *
425 * In order to insert extents into the leaf block, we must divide the extent
426 * in the leaf block into three extents. The one is located to be inserted
427 * extents, and the others are located around it.
428 *
429 * Therefore, this function creates structures to save extents of the leaf
430 * block, and inserts extents by calling mext_insert_extents() with
431 * created extents. Return 0 on success, or a negative error value on failure.
432 */
433static int
434mext_leaf_block(handle_t *handle, struct inode *orig_inode,
435 struct ext4_ext_path *orig_path, struct ext4_extent *dext,
436 ext4_lblk_t *from)
437{
438 struct ext4_extent *oext, *o_start, *o_end, *prev_ext;
439 struct ext4_extent new_ext, start_ext, end_ext;
440 ext4_lblk_t new_ext_end;
441 int oext_alen, new_ext_alen, end_ext_alen;
442 int depth = ext_depth(orig_inode);
443 int ret;
444
445 start_ext.ee_block = end_ext.ee_block = 0;
446 o_start = o_end = oext = orig_path[depth].p_ext;
447 oext_alen = ext4_ext_get_actual_len(oext);
448 start_ext.ee_len = end_ext.ee_len = 0;
449
450 new_ext.ee_block = cpu_to_le32(*from);
451 ext4_ext_store_pblock(&new_ext, ext4_ext_pblock(dext));
452 new_ext.ee_len = dext->ee_len;
453 new_ext_alen = ext4_ext_get_actual_len(&new_ext);
454 new_ext_end = le32_to_cpu(new_ext.ee_block) + new_ext_alen - 1;
455
456 /*
457 * Case: original extent is first
458 * oext |--------|
459 * new_ext |--|
460 * start_ext |--|
461 */
462 if (le32_to_cpu(oext->ee_block) < le32_to_cpu(new_ext.ee_block) &&
463 le32_to_cpu(new_ext.ee_block) <
464 le32_to_cpu(oext->ee_block) + oext_alen) {
465 start_ext.ee_len = cpu_to_le16(le32_to_cpu(new_ext.ee_block) -
466 le32_to_cpu(oext->ee_block));
467 start_ext.ee_block = oext->ee_block;
468 copy_extent_status(oext, &start_ext);
469 } else if (oext > EXT_FIRST_EXTENT(orig_path[depth].p_hdr)) {
470 prev_ext = oext - 1;
471 /*
472 * We can merge new_ext into previous extent,
473 * if these are contiguous and same extent type.
474 */
475 if (ext4_can_extents_be_merged(orig_inode, prev_ext,
476 &new_ext)) {
477 o_start = prev_ext;
478 start_ext.ee_len = cpu_to_le16(
479 ext4_ext_get_actual_len(prev_ext) +
480 new_ext_alen);
481 start_ext.ee_block = oext->ee_block;
482 copy_extent_status(prev_ext, &start_ext);
483 new_ext.ee_len = 0;
484 }
485 }
486
487 /*
488 * Case: new_ext_end must be less than oext
489 * oext |-----------|
490 * new_ext |-------|
491 */
492 if (le32_to_cpu(oext->ee_block) + oext_alen - 1 < new_ext_end) {
493 EXT4_ERROR_INODE(orig_inode,
494 "new_ext_end(%u) should be less than or equal to "
495 "oext->ee_block(%u) + oext_alen(%d) - 1",
496 new_ext_end, le32_to_cpu(oext->ee_block),
497 oext_alen);
498 ret = -EIO;
499 goto out;
500 }
501
502 /*
503 * Case: new_ext is smaller than original extent
504 * oext |---------------|
505 * new_ext |-----------|
506 * end_ext |---|
507 */
508 if (le32_to_cpu(oext->ee_block) <= new_ext_end &&
509 new_ext_end < le32_to_cpu(oext->ee_block) + oext_alen - 1) {
510 end_ext.ee_len =
511 cpu_to_le16(le32_to_cpu(oext->ee_block) +
512 oext_alen - 1 - new_ext_end);
513 copy_extent_status(oext, &end_ext);
514 end_ext_alen = ext4_ext_get_actual_len(&end_ext);
515 ext4_ext_store_pblock(&end_ext,
516 (ext4_ext_pblock(o_end) + oext_alen - end_ext_alen));
517 end_ext.ee_block =
518 cpu_to_le32(le32_to_cpu(o_end->ee_block) +
519 oext_alen - end_ext_alen);
520 }
521
522 ret = mext_insert_extents(handle, orig_inode, orig_path, o_start,
523 o_end, &start_ext, &new_ext, &end_ext);
524out:
525 return ret;
526}
527
528/**
529 * mext_calc_swap_extents - Calculate extents for extent swapping.
530 *
531 * @tmp_dext: the extent that will belong to the original inode
532 * @tmp_oext: the extent that will belong to the donor inode
533 * @orig_off: block offset of original inode
534 * @donor_off: block offset of donor inode
535 * @max_count: the maximum length of extents
536 *
537 * Return 0 on success, or a negative error value on failure.
538 */
539static int
540mext_calc_swap_extents(struct ext4_extent *tmp_dext,
541 struct ext4_extent *tmp_oext,
542 ext4_lblk_t orig_off, ext4_lblk_t donor_off,
543 ext4_lblk_t max_count)
544{
545 ext4_lblk_t diff, orig_diff;
546 struct ext4_extent dext_old, oext_old;
547
548 BUG_ON(orig_off != donor_off);
549
550 /* original and donor extents have to cover the same block offset */
551 if (orig_off < le32_to_cpu(tmp_oext->ee_block) ||
552 le32_to_cpu(tmp_oext->ee_block) +
553 ext4_ext_get_actual_len(tmp_oext) - 1 < orig_off)
554 return -ENODATA;
555
556 if (orig_off < le32_to_cpu(tmp_dext->ee_block) ||
557 le32_to_cpu(tmp_dext->ee_block) +
558 ext4_ext_get_actual_len(tmp_dext) - 1 < orig_off)
559 return -ENODATA;
560
561 dext_old = *tmp_dext;
562 oext_old = *tmp_oext;
563
564 /* When tmp_dext is too large, pick up the target range. */
565 diff = donor_off - le32_to_cpu(tmp_dext->ee_block);
566
567 ext4_ext_store_pblock(tmp_dext, ext4_ext_pblock(tmp_dext) + diff);
568 le32_add_cpu(&tmp_dext->ee_block, diff);
569 le16_add_cpu(&tmp_dext->ee_len, -diff);
570
571 if (max_count < ext4_ext_get_actual_len(tmp_dext))
572 tmp_dext->ee_len = cpu_to_le16(max_count);
573
574 orig_diff = orig_off - le32_to_cpu(tmp_oext->ee_block);
575 ext4_ext_store_pblock(tmp_oext, ext4_ext_pblock(tmp_oext) + orig_diff);
576
577 /* Adjust extent length if donor extent is larger than orig */
578 if (ext4_ext_get_actual_len(tmp_dext) >
579 ext4_ext_get_actual_len(tmp_oext) - orig_diff)
580 tmp_dext->ee_len = cpu_to_le16(le16_to_cpu(tmp_oext->ee_len) -
581 orig_diff);
582
583 tmp_oext->ee_len = cpu_to_le16(ext4_ext_get_actual_len(tmp_dext));
584
585 copy_extent_status(&oext_old, tmp_dext);
586 copy_extent_status(&dext_old, tmp_oext);
587
588 return 0;
589}
590
591/**
592 * mext_check_coverage - Check that all extents in range has the same type 87 * mext_check_coverage - Check that all extents in range has the same type
593 * 88 *
594 * @inode: inode in question 89 * @inode: inode in question
@@ -619,171 +114,25 @@ mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count,
619 } 114 }
620 ret = 1; 115 ret = 1;
621out: 116out:
622 if (path) { 117 ext4_ext_drop_refs(path);
623 ext4_ext_drop_refs(path); 118 kfree(path);
624 kfree(path);
625 }
626 return ret; 119 return ret;
627} 120}
628 121
629/** 122/**
630 * mext_replace_branches - Replace original extents with new extents
631 *
632 * @handle: journal handle
633 * @orig_inode: original inode
634 * @donor_inode: donor inode
635 * @from: block offset of orig_inode
636 * @count: block count to be replaced
637 * @err: pointer to save return value
638 *
639 * Replace original inode extents and donor inode extents page by page.
640 * We implement this replacement in the following three steps:
641 * 1. Save the block information of original and donor inodes into
642 * dummy extents.
643 * 2. Change the block information of original inode to point at the
644 * donor inode blocks.
645 * 3. Change the block information of donor inode to point at the saved
646 * original inode blocks in the dummy extents.
647 *
648 * Return replaced block count.
649 */
650static int
651mext_replace_branches(handle_t *handle, struct inode *orig_inode,
652 struct inode *donor_inode, ext4_lblk_t from,
653 ext4_lblk_t count, int *err)
654{
655 struct ext4_ext_path *orig_path = NULL;
656 struct ext4_ext_path *donor_path = NULL;
657 struct ext4_extent *oext, *dext;
658 struct ext4_extent tmp_dext, tmp_oext;
659 ext4_lblk_t orig_off = from, donor_off = from;
660 int depth;
661 int replaced_count = 0;
662 int dext_alen;
663
664 *err = ext4_es_remove_extent(orig_inode, from, count);
665 if (*err)
666 goto out;
667
668 *err = ext4_es_remove_extent(donor_inode, from, count);
669 if (*err)
670 goto out;
671
672 /* Get the original extent for the block "orig_off" */
673 *err = get_ext_path(orig_inode, orig_off, &orig_path);
674 if (*err)
675 goto out;
676
677 /* Get the donor extent for the head */
678 *err = get_ext_path(donor_inode, donor_off, &donor_path);
679 if (*err)
680 goto out;
681 depth = ext_depth(orig_inode);
682 oext = orig_path[depth].p_ext;
683 tmp_oext = *oext;
684
685 depth = ext_depth(donor_inode);
686 dext = donor_path[depth].p_ext;
687 if (unlikely(!dext))
688 goto missing_donor_extent;
689 tmp_dext = *dext;
690
691 *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
692 donor_off, count);
693 if (*err)
694 goto out;
695
696 /* Loop for the donor extents */
697 while (1) {
698 /* The extent for donor must be found. */
699 if (unlikely(!dext)) {
700 missing_donor_extent:
701 EXT4_ERROR_INODE(donor_inode,
702 "The extent for donor must be found");
703 *err = -EIO;
704 goto out;
705 } else if (donor_off != le32_to_cpu(tmp_dext.ee_block)) {
706 EXT4_ERROR_INODE(donor_inode,
707 "Donor offset(%u) and the first block of donor "
708 "extent(%u) should be equal",
709 donor_off,
710 le32_to_cpu(tmp_dext.ee_block));
711 *err = -EIO;
712 goto out;
713 }
714
715 /* Set donor extent to orig extent */
716 *err = mext_leaf_block(handle, orig_inode,
717 orig_path, &tmp_dext, &orig_off);
718 if (*err)
719 goto out;
720
721 /* Set orig extent to donor extent */
722 *err = mext_leaf_block(handle, donor_inode,
723 donor_path, &tmp_oext, &donor_off);
724 if (*err)
725 goto out;
726
727 dext_alen = ext4_ext_get_actual_len(&tmp_dext);
728 replaced_count += dext_alen;
729 donor_off += dext_alen;
730 orig_off += dext_alen;
731
732 BUG_ON(replaced_count > count);
733 /* Already moved the expected blocks */
734 if (replaced_count >= count)
735 break;
736
737 if (orig_path)
738 ext4_ext_drop_refs(orig_path);
739 *err = get_ext_path(orig_inode, orig_off, &orig_path);
740 if (*err)
741 goto out;
742 depth = ext_depth(orig_inode);
743 oext = orig_path[depth].p_ext;
744 tmp_oext = *oext;
745
746 if (donor_path)
747 ext4_ext_drop_refs(donor_path);
748 *err = get_ext_path(donor_inode, donor_off, &donor_path);
749 if (*err)
750 goto out;
751 depth = ext_depth(donor_inode);
752 dext = donor_path[depth].p_ext;
753 tmp_dext = *dext;
754
755 *err = mext_calc_swap_extents(&tmp_dext, &tmp_oext, orig_off,
756 donor_off, count - replaced_count);
757 if (*err)
758 goto out;
759 }
760
761out:
762 if (orig_path) {
763 ext4_ext_drop_refs(orig_path);
764 kfree(orig_path);
765 }
766 if (donor_path) {
767 ext4_ext_drop_refs(donor_path);
768 kfree(donor_path);
769 }
770
771 return replaced_count;
772}
773
774/**
775 * mext_page_double_lock - Grab and lock pages on both @inode1 and @inode2 123 * mext_page_double_lock - Grab and lock pages on both @inode1 and @inode2
776 * 124 *
777 * @inode1: the inode structure 125 * @inode1: the inode structure
778 * @inode2: the inode structure 126 * @inode2: the inode structure
779 * @index: page index 127 * @index1: page index
128 * @index2: page index
780 * @page: result page vector 129 * @page: result page vector
781 * 130 *
782 * Grab two locked pages for inode's by inode order 131 * Grab two locked pages for inode's by inode order
783 */ 132 */
784static int 133static int
785mext_page_double_lock(struct inode *inode1, struct inode *inode2, 134mext_page_double_lock(struct inode *inode1, struct inode *inode2,
786 pgoff_t index, struct page *page[2]) 135 pgoff_t index1, pgoff_t index2, struct page *page[2])
787{ 136{
788 struct address_space *mapping[2]; 137 struct address_space *mapping[2];
789 unsigned fl = AOP_FLAG_NOFS; 138 unsigned fl = AOP_FLAG_NOFS;
@@ -793,15 +142,18 @@ mext_page_double_lock(struct inode *inode1, struct inode *inode2,
793 mapping[0] = inode1->i_mapping; 142 mapping[0] = inode1->i_mapping;
794 mapping[1] = inode2->i_mapping; 143 mapping[1] = inode2->i_mapping;
795 } else { 144 } else {
145 pgoff_t tmp = index1;
146 index1 = index2;
147 index2 = tmp;
796 mapping[0] = inode2->i_mapping; 148 mapping[0] = inode2->i_mapping;
797 mapping[1] = inode1->i_mapping; 149 mapping[1] = inode1->i_mapping;
798 } 150 }
799 151
800 page[0] = grab_cache_page_write_begin(mapping[0], index, fl); 152 page[0] = grab_cache_page_write_begin(mapping[0], index1, fl);
801 if (!page[0]) 153 if (!page[0])
802 return -ENOMEM; 154 return -ENOMEM;
803 155
804 page[1] = grab_cache_page_write_begin(mapping[1], index, fl); 156 page[1] = grab_cache_page_write_begin(mapping[1], index2, fl);
805 if (!page[1]) { 157 if (!page[1]) {
806 unlock_page(page[0]); 158 unlock_page(page[0]);
807 page_cache_release(page[0]); 159 page_cache_release(page[0]);
@@ -893,25 +245,27 @@ out:
893 * @o_filp: file structure of original file 245 * @o_filp: file structure of original file
894 * @donor_inode: donor inode 246 * @donor_inode: donor inode
895 * @orig_page_offset: page index on original file 247 * @orig_page_offset: page index on original file
248 * @donor_page_offset: page index on donor file
896 * @data_offset_in_page: block index where data swapping starts 249 * @data_offset_in_page: block index where data swapping starts
897 * @block_len_in_page: the number of blocks to be swapped 250 * @block_len_in_page: the number of blocks to be swapped
898 * @unwritten: orig extent is unwritten or not 251 * @unwritten: orig extent is unwritten or not
899 * @err: pointer to save return value 252 * @err: pointer to save return value
900 * 253 *
901 * Save the data in original inode blocks and replace original inode extents 254 * Save the data in original inode blocks and replace original inode extents
902 * with donor inode extents by calling mext_replace_branches(). 255 * with donor inode extents by calling ext4_swap_extents().
903 * Finally, write out the saved data in new original inode blocks. Return 256 * Finally, write out the saved data in new original inode blocks. Return
904 * replaced block count. 257 * replaced block count.
905 */ 258 */
906static int 259static int
907move_extent_per_page(struct file *o_filp, struct inode *donor_inode, 260move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
908 pgoff_t orig_page_offset, int data_offset_in_page, 261 pgoff_t orig_page_offset, pgoff_t donor_page_offset,
909 int block_len_in_page, int unwritten, int *err) 262 int data_offset_in_page,
263 int block_len_in_page, int unwritten, int *err)
910{ 264{
911 struct inode *orig_inode = file_inode(o_filp); 265 struct inode *orig_inode = file_inode(o_filp);
912 struct page *pagep[2] = {NULL, NULL}; 266 struct page *pagep[2] = {NULL, NULL};
913 handle_t *handle; 267 handle_t *handle;
914 ext4_lblk_t orig_blk_offset; 268 ext4_lblk_t orig_blk_offset, donor_blk_offset;
915 unsigned long blocksize = orig_inode->i_sb->s_blocksize; 269 unsigned long blocksize = orig_inode->i_sb->s_blocksize;
916 unsigned int w_flags = 0; 270 unsigned int w_flags = 0;
917 unsigned int tmp_data_size, data_size, replaced_size; 271 unsigned int tmp_data_size, data_size, replaced_size;
@@ -939,6 +293,9 @@ again:
939 orig_blk_offset = orig_page_offset * blocks_per_page + 293 orig_blk_offset = orig_page_offset * blocks_per_page +
940 data_offset_in_page; 294 data_offset_in_page;
941 295
296 donor_blk_offset = donor_page_offset * blocks_per_page +
297 data_offset_in_page;
298
942 /* Calculate data_size */ 299 /* Calculate data_size */
943 if ((orig_blk_offset + block_len_in_page - 1) == 300 if ((orig_blk_offset + block_len_in_page - 1) ==
944 ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) { 301 ((orig_inode->i_size - 1) >> orig_inode->i_blkbits)) {
@@ -959,7 +316,7 @@ again:
959 replaced_size = data_size; 316 replaced_size = data_size;
960 317
961 *err = mext_page_double_lock(orig_inode, donor_inode, orig_page_offset, 318 *err = mext_page_double_lock(orig_inode, donor_inode, orig_page_offset,
962 pagep); 319 donor_page_offset, pagep);
963 if (unlikely(*err < 0)) 320 if (unlikely(*err < 0))
964 goto stop_journal; 321 goto stop_journal;
965 /* 322 /*
@@ -978,7 +335,7 @@ again:
978 if (*err) 335 if (*err)
979 goto drop_data_sem; 336 goto drop_data_sem;
980 337
981 unwritten &= mext_check_coverage(donor_inode, orig_blk_offset, 338 unwritten &= mext_check_coverage(donor_inode, donor_blk_offset,
982 block_len_in_page, 1, err); 339 block_len_in_page, 1, err);
983 if (*err) 340 if (*err)
984 goto drop_data_sem; 341 goto drop_data_sem;
@@ -994,9 +351,10 @@ again:
994 *err = -EBUSY; 351 *err = -EBUSY;
995 goto drop_data_sem; 352 goto drop_data_sem;
996 } 353 }
997 replaced_count = mext_replace_branches(handle, orig_inode, 354 replaced_count = ext4_swap_extents(handle, orig_inode,
998 donor_inode, orig_blk_offset, 355 donor_inode, orig_blk_offset,
999 block_len_in_page, err); 356 donor_blk_offset,
357 block_len_in_page, 1, err);
1000 drop_data_sem: 358 drop_data_sem:
1001 ext4_double_up_write_data_sem(orig_inode, donor_inode); 359 ext4_double_up_write_data_sem(orig_inode, donor_inode);
1002 goto unlock_pages; 360 goto unlock_pages;
@@ -1014,9 +372,9 @@ data_copy:
1014 goto unlock_pages; 372 goto unlock_pages;
1015 } 373 }
1016 ext4_double_down_write_data_sem(orig_inode, donor_inode); 374 ext4_double_down_write_data_sem(orig_inode, donor_inode);
1017 replaced_count = mext_replace_branches(handle, orig_inode, donor_inode, 375 replaced_count = ext4_swap_extents(handle, orig_inode, donor_inode,
1018 orig_blk_offset, 376 orig_blk_offset, donor_blk_offset,
1019 block_len_in_page, err); 377 block_len_in_page, 1, err);
1020 ext4_double_up_write_data_sem(orig_inode, donor_inode); 378 ext4_double_up_write_data_sem(orig_inode, donor_inode);
1021 if (*err) { 379 if (*err) {
1022 if (replaced_count) { 380 if (replaced_count) {
@@ -1061,9 +419,9 @@ repair_branches:
1061 * Try to swap extents to it's original places 419 * Try to swap extents to it's original places
1062 */ 420 */
1063 ext4_double_down_write_data_sem(orig_inode, donor_inode); 421 ext4_double_down_write_data_sem(orig_inode, donor_inode);
1064 replaced_count = mext_replace_branches(handle, donor_inode, orig_inode, 422 replaced_count = ext4_swap_extents(handle, donor_inode, orig_inode,
1065 orig_blk_offset, 423 orig_blk_offset, donor_blk_offset,
1066 block_len_in_page, &err2); 424 block_len_in_page, 0, &err2);
1067 ext4_double_up_write_data_sem(orig_inode, donor_inode); 425 ext4_double_up_write_data_sem(orig_inode, donor_inode);
1068 if (replaced_count != block_len_in_page) { 426 if (replaced_count != block_len_in_page) {
1069 EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset), 427 EXT4_ERROR_INODE_BLOCK(orig_inode, (sector_t)(orig_blk_offset),
@@ -1093,10 +451,14 @@ mext_check_arguments(struct inode *orig_inode,
1093 struct inode *donor_inode, __u64 orig_start, 451 struct inode *donor_inode, __u64 orig_start,
1094 __u64 donor_start, __u64 *len) 452 __u64 donor_start, __u64 *len)
1095{ 453{
1096 ext4_lblk_t orig_blocks, donor_blocks; 454 __u64 orig_eof, donor_eof;
1097 unsigned int blkbits = orig_inode->i_blkbits; 455 unsigned int blkbits = orig_inode->i_blkbits;
1098 unsigned int blocksize = 1 << blkbits; 456 unsigned int blocksize = 1 << blkbits;
1099 457
458 orig_eof = (i_size_read(orig_inode) + blocksize - 1) >> blkbits;
459 donor_eof = (i_size_read(donor_inode) + blocksize - 1) >> blkbits;
460
461
1100 if (donor_inode->i_mode & (S_ISUID|S_ISGID)) { 462 if (donor_inode->i_mode & (S_ISUID|S_ISGID)) {
1101 ext4_debug("ext4 move extent: suid or sgid is set" 463 ext4_debug("ext4 move extent: suid or sgid is set"
1102 " to donor file [ino:orig %lu, donor %lu]\n", 464 " to donor file [ino:orig %lu, donor %lu]\n",
@@ -1112,7 +474,7 @@ mext_check_arguments(struct inode *orig_inode,
1112 ext4_debug("ext4 move extent: The argument files should " 474 ext4_debug("ext4 move extent: The argument files should "
1113 "not be swapfile [ino:orig %lu, donor %lu]\n", 475 "not be swapfile [ino:orig %lu, donor %lu]\n",
1114 orig_inode->i_ino, donor_inode->i_ino); 476 orig_inode->i_ino, donor_inode->i_ino);
1115 return -EINVAL; 477 return -EBUSY;
1116 } 478 }
1117 479
1118 /* Ext4 move extent supports only extent based file */ 480 /* Ext4 move extent supports only extent based file */
@@ -1132,67 +494,28 @@ mext_check_arguments(struct inode *orig_inode,
1132 } 494 }
1133 495
1134 /* Start offset should be same */ 496 /* Start offset should be same */
1135 if (orig_start != donor_start) { 497 if ((orig_start & ~(PAGE_MASK >> orig_inode->i_blkbits)) !=
498 (donor_start & ~(PAGE_MASK >> orig_inode->i_blkbits))) {
1136 ext4_debug("ext4 move extent: orig and donor's start " 499 ext4_debug("ext4 move extent: orig and donor's start "
1137 "offset are not same [ino:orig %lu, donor %lu]\n", 500 "offset are not alligned [ino:orig %lu, donor %lu]\n",
1138 orig_inode->i_ino, donor_inode->i_ino); 501 orig_inode->i_ino, donor_inode->i_ino);
1139 return -EINVAL; 502 return -EINVAL;
1140 } 503 }
1141 504
1142 if ((orig_start >= EXT_MAX_BLOCKS) || 505 if ((orig_start >= EXT_MAX_BLOCKS) ||
506 (donor_start >= EXT_MAX_BLOCKS) ||
1143 (*len > EXT_MAX_BLOCKS) || 507 (*len > EXT_MAX_BLOCKS) ||
508 (donor_start + *len >= EXT_MAX_BLOCKS) ||
1144 (orig_start + *len >= EXT_MAX_BLOCKS)) { 509 (orig_start + *len >= EXT_MAX_BLOCKS)) {
1145 ext4_debug("ext4 move extent: Can't handle over [%u] blocks " 510 ext4_debug("ext4 move extent: Can't handle over [%u] blocks "
1146 "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS, 511 "[ino:orig %lu, donor %lu]\n", EXT_MAX_BLOCKS,
1147 orig_inode->i_ino, donor_inode->i_ino); 512 orig_inode->i_ino, donor_inode->i_ino);
1148 return -EINVAL; 513 return -EINVAL;
1149 } 514 }
1150 515 if (orig_eof < orig_start + *len - 1)
1151 if (orig_inode->i_size > donor_inode->i_size) { 516 *len = orig_eof - orig_start;
1152 donor_blocks = (donor_inode->i_size + blocksize - 1) >> blkbits; 517 if (donor_eof < donor_start + *len - 1)
1153 /* TODO: eliminate this artificial restriction */ 518 *len = donor_eof - donor_start;
1154 if (orig_start >= donor_blocks) {
1155 ext4_debug("ext4 move extent: orig start offset "
1156 "[%llu] should be less than donor file blocks "
1157 "[%u] [ino:orig %lu, donor %lu]\n",
1158 orig_start, donor_blocks,
1159 orig_inode->i_ino, donor_inode->i_ino);
1160 return -EINVAL;
1161 }
1162
1163 /* TODO: eliminate this artificial restriction */
1164 if (orig_start + *len > donor_blocks) {
1165 ext4_debug("ext4 move extent: End offset [%llu] should "
1166 "be less than donor file blocks [%u]."
1167 "So adjust length from %llu to %llu "
1168 "[ino:orig %lu, donor %lu]\n",
1169 orig_start + *len, donor_blocks,
1170 *len, donor_blocks - orig_start,
1171 orig_inode->i_ino, donor_inode->i_ino);
1172 *len = donor_blocks - orig_start;
1173 }
1174 } else {
1175 orig_blocks = (orig_inode->i_size + blocksize - 1) >> blkbits;
1176 if (orig_start >= orig_blocks) {
1177 ext4_debug("ext4 move extent: start offset [%llu] "
1178 "should be less than original file blocks "
1179 "[%u] [ino:orig %lu, donor %lu]\n",
1180 orig_start, orig_blocks,
1181 orig_inode->i_ino, donor_inode->i_ino);
1182 return -EINVAL;
1183 }
1184
1185 if (orig_start + *len > orig_blocks) {
1186 ext4_debug("ext4 move extent: Adjust length "
1187 "from %llu to %llu. Because it should be "
1188 "less than original file blocks "
1189 "[ino:orig %lu, donor %lu]\n",
1190 *len, orig_blocks - orig_start,
1191 orig_inode->i_ino, donor_inode->i_ino);
1192 *len = orig_blocks - orig_start;
1193 }
1194 }
1195
1196 if (!*len) { 519 if (!*len) {
1197 ext4_debug("ext4 move extent: len should not be 0 " 520 ext4_debug("ext4 move extent: len should not be 0 "
1198 "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino, 521 "[ino:orig %lu, donor %lu]\n", orig_inode->i_ino,
@@ -1208,60 +531,26 @@ mext_check_arguments(struct inode *orig_inode,
1208 * 531 *
1209 * @o_filp: file structure of the original file 532 * @o_filp: file structure of the original file
1210 * @d_filp: file structure of the donor file 533 * @d_filp: file structure of the donor file
1211 * @orig_start: start offset in block for orig 534 * @orig_blk: start offset in block for orig
1212 * @donor_start: start offset in block for donor 535 * @donor_blk: start offset in block for donor
1213 * @len: the number of blocks to be moved 536 * @len: the number of blocks to be moved
1214 * @moved_len: moved block length 537 * @moved_len: moved block length
1215 * 538 *
1216 * This function returns 0 and moved block length is set in moved_len 539 * This function returns 0 and moved block length is set in moved_len
1217 * if succeed, otherwise returns error value. 540 * if succeed, otherwise returns error value.
1218 * 541 *
1219 * Note: ext4_move_extents() proceeds the following order.
1220 * 1:ext4_move_extents() calculates the last block number of moving extent
1221 * function by the start block number (orig_start) and the number of blocks
1222 * to be moved (len) specified as arguments.
1223 * If the {orig, donor}_start points a hole, the extent's start offset
1224 * pointed by ext_cur (current extent), holecheck_path, orig_path are set
1225 * after hole behind.
1226 * 2:Continue step 3 to step 5, until the holecheck_path points to last_extent
1227 * or the ext_cur exceeds the block_end which is last logical block number.
1228 * 3:To get the length of continues area, call mext_next_extent()
1229 * specified with the ext_cur (initial value is holecheck_path) re-cursive,
1230 * until find un-continuous extent, the start logical block number exceeds
1231 * the block_end or the extent points to the last extent.
1232 * 4:Exchange the original inode data with donor inode data
1233 * from orig_page_offset to seq_end_page.
1234 * The start indexes of data are specified as arguments.
1235 * That of the original inode is orig_page_offset,
1236 * and the donor inode is also orig_page_offset
1237 * (To easily handle blocksize != pagesize case, the offset for the
1238 * donor inode is block unit).
1239 * 5:Update holecheck_path and orig_path to points a next proceeding extent,
1240 * then returns to step 2.
1241 * 6:Release holecheck_path, orig_path and set the len to moved_len
1242 * which shows the number of moved blocks.
1243 * The moved_len is useful for the command to calculate the file offset
1244 * for starting next move extent ioctl.
1245 * 7:Return 0 on success, or a negative error value on failure.
1246 */ 542 */
1247int 543int
1248ext4_move_extents(struct file *o_filp, struct file *d_filp, 544ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
1249 __u64 orig_start, __u64 donor_start, __u64 len, 545 __u64 donor_blk, __u64 len, __u64 *moved_len)
1250 __u64 *moved_len)
1251{ 546{
1252 struct inode *orig_inode = file_inode(o_filp); 547 struct inode *orig_inode = file_inode(o_filp);
1253 struct inode *donor_inode = file_inode(d_filp); 548 struct inode *donor_inode = file_inode(d_filp);
1254 struct ext4_ext_path *orig_path = NULL, *holecheck_path = NULL; 549 struct ext4_ext_path *path = NULL;
1255 struct ext4_extent *ext_prev, *ext_cur, *ext_dummy;
1256 ext4_lblk_t block_start = orig_start;
1257 ext4_lblk_t block_end, seq_start, add_blocks, file_end, seq_blocks = 0;
1258 ext4_lblk_t rest_blocks;
1259 pgoff_t orig_page_offset = 0, seq_end_page;
1260 int ret, depth, last_extent = 0;
1261 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits; 550 int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
1262 int data_offset_in_page; 551 ext4_lblk_t o_end, o_start = orig_blk;
1263 int block_len_in_page; 552 ext4_lblk_t d_start = donor_blk;
1264 int unwritten; 553 int ret;
1265 554
1266 if (orig_inode->i_sb != donor_inode->i_sb) { 555 if (orig_inode->i_sb != donor_inode->i_sb) {
1267 ext4_debug("ext4 move extent: The argument files " 556 ext4_debug("ext4 move extent: The argument files "
@@ -1303,121 +592,58 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1303 /* Protect extent tree against block allocations via delalloc */ 592 /* Protect extent tree against block allocations via delalloc */
1304 ext4_double_down_write_data_sem(orig_inode, donor_inode); 593 ext4_double_down_write_data_sem(orig_inode, donor_inode);
1305 /* Check the filesystem environment whether move_extent can be done */ 594 /* Check the filesystem environment whether move_extent can be done */
1306 ret = mext_check_arguments(orig_inode, donor_inode, orig_start, 595 ret = mext_check_arguments(orig_inode, donor_inode, orig_blk,
1307 donor_start, &len); 596 donor_blk, &len);
1308 if (ret) 597 if (ret)
1309 goto out; 598 goto out;
599 o_end = o_start + len;
1310 600
1311 file_end = (i_size_read(orig_inode) - 1) >> orig_inode->i_blkbits; 601 while (o_start < o_end) {
1312 block_end = block_start + len - 1; 602 struct ext4_extent *ex;
1313 if (file_end < block_end) 603 ext4_lblk_t cur_blk, next_blk;
1314 len -= block_end - file_end; 604 pgoff_t orig_page_index, donor_page_index;
605 int offset_in_page;
606 int unwritten, cur_len;
1315 607
1316 ret = get_ext_path(orig_inode, block_start, &orig_path); 608 ret = get_ext_path(orig_inode, o_start, &path);
1317 if (ret) 609 if (ret)
1318 goto out;
1319
1320 /* Get path structure to check the hole */
1321 ret = get_ext_path(orig_inode, block_start, &holecheck_path);
1322 if (ret)
1323 goto out;
1324
1325 depth = ext_depth(orig_inode);
1326 ext_cur = holecheck_path[depth].p_ext;
1327
1328 /*
1329 * Get proper starting location of block replacement if block_start was
1330 * within the hole.
1331 */
1332 if (le32_to_cpu(ext_cur->ee_block) +
1333 ext4_ext_get_actual_len(ext_cur) - 1 < block_start) {
1334 /*
1335 * The hole exists between extents or the tail of
1336 * original file.
1337 */
1338 last_extent = mext_next_extent(orig_inode,
1339 holecheck_path, &ext_cur);
1340 if (last_extent < 0) {
1341 ret = last_extent;
1342 goto out;
1343 }
1344 last_extent = mext_next_extent(orig_inode, orig_path,
1345 &ext_dummy);
1346 if (last_extent < 0) {
1347 ret = last_extent;
1348 goto out; 610 goto out;
1349 } 611 ex = path[path->p_depth].p_ext;
1350 seq_start = le32_to_cpu(ext_cur->ee_block); 612 next_blk = ext4_ext_next_allocated_block(path);
1351 } else if (le32_to_cpu(ext_cur->ee_block) > block_start) 613 cur_blk = le32_to_cpu(ex->ee_block);
1352 /* The hole exists at the beginning of original file. */ 614 cur_len = ext4_ext_get_actual_len(ex);
1353 seq_start = le32_to_cpu(ext_cur->ee_block); 615 /* Check hole before the start pos */
1354 else 616 if (cur_blk + cur_len - 1 < o_start) {
1355 seq_start = block_start; 617 if (next_blk == EXT_MAX_BLOCKS) {
1356 618 o_start = o_end;
1357 /* No blocks within the specified range. */ 619 ret = -ENODATA;
1358 if (le32_to_cpu(ext_cur->ee_block) > block_end) { 620 goto out;
1359 ext4_debug("ext4 move extent: The specified range of file " 621 }
1360 "may be the hole\n"); 622 d_start += next_blk - o_start;
1361 ret = -EINVAL; 623 o_start = next_blk;
1362 goto out;
1363 }
1364
1365 /* Adjust start blocks */
1366 add_blocks = min(le32_to_cpu(ext_cur->ee_block) +
1367 ext4_ext_get_actual_len(ext_cur), block_end + 1) -
1368 max(le32_to_cpu(ext_cur->ee_block), block_start);
1369
1370 while (!last_extent && le32_to_cpu(ext_cur->ee_block) <= block_end) {
1371 seq_blocks += add_blocks;
1372
1373 /* Adjust tail blocks */
1374 if (seq_start + seq_blocks - 1 > block_end)
1375 seq_blocks = block_end - seq_start + 1;
1376
1377 ext_prev = ext_cur;
1378 last_extent = mext_next_extent(orig_inode, holecheck_path,
1379 &ext_cur);
1380 if (last_extent < 0) {
1381 ret = last_extent;
1382 break;
1383 }
1384 add_blocks = ext4_ext_get_actual_len(ext_cur);
1385
1386 /*
1387 * Extend the length of contiguous block (seq_blocks)
1388 * if extents are contiguous.
1389 */
1390 if (ext4_can_extents_be_merged(orig_inode,
1391 ext_prev, ext_cur) &&
1392 block_end >= le32_to_cpu(ext_cur->ee_block) &&
1393 !last_extent)
1394 continue; 624 continue;
1395 625 /* Check hole after the start pos */
1396 /* Is original extent is unwritten */ 626 } else if (cur_blk > o_start) {
1397 unwritten = ext4_ext_is_unwritten(ext_prev); 627 /* Skip hole */
1398 628 d_start += cur_blk - o_start;
1399 data_offset_in_page = seq_start % blocks_per_page; 629 o_start = cur_blk;
1400 630 /* Extent inside requested range ?*/
1401 /* 631 if (cur_blk >= o_end)
1402 * Calculate data blocks count that should be swapped 632 goto out;
1403 * at the first page. 633 } else { /* in_range(o_start, o_blk, o_len) */
1404 */ 634 cur_len += cur_blk - o_start;
1405 if (data_offset_in_page + seq_blocks > blocks_per_page) {
1406 /* Swapped blocks are across pages */
1407 block_len_in_page =
1408 blocks_per_page - data_offset_in_page;
1409 } else {
1410 /* Swapped blocks are in a page */
1411 block_len_in_page = seq_blocks;
1412 } 635 }
1413 636 unwritten = ext4_ext_is_unwritten(ex);
1414 orig_page_offset = seq_start >> 637 if (o_end - o_start < cur_len)
1415 (PAGE_CACHE_SHIFT - orig_inode->i_blkbits); 638 cur_len = o_end - o_start;
1416 seq_end_page = (seq_start + seq_blocks - 1) >> 639
1417 (PAGE_CACHE_SHIFT - orig_inode->i_blkbits); 640 orig_page_index = o_start >> (PAGE_CACHE_SHIFT -
1418 seq_start = le32_to_cpu(ext_cur->ee_block); 641 orig_inode->i_blkbits);
1419 rest_blocks = seq_blocks; 642 donor_page_index = d_start >> (PAGE_CACHE_SHIFT -
1420 643 donor_inode->i_blkbits);
644 offset_in_page = o_start % blocks_per_page;
645 if (cur_len > blocks_per_page- offset_in_page)
646 cur_len = blocks_per_page - offset_in_page;
1421 /* 647 /*
1422 * Up semaphore to avoid following problems: 648 * Up semaphore to avoid following problems:
1423 * a. transaction deadlock among ext4_journal_start, 649 * a. transaction deadlock among ext4_journal_start,
@@ -1426,77 +652,29 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp,
1426 * in move_extent_per_page 652 * in move_extent_per_page
1427 */ 653 */
1428 ext4_double_up_write_data_sem(orig_inode, donor_inode); 654 ext4_double_up_write_data_sem(orig_inode, donor_inode);
1429 655 /* Swap original branches with new branches */
1430 while (orig_page_offset <= seq_end_page) { 656 move_extent_per_page(o_filp, donor_inode,
1431 657 orig_page_index, donor_page_index,
1432 /* Swap original branches with new branches */ 658 offset_in_page, cur_len,
1433 block_len_in_page = move_extent_per_page( 659 unwritten, &ret);
1434 o_filp, donor_inode,
1435 orig_page_offset,
1436 data_offset_in_page,
1437 block_len_in_page,
1438 unwritten, &ret);
1439
1440 /* Count how many blocks we have exchanged */
1441 *moved_len += block_len_in_page;
1442 if (ret < 0)
1443 break;
1444 if (*moved_len > len) {
1445 EXT4_ERROR_INODE(orig_inode,
1446 "We replaced blocks too much! "
1447 "sum of replaced: %llu requested: %llu",
1448 *moved_len, len);
1449 ret = -EIO;
1450 break;
1451 }
1452
1453 orig_page_offset++;
1454 data_offset_in_page = 0;
1455 rest_blocks -= block_len_in_page;
1456 if (rest_blocks > blocks_per_page)
1457 block_len_in_page = blocks_per_page;
1458 else
1459 block_len_in_page = rest_blocks;
1460 }
1461
1462 ext4_double_down_write_data_sem(orig_inode, donor_inode); 660 ext4_double_down_write_data_sem(orig_inode, donor_inode);
1463 if (ret < 0) 661 if (ret < 0)
1464 break; 662 break;
1465 663 o_start += cur_len;
1466 /* Decrease buffer counter */ 664 d_start += cur_len;
1467 if (holecheck_path)
1468 ext4_ext_drop_refs(holecheck_path);
1469 ret = get_ext_path(orig_inode, seq_start, &holecheck_path);
1470 if (ret)
1471 break;
1472 depth = holecheck_path->p_depth;
1473
1474 /* Decrease buffer counter */
1475 if (orig_path)
1476 ext4_ext_drop_refs(orig_path);
1477 ret = get_ext_path(orig_inode, seq_start, &orig_path);
1478 if (ret)
1479 break;
1480
1481 ext_cur = holecheck_path[depth].p_ext;
1482 add_blocks = ext4_ext_get_actual_len(ext_cur);
1483 seq_blocks = 0;
1484
1485 } 665 }
666 *moved_len = o_start - orig_blk;
667 if (*moved_len > len)
668 *moved_len = len;
669
1486out: 670out:
1487 if (*moved_len) { 671 if (*moved_len) {
1488 ext4_discard_preallocations(orig_inode); 672 ext4_discard_preallocations(orig_inode);
1489 ext4_discard_preallocations(donor_inode); 673 ext4_discard_preallocations(donor_inode);
1490 } 674 }
1491 675
1492 if (orig_path) { 676 ext4_ext_drop_refs(path);
1493 ext4_ext_drop_refs(orig_path); 677 kfree(path);
1494 kfree(orig_path);
1495 }
1496 if (holecheck_path) {
1497 ext4_ext_drop_refs(holecheck_path);
1498 kfree(holecheck_path);
1499 }
1500 ext4_double_up_write_data_sem(orig_inode, donor_inode); 678 ext4_double_up_write_data_sem(orig_inode, donor_inode);
1501 ext4_inode_resume_unlocked_dio(orig_inode); 679 ext4_inode_resume_unlocked_dio(orig_inode);
1502 ext4_inode_resume_unlocked_dio(donor_inode); 680 ext4_inode_resume_unlocked_dio(donor_inode);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 603e4ebbd0ac..426211882f72 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -53,7 +53,7 @@ static struct buffer_head *ext4_append(handle_t *handle,
53 ext4_lblk_t *block) 53 ext4_lblk_t *block)
54{ 54{
55 struct buffer_head *bh; 55 struct buffer_head *bh;
56 int err = 0; 56 int err;
57 57
58 if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb && 58 if (unlikely(EXT4_SB(inode->i_sb)->s_max_dir_size_kb &&
59 ((inode->i_size >> 10) >= 59 ((inode->i_size >> 10) >=
@@ -62,9 +62,9 @@ static struct buffer_head *ext4_append(handle_t *handle,
62 62
63 *block = inode->i_size >> inode->i_sb->s_blocksize_bits; 63 *block = inode->i_size >> inode->i_sb->s_blocksize_bits;
64 64
65 bh = ext4_bread(handle, inode, *block, 1, &err); 65 bh = ext4_bread(handle, inode, *block, 1);
66 if (!bh) 66 if (IS_ERR(bh))
67 return ERR_PTR(err); 67 return bh;
68 inode->i_size += inode->i_sb->s_blocksize; 68 inode->i_size += inode->i_sb->s_blocksize;
69 EXT4_I(inode)->i_disksize = inode->i_size; 69 EXT4_I(inode)->i_disksize = inode->i_size;
70 BUFFER_TRACE(bh, "get_write_access"); 70 BUFFER_TRACE(bh, "get_write_access");
@@ -94,20 +94,20 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
94{ 94{
95 struct buffer_head *bh; 95 struct buffer_head *bh;
96 struct ext4_dir_entry *dirent; 96 struct ext4_dir_entry *dirent;
97 int err = 0, is_dx_block = 0; 97 int is_dx_block = 0;
98 98
99 bh = ext4_bread(NULL, inode, block, 0, &err); 99 bh = ext4_bread(NULL, inode, block, 0);
100 if (!bh) { 100 if (IS_ERR(bh)) {
101 if (err == 0) {
102 ext4_error_inode(inode, __func__, line, block,
103 "Directory hole found");
104 return ERR_PTR(-EIO);
105 }
106 __ext4_warning(inode->i_sb, __func__, line, 101 __ext4_warning(inode->i_sb, __func__, line,
107 "error reading directory block " 102 "error %ld reading directory block "
108 "(ino %lu, block %lu)", inode->i_ino, 103 "(ino %lu, block %lu)", PTR_ERR(bh), inode->i_ino,
109 (unsigned long) block); 104 (unsigned long) block);
110 return ERR_PTR(err); 105
106 return bh;
107 }
108 if (!bh) {
109 ext4_error_inode(inode, __func__, line, block, "Directory hole found");
110 return ERR_PTR(-EIO);
111 } 111 }
112 dirent = (struct ext4_dir_entry *) bh->b_data; 112 dirent = (struct ext4_dir_entry *) bh->b_data;
113 /* Determine whether or not we have an index block */ 113 /* Determine whether or not we have an index block */
@@ -124,8 +124,7 @@ static struct buffer_head *__ext4_read_dirblock(struct inode *inode,
124 "directory leaf block found instead of index block"); 124 "directory leaf block found instead of index block");
125 return ERR_PTR(-EIO); 125 return ERR_PTR(-EIO);
126 } 126 }
127 if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 127 if (!ext4_has_metadata_csum(inode->i_sb) ||
128 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) ||
129 buffer_verified(bh)) 128 buffer_verified(bh))
130 return bh; 129 return bh;
131 130
@@ -253,8 +252,7 @@ static unsigned dx_node_limit(struct inode *dir);
253static struct dx_frame *dx_probe(const struct qstr *d_name, 252static struct dx_frame *dx_probe(const struct qstr *d_name,
254 struct inode *dir, 253 struct inode *dir,
255 struct dx_hash_info *hinfo, 254 struct dx_hash_info *hinfo,
256 struct dx_frame *frame, 255 struct dx_frame *frame);
257 int *err);
258static void dx_release(struct dx_frame *frames); 256static void dx_release(struct dx_frame *frames);
259static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize, 257static int dx_make_map(struct ext4_dir_entry_2 *de, unsigned blocksize,
260 struct dx_hash_info *hinfo, struct dx_map_entry map[]); 258 struct dx_hash_info *hinfo, struct dx_map_entry map[]);
@@ -270,8 +268,7 @@ static int ext4_htree_next_block(struct inode *dir, __u32 hash,
270 __u32 *start_hash); 268 __u32 *start_hash);
271static struct buffer_head * ext4_dx_find_entry(struct inode *dir, 269static struct buffer_head * ext4_dx_find_entry(struct inode *dir,
272 const struct qstr *d_name, 270 const struct qstr *d_name,
273 struct ext4_dir_entry_2 **res_dir, 271 struct ext4_dir_entry_2 **res_dir);
274 int *err);
275static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry, 272static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
276 struct inode *inode); 273 struct inode *inode);
277 274
@@ -340,8 +337,7 @@ int ext4_dirent_csum_verify(struct inode *inode, struct ext4_dir_entry *dirent)
340{ 337{
341 struct ext4_dir_entry_tail *t; 338 struct ext4_dir_entry_tail *t;
342 339
343 if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 340 if (!ext4_has_metadata_csum(inode->i_sb))
344 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
345 return 1; 341 return 1;
346 342
347 t = get_dirent_tail(inode, dirent); 343 t = get_dirent_tail(inode, dirent);
@@ -362,8 +358,7 @@ static void ext4_dirent_csum_set(struct inode *inode,
362{ 358{
363 struct ext4_dir_entry_tail *t; 359 struct ext4_dir_entry_tail *t;
364 360
365 if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 361 if (!ext4_has_metadata_csum(inode->i_sb))
366 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
367 return; 362 return;
368 363
369 t = get_dirent_tail(inode, dirent); 364 t = get_dirent_tail(inode, dirent);
@@ -438,8 +433,7 @@ static int ext4_dx_csum_verify(struct inode *inode,
438 struct dx_tail *t; 433 struct dx_tail *t;
439 int count_offset, limit, count; 434 int count_offset, limit, count;
440 435
441 if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 436 if (!ext4_has_metadata_csum(inode->i_sb))
442 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
443 return 1; 437 return 1;
444 438
445 c = get_dx_countlimit(inode, dirent, &count_offset); 439 c = get_dx_countlimit(inode, dirent, &count_offset);
@@ -468,8 +462,7 @@ static void ext4_dx_csum_set(struct inode *inode, struct ext4_dir_entry *dirent)
468 struct dx_tail *t; 462 struct dx_tail *t;
469 int count_offset, limit, count; 463 int count_offset, limit, count;
470 464
471 if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 465 if (!ext4_has_metadata_csum(inode->i_sb))
472 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
473 return; 466 return;
474 467
475 c = get_dx_countlimit(inode, dirent, &count_offset); 468 c = get_dx_countlimit(inode, dirent, &count_offset);
@@ -557,8 +550,7 @@ static inline unsigned dx_root_limit(struct inode *dir, unsigned infosize)
557 unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) - 550 unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(1) -
558 EXT4_DIR_REC_LEN(2) - infosize; 551 EXT4_DIR_REC_LEN(2) - infosize;
559 552
560 if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, 553 if (ext4_has_metadata_csum(dir->i_sb))
561 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
562 entry_space -= sizeof(struct dx_tail); 554 entry_space -= sizeof(struct dx_tail);
563 return entry_space / sizeof(struct dx_entry); 555 return entry_space / sizeof(struct dx_entry);
564} 556}
@@ -567,8 +559,7 @@ static inline unsigned dx_node_limit(struct inode *dir)
567{ 559{
568 unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0); 560 unsigned entry_space = dir->i_sb->s_blocksize - EXT4_DIR_REC_LEN(0);
569 561
570 if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, 562 if (ext4_has_metadata_csum(dir->i_sb))
571 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
572 entry_space -= sizeof(struct dx_tail); 563 entry_space -= sizeof(struct dx_tail);
573 return entry_space / sizeof(struct dx_entry); 564 return entry_space / sizeof(struct dx_entry);
574} 565}
@@ -641,7 +632,9 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
641 u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash; 632 u32 range = i < count - 1? (dx_get_hash(entries + 1) - hash): ~hash;
642 struct stats stats; 633 struct stats stats;
643 printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range); 634 printk("%s%3u:%03u hash %8x/%8x ",levels?"":" ", i, block, hash, range);
644 if (!(bh = ext4_bread (NULL,dir, block, 0,&err))) continue; 635 bh = ext4_bread(NULL,dir, block, 0);
636 if (!bh || IS_ERR(bh))
637 continue;
645 stats = levels? 638 stats = levels?
646 dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1): 639 dx_show_entries(hinfo, dir, ((struct dx_node *) bh->b_data)->entries, levels - 1):
647 dx_show_leaf(hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0); 640 dx_show_leaf(hinfo, (struct ext4_dir_entry_2 *) bh->b_data, blocksize, 0);
@@ -669,29 +662,25 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
669 */ 662 */
670static struct dx_frame * 663static struct dx_frame *
671dx_probe(const struct qstr *d_name, struct inode *dir, 664dx_probe(const struct qstr *d_name, struct inode *dir,
672 struct dx_hash_info *hinfo, struct dx_frame *frame_in, int *err) 665 struct dx_hash_info *hinfo, struct dx_frame *frame_in)
673{ 666{
674 unsigned count, indirect; 667 unsigned count, indirect;
675 struct dx_entry *at, *entries, *p, *q, *m; 668 struct dx_entry *at, *entries, *p, *q, *m;
676 struct dx_root *root; 669 struct dx_root *root;
677 struct buffer_head *bh;
678 struct dx_frame *frame = frame_in; 670 struct dx_frame *frame = frame_in;
671 struct dx_frame *ret_err = ERR_PTR(ERR_BAD_DX_DIR);
679 u32 hash; 672 u32 hash;
680 673
681 frame->bh = NULL; 674 frame->bh = ext4_read_dirblock(dir, 0, INDEX);
682 bh = ext4_read_dirblock(dir, 0, INDEX); 675 if (IS_ERR(frame->bh))
683 if (IS_ERR(bh)) { 676 return (struct dx_frame *) frame->bh;
684 *err = PTR_ERR(bh); 677
685 goto fail; 678 root = (struct dx_root *) frame->bh->b_data;
686 }
687 root = (struct dx_root *) bh->b_data;
688 if (root->info.hash_version != DX_HASH_TEA && 679 if (root->info.hash_version != DX_HASH_TEA &&
689 root->info.hash_version != DX_HASH_HALF_MD4 && 680 root->info.hash_version != DX_HASH_HALF_MD4 &&
690 root->info.hash_version != DX_HASH_LEGACY) { 681 root->info.hash_version != DX_HASH_LEGACY) {
691 ext4_warning(dir->i_sb, "Unrecognised inode hash code %d", 682 ext4_warning(dir->i_sb, "Unrecognised inode hash code %d",
692 root->info.hash_version); 683 root->info.hash_version);
693 brelse(bh);
694 *err = ERR_BAD_DX_DIR;
695 goto fail; 684 goto fail;
696 } 685 }
697 hinfo->hash_version = root->info.hash_version; 686 hinfo->hash_version = root->info.hash_version;
@@ -705,16 +694,12 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
705 if (root->info.unused_flags & 1) { 694 if (root->info.unused_flags & 1) {
706 ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x", 695 ext4_warning(dir->i_sb, "Unimplemented inode hash flags: %#06x",
707 root->info.unused_flags); 696 root->info.unused_flags);
708 brelse(bh);
709 *err = ERR_BAD_DX_DIR;
710 goto fail; 697 goto fail;
711 } 698 }
712 699
713 if ((indirect = root->info.indirect_levels) > 1) { 700 if ((indirect = root->info.indirect_levels) > 1) {
714 ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x", 701 ext4_warning(dir->i_sb, "Unimplemented inode hash depth: %#06x",
715 root->info.indirect_levels); 702 root->info.indirect_levels);
716 brelse(bh);
717 *err = ERR_BAD_DX_DIR;
718 goto fail; 703 goto fail;
719 } 704 }
720 705
@@ -724,27 +709,21 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
724 if (dx_get_limit(entries) != dx_root_limit(dir, 709 if (dx_get_limit(entries) != dx_root_limit(dir,
725 root->info.info_length)) { 710 root->info.info_length)) {
726 ext4_warning(dir->i_sb, "dx entry: limit != root limit"); 711 ext4_warning(dir->i_sb, "dx entry: limit != root limit");
727 brelse(bh);
728 *err = ERR_BAD_DX_DIR;
729 goto fail; 712 goto fail;
730 } 713 }
731 714
732 dxtrace(printk("Look up %x", hash)); 715 dxtrace(printk("Look up %x", hash));
733 while (1) 716 while (1) {
734 {
735 count = dx_get_count(entries); 717 count = dx_get_count(entries);
736 if (!count || count > dx_get_limit(entries)) { 718 if (!count || count > dx_get_limit(entries)) {
737 ext4_warning(dir->i_sb, 719 ext4_warning(dir->i_sb,
738 "dx entry: no count or count > limit"); 720 "dx entry: no count or count > limit");
739 brelse(bh); 721 goto fail;
740 *err = ERR_BAD_DX_DIR;
741 goto fail2;
742 } 722 }
743 723
744 p = entries + 1; 724 p = entries + 1;
745 q = entries + count - 1; 725 q = entries + count - 1;
746 while (p <= q) 726 while (p <= q) {
747 {
748 m = p + (q - p)/2; 727 m = p + (q - p)/2;
749 dxtrace(printk(".")); 728 dxtrace(printk("."));
750 if (dx_get_hash(m) > hash) 729 if (dx_get_hash(m) > hash)
@@ -753,8 +732,7 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
753 p = m + 1; 732 p = m + 1;
754 } 733 }
755 734
756 if (0) // linear search cross check 735 if (0) { // linear search cross check
757 {
758 unsigned n = count - 1; 736 unsigned n = count - 1;
759 at = entries; 737 at = entries;
760 while (n--) 738 while (n--)
@@ -771,38 +749,35 @@ dx_probe(const struct qstr *d_name, struct inode *dir,
771 749
772 at = p - 1; 750 at = p - 1;
773 dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at))); 751 dxtrace(printk(" %x->%u\n", at == entries? 0: dx_get_hash(at), dx_get_block(at)));
774 frame->bh = bh;
775 frame->entries = entries; 752 frame->entries = entries;
776 frame->at = at; 753 frame->at = at;
777 if (!indirect--) return frame; 754 if (!indirect--)
778 bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX); 755 return frame;
779 if (IS_ERR(bh)) { 756 frame++;
780 *err = PTR_ERR(bh); 757 frame->bh = ext4_read_dirblock(dir, dx_get_block(at), INDEX);
781 goto fail2; 758 if (IS_ERR(frame->bh)) {
759 ret_err = (struct dx_frame *) frame->bh;
760 frame->bh = NULL;
761 goto fail;
782 } 762 }
783 entries = ((struct dx_node *) bh->b_data)->entries; 763 entries = ((struct dx_node *) frame->bh->b_data)->entries;
784 764
785 if (dx_get_limit(entries) != dx_node_limit (dir)) { 765 if (dx_get_limit(entries) != dx_node_limit (dir)) {
786 ext4_warning(dir->i_sb, 766 ext4_warning(dir->i_sb,
787 "dx entry: limit != node limit"); 767 "dx entry: limit != node limit");
788 brelse(bh); 768 goto fail;
789 *err = ERR_BAD_DX_DIR;
790 goto fail2;
791 } 769 }
792 frame++;
793 frame->bh = NULL;
794 } 770 }
795fail2: 771fail:
796 while (frame >= frame_in) { 772 while (frame >= frame_in) {
797 brelse(frame->bh); 773 brelse(frame->bh);
798 frame--; 774 frame--;
799 } 775 }
800fail: 776 if (ret_err == ERR_PTR(ERR_BAD_DX_DIR))
801 if (*err == ERR_BAD_DX_DIR)
802 ext4_warning(dir->i_sb, 777 ext4_warning(dir->i_sb,
803 "Corrupt dir inode %lu, running e2fsck is " 778 "Corrupt dir inode %lu, running e2fsck is "
804 "recommended.", dir->i_ino); 779 "recommended.", dir->i_ino);
805 return NULL; 780 return ret_err;
806} 781}
807 782
808static void dx_release (struct dx_frame *frames) 783static void dx_release (struct dx_frame *frames)
@@ -988,9 +963,9 @@ int ext4_htree_fill_tree(struct file *dir_file, __u32 start_hash,
988 } 963 }
989 hinfo.hash = start_hash; 964 hinfo.hash = start_hash;
990 hinfo.minor_hash = 0; 965 hinfo.minor_hash = 0;
991 frame = dx_probe(NULL, dir, &hinfo, frames, &err); 966 frame = dx_probe(NULL, dir, &hinfo, frames);
992 if (!frame) 967 if (IS_ERR(frame))
993 return err; 968 return PTR_ERR(frame);
994 969
995 /* Add '.' and '..' from the htree header */ 970 /* Add '.' and '..' from the htree header */
996 if (!start_hash && !start_minor_hash) { 971 if (!start_hash && !start_minor_hash) {
@@ -1227,8 +1202,7 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
1227 buffer */ 1202 buffer */
1228 int num = 0; 1203 int num = 0;
1229 ext4_lblk_t nblocks; 1204 ext4_lblk_t nblocks;
1230 int i, err = 0; 1205 int i, namelen;
1231 int namelen;
1232 1206
1233 *res_dir = NULL; 1207 *res_dir = NULL;
1234 sb = dir->i_sb; 1208 sb = dir->i_sb;
@@ -1258,17 +1232,13 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
1258 goto restart; 1232 goto restart;
1259 } 1233 }
1260 if (is_dx(dir)) { 1234 if (is_dx(dir)) {
1261 bh = ext4_dx_find_entry(dir, d_name, res_dir, &err); 1235 bh = ext4_dx_find_entry(dir, d_name, res_dir);
1262 /* 1236 /*
1263 * On success, or if the error was file not found, 1237 * On success, or if the error was file not found,
1264 * return. Otherwise, fall back to doing a search the 1238 * return. Otherwise, fall back to doing a search the
1265 * old fashioned way. 1239 * old fashioned way.
1266 */ 1240 */
1267 if (err == -ENOENT) 1241 if (!IS_ERR(bh) || PTR_ERR(bh) != ERR_BAD_DX_DIR)
1268 return NULL;
1269 if (err && err != ERR_BAD_DX_DIR)
1270 return ERR_PTR(err);
1271 if (bh)
1272 return bh; 1242 return bh;
1273 dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, " 1243 dxtrace(printk(KERN_DEBUG "ext4_find_entry: dx failed, "
1274 "falling back\n")); 1244 "falling back\n"));
@@ -1298,10 +1268,10 @@ restart:
1298 break; 1268 break;
1299 } 1269 }
1300 num++; 1270 num++;
1301 bh = ext4_getblk(NULL, dir, b++, 0, &err); 1271 bh = ext4_getblk(NULL, dir, b++, 0);
1302 if (unlikely(err)) { 1272 if (unlikely(IS_ERR(bh))) {
1303 if (ra_max == 0) 1273 if (ra_max == 0)
1304 return ERR_PTR(err); 1274 return bh;
1305 break; 1275 break;
1306 } 1276 }
1307 bh_use[ra_max] = bh; 1277 bh_use[ra_max] = bh;
@@ -1366,7 +1336,7 @@ cleanup_and_exit:
1366} 1336}
1367 1337
1368static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name, 1338static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct qstr *d_name,
1369 struct ext4_dir_entry_2 **res_dir, int *err) 1339 struct ext4_dir_entry_2 **res_dir)
1370{ 1340{
1371 struct super_block * sb = dir->i_sb; 1341 struct super_block * sb = dir->i_sb;
1372 struct dx_hash_info hinfo; 1342 struct dx_hash_info hinfo;
@@ -1375,25 +1345,23 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
1375 ext4_lblk_t block; 1345 ext4_lblk_t block;
1376 int retval; 1346 int retval;
1377 1347
1378 if (!(frame = dx_probe(d_name, dir, &hinfo, frames, err))) 1348 frame = dx_probe(d_name, dir, &hinfo, frames);
1379 return NULL; 1349 if (IS_ERR(frame))
1350 return (struct buffer_head *) frame;
1380 do { 1351 do {
1381 block = dx_get_block(frame->at); 1352 block = dx_get_block(frame->at);
1382 bh = ext4_read_dirblock(dir, block, DIRENT); 1353 bh = ext4_read_dirblock(dir, block, DIRENT);
1383 if (IS_ERR(bh)) { 1354 if (IS_ERR(bh))
1384 *err = PTR_ERR(bh);
1385 goto errout; 1355 goto errout;
1386 } 1356
1387 retval = search_dirblock(bh, dir, d_name, 1357 retval = search_dirblock(bh, dir, d_name,
1388 block << EXT4_BLOCK_SIZE_BITS(sb), 1358 block << EXT4_BLOCK_SIZE_BITS(sb),
1389 res_dir); 1359 res_dir);
1390 if (retval == 1) { /* Success! */ 1360 if (retval == 1)
1391 dx_release(frames); 1361 goto success;
1392 return bh;
1393 }
1394 brelse(bh); 1362 brelse(bh);
1395 if (retval == -1) { 1363 if (retval == -1) {
1396 *err = ERR_BAD_DX_DIR; 1364 bh = ERR_PTR(ERR_BAD_DX_DIR);
1397 goto errout; 1365 goto errout;
1398 } 1366 }
1399 1367
@@ -1402,18 +1370,19 @@ static struct buffer_head * ext4_dx_find_entry(struct inode *dir, const struct q
1402 frames, NULL); 1370 frames, NULL);
1403 if (retval < 0) { 1371 if (retval < 0) {
1404 ext4_warning(sb, 1372 ext4_warning(sb,
1405 "error reading index page in directory #%lu", 1373 "error %d reading index page in directory #%lu",
1406 dir->i_ino); 1374 retval, dir->i_ino);
1407 *err = retval; 1375 bh = ERR_PTR(retval);
1408 goto errout; 1376 goto errout;
1409 } 1377 }
1410 } while (retval == 1); 1378 } while (retval == 1);
1411 1379
1412 *err = -ENOENT; 1380 bh = NULL;
1413errout: 1381errout:
1414 dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name)); 1382 dxtrace(printk(KERN_DEBUG "%s not found\n", d_name->name));
1415 dx_release (frames); 1383success:
1416 return NULL; 1384 dx_release(frames);
1385 return bh;
1417} 1386}
1418 1387
1419static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) 1388static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
@@ -1441,7 +1410,7 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
1441 dentry); 1410 dentry);
1442 return ERR_PTR(-EIO); 1411 return ERR_PTR(-EIO);
1443 } 1412 }
1444 inode = ext4_iget(dir->i_sb, ino); 1413 inode = ext4_iget_normal(dir->i_sb, ino);
1445 if (inode == ERR_PTR(-ESTALE)) { 1414 if (inode == ERR_PTR(-ESTALE)) {
1446 EXT4_ERROR_INODE(dir, 1415 EXT4_ERROR_INODE(dir,
1447 "deleted inode referenced: %u", 1416 "deleted inode referenced: %u",
@@ -1474,7 +1443,7 @@ struct dentry *ext4_get_parent(struct dentry *child)
1474 return ERR_PTR(-EIO); 1443 return ERR_PTR(-EIO);
1475 } 1444 }
1476 1445
1477 return d_obtain_alias(ext4_iget(child->d_inode->i_sb, ino)); 1446 return d_obtain_alias(ext4_iget_normal(child->d_inode->i_sb, ino));
1478} 1447}
1479 1448
1480/* 1449/*
@@ -1533,7 +1502,7 @@ static struct ext4_dir_entry_2* dx_pack_dirents(char *base, unsigned blocksize)
1533 */ 1502 */
1534static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir, 1503static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1535 struct buffer_head **bh,struct dx_frame *frame, 1504 struct buffer_head **bh,struct dx_frame *frame,
1536 struct dx_hash_info *hinfo, int *error) 1505 struct dx_hash_info *hinfo)
1537{ 1506{
1538 unsigned blocksize = dir->i_sb->s_blocksize; 1507 unsigned blocksize = dir->i_sb->s_blocksize;
1539 unsigned count, continued; 1508 unsigned count, continued;
@@ -1548,16 +1517,14 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1548 int csum_size = 0; 1517 int csum_size = 0;
1549 int err = 0, i; 1518 int err = 0, i;
1550 1519
1551 if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, 1520 if (ext4_has_metadata_csum(dir->i_sb))
1552 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
1553 csum_size = sizeof(struct ext4_dir_entry_tail); 1521 csum_size = sizeof(struct ext4_dir_entry_tail);
1554 1522
1555 bh2 = ext4_append(handle, dir, &newblock); 1523 bh2 = ext4_append(handle, dir, &newblock);
1556 if (IS_ERR(bh2)) { 1524 if (IS_ERR(bh2)) {
1557 brelse(*bh); 1525 brelse(*bh);
1558 *bh = NULL; 1526 *bh = NULL;
1559 *error = PTR_ERR(bh2); 1527 return (struct ext4_dir_entry_2 *) bh2;
1560 return NULL;
1561 } 1528 }
1562 1529
1563 BUFFER_TRACE(*bh, "get_write_access"); 1530 BUFFER_TRACE(*bh, "get_write_access");
@@ -1617,8 +1584,7 @@ static struct ext4_dir_entry_2 *do_split(handle_t *handle, struct inode *dir,
1617 dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1)); 1584 dxtrace(dx_show_leaf (hinfo, (struct ext4_dir_entry_2 *) data2, blocksize, 1));
1618 1585
1619 /* Which block gets the new entry? */ 1586 /* Which block gets the new entry? */
1620 if (hinfo->hash >= hash2) 1587 if (hinfo->hash >= hash2) {
1621 {
1622 swap(*bh, bh2); 1588 swap(*bh, bh2);
1623 de = de2; 1589 de = de2;
1624 } 1590 }
@@ -1638,8 +1604,7 @@ journal_error:
1638 brelse(bh2); 1604 brelse(bh2);
1639 *bh = NULL; 1605 *bh = NULL;
1640 ext4_std_error(dir->i_sb, err); 1606 ext4_std_error(dir->i_sb, err);
1641 *error = err; 1607 return ERR_PTR(err);
1642 return NULL;
1643} 1608}
1644 1609
1645int ext4_find_dest_de(struct inode *dir, struct inode *inode, 1610int ext4_find_dest_de(struct inode *dir, struct inode *inode,
@@ -1718,8 +1683,7 @@ static int add_dirent_to_buf(handle_t *handle, struct dentry *dentry,
1718 int csum_size = 0; 1683 int csum_size = 0;
1719 int err; 1684 int err;
1720 1685
1721 if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 1686 if (ext4_has_metadata_csum(inode->i_sb))
1722 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
1723 csum_size = sizeof(struct ext4_dir_entry_tail); 1687 csum_size = sizeof(struct ext4_dir_entry_tail);
1724 1688
1725 if (!de) { 1689 if (!de) {
@@ -1786,8 +1750,7 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1786 struct fake_dirent *fde; 1750 struct fake_dirent *fde;
1787 int csum_size = 0; 1751 int csum_size = 0;
1788 1752
1789 if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 1753 if (ext4_has_metadata_csum(inode->i_sb))
1790 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
1791 csum_size = sizeof(struct ext4_dir_entry_tail); 1754 csum_size = sizeof(struct ext4_dir_entry_tail);
1792 1755
1793 blocksize = dir->i_sb->s_blocksize; 1756 blocksize = dir->i_sb->s_blocksize;
@@ -1853,31 +1816,39 @@ static int make_indexed_dir(handle_t *handle, struct dentry *dentry,
1853 hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned; 1816 hinfo.hash_version += EXT4_SB(dir->i_sb)->s_hash_unsigned;
1854 hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed; 1817 hinfo.seed = EXT4_SB(dir->i_sb)->s_hash_seed;
1855 ext4fs_dirhash(name, namelen, &hinfo); 1818 ext4fs_dirhash(name, namelen, &hinfo);
1819 memset(frames, 0, sizeof(frames));
1856 frame = frames; 1820 frame = frames;
1857 frame->entries = entries; 1821 frame->entries = entries;
1858 frame->at = entries; 1822 frame->at = entries;
1859 frame->bh = bh; 1823 frame->bh = bh;
1860 bh = bh2; 1824 bh = bh2;
1861 1825
1862 ext4_handle_dirty_dx_node(handle, dir, frame->bh); 1826 retval = ext4_handle_dirty_dx_node(handle, dir, frame->bh);
1863 ext4_handle_dirty_dirent_node(handle, dir, bh); 1827 if (retval)
1828 goto out_frames;
1829 retval = ext4_handle_dirty_dirent_node(handle, dir, bh);
1830 if (retval)
1831 goto out_frames;
1864 1832
1865 de = do_split(handle,dir, &bh, frame, &hinfo, &retval); 1833 de = do_split(handle,dir, &bh, frame, &hinfo);
1866 if (!de) { 1834 if (IS_ERR(de)) {
1867 /* 1835 retval = PTR_ERR(de);
1868 * Even if the block split failed, we have to properly write 1836 goto out_frames;
1869 * out all the changes we did so far. Otherwise we can end up
1870 * with corrupted filesystem.
1871 */
1872 ext4_mark_inode_dirty(handle, dir);
1873 dx_release(frames);
1874 return retval;
1875 } 1837 }
1876 dx_release(frames); 1838 dx_release(frames);
1877 1839
1878 retval = add_dirent_to_buf(handle, dentry, inode, de, bh); 1840 retval = add_dirent_to_buf(handle, dentry, inode, de, bh);
1879 brelse(bh); 1841 brelse(bh);
1880 return retval; 1842 return retval;
1843out_frames:
1844 /*
1845 * Even if the block split failed, we have to properly write
1846 * out all the changes we did so far. Otherwise we can end up
1847 * with corrupted filesystem.
1848 */
1849 ext4_mark_inode_dirty(handle, dir);
1850 dx_release(frames);
1851 return retval;
1881} 1852}
1882 1853
1883/* 1854/*
@@ -1904,8 +1875,7 @@ static int ext4_add_entry(handle_t *handle, struct dentry *dentry,
1904 ext4_lblk_t block, blocks; 1875 ext4_lblk_t block, blocks;
1905 int csum_size = 0; 1876 int csum_size = 0;
1906 1877
1907 if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 1878 if (ext4_has_metadata_csum(inode->i_sb))
1908 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
1909 csum_size = sizeof(struct ext4_dir_entry_tail); 1879 csum_size = sizeof(struct ext4_dir_entry_tail);
1910 1880
1911 sb = dir->i_sb; 1881 sb = dir->i_sb;
@@ -1982,9 +1952,9 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
1982 struct ext4_dir_entry_2 *de; 1952 struct ext4_dir_entry_2 *de;
1983 int err; 1953 int err;
1984 1954
1985 frame = dx_probe(&dentry->d_name, dir, &hinfo, frames, &err); 1955 frame = dx_probe(&dentry->d_name, dir, &hinfo, frames);
1986 if (!frame) 1956 if (IS_ERR(frame))
1987 return err; 1957 return PTR_ERR(frame);
1988 entries = frame->entries; 1958 entries = frame->entries;
1989 at = frame->at; 1959 at = frame->at;
1990 bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT); 1960 bh = ext4_read_dirblock(dir, dx_get_block(frame->at), DIRENT);
@@ -2095,9 +2065,11 @@ static int ext4_dx_add_entry(handle_t *handle, struct dentry *dentry,
2095 goto cleanup; 2065 goto cleanup;
2096 } 2066 }
2097 } 2067 }
2098 de = do_split(handle, dir, &bh, frame, &hinfo, &err); 2068 de = do_split(handle, dir, &bh, frame, &hinfo);
2099 if (!de) 2069 if (IS_ERR(de)) {
2070 err = PTR_ERR(de);
2100 goto cleanup; 2071 goto cleanup;
2072 }
2101 err = add_dirent_to_buf(handle, dentry, inode, de, bh); 2073 err = add_dirent_to_buf(handle, dentry, inode, de, bh);
2102 goto cleanup; 2074 goto cleanup;
2103 2075
@@ -2167,8 +2139,7 @@ static int ext4_delete_entry(handle_t *handle,
2167 return err; 2139 return err;
2168 } 2140 }
2169 2141
2170 if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, 2142 if (ext4_has_metadata_csum(dir->i_sb))
2171 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
2172 csum_size = sizeof(struct ext4_dir_entry_tail); 2143 csum_size = sizeof(struct ext4_dir_entry_tail);
2173 2144
2174 BUFFER_TRACE(bh, "get_write_access"); 2145 BUFFER_TRACE(bh, "get_write_access");
@@ -2387,8 +2358,7 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
2387 int csum_size = 0; 2358 int csum_size = 0;
2388 int err; 2359 int err;
2389 2360
2390 if (EXT4_HAS_RO_COMPAT_FEATURE(dir->i_sb, 2361 if (ext4_has_metadata_csum(dir->i_sb))
2391 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
2392 csum_size = sizeof(struct ext4_dir_entry_tail); 2362 csum_size = sizeof(struct ext4_dir_entry_tail);
2393 2363
2394 if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { 2364 if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
@@ -2403,10 +2373,6 @@ static int ext4_init_new_dir(handle_t *handle, struct inode *dir,
2403 dir_block = ext4_append(handle, inode, &block); 2373 dir_block = ext4_append(handle, inode, &block);
2404 if (IS_ERR(dir_block)) 2374 if (IS_ERR(dir_block))
2405 return PTR_ERR(dir_block); 2375 return PTR_ERR(dir_block);
2406 BUFFER_TRACE(dir_block, "get_write_access");
2407 err = ext4_journal_get_write_access(handle, dir_block);
2408 if (err)
2409 goto out;
2410 de = (struct ext4_dir_entry_2 *)dir_block->b_data; 2376 de = (struct ext4_dir_entry_2 *)dir_block->b_data;
2411 ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0); 2377 ext4_init_dot_dotdot(inode, de, blocksize, csum_size, dir->i_ino, 0);
2412 set_nlink(inode, 2); 2378 set_nlink(inode, 2);
@@ -2573,7 +2539,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
2573 int err = 0, rc; 2539 int err = 0, rc;
2574 bool dirty = false; 2540 bool dirty = false;
2575 2541
2576 if (!sbi->s_journal) 2542 if (!sbi->s_journal || is_bad_inode(inode))
2577 return 0; 2543 return 0;
2578 2544
2579 WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) && 2545 WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
@@ -3190,6 +3156,39 @@ static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent)
3190 } 3156 }
3191} 3157}
3192 3158
3159static struct inode *ext4_whiteout_for_rename(struct ext4_renament *ent,
3160 int credits, handle_t **h)
3161{
3162 struct inode *wh;
3163 handle_t *handle;
3164 int retries = 0;
3165
3166 /*
3167 * for inode block, sb block, group summaries,
3168 * and inode bitmap
3169 */
3170 credits += (EXT4_MAXQUOTAS_TRANS_BLOCKS(ent->dir->i_sb) +
3171 EXT4_XATTR_TRANS_BLOCKS + 4);
3172retry:
3173 wh = ext4_new_inode_start_handle(ent->dir, S_IFCHR | WHITEOUT_MODE,
3174 &ent->dentry->d_name, 0, NULL,
3175 EXT4_HT_DIR, credits);
3176
3177 handle = ext4_journal_current_handle();
3178 if (IS_ERR(wh)) {
3179 if (handle)
3180 ext4_journal_stop(handle);
3181 if (PTR_ERR(wh) == -ENOSPC &&
3182 ext4_should_retry_alloc(ent->dir->i_sb, &retries))
3183 goto retry;
3184 } else {
3185 *h = handle;
3186 init_special_inode(wh, wh->i_mode, WHITEOUT_DEV);
3187 wh->i_op = &ext4_special_inode_operations;
3188 }
3189 return wh;
3190}
3191
3193/* 3192/*
3194 * Anybody can rename anything with this: the permission checks are left to the 3193 * Anybody can rename anything with this: the permission checks are left to the
3195 * higher-level routines. 3194 * higher-level routines.
@@ -3199,7 +3198,8 @@ static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent)
3199 * This comes from rename(const char *oldpath, const char *newpath) 3198 * This comes from rename(const char *oldpath, const char *newpath)
3200 */ 3199 */
3201static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry, 3200static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3202 struct inode *new_dir, struct dentry *new_dentry) 3201 struct inode *new_dir, struct dentry *new_dentry,
3202 unsigned int flags)
3203{ 3203{
3204 handle_t *handle = NULL; 3204 handle_t *handle = NULL;
3205 struct ext4_renament old = { 3205 struct ext4_renament old = {
@@ -3214,6 +3214,9 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3214 }; 3214 };
3215 int force_reread; 3215 int force_reread;
3216 int retval; 3216 int retval;
3217 struct inode *whiteout = NULL;
3218 int credits;
3219 u8 old_file_type;
3217 3220
3218 dquot_initialize(old.dir); 3221 dquot_initialize(old.dir);
3219 dquot_initialize(new.dir); 3222 dquot_initialize(new.dir);
@@ -3252,11 +3255,17 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3252 if (new.inode && !test_opt(new.dir->i_sb, NO_AUTO_DA_ALLOC)) 3255 if (new.inode && !test_opt(new.dir->i_sb, NO_AUTO_DA_ALLOC))
3253 ext4_alloc_da_blocks(old.inode); 3256 ext4_alloc_da_blocks(old.inode);
3254 3257
3255 handle = ext4_journal_start(old.dir, EXT4_HT_DIR, 3258 credits = (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) +
3256 (2 * EXT4_DATA_TRANS_BLOCKS(old.dir->i_sb) + 3259 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2);
3257 EXT4_INDEX_EXTRA_TRANS_BLOCKS + 2)); 3260 if (!(flags & RENAME_WHITEOUT)) {
3258 if (IS_ERR(handle)) 3261 handle = ext4_journal_start(old.dir, EXT4_HT_DIR, credits);
3259 return PTR_ERR(handle); 3262 if (IS_ERR(handle))
3263 return PTR_ERR(handle);
3264 } else {
3265 whiteout = ext4_whiteout_for_rename(&old, credits, &handle);
3266 if (IS_ERR(whiteout))
3267 return PTR_ERR(whiteout);
3268 }
3260 3269
3261 if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir)) 3270 if (IS_DIRSYNC(old.dir) || IS_DIRSYNC(new.dir))
3262 ext4_handle_sync(handle); 3271 ext4_handle_sync(handle);
@@ -3284,13 +3293,26 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3284 */ 3293 */
3285 force_reread = (new.dir->i_ino == old.dir->i_ino && 3294 force_reread = (new.dir->i_ino == old.dir->i_ino &&
3286 ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA)); 3295 ext4_test_inode_flag(new.dir, EXT4_INODE_INLINE_DATA));
3296
3297 old_file_type = old.de->file_type;
3298 if (whiteout) {
3299 /*
3300 * Do this before adding a new entry, so the old entry is sure
3301 * to be still pointing to the valid old entry.
3302 */
3303 retval = ext4_setent(handle, &old, whiteout->i_ino,
3304 EXT4_FT_CHRDEV);
3305 if (retval)
3306 goto end_rename;
3307 ext4_mark_inode_dirty(handle, whiteout);
3308 }
3287 if (!new.bh) { 3309 if (!new.bh) {
3288 retval = ext4_add_entry(handle, new.dentry, old.inode); 3310 retval = ext4_add_entry(handle, new.dentry, old.inode);
3289 if (retval) 3311 if (retval)
3290 goto end_rename; 3312 goto end_rename;
3291 } else { 3313 } else {
3292 retval = ext4_setent(handle, &new, 3314 retval = ext4_setent(handle, &new,
3293 old.inode->i_ino, old.de->file_type); 3315 old.inode->i_ino, old_file_type);
3294 if (retval) 3316 if (retval)
3295 goto end_rename; 3317 goto end_rename;
3296 } 3318 }
@@ -3305,10 +3327,12 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
3305 old.inode->i_ctime = ext4_current_time(old.inode); 3327 old.inode->i_ctime = ext4_current_time(old.inode);
3306 ext4_mark_inode_dirty(handle, old.inode); 3328 ext4_mark_inode_dirty(handle, old.inode);
3307 3329
3308 /* 3330 if (!whiteout) {
3309 * ok, that's it 3331 /*
3310 */ 3332 * ok, that's it
3311 ext4_rename_delete(handle, &old, force_reread); 3333 */
3334 ext4_rename_delete(handle, &old, force_reread);
3335 }
3312 3336
3313 if (new.inode) { 3337 if (new.inode) {
3314 ext4_dec_count(handle, new.inode); 3338 ext4_dec_count(handle, new.inode);
@@ -3344,6 +3368,12 @@ end_rename:
3344 brelse(old.dir_bh); 3368 brelse(old.dir_bh);
3345 brelse(old.bh); 3369 brelse(old.bh);
3346 brelse(new.bh); 3370 brelse(new.bh);
3371 if (whiteout) {
3372 if (retval)
3373 drop_nlink(whiteout);
3374 unlock_new_inode(whiteout);
3375 iput(whiteout);
3376 }
3347 if (handle) 3377 if (handle)
3348 ext4_journal_stop(handle); 3378 ext4_journal_stop(handle);
3349 return retval; 3379 return retval;
@@ -3476,18 +3506,15 @@ static int ext4_rename2(struct inode *old_dir, struct dentry *old_dentry,
3476 struct inode *new_dir, struct dentry *new_dentry, 3506 struct inode *new_dir, struct dentry *new_dentry,
3477 unsigned int flags) 3507 unsigned int flags)
3478{ 3508{
3479 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE)) 3509 if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
3480 return -EINVAL; 3510 return -EINVAL;
3481 3511
3482 if (flags & RENAME_EXCHANGE) { 3512 if (flags & RENAME_EXCHANGE) {
3483 return ext4_cross_rename(old_dir, old_dentry, 3513 return ext4_cross_rename(old_dir, old_dentry,
3484 new_dir, new_dentry); 3514 new_dir, new_dentry);
3485 } 3515 }
3486 /* 3516
3487 * Existence checking was done by the VFS, otherwise "RENAME_NOREPLACE" 3517 return ext4_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
3488 * is equivalent to regular rename.
3489 */
3490 return ext4_rename(old_dir, old_dentry, new_dir, new_dentry);
3491} 3518}
3492 3519
3493/* 3520/*
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index 1e43b905ff98..ca4588388fc3 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1081,7 +1081,7 @@ static void update_backups(struct super_block *sb, int blk_off, char *data,
1081 break; 1081 break;
1082 1082
1083 if (meta_bg == 0) 1083 if (meta_bg == 0)
1084 backup_block = group * bpg + blk_off; 1084 backup_block = ((ext4_fsblk_t)group) * bpg + blk_off;
1085 else 1085 else
1086 backup_block = (ext4_group_first_block_no(sb, group) + 1086 backup_block = (ext4_group_first_block_no(sb, group) +
1087 ext4_bg_has_super(sb, group)); 1087 ext4_bg_has_super(sb, group));
@@ -1212,8 +1212,7 @@ static int ext4_set_bitmap_checksums(struct super_block *sb,
1212{ 1212{
1213 struct buffer_head *bh; 1213 struct buffer_head *bh;
1214 1214
1215 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 1215 if (!ext4_has_metadata_csum(sb))
1216 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
1217 return 0; 1216 return 0;
1218 1217
1219 bh = ext4_get_bitmap(sb, group_data->inode_bitmap); 1218 bh = ext4_get_bitmap(sb, group_data->inode_bitmap);
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index 05c159218bc2..2c9e6864abd9 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -70,7 +70,6 @@ static void ext4_mark_recovery_complete(struct super_block *sb,
70static void ext4_clear_journal_err(struct super_block *sb, 70static void ext4_clear_journal_err(struct super_block *sb,
71 struct ext4_super_block *es); 71 struct ext4_super_block *es);
72static int ext4_sync_fs(struct super_block *sb, int wait); 72static int ext4_sync_fs(struct super_block *sb, int wait);
73static int ext4_sync_fs_nojournal(struct super_block *sb, int wait);
74static int ext4_remount(struct super_block *sb, int *flags, char *data); 73static int ext4_remount(struct super_block *sb, int *flags, char *data);
75static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf); 74static int ext4_statfs(struct dentry *dentry, struct kstatfs *buf);
76static int ext4_unfreeze(struct super_block *sb); 75static int ext4_unfreeze(struct super_block *sb);
@@ -141,8 +140,7 @@ static __le32 ext4_superblock_csum(struct super_block *sb,
141static int ext4_superblock_csum_verify(struct super_block *sb, 140static int ext4_superblock_csum_verify(struct super_block *sb,
142 struct ext4_super_block *es) 141 struct ext4_super_block *es)
143{ 142{
144 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 143 if (!ext4_has_metadata_csum(sb))
145 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
146 return 1; 144 return 1;
147 145
148 return es->s_checksum == ext4_superblock_csum(sb, es); 146 return es->s_checksum == ext4_superblock_csum(sb, es);
@@ -152,8 +150,7 @@ void ext4_superblock_csum_set(struct super_block *sb)
152{ 150{
153 struct ext4_super_block *es = EXT4_SB(sb)->s_es; 151 struct ext4_super_block *es = EXT4_SB(sb)->s_es;
154 152
155 if (!EXT4_HAS_RO_COMPAT_FEATURE(sb, 153 if (!ext4_has_metadata_csum(sb))
156 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
157 return; 154 return;
158 155
159 es->s_checksum = ext4_superblock_csum(sb, es); 156 es->s_checksum = ext4_superblock_csum(sb, es);
@@ -820,10 +817,9 @@ static void ext4_put_super(struct super_block *sb)
820 percpu_counter_destroy(&sbi->s_freeinodes_counter); 817 percpu_counter_destroy(&sbi->s_freeinodes_counter);
821 percpu_counter_destroy(&sbi->s_dirs_counter); 818 percpu_counter_destroy(&sbi->s_dirs_counter);
822 percpu_counter_destroy(&sbi->s_dirtyclusters_counter); 819 percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
823 percpu_counter_destroy(&sbi->s_extent_cache_cnt);
824 brelse(sbi->s_sbh); 820 brelse(sbi->s_sbh);
825#ifdef CONFIG_QUOTA 821#ifdef CONFIG_QUOTA
826 for (i = 0; i < MAXQUOTAS; i++) 822 for (i = 0; i < EXT4_MAXQUOTAS; i++)
827 kfree(sbi->s_qf_names[i]); 823 kfree(sbi->s_qf_names[i]);
828#endif 824#endif
829 825
@@ -885,6 +881,7 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
885 ext4_es_init_tree(&ei->i_es_tree); 881 ext4_es_init_tree(&ei->i_es_tree);
886 rwlock_init(&ei->i_es_lock); 882 rwlock_init(&ei->i_es_lock);
887 INIT_LIST_HEAD(&ei->i_es_lru); 883 INIT_LIST_HEAD(&ei->i_es_lru);
884 ei->i_es_all_nr = 0;
888 ei->i_es_lru_nr = 0; 885 ei->i_es_lru_nr = 0;
889 ei->i_touch_when = 0; 886 ei->i_touch_when = 0;
890 ei->i_reserved_data_blocks = 0; 887 ei->i_reserved_data_blocks = 0;
@@ -1002,7 +999,7 @@ static struct inode *ext4_nfs_get_inode(struct super_block *sb,
1002 * Currently we don't know the generation for parent directory, so 999 * Currently we don't know the generation for parent directory, so
1003 * a generation of 0 means "accept any" 1000 * a generation of 0 means "accept any"
1004 */ 1001 */
1005 inode = ext4_iget(sb, ino); 1002 inode = ext4_iget_normal(sb, ino);
1006 if (IS_ERR(inode)) 1003 if (IS_ERR(inode))
1007 return ERR_CAST(inode); 1004 return ERR_CAST(inode);
1008 if (generation && inode->i_generation != generation) { 1005 if (generation && inode->i_generation != generation) {
@@ -1124,25 +1121,6 @@ static const struct super_operations ext4_sops = {
1124 .bdev_try_to_free_page = bdev_try_to_free_page, 1121 .bdev_try_to_free_page = bdev_try_to_free_page,
1125}; 1122};
1126 1123
1127static const struct super_operations ext4_nojournal_sops = {
1128 .alloc_inode = ext4_alloc_inode,
1129 .destroy_inode = ext4_destroy_inode,
1130 .write_inode = ext4_write_inode,
1131 .dirty_inode = ext4_dirty_inode,
1132 .drop_inode = ext4_drop_inode,
1133 .evict_inode = ext4_evict_inode,
1134 .sync_fs = ext4_sync_fs_nojournal,
1135 .put_super = ext4_put_super,
1136 .statfs = ext4_statfs,
1137 .remount_fs = ext4_remount,
1138 .show_options = ext4_show_options,
1139#ifdef CONFIG_QUOTA
1140 .quota_read = ext4_quota_read,
1141 .quota_write = ext4_quota_write,
1142#endif
1143 .bdev_try_to_free_page = bdev_try_to_free_page,
1144};
1145
1146static const struct export_operations ext4_export_ops = { 1124static const struct export_operations ext4_export_ops = {
1147 .fh_to_dentry = ext4_fh_to_dentry, 1125 .fh_to_dentry = ext4_fh_to_dentry,
1148 .fh_to_parent = ext4_fh_to_parent, 1126 .fh_to_parent = ext4_fh_to_parent,
@@ -1712,13 +1690,6 @@ static int parse_options(char *options, struct super_block *sb,
1712 "not specified"); 1690 "not specified");
1713 return 0; 1691 return 0;
1714 } 1692 }
1715 } else {
1716 if (sbi->s_jquota_fmt) {
1717 ext4_msg(sb, KERN_ERR, "journaled quota format "
1718 "specified with no journaling "
1719 "enabled");
1720 return 0;
1721 }
1722 } 1693 }
1723#endif 1694#endif
1724 if (test_opt(sb, DIOREAD_NOLOCK)) { 1695 if (test_opt(sb, DIOREAD_NOLOCK)) {
@@ -2016,8 +1987,7 @@ static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
2016 __u16 crc = 0; 1987 __u16 crc = 0;
2017 __le32 le_group = cpu_to_le32(block_group); 1988 __le32 le_group = cpu_to_le32(block_group);
2018 1989
2019 if ((sbi->s_es->s_feature_ro_compat & 1990 if (ext4_has_metadata_csum(sbi->s_sb)) {
2020 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))) {
2021 /* Use new metadata_csum algorithm */ 1991 /* Use new metadata_csum algorithm */
2022 __le16 save_csum; 1992 __le16 save_csum;
2023 __u32 csum32; 1993 __u32 csum32;
@@ -2035,6 +2005,10 @@ static __le16 ext4_group_desc_csum(struct ext4_sb_info *sbi, __u32 block_group,
2035 } 2005 }
2036 2006
2037 /* old crc16 code */ 2007 /* old crc16 code */
2008 if (!(sbi->s_es->s_feature_ro_compat &
2009 cpu_to_le32(EXT4_FEATURE_RO_COMPAT_GDT_CSUM)))
2010 return 0;
2011
2038 offset = offsetof(struct ext4_group_desc, bg_checksum); 2012 offset = offsetof(struct ext4_group_desc, bg_checksum);
2039 2013
2040 crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid)); 2014 crc = crc16(~0, sbi->s_es->s_uuid, sizeof(sbi->s_es->s_uuid));
@@ -2191,7 +2165,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
2191 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) { 2165 if (EXT4_SB(sb)->s_mount_state & EXT4_ERROR_FS) {
2192 /* don't clear list on RO mount w/ errors */ 2166 /* don't clear list on RO mount w/ errors */
2193 if (es->s_last_orphan && !(s_flags & MS_RDONLY)) { 2167 if (es->s_last_orphan && !(s_flags & MS_RDONLY)) {
2194 jbd_debug(1, "Errors on filesystem, " 2168 ext4_msg(sb, KERN_INFO, "Errors on filesystem, "
2195 "clearing orphan list.\n"); 2169 "clearing orphan list.\n");
2196 es->s_last_orphan = 0; 2170 es->s_last_orphan = 0;
2197 } 2171 }
@@ -2207,7 +2181,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
2207 /* Needed for iput() to work correctly and not trash data */ 2181 /* Needed for iput() to work correctly and not trash data */
2208 sb->s_flags |= MS_ACTIVE; 2182 sb->s_flags |= MS_ACTIVE;
2209 /* Turn on quotas so that they are updated correctly */ 2183 /* Turn on quotas so that they are updated correctly */
2210 for (i = 0; i < MAXQUOTAS; i++) { 2184 for (i = 0; i < EXT4_MAXQUOTAS; i++) {
2211 if (EXT4_SB(sb)->s_qf_names[i]) { 2185 if (EXT4_SB(sb)->s_qf_names[i]) {
2212 int ret = ext4_quota_on_mount(sb, i); 2186 int ret = ext4_quota_on_mount(sb, i);
2213 if (ret < 0) 2187 if (ret < 0)
@@ -2263,7 +2237,7 @@ static void ext4_orphan_cleanup(struct super_block *sb,
2263 PLURAL(nr_truncates)); 2237 PLURAL(nr_truncates));
2264#ifdef CONFIG_QUOTA 2238#ifdef CONFIG_QUOTA
2265 /* Turn quotas off */ 2239 /* Turn quotas off */
2266 for (i = 0; i < MAXQUOTAS; i++) { 2240 for (i = 0; i < EXT4_MAXQUOTAS; i++) {
2267 if (sb_dqopt(sb)->files[i]) 2241 if (sb_dqopt(sb)->files[i])
2268 dquot_quota_off(sb, i); 2242 dquot_quota_off(sb, i);
2269 } 2243 }
@@ -2548,6 +2522,16 @@ static ssize_t sbi_ui_store(struct ext4_attr *a,
2548 return count; 2522 return count;
2549} 2523}
2550 2524
2525static ssize_t es_ui_show(struct ext4_attr *a,
2526 struct ext4_sb_info *sbi, char *buf)
2527{
2528
2529 unsigned int *ui = (unsigned int *) (((char *) sbi->s_es) +
2530 a->u.offset);
2531
2532 return snprintf(buf, PAGE_SIZE, "%u\n", *ui);
2533}
2534
2551static ssize_t reserved_clusters_show(struct ext4_attr *a, 2535static ssize_t reserved_clusters_show(struct ext4_attr *a,
2552 struct ext4_sb_info *sbi, char *buf) 2536 struct ext4_sb_info *sbi, char *buf)
2553{ 2537{
@@ -2601,14 +2585,29 @@ static struct ext4_attr ext4_attr_##_name = { \
2601 .offset = offsetof(struct ext4_sb_info, _elname),\ 2585 .offset = offsetof(struct ext4_sb_info, _elname),\
2602 }, \ 2586 }, \
2603} 2587}
2588
2589#define EXT4_ATTR_OFFSET_ES(_name,_mode,_show,_store,_elname) \
2590static struct ext4_attr ext4_attr_##_name = { \
2591 .attr = {.name = __stringify(_name), .mode = _mode }, \
2592 .show = _show, \
2593 .store = _store, \
2594 .u = { \
2595 .offset = offsetof(struct ext4_super_block, _elname), \
2596 }, \
2597}
2598
2604#define EXT4_ATTR(name, mode, show, store) \ 2599#define EXT4_ATTR(name, mode, show, store) \
2605static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) 2600static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store)
2606 2601
2607#define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL) 2602#define EXT4_INFO_ATTR(name) EXT4_ATTR(name, 0444, NULL, NULL)
2608#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL) 2603#define EXT4_RO_ATTR(name) EXT4_ATTR(name, 0444, name##_show, NULL)
2609#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store) 2604#define EXT4_RW_ATTR(name) EXT4_ATTR(name, 0644, name##_show, name##_store)
2605
2606#define EXT4_RO_ATTR_ES_UI(name, elname) \
2607 EXT4_ATTR_OFFSET_ES(name, 0444, es_ui_show, NULL, elname)
2610#define EXT4_RW_ATTR_SBI_UI(name, elname) \ 2608#define EXT4_RW_ATTR_SBI_UI(name, elname) \
2611 EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname) 2609 EXT4_ATTR_OFFSET(name, 0644, sbi_ui_show, sbi_ui_store, elname)
2610
2612#define ATTR_LIST(name) &ext4_attr_##name.attr 2611#define ATTR_LIST(name) &ext4_attr_##name.attr
2613#define EXT4_DEPRECATED_ATTR(_name, _val) \ 2612#define EXT4_DEPRECATED_ATTR(_name, _val) \
2614static struct ext4_attr ext4_attr_##_name = { \ 2613static struct ext4_attr ext4_attr_##_name = { \
@@ -2641,6 +2640,9 @@ EXT4_RW_ATTR_SBI_UI(warning_ratelimit_interval_ms, s_warning_ratelimit_state.int
2641EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst); 2640EXT4_RW_ATTR_SBI_UI(warning_ratelimit_burst, s_warning_ratelimit_state.burst);
2642EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval); 2641EXT4_RW_ATTR_SBI_UI(msg_ratelimit_interval_ms, s_msg_ratelimit_state.interval);
2643EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst); 2642EXT4_RW_ATTR_SBI_UI(msg_ratelimit_burst, s_msg_ratelimit_state.burst);
2643EXT4_RO_ATTR_ES_UI(errors_count, s_error_count);
2644EXT4_RO_ATTR_ES_UI(first_error_time, s_first_error_time);
2645EXT4_RO_ATTR_ES_UI(last_error_time, s_last_error_time);
2644 2646
2645static struct attribute *ext4_attrs[] = { 2647static struct attribute *ext4_attrs[] = {
2646 ATTR_LIST(delayed_allocation_blocks), 2648 ATTR_LIST(delayed_allocation_blocks),
@@ -2664,6 +2666,9 @@ static struct attribute *ext4_attrs[] = {
2664 ATTR_LIST(warning_ratelimit_burst), 2666 ATTR_LIST(warning_ratelimit_burst),
2665 ATTR_LIST(msg_ratelimit_interval_ms), 2667 ATTR_LIST(msg_ratelimit_interval_ms),
2666 ATTR_LIST(msg_ratelimit_burst), 2668 ATTR_LIST(msg_ratelimit_burst),
2669 ATTR_LIST(errors_count),
2670 ATTR_LIST(first_error_time),
2671 ATTR_LIST(last_error_time),
2667 NULL, 2672 NULL,
2668}; 2673};
2669 2674
@@ -2723,9 +2728,25 @@ static void ext4_feat_release(struct kobject *kobj)
2723 complete(&ext4_feat->f_kobj_unregister); 2728 complete(&ext4_feat->f_kobj_unregister);
2724} 2729}
2725 2730
2731static ssize_t ext4_feat_show(struct kobject *kobj,
2732 struct attribute *attr, char *buf)
2733{
2734 return snprintf(buf, PAGE_SIZE, "supported\n");
2735}
2736
2737/*
2738 * We can not use ext4_attr_show/store because it relies on the kobject
2739 * being embedded in the ext4_sb_info structure which is definitely not
2740 * true in this case.
2741 */
2742static const struct sysfs_ops ext4_feat_ops = {
2743 .show = ext4_feat_show,
2744 .store = NULL,
2745};
2746
2726static struct kobj_type ext4_feat_ktype = { 2747static struct kobj_type ext4_feat_ktype = {
2727 .default_attrs = ext4_feat_attrs, 2748 .default_attrs = ext4_feat_attrs,
2728 .sysfs_ops = &ext4_attr_ops, 2749 .sysfs_ops = &ext4_feat_ops,
2729 .release = ext4_feat_release, 2750 .release = ext4_feat_release,
2730}; 2751};
2731 2752
@@ -3179,8 +3200,7 @@ static int set_journal_csum_feature_set(struct super_block *sb)
3179 int compat, incompat; 3200 int compat, incompat;
3180 struct ext4_sb_info *sbi = EXT4_SB(sb); 3201 struct ext4_sb_info *sbi = EXT4_SB(sb);
3181 3202
3182 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 3203 if (ext4_has_metadata_csum(sb)) {
3183 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM)) {
3184 /* journal checksum v3 */ 3204 /* journal checksum v3 */
3185 compat = 0; 3205 compat = 0;
3186 incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3; 3206 incompat = JBD2_FEATURE_INCOMPAT_CSUM_V3;
@@ -3190,6 +3210,10 @@ static int set_journal_csum_feature_set(struct super_block *sb)
3190 incompat = 0; 3210 incompat = 0;
3191 } 3211 }
3192 3212
3213 jbd2_journal_clear_features(sbi->s_journal,
3214 JBD2_FEATURE_COMPAT_CHECKSUM, 0,
3215 JBD2_FEATURE_INCOMPAT_CSUM_V3 |
3216 JBD2_FEATURE_INCOMPAT_CSUM_V2);
3193 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) { 3217 if (test_opt(sb, JOURNAL_ASYNC_COMMIT)) {
3194 ret = jbd2_journal_set_features(sbi->s_journal, 3218 ret = jbd2_journal_set_features(sbi->s_journal,
3195 compat, 0, 3219 compat, 0,
@@ -3202,11 +3226,8 @@ static int set_journal_csum_feature_set(struct super_block *sb)
3202 jbd2_journal_clear_features(sbi->s_journal, 0, 0, 3226 jbd2_journal_clear_features(sbi->s_journal, 0, 0,
3203 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT); 3227 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
3204 } else { 3228 } else {
3205 jbd2_journal_clear_features(sbi->s_journal, 3229 jbd2_journal_clear_features(sbi->s_journal, 0, 0,
3206 JBD2_FEATURE_COMPAT_CHECKSUM, 0, 3230 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT);
3207 JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT |
3208 JBD2_FEATURE_INCOMPAT_CSUM_V3 |
3209 JBD2_FEATURE_INCOMPAT_CSUM_V2);
3210 } 3231 }
3211 3232
3212 return ret; 3233 return ret;
@@ -3436,7 +3457,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3436 logical_sb_block = sb_block; 3457 logical_sb_block = sb_block;
3437 } 3458 }
3438 3459
3439 if (!(bh = sb_bread(sb, logical_sb_block))) { 3460 if (!(bh = sb_bread_unmovable(sb, logical_sb_block))) {
3440 ext4_msg(sb, KERN_ERR, "unable to read superblock"); 3461 ext4_msg(sb, KERN_ERR, "unable to read superblock");
3441 goto out_fail; 3462 goto out_fail;
3442 } 3463 }
@@ -3487,8 +3508,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3487 } 3508 }
3488 3509
3489 /* Precompute checksum seed for all metadata */ 3510 /* Precompute checksum seed for all metadata */
3490 if (EXT4_HAS_RO_COMPAT_FEATURE(sb, 3511 if (ext4_has_metadata_csum(sb))
3491 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
3492 sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid, 3512 sbi->s_csum_seed = ext4_chksum(sbi, ~0, es->s_uuid,
3493 sizeof(es->s_uuid)); 3513 sizeof(es->s_uuid));
3494 3514
@@ -3506,6 +3526,10 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3506#ifdef CONFIG_EXT4_FS_POSIX_ACL 3526#ifdef CONFIG_EXT4_FS_POSIX_ACL
3507 set_opt(sb, POSIX_ACL); 3527 set_opt(sb, POSIX_ACL);
3508#endif 3528#endif
3529 /* don't forget to enable journal_csum when metadata_csum is enabled. */
3530 if (ext4_has_metadata_csum(sb))
3531 set_opt(sb, JOURNAL_CHECKSUM);
3532
3509 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA) 3533 if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_DATA)
3510 set_opt(sb, JOURNAL_DATA); 3534 set_opt(sb, JOURNAL_DATA);
3511 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED) 3535 else if ((def_mount_opts & EXT4_DEFM_JMODE) == EXT4_DEFM_JMODE_ORDERED)
@@ -3519,8 +3543,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3519 set_opt(sb, ERRORS_CONT); 3543 set_opt(sb, ERRORS_CONT);
3520 else 3544 else
3521 set_opt(sb, ERRORS_RO); 3545 set_opt(sb, ERRORS_RO);
3522 if (def_mount_opts & EXT4_DEFM_BLOCK_VALIDITY) 3546 /* block_validity enabled by default; disable with noblock_validity */
3523 set_opt(sb, BLOCK_VALIDITY); 3547 set_opt(sb, BLOCK_VALIDITY);
3524 if (def_mount_opts & EXT4_DEFM_DISCARD) 3548 if (def_mount_opts & EXT4_DEFM_DISCARD)
3525 set_opt(sb, DISCARD); 3549 set_opt(sb, DISCARD);
3526 3550
@@ -3646,7 +3670,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3646 brelse(bh); 3670 brelse(bh);
3647 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE; 3671 logical_sb_block = sb_block * EXT4_MIN_BLOCK_SIZE;
3648 offset = do_div(logical_sb_block, blocksize); 3672 offset = do_div(logical_sb_block, blocksize);
3649 bh = sb_bread(sb, logical_sb_block); 3673 bh = sb_bread_unmovable(sb, logical_sb_block);
3650 if (!bh) { 3674 if (!bh) {
3651 ext4_msg(sb, KERN_ERR, 3675 ext4_msg(sb, KERN_ERR,
3652 "Can't read superblock on 2nd try"); 3676 "Can't read superblock on 2nd try");
@@ -3868,7 +3892,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3868 3892
3869 for (i = 0; i < db_count; i++) { 3893 for (i = 0; i < db_count; i++) {
3870 block = descriptor_loc(sb, logical_sb_block, i); 3894 block = descriptor_loc(sb, logical_sb_block, i);
3871 sbi->s_group_desc[i] = sb_bread(sb, block); 3895 sbi->s_group_desc[i] = sb_bread_unmovable(sb, block);
3872 if (!sbi->s_group_desc[i]) { 3896 if (!sbi->s_group_desc[i]) {
3873 ext4_msg(sb, KERN_ERR, 3897 ext4_msg(sb, KERN_ERR,
3874 "can't read group descriptor %d", i); 3898 "can't read group descriptor %d", i);
@@ -3890,13 +3914,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3890 sbi->s_err_report.data = (unsigned long) sb; 3914 sbi->s_err_report.data = (unsigned long) sb;
3891 3915
3892 /* Register extent status tree shrinker */ 3916 /* Register extent status tree shrinker */
3893 ext4_es_register_shrinker(sbi); 3917 if (ext4_es_register_shrinker(sbi))
3894
3895 err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0, GFP_KERNEL);
3896 if (err) {
3897 ext4_msg(sb, KERN_ERR, "insufficient memory");
3898 goto failed_mount3; 3918 goto failed_mount3;
3899 }
3900 3919
3901 sbi->s_stripe = ext4_get_stripe_size(sbi); 3920 sbi->s_stripe = ext4_get_stripe_size(sbi);
3902 sbi->s_extent_max_zeroout_kb = 32; 3921 sbi->s_extent_max_zeroout_kb = 32;
@@ -3904,11 +3923,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3904 /* 3923 /*
3905 * set up enough so that it can read an inode 3924 * set up enough so that it can read an inode
3906 */ 3925 */
3907 if (!test_opt(sb, NOLOAD) && 3926 sb->s_op = &ext4_sops;
3908 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
3909 sb->s_op = &ext4_sops;
3910 else
3911 sb->s_op = &ext4_nojournal_sops;
3912 sb->s_export_op = &ext4_export_ops; 3927 sb->s_export_op = &ext4_export_ops;
3913 sb->s_xattr = ext4_xattr_handlers; 3928 sb->s_xattr = ext4_xattr_handlers;
3914#ifdef CONFIG_QUOTA 3929#ifdef CONFIG_QUOTA
@@ -3932,7 +3947,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3932 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) && 3947 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_MMP) &&
3933 !(sb->s_flags & MS_RDONLY)) 3948 !(sb->s_flags & MS_RDONLY))
3934 if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block))) 3949 if (ext4_multi_mount_protect(sb, le64_to_cpu(es->s_mmp_block)))
3935 goto failed_mount3; 3950 goto failed_mount3a;
3936 3951
3937 /* 3952 /*
3938 * The first inode we look at is the journal inode. Don't try 3953 * The first inode we look at is the journal inode. Don't try
@@ -3941,7 +3956,7 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3941 if (!test_opt(sb, NOLOAD) && 3956 if (!test_opt(sb, NOLOAD) &&
3942 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) { 3957 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL)) {
3943 if (ext4_load_journal(sb, es, journal_devnum)) 3958 if (ext4_load_journal(sb, es, journal_devnum))
3944 goto failed_mount3; 3959 goto failed_mount3a;
3945 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) && 3960 } else if (test_opt(sb, NOLOAD) && !(sb->s_flags & MS_RDONLY) &&
3946 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) { 3961 EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER)) {
3947 ext4_msg(sb, KERN_ERR, "required journal recovery " 3962 ext4_msg(sb, KERN_ERR, "required journal recovery "
@@ -4229,10 +4244,10 @@ failed_mount_wq:
4229 jbd2_journal_destroy(sbi->s_journal); 4244 jbd2_journal_destroy(sbi->s_journal);
4230 sbi->s_journal = NULL; 4245 sbi->s_journal = NULL;
4231 } 4246 }
4232failed_mount3: 4247failed_mount3a:
4233 ext4_es_unregister_shrinker(sbi); 4248 ext4_es_unregister_shrinker(sbi);
4249failed_mount3:
4234 del_timer_sync(&sbi->s_err_report); 4250 del_timer_sync(&sbi->s_err_report);
4235 percpu_counter_destroy(&sbi->s_extent_cache_cnt);
4236 if (sbi->s_mmp_tsk) 4251 if (sbi->s_mmp_tsk)
4237 kthread_stop(sbi->s_mmp_tsk); 4252 kthread_stop(sbi->s_mmp_tsk);
4238failed_mount2: 4253failed_mount2:
@@ -4247,7 +4262,7 @@ failed_mount:
4247 remove_proc_entry(sb->s_id, ext4_proc_root); 4262 remove_proc_entry(sb->s_id, ext4_proc_root);
4248 } 4263 }
4249#ifdef CONFIG_QUOTA 4264#ifdef CONFIG_QUOTA
4250 for (i = 0; i < MAXQUOTAS; i++) 4265 for (i = 0; i < EXT4_MAXQUOTAS; i++)
4251 kfree(sbi->s_qf_names[i]); 4266 kfree(sbi->s_qf_names[i]);
4252#endif 4267#endif
4253 ext4_blkdev_remove(sbi); 4268 ext4_blkdev_remove(sbi);
@@ -4375,6 +4390,15 @@ static journal_t *ext4_get_dev_journal(struct super_block *sb,
4375 goto out_bdev; 4390 goto out_bdev;
4376 } 4391 }
4377 4392
4393 if ((le32_to_cpu(es->s_feature_ro_compat) &
4394 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
4395 es->s_checksum != ext4_superblock_csum(sb, es)) {
4396 ext4_msg(sb, KERN_ERR, "external journal has "
4397 "corrupt superblock");
4398 brelse(bh);
4399 goto out_bdev;
4400 }
4401
4378 if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) { 4402 if (memcmp(EXT4_SB(sb)->s_es->s_journal_uuid, es->s_uuid, 16)) {
4379 ext4_msg(sb, KERN_ERR, "journal UUID does not match"); 4403 ext4_msg(sb, KERN_ERR, "journal UUID does not match");
4380 brelse(bh); 4404 brelse(bh);
@@ -4677,15 +4701,19 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
4677 * being sent at the end of the function. But we can skip it if 4701 * being sent at the end of the function. But we can skip it if
4678 * transaction_commit will do it for us. 4702 * transaction_commit will do it for us.
4679 */ 4703 */
4680 target = jbd2_get_latest_transaction(sbi->s_journal); 4704 if (sbi->s_journal) {
4681 if (wait && sbi->s_journal->j_flags & JBD2_BARRIER && 4705 target = jbd2_get_latest_transaction(sbi->s_journal);
4682 !jbd2_trans_will_send_data_barrier(sbi->s_journal, target)) 4706 if (wait && sbi->s_journal->j_flags & JBD2_BARRIER &&
4707 !jbd2_trans_will_send_data_barrier(sbi->s_journal, target))
4708 needs_barrier = true;
4709
4710 if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
4711 if (wait)
4712 ret = jbd2_log_wait_commit(sbi->s_journal,
4713 target);
4714 }
4715 } else if (wait && test_opt(sb, BARRIER))
4683 needs_barrier = true; 4716 needs_barrier = true;
4684
4685 if (jbd2_journal_start_commit(sbi->s_journal, &target)) {
4686 if (wait)
4687 ret = jbd2_log_wait_commit(sbi->s_journal, target);
4688 }
4689 if (needs_barrier) { 4717 if (needs_barrier) {
4690 int err; 4718 int err;
4691 err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL); 4719 err = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
@@ -4696,19 +4724,6 @@ static int ext4_sync_fs(struct super_block *sb, int wait)
4696 return ret; 4724 return ret;
4697} 4725}
4698 4726
4699static int ext4_sync_fs_nojournal(struct super_block *sb, int wait)
4700{
4701 int ret = 0;
4702
4703 trace_ext4_sync_fs(sb, wait);
4704 flush_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
4705 dquot_writeback_dquots(sb, -1);
4706 if (wait && test_opt(sb, BARRIER))
4707 ret = blkdev_issue_flush(sb->s_bdev, GFP_KERNEL, NULL);
4708
4709 return ret;
4710}
4711
4712/* 4727/*
4713 * LVM calls this function before a (read-only) snapshot is created. This 4728 * LVM calls this function before a (read-only) snapshot is created. This
4714 * gives us a chance to flush the journal completely and mark the fs clean. 4729 * gives us a chance to flush the journal completely and mark the fs clean.
@@ -4727,23 +4742,26 @@ static int ext4_freeze(struct super_block *sb)
4727 4742
4728 journal = EXT4_SB(sb)->s_journal; 4743 journal = EXT4_SB(sb)->s_journal;
4729 4744
4730 /* Now we set up the journal barrier. */ 4745 if (journal) {
4731 jbd2_journal_lock_updates(journal); 4746 /* Now we set up the journal barrier. */
4747 jbd2_journal_lock_updates(journal);
4732 4748
4733 /* 4749 /*
4734 * Don't clear the needs_recovery flag if we failed to flush 4750 * Don't clear the needs_recovery flag if we failed to
4735 * the journal. 4751 * flush the journal.
4736 */ 4752 */
4737 error = jbd2_journal_flush(journal); 4753 error = jbd2_journal_flush(journal);
4738 if (error < 0) 4754 if (error < 0)
4739 goto out; 4755 goto out;
4756 }
4740 4757
4741 /* Journal blocked and flushed, clear needs_recovery flag. */ 4758 /* Journal blocked and flushed, clear needs_recovery flag. */
4742 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER); 4759 EXT4_CLEAR_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_RECOVER);
4743 error = ext4_commit_super(sb, 1); 4760 error = ext4_commit_super(sb, 1);
4744out: 4761out:
4745 /* we rely on upper layer to stop further updates */ 4762 if (journal)
4746 jbd2_journal_unlock_updates(EXT4_SB(sb)->s_journal); 4763 /* we rely on upper layer to stop further updates */
4764 jbd2_journal_unlock_updates(journal);
4747 return error; 4765 return error;
4748} 4766}
4749 4767
@@ -4774,7 +4792,7 @@ struct ext4_mount_options {
4774 u32 s_min_batch_time, s_max_batch_time; 4792 u32 s_min_batch_time, s_max_batch_time;
4775#ifdef CONFIG_QUOTA 4793#ifdef CONFIG_QUOTA
4776 int s_jquota_fmt; 4794 int s_jquota_fmt;
4777 char *s_qf_names[MAXQUOTAS]; 4795 char *s_qf_names[EXT4_MAXQUOTAS];
4778#endif 4796#endif
4779}; 4797};
4780 4798
@@ -4804,7 +4822,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4804 old_opts.s_max_batch_time = sbi->s_max_batch_time; 4822 old_opts.s_max_batch_time = sbi->s_max_batch_time;
4805#ifdef CONFIG_QUOTA 4823#ifdef CONFIG_QUOTA
4806 old_opts.s_jquota_fmt = sbi->s_jquota_fmt; 4824 old_opts.s_jquota_fmt = sbi->s_jquota_fmt;
4807 for (i = 0; i < MAXQUOTAS; i++) 4825 for (i = 0; i < EXT4_MAXQUOTAS; i++)
4808 if (sbi->s_qf_names[i]) { 4826 if (sbi->s_qf_names[i]) {
4809 old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i], 4827 old_opts.s_qf_names[i] = kstrdup(sbi->s_qf_names[i],
4810 GFP_KERNEL); 4828 GFP_KERNEL);
@@ -4828,6 +4846,14 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4828 goto restore_opts; 4846 goto restore_opts;
4829 } 4847 }
4830 4848
4849 if ((old_opts.s_mount_opt & EXT4_MOUNT_JOURNAL_CHECKSUM) ^
4850 test_opt(sb, JOURNAL_CHECKSUM)) {
4851 ext4_msg(sb, KERN_ERR, "changing journal_checksum "
4852 "during remount not supported");
4853 err = -EINVAL;
4854 goto restore_opts;
4855 }
4856
4831 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) { 4857 if (test_opt(sb, DATA_FLAGS) == EXT4_MOUNT_JOURNAL_DATA) {
4832 if (test_opt2(sb, EXPLICIT_DELALLOC)) { 4858 if (test_opt2(sb, EXPLICIT_DELALLOC)) {
4833 ext4_msg(sb, KERN_ERR, "can't mount with " 4859 ext4_msg(sb, KERN_ERR, "can't mount with "
@@ -4965,7 +4991,7 @@ static int ext4_remount(struct super_block *sb, int *flags, char *data)
4965 4991
4966#ifdef CONFIG_QUOTA 4992#ifdef CONFIG_QUOTA
4967 /* Release old quota file names */ 4993 /* Release old quota file names */
4968 for (i = 0; i < MAXQUOTAS; i++) 4994 for (i = 0; i < EXT4_MAXQUOTAS; i++)
4969 kfree(old_opts.s_qf_names[i]); 4995 kfree(old_opts.s_qf_names[i]);
4970 if (enable_quota) { 4996 if (enable_quota) {
4971 if (sb_any_quota_suspended(sb)) 4997 if (sb_any_quota_suspended(sb))
@@ -4994,7 +5020,7 @@ restore_opts:
4994 sbi->s_max_batch_time = old_opts.s_max_batch_time; 5020 sbi->s_max_batch_time = old_opts.s_max_batch_time;
4995#ifdef CONFIG_QUOTA 5021#ifdef CONFIG_QUOTA
4996 sbi->s_jquota_fmt = old_opts.s_jquota_fmt; 5022 sbi->s_jquota_fmt = old_opts.s_jquota_fmt;
4997 for (i = 0; i < MAXQUOTAS; i++) { 5023 for (i = 0; i < EXT4_MAXQUOTAS; i++) {
4998 kfree(sbi->s_qf_names[i]); 5024 kfree(sbi->s_qf_names[i]);
4999 sbi->s_qf_names[i] = old_opts.s_qf_names[i]; 5025 sbi->s_qf_names[i] = old_opts.s_qf_names[i];
5000 } 5026 }
@@ -5197,7 +5223,7 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
5197{ 5223{
5198 int err; 5224 int err;
5199 struct inode *qf_inode; 5225 struct inode *qf_inode;
5200 unsigned long qf_inums[MAXQUOTAS] = { 5226 unsigned long qf_inums[EXT4_MAXQUOTAS] = {
5201 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), 5227 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
5202 le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) 5228 le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
5203 }; 5229 };
@@ -5225,13 +5251,13 @@ static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
5225static int ext4_enable_quotas(struct super_block *sb) 5251static int ext4_enable_quotas(struct super_block *sb)
5226{ 5252{
5227 int type, err = 0; 5253 int type, err = 0;
5228 unsigned long qf_inums[MAXQUOTAS] = { 5254 unsigned long qf_inums[EXT4_MAXQUOTAS] = {
5229 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum), 5255 le32_to_cpu(EXT4_SB(sb)->s_es->s_usr_quota_inum),
5230 le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum) 5256 le32_to_cpu(EXT4_SB(sb)->s_es->s_grp_quota_inum)
5231 }; 5257 };
5232 5258
5233 sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE; 5259 sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
5234 for (type = 0; type < MAXQUOTAS; type++) { 5260 for (type = 0; type < EXT4_MAXQUOTAS; type++) {
5235 if (qf_inums[type]) { 5261 if (qf_inums[type]) {
5236 err = ext4_quota_enable(sb, type, QFMT_VFS_V1, 5262 err = ext4_quota_enable(sb, type, QFMT_VFS_V1,
5237 DQUOT_USAGE_ENABLED); 5263 DQUOT_USAGE_ENABLED);
@@ -5309,7 +5335,6 @@ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
5309{ 5335{
5310 struct inode *inode = sb_dqopt(sb)->files[type]; 5336 struct inode *inode = sb_dqopt(sb)->files[type];
5311 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 5337 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
5312 int err = 0;
5313 int offset = off & (sb->s_blocksize - 1); 5338 int offset = off & (sb->s_blocksize - 1);
5314 int tocopy; 5339 int tocopy;
5315 size_t toread; 5340 size_t toread;
@@ -5324,9 +5349,9 @@ static ssize_t ext4_quota_read(struct super_block *sb, int type, char *data,
5324 while (toread > 0) { 5349 while (toread > 0) {
5325 tocopy = sb->s_blocksize - offset < toread ? 5350 tocopy = sb->s_blocksize - offset < toread ?
5326 sb->s_blocksize - offset : toread; 5351 sb->s_blocksize - offset : toread;
5327 bh = ext4_bread(NULL, inode, blk, 0, &err); 5352 bh = ext4_bread(NULL, inode, blk, 0);
5328 if (err) 5353 if (IS_ERR(bh))
5329 return err; 5354 return PTR_ERR(bh);
5330 if (!bh) /* A hole? */ 5355 if (!bh) /* A hole? */
5331 memset(data, 0, tocopy); 5356 memset(data, 0, tocopy);
5332 else 5357 else
@@ -5347,8 +5372,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
5347{ 5372{
5348 struct inode *inode = sb_dqopt(sb)->files[type]; 5373 struct inode *inode = sb_dqopt(sb)->files[type];
5349 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb); 5374 ext4_lblk_t blk = off >> EXT4_BLOCK_SIZE_BITS(sb);
5350 int err = 0; 5375 int err, offset = off & (sb->s_blocksize - 1);
5351 int offset = off & (sb->s_blocksize - 1);
5352 struct buffer_head *bh; 5376 struct buffer_head *bh;
5353 handle_t *handle = journal_current_handle(); 5377 handle_t *handle = journal_current_handle();
5354 5378
@@ -5369,14 +5393,16 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
5369 return -EIO; 5393 return -EIO;
5370 } 5394 }
5371 5395
5372 bh = ext4_bread(handle, inode, blk, 1, &err); 5396 bh = ext4_bread(handle, inode, blk, 1);
5397 if (IS_ERR(bh))
5398 return PTR_ERR(bh);
5373 if (!bh) 5399 if (!bh)
5374 goto out; 5400 goto out;
5375 BUFFER_TRACE(bh, "get write access"); 5401 BUFFER_TRACE(bh, "get write access");
5376 err = ext4_journal_get_write_access(handle, bh); 5402 err = ext4_journal_get_write_access(handle, bh);
5377 if (err) { 5403 if (err) {
5378 brelse(bh); 5404 brelse(bh);
5379 goto out; 5405 return err;
5380 } 5406 }
5381 lock_buffer(bh); 5407 lock_buffer(bh);
5382 memcpy(bh->b_data+offset, data, len); 5408 memcpy(bh->b_data+offset, data, len);
@@ -5385,8 +5411,6 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
5385 err = ext4_handle_dirty_metadata(handle, NULL, bh); 5411 err = ext4_handle_dirty_metadata(handle, NULL, bh);
5386 brelse(bh); 5412 brelse(bh);
5387out: 5413out:
5388 if (err)
5389 return err;
5390 if (inode->i_size < off + len) { 5414 if (inode->i_size < off + len) {
5391 i_size_write(inode, off + len); 5415 i_size_write(inode, off + len);
5392 EXT4_I(inode)->i_disksize = inode->i_size; 5416 EXT4_I(inode)->i_disksize = inode->i_size;
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index e7387337060c..1e09fc77395c 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -142,8 +142,7 @@ static int ext4_xattr_block_csum_verify(struct inode *inode,
142 sector_t block_nr, 142 sector_t block_nr,
143 struct ext4_xattr_header *hdr) 143 struct ext4_xattr_header *hdr)
144{ 144{
145 if (EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 145 if (ext4_has_metadata_csum(inode->i_sb) &&
146 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM) &&
147 (hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr))) 146 (hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr)))
148 return 0; 147 return 0;
149 return 1; 148 return 1;
@@ -153,8 +152,7 @@ static void ext4_xattr_block_csum_set(struct inode *inode,
153 sector_t block_nr, 152 sector_t block_nr,
154 struct ext4_xattr_header *hdr) 153 struct ext4_xattr_header *hdr)
155{ 154{
156 if (!EXT4_HAS_RO_COMPAT_FEATURE(inode->i_sb, 155 if (!ext4_has_metadata_csum(inode->i_sb))
157 EXT4_FEATURE_RO_COMPAT_METADATA_CSUM))
158 return; 156 return;
159 157
160 hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr); 158 hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr);
@@ -190,14 +188,28 @@ ext4_listxattr(struct dentry *dentry, char *buffer, size_t size)
190} 188}
191 189
192static int 190static int
193ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end) 191ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end,
192 void *value_start)
194{ 193{
195 while (!IS_LAST_ENTRY(entry)) { 194 struct ext4_xattr_entry *e = entry;
196 struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(entry); 195
196 while (!IS_LAST_ENTRY(e)) {
197 struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e);
197 if ((void *)next >= end) 198 if ((void *)next >= end)
198 return -EIO; 199 return -EIO;
199 entry = next; 200 e = next;
200 } 201 }
202
203 while (!IS_LAST_ENTRY(entry)) {
204 if (entry->e_value_size != 0 &&
205 (value_start + le16_to_cpu(entry->e_value_offs) <
206 (void *)e + sizeof(__u32) ||
207 value_start + le16_to_cpu(entry->e_value_offs) +
208 le32_to_cpu(entry->e_value_size) > end))
209 return -EIO;
210 entry = EXT4_XATTR_NEXT(entry);
211 }
212
201 return 0; 213 return 0;
202} 214}
203 215
@@ -214,7 +226,8 @@ ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh)
214 return -EIO; 226 return -EIO;
215 if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh))) 227 if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh)))
216 return -EIO; 228 return -EIO;
217 error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size); 229 error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size,
230 bh->b_data);
218 if (!error) 231 if (!error)
219 set_buffer_verified(bh); 232 set_buffer_verified(bh);
220 return error; 233 return error;
@@ -331,7 +344,7 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
331 header = IHDR(inode, raw_inode); 344 header = IHDR(inode, raw_inode);
332 entry = IFIRST(header); 345 entry = IFIRST(header);
333 end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; 346 end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
334 error = ext4_xattr_check_names(entry, end); 347 error = ext4_xattr_check_names(entry, end, entry);
335 if (error) 348 if (error)
336 goto cleanup; 349 goto cleanup;
337 error = ext4_xattr_find_entry(&entry, name_index, name, 350 error = ext4_xattr_find_entry(&entry, name_index, name,
@@ -463,7 +476,7 @@ ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
463 raw_inode = ext4_raw_inode(&iloc); 476 raw_inode = ext4_raw_inode(&iloc);
464 header = IHDR(inode, raw_inode); 477 header = IHDR(inode, raw_inode);
465 end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; 478 end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
466 error = ext4_xattr_check_names(IFIRST(header), end); 479 error = ext4_xattr_check_names(IFIRST(header), end, IFIRST(header));
467 if (error) 480 if (error)
468 goto cleanup; 481 goto cleanup;
469 error = ext4_xattr_list_entries(dentry, IFIRST(header), 482 error = ext4_xattr_list_entries(dentry, IFIRST(header),
@@ -899,14 +912,8 @@ inserted:
899 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 912 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
900 goal = goal & EXT4_MAX_BLOCK_FILE_PHYS; 913 goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;
901 914
902 /*
903 * take i_data_sem because we will test
904 * i_delalloc_reserved_flag in ext4_mb_new_blocks
905 */
906 down_read(&EXT4_I(inode)->i_data_sem);
907 block = ext4_new_meta_blocks(handle, inode, goal, 0, 915 block = ext4_new_meta_blocks(handle, inode, goal, 0,
908 NULL, &error); 916 NULL, &error);
909 up_read((&EXT4_I(inode)->i_data_sem));
910 if (error) 917 if (error)
911 goto cleanup; 918 goto cleanup;
912 919
@@ -986,7 +993,8 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
986 is->s.here = is->s.first; 993 is->s.here = is->s.first;
987 is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; 994 is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
988 if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) { 995 if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
989 error = ext4_xattr_check_names(IFIRST(header), is->s.end); 996 error = ext4_xattr_check_names(IFIRST(header), is->s.end,
997 IFIRST(header));
990 if (error) 998 if (error)
991 return error; 999 return error;
992 /* Find the named attribute. */ 1000 /* Find the named attribute. */