aboutsummaryrefslogtreecommitdiffstats
path: root/fs/ext4
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2013-03-21 20:56:10 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2013-03-21 20:56:10 -0400
commitd3c926264a92e5ea448add3e883530e1edad3ce2 (patch)
treee57d99880376c49b933bef4e7ab1ed7651d5ca4d /fs/ext4
parent0a7e453103b9718d357688b83bb968ee108cc874 (diff)
parent2b405bfa84063bfa35621d2d6879f52693c614b0 (diff)
Merge tag 'ext4_for_linue' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 fixes from Ted Ts'o: "Fix a number of regression and other bugs in ext4, most of which were relatively obscure cornercases or races that were found using regression tests." * tag 'ext4_for_linue' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (21 commits) ext4: fix data=journal fast mount/umount hang ext4: fix ext4_evict_inode() racing against workqueue processing code ext4: fix memory leakage in mext_check_coverage ext4: use s_extent_max_zeroout_kb value as number of kb ext4: use atomic64_t for the per-flexbg free_clusters count jbd2: fix use after free in jbd2_journal_dirty_metadata() ext4: reserve metadata block for every delayed write ext4: update reserved space after the 'correction' ext4: do not use yield() ext4: remove unused variable in ext4_free_blocks() ext4: fix WARN_ON from ext4_releasepage() ext4: fix the wrong number of the allocated blocks in ext4_split_extent() ext4: update extent status tree after an extent is zeroed out ext4: fix wrong m_len value after unwritten extent conversion ext4: add self-testing infrastructure to do a sanity check ext4: avoid a potential overflow in ext4_es_can_be_merged() ext4: invalidate extent status tree during extent migration ext4: remove unnecessary wait for extent conversion in ext4_fallocate() ext4: add warning to ext4_convert_unwritten_extents_endio ext4: disable merging of uninitialized extents ...
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/ext4.h8
-rw-r--r--fs/ext4/extents.c105
-rw-r--r--fs/ext4/extents_status.c212
-rw-r--r--fs/ext4/extents_status.h9
-rw-r--r--fs/ext4/ialloc.c4
-rw-r--r--fs/ext4/inode.c182
-rw-r--r--fs/ext4/mballoc.c23
-rw-r--r--fs/ext4/move_extent.c43
-rw-r--r--fs/ext4/page-io.c12
-rw-r--r--fs/ext4/resize.c4
-rw-r--r--fs/ext4/super.c4
11 files changed, 530 insertions, 76 deletions
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 4a01ba315262..3b83cd604796 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -335,9 +335,9 @@ struct ext4_group_desc
335 */ 335 */
336 336
337struct flex_groups { 337struct flex_groups {
338 atomic_t free_inodes; 338 atomic64_t free_clusters;
339 atomic_t free_clusters; 339 atomic_t free_inodes;
340 atomic_t used_dirs; 340 atomic_t used_dirs;
341}; 341};
342 342
343#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */ 343#define EXT4_BG_INODE_UNINIT 0x0001 /* Inode table/bitmap not in use */
@@ -2617,7 +2617,7 @@ extern int ext4_move_extents(struct file *o_filp, struct file *d_filp,
2617extern int __init ext4_init_pageio(void); 2617extern int __init ext4_init_pageio(void);
2618extern void ext4_add_complete_io(ext4_io_end_t *io_end); 2618extern void ext4_add_complete_io(ext4_io_end_t *io_end);
2619extern void ext4_exit_pageio(void); 2619extern void ext4_exit_pageio(void);
2620extern void ext4_ioend_wait(struct inode *); 2620extern void ext4_ioend_shutdown(struct inode *);
2621extern void ext4_free_io_end(ext4_io_end_t *io); 2621extern void ext4_free_io_end(ext4_io_end_t *io);
2622extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags); 2622extern ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags);
2623extern void ext4_end_io_work(struct work_struct *work); 2623extern void ext4_end_io_work(struct work_struct *work);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 28dd8eeea6a9..56efcaadf848 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -1584,10 +1584,12 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
1584 unsigned short ext1_ee_len, ext2_ee_len, max_len; 1584 unsigned short ext1_ee_len, ext2_ee_len, max_len;
1585 1585
1586 /* 1586 /*
1587 * Make sure that either both extents are uninitialized, or 1587 * Make sure that both extents are initialized. We don't merge
1588 * both are _not_. 1588 * uninitialized extents so that we can be sure that end_io code has
1589 * the extent that was written properly split out and conversion to
1590 * initialized is trivial.
1589 */ 1591 */
1590 if (ext4_ext_is_uninitialized(ex1) ^ ext4_ext_is_uninitialized(ex2)) 1592 if (ext4_ext_is_uninitialized(ex1) || ext4_ext_is_uninitialized(ex2))
1591 return 0; 1593 return 0;
1592 1594
1593 if (ext4_ext_is_uninitialized(ex1)) 1595 if (ext4_ext_is_uninitialized(ex1))
@@ -2923,7 +2925,7 @@ static int ext4_split_extent_at(handle_t *handle,
2923{ 2925{
2924 ext4_fsblk_t newblock; 2926 ext4_fsblk_t newblock;
2925 ext4_lblk_t ee_block; 2927 ext4_lblk_t ee_block;
2926 struct ext4_extent *ex, newex, orig_ex; 2928 struct ext4_extent *ex, newex, orig_ex, zero_ex;
2927 struct ext4_extent *ex2 = NULL; 2929 struct ext4_extent *ex2 = NULL;
2928 unsigned int ee_len, depth; 2930 unsigned int ee_len, depth;
2929 int err = 0; 2931 int err = 0;
@@ -2943,6 +2945,10 @@ static int ext4_split_extent_at(handle_t *handle,
2943 newblock = split - ee_block + ext4_ext_pblock(ex); 2945 newblock = split - ee_block + ext4_ext_pblock(ex);
2944 2946
2945 BUG_ON(split < ee_block || split >= (ee_block + ee_len)); 2947 BUG_ON(split < ee_block || split >= (ee_block + ee_len));
2948 BUG_ON(!ext4_ext_is_uninitialized(ex) &&
2949 split_flag & (EXT4_EXT_MAY_ZEROOUT |
2950 EXT4_EXT_MARK_UNINIT1 |
2951 EXT4_EXT_MARK_UNINIT2));
2946 2952
2947 err = ext4_ext_get_access(handle, inode, path + depth); 2953 err = ext4_ext_get_access(handle, inode, path + depth);
2948 if (err) 2954 if (err)
@@ -2990,12 +2996,26 @@ static int ext4_split_extent_at(handle_t *handle,
2990 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags); 2996 err = ext4_ext_insert_extent(handle, inode, path, &newex, flags);
2991 if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) { 2997 if (err == -ENOSPC && (EXT4_EXT_MAY_ZEROOUT & split_flag)) {
2992 if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) { 2998 if (split_flag & (EXT4_EXT_DATA_VALID1|EXT4_EXT_DATA_VALID2)) {
2993 if (split_flag & EXT4_EXT_DATA_VALID1) 2999 if (split_flag & EXT4_EXT_DATA_VALID1) {
2994 err = ext4_ext_zeroout(inode, ex2); 3000 err = ext4_ext_zeroout(inode, ex2);
2995 else 3001 zero_ex.ee_block = ex2->ee_block;
3002 zero_ex.ee_len = ext4_ext_get_actual_len(ex2);
3003 ext4_ext_store_pblock(&zero_ex,
3004 ext4_ext_pblock(ex2));
3005 } else {
2996 err = ext4_ext_zeroout(inode, ex); 3006 err = ext4_ext_zeroout(inode, ex);
2997 } else 3007 zero_ex.ee_block = ex->ee_block;
3008 zero_ex.ee_len = ext4_ext_get_actual_len(ex);
3009 ext4_ext_store_pblock(&zero_ex,
3010 ext4_ext_pblock(ex));
3011 }
3012 } else {
2998 err = ext4_ext_zeroout(inode, &orig_ex); 3013 err = ext4_ext_zeroout(inode, &orig_ex);
3014 zero_ex.ee_block = orig_ex.ee_block;
3015 zero_ex.ee_len = ext4_ext_get_actual_len(&orig_ex);
3016 ext4_ext_store_pblock(&zero_ex,
3017 ext4_ext_pblock(&orig_ex));
3018 }
2999 3019
3000 if (err) 3020 if (err)
3001 goto fix_extent_len; 3021 goto fix_extent_len;
@@ -3003,6 +3023,12 @@ static int ext4_split_extent_at(handle_t *handle,
3003 ex->ee_len = cpu_to_le16(ee_len); 3023 ex->ee_len = cpu_to_le16(ee_len);
3004 ext4_ext_try_to_merge(handle, inode, path, ex); 3024 ext4_ext_try_to_merge(handle, inode, path, ex);
3005 err = ext4_ext_dirty(handle, inode, path + path->p_depth); 3025 err = ext4_ext_dirty(handle, inode, path + path->p_depth);
3026 if (err)
3027 goto fix_extent_len;
3028
3029 /* update extent status tree */
3030 err = ext4_es_zeroout(inode, &zero_ex);
3031
3006 goto out; 3032 goto out;
3007 } else if (err) 3033 } else if (err)
3008 goto fix_extent_len; 3034 goto fix_extent_len;
@@ -3041,6 +3067,7 @@ static int ext4_split_extent(handle_t *handle,
3041 int err = 0; 3067 int err = 0;
3042 int uninitialized; 3068 int uninitialized;
3043 int split_flag1, flags1; 3069 int split_flag1, flags1;
3070 int allocated = map->m_len;
3044 3071
3045 depth = ext_depth(inode); 3072 depth = ext_depth(inode);
3046 ex = path[depth].p_ext; 3073 ex = path[depth].p_ext;
@@ -3060,20 +3087,29 @@ static int ext4_split_extent(handle_t *handle,
3060 map->m_lblk + map->m_len, split_flag1, flags1); 3087 map->m_lblk + map->m_len, split_flag1, flags1);
3061 if (err) 3088 if (err)
3062 goto out; 3089 goto out;
3090 } else {
3091 allocated = ee_len - (map->m_lblk - ee_block);
3063 } 3092 }
3064 3093 /*
3094 * Update path is required because previous ext4_split_extent_at() may
3095 * result in split of original leaf or extent zeroout.
3096 */
3065 ext4_ext_drop_refs(path); 3097 ext4_ext_drop_refs(path);
3066 path = ext4_ext_find_extent(inode, map->m_lblk, path); 3098 path = ext4_ext_find_extent(inode, map->m_lblk, path);
3067 if (IS_ERR(path)) 3099 if (IS_ERR(path))
3068 return PTR_ERR(path); 3100 return PTR_ERR(path);
3101 depth = ext_depth(inode);
3102 ex = path[depth].p_ext;
3103 uninitialized = ext4_ext_is_uninitialized(ex);
3104 split_flag1 = 0;
3069 3105
3070 if (map->m_lblk >= ee_block) { 3106 if (map->m_lblk >= ee_block) {
3071 split_flag1 = split_flag & (EXT4_EXT_MAY_ZEROOUT | 3107 split_flag1 = split_flag & EXT4_EXT_DATA_VALID2;
3072 EXT4_EXT_DATA_VALID2); 3108 if (uninitialized) {
3073 if (uninitialized)
3074 split_flag1 |= EXT4_EXT_MARK_UNINIT1; 3109 split_flag1 |= EXT4_EXT_MARK_UNINIT1;
3075 if (split_flag & EXT4_EXT_MARK_UNINIT2) 3110 split_flag1 |= split_flag & (EXT4_EXT_MAY_ZEROOUT |
3076 split_flag1 |= EXT4_EXT_MARK_UNINIT2; 3111 EXT4_EXT_MARK_UNINIT2);
3112 }
3077 err = ext4_split_extent_at(handle, inode, path, 3113 err = ext4_split_extent_at(handle, inode, path,
3078 map->m_lblk, split_flag1, flags); 3114 map->m_lblk, split_flag1, flags);
3079 if (err) 3115 if (err)
@@ -3082,7 +3118,7 @@ static int ext4_split_extent(handle_t *handle,
3082 3118
3083 ext4_ext_show_leaf(inode, path); 3119 ext4_ext_show_leaf(inode, path);
3084out: 3120out:
3085 return err ? err : map->m_len; 3121 return err ? err : allocated;
3086} 3122}
3087 3123
3088/* 3124/*
@@ -3137,6 +3173,7 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3137 ee_block = le32_to_cpu(ex->ee_block); 3173 ee_block = le32_to_cpu(ex->ee_block);
3138 ee_len = ext4_ext_get_actual_len(ex); 3174 ee_len = ext4_ext_get_actual_len(ex);
3139 allocated = ee_len - (map->m_lblk - ee_block); 3175 allocated = ee_len - (map->m_lblk - ee_block);
3176 zero_ex.ee_len = 0;
3140 3177
3141 trace_ext4_ext_convert_to_initialized_enter(inode, map, ex); 3178 trace_ext4_ext_convert_to_initialized_enter(inode, map, ex);
3142 3179
@@ -3227,13 +3264,16 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3227 3264
3228 if (EXT4_EXT_MAY_ZEROOUT & split_flag) 3265 if (EXT4_EXT_MAY_ZEROOUT & split_flag)
3229 max_zeroout = sbi->s_extent_max_zeroout_kb >> 3266 max_zeroout = sbi->s_extent_max_zeroout_kb >>
3230 inode->i_sb->s_blocksize_bits; 3267 (inode->i_sb->s_blocksize_bits - 10);
3231 3268
3232 /* If extent is less than s_max_zeroout_kb, zeroout directly */ 3269 /* If extent is less than s_max_zeroout_kb, zeroout directly */
3233 if (max_zeroout && (ee_len <= max_zeroout)) { 3270 if (max_zeroout && (ee_len <= max_zeroout)) {
3234 err = ext4_ext_zeroout(inode, ex); 3271 err = ext4_ext_zeroout(inode, ex);
3235 if (err) 3272 if (err)
3236 goto out; 3273 goto out;
3274 zero_ex.ee_block = ex->ee_block;
3275 zero_ex.ee_len = ext4_ext_get_actual_len(ex);
3276 ext4_ext_store_pblock(&zero_ex, ext4_ext_pblock(ex));
3237 3277
3238 err = ext4_ext_get_access(handle, inode, path + depth); 3278 err = ext4_ext_get_access(handle, inode, path + depth);
3239 if (err) 3279 if (err)
@@ -3292,6 +3332,9 @@ static int ext4_ext_convert_to_initialized(handle_t *handle,
3292 err = allocated; 3332 err = allocated;
3293 3333
3294out: 3334out:
3335 /* If we have gotten a failure, don't zero out status tree */
3336 if (!err)
3337 err = ext4_es_zeroout(inode, &zero_ex);
3295 return err ? err : allocated; 3338 return err ? err : allocated;
3296} 3339}
3297 3340
@@ -3374,8 +3417,19 @@ static int ext4_convert_unwritten_extents_endio(handle_t *handle,
3374 "block %llu, max_blocks %u\n", inode->i_ino, 3417 "block %llu, max_blocks %u\n", inode->i_ino,
3375 (unsigned long long)ee_block, ee_len); 3418 (unsigned long long)ee_block, ee_len);
3376 3419
3377 /* If extent is larger than requested then split is required */ 3420 /* If extent is larger than requested it is a clear sign that we still
3421 * have some extent state machine issues left. So extent_split is still
3422 * required.
3423 * TODO: Once all related issues will be fixed this situation should be
3424 * illegal.
3425 */
3378 if (ee_block != map->m_lblk || ee_len > map->m_len) { 3426 if (ee_block != map->m_lblk || ee_len > map->m_len) {
3427#ifdef EXT4_DEBUG
3428 ext4_warning("Inode (%ld) finished: extent logical block %llu,"
3429 " len %u; IO logical block %llu, len %u\n",
3430 inode->i_ino, (unsigned long long)ee_block, ee_len,
3431 (unsigned long long)map->m_lblk, map->m_len);
3432#endif
3379 err = ext4_split_unwritten_extents(handle, inode, map, path, 3433 err = ext4_split_unwritten_extents(handle, inode, map, path,
3380 EXT4_GET_BLOCKS_CONVERT); 3434 EXT4_GET_BLOCKS_CONVERT);
3381 if (err < 0) 3435 if (err < 0)
@@ -3626,6 +3680,10 @@ ext4_ext_handle_uninitialized_extents(handle_t *handle, struct inode *inode,
3626 path, map->m_len); 3680 path, map->m_len);
3627 } else 3681 } else
3628 err = ret; 3682 err = ret;
3683 map->m_flags |= EXT4_MAP_MAPPED;
3684 if (allocated > map->m_len)
3685 allocated = map->m_len;
3686 map->m_len = allocated;
3629 goto out2; 3687 goto out2;
3630 } 3688 }
3631 /* buffered IO case */ 3689 /* buffered IO case */
@@ -3675,6 +3733,7 @@ out:
3675 allocated - map->m_len); 3733 allocated - map->m_len);
3676 allocated = map->m_len; 3734 allocated = map->m_len;
3677 } 3735 }
3736 map->m_len = allocated;
3678 3737
3679 /* 3738 /*
3680 * If we have done fallocate with the offset that is already 3739 * If we have done fallocate with the offset that is already
@@ -4106,9 +4165,6 @@ got_allocated_blocks:
4106 } 4165 }
4107 } else { 4166 } else {
4108 BUG_ON(allocated_clusters < reserved_clusters); 4167 BUG_ON(allocated_clusters < reserved_clusters);
4109 /* We will claim quota for all newly allocated blocks.*/
4110 ext4_da_update_reserve_space(inode, allocated_clusters,
4111 1);
4112 if (reserved_clusters < allocated_clusters) { 4168 if (reserved_clusters < allocated_clusters) {
4113 struct ext4_inode_info *ei = EXT4_I(inode); 4169 struct ext4_inode_info *ei = EXT4_I(inode);
4114 int reservation = allocated_clusters - 4170 int reservation = allocated_clusters -
@@ -4159,6 +4215,15 @@ got_allocated_blocks:
4159 ei->i_reserved_data_blocks += reservation; 4215 ei->i_reserved_data_blocks += reservation;
4160 spin_unlock(&ei->i_block_reservation_lock); 4216 spin_unlock(&ei->i_block_reservation_lock);
4161 } 4217 }
4218 /*
4219 * We will claim quota for all newly allocated blocks.
4220 * We're updating the reserved space *after* the
4221 * correction above so we do not accidentally free
4222 * all the metadata reservation because we might
4223 * actually need it later on.
4224 */
4225 ext4_da_update_reserve_space(inode, allocated_clusters,
4226 1);
4162 } 4227 }
4163 } 4228 }
4164 4229
@@ -4368,8 +4433,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4368 if (len <= EXT_UNINIT_MAX_LEN << blkbits) 4433 if (len <= EXT_UNINIT_MAX_LEN << blkbits)
4369 flags |= EXT4_GET_BLOCKS_NO_NORMALIZE; 4434 flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
4370 4435
4371 /* Prevent race condition between unwritten */
4372 ext4_flush_unwritten_io(inode);
4373retry: 4436retry:
4374 while (ret >= 0 && ret < max_blocks) { 4437 while (ret >= 0 && ret < max_blocks) {
4375 map.m_lblk = map.m_lblk + ret; 4438 map.m_lblk = map.m_lblk + ret;
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 95796a1b7522..fe3337a85ede 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -333,17 +333,27 @@ static void ext4_es_free_extent(struct inode *inode, struct extent_status *es)
333static int ext4_es_can_be_merged(struct extent_status *es1, 333static int ext4_es_can_be_merged(struct extent_status *es1,
334 struct extent_status *es2) 334 struct extent_status *es2)
335{ 335{
336 if (es1->es_lblk + es1->es_len != es2->es_lblk) 336 if (ext4_es_status(es1) != ext4_es_status(es2))
337 return 0; 337 return 0;
338 338
339 if (ext4_es_status(es1) != ext4_es_status(es2)) 339 if (((__u64) es1->es_len) + es2->es_len > 0xFFFFFFFFULL)
340 return 0; 340 return 0;
341 341
342 if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) && 342 if (((__u64) es1->es_lblk) + es1->es_len != es2->es_lblk)
343 (ext4_es_pblock(es1) + es1->es_len != ext4_es_pblock(es2)))
344 return 0; 343 return 0;
345 344
346 return 1; 345 if ((ext4_es_is_written(es1) || ext4_es_is_unwritten(es1)) &&
346 (ext4_es_pblock(es1) + es1->es_len == ext4_es_pblock(es2)))
347 return 1;
348
349 if (ext4_es_is_hole(es1))
350 return 1;
351
352 /* we need to check delayed extent is without unwritten status */
353 if (ext4_es_is_delayed(es1) && !ext4_es_is_unwritten(es1))
354 return 1;
355
356 return 0;
347} 357}
348 358
349static struct extent_status * 359static struct extent_status *
@@ -389,6 +399,179 @@ ext4_es_try_to_merge_right(struct inode *inode, struct extent_status *es)
389 return es; 399 return es;
390} 400}
391 401
402#ifdef ES_AGGRESSIVE_TEST
403static void ext4_es_insert_extent_ext_check(struct inode *inode,
404 struct extent_status *es)
405{
406 struct ext4_ext_path *path = NULL;
407 struct ext4_extent *ex;
408 ext4_lblk_t ee_block;
409 ext4_fsblk_t ee_start;
410 unsigned short ee_len;
411 int depth, ee_status, es_status;
412
413 path = ext4_ext_find_extent(inode, es->es_lblk, NULL);
414 if (IS_ERR(path))
415 return;
416
417 depth = ext_depth(inode);
418 ex = path[depth].p_ext;
419
420 if (ex) {
421
422 ee_block = le32_to_cpu(ex->ee_block);
423 ee_start = ext4_ext_pblock(ex);
424 ee_len = ext4_ext_get_actual_len(ex);
425
426 ee_status = ext4_ext_is_uninitialized(ex) ? 1 : 0;
427 es_status = ext4_es_is_unwritten(es) ? 1 : 0;
428
429 /*
430 * Make sure ex and es are not overlap when we try to insert
431 * a delayed/hole extent.
432 */
433 if (!ext4_es_is_written(es) && !ext4_es_is_unwritten(es)) {
434 if (in_range(es->es_lblk, ee_block, ee_len)) {
435 pr_warn("ES insert assertation failed for "
436 "inode: %lu we can find an extent "
437 "at block [%d/%d/%llu/%c], but we "
438 "want to add an delayed/hole extent "
439 "[%d/%d/%llu/%llx]\n",
440 inode->i_ino, ee_block, ee_len,
441 ee_start, ee_status ? 'u' : 'w',
442 es->es_lblk, es->es_len,
443 ext4_es_pblock(es), ext4_es_status(es));
444 }
445 goto out;
446 }
447
448 /*
449 * We don't check ee_block == es->es_lblk, etc. because es
450 * might be a part of whole extent, vice versa.
451 */
452 if (es->es_lblk < ee_block ||
453 ext4_es_pblock(es) != ee_start + es->es_lblk - ee_block) {
454 pr_warn("ES insert assertation failed for inode: %lu "
455 "ex_status [%d/%d/%llu/%c] != "
456 "es_status [%d/%d/%llu/%c]\n", inode->i_ino,
457 ee_block, ee_len, ee_start,
458 ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
459 ext4_es_pblock(es), es_status ? 'u' : 'w');
460 goto out;
461 }
462
463 if (ee_status ^ es_status) {
464 pr_warn("ES insert assertation failed for inode: %lu "
465 "ex_status [%d/%d/%llu/%c] != "
466 "es_status [%d/%d/%llu/%c]\n", inode->i_ino,
467 ee_block, ee_len, ee_start,
468 ee_status ? 'u' : 'w', es->es_lblk, es->es_len,
469 ext4_es_pblock(es), es_status ? 'u' : 'w');
470 }
471 } else {
472 /*
473 * We can't find an extent on disk. So we need to make sure
474 * that we don't want to add an written/unwritten extent.
475 */
476 if (!ext4_es_is_delayed(es) && !ext4_es_is_hole(es)) {
477 pr_warn("ES insert assertation failed for inode: %lu "
478 "can't find an extent at block %d but we want "
479 "to add an written/unwritten extent "
480 "[%d/%d/%llu/%llx]\n", inode->i_ino,
481 es->es_lblk, es->es_lblk, es->es_len,
482 ext4_es_pblock(es), ext4_es_status(es));
483 }
484 }
485out:
486 if (path) {
487 ext4_ext_drop_refs(path);
488 kfree(path);
489 }
490}
491
492static void ext4_es_insert_extent_ind_check(struct inode *inode,
493 struct extent_status *es)
494{
495 struct ext4_map_blocks map;
496 int retval;
497
498 /*
499 * Here we call ext4_ind_map_blocks to lookup a block mapping because
500 * 'Indirect' structure is defined in indirect.c. So we couldn't
501 * access direct/indirect tree from outside. It is too dirty to define
502 * this function in indirect.c file.
503 */
504
505 map.m_lblk = es->es_lblk;
506 map.m_len = es->es_len;
507
508 retval = ext4_ind_map_blocks(NULL, inode, &map, 0);
509 if (retval > 0) {
510 if (ext4_es_is_delayed(es) || ext4_es_is_hole(es)) {
511 /*
512 * We want to add a delayed/hole extent but this
513 * block has been allocated.
514 */
515 pr_warn("ES insert assertation failed for inode: %lu "
516 "We can find blocks but we want to add a "
517 "delayed/hole extent [%d/%d/%llu/%llx]\n",
518 inode->i_ino, es->es_lblk, es->es_len,
519 ext4_es_pblock(es), ext4_es_status(es));
520 return;
521 } else if (ext4_es_is_written(es)) {
522 if (retval != es->es_len) {
523 pr_warn("ES insert assertation failed for "
524 "inode: %lu retval %d != es_len %d\n",
525 inode->i_ino, retval, es->es_len);
526 return;
527 }
528 if (map.m_pblk != ext4_es_pblock(es)) {
529 pr_warn("ES insert assertation failed for "
530 "inode: %lu m_pblk %llu != "
531 "es_pblk %llu\n",
532 inode->i_ino, map.m_pblk,
533 ext4_es_pblock(es));
534 return;
535 }
536 } else {
537 /*
538 * We don't need to check unwritten extent because
539 * indirect-based file doesn't have it.
540 */
541 BUG_ON(1);
542 }
543 } else if (retval == 0) {
544 if (ext4_es_is_written(es)) {
545 pr_warn("ES insert assertation failed for inode: %lu "
546 "We can't find the block but we want to add "
547 "an written extent [%d/%d/%llu/%llx]\n",
548 inode->i_ino, es->es_lblk, es->es_len,
549 ext4_es_pblock(es), ext4_es_status(es));
550 return;
551 }
552 }
553}
554
555static inline void ext4_es_insert_extent_check(struct inode *inode,
556 struct extent_status *es)
557{
558 /*
559 * We don't need to worry about the race condition because
560 * caller takes i_data_sem locking.
561 */
562 BUG_ON(!rwsem_is_locked(&EXT4_I(inode)->i_data_sem));
563 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
564 ext4_es_insert_extent_ext_check(inode, es);
565 else
566 ext4_es_insert_extent_ind_check(inode, es);
567}
568#else
569static inline void ext4_es_insert_extent_check(struct inode *inode,
570 struct extent_status *es)
571{
572}
573#endif
574
392static int __es_insert_extent(struct inode *inode, struct extent_status *newes) 575static int __es_insert_extent(struct inode *inode, struct extent_status *newes)
393{ 576{
394 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree; 577 struct ext4_es_tree *tree = &EXT4_I(inode)->i_es_tree;
@@ -471,6 +654,8 @@ int ext4_es_insert_extent(struct inode *inode, ext4_lblk_t lblk,
471 ext4_es_store_status(&newes, status); 654 ext4_es_store_status(&newes, status);
472 trace_ext4_es_insert_extent(inode, &newes); 655 trace_ext4_es_insert_extent(inode, &newes);
473 656
657 ext4_es_insert_extent_check(inode, &newes);
658
474 write_lock(&EXT4_I(inode)->i_es_lock); 659 write_lock(&EXT4_I(inode)->i_es_lock);
475 err = __es_remove_extent(inode, lblk, end); 660 err = __es_remove_extent(inode, lblk, end);
476 if (err != 0) 661 if (err != 0)
@@ -669,6 +854,23 @@ int ext4_es_remove_extent(struct inode *inode, ext4_lblk_t lblk,
669 return err; 854 return err;
670} 855}
671 856
857int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex)
858{
859 ext4_lblk_t ee_block;
860 ext4_fsblk_t ee_pblock;
861 unsigned int ee_len;
862
863 ee_block = le32_to_cpu(ex->ee_block);
864 ee_len = ext4_ext_get_actual_len(ex);
865 ee_pblock = ext4_ext_pblock(ex);
866
867 if (ee_len == 0)
868 return 0;
869
870 return ext4_es_insert_extent(inode, ee_block, ee_len, ee_pblock,
871 EXTENT_STATUS_WRITTEN);
872}
873
672static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc) 874static int ext4_es_shrink(struct shrinker *shrink, struct shrink_control *sc)
673{ 875{
674 struct ext4_sb_info *sbi = container_of(shrink, 876 struct ext4_sb_info *sbi = container_of(shrink,
diff --git a/fs/ext4/extents_status.h b/fs/ext4/extents_status.h
index f190dfe969da..d8e2d4dc311e 100644
--- a/fs/ext4/extents_status.h
+++ b/fs/ext4/extents_status.h
@@ -21,6 +21,12 @@
21#endif 21#endif
22 22
23/* 23/*
24 * With ES_AGGRESSIVE_TEST defined, the result of es caching will be
25 * checked with old map_block's result.
26 */
27#define ES_AGGRESSIVE_TEST__
28
29/*
24 * These flags live in the high bits of extent_status.es_pblk 30 * These flags live in the high bits of extent_status.es_pblk
25 */ 31 */
26#define EXTENT_STATUS_WRITTEN (1ULL << 63) 32#define EXTENT_STATUS_WRITTEN (1ULL << 63)
@@ -33,6 +39,8 @@
33 EXTENT_STATUS_DELAYED | \ 39 EXTENT_STATUS_DELAYED | \
34 EXTENT_STATUS_HOLE) 40 EXTENT_STATUS_HOLE)
35 41
42struct ext4_extent;
43
36struct extent_status { 44struct extent_status {
37 struct rb_node rb_node; 45 struct rb_node rb_node;
38 ext4_lblk_t es_lblk; /* first logical block extent covers */ 46 ext4_lblk_t es_lblk; /* first logical block extent covers */
@@ -58,6 +66,7 @@ extern void ext4_es_find_delayed_extent(struct inode *inode, ext4_lblk_t lblk,
58 struct extent_status *es); 66 struct extent_status *es);
59extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk, 67extern int ext4_es_lookup_extent(struct inode *inode, ext4_lblk_t lblk,
60 struct extent_status *es); 68 struct extent_status *es);
69extern int ext4_es_zeroout(struct inode *inode, struct ext4_extent *ex);
61 70
62static inline int ext4_es_is_written(struct extent_status *es) 71static inline int ext4_es_is_written(struct extent_status *es)
63{ 72{
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 32fd2b9075dd..6c5bb8d993fe 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -324,8 +324,8 @@ error_return:
324} 324}
325 325
326struct orlov_stats { 326struct orlov_stats {
327 __u64 free_clusters;
327 __u32 free_inodes; 328 __u32 free_inodes;
328 __u32 free_clusters;
329 __u32 used_dirs; 329 __u32 used_dirs;
330}; 330};
331 331
@@ -342,7 +342,7 @@ static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
342 342
343 if (flex_size > 1) { 343 if (flex_size > 1) {
344 stats->free_inodes = atomic_read(&flex_group[g].free_inodes); 344 stats->free_inodes = atomic_read(&flex_group[g].free_inodes);
345 stats->free_clusters = atomic_read(&flex_group[g].free_clusters); 345 stats->free_clusters = atomic64_read(&flex_group[g].free_clusters);
346 stats->used_dirs = atomic_read(&flex_group[g].used_dirs); 346 stats->used_dirs = atomic_read(&flex_group[g].used_dirs);
347 return; 347 return;
348 } 348 }
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 9ea0cde3fa9e..b3a5213bc73e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -185,8 +185,6 @@ void ext4_evict_inode(struct inode *inode)
185 185
186 trace_ext4_evict_inode(inode); 186 trace_ext4_evict_inode(inode);
187 187
188 ext4_ioend_wait(inode);
189
190 if (inode->i_nlink) { 188 if (inode->i_nlink) {
191 /* 189 /*
192 * When journalling data dirty buffers are tracked only in the 190 * When journalling data dirty buffers are tracked only in the
@@ -207,7 +205,8 @@ void ext4_evict_inode(struct inode *inode)
207 * don't use page cache. 205 * don't use page cache.
208 */ 206 */
209 if (ext4_should_journal_data(inode) && 207 if (ext4_should_journal_data(inode) &&
210 (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode))) { 208 (S_ISLNK(inode->i_mode) || S_ISREG(inode->i_mode)) &&
209 inode->i_ino != EXT4_JOURNAL_INO) {
211 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal; 210 journal_t *journal = EXT4_SB(inode->i_sb)->s_journal;
212 tid_t commit_tid = EXT4_I(inode)->i_datasync_tid; 211 tid_t commit_tid = EXT4_I(inode)->i_datasync_tid;
213 212
@@ -216,6 +215,7 @@ void ext4_evict_inode(struct inode *inode)
216 filemap_write_and_wait(&inode->i_data); 215 filemap_write_and_wait(&inode->i_data);
217 } 216 }
218 truncate_inode_pages(&inode->i_data, 0); 217 truncate_inode_pages(&inode->i_data, 0);
218 ext4_ioend_shutdown(inode);
219 goto no_delete; 219 goto no_delete;
220 } 220 }
221 221
@@ -225,6 +225,7 @@ void ext4_evict_inode(struct inode *inode)
225 if (ext4_should_order_data(inode)) 225 if (ext4_should_order_data(inode))
226 ext4_begin_ordered_truncate(inode, 0); 226 ext4_begin_ordered_truncate(inode, 0);
227 truncate_inode_pages(&inode->i_data, 0); 227 truncate_inode_pages(&inode->i_data, 0);
228 ext4_ioend_shutdown(inode);
228 229
229 if (is_bad_inode(inode)) 230 if (is_bad_inode(inode))
230 goto no_delete; 231 goto no_delete;
@@ -482,6 +483,58 @@ static pgoff_t ext4_num_dirty_pages(struct inode *inode, pgoff_t idx,
482 return num; 483 return num;
483} 484}
484 485
486#ifdef ES_AGGRESSIVE_TEST
487static void ext4_map_blocks_es_recheck(handle_t *handle,
488 struct inode *inode,
489 struct ext4_map_blocks *es_map,
490 struct ext4_map_blocks *map,
491 int flags)
492{
493 int retval;
494
495 map->m_flags = 0;
496 /*
497 * There is a race window that the result is not the same.
498 * e.g. xfstests #223 when dioread_nolock enables. The reason
499 * is that we lookup a block mapping in extent status tree with
500 * out taking i_data_sem. So at the time the unwritten extent
501 * could be converted.
502 */
503 if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
504 down_read((&EXT4_I(inode)->i_data_sem));
505 if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
506 retval = ext4_ext_map_blocks(handle, inode, map, flags &
507 EXT4_GET_BLOCKS_KEEP_SIZE);
508 } else {
509 retval = ext4_ind_map_blocks(handle, inode, map, flags &
510 EXT4_GET_BLOCKS_KEEP_SIZE);
511 }
512 if (!(flags & EXT4_GET_BLOCKS_NO_LOCK))
513 up_read((&EXT4_I(inode)->i_data_sem));
514 /*
515 * Clear EXT4_MAP_FROM_CLUSTER and EXT4_MAP_BOUNDARY flag
516 * because it shouldn't be marked in es_map->m_flags.
517 */
518 map->m_flags &= ~(EXT4_MAP_FROM_CLUSTER | EXT4_MAP_BOUNDARY);
519
520 /*
521 * We don't check m_len because extent will be collpased in status
522 * tree. So the m_len might not equal.
523 */
524 if (es_map->m_lblk != map->m_lblk ||
525 es_map->m_flags != map->m_flags ||
526 es_map->m_pblk != map->m_pblk) {
527 printk("ES cache assertation failed for inode: %lu "
528 "es_cached ex [%d/%d/%llu/%x] != "
529 "found ex [%d/%d/%llu/%x] retval %d flags %x\n",
530 inode->i_ino, es_map->m_lblk, es_map->m_len,
531 es_map->m_pblk, es_map->m_flags, map->m_lblk,
532 map->m_len, map->m_pblk, map->m_flags,
533 retval, flags);
534 }
535}
536#endif /* ES_AGGRESSIVE_TEST */
537
485/* 538/*
486 * The ext4_map_blocks() function tries to look up the requested blocks, 539 * The ext4_map_blocks() function tries to look up the requested blocks,
487 * and returns if the blocks are already mapped. 540 * and returns if the blocks are already mapped.
@@ -509,6 +562,11 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
509{ 562{
510 struct extent_status es; 563 struct extent_status es;
511 int retval; 564 int retval;
565#ifdef ES_AGGRESSIVE_TEST
566 struct ext4_map_blocks orig_map;
567
568 memcpy(&orig_map, map, sizeof(*map));
569#endif
512 570
513 map->m_flags = 0; 571 map->m_flags = 0;
514 ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u," 572 ext_debug("ext4_map_blocks(): inode %lu, flag %d, max_blocks %u,"
@@ -531,6 +589,10 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
531 } else { 589 } else {
532 BUG_ON(1); 590 BUG_ON(1);
533 } 591 }
592#ifdef ES_AGGRESSIVE_TEST
593 ext4_map_blocks_es_recheck(handle, inode, map,
594 &orig_map, flags);
595#endif
534 goto found; 596 goto found;
535 } 597 }
536 598
@@ -551,6 +613,15 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
551 int ret; 613 int ret;
552 unsigned long long status; 614 unsigned long long status;
553 615
616#ifdef ES_AGGRESSIVE_TEST
617 if (retval != map->m_len) {
618 printk("ES len assertation failed for inode: %lu "
619 "retval %d != map->m_len %d "
620 "in %s (lookup)\n", inode->i_ino, retval,
621 map->m_len, __func__);
622 }
623#endif
624
554 status = map->m_flags & EXT4_MAP_UNWRITTEN ? 625 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
555 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; 626 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
556 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && 627 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
@@ -643,6 +714,24 @@ found:
643 int ret; 714 int ret;
644 unsigned long long status; 715 unsigned long long status;
645 716
717#ifdef ES_AGGRESSIVE_TEST
718 if (retval != map->m_len) {
719 printk("ES len assertation failed for inode: %lu "
720 "retval %d != map->m_len %d "
721 "in %s (allocation)\n", inode->i_ino, retval,
722 map->m_len, __func__);
723 }
724#endif
725
726 /*
727 * If the extent has been zeroed out, we don't need to update
728 * extent status tree.
729 */
730 if ((flags & EXT4_GET_BLOCKS_PRE_IO) &&
731 ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
732 if (ext4_es_is_written(&es))
733 goto has_zeroout;
734 }
646 status = map->m_flags & EXT4_MAP_UNWRITTEN ? 735 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
647 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; 736 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
648 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) && 737 if (!(flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE) &&
@@ -655,6 +744,7 @@ found:
655 retval = ret; 744 retval = ret;
656 } 745 }
657 746
747has_zeroout:
658 up_write((&EXT4_I(inode)->i_data_sem)); 748 up_write((&EXT4_I(inode)->i_data_sem));
659 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) { 749 if (retval > 0 && map->m_flags & EXT4_MAP_MAPPED) {
660 int ret = check_block_validity(inode, map); 750 int ret = check_block_validity(inode, map);
@@ -1216,6 +1306,55 @@ static int ext4_journalled_write_end(struct file *file,
1216} 1306}
1217 1307
1218/* 1308/*
1309 * Reserve a metadata for a single block located at lblock
1310 */
1311static int ext4_da_reserve_metadata(struct inode *inode, ext4_lblk_t lblock)
1312{
1313 int retries = 0;
1314 struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
1315 struct ext4_inode_info *ei = EXT4_I(inode);
1316 unsigned int md_needed;
1317 ext4_lblk_t save_last_lblock;
1318 int save_len;
1319
1320 /*
1321 * recalculate the amount of metadata blocks to reserve
1322 * in order to allocate nrblocks
1323 * worse case is one extent per block
1324 */
1325repeat:
1326 spin_lock(&ei->i_block_reservation_lock);
1327 /*
1328 * ext4_calc_metadata_amount() has side effects, which we have
1329 * to be prepared undo if we fail to claim space.
1330 */
1331 save_len = ei->i_da_metadata_calc_len;
1332 save_last_lblock = ei->i_da_metadata_calc_last_lblock;
1333 md_needed = EXT4_NUM_B2C(sbi,
1334 ext4_calc_metadata_amount(inode, lblock));
1335 trace_ext4_da_reserve_space(inode, md_needed);
1336
1337 /*
1338 * We do still charge estimated metadata to the sb though;
1339 * we cannot afford to run out of free blocks.
1340 */
1341 if (ext4_claim_free_clusters(sbi, md_needed, 0)) {
1342 ei->i_da_metadata_calc_len = save_len;
1343 ei->i_da_metadata_calc_last_lblock = save_last_lblock;
1344 spin_unlock(&ei->i_block_reservation_lock);
1345 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
1346 cond_resched();
1347 goto repeat;
1348 }
1349 return -ENOSPC;
1350 }
1351 ei->i_reserved_meta_blocks += md_needed;
1352 spin_unlock(&ei->i_block_reservation_lock);
1353
1354 return 0; /* success */
1355}
1356
1357/*
1219 * Reserve a single cluster located at lblock 1358 * Reserve a single cluster located at lblock
1220 */ 1359 */
1221static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock) 1360static int ext4_da_reserve_space(struct inode *inode, ext4_lblk_t lblock)
@@ -1263,7 +1402,7 @@ repeat:
1263 ei->i_da_metadata_calc_last_lblock = save_last_lblock; 1402 ei->i_da_metadata_calc_last_lblock = save_last_lblock;
1264 spin_unlock(&ei->i_block_reservation_lock); 1403 spin_unlock(&ei->i_block_reservation_lock);
1265 if (ext4_should_retry_alloc(inode->i_sb, &retries)) { 1404 if (ext4_should_retry_alloc(inode->i_sb, &retries)) {
1266 yield(); 1405 cond_resched();
1267 goto repeat; 1406 goto repeat;
1268 } 1407 }
1269 dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1)); 1408 dquot_release_reservation_block(inode, EXT4_C2B(sbi, 1));
@@ -1768,6 +1907,11 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1768 struct extent_status es; 1907 struct extent_status es;
1769 int retval; 1908 int retval;
1770 sector_t invalid_block = ~((sector_t) 0xffff); 1909 sector_t invalid_block = ~((sector_t) 0xffff);
1910#ifdef ES_AGGRESSIVE_TEST
1911 struct ext4_map_blocks orig_map;
1912
1913 memcpy(&orig_map, map, sizeof(*map));
1914#endif
1771 1915
1772 if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es)) 1916 if (invalid_block < ext4_blocks_count(EXT4_SB(inode->i_sb)->s_es))
1773 invalid_block = ~0; 1917 invalid_block = ~0;
@@ -1809,6 +1953,9 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
1809 else 1953 else
1810 BUG_ON(1); 1954 BUG_ON(1);
1811 1955
1956#ifdef ES_AGGRESSIVE_TEST
1957 ext4_map_blocks_es_recheck(NULL, inode, map, &orig_map, 0);
1958#endif
1812 return retval; 1959 return retval;
1813 } 1960 }
1814 1961
@@ -1843,8 +1990,11 @@ add_delayed:
1843 * XXX: __block_prepare_write() unmaps passed block, 1990 * XXX: __block_prepare_write() unmaps passed block,
1844 * is it OK? 1991 * is it OK?
1845 */ 1992 */
1846 /* If the block was allocated from previously allocated cluster, 1993 /*
1847 * then we dont need to reserve it again. */ 1994 * If the block was allocated from previously allocated cluster,
1995 * then we don't need to reserve it again. However we still need
1996 * to reserve metadata for every block we're going to write.
1997 */
1848 if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) { 1998 if (!(map->m_flags & EXT4_MAP_FROM_CLUSTER)) {
1849 ret = ext4_da_reserve_space(inode, iblock); 1999 ret = ext4_da_reserve_space(inode, iblock);
1850 if (ret) { 2000 if (ret) {
@@ -1852,6 +2002,13 @@ add_delayed:
1852 retval = ret; 2002 retval = ret;
1853 goto out_unlock; 2003 goto out_unlock;
1854 } 2004 }
2005 } else {
2006 ret = ext4_da_reserve_metadata(inode, iblock);
2007 if (ret) {
2008 /* not enough space to reserve */
2009 retval = ret;
2010 goto out_unlock;
2011 }
1855 } 2012 }
1856 2013
1857 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, 2014 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
@@ -1873,6 +2030,15 @@ add_delayed:
1873 int ret; 2030 int ret;
1874 unsigned long long status; 2031 unsigned long long status;
1875 2032
2033#ifdef ES_AGGRESSIVE_TEST
2034 if (retval != map->m_len) {
2035 printk("ES len assertation failed for inode: %lu "
2036 "retval %d != map->m_len %d "
2037 "in %s (lookup)\n", inode->i_ino, retval,
2038 map->m_len, __func__);
2039 }
2040#endif
2041
1876 status = map->m_flags & EXT4_MAP_UNWRITTEN ? 2042 status = map->m_flags & EXT4_MAP_UNWRITTEN ?
1877 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN; 2043 EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
1878 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len, 2044 ret = ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
@@ -2908,8 +3074,8 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
2908 3074
2909 trace_ext4_releasepage(page); 3075 trace_ext4_releasepage(page);
2910 3076
2911 WARN_ON(PageChecked(page)); 3077 /* Page has dirty journalled data -> cannot release */
2912 if (!page_has_buffers(page)) 3078 if (PageChecked(page))
2913 return 0; 3079 return 0;
2914 if (journal) 3080 if (journal)
2915 return jbd2_journal_try_to_free_buffers(journal, page, wait); 3081 return jbd2_journal_try_to_free_buffers(journal, page, wait);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 7bb713a46fe4..ee6614bdb639 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2804,8 +2804,8 @@ ext4_mb_mark_diskspace_used(struct ext4_allocation_context *ac,
2804 if (sbi->s_log_groups_per_flex) { 2804 if (sbi->s_log_groups_per_flex) {
2805 ext4_group_t flex_group = ext4_flex_group(sbi, 2805 ext4_group_t flex_group = ext4_flex_group(sbi,
2806 ac->ac_b_ex.fe_group); 2806 ac->ac_b_ex.fe_group);
2807 atomic_sub(ac->ac_b_ex.fe_len, 2807 atomic64_sub(ac->ac_b_ex.fe_len,
2808 &sbi->s_flex_groups[flex_group].free_clusters); 2808 &sbi->s_flex_groups[flex_group].free_clusters);
2809 } 2809 }
2810 2810
2811 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh); 2811 err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
@@ -3692,11 +3692,7 @@ repeat:
3692 if (free < needed && busy) { 3692 if (free < needed && busy) {
3693 busy = 0; 3693 busy = 0;
3694 ext4_unlock_group(sb, group); 3694 ext4_unlock_group(sb, group);
3695 /* 3695 cond_resched();
3696 * Yield the CPU here so that we don't get soft lockup
3697 * in non preempt case.
3698 */
3699 yield();
3700 goto repeat; 3696 goto repeat;
3701 } 3697 }
3702 3698
@@ -4246,7 +4242,7 @@ ext4_fsblk_t ext4_mb_new_blocks(handle_t *handle,
4246 ext4_claim_free_clusters(sbi, ar->len, ar->flags)) { 4242 ext4_claim_free_clusters(sbi, ar->len, ar->flags)) {
4247 4243
4248 /* let others to free the space */ 4244 /* let others to free the space */
4249 yield(); 4245 cond_resched();
4250 ar->len = ar->len >> 1; 4246 ar->len = ar->len >> 1;
4251 } 4247 }
4252 if (!ar->len) { 4248 if (!ar->len) {
@@ -4464,7 +4460,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
4464 struct buffer_head *bitmap_bh = NULL; 4460 struct buffer_head *bitmap_bh = NULL;
4465 struct super_block *sb = inode->i_sb; 4461 struct super_block *sb = inode->i_sb;
4466 struct ext4_group_desc *gdp; 4462 struct ext4_group_desc *gdp;
4467 unsigned long freed = 0;
4468 unsigned int overflow; 4463 unsigned int overflow;
4469 ext4_grpblk_t bit; 4464 ext4_grpblk_t bit;
4470 struct buffer_head *gd_bh; 4465 struct buffer_head *gd_bh;
@@ -4666,14 +4661,12 @@ do_more:
4666 4661
4667 if (sbi->s_log_groups_per_flex) { 4662 if (sbi->s_log_groups_per_flex) {
4668 ext4_group_t flex_group = ext4_flex_group(sbi, block_group); 4663 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4669 atomic_add(count_clusters, 4664 atomic64_add(count_clusters,
4670 &sbi->s_flex_groups[flex_group].free_clusters); 4665 &sbi->s_flex_groups[flex_group].free_clusters);
4671 } 4666 }
4672 4667
4673 ext4_mb_unload_buddy(&e4b); 4668 ext4_mb_unload_buddy(&e4b);
4674 4669
4675 freed += count;
4676
4677 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE)) 4670 if (!(flags & EXT4_FREE_BLOCKS_NO_QUOT_UPDATE))
4678 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters)); 4671 dquot_free_block(inode, EXT4_C2B(sbi, count_clusters));
4679 4672
@@ -4811,8 +4804,8 @@ int ext4_group_add_blocks(handle_t *handle, struct super_block *sb,
4811 4804
4812 if (sbi->s_log_groups_per_flex) { 4805 if (sbi->s_log_groups_per_flex) {
4813 ext4_group_t flex_group = ext4_flex_group(sbi, block_group); 4806 ext4_group_t flex_group = ext4_flex_group(sbi, block_group);
4814 atomic_add(EXT4_NUM_B2C(sbi, blocks_freed), 4807 atomic64_add(EXT4_NUM_B2C(sbi, blocks_freed),
4815 &sbi->s_flex_groups[flex_group].free_clusters); 4808 &sbi->s_flex_groups[flex_group].free_clusters);
4816 } 4809 }
4817 4810
4818 ext4_mb_unload_buddy(&e4b); 4811 ext4_mb_unload_buddy(&e4b);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 4e81d47aa8cb..33e1c086858b 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -32,16 +32,18 @@
32 */ 32 */
33static inline int 33static inline int
34get_ext_path(struct inode *inode, ext4_lblk_t lblock, 34get_ext_path(struct inode *inode, ext4_lblk_t lblock,
35 struct ext4_ext_path **path) 35 struct ext4_ext_path **orig_path)
36{ 36{
37 int ret = 0; 37 int ret = 0;
38 struct ext4_ext_path *path;
38 39
39 *path = ext4_ext_find_extent(inode, lblock, *path); 40 path = ext4_ext_find_extent(inode, lblock, *orig_path);
40 if (IS_ERR(*path)) { 41 if (IS_ERR(path))
41 ret = PTR_ERR(*path); 42 ret = PTR_ERR(path);
42 *path = NULL; 43 else if (path[ext_depth(inode)].p_ext == NULL)
43 } else if ((*path)[ext_depth(inode)].p_ext == NULL)
44 ret = -ENODATA; 44 ret = -ENODATA;
45 else
46 *orig_path = path;
45 47
46 return ret; 48 return ret;
47} 49}
@@ -611,24 +613,25 @@ mext_check_coverage(struct inode *inode, ext4_lblk_t from, ext4_lblk_t count,
611{ 613{
612 struct ext4_ext_path *path = NULL; 614 struct ext4_ext_path *path = NULL;
613 struct ext4_extent *ext; 615 struct ext4_extent *ext;
616 int ret = 0;
614 ext4_lblk_t last = from + count; 617 ext4_lblk_t last = from + count;
615 while (from < last) { 618 while (from < last) {
616 *err = get_ext_path(inode, from, &path); 619 *err = get_ext_path(inode, from, &path);
617 if (*err) 620 if (*err)
618 return 0; 621 goto out;
619 ext = path[ext_depth(inode)].p_ext; 622 ext = path[ext_depth(inode)].p_ext;
620 if (!ext) { 623 if (uninit != ext4_ext_is_uninitialized(ext))
621 ext4_ext_drop_refs(path); 624 goto out;
622 return 0;
623 }
624 if (uninit != ext4_ext_is_uninitialized(ext)) {
625 ext4_ext_drop_refs(path);
626 return 0;
627 }
628 from += ext4_ext_get_actual_len(ext); 625 from += ext4_ext_get_actual_len(ext);
629 ext4_ext_drop_refs(path); 626 ext4_ext_drop_refs(path);
630 } 627 }
631 return 1; 628 ret = 1;
629out:
630 if (path) {
631 ext4_ext_drop_refs(path);
632 kfree(path);
633 }
634 return ret;
632} 635}
633 636
634/** 637/**
@@ -666,6 +669,14 @@ mext_replace_branches(handle_t *handle, struct inode *orig_inode,
666 int replaced_count = 0; 669 int replaced_count = 0;
667 int dext_alen; 670 int dext_alen;
668 671
672 *err = ext4_es_remove_extent(orig_inode, from, count);
673 if (*err)
674 goto out;
675
676 *err = ext4_es_remove_extent(donor_inode, from, count);
677 if (*err)
678 goto out;
679
669 /* Get the original extent for the block "orig_off" */ 680 /* Get the original extent for the block "orig_off" */
670 *err = get_ext_path(orig_inode, orig_off, &orig_path); 681 *err = get_ext_path(orig_inode, orig_off, &orig_path);
671 if (*err) 682 if (*err)
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 809b31003ecc..047a6de04a0a 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -50,11 +50,21 @@ void ext4_exit_pageio(void)
50 kmem_cache_destroy(io_page_cachep); 50 kmem_cache_destroy(io_page_cachep);
51} 51}
52 52
53void ext4_ioend_wait(struct inode *inode) 53/*
54 * This function is called by ext4_evict_inode() to make sure there is
55 * no more pending I/O completion work left to do.
56 */
57void ext4_ioend_shutdown(struct inode *inode)
54{ 58{
55 wait_queue_head_t *wq = ext4_ioend_wq(inode); 59 wait_queue_head_t *wq = ext4_ioend_wq(inode);
56 60
57 wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0)); 61 wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_ioend_count) == 0));
62 /*
63 * We need to make sure the work structure is finished being
64 * used before we let the inode get destroyed.
65 */
66 if (work_pending(&EXT4_I(inode)->i_unwritten_work))
67 cancel_work_sync(&EXT4_I(inode)->i_unwritten_work);
58} 68}
59 69
60static void put_io_page(struct ext4_io_page *io_page) 70static void put_io_page(struct ext4_io_page *io_page)
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index b2c8ee56eb98..c169477a62c9 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -1360,8 +1360,8 @@ static void ext4_update_super(struct super_block *sb,
1360 sbi->s_log_groups_per_flex) { 1360 sbi->s_log_groups_per_flex) {
1361 ext4_group_t flex_group; 1361 ext4_group_t flex_group;
1362 flex_group = ext4_flex_group(sbi, group_data[0].group); 1362 flex_group = ext4_flex_group(sbi, group_data[0].group);
1363 atomic_add(EXT4_NUM_B2C(sbi, free_blocks), 1363 atomic64_add(EXT4_NUM_B2C(sbi, free_blocks),
1364 &sbi->s_flex_groups[flex_group].free_clusters); 1364 &sbi->s_flex_groups[flex_group].free_clusters);
1365 atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count, 1365 atomic_add(EXT4_INODES_PER_GROUP(sb) * flex_gd->count,
1366 &sbi->s_flex_groups[flex_group].free_inodes); 1366 &sbi->s_flex_groups[flex_group].free_inodes);
1367 } 1367 }
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index b3818b48f418..5d6d53578124 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -1927,8 +1927,8 @@ static int ext4_fill_flex_info(struct super_block *sb)
1927 flex_group = ext4_flex_group(sbi, i); 1927 flex_group = ext4_flex_group(sbi, i);
1928 atomic_add(ext4_free_inodes_count(sb, gdp), 1928 atomic_add(ext4_free_inodes_count(sb, gdp),
1929 &sbi->s_flex_groups[flex_group].free_inodes); 1929 &sbi->s_flex_groups[flex_group].free_inodes);
1930 atomic_add(ext4_free_group_clusters(sb, gdp), 1930 atomic64_add(ext4_free_group_clusters(sb, gdp),
1931 &sbi->s_flex_groups[flex_group].free_clusters); 1931 &sbi->s_flex_groups[flex_group].free_clusters);
1932 atomic_add(ext4_used_dirs_count(sb, gdp), 1932 atomic_add(ext4_used_dirs_count(sb, gdp),
1933 &sbi->s_flex_groups[flex_group].used_dirs); 1933 &sbi->s_flex_groups[flex_group].used_dirs);
1934 } 1934 }