about | summary | refs | log | tree | commit | diff | stats
path: root/fs
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2014-04-20 23:43:47 -0400
committer: Linus Torvalds <torvalds@linux-foundation.org> 2014-04-20 23:43:47 -0400
commit: 9ac03675010a69507c0a9d832d6a722e07d35cc6 (patch)
tree: 63902dc9fae9c0710fd4450839a82def9930d8d7 /fs
parent: a798c10faf62a505d24e5f6213fbaf904a39623f (diff)
parent: 0a04b248532b358b27a8da050642da6f5f304b03 (diff)
Merge tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4
Pull ext4 fixes from Ted Ts'o:
 "These are regression and bug fixes for ext4.

  We had a number of new features in ext4 during this merge window
  (ZERO_RANGE and COLLAPSE_RANGE fallocate modes, renameat, etc.) so
  there were many more regression and bug fixes this time around. It
  didn't help that xfstests hadn't been fully updated to fully stress
  test COLLAPSE_RANGE until after -rc1"

* tag 'ext4_for_linus_stable' of git://git.kernel.org/pub/scm/linux/kernel/git/tytso/ext4: (31 commits)
  ext4: disable COLLAPSE_RANGE for bigalloc
  ext4: fix COLLAPSE_RANGE failure with 1KB block size
  ext4: use EINVAL if not a regular file in ext4_collapse_range()
  ext4: enforce we are operating on a regular file in ext4_zero_range()
  ext4: fix extent merging in ext4_ext_shift_path_extents()
  ext4: discard preallocations after removing space
  ext4: no need to truncate pagecache twice in collapse range
  ext4: fix removing status extents in ext4_collapse_range()
  ext4: use filemap_write_and_wait_range() correctly in collapse range
  ext4: use truncate_pagecache() in collapse range
  ext4: remove temporary shim used to merge COLLAPSE_RANGE and ZERO_RANGE
  ext4: fix ext4_count_free_clusters() with EXT4FS_DEBUG and bigalloc enabled
  ext4: always check ext4_ext_find_extent result
  ext4: fix error handling in ext4_ext_shift_extents
  ext4: silence sparse check warning for function ext4_trim_extent
  ext4: COLLAPSE_RANGE only works on extent-based files
  ext4: fix byte order problems introduced by the COLLAPSE_RANGE patches
  ext4: use i_size_read in ext4_unaligned_aio()
  fs: disallow all fallocate operation on active swapfile
  fs: move falloc collapse range check into the filesystem methods
  ...
Diffstat (limited to 'fs')
-rw-r--r-- fs/ceph/file.c | 3
-rw-r--r-- fs/ext4/balloc.c | 2
-rw-r--r-- fs/ext4/ext4.h | 17
-rw-r--r-- fs/ext4/extents.c | 109
-rw-r--r-- fs/ext4/extents_status.c | 2
-rw-r--r-- fs/ext4/file.c | 2
-rw-r--r-- fs/ext4/inode.c | 53
-rw-r--r-- fs/ext4/mballoc.c | 18
-rw-r--r-- fs/ext4/page-io.c | 5
-rw-r--r-- fs/ext4/super.c | 51
-rw-r--r-- fs/ext4/xattr.c | 23
-rw-r--r-- fs/open.c | 21
-rw-r--r-- fs/xfs/xfs_file.c | 10
13 files changed, 188 insertions, 128 deletions
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index 39da1c2efa50..88a6df4cbe6d 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -1221,9 +1221,6 @@ static long ceph_fallocate(struct file *file, int mode,
1221 if (!S_ISREG(inode->i_mode)) 1221 if (!S_ISREG(inode->i_mode))
1222 return -EOPNOTSUPP; 1222 return -EOPNOTSUPP;
1223 1223
1224 if (IS_SWAPFILE(inode))
1225 return -ETXTBSY;
1226
1227 mutex_lock(&inode->i_mutex); 1224 mutex_lock(&inode->i_mutex);
1228 1225
1229 if (ceph_snap(inode) != CEPH_NOSNAP) { 1226 if (ceph_snap(inode) != CEPH_NOSNAP) {
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index 6ea7b1436bbc..5c56785007e0 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -667,7 +667,7 @@ ext4_fsblk_t ext4_count_free_clusters(struct super_block *sb)
667 continue; 667 continue;
668 668
669 x = ext4_count_free(bitmap_bh->b_data, 669 x = ext4_count_free(bitmap_bh->b_data,
670 EXT4_BLOCKS_PER_GROUP(sb) / 8); 670 EXT4_CLUSTERS_PER_GROUP(sb) / 8);
671 printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n", 671 printk(KERN_DEBUG "group %u: stored = %d, counted = %u\n",
672 i, ext4_free_group_clusters(sb, gdp), x); 672 i, ext4_free_group_clusters(sb, gdp), x);
673 bitmap_count += x; 673 bitmap_count += x;
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index f1c65dc7cc0a..66946aa62127 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2466,23 +2466,6 @@ static inline void ext4_update_i_disksize(struct inode *inode, loff_t newsize)
2466 up_write(&EXT4_I(inode)->i_data_sem); 2466 up_write(&EXT4_I(inode)->i_data_sem);
2467} 2467}
2468 2468
2469/*
2470 * Update i_disksize after writeback has been started. Races with truncate
2471 * are avoided by checking i_size under i_data_sem.
2472 */
2473static inline void ext4_wb_update_i_disksize(struct inode *inode, loff_t newsize)
2474{
2475 loff_t i_size;
2476
2477 down_write(&EXT4_I(inode)->i_data_sem);
2478 i_size = i_size_read(inode);
2479 if (newsize > i_size)
2480 newsize = i_size;
2481 if (newsize > EXT4_I(inode)->i_disksize)
2482 EXT4_I(inode)->i_disksize = newsize;
2483 up_write(&EXT4_I(inode)->i_data_sem);
2484}
2485
2486struct ext4_group_info { 2469struct ext4_group_info {
2487 unsigned long bb_state; 2470 unsigned long bb_state;
2488 struct rb_root bb_free_root; 2471 struct rb_root bb_free_root;
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 82df3ce9874a..01b0c208f625 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3313,6 +3313,11 @@ static int ext4_split_extent(handle_t *handle,
3313 return PTR_ERR(path); 3313 return PTR_ERR(path);
3314 depth = ext_depth(inode); 3314 depth = ext_depth(inode);
3315 ex = path[depth].p_ext; 3315 ex = path[depth].p_ext;
3316 if (!ex) {
3317 EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
3318 (unsigned long) map->m_lblk);
3319 return -EIO;
3320 }
3316 uninitialized = ext4_ext_is_uninitialized(ex); 3321 uninitialized = ext4_ext_is_uninitialized(ex);
3317 split_flag1 = 0; 3322 split_flag1 = 0;
3318 3323
@@ -3694,6 +3699,12 @@ static int ext4_convert_initialized_extents(handle_t *handle,
3694 } 3699 }
3695 depth = ext_depth(inode); 3700 depth = ext_depth(inode);
3696 ex = path[depth].p_ext; 3701 ex = path[depth].p_ext;
3702 if (!ex) {
3703 EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
3704 (unsigned long) map->m_lblk);
3705 err = -EIO;
3706 goto out;
3707 }
3697 } 3708 }
3698 3709
3699 err = ext4_ext_get_access(handle, inode, path + depth); 3710 err = ext4_ext_get_access(handle, inode, path + depth);
@@ -4730,6 +4741,9 @@ static long ext4_zero_range(struct file *file, loff_t offset,
4730 4741
4731 trace_ext4_zero_range(inode, offset, len, mode); 4742 trace_ext4_zero_range(inode, offset, len, mode);
4732 4743
4744 if (!S_ISREG(inode->i_mode))
4745 return -EINVAL;
4746
4733 /* 4747 /*
4734 * Write out all dirty pages to avoid race conditions 4748 * Write out all dirty pages to avoid race conditions
4735 * Then release them. 4749 * Then release them.
@@ -4878,9 +4892,6 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4878 if (mode & FALLOC_FL_PUNCH_HOLE) 4892 if (mode & FALLOC_FL_PUNCH_HOLE)
4879 return ext4_punch_hole(inode, offset, len); 4893 return ext4_punch_hole(inode, offset, len);
4880 4894
4881 if (mode & FALLOC_FL_COLLAPSE_RANGE)
4882 return ext4_collapse_range(inode, offset, len);
4883
4884 ret = ext4_convert_inline_data(inode); 4895 ret = ext4_convert_inline_data(inode);
4885 if (ret) 4896 if (ret)
4886 return ret; 4897 return ret;
@@ -4892,6 +4903,9 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
4892 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) 4903 if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
4893 return -EOPNOTSUPP; 4904 return -EOPNOTSUPP;
4894 4905
4906 if (mode & FALLOC_FL_COLLAPSE_RANGE)
4907 return ext4_collapse_range(inode, offset, len);
4908
4895 if (mode & FALLOC_FL_ZERO_RANGE) 4909 if (mode & FALLOC_FL_ZERO_RANGE)
4896 return ext4_zero_range(file, offset, len, mode); 4910 return ext4_zero_range(file, offset, len, mode);
4897 4911
@@ -5229,18 +5243,19 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
5229 if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr)) 5243 if (ex_start == EXT_FIRST_EXTENT(path[depth].p_hdr))
5230 update = 1; 5244 update = 1;
5231 5245
5232 *start = ex_last->ee_block + 5246 *start = le32_to_cpu(ex_last->ee_block) +
5233 ext4_ext_get_actual_len(ex_last); 5247 ext4_ext_get_actual_len(ex_last);
5234 5248
5235 while (ex_start <= ex_last) { 5249 while (ex_start <= ex_last) {
5236 ex_start->ee_block -= shift; 5250 le32_add_cpu(&ex_start->ee_block, -shift);
5237 if (ex_start > 5251 /* Try to merge to the left. */
5238 EXT_FIRST_EXTENT(path[depth].p_hdr)) { 5252 if ((ex_start >
5239 if (ext4_ext_try_to_merge_right(inode, 5253 EXT_FIRST_EXTENT(path[depth].p_hdr)) &&
5240 path, ex_start - 1)) 5254 ext4_ext_try_to_merge_right(inode,
5241 ex_last--; 5255 path, ex_start - 1))
5242 } 5256 ex_last--;
5243 ex_start++; 5257 else
5258 ex_start++;
5244 } 5259 }
5245 err = ext4_ext_dirty(handle, inode, path + depth); 5260 err = ext4_ext_dirty(handle, inode, path + depth);
5246 if (err) 5261 if (err)
@@ -5255,7 +5270,7 @@ ext4_ext_shift_path_extents(struct ext4_ext_path *path, ext4_lblk_t shift,
5255 if (err) 5270 if (err)
5256 goto out; 5271 goto out;
5257 5272
5258 path[depth].p_idx->ei_block -= shift; 5273 le32_add_cpu(&path[depth].p_idx->ei_block, -shift);
5259 err = ext4_ext_dirty(handle, inode, path + depth); 5274 err = ext4_ext_dirty(handle, inode, path + depth);
5260 if (err) 5275 if (err)
5261 goto out; 5276 goto out;
@@ -5300,7 +5315,8 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
5300 return ret; 5315 return ret;
5301 } 5316 }
5302 5317
5303 stop_block = extent->ee_block + ext4_ext_get_actual_len(extent); 5318 stop_block = le32_to_cpu(extent->ee_block) +
5319 ext4_ext_get_actual_len(extent);
5304 ext4_ext_drop_refs(path); 5320 ext4_ext_drop_refs(path);
5305 kfree(path); 5321 kfree(path);
5306 5322
@@ -5313,10 +5329,18 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
5313 * enough to accomodate the shift. 5329 * enough to accomodate the shift.
5314 */ 5330 */
5315 path = ext4_ext_find_extent(inode, start - 1, NULL, 0); 5331 path = ext4_ext_find_extent(inode, start - 1, NULL, 0);
5332 if (IS_ERR(path))
5333 return PTR_ERR(path);
5316 depth = path->p_depth; 5334 depth = path->p_depth;
5317 extent = path[depth].p_ext; 5335 extent = path[depth].p_ext;
5318 ex_start = extent->ee_block; 5336 if (extent) {
5319 ex_end = extent->ee_block + ext4_ext_get_actual_len(extent); 5337 ex_start = le32_to_cpu(extent->ee_block);
5338 ex_end = le32_to_cpu(extent->ee_block) +
5339 ext4_ext_get_actual_len(extent);
5340 } else {
5341 ex_start = 0;
5342 ex_end = 0;
5343 }
5320 ext4_ext_drop_refs(path); 5344 ext4_ext_drop_refs(path);
5321 kfree(path); 5345 kfree(path);
5322 5346
@@ -5331,7 +5355,13 @@ ext4_ext_shift_extents(struct inode *inode, handle_t *handle,
5331 return PTR_ERR(path); 5355 return PTR_ERR(path);
5332 depth = path->p_depth; 5356 depth = path->p_depth;
5333 extent = path[depth].p_ext; 5357 extent = path[depth].p_ext;
5334 current_block = extent->ee_block; 5358 if (!extent) {
5359 EXT4_ERROR_INODE(inode, "unexpected hole at %lu",
5360 (unsigned long) start);
5361 return -EIO;
5362 }
5363
5364 current_block = le32_to_cpu(extent->ee_block);
5335 if (start > current_block) { 5365 if (start > current_block) {
5336 /* Hole, move to the next extent */ 5366 /* Hole, move to the next extent */
5337 ret = mext_next_extent(inode, path, &extent); 5367 ret = mext_next_extent(inode, path, &extent);
@@ -5365,17 +5395,18 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5365 ext4_lblk_t punch_start, punch_stop; 5395 ext4_lblk_t punch_start, punch_stop;
5366 handle_t *handle; 5396 handle_t *handle;
5367 unsigned int credits; 5397 unsigned int credits;
5368 loff_t new_size; 5398 loff_t new_size, ioffset;
5369 int ret; 5399 int ret;
5370 5400
5371 BUG_ON(offset + len > i_size_read(inode));
5372
5373 /* Collapse range works only on fs block size aligned offsets. */ 5401 /* Collapse range works only on fs block size aligned offsets. */
5374 if (offset & (EXT4_BLOCK_SIZE(sb) - 1) || 5402 if (offset & (EXT4_BLOCK_SIZE(sb) - 1) ||
5375 len & (EXT4_BLOCK_SIZE(sb) - 1)) 5403 len & (EXT4_BLOCK_SIZE(sb) - 1))
5376 return -EINVAL; 5404 return -EINVAL;
5377 5405
5378 if (!S_ISREG(inode->i_mode)) 5406 if (!S_ISREG(inode->i_mode))
5407 return -EINVAL;
5408
5409 if (EXT4_SB(inode->i_sb)->s_cluster_ratio > 1)
5379 return -EOPNOTSUPP; 5410 return -EOPNOTSUPP;
5380 5411
5381 trace_ext4_collapse_range(inode, offset, len); 5412 trace_ext4_collapse_range(inode, offset, len);
@@ -5383,22 +5414,34 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5383 punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb); 5414 punch_start = offset >> EXT4_BLOCK_SIZE_BITS(sb);
5384 punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb); 5415 punch_stop = (offset + len) >> EXT4_BLOCK_SIZE_BITS(sb);
5385 5416
5417 /* Call ext4_force_commit to flush all data in case of data=journal. */
5418 if (ext4_should_journal_data(inode)) {
5419 ret = ext4_force_commit(inode->i_sb);
5420 if (ret)
5421 return ret;
5422 }
5423
5424 /*
5425 * Need to round down offset to be aligned with page size boundary
5426 * for page size > block size.
5427 */
5428 ioffset = round_down(offset, PAGE_SIZE);
5429
5386 /* Write out all dirty pages */ 5430 /* Write out all dirty pages */
5387 ret = filemap_write_and_wait_range(inode->i_mapping, offset, -1); 5431 ret = filemap_write_and_wait_range(inode->i_mapping, ioffset,
5432 LLONG_MAX);
5388 if (ret) 5433 if (ret)
5389 return ret; 5434 return ret;
5390 5435
5391 /* Take mutex lock */ 5436 /* Take mutex lock */
5392 mutex_lock(&inode->i_mutex); 5437 mutex_lock(&inode->i_mutex);
5393 5438
5394 /* It's not possible punch hole on append only file */ 5439 /*
5395 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) { 5440 * There is no need to overlap collapse range with EOF, in which case
5396 ret = -EPERM; 5441 * it is effectively a truncate operation
5397 goto out_mutex; 5442 */
5398 } 5443 if (offset + len >= i_size_read(inode)) {
5399 5444 ret = -EINVAL;
5400 if (IS_SWAPFILE(inode)) {
5401 ret = -ETXTBSY;
5402 goto out_mutex; 5445 goto out_mutex;
5403 } 5446 }
5404 5447
@@ -5408,7 +5451,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5408 goto out_mutex; 5451 goto out_mutex;
5409 } 5452 }
5410 5453
5411 truncate_pagecache_range(inode, offset, -1); 5454 truncate_pagecache(inode, ioffset);
5412 5455
5413 /* Wait for existing dio to complete */ 5456 /* Wait for existing dio to complete */
5414 ext4_inode_block_unlocked_dio(inode); 5457 ext4_inode_block_unlocked_dio(inode);
@@ -5425,7 +5468,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5425 ext4_discard_preallocations(inode); 5468 ext4_discard_preallocations(inode);
5426 5469
5427 ret = ext4_es_remove_extent(inode, punch_start, 5470 ret = ext4_es_remove_extent(inode, punch_start,
5428 EXT_MAX_BLOCKS - punch_start - 1); 5471 EXT_MAX_BLOCKS - punch_start);
5429 if (ret) { 5472 if (ret) {
5430 up_write(&EXT4_I(inode)->i_data_sem); 5473 up_write(&EXT4_I(inode)->i_data_sem);
5431 goto out_stop; 5474 goto out_stop;
@@ -5436,6 +5479,7 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5436 up_write(&EXT4_I(inode)->i_data_sem); 5479 up_write(&EXT4_I(inode)->i_data_sem);
5437 goto out_stop; 5480 goto out_stop;
5438 } 5481 }
5482 ext4_discard_preallocations(inode);
5439 5483
5440 ret = ext4_ext_shift_extents(inode, handle, punch_stop, 5484 ret = ext4_ext_shift_extents(inode, handle, punch_stop,
5441 punch_stop - punch_start); 5485 punch_stop - punch_start);
@@ -5445,10 +5489,9 @@ int ext4_collapse_range(struct inode *inode, loff_t offset, loff_t len)
5445 } 5489 }
5446 5490
5447 new_size = i_size_read(inode) - len; 5491 new_size = i_size_read(inode) - len;
5448 truncate_setsize(inode, new_size); 5492 i_size_write(inode, new_size);
5449 EXT4_I(inode)->i_disksize = new_size; 5493 EXT4_I(inode)->i_disksize = new_size;
5450 5494
5451 ext4_discard_preallocations(inode);
5452 up_write(&EXT4_I(inode)->i_data_sem); 5495 up_write(&EXT4_I(inode)->i_data_sem);
5453 if (IS_SYNC(inode)) 5496 if (IS_SYNC(inode))
5454 ext4_handle_sync(handle); 5497 ext4_handle_sync(handle);
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index 0a014a7194b2..0ebc21204b51 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -810,7 +810,7 @@ retry:
810 810
811 newes.es_lblk = end + 1; 811 newes.es_lblk = end + 1;
812 newes.es_len = len2; 812 newes.es_len = len2;
813 block = 0x7FDEADBEEF; 813 block = 0x7FDEADBEEFULL;
814 if (ext4_es_is_written(&orig_es) || 814 if (ext4_es_is_written(&orig_es) ||
815 ext4_es_is_unwritten(&orig_es)) 815 ext4_es_is_unwritten(&orig_es))
816 block = ext4_es_pblock(&orig_es) + 816 block = ext4_es_pblock(&orig_es) +
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index ca7502d89fde..063fc1538355 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -82,7 +82,7 @@ ext4_unaligned_aio(struct inode *inode, const struct iovec *iov,
82 size_t count = iov_length(iov, nr_segs); 82 size_t count = iov_length(iov, nr_segs);
83 loff_t final_size = pos + count; 83 loff_t final_size = pos + count;
84 84
85 if (pos >= inode->i_size) 85 if (pos >= i_size_read(inode))
86 return 0; 86 return 0;
87 87
88 if ((pos & blockmask) || (final_size & blockmask)) 88 if ((pos & blockmask) || (final_size & blockmask))
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 5b0d2c7d5408..d7b7462a0e13 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -522,6 +522,10 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
522 if (unlikely(map->m_len > INT_MAX)) 522 if (unlikely(map->m_len > INT_MAX))
523 map->m_len = INT_MAX; 523 map->m_len = INT_MAX;
524 524
525 /* We can handle the block number less than EXT_MAX_BLOCKS */
526 if (unlikely(map->m_lblk >= EXT_MAX_BLOCKS))
527 return -EIO;
528
525 /* Lookup extent status tree firstly */ 529 /* Lookup extent status tree firstly */
526 if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) { 530 if (ext4_es_lookup_extent(inode, map->m_lblk, &es)) {
527 ext4_es_lru_add(inode); 531 ext4_es_lru_add(inode);
@@ -2243,13 +2247,23 @@ static int mpage_map_and_submit_extent(handle_t *handle,
2243 return err; 2247 return err;
2244 } while (map->m_len); 2248 } while (map->m_len);
2245 2249
2246 /* Update on-disk size after IO is submitted */ 2250 /*
2251 * Update on-disk size after IO is submitted. Races with
2252 * truncate are avoided by checking i_size under i_data_sem.
2253 */
2247 disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT; 2254 disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT;
2248 if (disksize > EXT4_I(inode)->i_disksize) { 2255 if (disksize > EXT4_I(inode)->i_disksize) {
2249 int err2; 2256 int err2;
2250 2257 loff_t i_size;
2251 ext4_wb_update_i_disksize(inode, disksize); 2258
2259 down_write(&EXT4_I(inode)->i_data_sem);
2260 i_size = i_size_read(inode);
2261 if (disksize > i_size)
2262 disksize = i_size;
2263 if (disksize > EXT4_I(inode)->i_disksize)
2264 EXT4_I(inode)->i_disksize = disksize;
2252 err2 = ext4_mark_inode_dirty(handle, inode); 2265 err2 = ext4_mark_inode_dirty(handle, inode);
2266 up_write(&EXT4_I(inode)->i_data_sem);
2253 if (err2) 2267 if (err2)
2254 ext4_error(inode->i_sb, 2268 ext4_error(inode->i_sb,
2255 "Failed to mark inode %lu dirty", 2269 "Failed to mark inode %lu dirty",
@@ -3527,15 +3541,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
3527 } 3541 }
3528 3542
3529 mutex_lock(&inode->i_mutex); 3543 mutex_lock(&inode->i_mutex);
3530 /* It's not possible punch hole on append only file */
3531 if (IS_APPEND(inode) || IS_IMMUTABLE(inode)) {
3532 ret = -EPERM;
3533 goto out_mutex;
3534 }
3535 if (IS_SWAPFILE(inode)) {
3536 ret = -ETXTBSY;
3537 goto out_mutex;
3538 }
3539 3544
3540 /* No need to punch hole beyond i_size */ 3545 /* No need to punch hole beyond i_size */
3541 if (offset >= inode->i_size) 3546 if (offset >= inode->i_size)
@@ -3616,7 +3621,6 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
3616 ret = ext4_free_hole_blocks(handle, inode, first_block, 3621 ret = ext4_free_hole_blocks(handle, inode, first_block,
3617 stop_block); 3622 stop_block);
3618 3623
3619 ext4_discard_preallocations(inode);
3620 up_write(&EXT4_I(inode)->i_data_sem); 3624 up_write(&EXT4_I(inode)->i_data_sem);
3621 if (IS_SYNC(inode)) 3625 if (IS_SYNC(inode))
3622 ext4_handle_sync(handle); 3626 ext4_handle_sync(handle);
@@ -4423,21 +4427,20 @@ out_brelse:
4423 * 4427 *
4424 * We are called from a few places: 4428 * We are called from a few places:
4425 * 4429 *
4426 * - Within generic_file_write() for O_SYNC files. 4430 * - Within generic_file_aio_write() -> generic_write_sync() for O_SYNC files.
4427 * Here, there will be no transaction running. We wait for any running 4431 * Here, there will be no transaction running. We wait for any running
4428 * transaction to commit. 4432 * transaction to commit.
4429 * 4433 *
4430 * - Within sys_sync(), kupdate and such. 4434 * - Within flush work (sys_sync(), kupdate and such).
4431 * We wait on commit, if tol to. 4435 * We wait on commit, if told to.
4432 * 4436 *
4433 * - Within prune_icache() (PF_MEMALLOC == true) 4437 * - Within iput_final() -> write_inode_now()
4434 * Here we simply return. We can't afford to block kswapd on the 4438 * We wait on commit, if told to.
4435 * journal commit.
4436 * 4439 *
4437 * In all cases it is actually safe for us to return without doing anything, 4440 * In all cases it is actually safe for us to return without doing anything,
4438 * because the inode has been copied into a raw inode buffer in 4441 * because the inode has been copied into a raw inode buffer in
4439 * ext4_mark_inode_dirty(). This is a correctness thing for O_SYNC and for 4442 * ext4_mark_inode_dirty(). This is a correctness thing for WB_SYNC_ALL
4440 * knfsd. 4443 * writeback.
4441 * 4444 *
4442 * Note that we are absolutely dependent upon all inode dirtiers doing the 4445 * Note that we are absolutely dependent upon all inode dirtiers doing the
4443 * right thing: they *must* call mark_inode_dirty() after dirtying info in 4446 * right thing: they *must* call mark_inode_dirty() after dirtying info in
@@ -4449,15 +4452,15 @@ out_brelse:
4449 * stuff(); 4452 * stuff();
4450 * inode->i_size = expr; 4453 * inode->i_size = expr;
4451 * 4454 *
4452 * is in error because a kswapd-driven write_inode() could occur while 4455 * is in error because write_inode() could occur while `stuff()' is running,
4453 * `stuff()' is running, and the new i_size will be lost. Plus the inode 4456 * and the new i_size will be lost. Plus the inode will no longer be on the
4454 * will no longer be on the superblock's dirty inode list. 4457 * superblock's dirty inode list.
4455 */ 4458 */
4456int ext4_write_inode(struct inode *inode, struct writeback_control *wbc) 4459int ext4_write_inode(struct inode *inode, struct writeback_control *wbc)
4457{ 4460{
4458 int err; 4461 int err;
4459 4462
4460 if (current->flags & PF_MEMALLOC) 4463 if (WARN_ON_ONCE(current->flags & PF_MEMALLOC))
4461 return 0; 4464 return 0;
4462 4465
4463 if (EXT4_SB(inode->i_sb)->s_journal) { 4466 if (EXT4_SB(inode->i_sb)->s_journal) {
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index a888cac76e9c..c8238a26818c 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -989,7 +989,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
989 poff = block % blocks_per_page; 989 poff = block % blocks_per_page;
990 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); 990 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
991 if (!page) 991 if (!page)
992 return -EIO; 992 return -ENOMEM;
993 BUG_ON(page->mapping != inode->i_mapping); 993 BUG_ON(page->mapping != inode->i_mapping);
994 e4b->bd_bitmap_page = page; 994 e4b->bd_bitmap_page = page;
995 e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize); 995 e4b->bd_bitmap = page_address(page) + (poff * sb->s_blocksize);
@@ -1003,7 +1003,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
1003 pnum = block / blocks_per_page; 1003 pnum = block / blocks_per_page;
1004 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS); 1004 page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
1005 if (!page) 1005 if (!page)
1006 return -EIO; 1006 return -ENOMEM;
1007 BUG_ON(page->mapping != inode->i_mapping); 1007 BUG_ON(page->mapping != inode->i_mapping);
1008 e4b->bd_buddy_page = page; 1008 e4b->bd_buddy_page = page;
1009 return 0; 1009 return 0;
@@ -1168,7 +1168,11 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
1168 unlock_page(page); 1168 unlock_page(page);
1169 } 1169 }
1170 } 1170 }
1171 if (page == NULL || !PageUptodate(page)) { 1171 if (page == NULL) {
1172 ret = -ENOMEM;
1173 goto err;
1174 }
1175 if (!PageUptodate(page)) {
1172 ret = -EIO; 1176 ret = -EIO;
1173 goto err; 1177 goto err;
1174 } 1178 }
@@ -1197,7 +1201,11 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
1197 unlock_page(page); 1201 unlock_page(page);
1198 } 1202 }
1199 } 1203 }
1200 if (page == NULL || !PageUptodate(page)) { 1204 if (page == NULL) {
1205 ret = -ENOMEM;
1206 goto err;
1207 }
1208 if (!PageUptodate(page)) {
1201 ret = -EIO; 1209 ret = -EIO;
1202 goto err; 1210 goto err;
1203 } 1211 }
@@ -5008,6 +5016,8 @@ error_return:
5008 */ 5016 */
5009static int ext4_trim_extent(struct super_block *sb, int start, int count, 5017static int ext4_trim_extent(struct super_block *sb, int start, int count,
5010 ext4_group_t group, struct ext4_buddy *e4b) 5018 ext4_group_t group, struct ext4_buddy *e4b)
5019__releases(bitlock)
5020__acquires(bitlock)
5011{ 5021{
5012 struct ext4_free_extent ex; 5022 struct ext4_free_extent ex;
5013 int ret = 0; 5023 int ret = 0;
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index ab95508e3d40..c18d95b50540 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -308,13 +308,14 @@ static void ext4_end_bio(struct bio *bio, int error)
308 if (error) { 308 if (error) {
309 struct inode *inode = io_end->inode; 309 struct inode *inode = io_end->inode;
310 310
311 ext4_warning(inode->i_sb, "I/O error writing to inode %lu " 311 ext4_warning(inode->i_sb, "I/O error %d writing to inode %lu "
312 "(offset %llu size %ld starting block %llu)", 312 "(offset %llu size %ld starting block %llu)",
313 inode->i_ino, 313 error, inode->i_ino,
314 (unsigned long long) io_end->offset, 314 (unsigned long long) io_end->offset,
315 (long) io_end->size, 315 (long) io_end->size,
316 (unsigned long long) 316 (unsigned long long)
317 bi_sector >> (inode->i_blkbits - 9)); 317 bi_sector >> (inode->i_blkbits - 9));
318 mapping_set_error(inode->i_mapping, error);
318 } 319 }
319 320
320 if (io_end->flag & EXT4_IO_END_UNWRITTEN) { 321 if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index f3c667091618..6f9e6fadac04 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -3869,19 +3869,38 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3869 goto failed_mount2; 3869 goto failed_mount2;
3870 } 3870 }
3871 } 3871 }
3872
3873 /*
3874 * set up enough so that it can read an inode,
3875 * and create new inode for buddy allocator
3876 */
3877 sbi->s_gdb_count = db_count;
3878 if (!test_opt(sb, NOLOAD) &&
3879 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
3880 sb->s_op = &ext4_sops;
3881 else
3882 sb->s_op = &ext4_nojournal_sops;
3883
3884 ext4_ext_init(sb);
3885 err = ext4_mb_init(sb);
3886 if (err) {
3887 ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
3888 err);
3889 goto failed_mount2;
3890 }
3891
3872 if (!ext4_check_descriptors(sb, &first_not_zeroed)) { 3892 if (!ext4_check_descriptors(sb, &first_not_zeroed)) {
3873 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!"); 3893 ext4_msg(sb, KERN_ERR, "group descriptors corrupted!");
3874 goto failed_mount2; 3894 goto failed_mount2a;
3875 } 3895 }
3876 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG)) 3896 if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_FLEX_BG))
3877 if (!ext4_fill_flex_info(sb)) { 3897 if (!ext4_fill_flex_info(sb)) {
3878 ext4_msg(sb, KERN_ERR, 3898 ext4_msg(sb, KERN_ERR,
3879 "unable to initialize " 3899 "unable to initialize "
3880 "flex_bg meta info!"); 3900 "flex_bg meta info!");
3881 goto failed_mount2; 3901 goto failed_mount2a;
3882 } 3902 }
3883 3903
3884 sbi->s_gdb_count = db_count;
3885 get_random_bytes(&sbi->s_next_generation, sizeof(u32)); 3904 get_random_bytes(&sbi->s_next_generation, sizeof(u32));
3886 spin_lock_init(&sbi->s_next_gen_lock); 3905 spin_lock_init(&sbi->s_next_gen_lock);
3887 3906
@@ -3916,14 +3935,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
3916 sbi->s_stripe = ext4_get_stripe_size(sbi); 3935 sbi->s_stripe = ext4_get_stripe_size(sbi);
3917 sbi->s_extent_max_zeroout_kb = 32; 3936 sbi->s_extent_max_zeroout_kb = 32;
3918 3937
3919 /*
3920 * set up enough so that it can read an inode
3921 */
3922 if (!test_opt(sb, NOLOAD) &&
3923 EXT4_HAS_COMPAT_FEATURE(sb, EXT4_FEATURE_COMPAT_HAS_JOURNAL))
3924 sb->s_op = &ext4_sops;
3925 else
3926 sb->s_op = &ext4_nojournal_sops;
3927 sb->s_export_op = &ext4_export_ops; 3938 sb->s_export_op = &ext4_export_ops;
3928 sb->s_xattr = ext4_xattr_handlers; 3939 sb->s_xattr = ext4_xattr_handlers;
3929#ifdef CONFIG_QUOTA 3940#ifdef CONFIG_QUOTA
@@ -4113,21 +4124,13 @@ no_journal:
4113 if (err) { 4124 if (err) {
4114 ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for " 4125 ext4_msg(sb, KERN_ERR, "failed to reserve %llu clusters for "
4115 "reserved pool", ext4_calculate_resv_clusters(sb)); 4126 "reserved pool", ext4_calculate_resv_clusters(sb));
4116 goto failed_mount4a; 4127 goto failed_mount5;
4117 } 4128 }
4118 4129
4119 err = ext4_setup_system_zone(sb); 4130 err = ext4_setup_system_zone(sb);
4120 if (err) { 4131 if (err) {
4121 ext4_msg(sb, KERN_ERR, "failed to initialize system " 4132 ext4_msg(sb, KERN_ERR, "failed to initialize system "
4122 "zone (%d)", err); 4133 "zone (%d)", err);
4123 goto failed_mount4a;
4124 }
4125
4126 ext4_ext_init(sb);
4127 err = ext4_mb_init(sb);
4128 if (err) {
4129 ext4_msg(sb, KERN_ERR, "failed to initialize mballoc (%d)",
4130 err);
4131 goto failed_mount5; 4134 goto failed_mount5;
4132 } 4135 }
4133 4136
@@ -4204,11 +4207,8 @@ failed_mount8:
4204failed_mount7: 4207failed_mount7:
4205 ext4_unregister_li_request(sb); 4208 ext4_unregister_li_request(sb);
4206failed_mount6: 4209failed_mount6:
4207 ext4_mb_release(sb);
4208failed_mount5:
4209 ext4_ext_release(sb);
4210 ext4_release_system_zone(sb); 4210 ext4_release_system_zone(sb);
4211failed_mount4a: 4211failed_mount5:
4212 dput(sb->s_root); 4212 dput(sb->s_root);
4213 sb->s_root = NULL; 4213 sb->s_root = NULL;
4214failed_mount4: 4214failed_mount4:
@@ -4232,11 +4232,14 @@ failed_mount3:
4232 percpu_counter_destroy(&sbi->s_extent_cache_cnt); 4232 percpu_counter_destroy(&sbi->s_extent_cache_cnt);
4233 if (sbi->s_mmp_tsk) 4233 if (sbi->s_mmp_tsk)
4234 kthread_stop(sbi->s_mmp_tsk); 4234 kthread_stop(sbi->s_mmp_tsk);
4235failed_mount2a:
4236 ext4_mb_release(sb);
4235failed_mount2: 4237failed_mount2:
4236 for (i = 0; i < db_count; i++) 4238 for (i = 0; i < db_count; i++)
4237 brelse(sbi->s_group_desc[i]); 4239 brelse(sbi->s_group_desc[i]);
4238 ext4_kvfree(sbi->s_group_desc); 4240 ext4_kvfree(sbi->s_group_desc);
4239failed_mount: 4241failed_mount:
4242 ext4_ext_release(sb);
4240 if (sbi->s_chksum_driver) 4243 if (sbi->s_chksum_driver)
4241 crypto_free_shash(sbi->s_chksum_driver); 4244 crypto_free_shash(sbi->s_chksum_driver);
4242 if (sbi->s_proc) { 4245 if (sbi->s_proc) {
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index 1f5cf5880718..4eec399ec807 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -520,8 +520,8 @@ static void ext4_xattr_update_super_block(handle_t *handle,
520} 520}
521 521
522/* 522/*
523 * Release the xattr block BH: If the reference count is > 1, decrement 523 * Release the xattr block BH: If the reference count is > 1, decrement it;
524 * it; otherwise free the block. 524 * otherwise free the block.
525 */ 525 */
526static void 526static void
527ext4_xattr_release_block(handle_t *handle, struct inode *inode, 527ext4_xattr_release_block(handle_t *handle, struct inode *inode,
@@ -542,16 +542,31 @@ ext4_xattr_release_block(handle_t *handle, struct inode *inode,
542 if (ce) 542 if (ce)
543 mb_cache_entry_free(ce); 543 mb_cache_entry_free(ce);
544 get_bh(bh); 544 get_bh(bh);
545 unlock_buffer(bh);
545 ext4_free_blocks(handle, inode, bh, 0, 1, 546 ext4_free_blocks(handle, inode, bh, 0, 1,
546 EXT4_FREE_BLOCKS_METADATA | 547 EXT4_FREE_BLOCKS_METADATA |
547 EXT4_FREE_BLOCKS_FORGET); 548 EXT4_FREE_BLOCKS_FORGET);
548 unlock_buffer(bh);
549 } else { 549 } else {
550 le32_add_cpu(&BHDR(bh)->h_refcount, -1); 550 le32_add_cpu(&BHDR(bh)->h_refcount, -1);
551 if (ce) 551 if (ce)
552 mb_cache_entry_release(ce); 552 mb_cache_entry_release(ce);
553 /*
554 * Beware of this ugliness: Releasing of xattr block references
555 * from different inodes can race and so we have to protect
556 * from a race where someone else frees the block (and releases
557 * its journal_head) before we are done dirtying the buffer. In
558 * nojournal mode this race is harmless and we actually cannot
559 * call ext4_handle_dirty_xattr_block() with locked buffer as
560 * that function can call sync_dirty_buffer() so for that case
561 * we handle the dirtying after unlocking the buffer.
562 */
563 if (ext4_handle_valid(handle))
564 error = ext4_handle_dirty_xattr_block(handle, inode,
565 bh);
553 unlock_buffer(bh); 566 unlock_buffer(bh);
554 error = ext4_handle_dirty_xattr_block(handle, inode, bh); 567 if (!ext4_handle_valid(handle))
568 error = ext4_handle_dirty_xattr_block(handle, inode,
569 bh);
555 if (IS_SYNC(inode)) 570 if (IS_SYNC(inode))
556 ext4_handle_sync(handle); 571 ext4_handle_sync(handle);
557 dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1)); 572 dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1));
diff --git a/fs/open.c b/fs/open.c
index 3d30eb1fc95e..9d64679cec73 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -254,17 +254,22 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
254 return -EBADF; 254 return -EBADF;
255 255
256 /* 256 /*
257 * It's not possible to punch hole or perform collapse range 257 * We can only allow pure fallocate on append only files
258 * on append only file
259 */ 258 */
260 if (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE) 259 if ((mode & ~FALLOC_FL_KEEP_SIZE) && IS_APPEND(inode))
261 && IS_APPEND(inode))
262 return -EPERM; 260 return -EPERM;
263 261
264 if (IS_IMMUTABLE(inode)) 262 if (IS_IMMUTABLE(inode))
265 return -EPERM; 263 return -EPERM;
266 264
267 /* 265 /*
266 * We cannot allow any fallocate operation on an active
267 * swapfile
268 */
269 if (IS_SWAPFILE(inode))
270 ret = -ETXTBSY;
271
272 /*
268 * Revalidate the write permissions, in case security policy has 273 * Revalidate the write permissions, in case security policy has
269 * changed since the files were opened. 274 * changed since the files were opened.
270 */ 275 */
@@ -286,14 +291,6 @@ int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
286 if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0)) 291 if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
287 return -EFBIG; 292 return -EFBIG;
288 293
289 /*
290 * There is no need to overlap collapse range with EOF, in which case
291 * it is effectively a truncate operation
292 */
293 if ((mode & FALLOC_FL_COLLAPSE_RANGE) &&
294 (offset + len >= i_size_read(inode)))
295 return -EINVAL;
296
297 if (!file->f_op->fallocate) 294 if (!file->f_op->fallocate)
298 return -EOPNOTSUPP; 295 return -EOPNOTSUPP;
299 296
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 82afdcb33183..951a2321ee01 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -841,7 +841,15 @@ xfs_file_fallocate(
841 goto out_unlock; 841 goto out_unlock;
842 } 842 }
843 843
844 ASSERT(offset + len < i_size_read(inode)); 844 /*
845 * There is no need to overlap collapse range with EOF,
846 * in which case it is effectively a truncate operation
847 */
848 if (offset + len >= i_size_read(inode)) {
849 error = -EINVAL;
850 goto out_unlock;
851 }
852
845 new_size = i_size_read(inode) - len; 853 new_size = i_size_read(inode) - len;
846 854
847 error = xfs_collapse_file_space(ip, offset, len); 855 error = xfs_collapse_file_space(ip, offset, len);