aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/btrfs/extent-tree.c 28
-rw-r--r-- fs/btrfs/extent_map.c 14
-rw-r--r-- fs/btrfs/extent_map.h 1
-rw-r--r-- fs/btrfs/file-item.c 4
-rw-r--r-- fs/btrfs/file.c 35
-rw-r--r-- fs/btrfs/free-space-cache.c 20
-rw-r--r-- fs/btrfs/inode.c 137
-rw-r--r-- fs/btrfs/ioctl.c 134
-rw-r--r-- fs/btrfs/ordered-data.c 13
-rw-r--r-- fs/btrfs/qgroup.c 20
-rw-r--r-- fs/btrfs/scrub.c 25
-rw-r--r-- fs/btrfs/send.c 4
-rw-r--r-- fs/btrfs/super.c 2
-rw-r--r-- fs/btrfs/transaction.c 46
-rw-r--r-- fs/btrfs/tree-log.c 10
-rw-r--r-- fs/btrfs/volumes.c 26
-rw-r--r-- fs/cifs/cifs_dfs_ref.c 2
-rw-r--r-- fs/cifs/connect.c 2
-rw-r--r-- fs/dlm/user.c 8
-rw-r--r-- fs/gfs2/lock_dlm.c 7
-rw-r--r-- fs/nfs/namespace.c 20
-rw-r--r-- fs/nfs/nfs4client.c 62
-rw-r--r-- fs/nfs/nfs4state.c 22
-rw-r--r-- fs/nfs/super.c 22
-rw-r--r-- fs/nilfs2/ioctl.c 5
-rw-r--r-- fs/xfs/xfs_aops.c 2
-rw-r--r-- fs/xfs/xfs_bmap.c 6
-rw-r--r-- fs/xfs/xfs_buf.c 20
-rw-r--r-- fs/xfs/xfs_buf_item.c 12
-rw-r--r-- fs/xfs/xfs_dfrag.c 4
-rw-r--r-- fs/xfs/xfs_iomap.c 9
-rw-r--r-- fs/xfs/xfs_mount.c 2
-rw-r--r-- fs/xfs/xfs_trace.h 1
33 files changed, 519 insertions, 206 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 521e9d4424f6..5a3327b8f90d 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3997,7 +3997,7 @@ again:
3997 * We make the other tasks wait for the flush only when we can flush 3997 * We make the other tasks wait for the flush only when we can flush
3998 * all things. 3998 * all things.
3999 */ 3999 */
4000 if (ret && flush == BTRFS_RESERVE_FLUSH_ALL) { 4000 if (ret && flush != BTRFS_RESERVE_NO_FLUSH) {
4001 flushing = true; 4001 flushing = true;
4002 space_info->flush = 1; 4002 space_info->flush = 1;
4003 } 4003 }
@@ -4534,7 +4534,7 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4534 unsigned nr_extents = 0; 4534 unsigned nr_extents = 0;
4535 int extra_reserve = 0; 4535 int extra_reserve = 0;
4536 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL; 4536 enum btrfs_reserve_flush_enum flush = BTRFS_RESERVE_FLUSH_ALL;
4537 int ret; 4537 int ret = 0;
4538 bool delalloc_lock = true; 4538 bool delalloc_lock = true;
4539 4539
4540 /* If we are a free space inode we need to not flush since we will be in 4540 /* If we are a free space inode we need to not flush since we will be in
@@ -4579,20 +4579,18 @@ int btrfs_delalloc_reserve_metadata(struct inode *inode, u64 num_bytes)
4579 csum_bytes = BTRFS_I(inode)->csum_bytes; 4579 csum_bytes = BTRFS_I(inode)->csum_bytes;
4580 spin_unlock(&BTRFS_I(inode)->lock); 4580 spin_unlock(&BTRFS_I(inode)->lock);
4581 4581
4582 if (root->fs_info->quota_enabled) { 4582 if (root->fs_info->quota_enabled)
4583 ret = btrfs_qgroup_reserve(root, num_bytes + 4583 ret = btrfs_qgroup_reserve(root, num_bytes +
4584 nr_extents * root->leafsize); 4584 nr_extents * root->leafsize);
4585 if (ret) {
4586 spin_lock(&BTRFS_I(inode)->lock);
4587 calc_csum_metadata_size(inode, num_bytes, 0);
4588 spin_unlock(&BTRFS_I(inode)->lock);
4589 if (delalloc_lock)
4590 mutex_unlock(&BTRFS_I(inode)->delalloc_mutex);
4591 return ret;
4592 }
4593 }
4594 4585
4595 ret = reserve_metadata_bytes(root, block_rsv, to_reserve, flush); 4586 /*
4587 * ret != 0 here means the qgroup reservation failed, so we go straight to
4588 * the shared error handling.
4589 */
4590 if (ret == 0)
4591 ret = reserve_metadata_bytes(root, block_rsv,
4592 to_reserve, flush);
4593
4596 if (ret) { 4594 if (ret) {
4597 u64 to_free = 0; 4595 u64 to_free = 0;
4598 unsigned dropped; 4596 unsigned dropped;
@@ -5560,7 +5558,7 @@ static noinline int find_free_extent(struct btrfs_trans_handle *trans,
5560 int empty_cluster = 2 * 1024 * 1024; 5558 int empty_cluster = 2 * 1024 * 1024;
5561 struct btrfs_space_info *space_info; 5559 struct btrfs_space_info *space_info;
5562 int loop = 0; 5560 int loop = 0;
5563 int index = 0; 5561 int index = __get_raid_index(data);
5564 int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ? 5562 int alloc_type = (data & BTRFS_BLOCK_GROUP_DATA) ?
5565 RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC; 5563 RESERVE_ALLOC_NO_ACCOUNT : RESERVE_ALLOC;
5566 bool found_uncached_bg = false; 5564 bool found_uncached_bg = false;
@@ -6788,11 +6786,13 @@ static noinline int walk_up_proc(struct btrfs_trans_handle *trans,
6788 &wc->flags[level]); 6786 &wc->flags[level]);
6789 if (ret < 0) { 6787 if (ret < 0) {
6790 btrfs_tree_unlock_rw(eb, path->locks[level]); 6788 btrfs_tree_unlock_rw(eb, path->locks[level]);
6789 path->locks[level] = 0;
6791 return ret; 6790 return ret;
6792 } 6791 }
6793 BUG_ON(wc->refs[level] == 0); 6792 BUG_ON(wc->refs[level] == 0);
6794 if (wc->refs[level] == 1) { 6793 if (wc->refs[level] == 1) {
6795 btrfs_tree_unlock_rw(eb, path->locks[level]); 6794 btrfs_tree_unlock_rw(eb, path->locks[level]);
6795 path->locks[level] = 0;
6796 return 1; 6796 return 1;
6797 } 6797 }
6798 } 6798 }
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index f169d6b11d7f..fdb7a8db3b57 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -171,6 +171,10 @@ static int mergable_maps(struct extent_map *prev, struct extent_map *next)
171 if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags)) 171 if (test_bit(EXTENT_FLAG_COMPRESSED, &prev->flags))
172 return 0; 172 return 0;
173 173
174 if (test_bit(EXTENT_FLAG_LOGGING, &prev->flags) ||
175 test_bit(EXTENT_FLAG_LOGGING, &next->flags))
176 return 0;
177
174 if (extent_map_end(prev) == next->start && 178 if (extent_map_end(prev) == next->start &&
175 prev->flags == next->flags && 179 prev->flags == next->flags &&
176 prev->bdev == next->bdev && 180 prev->bdev == next->bdev &&
@@ -255,7 +259,8 @@ int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len,
255 if (!em) 259 if (!em)
256 goto out; 260 goto out;
257 261
258 list_move(&em->list, &tree->modified_extents); 262 if (!test_bit(EXTENT_FLAG_LOGGING, &em->flags))
263 list_move(&em->list, &tree->modified_extents);
259 em->generation = gen; 264 em->generation = gen;
260 clear_bit(EXTENT_FLAG_PINNED, &em->flags); 265 clear_bit(EXTENT_FLAG_PINNED, &em->flags);
261 em->mod_start = em->start; 266 em->mod_start = em->start;
@@ -280,6 +285,13 @@ out:
280 285
281} 286}
282 287
288void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em)
289{
290 clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
291 if (em->in_tree)
292 try_merge_map(tree, em);
293}
294
283/** 295/**
284 * add_extent_mapping - add new extent map to the extent tree 296 * add_extent_mapping - add new extent map to the extent tree
285 * @tree: tree to insert new map in 297 * @tree: tree to insert new map in
diff --git a/fs/btrfs/extent_map.h b/fs/btrfs/extent_map.h
index 922943ce29e8..c6598c89cff8 100644
--- a/fs/btrfs/extent_map.h
+++ b/fs/btrfs/extent_map.h
@@ -69,6 +69,7 @@ void free_extent_map(struct extent_map *em);
69int __init extent_map_init(void); 69int __init extent_map_init(void);
70void extent_map_exit(void); 70void extent_map_exit(void);
71int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen); 71int unpin_extent_cache(struct extent_map_tree *tree, u64 start, u64 len, u64 gen);
72void clear_em_logging(struct extent_map_tree *tree, struct extent_map *em);
72struct extent_map *search_extent_mapping(struct extent_map_tree *tree, 73struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
73 u64 start, u64 len); 74 u64 start, u64 len);
74#endif 75#endif
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index bd38cef42358..94aa53b38721 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -460,8 +460,8 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
460 if (!contig) 460 if (!contig)
461 offset = page_offset(bvec->bv_page) + bvec->bv_offset; 461 offset = page_offset(bvec->bv_page) + bvec->bv_offset;
462 462
463 if (!contig && (offset >= ordered->file_offset + ordered->len || 463 if (offset >= ordered->file_offset + ordered->len ||
464 offset < ordered->file_offset)) { 464 offset < ordered->file_offset) {
465 unsigned long bytes_left; 465 unsigned long bytes_left;
466 sums->len = this_sum_bytes; 466 sums->len = this_sum_bytes;
467 this_sum_bytes = 0; 467 this_sum_bytes = 0;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 77061bf43edb..aeb84469d2c4 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -293,15 +293,24 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
293 struct btrfs_key key; 293 struct btrfs_key key;
294 struct btrfs_ioctl_defrag_range_args range; 294 struct btrfs_ioctl_defrag_range_args range;
295 int num_defrag; 295 int num_defrag;
296 int index;
297 int ret;
296 298
297 /* get the inode */ 299 /* get the inode */
298 key.objectid = defrag->root; 300 key.objectid = defrag->root;
299 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY); 301 btrfs_set_key_type(&key, BTRFS_ROOT_ITEM_KEY);
300 key.offset = (u64)-1; 302 key.offset = (u64)-1;
303
304 index = srcu_read_lock(&fs_info->subvol_srcu);
305
301 inode_root = btrfs_read_fs_root_no_name(fs_info, &key); 306 inode_root = btrfs_read_fs_root_no_name(fs_info, &key);
302 if (IS_ERR(inode_root)) { 307 if (IS_ERR(inode_root)) {
303 kmem_cache_free(btrfs_inode_defrag_cachep, defrag); 308 ret = PTR_ERR(inode_root);
304 return PTR_ERR(inode_root); 309 goto cleanup;
310 }
311 if (btrfs_root_refs(&inode_root->root_item) == 0) {
312 ret = -ENOENT;
313 goto cleanup;
305 } 314 }
306 315
307 key.objectid = defrag->ino; 316 key.objectid = defrag->ino;
@@ -309,9 +318,10 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
309 key.offset = 0; 318 key.offset = 0;
310 inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL); 319 inode = btrfs_iget(fs_info->sb, &key, inode_root, NULL);
311 if (IS_ERR(inode)) { 320 if (IS_ERR(inode)) {
312 kmem_cache_free(btrfs_inode_defrag_cachep, defrag); 321 ret = PTR_ERR(inode);
313 return PTR_ERR(inode); 322 goto cleanup;
314 } 323 }
324 srcu_read_unlock(&fs_info->subvol_srcu, index);
315 325
316 /* do a chunk of defrag */ 326 /* do a chunk of defrag */
317 clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags); 327 clear_bit(BTRFS_INODE_IN_DEFRAG, &BTRFS_I(inode)->runtime_flags);
@@ -346,6 +356,10 @@ static int __btrfs_run_defrag_inode(struct btrfs_fs_info *fs_info,
346 356
347 iput(inode); 357 iput(inode);
348 return 0; 358 return 0;
359cleanup:
360 srcu_read_unlock(&fs_info->subvol_srcu, index);
361 kmem_cache_free(btrfs_inode_defrag_cachep, defrag);
362 return ret;
349} 363}
350 364
351/* 365/*
@@ -1594,9 +1608,10 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
1594 if (err < 0 && num_written > 0) 1608 if (err < 0 && num_written > 0)
1595 num_written = err; 1609 num_written = err;
1596 } 1610 }
1597out: 1611
1598 if (sync) 1612 if (sync)
1599 atomic_dec(&BTRFS_I(inode)->sync_writers); 1613 atomic_dec(&BTRFS_I(inode)->sync_writers);
1614out:
1600 sb_end_write(inode->i_sb); 1615 sb_end_write(inode->i_sb);
1601 current->backing_dev_info = NULL; 1616 current->backing_dev_info = NULL;
1602 return num_written ? num_written : err; 1617 return num_written ? num_written : err;
@@ -2241,6 +2256,7 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
2241 if (lockend <= lockstart) 2256 if (lockend <= lockstart)
2242 lockend = lockstart + root->sectorsize; 2257 lockend = lockstart + root->sectorsize;
2243 2258
2259 lockend--;
2244 len = lockend - lockstart + 1; 2260 len = lockend - lockstart + 1;
2245 2261
2246 len = max_t(u64, len, root->sectorsize); 2262 len = max_t(u64, len, root->sectorsize);
@@ -2307,9 +2323,12 @@ static int find_desired_extent(struct inode *inode, loff_t *offset, int whence)
2307 } 2323 }
2308 } 2324 }
2309 2325
2310 *offset = start; 2326 if (!test_bit(EXTENT_FLAG_PREALLOC,
2311 free_extent_map(em); 2327 &em->flags)) {
2312 break; 2328 *offset = start;
2329 free_extent_map(em);
2330 break;
2331 }
2313 } 2332 }
2314 } 2333 }
2315 2334
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 59ea2e4349c9..0be7a8742a43 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -1862,11 +1862,13 @@ int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
1862{ 1862{
1863 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; 1863 struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl;
1864 struct btrfs_free_space *info; 1864 struct btrfs_free_space *info;
1865 int ret = 0; 1865 int ret;
1866 bool re_search = false;
1866 1867
1867 spin_lock(&ctl->tree_lock); 1868 spin_lock(&ctl->tree_lock);
1868 1869
1869again: 1870again:
1871 ret = 0;
1870 if (!bytes) 1872 if (!bytes)
1871 goto out_lock; 1873 goto out_lock;
1872 1874
@@ -1879,17 +1881,17 @@ again:
1879 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset), 1881 info = tree_search_offset(ctl, offset_to_bitmap(ctl, offset),
1880 1, 0); 1882 1, 0);
1881 if (!info) { 1883 if (!info) {
1882 /* the tree logging code might be calling us before we 1884 /*
1883 * have fully loaded the free space rbtree for this 1885 * If we found a partial bit of our free space in a
1884 * block group. So it is possible the entry won't 1886 * bitmap but then couldn't find the other part this may
1885 * be in the rbtree yet at all. The caching code 1887 * be a problem, so WARN about it.
1886 * will make sure not to put it in the rbtree if
1887 * the logging code has pinned it.
1888 */ 1888 */
1889 WARN_ON(re_search);
1889 goto out_lock; 1890 goto out_lock;
1890 } 1891 }
1891 } 1892 }
1892 1893
1894 re_search = false;
1893 if (!info->bitmap) { 1895 if (!info->bitmap) {
1894 unlink_free_space(ctl, info); 1896 unlink_free_space(ctl, info);
1895 if (offset == info->offset) { 1897 if (offset == info->offset) {
@@ -1935,8 +1937,10 @@ again:
1935 } 1937 }
1936 1938
1937 ret = remove_from_bitmap(ctl, info, &offset, &bytes); 1939 ret = remove_from_bitmap(ctl, info, &offset, &bytes);
1938 if (ret == -EAGAIN) 1940 if (ret == -EAGAIN) {
1941 re_search = true;
1939 goto again; 1942 goto again;
1943 }
1940 BUG_ON(ret); /* logic error */ 1944 BUG_ON(ret); /* logic error */
1941out_lock: 1945out_lock:
1942 spin_unlock(&ctl->tree_lock); 1946 spin_unlock(&ctl->tree_lock);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 16d9e8e191e6..cc93b23ca352 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -88,7 +88,7 @@ static unsigned char btrfs_type_by_mode[S_IFMT >> S_SHIFT] = {
88 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK, 88 [S_IFLNK >> S_SHIFT] = BTRFS_FT_SYMLINK,
89}; 89};
90 90
91static int btrfs_setsize(struct inode *inode, loff_t newsize); 91static int btrfs_setsize(struct inode *inode, struct iattr *attr);
92static int btrfs_truncate(struct inode *inode); 92static int btrfs_truncate(struct inode *inode);
93static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent); 93static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent);
94static noinline int cow_file_range(struct inode *inode, 94static noinline int cow_file_range(struct inode *inode,
@@ -2478,6 +2478,18 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
2478 continue; 2478 continue;
2479 } 2479 }
2480 nr_truncate++; 2480 nr_truncate++;
2481
2482 /* 1 for the orphan item deletion. */
2483 trans = btrfs_start_transaction(root, 1);
2484 if (IS_ERR(trans)) {
2485 ret = PTR_ERR(trans);
2486 goto out;
2487 }
2488 ret = btrfs_orphan_add(trans, inode);
2489 btrfs_end_transaction(trans, root);
2490 if (ret)
2491 goto out;
2492
2481 ret = btrfs_truncate(inode); 2493 ret = btrfs_truncate(inode);
2482 } else { 2494 } else {
2483 nr_unlink++; 2495 nr_unlink++;
@@ -3665,6 +3677,7 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
3665 block_end - cur_offset, 0); 3677 block_end - cur_offset, 0);
3666 if (IS_ERR(em)) { 3678 if (IS_ERR(em)) {
3667 err = PTR_ERR(em); 3679 err = PTR_ERR(em);
3680 em = NULL;
3668 break; 3681 break;
3669 } 3682 }
3670 last_byte = min(extent_map_end(em), block_end); 3683 last_byte = min(extent_map_end(em), block_end);
@@ -3748,16 +3761,27 @@ next:
3748 return err; 3761 return err;
3749} 3762}
3750 3763
3751static int btrfs_setsize(struct inode *inode, loff_t newsize) 3764static int btrfs_setsize(struct inode *inode, struct iattr *attr)
3752{ 3765{
3753 struct btrfs_root *root = BTRFS_I(inode)->root; 3766 struct btrfs_root *root = BTRFS_I(inode)->root;
3754 struct btrfs_trans_handle *trans; 3767 struct btrfs_trans_handle *trans;
3755 loff_t oldsize = i_size_read(inode); 3768 loff_t oldsize = i_size_read(inode);
3769 loff_t newsize = attr->ia_size;
3770 int mask = attr->ia_valid;
3756 int ret; 3771 int ret;
3757 3772
3758 if (newsize == oldsize) 3773 if (newsize == oldsize)
3759 return 0; 3774 return 0;
3760 3775
3776 /*
3777 * The regular truncate() case without ATTR_CTIME and ATTR_MTIME is a
3778 * special case where we need to update the times despite not having
3779 * these flags set. For all other operations the VFS sets these flags
3780 * explicitly if it wants a timestamp update.
3781 */
3782 if (newsize != oldsize && (!(mask & (ATTR_CTIME | ATTR_MTIME))))
3783 inode->i_ctime = inode->i_mtime = current_fs_time(inode->i_sb);
3784
3761 if (newsize > oldsize) { 3785 if (newsize > oldsize) {
3762 truncate_pagecache(inode, oldsize, newsize); 3786 truncate_pagecache(inode, oldsize, newsize);
3763 ret = btrfs_cont_expand(inode, oldsize, newsize); 3787 ret = btrfs_cont_expand(inode, oldsize, newsize);
@@ -3783,9 +3807,34 @@ static int btrfs_setsize(struct inode *inode, loff_t newsize)
3783 set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE, 3807 set_bit(BTRFS_INODE_ORDERED_DATA_CLOSE,
3784 &BTRFS_I(inode)->runtime_flags); 3808 &BTRFS_I(inode)->runtime_flags);
3785 3809
3810 /*
3811 * 1 for the orphan item we're going to add
3812 * 1 for the orphan item deletion.
3813 */
3814 trans = btrfs_start_transaction(root, 2);
3815 if (IS_ERR(trans))
3816 return PTR_ERR(trans);
3817
3818 /*
3819 * We need to do this in case we fail at _any_ point during the
3820 * actual truncate. Once we do the truncate_setsize we could
3821 * invalidate pages which forces any outstanding ordered io to
3822 * be instantly completed which will give us extents that need
3823 * to be truncated. If we fail to get an orphan inode down we
3824 * could have left over extents that were never meant to live,
3825 * so we need to guarantee from this point on that everything
3826 * will be consistent.
3827 */
3828 ret = btrfs_orphan_add(trans, inode);
3829 btrfs_end_transaction(trans, root);
3830 if (ret)
3831 return ret;
3832
3786 /* we don't support swapfiles, so vmtruncate shouldn't fail */ 3833 /* we don't support swapfiles, so vmtruncate shouldn't fail */
3787 truncate_setsize(inode, newsize); 3834 truncate_setsize(inode, newsize);
3788 ret = btrfs_truncate(inode); 3835 ret = btrfs_truncate(inode);
3836 if (ret && inode->i_nlink)
3837 btrfs_orphan_del(NULL, inode);
3789 } 3838 }
3790 3839
3791 return ret; 3840 return ret;
@@ -3805,7 +3854,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
3805 return err; 3854 return err;
3806 3855
3807 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) { 3856 if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
3808 err = btrfs_setsize(inode, attr->ia_size); 3857 err = btrfs_setsize(inode, attr);
3809 if (err) 3858 if (err)
3810 return err; 3859 return err;
3811 } 3860 }
@@ -5572,10 +5621,13 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
5572 return em; 5621 return em;
5573 if (em) { 5622 if (em) {
5574 /* 5623 /*
5575 * if our em maps to a hole, there might 5624 * if our em maps to
5576 * actually be delalloc bytes behind it 5625 * - a hole or
5626 * - a pre-alloc extent,
5627 * there might actually be delalloc bytes behind it.
5577 */ 5628 */
5578 if (em->block_start != EXTENT_MAP_HOLE) 5629 if (em->block_start != EXTENT_MAP_HOLE &&
5630 !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
5579 return em; 5631 return em;
5580 else 5632 else
5581 hole_em = em; 5633 hole_em = em;
@@ -5657,6 +5709,8 @@ struct extent_map *btrfs_get_extent_fiemap(struct inode *inode, struct page *pag
5657 */ 5709 */
5658 em->block_start = hole_em->block_start; 5710 em->block_start = hole_em->block_start;
5659 em->block_len = hole_len; 5711 em->block_len = hole_len;
5712 if (test_bit(EXTENT_FLAG_PREALLOC, &hole_em->flags))
5713 set_bit(EXTENT_FLAG_PREALLOC, &em->flags);
5660 } else { 5714 } else {
5661 em->start = range_start; 5715 em->start = range_start;
5662 em->len = found; 5716 em->len = found;
@@ -6915,11 +6969,9 @@ static int btrfs_truncate(struct inode *inode)
6915 6969
6916 /* 6970 /*
6917 * 1 for the truncate slack space 6971 * 1 for the truncate slack space
6918 * 1 for the orphan item we're going to add
6919 * 1 for the orphan item deletion
6920 * 1 for updating the inode. 6972 * 1 for updating the inode.
6921 */ 6973 */
6922 trans = btrfs_start_transaction(root, 4); 6974 trans = btrfs_start_transaction(root, 2);
6923 if (IS_ERR(trans)) { 6975 if (IS_ERR(trans)) {
6924 err = PTR_ERR(trans); 6976 err = PTR_ERR(trans);
6925 goto out; 6977 goto out;
@@ -6930,12 +6982,6 @@ static int btrfs_truncate(struct inode *inode)
6930 min_size); 6982 min_size);
6931 BUG_ON(ret); 6983 BUG_ON(ret);
6932 6984
6933 ret = btrfs_orphan_add(trans, inode);
6934 if (ret) {
6935 btrfs_end_transaction(trans, root);
6936 goto out;
6937 }
6938
6939 /* 6985 /*
6940 * setattr is responsible for setting the ordered_data_close flag, 6986 * setattr is responsible for setting the ordered_data_close flag,
6941 * but that is only tested during the last file release. That 6987 * but that is only tested during the last file release. That
@@ -7004,12 +7050,6 @@ static int btrfs_truncate(struct inode *inode)
7004 ret = btrfs_orphan_del(trans, inode); 7050 ret = btrfs_orphan_del(trans, inode);
7005 if (ret) 7051 if (ret)
7006 err = ret; 7052 err = ret;
7007 } else if (ret && inode->i_nlink > 0) {
7008 /*
7009 * Failed to do the truncate, remove us from the in memory
7010 * orphan list.
7011 */
7012 ret = btrfs_orphan_del(NULL, inode);
7013 } 7053 }
7014 7054
7015 if (trans) { 7055 if (trans) {
@@ -7531,41 +7571,61 @@ void btrfs_wait_and_free_delalloc_work(struct btrfs_delalloc_work *work)
7531 */ 7571 */
7532int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput) 7572int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
7533{ 7573{
7534 struct list_head *head = &root->fs_info->delalloc_inodes;
7535 struct btrfs_inode *binode; 7574 struct btrfs_inode *binode;
7536 struct inode *inode; 7575 struct inode *inode;
7537 struct btrfs_delalloc_work *work, *next; 7576 struct btrfs_delalloc_work *work, *next;
7538 struct list_head works; 7577 struct list_head works;
7578 struct list_head splice;
7539 int ret = 0; 7579 int ret = 0;
7540 7580
7541 if (root->fs_info->sb->s_flags & MS_RDONLY) 7581 if (root->fs_info->sb->s_flags & MS_RDONLY)
7542 return -EROFS; 7582 return -EROFS;
7543 7583
7544 INIT_LIST_HEAD(&works); 7584 INIT_LIST_HEAD(&works);
7545 7585 INIT_LIST_HEAD(&splice);
7586again:
7546 spin_lock(&root->fs_info->delalloc_lock); 7587 spin_lock(&root->fs_info->delalloc_lock);
7547 while (!list_empty(head)) { 7588 list_splice_init(&root->fs_info->delalloc_inodes, &splice);
7548 binode = list_entry(head->next, struct btrfs_inode, 7589 while (!list_empty(&splice)) {
7590 binode = list_entry(splice.next, struct btrfs_inode,
7549 delalloc_inodes); 7591 delalloc_inodes);
7592
7593 list_del_init(&binode->delalloc_inodes);
7594
7550 inode = igrab(&binode->vfs_inode); 7595 inode = igrab(&binode->vfs_inode);
7551 if (!inode) 7596 if (!inode)
7552 list_del_init(&binode->delalloc_inodes); 7597 continue;
7598
7599 list_add_tail(&binode->delalloc_inodes,
7600 &root->fs_info->delalloc_inodes);
7553 spin_unlock(&root->fs_info->delalloc_lock); 7601 spin_unlock(&root->fs_info->delalloc_lock);
7554 if (inode) { 7602
7555 work = btrfs_alloc_delalloc_work(inode, 0, delay_iput); 7603 work = btrfs_alloc_delalloc_work(inode, 0, delay_iput);
7556 if (!work) { 7604 if (unlikely(!work)) {
7557 ret = -ENOMEM; 7605 ret = -ENOMEM;
7558 goto out; 7606 goto out;
7559 }
7560 list_add_tail(&work->list, &works);
7561 btrfs_queue_worker(&root->fs_info->flush_workers,
7562 &work->work);
7563 } 7607 }
7608 list_add_tail(&work->list, &works);
7609 btrfs_queue_worker(&root->fs_info->flush_workers,
7610 &work->work);
7611
7564 cond_resched(); 7612 cond_resched();
7565 spin_lock(&root->fs_info->delalloc_lock); 7613 spin_lock(&root->fs_info->delalloc_lock);
7566 } 7614 }
7567 spin_unlock(&root->fs_info->delalloc_lock); 7615 spin_unlock(&root->fs_info->delalloc_lock);
7568 7616
7617 list_for_each_entry_safe(work, next, &works, list) {
7618 list_del_init(&work->list);
7619 btrfs_wait_and_free_delalloc_work(work);
7620 }
7621
7622 spin_lock(&root->fs_info->delalloc_lock);
7623 if (!list_empty(&root->fs_info->delalloc_inodes)) {
7624 spin_unlock(&root->fs_info->delalloc_lock);
7625 goto again;
7626 }
7627 spin_unlock(&root->fs_info->delalloc_lock);
7628
7569 /* the filemap_flush will queue IO into the worker threads, but 7629 /* the filemap_flush will queue IO into the worker threads, but
7570 * we have to make sure the IO is actually started and that 7630 * we have to make sure the IO is actually started and that
7571 * ordered extents get created before we return 7631 * ordered extents get created before we return
@@ -7578,11 +7638,18 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
7578 atomic_read(&root->fs_info->async_delalloc_pages) == 0)); 7638 atomic_read(&root->fs_info->async_delalloc_pages) == 0));
7579 } 7639 }
7580 atomic_dec(&root->fs_info->async_submit_draining); 7640 atomic_dec(&root->fs_info->async_submit_draining);
7641 return 0;
7581out: 7642out:
7582 list_for_each_entry_safe(work, next, &works, list) { 7643 list_for_each_entry_safe(work, next, &works, list) {
7583 list_del_init(&work->list); 7644 list_del_init(&work->list);
7584 btrfs_wait_and_free_delalloc_work(work); 7645 btrfs_wait_and_free_delalloc_work(work);
7585 } 7646 }
7647
7648 if (!list_empty_careful(&splice)) {
7649 spin_lock(&root->fs_info->delalloc_lock);
7650 list_splice_tail(&splice, &root->fs_info->delalloc_inodes);
7651 spin_unlock(&root->fs_info->delalloc_lock);
7652 }
7586 return ret; 7653 return ret;
7587} 7654}
7588 7655
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 4b4516770f05..338f2597bf7f 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -515,7 +515,6 @@ static noinline int create_subvol(struct btrfs_root *root,
515 515
516 BUG_ON(ret); 516 BUG_ON(ret);
517 517
518 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
519fail: 518fail:
520 if (async_transid) { 519 if (async_transid) {
521 *async_transid = trans->transid; 520 *async_transid = trans->transid;
@@ -525,6 +524,10 @@ fail:
525 } 524 }
526 if (err && !ret) 525 if (err && !ret)
527 ret = err; 526 ret = err;
527
528 if (!ret)
529 d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
530
528 return ret; 531 return ret;
529} 532}
530 533
@@ -1339,7 +1342,8 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1339 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 1342 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
1340 1)) { 1343 1)) {
1341 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); 1344 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
1342 return -EINPROGRESS; 1345 mnt_drop_write_file(file);
1346 return -EINVAL;
1343 } 1347 }
1344 1348
1345 mutex_lock(&root->fs_info->volume_mutex); 1349 mutex_lock(&root->fs_info->volume_mutex);
@@ -1362,6 +1366,7 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1362 printk(KERN_INFO "btrfs: resizing devid %llu\n", 1366 printk(KERN_INFO "btrfs: resizing devid %llu\n",
1363 (unsigned long long)devid); 1367 (unsigned long long)devid);
1364 } 1368 }
1369
1365 device = btrfs_find_device(root->fs_info, devid, NULL, NULL); 1370 device = btrfs_find_device(root->fs_info, devid, NULL, NULL);
1366 if (!device) { 1371 if (!device) {
1367 printk(KERN_INFO "btrfs: resizer unable to find device %llu\n", 1372 printk(KERN_INFO "btrfs: resizer unable to find device %llu\n",
@@ -1369,9 +1374,10 @@ static noinline int btrfs_ioctl_resize(struct file *file,
1369 ret = -EINVAL; 1374 ret = -EINVAL;
1370 goto out_free; 1375 goto out_free;
1371 } 1376 }
1372 if (device->fs_devices && device->fs_devices->seeding) { 1377
1378 if (!device->writeable) {
1373 printk(KERN_INFO "btrfs: resizer unable to apply on " 1379 printk(KERN_INFO "btrfs: resizer unable to apply on "
1374 "seeding device %llu\n", 1380 "readonly device %llu\n",
1375 (unsigned long long)devid); 1381 (unsigned long long)devid);
1376 ret = -EINVAL; 1382 ret = -EINVAL;
1377 goto out_free; 1383 goto out_free;
@@ -1443,8 +1449,8 @@ out_free:
1443 kfree(vol_args); 1449 kfree(vol_args);
1444out: 1450out:
1445 mutex_unlock(&root->fs_info->volume_mutex); 1451 mutex_unlock(&root->fs_info->volume_mutex);
1446 mnt_drop_write_file(file);
1447 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); 1452 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
1453 mnt_drop_write_file(file);
1448 return ret; 1454 return ret;
1449} 1455}
1450 1456
@@ -2095,13 +2101,13 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
2095 err = inode_permission(inode, MAY_WRITE | MAY_EXEC); 2101 err = inode_permission(inode, MAY_WRITE | MAY_EXEC);
2096 if (err) 2102 if (err)
2097 goto out_dput; 2103 goto out_dput;
2098
2099 /* check if subvolume may be deleted by a non-root user */
2100 err = btrfs_may_delete(dir, dentry, 1);
2101 if (err)
2102 goto out_dput;
2103 } 2104 }
2104 2105
2106 /* check if subvolume may be deleted by a user */
2107 err = btrfs_may_delete(dir, dentry, 1);
2108 if (err)
2109 goto out_dput;
2110
2105 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) { 2111 if (btrfs_ino(inode) != BTRFS_FIRST_FREE_OBJECTID) {
2106 err = -EINVAL; 2112 err = -EINVAL;
2107 goto out_dput; 2113 goto out_dput;
@@ -2183,19 +2189,20 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
2183 struct btrfs_ioctl_defrag_range_args *range; 2189 struct btrfs_ioctl_defrag_range_args *range;
2184 int ret; 2190 int ret;
2185 2191
2186 if (btrfs_root_readonly(root)) 2192 ret = mnt_want_write_file(file);
2187 return -EROFS; 2193 if (ret)
2194 return ret;
2188 2195
2189 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 2196 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
2190 1)) { 2197 1)) {
2191 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); 2198 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
2192 return -EINPROGRESS; 2199 mnt_drop_write_file(file);
2200 return -EINVAL;
2193 } 2201 }
2194 ret = mnt_want_write_file(file); 2202
2195 if (ret) { 2203 if (btrfs_root_readonly(root)) {
2196 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 2204 ret = -EROFS;
2197 0); 2205 goto out;
2198 return ret;
2199 } 2206 }
2200 2207
2201 switch (inode->i_mode & S_IFMT) { 2208 switch (inode->i_mode & S_IFMT) {
@@ -2247,8 +2254,8 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
2247 ret = -EINVAL; 2254 ret = -EINVAL;
2248 } 2255 }
2249out: 2256out:
2250 mnt_drop_write_file(file);
2251 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); 2257 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
2258 mnt_drop_write_file(file);
2252 return ret; 2259 return ret;
2253} 2260}
2254 2261
@@ -2263,7 +2270,7 @@ static long btrfs_ioctl_add_dev(struct btrfs_root *root, void __user *arg)
2263 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 2270 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running,
2264 1)) { 2271 1)) {
2265 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); 2272 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
2266 return -EINPROGRESS; 2273 return -EINVAL;
2267 } 2274 }
2268 2275
2269 mutex_lock(&root->fs_info->volume_mutex); 2276 mutex_lock(&root->fs_info->volume_mutex);
@@ -2300,7 +2307,7 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
2300 1)) { 2307 1)) {
2301 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n"); 2308 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
2302 mnt_drop_write_file(file); 2309 mnt_drop_write_file(file);
2303 return -EINPROGRESS; 2310 return -EINVAL;
2304 } 2311 }
2305 2312
2306 mutex_lock(&root->fs_info->volume_mutex); 2313 mutex_lock(&root->fs_info->volume_mutex);
@@ -2316,8 +2323,8 @@ static long btrfs_ioctl_rm_dev(struct file *file, void __user *arg)
2316 kfree(vol_args); 2323 kfree(vol_args);
2317out: 2324out:
2318 mutex_unlock(&root->fs_info->volume_mutex); 2325 mutex_unlock(&root->fs_info->volume_mutex);
2319 mnt_drop_write_file(file);
2320 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0); 2326 atomic_set(&root->fs_info->mutually_exclusive_operation_running, 0);
2327 mnt_drop_write_file(file);
2321 return ret; 2328 return ret;
2322} 2329}
2323 2330
@@ -3437,8 +3444,8 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
3437 struct btrfs_fs_info *fs_info = root->fs_info; 3444 struct btrfs_fs_info *fs_info = root->fs_info;
3438 struct btrfs_ioctl_balance_args *bargs; 3445 struct btrfs_ioctl_balance_args *bargs;
3439 struct btrfs_balance_control *bctl; 3446 struct btrfs_balance_control *bctl;
3447 bool need_unlock; /* for mut. excl. ops lock */
3440 int ret; 3448 int ret;
3441 int need_to_clear_lock = 0;
3442 3449
3443 if (!capable(CAP_SYS_ADMIN)) 3450 if (!capable(CAP_SYS_ADMIN))
3444 return -EPERM; 3451 return -EPERM;
@@ -3447,14 +3454,61 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
3447 if (ret) 3454 if (ret)
3448 return ret; 3455 return ret;
3449 3456
3450 mutex_lock(&fs_info->volume_mutex); 3457again:
3458 if (!atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1)) {
3459 mutex_lock(&fs_info->volume_mutex);
3460 mutex_lock(&fs_info->balance_mutex);
3461 need_unlock = true;
3462 goto locked;
3463 }
3464
3465 /*
3466 * mut. excl. ops lock is locked. Three possibilities:
3467 * (1) some other op is running
3468 * (2) balance is running
3469 * (3) balance is paused -- special case (think resume)
3470 */
3451 mutex_lock(&fs_info->balance_mutex); 3471 mutex_lock(&fs_info->balance_mutex);
3472 if (fs_info->balance_ctl) {
3473 /* this is either (2) or (3) */
3474 if (!atomic_read(&fs_info->balance_running)) {
3475 mutex_unlock(&fs_info->balance_mutex);
3476 if (!mutex_trylock(&fs_info->volume_mutex))
3477 goto again;
3478 mutex_lock(&fs_info->balance_mutex);
3479
3480 if (fs_info->balance_ctl &&
3481 !atomic_read(&fs_info->balance_running)) {
3482 /* this is (3) */
3483 need_unlock = false;
3484 goto locked;
3485 }
3486
3487 mutex_unlock(&fs_info->balance_mutex);
3488 mutex_unlock(&fs_info->volume_mutex);
3489 goto again;
3490 } else {
3491 /* this is (2) */
3492 mutex_unlock(&fs_info->balance_mutex);
3493 ret = -EINPROGRESS;
3494 goto out;
3495 }
3496 } else {
3497 /* this is (1) */
3498 mutex_unlock(&fs_info->balance_mutex);
3499 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
3500 ret = -EINVAL;
3501 goto out;
3502 }
3503
3504locked:
3505 BUG_ON(!atomic_read(&fs_info->mutually_exclusive_operation_running));
3452 3506
3453 if (arg) { 3507 if (arg) {
3454 bargs = memdup_user(arg, sizeof(*bargs)); 3508 bargs = memdup_user(arg, sizeof(*bargs));
3455 if (IS_ERR(bargs)) { 3509 if (IS_ERR(bargs)) {
3456 ret = PTR_ERR(bargs); 3510 ret = PTR_ERR(bargs);
3457 goto out; 3511 goto out_unlock;
3458 } 3512 }
3459 3513
3460 if (bargs->flags & BTRFS_BALANCE_RESUME) { 3514 if (bargs->flags & BTRFS_BALANCE_RESUME) {
@@ -3474,13 +3528,10 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
3474 bargs = NULL; 3528 bargs = NULL;
3475 } 3529 }
3476 3530
3477 if (atomic_xchg(&root->fs_info->mutually_exclusive_operation_running, 3531 if (fs_info->balance_ctl) {
3478 1)) {
3479 pr_info("btrfs: dev add/delete/balance/replace/resize operation in progress\n");
3480 ret = -EINPROGRESS; 3532 ret = -EINPROGRESS;
3481 goto out_bargs; 3533 goto out_bargs;
3482 } 3534 }
3483 need_to_clear_lock = 1;
3484 3535
3485 bctl = kzalloc(sizeof(*bctl), GFP_NOFS); 3536 bctl = kzalloc(sizeof(*bctl), GFP_NOFS);
3486 if (!bctl) { 3537 if (!bctl) {
@@ -3501,11 +3552,17 @@ static long btrfs_ioctl_balance(struct file *file, void __user *arg)
3501 } 3552 }
3502 3553
3503do_balance: 3554do_balance:
3504 ret = btrfs_balance(bctl, bargs);
3505 /* 3555 /*
3506 * bctl is freed in __cancel_balance or in free_fs_info if 3556 * Ownership of bctl and mutually_exclusive_operation_running
3507 * restriper was paused all the way until unmount 3557 * goes to btrfs_balance. bctl is freed in __cancel_balance,
3558 * or, if restriper was paused all the way until unmount, in
3559 * free_fs_info. mutually_exclusive_operation_running is
3560 * cleared in __cancel_balance.
3508 */ 3561 */
3562 need_unlock = false;
3563
3564 ret = btrfs_balance(bctl, bargs);
3565
3509 if (arg) { 3566 if (arg) {
3510 if (copy_to_user(arg, bargs, sizeof(*bargs))) 3567 if (copy_to_user(arg, bargs, sizeof(*bargs)))
3511 ret = -EFAULT; 3568 ret = -EFAULT;
@@ -3513,12 +3570,12 @@ do_balance:
3513 3570
3514out_bargs: 3571out_bargs:
3515 kfree(bargs); 3572 kfree(bargs);
3516out: 3573out_unlock:
3517 if (need_to_clear_lock)
3518 atomic_set(&root->fs_info->mutually_exclusive_operation_running,
3519 0);
3520 mutex_unlock(&fs_info->balance_mutex); 3574 mutex_unlock(&fs_info->balance_mutex);
3521 mutex_unlock(&fs_info->volume_mutex); 3575 mutex_unlock(&fs_info->volume_mutex);
3576 if (need_unlock)
3577 atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
3578out:
3522 mnt_drop_write_file(file); 3579 mnt_drop_write_file(file);
3523 return ret; 3580 return ret;
3524} 3581}
@@ -3698,6 +3755,11 @@ static long btrfs_ioctl_qgroup_create(struct file *file, void __user *arg)
3698 goto drop_write; 3755 goto drop_write;
3699 } 3756 }
3700 3757
3758 if (!sa->qgroupid) {
3759 ret = -EINVAL;
3760 goto out;
3761 }
3762
3701 trans = btrfs_join_transaction(root); 3763 trans = btrfs_join_transaction(root);
3702 if (IS_ERR(trans)) { 3764 if (IS_ERR(trans)) {
3703 ret = PTR_ERR(trans); 3765 ret = PTR_ERR(trans);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index f10731297040..e5ed56729607 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -836,9 +836,16 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
836 * if the disk i_size is already at the inode->i_size, or 836 * if the disk i_size is already at the inode->i_size, or
837 * this ordered extent is inside the disk i_size, we're done 837 * this ordered extent is inside the disk i_size, we're done
838 */ 838 */
839 if (disk_i_size == i_size || offset <= disk_i_size) { 839 if (disk_i_size == i_size)
840 goto out;
841
842 /*
843 * We still need to update disk_i_size if outstanding_isize is greater
844 * than disk_i_size.
845 */
846 if (offset <= disk_i_size &&
847 (!ordered || ordered->outstanding_isize <= disk_i_size))
840 goto out; 848 goto out;
841 }
842 849
843 /* 850 /*
844 * walk backward from this ordered extent to disk_i_size. 851 * walk backward from this ordered extent to disk_i_size.
@@ -870,7 +877,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
870 break; 877 break;
871 if (test->file_offset >= i_size) 878 if (test->file_offset >= i_size)
872 break; 879 break;
873 if (test->file_offset >= disk_i_size) { 880 if (entry_end(test) > disk_i_size) {
874 /* 881 /*
875 * we don't update disk_i_size now, so record this 882 * we don't update disk_i_size now, so record this
876 * undealt i_size. Or we will not know the real 883 * undealt i_size. Or we will not know the real
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index fe9d02c45f8e..a5c856234323 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -379,6 +379,13 @@ next1:
379 379
380 ret = add_relation_rb(fs_info, found_key.objectid, 380 ret = add_relation_rb(fs_info, found_key.objectid,
381 found_key.offset); 381 found_key.offset);
382 if (ret == -ENOENT) {
383 printk(KERN_WARNING
384 "btrfs: orphan qgroup relation 0x%llx->0x%llx\n",
385 (unsigned long long)found_key.objectid,
386 (unsigned long long)found_key.offset);
387 ret = 0; /* ignore the error */
388 }
382 if (ret) 389 if (ret)
383 goto out; 390 goto out;
384next2: 391next2:
@@ -956,17 +963,28 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans,
956 struct btrfs_fs_info *fs_info, u64 qgroupid) 963 struct btrfs_fs_info *fs_info, u64 qgroupid)
957{ 964{
958 struct btrfs_root *quota_root; 965 struct btrfs_root *quota_root;
966 struct btrfs_qgroup *qgroup;
959 int ret = 0; 967 int ret = 0;
960 968
961 quota_root = fs_info->quota_root; 969 quota_root = fs_info->quota_root;
962 if (!quota_root) 970 if (!quota_root)
963 return -EINVAL; 971 return -EINVAL;
964 972
973 /* check if there are no relations to this qgroup */
974 spin_lock(&fs_info->qgroup_lock);
975 qgroup = find_qgroup_rb(fs_info, qgroupid);
976 if (qgroup) {
977 if (!list_empty(&qgroup->groups) || !list_empty(&qgroup->members)) {
978 spin_unlock(&fs_info->qgroup_lock);
979 return -EBUSY;
980 }
981 }
982 spin_unlock(&fs_info->qgroup_lock);
983
965 ret = del_qgroup_item(trans, quota_root, qgroupid); 984 ret = del_qgroup_item(trans, quota_root, qgroupid);
966 985
967 spin_lock(&fs_info->qgroup_lock); 986 spin_lock(&fs_info->qgroup_lock);
968 del_qgroup_rb(quota_root->fs_info, qgroupid); 987 del_qgroup_rb(quota_root->fs_info, qgroupid);
969
970 spin_unlock(&fs_info->qgroup_lock); 988 spin_unlock(&fs_info->qgroup_lock);
971 989
972 return ret; 990 return ret;
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index bdbb94f245c9..67783e03d121 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -580,20 +580,29 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
580 int corrected = 0; 580 int corrected = 0;
581 struct btrfs_key key; 581 struct btrfs_key key;
582 struct inode *inode = NULL; 582 struct inode *inode = NULL;
583 struct btrfs_fs_info *fs_info;
583 u64 end = offset + PAGE_SIZE - 1; 584 u64 end = offset + PAGE_SIZE - 1;
584 struct btrfs_root *local_root; 585 struct btrfs_root *local_root;
586 int srcu_index;
585 587
586 key.objectid = root; 588 key.objectid = root;
587 key.type = BTRFS_ROOT_ITEM_KEY; 589 key.type = BTRFS_ROOT_ITEM_KEY;
588 key.offset = (u64)-1; 590 key.offset = (u64)-1;
589 local_root = btrfs_read_fs_root_no_name(fixup->root->fs_info, &key); 591
590 if (IS_ERR(local_root)) 592 fs_info = fixup->root->fs_info;
593 srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
594
595 local_root = btrfs_read_fs_root_no_name(fs_info, &key);
596 if (IS_ERR(local_root)) {
597 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
591 return PTR_ERR(local_root); 598 return PTR_ERR(local_root);
599 }
592 600
593 key.type = BTRFS_INODE_ITEM_KEY; 601 key.type = BTRFS_INODE_ITEM_KEY;
594 key.objectid = inum; 602 key.objectid = inum;
595 key.offset = 0; 603 key.offset = 0;
596 inode = btrfs_iget(fixup->root->fs_info->sb, &key, local_root, NULL); 604 inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
605 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
597 if (IS_ERR(inode)) 606 if (IS_ERR(inode))
598 return PTR_ERR(inode); 607 return PTR_ERR(inode);
599 608
@@ -606,7 +615,6 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
606 } 615 }
607 616
608 if (PageUptodate(page)) { 617 if (PageUptodate(page)) {
609 struct btrfs_fs_info *fs_info;
610 if (PageDirty(page)) { 618 if (PageDirty(page)) {
611 /* 619 /*
612 * we need to write the data to the defect sector. the 620 * we need to write the data to the defect sector. the
@@ -3180,18 +3188,25 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx)
3180 u64 physical_for_dev_replace; 3188 u64 physical_for_dev_replace;
3181 u64 len; 3189 u64 len;
3182 struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info; 3190 struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info;
3191 int srcu_index;
3183 3192
3184 key.objectid = root; 3193 key.objectid = root;
3185 key.type = BTRFS_ROOT_ITEM_KEY; 3194 key.type = BTRFS_ROOT_ITEM_KEY;
3186 key.offset = (u64)-1; 3195 key.offset = (u64)-1;
3196
3197 srcu_index = srcu_read_lock(&fs_info->subvol_srcu);
3198
3187 local_root = btrfs_read_fs_root_no_name(fs_info, &key); 3199 local_root = btrfs_read_fs_root_no_name(fs_info, &key);
3188 if (IS_ERR(local_root)) 3200 if (IS_ERR(local_root)) {
3201 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
3189 return PTR_ERR(local_root); 3202 return PTR_ERR(local_root);
3203 }
3190 3204
3191 key.type = BTRFS_INODE_ITEM_KEY; 3205 key.type = BTRFS_INODE_ITEM_KEY;
3192 key.objectid = inum; 3206 key.objectid = inum;
3193 key.offset = 0; 3207 key.offset = 0;
3194 inode = btrfs_iget(fs_info->sb, &key, local_root, NULL); 3208 inode = btrfs_iget(fs_info->sb, &key, local_root, NULL);
3209 srcu_read_unlock(&fs_info->subvol_srcu, srcu_index);
3195 if (IS_ERR(inode)) 3210 if (IS_ERR(inode))
3196 return PTR_ERR(inode); 3211 return PTR_ERR(inode);
3197 3212
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 54454542ad40..321b7fb4e441 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -1814,8 +1814,10 @@ static int name_cache_insert(struct send_ctx *sctx,
1814 (unsigned long)nce->ino); 1814 (unsigned long)nce->ino);
1815 if (!nce_head) { 1815 if (!nce_head) {
1816 nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS); 1816 nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS);
1817 if (!nce_head) 1817 if (!nce_head) {
1818 kfree(nce);
1818 return -ENOMEM; 1819 return -ENOMEM;
1820 }
1819 INIT_LIST_HEAD(nce_head); 1821 INIT_LIST_HEAD(nce_head);
1820 1822
1821 ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head); 1823 ret = radix_tree_insert(&sctx->name_cache, nce->ino, nce_head);
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 99545df1b86c..d8982e9601d3 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -267,7 +267,7 @@ void __btrfs_abort_transaction(struct btrfs_trans_handle *trans,
267 function, line, errstr); 267 function, line, errstr);
268 return; 268 return;
269 } 269 }
270 trans->transaction->aborted = errno; 270 ACCESS_ONCE(trans->transaction->aborted) = errno;
271 __btrfs_std_error(root->fs_info, function, line, errno, NULL); 271 __btrfs_std_error(root->fs_info, function, line, errno, NULL);
272} 272}
273/* 273/*
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 87fac9a21ea5..fc03aa60b684 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -333,12 +333,14 @@ start_transaction(struct btrfs_root *root, u64 num_items, int type,
333 &root->fs_info->trans_block_rsv, 333 &root->fs_info->trans_block_rsv,
334 num_bytes, flush); 334 num_bytes, flush);
335 if (ret) 335 if (ret)
336 return ERR_PTR(ret); 336 goto reserve_fail;
337 } 337 }
338again: 338again:
339 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS); 339 h = kmem_cache_alloc(btrfs_trans_handle_cachep, GFP_NOFS);
340 if (!h) 340 if (!h) {
341 return ERR_PTR(-ENOMEM); 341 ret = -ENOMEM;
342 goto alloc_fail;
343 }
342 344
343 /* 345 /*
344 * If we are JOIN_NOLOCK we're already committing a transaction and 346 * If we are JOIN_NOLOCK we're already committing a transaction and
@@ -365,11 +367,7 @@ again:
365 if (ret < 0) { 367 if (ret < 0) {
366 /* We must get the transaction if we are JOIN_NOLOCK. */ 368 /* We must get the transaction if we are JOIN_NOLOCK. */
367 BUG_ON(type == TRANS_JOIN_NOLOCK); 369 BUG_ON(type == TRANS_JOIN_NOLOCK);
368 370 goto join_fail;
369 if (type < TRANS_JOIN_NOLOCK)
370 sb_end_intwrite(root->fs_info->sb);
371 kmem_cache_free(btrfs_trans_handle_cachep, h);
372 return ERR_PTR(ret);
373 } 371 }
374 372
375 cur_trans = root->fs_info->running_transaction; 373 cur_trans = root->fs_info->running_transaction;
@@ -410,6 +408,19 @@ got_it:
410 if (!current->journal_info && type != TRANS_USERSPACE) 408 if (!current->journal_info && type != TRANS_USERSPACE)
411 current->journal_info = h; 409 current->journal_info = h;
412 return h; 410 return h;
411
412join_fail:
413 if (type < TRANS_JOIN_NOLOCK)
414 sb_end_intwrite(root->fs_info->sb);
415 kmem_cache_free(btrfs_trans_handle_cachep, h);
416alloc_fail:
417 if (num_bytes)
418 btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv,
419 num_bytes);
420reserve_fail:
421 if (qgroup_reserved)
422 btrfs_qgroup_free(root, qgroup_reserved);
423 return ERR_PTR(ret);
413} 424}
414 425
415struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, 426struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root,
@@ -1468,7 +1479,8 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1468 goto cleanup_transaction; 1479 goto cleanup_transaction;
1469 } 1480 }
1470 1481
1471 if (cur_trans->aborted) { 1482 /* Stop the commit early if ->aborted is set */
1483 if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
1472 ret = cur_trans->aborted; 1484 ret = cur_trans->aborted;
1473 goto cleanup_transaction; 1485 goto cleanup_transaction;
1474 } 1486 }
@@ -1574,6 +1586,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1574 wait_event(cur_trans->writer_wait, 1586 wait_event(cur_trans->writer_wait,
1575 atomic_read(&cur_trans->num_writers) == 1); 1587 atomic_read(&cur_trans->num_writers) == 1);
1576 1588
1589 /* ->aborted might be set after the previous check, so check it */
1590 if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
1591 ret = cur_trans->aborted;
1592 goto cleanup_transaction;
1593 }
1577 /* 1594 /*
1578 * the reloc mutex makes sure that we stop 1595 * the reloc mutex makes sure that we stop
1579 * the balancing code from coming in and moving 1596 * the balancing code from coming in and moving
@@ -1657,6 +1674,17 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
1657 goto cleanup_transaction; 1674 goto cleanup_transaction;
1658 } 1675 }
1659 1676
1677 /*
1678 * The tasks which save the space cache and inode cache may also
1679 * update ->aborted, check it.
1680 */
1681 if (unlikely(ACCESS_ONCE(cur_trans->aborted))) {
1682 ret = cur_trans->aborted;
1683 mutex_unlock(&root->fs_info->tree_log_mutex);
1684 mutex_unlock(&root->fs_info->reloc_mutex);
1685 goto cleanup_transaction;
1686 }
1687
1660 btrfs_prepare_extent_commit(trans, root); 1688 btrfs_prepare_extent_commit(trans, root);
1661 1689
1662 cur_trans = root->fs_info->running_transaction; 1690 cur_trans = root->fs_info->running_transaction;
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 83186c7e45d4..9027bb1e7466 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3357,6 +3357,11 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
3357 if (skip_csum) 3357 if (skip_csum)
3358 return 0; 3358 return 0;
3359 3359
3360 if (em->compress_type) {
3361 csum_offset = 0;
3362 csum_len = block_len;
3363 }
3364
3360 /* block start is already adjusted for the file extent offset. */ 3365 /* block start is already adjusted for the file extent offset. */
3361 ret = btrfs_lookup_csums_range(log->fs_info->csum_root, 3366 ret = btrfs_lookup_csums_range(log->fs_info->csum_root,
3362 em->block_start + csum_offset, 3367 em->block_start + csum_offset,
@@ -3410,13 +3415,13 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
3410 em = list_entry(extents.next, struct extent_map, list); 3415 em = list_entry(extents.next, struct extent_map, list);
3411 3416
3412 list_del_init(&em->list); 3417 list_del_init(&em->list);
3413 clear_bit(EXTENT_FLAG_LOGGING, &em->flags);
3414 3418
3415 /* 3419 /*
3416 * If we had an error we just need to delete everybody from our 3420 * If we had an error we just need to delete everybody from our
3417 * private list. 3421 * private list.
3418 */ 3422 */
3419 if (ret) { 3423 if (ret) {
3424 clear_em_logging(tree, em);
3420 free_extent_map(em); 3425 free_extent_map(em);
3421 continue; 3426 continue;
3422 } 3427 }
@@ -3424,8 +3429,9 @@ static int btrfs_log_changed_extents(struct btrfs_trans_handle *trans,
3424 write_unlock(&tree->lock); 3429 write_unlock(&tree->lock);
3425 3430
3426 ret = log_one_extent(trans, inode, root, em, path); 3431 ret = log_one_extent(trans, inode, root, em, path);
3427 free_extent_map(em);
3428 write_lock(&tree->lock); 3432 write_lock(&tree->lock);
3433 clear_em_logging(tree, em);
3434 free_extent_map(em);
3429 } 3435 }
3430 WARN_ON(!list_empty(&extents)); 3436 WARN_ON(!list_empty(&extents));
3431 write_unlock(&tree->lock); 3437 write_unlock(&tree->lock);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 5cce6aa74012..5cbb7f4b1672 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1431,7 +1431,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1431 } 1431 }
1432 } else { 1432 } else {
1433 ret = btrfs_get_bdev_and_sb(device_path, 1433 ret = btrfs_get_bdev_and_sb(device_path,
1434 FMODE_READ | FMODE_EXCL, 1434 FMODE_WRITE | FMODE_EXCL,
1435 root->fs_info->bdev_holder, 0, 1435 root->fs_info->bdev_holder, 0,
1436 &bdev, &bh); 1436 &bdev, &bh);
1437 if (ret) 1437 if (ret)
@@ -1556,7 +1556,8 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
1556 ret = 0; 1556 ret = 0;
1557 1557
1558 /* Notify udev that device has changed */ 1558 /* Notify udev that device has changed */
1559 btrfs_kobject_uevent(bdev, KOBJ_CHANGE); 1559 if (bdev)
1560 btrfs_kobject_uevent(bdev, KOBJ_CHANGE);
1560 1561
1561error_brelse: 1562error_brelse:
1562 brelse(bh); 1563 brelse(bh);
@@ -2614,7 +2615,14 @@ static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset,
2614 cache = btrfs_lookup_block_group(fs_info, chunk_offset); 2615 cache = btrfs_lookup_block_group(fs_info, chunk_offset);
2615 chunk_used = btrfs_block_group_used(&cache->item); 2616 chunk_used = btrfs_block_group_used(&cache->item);
2616 2617
2617 user_thresh = div_factor_fine(cache->key.offset, bargs->usage); 2618 if (bargs->usage == 0)
2619 user_thresh = 0;
2620 else if (bargs->usage > 100)
2621 user_thresh = cache->key.offset;
2622 else
2623 user_thresh = div_factor_fine(cache->key.offset,
2624 bargs->usage);
2625
2618 if (chunk_used < user_thresh) 2626 if (chunk_used < user_thresh)
2619 ret = 0; 2627 ret = 0;
2620 2628
@@ -2959,6 +2967,8 @@ static void __cancel_balance(struct btrfs_fs_info *fs_info)
2959 unset_balance_control(fs_info); 2967 unset_balance_control(fs_info);
2960 ret = del_balance_item(fs_info->tree_root); 2968 ret = del_balance_item(fs_info->tree_root);
2961 BUG_ON(ret); 2969 BUG_ON(ret);
2970
2971 atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
2962} 2972}
2963 2973
2964void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock, 2974void update_ioctl_balance_args(struct btrfs_fs_info *fs_info, int lock,
@@ -3138,8 +3148,10 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
3138out: 3148out:
3139 if (bctl->flags & BTRFS_BALANCE_RESUME) 3149 if (bctl->flags & BTRFS_BALANCE_RESUME)
3140 __cancel_balance(fs_info); 3150 __cancel_balance(fs_info);
3141 else 3151 else {
3142 kfree(bctl); 3152 kfree(bctl);
3153 atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
3154 }
3143 return ret; 3155 return ret;
3144} 3156}
3145 3157
@@ -3156,7 +3168,6 @@ static int balance_kthread(void *data)
3156 ret = btrfs_balance(fs_info->balance_ctl, NULL); 3168 ret = btrfs_balance(fs_info->balance_ctl, NULL);
3157 } 3169 }
3158 3170
3159 atomic_set(&fs_info->mutually_exclusive_operation_running, 0);
3160 mutex_unlock(&fs_info->balance_mutex); 3171 mutex_unlock(&fs_info->balance_mutex);
3161 mutex_unlock(&fs_info->volume_mutex); 3172 mutex_unlock(&fs_info->volume_mutex);
3162 3173
@@ -3179,7 +3190,6 @@ int btrfs_resume_balance_async(struct btrfs_fs_info *fs_info)
3179 return 0; 3190 return 0;
3180 } 3191 }
3181 3192
3182 WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1));
3183 tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance"); 3193 tsk = kthread_run(balance_kthread, fs_info, "btrfs-balance");
3184 if (IS_ERR(tsk)) 3194 if (IS_ERR(tsk))
3185 return PTR_ERR(tsk); 3195 return PTR_ERR(tsk);
@@ -3233,6 +3243,8 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
3233 btrfs_balance_sys(leaf, item, &disk_bargs); 3243 btrfs_balance_sys(leaf, item, &disk_bargs);
3234 btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs); 3244 btrfs_disk_balance_args_to_cpu(&bctl->sys, &disk_bargs);
3235 3245
3246 WARN_ON(atomic_xchg(&fs_info->mutually_exclusive_operation_running, 1));
3247
3236 mutex_lock(&fs_info->volume_mutex); 3248 mutex_lock(&fs_info->volume_mutex);
3237 mutex_lock(&fs_info->balance_mutex); 3249 mutex_lock(&fs_info->balance_mutex);
3238 3250
@@ -3496,7 +3508,7 @@ struct btrfs_raid_attr btrfs_raid_array[BTRFS_NR_RAID_TYPES] = {
3496 { 1, 1, 2, 2, 2, 2 /* raid1 */ }, 3508 { 1, 1, 2, 2, 2, 2 /* raid1 */ },
3497 { 1, 2, 1, 1, 1, 2 /* dup */ }, 3509 { 1, 2, 1, 1, 1, 2 /* dup */ },
3498 { 1, 1, 0, 2, 1, 1 /* raid0 */ }, 3510 { 1, 1, 0, 2, 1, 1 /* raid0 */ },
3499 { 1, 1, 0, 1, 1, 1 /* single */ }, 3511 { 1, 1, 1, 1, 1, 1 /* single */ },
3500}; 3512};
3501 3513
3502static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, 3514static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index ce5cbd717bfc..210fce2df308 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -226,6 +226,8 @@ compose_mount_options_out:
226compose_mount_options_err: 226compose_mount_options_err:
227 kfree(mountdata); 227 kfree(mountdata);
228 mountdata = ERR_PTR(rc); 228 mountdata = ERR_PTR(rc);
229 kfree(*devname);
230 *devname = NULL;
229 goto compose_mount_options_out; 231 goto compose_mount_options_out;
230} 232}
231 233
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 17c3643e5950..12b3da39733b 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1917,7 +1917,7 @@ srcip_matches(struct sockaddr *srcaddr, struct sockaddr *rhs)
1917 } 1917 }
1918 case AF_INET6: { 1918 case AF_INET6: {
1919 struct sockaddr_in6 *saddr6 = (struct sockaddr_in6 *)srcaddr; 1919 struct sockaddr_in6 *saddr6 = (struct sockaddr_in6 *)srcaddr;
1920 struct sockaddr_in6 *vaddr6 = (struct sockaddr_in6 *)&rhs; 1920 struct sockaddr_in6 *vaddr6 = (struct sockaddr_in6 *)rhs;
1921 return ipv6_addr_equal(&saddr6->sin6_addr, &vaddr6->sin6_addr); 1921 return ipv6_addr_equal(&saddr6->sin6_addr, &vaddr6->sin6_addr);
1922 } 1922 }
1923 default: 1923 default:
diff --git a/fs/dlm/user.c b/fs/dlm/user.c
index 7ff49852b0cb..911649a47dd5 100644
--- a/fs/dlm/user.c
+++ b/fs/dlm/user.c
@@ -503,11 +503,11 @@ static ssize_t device_write(struct file *file, const char __user *buf,
503#endif 503#endif
504 return -EINVAL; 504 return -EINVAL;
505 505
506#ifdef CONFIG_COMPAT 506 /*
507 if (count > sizeof(struct dlm_write_request32) + DLM_RESNAME_MAXLEN) 507 * can't compare against COMPAT/dlm_write_request32 because
508#else 508 * we don't yet know if is64bit is zero
509 */
509 if (count > sizeof(struct dlm_write_request) + DLM_RESNAME_MAXLEN) 510 if (count > sizeof(struct dlm_write_request) + DLM_RESNAME_MAXLEN)
510#endif
511 return -EINVAL; 511 return -EINVAL;
512 512
513 kbuf = kzalloc(count + 1, GFP_NOFS); 513 kbuf = kzalloc(count + 1, GFP_NOFS);
diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c
index b906ed17a839..9802de0f85e6 100644
--- a/fs/gfs2/lock_dlm.c
+++ b/fs/gfs2/lock_dlm.c
@@ -281,6 +281,7 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
281{ 281{
282 struct gfs2_sbd *sdp = gl->gl_sbd; 282 struct gfs2_sbd *sdp = gl->gl_sbd;
283 struct lm_lockstruct *ls = &sdp->sd_lockstruct; 283 struct lm_lockstruct *ls = &sdp->sd_lockstruct;
284 int lvb_needs_unlock = 0;
284 int error; 285 int error;
285 286
286 if (gl->gl_lksb.sb_lkid == 0) { 287 if (gl->gl_lksb.sb_lkid == 0) {
@@ -294,8 +295,12 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
294 gfs2_update_request_times(gl); 295 gfs2_update_request_times(gl);
295 296
296 /* don't want to skip dlm_unlock writing the lvb when lock is ex */ 297 /* don't want to skip dlm_unlock writing the lvb when lock is ex */
298
299 if (gl->gl_lksb.sb_lvbptr && (gl->gl_state == LM_ST_EXCLUSIVE))
300 lvb_needs_unlock = 1;
301
297 if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) && 302 if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
298 gl->gl_lksb.sb_lvbptr && (gl->gl_state != LM_ST_EXCLUSIVE)) { 303 !lvb_needs_unlock) {
299 gfs2_glock_free(gl); 304 gfs2_glock_free(gl);
300 return; 305 return;
301 } 306 }
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index dd057bc6b65b..fc8dc20fdeb9 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -177,11 +177,31 @@ out_nofree:
177 return mnt; 177 return mnt;
178} 178}
179 179
180static int
181nfs_namespace_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
182{
183 if (NFS_FH(dentry->d_inode)->size != 0)
184 return nfs_getattr(mnt, dentry, stat);
185 generic_fillattr(dentry->d_inode, stat);
186 return 0;
187}
188
189static int
190nfs_namespace_setattr(struct dentry *dentry, struct iattr *attr)
191{
192 if (NFS_FH(dentry->d_inode)->size != 0)
193 return nfs_setattr(dentry, attr);
194 return -EACCES;
195}
196
180const struct inode_operations nfs_mountpoint_inode_operations = { 197const struct inode_operations nfs_mountpoint_inode_operations = {
181 .getattr = nfs_getattr, 198 .getattr = nfs_getattr,
199 .setattr = nfs_setattr,
182}; 200};
183 201
184const struct inode_operations nfs_referral_inode_operations = { 202const struct inode_operations nfs_referral_inode_operations = {
203 .getattr = nfs_namespace_getattr,
204 .setattr = nfs_namespace_setattr,
185}; 205};
186 206
187static void nfs_expire_automounts(struct work_struct *work) 207static void nfs_expire_automounts(struct work_struct *work)
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index acc347268124..2e9779b58b7a 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -236,11 +236,10 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
236 error = nfs4_discover_server_trunking(clp, &old); 236 error = nfs4_discover_server_trunking(clp, &old);
237 if (error < 0) 237 if (error < 0)
238 goto error; 238 goto error;
239 nfs_put_client(clp);
239 if (clp != old) { 240 if (clp != old) {
240 clp->cl_preserve_clid = true; 241 clp->cl_preserve_clid = true;
241 nfs_put_client(clp);
242 clp = old; 242 clp = old;
243 atomic_inc(&clp->cl_count);
244 } 243 }
245 244
246 return clp; 245 return clp;
@@ -306,7 +305,7 @@ int nfs40_walk_client_list(struct nfs_client *new,
306 .clientid = new->cl_clientid, 305 .clientid = new->cl_clientid,
307 .confirm = new->cl_confirm, 306 .confirm = new->cl_confirm,
308 }; 307 };
309 int status; 308 int status = -NFS4ERR_STALE_CLIENTID;
310 309
311 spin_lock(&nn->nfs_client_lock); 310 spin_lock(&nn->nfs_client_lock);
312 list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { 311 list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) {
@@ -332,40 +331,33 @@ int nfs40_walk_client_list(struct nfs_client *new,
332 331
333 if (prev) 332 if (prev)
334 nfs_put_client(prev); 333 nfs_put_client(prev);
334 prev = pos;
335 335
336 status = nfs4_proc_setclientid_confirm(pos, &clid, cred); 336 status = nfs4_proc_setclientid_confirm(pos, &clid, cred);
337 if (status == 0) { 337 switch (status) {
338 case -NFS4ERR_STALE_CLIENTID:
339 break;
340 case 0:
338 nfs4_swap_callback_idents(pos, new); 341 nfs4_swap_callback_idents(pos, new);
339 342
340 nfs_put_client(pos); 343 prev = NULL;
341 *result = pos; 344 *result = pos;
342 dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", 345 dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
343 __func__, pos, atomic_read(&pos->cl_count)); 346 __func__, pos, atomic_read(&pos->cl_count));
344 return 0; 347 default:
345 } 348 goto out;
346 if (status != -NFS4ERR_STALE_CLIENTID) {
347 nfs_put_client(pos);
348 dprintk("NFS: <-- %s status = %d, no result\n",
349 __func__, status);
350 return status;
351 } 349 }
352 350
353 spin_lock(&nn->nfs_client_lock); 351 spin_lock(&nn->nfs_client_lock);
354 prev = pos;
355 } 352 }
353 spin_unlock(&nn->nfs_client_lock);
356 354
357 /* 355 /* No match found. The server lost our clientid */
358 * No matching nfs_client found. This should be impossible, 356out:
359 * because the new nfs_client has already been added to
360 * nfs_client_list by nfs_get_client().
361 *
362 * Don't BUG(), since the caller is holding a mutex.
363 */
364 if (prev) 357 if (prev)
365 nfs_put_client(prev); 358 nfs_put_client(prev);
366 spin_unlock(&nn->nfs_client_lock); 359 dprintk("NFS: <-- %s status = %d\n", __func__, status);
367 pr_err("NFS: %s Error: no matching nfs_client found\n", __func__); 360 return status;
368 return -NFS4ERR_STALE_CLIENTID;
369} 361}
370 362
371#ifdef CONFIG_NFS_V4_1 363#ifdef CONFIG_NFS_V4_1
@@ -432,7 +424,7 @@ int nfs41_walk_client_list(struct nfs_client *new,
432{ 424{
433 struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id); 425 struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id);
434 struct nfs_client *pos, *n, *prev = NULL; 426 struct nfs_client *pos, *n, *prev = NULL;
435 int error; 427 int status = -NFS4ERR_STALE_CLIENTID;
436 428
437 spin_lock(&nn->nfs_client_lock); 429 spin_lock(&nn->nfs_client_lock);
438 list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) { 430 list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) {
@@ -448,14 +440,17 @@ int nfs41_walk_client_list(struct nfs_client *new,
448 nfs_put_client(prev); 440 nfs_put_client(prev);
449 prev = pos; 441 prev = pos;
450 442
451 error = nfs_wait_client_init_complete(pos); 443 nfs4_schedule_lease_recovery(pos);
452 if (error < 0) { 444 status = nfs_wait_client_init_complete(pos);
445 if (status < 0) {
453 nfs_put_client(pos); 446 nfs_put_client(pos);
454 spin_lock(&nn->nfs_client_lock); 447 spin_lock(&nn->nfs_client_lock);
455 continue; 448 continue;
456 } 449 }
457 450 status = pos->cl_cons_state;
458 spin_lock(&nn->nfs_client_lock); 451 spin_lock(&nn->nfs_client_lock);
452 if (status < 0)
453 continue;
459 } 454 }
460 455
461 if (pos->rpc_ops != new->rpc_ops) 456 if (pos->rpc_ops != new->rpc_ops)
@@ -473,6 +468,7 @@ int nfs41_walk_client_list(struct nfs_client *new,
473 if (!nfs4_match_serverowners(pos, new)) 468 if (!nfs4_match_serverowners(pos, new))
474 continue; 469 continue;
475 470
471 atomic_inc(&pos->cl_count);
476 spin_unlock(&nn->nfs_client_lock); 472 spin_unlock(&nn->nfs_client_lock);
477 dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n", 473 dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
478 __func__, pos, atomic_read(&pos->cl_count)); 474 __func__, pos, atomic_read(&pos->cl_count));
@@ -481,16 +477,10 @@ int nfs41_walk_client_list(struct nfs_client *new,
481 return 0; 477 return 0;
482 } 478 }
483 479
484 /* 480 /* No matching nfs_client found. */
485 * No matching nfs_client found. This should be impossible,
486 * because the new nfs_client has already been added to
487 * nfs_client_list by nfs_get_client().
488 *
489 * Don't BUG(), since the caller is holding a mutex.
490 */
491 spin_unlock(&nn->nfs_client_lock); 481 spin_unlock(&nn->nfs_client_lock);
492 pr_err("NFS: %s Error: no matching nfs_client found\n", __func__); 482 dprintk("NFS: <-- %s status = %d\n", __func__, status);
493 return -NFS4ERR_STALE_CLIENTID; 483 return status;
494} 484}
495#endif /* CONFIG_NFS_V4_1 */ 485#endif /* CONFIG_NFS_V4_1 */
496 486
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 9448c579d41a..e61f68d5ef21 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -136,16 +136,11 @@ int nfs40_discover_server_trunking(struct nfs_client *clp,
136 clp->cl_confirm = clid.confirm; 136 clp->cl_confirm = clid.confirm;
137 137
138 status = nfs40_walk_client_list(clp, result, cred); 138 status = nfs40_walk_client_list(clp, result, cred);
139 switch (status) { 139 if (status == 0) {
140 case -NFS4ERR_STALE_CLIENTID:
141 set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
142 case 0:
143 /* Sustain the lease, even if it's empty. If the clientid4 140 /* Sustain the lease, even if it's empty. If the clientid4
144 * goes stale it's of no use for trunking discovery. */ 141 * goes stale it's of no use for trunking discovery. */
145 nfs4_schedule_state_renewal(*result); 142 nfs4_schedule_state_renewal(*result);
146 break;
147 } 143 }
148
149out: 144out:
150 return status; 145 return status;
151} 146}
@@ -1863,6 +1858,7 @@ again:
1863 case -ETIMEDOUT: 1858 case -ETIMEDOUT:
1864 case -EAGAIN: 1859 case -EAGAIN:
1865 ssleep(1); 1860 ssleep(1);
1861 case -NFS4ERR_STALE_CLIENTID:
1866 dprintk("NFS: %s after status %d, retrying\n", 1862 dprintk("NFS: %s after status %d, retrying\n",
1867 __func__, status); 1863 __func__, status);
1868 goto again; 1864 goto again;
@@ -2022,8 +2018,18 @@ static int nfs4_reset_session(struct nfs_client *clp)
2022 nfs4_begin_drain_session(clp); 2018 nfs4_begin_drain_session(clp);
2023 cred = nfs4_get_exchange_id_cred(clp); 2019 cred = nfs4_get_exchange_id_cred(clp);
2024 status = nfs4_proc_destroy_session(clp->cl_session, cred); 2020 status = nfs4_proc_destroy_session(clp->cl_session, cred);
2025 if (status && status != -NFS4ERR_BADSESSION && 2021 switch (status) {
2026 status != -NFS4ERR_DEADSESSION) { 2022 case 0:
2023 case -NFS4ERR_BADSESSION:
2024 case -NFS4ERR_DEADSESSION:
2025 break;
2026 case -NFS4ERR_BACK_CHAN_BUSY:
2027 case -NFS4ERR_DELAY:
2028 set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state);
2029 status = 0;
2030 ssleep(1);
2031 goto out;
2032 default:
2027 status = nfs4_recovery_handle_error(clp, status); 2033 status = nfs4_recovery_handle_error(clp, status);
2028 goto out; 2034 goto out;
2029 } 2035 }
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 2e7e8c878e5d..b056b1628722 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2589,27 +2589,23 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags,
2589 struct nfs_server *server; 2589 struct nfs_server *server;
2590 struct dentry *mntroot = ERR_PTR(-ENOMEM); 2590 struct dentry *mntroot = ERR_PTR(-ENOMEM);
2591 struct nfs_subversion *nfs_mod = NFS_SB(data->sb)->nfs_client->cl_nfs_mod; 2591 struct nfs_subversion *nfs_mod = NFS_SB(data->sb)->nfs_client->cl_nfs_mod;
2592 int error;
2593 2592
2594 dprintk("--> nfs_xdev_mount_common()\n"); 2593 dprintk("--> nfs_xdev_mount()\n");
2595 2594
2596 mount_info.mntfh = mount_info.cloned->fh; 2595 mount_info.mntfh = mount_info.cloned->fh;
2597 2596
2598 /* create a new volume representation */ 2597 /* create a new volume representation */
2599 server = nfs_mod->rpc_ops->clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); 2598 server = nfs_mod->rpc_ops->clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor);
2600 if (IS_ERR(server)) {
2601 error = PTR_ERR(server);
2602 goto out_err;
2603 }
2604 2599
2605 mntroot = nfs_fs_mount_common(server, flags, dev_name, &mount_info, nfs_mod); 2600 if (IS_ERR(server))
2606 dprintk("<-- nfs_xdev_mount_common() = 0\n"); 2601 mntroot = ERR_CAST(server);
2607out: 2602 else
2608 return mntroot; 2603 mntroot = nfs_fs_mount_common(server, flags,
2604 dev_name, &mount_info, nfs_mod);
2609 2605
2610out_err: 2606 dprintk("<-- nfs_xdev_mount() = %ld\n",
2611 dprintk("<-- nfs_xdev_mount_common() = %d [error]\n", error); 2607 IS_ERR(mntroot) ? PTR_ERR(mntroot) : 0L);
2612 goto out; 2608 return mntroot;
2613} 2609}
2614 2610
2615#if IS_ENABLED(CONFIG_NFS_V4) 2611#if IS_ENABLED(CONFIG_NFS_V4)
diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c
index fdb180769485..f3859354e41a 100644
--- a/fs/nilfs2/ioctl.c
+++ b/fs/nilfs2/ioctl.c
@@ -664,8 +664,11 @@ static int nilfs_ioctl_clean_segments(struct inode *inode, struct file *filp,
664 if (ret < 0) 664 if (ret < 0)
665 printk(KERN_ERR "NILFS: GC failed during preparation: " 665 printk(KERN_ERR "NILFS: GC failed during preparation: "
666 "cannot read source blocks: err=%d\n", ret); 666 "cannot read source blocks: err=%d\n", ret);
667 else 667 else {
668 if (nilfs_sb_need_update(nilfs))
669 set_nilfs_discontinued(nilfs);
668 ret = nilfs_clean_segments(inode->i_sb, argv, kbufs); 670 ret = nilfs_clean_segments(inode->i_sb, argv, kbufs);
671 }
669 672
670 nilfs_remove_all_gcinodes(nilfs); 673 nilfs_remove_all_gcinodes(nilfs);
671 clear_nilfs_gc_running(nilfs); 674 clear_nilfs_gc_running(nilfs);
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 4111a40ebe1a..5f707e537171 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -86,11 +86,11 @@ xfs_destroy_ioend(
86 } 86 }
87 87
88 if (ioend->io_iocb) { 88 if (ioend->io_iocb) {
89 inode_dio_done(ioend->io_inode);
89 if (ioend->io_isasync) { 90 if (ioend->io_isasync) {
90 aio_complete(ioend->io_iocb, ioend->io_error ? 91 aio_complete(ioend->io_iocb, ioend->io_error ?
91 ioend->io_error : ioend->io_result, 0); 92 ioend->io_error : ioend->io_result, 0);
92 } 93 }
93 inode_dio_done(ioend->io_inode);
94 } 94 }
95 95
96 mempool_free(ioend, xfs_ioend_pool); 96 mempool_free(ioend, xfs_ioend_pool);
diff --git a/fs/xfs/xfs_bmap.c b/fs/xfs/xfs_bmap.c
index 0e92d12765d2..cdb2d3348583 100644
--- a/fs/xfs/xfs_bmap.c
+++ b/fs/xfs/xfs_bmap.c
@@ -4680,9 +4680,6 @@ __xfs_bmapi_allocate(
4680 return error; 4680 return error;
4681 } 4681 }
4682 4682
4683 if (bma->flags & XFS_BMAPI_STACK_SWITCH)
4684 bma->stack_switch = 1;
4685
4686 error = xfs_bmap_alloc(bma); 4683 error = xfs_bmap_alloc(bma);
4687 if (error) 4684 if (error)
4688 return error; 4685 return error;
@@ -4956,6 +4953,9 @@ xfs_bmapi_write(
4956 bma.flist = flist; 4953 bma.flist = flist;
4957 bma.firstblock = firstblock; 4954 bma.firstblock = firstblock;
4958 4955
4956 if (flags & XFS_BMAPI_STACK_SWITCH)
4957 bma.stack_switch = 1;
4958
4959 while (bno < end && n < *nmap) { 4959 while (bno < end && n < *nmap) {
4960 inhole = eof || bma.got.br_startoff > bno; 4960 inhole = eof || bma.got.br_startoff > bno;
4961 wasdelay = !inhole && isnullstartblock(bma.got.br_startblock); 4961 wasdelay = !inhole && isnullstartblock(bma.got.br_startblock);
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 56d1614760cf..fbbb9eb92e32 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -487,6 +487,7 @@ _xfs_buf_find(
487 struct rb_node *parent; 487 struct rb_node *parent;
488 xfs_buf_t *bp; 488 xfs_buf_t *bp;
489 xfs_daddr_t blkno = map[0].bm_bn; 489 xfs_daddr_t blkno = map[0].bm_bn;
490 xfs_daddr_t eofs;
490 int numblks = 0; 491 int numblks = 0;
491 int i; 492 int i;
492 493
@@ -498,6 +499,23 @@ _xfs_buf_find(
498 ASSERT(!(numbytes < (1 << btp->bt_sshift))); 499 ASSERT(!(numbytes < (1 << btp->bt_sshift)));
499 ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_smask)); 500 ASSERT(!(BBTOB(blkno) & (xfs_off_t)btp->bt_smask));
500 501
502 /*
503 * Corrupted block numbers can get through to here, unfortunately, so we
504 * have to check that the buffer falls within the filesystem bounds.
505 */
506 eofs = XFS_FSB_TO_BB(btp->bt_mount, btp->bt_mount->m_sb.sb_dblocks);
507 if (blkno >= eofs) {
508 /*
509 * XXX (dgc): we should really be returning EFSCORRUPTED here,
510 * but none of the higher level infrastructure supports
511 * returning a specific error on buffer lookup failures.
512 */
513 xfs_alert(btp->bt_mount,
514 "%s: Block out of range: block 0x%llx, EOFS 0x%llx ",
515 __func__, blkno, eofs);
516 return NULL;
517 }
518
501 /* get tree root */ 519 /* get tree root */
502 pag = xfs_perag_get(btp->bt_mount, 520 pag = xfs_perag_get(btp->bt_mount,
503 xfs_daddr_to_agno(btp->bt_mount, blkno)); 521 xfs_daddr_to_agno(btp->bt_mount, blkno));
@@ -1487,6 +1505,8 @@ restart:
1487 while (!list_empty(&btp->bt_lru)) { 1505 while (!list_empty(&btp->bt_lru)) {
1488 bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru); 1506 bp = list_first_entry(&btp->bt_lru, struct xfs_buf, b_lru);
1489 if (atomic_read(&bp->b_hold) > 1) { 1507 if (atomic_read(&bp->b_hold) > 1) {
1508 trace_xfs_buf_wait_buftarg(bp, _RET_IP_);
1509 list_move_tail(&bp->b_lru, &btp->bt_lru);
1490 spin_unlock(&btp->bt_lru_lock); 1510 spin_unlock(&btp->bt_lru_lock);
1491 delay(100); 1511 delay(100);
1492 goto restart; 1512 goto restart;
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 77b09750e92c..3f9949fee391 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -652,7 +652,10 @@ xfs_buf_item_unlock(
652 652
653 /* 653 /*
654 * If the buf item isn't tracking any data, free it, otherwise drop the 654 * If the buf item isn't tracking any data, free it, otherwise drop the
655 * reference we hold to it. 655 * reference we hold to it. If we are aborting the transaction, this may
656 * be the only reference to the buf item, so we free it anyway
657 * regardless of whether it is dirty or not. A dirty abort implies a
658 * shutdown, anyway.
656 */ 659 */
657 clean = 1; 660 clean = 1;
658 for (i = 0; i < bip->bli_format_count; i++) { 661 for (i = 0; i < bip->bli_format_count; i++) {
@@ -664,7 +667,12 @@ xfs_buf_item_unlock(
664 } 667 }
665 if (clean) 668 if (clean)
666 xfs_buf_item_relse(bp); 669 xfs_buf_item_relse(bp);
667 else 670 else if (aborted) {
671 if (atomic_dec_and_test(&bip->bli_refcount)) {
672 ASSERT(XFS_FORCED_SHUTDOWN(lip->li_mountp));
673 xfs_buf_item_relse(bp);
674 }
675 } else
668 atomic_dec(&bip->bli_refcount); 676 atomic_dec(&bip->bli_refcount);
669 677
670 if (!hold) 678 if (!hold)
diff --git a/fs/xfs/xfs_dfrag.c b/fs/xfs/xfs_dfrag.c
index d0e9c74d3d96..a8bd26b82ecb 100644
--- a/fs/xfs/xfs_dfrag.c
+++ b/fs/xfs/xfs_dfrag.c
@@ -246,10 +246,10 @@ xfs_swap_extents(
246 goto out_unlock; 246 goto out_unlock;
247 } 247 }
248 248
249 error = -filemap_write_and_wait(VFS_I(ip)->i_mapping); 249 error = -filemap_write_and_wait(VFS_I(tip)->i_mapping);
250 if (error) 250 if (error)
251 goto out_unlock; 251 goto out_unlock;
252 truncate_pagecache_range(VFS_I(ip), 0, -1); 252 truncate_pagecache_range(VFS_I(tip), 0, -1);
253 253
254 /* Verify O_DIRECT for ftmp */ 254 /* Verify O_DIRECT for ftmp */
255 if (VN_CACHED(VFS_I(tip)) != 0) { 255 if (VN_CACHED(VFS_I(tip)) != 0) {
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index add06b4e9a63..364818eef40e 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -351,6 +351,15 @@ xfs_iomap_prealloc_size(
351 } 351 }
352 if (shift) 352 if (shift)
353 alloc_blocks >>= shift; 353 alloc_blocks >>= shift;
354
355 /*
356 * If we are still trying to allocate more space than is
357 * available, squash the prealloc hard. This can happen if we
358 * have a large file on a small filesystem and the above
359 * lowspace thresholds are smaller than MAXEXTLEN.
360 */
361 while (alloc_blocks >= freesp)
362 alloc_blocks >>= 4;
354 } 363 }
355 364
356 if (alloc_blocks < mp->m_writeio_blocks) 365 if (alloc_blocks < mp->m_writeio_blocks)
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index da508463ff10..7d6df7c00c36 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -658,7 +658,7 @@ xfs_sb_quiet_read_verify(
658 return; 658 return;
659 } 659 }
660 /* quietly fail */ 660 /* quietly fail */
661 xfs_buf_ioerror(bp, EFSCORRUPTED); 661 xfs_buf_ioerror(bp, EWRONGFS);
662} 662}
663 663
664static void 664static void
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 2e137d4a85ae..16a812977eab 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -341,6 +341,7 @@ DEFINE_BUF_EVENT(xfs_buf_item_relse);
341DEFINE_BUF_EVENT(xfs_buf_item_iodone); 341DEFINE_BUF_EVENT(xfs_buf_item_iodone);
342DEFINE_BUF_EVENT(xfs_buf_item_iodone_async); 342DEFINE_BUF_EVENT(xfs_buf_item_iodone_async);
343DEFINE_BUF_EVENT(xfs_buf_error_relse); 343DEFINE_BUF_EVENT(xfs_buf_error_relse);
344DEFINE_BUF_EVENT(xfs_buf_wait_buftarg);
344DEFINE_BUF_EVENT(xfs_trans_read_buf_io); 345DEFINE_BUF_EVENT(xfs_trans_read_buf_io);
345DEFINE_BUF_EVENT(xfs_trans_read_buf_shut); 346DEFINE_BUF_EVENT(xfs_trans_read_buf_shut);
346 347