aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
author Jason Gunthorpe <jgg@mellanox.com> 2018-01-29 15:26:40 -0500
committer Jason Gunthorpe <jgg@mellanox.com> 2018-01-30 11:30:00 -0500
commit e7996a9a77fc669387da43ff4823b91cc4872bd0 (patch)
tree 617f0a128e222539d67e8cccc359f1bc4b984900 /fs
parent b5fa635aab8f0d39a824c01991266a6d06f007fb (diff)
parent d8a5b80568a9cb66810e75b182018e9edb68e8ff (diff)
Merge tag v4.15 of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6.git
To resolve conflicts in:
  drivers/infiniband/hw/mlx5/main.c
  drivers/infiniband/hw/mlx5/qp.c

From patches merged into the -rc cycle. The conflict resolution matches
what linux-next has been carrying.

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/afs/dir.c 37
-rw-r--r-- fs/afs/inode.c 4
-rw-r--r-- fs/afs/rxrpc.c 2
-rw-r--r-- fs/afs/write.c 8
-rw-r--r-- fs/autofs4/waitq.c 1
-rw-r--r-- fs/btrfs/ctree.c 18
-rw-r--r-- fs/btrfs/delayed-inode.c 71
-rw-r--r-- fs/btrfs/disk-io.c 12
-rw-r--r-- fs/btrfs/extent-tree.c 1
-rw-r--r-- fs/btrfs/inode.c 2
-rw-r--r-- fs/btrfs/ioctl.c 2
-rw-r--r-- fs/btrfs/volumes.c 1
-rw-r--r-- fs/ceph/mds_client.c 42
-rw-r--r-- fs/cifs/smb2ops.c 3
-rw-r--r-- fs/cifs/smb2pdu.c 30
-rw-r--r-- fs/cramfs/Kconfig 1
-rw-r--r-- fs/dax.c 3
-rw-r--r-- fs/exec.c 23
-rw-r--r-- fs/ext4/extents.c 1
-rw-r--r-- fs/ext4/ialloc.c 2
-rw-r--r-- fs/ext4/inode.c 9
-rw-r--r-- fs/ext4/namei.c 4
-rw-r--r-- fs/hpfs/dir.c 1
-rw-r--r-- fs/hpfs/dnode.c 2
-rw-r--r-- fs/hpfs/super.c 1
-rw-r--r-- fs/namespace.c 1
-rw-r--r-- fs/nfs/client.c 11
-rw-r--r-- fs/nfs/nfs4client.c 17
-rw-r--r-- fs/nfs/write.c 2
-rw-r--r-- fs/nfsd/auth.c 3
-rw-r--r-- fs/orangefs/devorangefs-req.c 3
-rw-r--r-- fs/orangefs/file.c 7
-rw-r--r-- fs/orangefs/orangefs-kernel.h 11
-rw-r--r-- fs/orangefs/waitqueue.c 4
-rw-r--r-- fs/overlayfs/Kconfig 10
-rw-r--r-- fs/overlayfs/dir.c 3
-rw-r--r-- fs/overlayfs/namei.c 18
-rw-r--r-- fs/overlayfs/overlayfs.h 2
-rw-r--r-- fs/overlayfs/ovl_entry.h 2
-rw-r--r-- fs/overlayfs/readdir.c 7
-rw-r--r-- fs/overlayfs/super.c 87
-rw-r--r-- fs/proc/array.c 7
-rw-r--r-- fs/proc/base.c 2
-rw-r--r-- fs/super.c 43
-rw-r--r-- fs/userfaultfd.c 20
-rw-r--r-- fs/xfs/libxfs/xfs_alloc.c 4
-rw-r--r-- fs/xfs/libxfs/xfs_attr.c 20
-rw-r--r-- fs/xfs/libxfs/xfs_attr_leaf.c 9
-rw-r--r-- fs/xfs/libxfs/xfs_attr_leaf.h 3
-rw-r--r-- fs/xfs/libxfs/xfs_bmap.c 2
-rw-r--r-- fs/xfs/libxfs/xfs_defer.c 39
-rw-r--r-- fs/xfs/libxfs/xfs_defer.h 5
-rw-r--r-- fs/xfs/libxfs/xfs_ialloc.c 10
-rw-r--r-- fs/xfs/libxfs/xfs_ialloc.h 1
-rw-r--r-- fs/xfs/libxfs/xfs_iext_tree.c 4
-rw-r--r-- fs/xfs/libxfs/xfs_refcount.c 52
-rw-r--r-- fs/xfs/libxfs/xfs_rmap.c 99
-rw-r--r-- fs/xfs/libxfs/xfs_rmap.h 16
-rw-r--r-- fs/xfs/scrub/scrub.c 1
-rw-r--r-- fs/xfs/scrub/trace.c 1
-rw-r--r-- fs/xfs/xfs_aops.c 4
-rw-r--r-- fs/xfs/xfs_extfree_item.c 2
-rw-r--r-- fs/xfs/xfs_fsops.c 5
-rw-r--r-- fs/xfs/xfs_icache.c 35
-rw-r--r-- fs/xfs/xfs_icache.h 1
-rw-r--r-- fs/xfs/xfs_inode.c 61
-rw-r--r-- fs/xfs/xfs_inode.h 3
-rw-r--r-- fs/xfs/xfs_iomap.c 4
-rw-r--r-- fs/xfs/xfs_qm.c 50
-rw-r--r-- fs/xfs/xfs_reflink.c 23
-rw-r--r-- fs/xfs/xfs_super.c 9
-rw-r--r-- fs/xfs/xfs_symlink.c 15
-rw-r--r-- fs/xfs/xfs_trace.c 1
73 files changed, 678 insertions, 342 deletions
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index ff8d5bf4354f..23c7f395d718 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -895,20 +895,38 @@ error:
895 * However, if we didn't have a callback promise outstanding, or it was 895 * However, if we didn't have a callback promise outstanding, or it was
896 * outstanding on a different server, then it won't break it either... 896 * outstanding on a different server, then it won't break it either...
897 */ 897 */
898static int afs_dir_remove_link(struct dentry *dentry, struct key *key) 898static int afs_dir_remove_link(struct dentry *dentry, struct key *key,
899 unsigned long d_version_before,
900 unsigned long d_version_after)
899{ 901{
902 bool dir_valid;
900 int ret = 0; 903 int ret = 0;
901 904
905 /* There were no intervening changes on the server if the version
906 * number we got back was incremented by exactly 1.
907 */
908 dir_valid = (d_version_after == d_version_before + 1);
909
902 if (d_really_is_positive(dentry)) { 910 if (d_really_is_positive(dentry)) {
903 struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry)); 911 struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
904 912
905 if (test_bit(AFS_VNODE_DELETED, &vnode->flags)) 913 if (dir_valid) {
906 kdebug("AFS_VNODE_DELETED"); 914 drop_nlink(&vnode->vfs_inode);
907 clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags); 915 if (vnode->vfs_inode.i_nlink == 0) {
908 916 set_bit(AFS_VNODE_DELETED, &vnode->flags);
909 ret = afs_validate(vnode, key); 917 clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
910 if (ret == -ESTALE) 918 }
911 ret = 0; 919 ret = 0;
920 } else {
921 clear_bit(AFS_VNODE_CB_PROMISED, &vnode->flags);
922
923 if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
924 kdebug("AFS_VNODE_DELETED");
925
926 ret = afs_validate(vnode, key);
927 if (ret == -ESTALE)
928 ret = 0;
929 }
912 _debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret); 930 _debug("nlink %d [val %d]", vnode->vfs_inode.i_nlink, ret);
913 } 931 }
914 932
@@ -923,6 +941,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
923 struct afs_fs_cursor fc; 941 struct afs_fs_cursor fc;
924 struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode; 942 struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
925 struct key *key; 943 struct key *key;
944 unsigned long d_version = (unsigned long)dentry->d_fsdata;
926 int ret; 945 int ret;
927 946
928 _enter("{%x:%u},{%pd}", 947 _enter("{%x:%u},{%pd}",
@@ -955,7 +974,9 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
955 afs_vnode_commit_status(&fc, dvnode, fc.cb_break); 974 afs_vnode_commit_status(&fc, dvnode, fc.cb_break);
956 ret = afs_end_vnode_operation(&fc); 975 ret = afs_end_vnode_operation(&fc);
957 if (ret == 0) 976 if (ret == 0)
958 ret = afs_dir_remove_link(dentry, key); 977 ret = afs_dir_remove_link(
978 dentry, key, d_version,
979 (unsigned long)dvnode->status.data_version);
959 } 980 }
960 981
961error_key: 982error_key:
diff --git a/fs/afs/inode.c b/fs/afs/inode.c
index 3415eb7484f6..1e81864ef0b2 100644
--- a/fs/afs/inode.c
+++ b/fs/afs/inode.c
@@ -377,6 +377,10 @@ int afs_validate(struct afs_vnode *vnode, struct key *key)
377 } 377 }
378 378
379 read_sequnlock_excl(&vnode->cb_lock); 379 read_sequnlock_excl(&vnode->cb_lock);
380
381 if (test_bit(AFS_VNODE_DELETED, &vnode->flags))
382 clear_nlink(&vnode->vfs_inode);
383
380 if (valid) 384 if (valid)
381 goto valid; 385 goto valid;
382 386
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index ea1460b9b71a..e1126659f043 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -885,7 +885,7 @@ int afs_extract_data(struct afs_call *call, void *buf, size_t count,
885{ 885{
886 struct afs_net *net = call->net; 886 struct afs_net *net = call->net;
887 enum afs_call_state state; 887 enum afs_call_state state;
888 u32 remote_abort; 888 u32 remote_abort = 0;
889 int ret; 889 int ret;
890 890
891 _enter("{%s,%zu},,%zu,%d", 891 _enter("{%s,%zu},,%zu,%d",
diff --git a/fs/afs/write.c b/fs/afs/write.c
index cb5f8a3df577..9370e2feb999 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -198,7 +198,7 @@ int afs_write_end(struct file *file, struct address_space *mapping,
198 ret = afs_fill_page(vnode, key, pos + copied, 198 ret = afs_fill_page(vnode, key, pos + copied,
199 len - copied, page); 199 len - copied, page);
200 if (ret < 0) 200 if (ret < 0)
201 return ret; 201 goto out;
202 } 202 }
203 SetPageUptodate(page); 203 SetPageUptodate(page);
204 } 204 }
@@ -206,10 +206,12 @@ int afs_write_end(struct file *file, struct address_space *mapping,
206 set_page_dirty(page); 206 set_page_dirty(page);
207 if (PageDirty(page)) 207 if (PageDirty(page))
208 _debug("dirtied"); 208 _debug("dirtied");
209 ret = copied;
210
211out:
209 unlock_page(page); 212 unlock_page(page);
210 put_page(page); 213 put_page(page);
211 214 return ret;
212 return copied;
213} 215}
214 216
215/* 217/*
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 8fc41705c7cd..961a12dc6dc8 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -170,7 +170,6 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
170 170
171 mutex_unlock(&sbi->wq_mutex); 171 mutex_unlock(&sbi->wq_mutex);
172 172
173 if (autofs4_write(sbi, pipe, &pkt, pktsz))
174 switch (ret = autofs4_write(sbi, pipe, &pkt, pktsz)) { 173 switch (ret = autofs4_write(sbi, pipe, &pkt, pktsz)) {
175 case 0: 174 case 0:
176 break; 175 break;
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 531e0a8645b0..1e74cf826532 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -1032,14 +1032,17 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
1032 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) && 1032 root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
1033 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) { 1033 !(flags & BTRFS_BLOCK_FLAG_FULL_BACKREF)) {
1034 ret = btrfs_inc_ref(trans, root, buf, 1); 1034 ret = btrfs_inc_ref(trans, root, buf, 1);
1035 BUG_ON(ret); /* -ENOMEM */ 1035 if (ret)
1036 return ret;
1036 1037
1037 if (root->root_key.objectid == 1038 if (root->root_key.objectid ==
1038 BTRFS_TREE_RELOC_OBJECTID) { 1039 BTRFS_TREE_RELOC_OBJECTID) {
1039 ret = btrfs_dec_ref(trans, root, buf, 0); 1040 ret = btrfs_dec_ref(trans, root, buf, 0);
1040 BUG_ON(ret); /* -ENOMEM */ 1041 if (ret)
1042 return ret;
1041 ret = btrfs_inc_ref(trans, root, cow, 1); 1043 ret = btrfs_inc_ref(trans, root, cow, 1);
1042 BUG_ON(ret); /* -ENOMEM */ 1044 if (ret)
1045 return ret;
1043 } 1046 }
1044 new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF; 1047 new_flags |= BTRFS_BLOCK_FLAG_FULL_BACKREF;
1045 } else { 1048 } else {
@@ -1049,7 +1052,8 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
1049 ret = btrfs_inc_ref(trans, root, cow, 1); 1052 ret = btrfs_inc_ref(trans, root, cow, 1);
1050 else 1053 else
1051 ret = btrfs_inc_ref(trans, root, cow, 0); 1054 ret = btrfs_inc_ref(trans, root, cow, 0);
1052 BUG_ON(ret); /* -ENOMEM */ 1055 if (ret)
1056 return ret;
1053 } 1057 }
1054 if (new_flags != 0) { 1058 if (new_flags != 0) {
1055 int level = btrfs_header_level(buf); 1059 int level = btrfs_header_level(buf);
@@ -1068,9 +1072,11 @@ static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
1068 ret = btrfs_inc_ref(trans, root, cow, 1); 1072 ret = btrfs_inc_ref(trans, root, cow, 1);
1069 else 1073 else
1070 ret = btrfs_inc_ref(trans, root, cow, 0); 1074 ret = btrfs_inc_ref(trans, root, cow, 0);
1071 BUG_ON(ret); /* -ENOMEM */ 1075 if (ret)
1076 return ret;
1072 ret = btrfs_dec_ref(trans, root, buf, 1); 1077 ret = btrfs_dec_ref(trans, root, buf, 1);
1073 BUG_ON(ret); /* -ENOMEM */ 1078 if (ret)
1079 return ret;
1074 } 1080 }
1075 clean_tree_block(fs_info, buf); 1081 clean_tree_block(fs_info, buf);
1076 *last_ref = 1; 1082 *last_ref = 1;
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 5d73f79ded8b..a6226cd6063c 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -87,6 +87,7 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
87 87
88 spin_lock(&root->inode_lock); 88 spin_lock(&root->inode_lock);
89 node = radix_tree_lookup(&root->delayed_nodes_tree, ino); 89 node = radix_tree_lookup(&root->delayed_nodes_tree, ino);
90
90 if (node) { 91 if (node) {
91 if (btrfs_inode->delayed_node) { 92 if (btrfs_inode->delayed_node) {
92 refcount_inc(&node->refs); /* can be accessed */ 93 refcount_inc(&node->refs); /* can be accessed */
@@ -94,9 +95,30 @@ static struct btrfs_delayed_node *btrfs_get_delayed_node(
94 spin_unlock(&root->inode_lock); 95 spin_unlock(&root->inode_lock);
95 return node; 96 return node;
96 } 97 }
97 btrfs_inode->delayed_node = node; 98
98 /* can be accessed and cached in the inode */ 99 /*
99 refcount_add(2, &node->refs); 100 * It's possible that we're racing into the middle of removing
101 * this node from the radix tree. In this case, the refcount
102 * was zero and it should never go back to one. Just return
103 * NULL like it was never in the radix at all; our release
104 * function is in the process of removing it.
105 *
106 * Some implementations of refcount_inc refuse to bump the
107 * refcount once it has hit zero. If we don't do this dance
108 * here, refcount_inc() may decide to just WARN_ONCE() instead
109 * of actually bumping the refcount.
110 *
111 * If this node is properly in the radix, we want to bump the
112 * refcount twice, once for the inode and once for this get
113 * operation.
114 */
115 if (refcount_inc_not_zero(&node->refs)) {
116 refcount_inc(&node->refs);
117 btrfs_inode->delayed_node = node;
118 } else {
119 node = NULL;
120 }
121
100 spin_unlock(&root->inode_lock); 122 spin_unlock(&root->inode_lock);
101 return node; 123 return node;
102 } 124 }
@@ -254,17 +276,18 @@ static void __btrfs_release_delayed_node(
254 mutex_unlock(&delayed_node->mutex); 276 mutex_unlock(&delayed_node->mutex);
255 277
256 if (refcount_dec_and_test(&delayed_node->refs)) { 278 if (refcount_dec_and_test(&delayed_node->refs)) {
257 bool free = false;
258 struct btrfs_root *root = delayed_node->root; 279 struct btrfs_root *root = delayed_node->root;
280
259 spin_lock(&root->inode_lock); 281 spin_lock(&root->inode_lock);
260 if (refcount_read(&delayed_node->refs) == 0) { 282 /*
261 radix_tree_delete(&root->delayed_nodes_tree, 283 * Once our refcount goes to zero, nobody is allowed to bump it
262 delayed_node->inode_id); 284 * back up. We can delete it now.
263 free = true; 285 */
264 } 286 ASSERT(refcount_read(&delayed_node->refs) == 0);
287 radix_tree_delete(&root->delayed_nodes_tree,
288 delayed_node->inode_id);
265 spin_unlock(&root->inode_lock); 289 spin_unlock(&root->inode_lock);
266 if (free) 290 kmem_cache_free(delayed_node_cache, delayed_node);
267 kmem_cache_free(delayed_node_cache, delayed_node);
268 } 291 }
269} 292}
270 293
@@ -1610,28 +1633,18 @@ void btrfs_readdir_put_delayed_items(struct inode *inode,
1610int btrfs_should_delete_dir_index(struct list_head *del_list, 1633int btrfs_should_delete_dir_index(struct list_head *del_list,
1611 u64 index) 1634 u64 index)
1612{ 1635{
1613 struct btrfs_delayed_item *curr, *next; 1636 struct btrfs_delayed_item *curr;
1614 int ret; 1637 int ret = 0;
1615
1616 if (list_empty(del_list))
1617 return 0;
1618 1638
1619 list_for_each_entry_safe(curr, next, del_list, readdir_list) { 1639 list_for_each_entry(curr, del_list, readdir_list) {
1620 if (curr->key.offset > index) 1640 if (curr->key.offset > index)
1621 break; 1641 break;
1622 1642 if (curr->key.offset == index) {
1623 list_del(&curr->readdir_list); 1643 ret = 1;
1624 ret = (curr->key.offset == index); 1644 break;
1625 1645 }
1626 if (refcount_dec_and_test(&curr->refs))
1627 kfree(curr);
1628
1629 if (ret)
1630 return 1;
1631 else
1632 continue;
1633 } 1646 }
1634 return 0; 1647 return ret;
1635} 1648}
1636 1649
1637/* 1650/*
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 10a2a579cc7f..a8ecccfc36de 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3231,6 +3231,7 @@ static int write_dev_supers(struct btrfs_device *device,
3231 int errors = 0; 3231 int errors = 0;
3232 u32 crc; 3232 u32 crc;
3233 u64 bytenr; 3233 u64 bytenr;
3234 int op_flags;
3234 3235
3235 if (max_mirrors == 0) 3236 if (max_mirrors == 0)
3236 max_mirrors = BTRFS_SUPER_MIRROR_MAX; 3237 max_mirrors = BTRFS_SUPER_MIRROR_MAX;
@@ -3273,13 +3274,10 @@ static int write_dev_supers(struct btrfs_device *device,
3273 * we fua the first super. The others we allow 3274 * we fua the first super. The others we allow
3274 * to go down lazy. 3275 * to go down lazy.
3275 */ 3276 */
3276 if (i == 0) { 3277 op_flags = REQ_SYNC | REQ_META | REQ_PRIO;
3277 ret = btrfsic_submit_bh(REQ_OP_WRITE, 3278 if (i == 0 && !btrfs_test_opt(device->fs_info, NOBARRIER))
3278 REQ_SYNC | REQ_FUA | REQ_META | REQ_PRIO, bh); 3279 op_flags |= REQ_FUA;
3279 } else { 3280 ret = btrfsic_submit_bh(REQ_OP_WRITE, op_flags, bh);
3280 ret = btrfsic_submit_bh(REQ_OP_WRITE,
3281 REQ_SYNC | REQ_META | REQ_PRIO, bh);
3282 }
3283 if (ret) 3281 if (ret)
3284 errors++; 3282 errors++;
3285 } 3283 }
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 4497f937e8fb..2f4328511ac8 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -9206,6 +9206,7 @@ int btrfs_drop_snapshot(struct btrfs_root *root,
9206 ret = btrfs_del_root(trans, fs_info, &root->root_key); 9206 ret = btrfs_del_root(trans, fs_info, &root->root_key);
9207 if (ret) { 9207 if (ret) {
9208 btrfs_abort_transaction(trans, ret); 9208 btrfs_abort_transaction(trans, ret);
9209 err = ret;
9209 goto out_end_trans; 9210 goto out_end_trans;
9210 } 9211 }
9211 9212
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 993061f83067..e1a7f3cb5be9 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -3005,6 +3005,8 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
3005 compress_type = ordered_extent->compress_type; 3005 compress_type = ordered_extent->compress_type;
3006 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) { 3006 if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
3007 BUG_ON(compress_type); 3007 BUG_ON(compress_type);
3008 btrfs_qgroup_free_data(inode, NULL, ordered_extent->file_offset,
3009 ordered_extent->len);
3008 ret = btrfs_mark_extent_written(trans, BTRFS_I(inode), 3010 ret = btrfs_mark_extent_written(trans, BTRFS_I(inode),
3009 ordered_extent->file_offset, 3011 ordered_extent->file_offset,
3010 ordered_extent->file_offset + 3012 ordered_extent->file_offset +
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index d748ad1c3620..2ef8acaac688 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -2206,7 +2206,7 @@ static noinline int btrfs_search_path_in_tree(struct btrfs_fs_info *info,
2206 if (!path) 2206 if (!path)
2207 return -ENOMEM; 2207 return -ENOMEM;
2208 2208
2209 ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX]; 2209 ptr = &name[BTRFS_INO_LOOKUP_PATH_MAX - 1];
2210 2210
2211 key.objectid = tree_id; 2211 key.objectid = tree_id;
2212 key.type = BTRFS_ROOT_ITEM_KEY; 2212 key.type = BTRFS_ROOT_ITEM_KEY;
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 49810b70afd3..a25684287501 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -237,7 +237,6 @@ static struct btrfs_device *__alloc_device(void)
237 kfree(dev); 237 kfree(dev);
238 return ERR_PTR(-ENOMEM); 238 return ERR_PTR(-ENOMEM);
239 } 239 }
240 bio_get(dev->flush_bio);
241 240
242 INIT_LIST_HEAD(&dev->dev_list); 241 INIT_LIST_HEAD(&dev->dev_list);
243 INIT_LIST_HEAD(&dev->dev_alloc_list); 242 INIT_LIST_HEAD(&dev->dev_alloc_list);
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index ab69dcb70e8a..1b468250e947 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -1440,6 +1440,29 @@ static int __close_session(struct ceph_mds_client *mdsc,
1440 return request_close_session(mdsc, session); 1440 return request_close_session(mdsc, session);
1441} 1441}
1442 1442
1443static bool drop_negative_children(struct dentry *dentry)
1444{
1445 struct dentry *child;
1446 bool all_negative = true;
1447
1448 if (!d_is_dir(dentry))
1449 goto out;
1450
1451 spin_lock(&dentry->d_lock);
1452 list_for_each_entry(child, &dentry->d_subdirs, d_child) {
1453 if (d_really_is_positive(child)) {
1454 all_negative = false;
1455 break;
1456 }
1457 }
1458 spin_unlock(&dentry->d_lock);
1459
1460 if (all_negative)
1461 shrink_dcache_parent(dentry);
1462out:
1463 return all_negative;
1464}
1465
1443/* 1466/*
1444 * Trim old(er) caps. 1467 * Trim old(er) caps.
1445 * 1468 *
@@ -1490,16 +1513,27 @@ static int trim_caps_cb(struct inode *inode, struct ceph_cap *cap, void *arg)
1490 if ((used | wanted) & ~oissued & mine) 1513 if ((used | wanted) & ~oissued & mine)
1491 goto out; /* we need these caps */ 1514 goto out; /* we need these caps */
1492 1515
1493 session->s_trim_caps--;
1494 if (oissued) { 1516 if (oissued) {
1495 /* we aren't the only cap.. just remove us */ 1517 /* we aren't the only cap.. just remove us */
1496 __ceph_remove_cap(cap, true); 1518 __ceph_remove_cap(cap, true);
1519 session->s_trim_caps--;
1497 } else { 1520 } else {
1521 struct dentry *dentry;
1498 /* try dropping referring dentries */ 1522 /* try dropping referring dentries */
1499 spin_unlock(&ci->i_ceph_lock); 1523 spin_unlock(&ci->i_ceph_lock);
1500 d_prune_aliases(inode); 1524 dentry = d_find_any_alias(inode);
1501 dout("trim_caps_cb %p cap %p pruned, count now %d\n", 1525 if (dentry && drop_negative_children(dentry)) {
1502 inode, cap, atomic_read(&inode->i_count)); 1526 int count;
1527 dput(dentry);
1528 d_prune_aliases(inode);
1529 count = atomic_read(&inode->i_count);
1530 if (count == 1)
1531 session->s_trim_caps--;
1532 dout("trim_caps_cb %p cap %p pruned, count now %d\n",
1533 inode, cap, count);
1534 } else {
1535 dput(dentry);
1536 }
1503 return 0; 1537 return 0;
1504 } 1538 }
1505 1539
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index e06740436b92..ed88ab8a4774 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -1406,7 +1406,8 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
1406 } while (rc == -EAGAIN); 1406 } while (rc == -EAGAIN);
1407 1407
1408 if (rc) { 1408 if (rc) {
1409 cifs_dbg(VFS, "ioctl error in smb2_get_dfs_refer rc=%d\n", rc); 1409 if (rc != -ENOENT)
1410 cifs_dbg(VFS, "ioctl error in smb2_get_dfs_refer rc=%d\n", rc);
1410 goto out; 1411 goto out;
1411 } 1412 }
1412 1413
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 5331631386a2..01346b8b6edb 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -2678,27 +2678,27 @@ SMB2_read(const unsigned int xid, struct cifs_io_parms *io_parms,
2678 cifs_small_buf_release(req); 2678 cifs_small_buf_release(req);
2679 2679
2680 rsp = (struct smb2_read_rsp *)rsp_iov.iov_base; 2680 rsp = (struct smb2_read_rsp *)rsp_iov.iov_base;
2681 shdr = get_sync_hdr(rsp);
2682 2681
2683 if (shdr->Status == STATUS_END_OF_FILE) { 2682 if (rc) {
2683 if (rc != -ENODATA) {
2684 cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE);
2685 cifs_dbg(VFS, "Send error in read = %d\n", rc);
2686 }
2684 free_rsp_buf(resp_buftype, rsp_iov.iov_base); 2687 free_rsp_buf(resp_buftype, rsp_iov.iov_base);
2685 return 0; 2688 return rc == -ENODATA ? 0 : rc;
2686 } 2689 }
2687 2690
2688 if (rc) { 2691 *nbytes = le32_to_cpu(rsp->DataLength);
2689 cifs_stats_fail_inc(io_parms->tcon, SMB2_READ_HE); 2692 if ((*nbytes > CIFS_MAX_MSGSIZE) ||
2690 cifs_dbg(VFS, "Send error in read = %d\n", rc); 2693 (*nbytes > io_parms->length)) {
2691 } else { 2694 cifs_dbg(FYI, "bad length %d for count %d\n",
2692 *nbytes = le32_to_cpu(rsp->DataLength); 2695 *nbytes, io_parms->length);
2693 if ((*nbytes > CIFS_MAX_MSGSIZE) || 2696 rc = -EIO;
2694 (*nbytes > io_parms->length)) { 2697 *nbytes = 0;
2695 cifs_dbg(FYI, "bad length %d for count %d\n",
2696 *nbytes, io_parms->length);
2697 rc = -EIO;
2698 *nbytes = 0;
2699 }
2700 } 2698 }
2701 2699
2700 shdr = get_sync_hdr(rsp);
2701
2702 if (*buf) { 2702 if (*buf) {
2703 memcpy(*buf, (char *)shdr + rsp->DataOffset, *nbytes); 2703 memcpy(*buf, (char *)shdr + rsp->DataOffset, *nbytes);
2704 free_rsp_buf(resp_buftype, rsp_iov.iov_base); 2704 free_rsp_buf(resp_buftype, rsp_iov.iov_base);
diff --git a/fs/cramfs/Kconfig b/fs/cramfs/Kconfig
index f937082f3244..58e2fe40b2a0 100644
--- a/fs/cramfs/Kconfig
+++ b/fs/cramfs/Kconfig
@@ -34,6 +34,7 @@ config CRAMFS_BLOCKDEV
34config CRAMFS_MTD 34config CRAMFS_MTD
35 bool "Support CramFs image directly mapped in physical memory" 35 bool "Support CramFs image directly mapped in physical memory"
36 depends on CRAMFS && MTD 36 depends on CRAMFS && MTD
37 depends on CRAMFS=m || MTD=y
37 default y if !CRAMFS_BLOCKDEV 38 default y if !CRAMFS_BLOCKDEV
38 help 39 help
39 This option allows the CramFs driver to load data directly from 40 This option allows the CramFs driver to load data directly from
diff --git a/fs/dax.c b/fs/dax.c
index 78b72c48374e..95981591977a 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -627,8 +627,7 @@ static void dax_mapping_entry_mkclean(struct address_space *mapping,
627 627
628 if (pfn != pmd_pfn(*pmdp)) 628 if (pfn != pmd_pfn(*pmdp))
629 goto unlock_pmd; 629 goto unlock_pmd;
630 if (!pmd_dirty(*pmdp) 630 if (!pmd_dirty(*pmdp) && !pmd_write(*pmdp))
631 && !pmd_access_permitted(*pmdp, WRITE))
632 goto unlock_pmd; 631 goto unlock_pmd;
633 632
634 flush_cache_page(vma, address, pfn); 633 flush_cache_page(vma, address, pfn);
diff --git a/fs/exec.c b/fs/exec.c
index 6be2aa0ab26f..7eb8d21bcab9 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1216,15 +1216,14 @@ killed:
1216 return -EAGAIN; 1216 return -EAGAIN;
1217} 1217}
1218 1218
1219char *get_task_comm(char *buf, struct task_struct *tsk) 1219char *__get_task_comm(char *buf, size_t buf_size, struct task_struct *tsk)
1220{ 1220{
1221 /* buf must be at least sizeof(tsk->comm) in size */
1222 task_lock(tsk); 1221 task_lock(tsk);
1223 strncpy(buf, tsk->comm, sizeof(tsk->comm)); 1222 strncpy(buf, tsk->comm, buf_size);
1224 task_unlock(tsk); 1223 task_unlock(tsk);
1225 return buf; 1224 return buf;
1226} 1225}
1227EXPORT_SYMBOL_GPL(get_task_comm); 1226EXPORT_SYMBOL_GPL(__get_task_comm);
1228 1227
1229/* 1228/*
1230 * These functions flushes out all traces of the currently running executable 1229 * These functions flushes out all traces of the currently running executable
@@ -1340,24 +1339,24 @@ void setup_new_exec(struct linux_binprm * bprm)
1340 * avoid bad behavior from the prior rlimits. This has to 1339 * avoid bad behavior from the prior rlimits. This has to
1341 * happen before arch_pick_mmap_layout(), which examines 1340 * happen before arch_pick_mmap_layout(), which examines
1342 * RLIMIT_STACK, but after the point of no return to avoid 1341 * RLIMIT_STACK, but after the point of no return to avoid
1343 * races from other threads changing the limits. This also 1342 * needing to clean up the change on failure.
1344 * must be protected from races with prlimit() calls.
1345 */ 1343 */
1346 task_lock(current->group_leader);
1347 if (current->signal->rlim[RLIMIT_STACK].rlim_cur > _STK_LIM) 1344 if (current->signal->rlim[RLIMIT_STACK].rlim_cur > _STK_LIM)
1348 current->signal->rlim[RLIMIT_STACK].rlim_cur = _STK_LIM; 1345 current->signal->rlim[RLIMIT_STACK].rlim_cur = _STK_LIM;
1349 if (current->signal->rlim[RLIMIT_STACK].rlim_max > _STK_LIM)
1350 current->signal->rlim[RLIMIT_STACK].rlim_max = _STK_LIM;
1351 task_unlock(current->group_leader);
1352 } 1346 }
1353 1347
1354 arch_pick_mmap_layout(current->mm); 1348 arch_pick_mmap_layout(current->mm);
1355 1349
1356 current->sas_ss_sp = current->sas_ss_size = 0; 1350 current->sas_ss_sp = current->sas_ss_size = 0;
1357 1351
1358 /* Figure out dumpability. */ 1352 /*
1353 * Figure out dumpability. Note that this checking only of current
1354 * is wrong, but userspace depends on it. This should be testing
1355 * bprm->secureexec instead.
1356 */
1359 if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP || 1357 if (bprm->interp_flags & BINPRM_FLAGS_ENFORCE_NONDUMP ||
1360 bprm->secureexec) 1358 !(uid_eq(current_euid(), current_uid()) &&
1359 gid_eq(current_egid(), current_gid())))
1361 set_dumpable(current->mm, suid_dumpable); 1360 set_dumpable(current->mm, suid_dumpable);
1362 else 1361 else
1363 set_dumpable(current->mm, SUID_DUMP_USER); 1362 set_dumpable(current->mm, SUID_DUMP_USER);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 07bca11749d4..c941251ac0c0 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4722,6 +4722,7 @@ retry:
4722 EXT4_INODE_EOFBLOCKS); 4722 EXT4_INODE_EOFBLOCKS);
4723 } 4723 }
4724 ext4_mark_inode_dirty(handle, inode); 4724 ext4_mark_inode_dirty(handle, inode);
4725 ext4_update_inode_fsync_trans(handle, inode, 1);
4725 ret2 = ext4_journal_stop(handle); 4726 ret2 = ext4_journal_stop(handle);
4726 if (ret2) 4727 if (ret2)
4727 break; 4728 break;
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index b4267d72f249..b32cf263750d 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -816,6 +816,8 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
816#ifdef CONFIG_EXT4_FS_POSIX_ACL 816#ifdef CONFIG_EXT4_FS_POSIX_ACL
817 struct posix_acl *p = get_acl(dir, ACL_TYPE_DEFAULT); 817 struct posix_acl *p = get_acl(dir, ACL_TYPE_DEFAULT);
818 818
819 if (IS_ERR(p))
820 return ERR_CAST(p);
819 if (p) { 821 if (p) {
820 int acl_size = p->a_count * sizeof(ext4_acl_entry); 822 int acl_size = p->a_count * sizeof(ext4_acl_entry);
821 823
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 7df2c5644e59..534a9130f625 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -149,6 +149,15 @@ static int ext4_meta_trans_blocks(struct inode *inode, int lblocks,
149 */ 149 */
150int ext4_inode_is_fast_symlink(struct inode *inode) 150int ext4_inode_is_fast_symlink(struct inode *inode)
151{ 151{
152 if (!(EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)) {
153 int ea_blocks = EXT4_I(inode)->i_file_acl ?
154 EXT4_CLUSTER_SIZE(inode->i_sb) >> 9 : 0;
155
156 if (ext4_has_inline_data(inode))
157 return 0;
158
159 return (S_ISLNK(inode->i_mode) && inode->i_blocks - ea_blocks == 0);
160 }
152 return S_ISLNK(inode->i_mode) && inode->i_size && 161 return S_ISLNK(inode->i_mode) && inode->i_size &&
153 (inode->i_size < EXT4_N_BLOCKS * 4); 162 (inode->i_size < EXT4_N_BLOCKS * 4);
154} 163}
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 798b3ac680db..e750d68fbcb5 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1399,6 +1399,10 @@ static struct buffer_head * ext4_find_entry (struct inode *dir,
1399 "falling back\n")); 1399 "falling back\n"));
1400 } 1400 }
1401 nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb); 1401 nblocks = dir->i_size >> EXT4_BLOCK_SIZE_BITS(sb);
1402 if (!nblocks) {
1403 ret = NULL;
1404 goto cleanup_and_exit;
1405 }
1402 start = EXT4_I(dir)->i_dir_start_lookup; 1406 start = EXT4_I(dir)->i_dir_start_lookup;
1403 if (start >= nblocks) 1407 if (start >= nblocks)
1404 start = 0; 1408 start = 0;
diff --git a/fs/hpfs/dir.c b/fs/hpfs/dir.c
index 8d6b7e35faf9..c83ece7facc5 100644
--- a/fs/hpfs/dir.c
+++ b/fs/hpfs/dir.c
@@ -150,7 +150,6 @@ static int hpfs_readdir(struct file *file, struct dir_context *ctx)
150 if (unlikely(ret < 0)) 150 if (unlikely(ret < 0))
151 goto out; 151 goto out;
152 ctx->pos = ((loff_t) hpfs_de_as_down_as_possible(inode->i_sb, hpfs_inode->i_dno) << 4) + 1; 152 ctx->pos = ((loff_t) hpfs_de_as_down_as_possible(inode->i_sb, hpfs_inode->i_dno) << 4) + 1;
153 file->f_version = inode->i_version;
154 } 153 }
155 next_pos = ctx->pos; 154 next_pos = ctx->pos;
156 if (!(de = map_pos_dirent(inode, &next_pos, &qbh))) { 155 if (!(de = map_pos_dirent(inode, &next_pos, &qbh))) {
diff --git a/fs/hpfs/dnode.c b/fs/hpfs/dnode.c
index 3b834563b1f1..a4ad18afbdec 100644
--- a/fs/hpfs/dnode.c
+++ b/fs/hpfs/dnode.c
@@ -419,7 +419,6 @@ int hpfs_add_dirent(struct inode *i,
419 c = 1; 419 c = 1;
420 goto ret; 420 goto ret;
421 } 421 }
422 i->i_version++;
423 c = hpfs_add_to_dnode(i, dno, name, namelen, new_de, 0); 422 c = hpfs_add_to_dnode(i, dno, name, namelen, new_de, 0);
424 ret: 423 ret:
425 return c; 424 return c;
@@ -726,7 +725,6 @@ int hpfs_remove_dirent(struct inode *i, dnode_secno dno, struct hpfs_dirent *de,
726 return 2; 725 return 2;
727 } 726 }
728 } 727 }
729 i->i_version++;
730 for_all_poss(i, hpfs_pos_del, (t = get_pos(dnode, de)) + 1, 1); 728 for_all_poss(i, hpfs_pos_del, (t = get_pos(dnode, de)) + 1, 1);
731 hpfs_delete_de(i->i_sb, dnode, de); 729 hpfs_delete_de(i->i_sb, dnode, de);
732 hpfs_mark_4buffers_dirty(qbh); 730 hpfs_mark_4buffers_dirty(qbh);
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index c45a3b9b9ac7..f2c3ebcd309c 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -235,7 +235,6 @@ static struct inode *hpfs_alloc_inode(struct super_block *sb)
235 ei = kmem_cache_alloc(hpfs_inode_cachep, GFP_NOFS); 235 ei = kmem_cache_alloc(hpfs_inode_cachep, GFP_NOFS);
236 if (!ei) 236 if (!ei)
237 return NULL; 237 return NULL;
238 ei->vfs_inode.i_version = 1;
239 return &ei->vfs_inode; 238 return &ei->vfs_inode;
240} 239}
241 240
diff --git a/fs/namespace.c b/fs/namespace.c
index e158ec6b527b..9d1374ab6e06 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -2826,6 +2826,7 @@ long do_mount(const char *dev_name, const char __user *dir_name,
2826 SB_DIRSYNC | 2826 SB_DIRSYNC |
2827 SB_SILENT | 2827 SB_SILENT |
2828 SB_POSIXACL | 2828 SB_POSIXACL |
2829 SB_LAZYTIME |
2829 SB_I_VERSION); 2830 SB_I_VERSION);
2830 2831
2831 if (flags & MS_REMOUNT) 2832 if (flags & MS_REMOUNT)
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 0ac2fb1c6b63..b9129e2befea 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -291,12 +291,23 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
291 const struct sockaddr *sap = data->addr; 291 const struct sockaddr *sap = data->addr;
292 struct nfs_net *nn = net_generic(data->net, nfs_net_id); 292 struct nfs_net *nn = net_generic(data->net, nfs_net_id);
293 293
294again:
294 list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) { 295 list_for_each_entry(clp, &nn->nfs_client_list, cl_share_link) {
295 const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr; 296 const struct sockaddr *clap = (struct sockaddr *)&clp->cl_addr;
296 /* Don't match clients that failed to initialise properly */ 297 /* Don't match clients that failed to initialise properly */
297 if (clp->cl_cons_state < 0) 298 if (clp->cl_cons_state < 0)
298 continue; 299 continue;
299 300
301 /* If a client is still initializing then we need to wait */
302 if (clp->cl_cons_state > NFS_CS_READY) {
303 refcount_inc(&clp->cl_count);
304 spin_unlock(&nn->nfs_client_lock);
305 nfs_wait_client_init_complete(clp);
306 nfs_put_client(clp);
307 spin_lock(&nn->nfs_client_lock);
308 goto again;
309 }
310
300 /* Different NFS versions cannot share the same nfs_client */ 311 /* Different NFS versions cannot share the same nfs_client */
301 if (clp->rpc_ops != data->nfs_mod->rpc_ops) 312 if (clp->rpc_ops != data->nfs_mod->rpc_ops)
302 continue; 313 continue;
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 12bbab0becb4..65a7e5da508c 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -404,15 +404,19 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
404 if (error < 0) 404 if (error < 0)
405 goto error; 405 goto error;
406 406
407 if (!nfs4_has_session(clp))
408 nfs_mark_client_ready(clp, NFS_CS_READY);
409
410 error = nfs4_discover_server_trunking(clp, &old); 407 error = nfs4_discover_server_trunking(clp, &old);
411 if (error < 0) 408 if (error < 0)
412 goto error; 409 goto error;
413 410
414 if (clp != old) 411 if (clp != old) {
415 clp->cl_preserve_clid = true; 412 clp->cl_preserve_clid = true;
413 /*
414 * Mark the client as having failed initialization so other
415 * processes walking the nfs_client_list in nfs_match_client()
416 * won't try to use it.
417 */
418 nfs_mark_client_ready(clp, -EPERM);
419 }
416 nfs_put_client(clp); 420 nfs_put_client(clp);
417 clear_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags); 421 clear_bit(NFS_CS_TSM_POSSIBLE, &clp->cl_flags);
418 return old; 422 return old;
@@ -539,6 +543,9 @@ int nfs40_walk_client_list(struct nfs_client *new,
539 spin_lock(&nn->nfs_client_lock); 543 spin_lock(&nn->nfs_client_lock);
540 list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { 544 list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
541 545
546 if (pos == new)
547 goto found;
548
542 status = nfs4_match_client(pos, new, &prev, nn); 549 status = nfs4_match_client(pos, new, &prev, nn);
543 if (status < 0) 550 if (status < 0)
544 goto out_unlock; 551 goto out_unlock;
@@ -559,6 +566,7 @@ int nfs40_walk_client_list(struct nfs_client *new,
559 * way that a SETCLIENTID_CONFIRM to pos can succeed is 566 * way that a SETCLIENTID_CONFIRM to pos can succeed is
560 * if new and pos point to the same server: 567 * if new and pos point to the same server:
561 */ 568 */
569found:
562 refcount_inc(&pos->cl_count); 570 refcount_inc(&pos->cl_count);
563 spin_unlock(&nn->nfs_client_lock); 571 spin_unlock(&nn->nfs_client_lock);
564 572
@@ -572,6 +580,7 @@ int nfs40_walk_client_list(struct nfs_client *new,
572 case 0: 580 case 0:
573 nfs4_swap_callback_idents(pos, new); 581 nfs4_swap_callback_idents(pos, new);
574 pos->cl_confirm = new->cl_confirm; 582 pos->cl_confirm = new->cl_confirm;
583 nfs_mark_client_ready(pos, NFS_CS_READY);
575 584
576 prev = NULL; 585 prev = NULL;
577 *result = pos; 586 *result = pos;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 5b5f464f6f2a..4a379d7918f2 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -1890,6 +1890,8 @@ int nfs_commit_inode(struct inode *inode, int how)
1890 if (res) 1890 if (res)
1891 error = nfs_generic_commit_list(inode, &head, how, &cinfo); 1891 error = nfs_generic_commit_list(inode, &head, how, &cinfo);
1892 nfs_commit_end(cinfo.mds); 1892 nfs_commit_end(cinfo.mds);
1893 if (res == 0)
1894 return res;
1893 if (error < 0) 1895 if (error < 0)
1894 goto out_error; 1896 goto out_error;
1895 if (!may_wait) 1897 if (!may_wait)
diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c
index 697f8ae7792d..fdf2aad73470 100644
--- a/fs/nfsd/auth.c
+++ b/fs/nfsd/auth.c
@@ -61,6 +61,9 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp)
61 else 61 else
62 gi->gid[i] = rqgi->gid[i]; 62 gi->gid[i] = rqgi->gid[i];
63 } 63 }
64
65 /* Each thread allocates its own gi, no race */
66 groups_sort(gi);
64 } else { 67 } else {
65 gi = get_group_info(rqgi); 68 gi = get_group_info(rqgi);
66 } 69 }
diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c
index ded456f17de6..c584ad8d023c 100644
--- a/fs/orangefs/devorangefs-req.c
+++ b/fs/orangefs/devorangefs-req.c
@@ -162,7 +162,7 @@ static ssize_t orangefs_devreq_read(struct file *file,
162 struct orangefs_kernel_op_s *op, *temp; 162 struct orangefs_kernel_op_s *op, *temp;
163 __s32 proto_ver = ORANGEFS_KERNEL_PROTO_VERSION; 163 __s32 proto_ver = ORANGEFS_KERNEL_PROTO_VERSION;
164 static __s32 magic = ORANGEFS_DEVREQ_MAGIC; 164 static __s32 magic = ORANGEFS_DEVREQ_MAGIC;
165 struct orangefs_kernel_op_s *cur_op = NULL; 165 struct orangefs_kernel_op_s *cur_op;
166 unsigned long ret; 166 unsigned long ret;
167 167
168 /* We do not support blocking IO. */ 168 /* We do not support blocking IO. */
@@ -186,6 +186,7 @@ static ssize_t orangefs_devreq_read(struct file *file,
186 return -EAGAIN; 186 return -EAGAIN;
187 187
188restart: 188restart:
189 cur_op = NULL;
189 /* Get next op (if any) from top of list. */ 190 /* Get next op (if any) from top of list. */
190 spin_lock(&orangefs_request_list_lock); 191 spin_lock(&orangefs_request_list_lock);
191 list_for_each_entry_safe(op, temp, &orangefs_request_list, list) { 192 list_for_each_entry_safe(op, temp, &orangefs_request_list, list) {
diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c
index 1668fd645c45..0d228cd087e6 100644
--- a/fs/orangefs/file.c
+++ b/fs/orangefs/file.c
@@ -452,7 +452,7 @@ ssize_t orangefs_inode_read(struct inode *inode,
452static ssize_t orangefs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter) 452static ssize_t orangefs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
453{ 453{
454 struct file *file = iocb->ki_filp; 454 struct file *file = iocb->ki_filp;
455 loff_t pos = *(&iocb->ki_pos); 455 loff_t pos = iocb->ki_pos;
456 ssize_t rc = 0; 456 ssize_t rc = 0;
457 457
458 BUG_ON(iocb->private); 458 BUG_ON(iocb->private);
@@ -492,9 +492,6 @@ static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *ite
492 } 492 }
493 } 493 }
494 494
495 if (file->f_pos > i_size_read(file->f_mapping->host))
496 orangefs_i_size_write(file->f_mapping->host, file->f_pos);
497
498 rc = generic_write_checks(iocb, iter); 495 rc = generic_write_checks(iocb, iter);
499 496
500 if (rc <= 0) { 497 if (rc <= 0) {
@@ -508,7 +505,7 @@ static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *ite
508 * pos to the end of the file, so we will wait till now to set 505 * pos to the end of the file, so we will wait till now to set
509 * pos... 506 * pos...
510 */ 507 */
511 pos = *(&iocb->ki_pos); 508 pos = iocb->ki_pos;
512 509
513 rc = do_readv_writev(ORANGEFS_IO_WRITE, 510 rc = do_readv_writev(ORANGEFS_IO_WRITE,
514 file, 511 file,
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
index 97adf7d100b5..2595453fe737 100644
--- a/fs/orangefs/orangefs-kernel.h
+++ b/fs/orangefs/orangefs-kernel.h
@@ -533,17 +533,6 @@ do { \
533 sys_attr.mask = ORANGEFS_ATTR_SYS_ALL_SETABLE; \ 533 sys_attr.mask = ORANGEFS_ATTR_SYS_ALL_SETABLE; \
534} while (0) 534} while (0)
535 535
536static inline void orangefs_i_size_write(struct inode *inode, loff_t i_size)
537{
538#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
539 inode_lock(inode);
540#endif
541 i_size_write(inode, i_size);
542#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
543 inode_unlock(inode);
544#endif
545}
546
547static inline void orangefs_set_timeout(struct dentry *dentry) 536static inline void orangefs_set_timeout(struct dentry *dentry)
548{ 537{
549 unsigned long time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000; 538 unsigned long time = jiffies + orangefs_dcache_timeout_msecs*HZ/1000;
diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c
index 835c6e148afc..0577d6dba8c8 100644
--- a/fs/orangefs/waitqueue.c
+++ b/fs/orangefs/waitqueue.c
@@ -29,10 +29,10 @@ static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s
29 */ 29 */
30void purge_waiting_ops(void) 30void purge_waiting_ops(void)
31{ 31{
32 struct orangefs_kernel_op_s *op; 32 struct orangefs_kernel_op_s *op, *tmp;
33 33
34 spin_lock(&orangefs_request_list_lock); 34 spin_lock(&orangefs_request_list_lock);
35 list_for_each_entry(op, &orangefs_request_list, list) { 35 list_for_each_entry_safe(op, tmp, &orangefs_request_list, list) {
36 gossip_debug(GOSSIP_WAIT_DEBUG, 36 gossip_debug(GOSSIP_WAIT_DEBUG,
37 "pvfs2-client-core: purging op tag %llu %s\n", 37 "pvfs2-client-core: purging op tag %llu %s\n",
38 llu(op->tag), 38 llu(op->tag),
diff --git a/fs/overlayfs/Kconfig b/fs/overlayfs/Kconfig
index cbfc196e5dc5..5ac415466861 100644
--- a/fs/overlayfs/Kconfig
+++ b/fs/overlayfs/Kconfig
@@ -24,6 +24,16 @@ config OVERLAY_FS_REDIRECT_DIR
24 an overlay which has redirects on a kernel that doesn't support this 24 an overlay which has redirects on a kernel that doesn't support this
25 feature will have unexpected results. 25 feature will have unexpected results.
26 26
27config OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW
28 bool "Overlayfs: follow redirects even if redirects are turned off"
29 default y
30 depends on OVERLAY_FS
31 help
32 Disable this to get a possibly more secure configuration, but that
33 might not be backward compatible with previous kernels.
34
35 For more information, see Documentation/filesystems/overlayfs.txt
36
27config OVERLAY_FS_INDEX 37config OVERLAY_FS_INDEX
28 bool "Overlayfs: turn on inodes index feature by default" 38 bool "Overlayfs: turn on inodes index feature by default"
29 depends on OVERLAY_FS 39 depends on OVERLAY_FS
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index e13921824c70..f9788bc116a8 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -887,7 +887,8 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir)
887 spin_unlock(&dentry->d_lock); 887 spin_unlock(&dentry->d_lock);
888 } else { 888 } else {
889 kfree(redirect); 889 kfree(redirect);
890 pr_warn_ratelimited("overlay: failed to set redirect (%i)\n", err); 890 pr_warn_ratelimited("overlayfs: failed to set redirect (%i)\n",
891 err);
891 /* Fall back to userspace copy-up */ 892 /* Fall back to userspace copy-up */
892 err = -EXDEV; 893 err = -EXDEV;
893 } 894 }
diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c
index 625ed8066570..beb945e1963c 100644
--- a/fs/overlayfs/namei.c
+++ b/fs/overlayfs/namei.c
@@ -435,7 +435,7 @@ int ovl_verify_index(struct dentry *index, struct ovl_path *lower,
435 435
436 /* Check if index is orphan and don't warn before cleaning it */ 436 /* Check if index is orphan and don't warn before cleaning it */
437 if (d_inode(index)->i_nlink == 1 && 437 if (d_inode(index)->i_nlink == 1 &&
438 ovl_get_nlink(index, origin.dentry, 0) == 0) 438 ovl_get_nlink(origin.dentry, index, 0) == 0)
439 err = -ENOENT; 439 err = -ENOENT;
440 440
441 dput(origin.dentry); 441 dput(origin.dentry);
@@ -681,6 +681,22 @@ struct dentry *ovl_lookup(struct inode *dir, struct dentry *dentry,
681 if (d.stop) 681 if (d.stop)
682 break; 682 break;
683 683
684 /*
685 * Following redirects can have security consequences: it's like
686 * a symlink into the lower layer without the permission checks.
687 * This is only a problem if the upper layer is untrusted (e.g
688 * comes from an USB drive). This can allow a non-readable file
689 * or directory to become readable.
690 *
691 * Only following redirects when redirects are enabled disables
692 * this attack vector when not necessary.
693 */
694 err = -EPERM;
695 if (d.redirect && !ofs->config.redirect_follow) {
696 pr_warn_ratelimited("overlay: refusing to follow redirect for (%pd2)\n", dentry);
697 goto out_put;
698 }
699
684 if (d.redirect && d.redirect[0] == '/' && poe != roe) { 700 if (d.redirect && d.redirect[0] == '/' && poe != roe) {
685 poe = roe; 701 poe = roe;
686 702
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 13eab09a6b6f..b489099ccd49 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -180,7 +180,7 @@ static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
180static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode) 180static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode)
181{ 181{
182 struct dentry *ret = vfs_tmpfile(dentry, mode, 0); 182 struct dentry *ret = vfs_tmpfile(dentry, mode, 0);
183 int err = IS_ERR(ret) ? PTR_ERR(ret) : 0; 183 int err = PTR_ERR_OR_ZERO(ret);
184 184
185 pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err); 185 pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err);
186 return ret; 186 return ret;
diff --git a/fs/overlayfs/ovl_entry.h b/fs/overlayfs/ovl_entry.h
index 752bab645879..9d0bc03bf6e4 100644
--- a/fs/overlayfs/ovl_entry.h
+++ b/fs/overlayfs/ovl_entry.h
@@ -14,6 +14,8 @@ struct ovl_config {
14 char *workdir; 14 char *workdir;
15 bool default_permissions; 15 bool default_permissions;
16 bool redirect_dir; 16 bool redirect_dir;
17 bool redirect_follow;
18 const char *redirect_mode;
17 bool index; 19 bool index;
18}; 20};
19 21
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index 0daa4354fec4..8c98578d27a1 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -499,7 +499,7 @@ out:
499 return err; 499 return err;
500 500
501fail: 501fail:
502 pr_warn_ratelimited("overlay: failed to look up (%s) for ino (%i)\n", 502 pr_warn_ratelimited("overlayfs: failed to look up (%s) for ino (%i)\n",
503 p->name, err); 503 p->name, err);
504 goto out; 504 goto out;
505} 505}
@@ -663,7 +663,10 @@ static int ovl_iterate_real(struct file *file, struct dir_context *ctx)
663 return PTR_ERR(rdt.cache); 663 return PTR_ERR(rdt.cache);
664 } 664 }
665 665
666 return iterate_dir(od->realfile, &rdt.ctx); 666 err = iterate_dir(od->realfile, &rdt.ctx);
667 ctx->pos = rdt.ctx.pos;
668
669 return err;
667} 670}
668 671
669 672
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 288d20f9a55a..76440feb79f6 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -33,6 +33,13 @@ module_param_named(redirect_dir, ovl_redirect_dir_def, bool, 0644);
33MODULE_PARM_DESC(ovl_redirect_dir_def, 33MODULE_PARM_DESC(ovl_redirect_dir_def,
34 "Default to on or off for the redirect_dir feature"); 34 "Default to on or off for the redirect_dir feature");
35 35
36static bool ovl_redirect_always_follow =
37 IS_ENABLED(CONFIG_OVERLAY_FS_REDIRECT_ALWAYS_FOLLOW);
38module_param_named(redirect_always_follow, ovl_redirect_always_follow,
39 bool, 0644);
40MODULE_PARM_DESC(ovl_redirect_always_follow,
41 "Follow redirects even if redirect_dir feature is turned off");
42
36static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX); 43static bool ovl_index_def = IS_ENABLED(CONFIG_OVERLAY_FS_INDEX);
37module_param_named(index, ovl_index_def, bool, 0644); 44module_param_named(index, ovl_index_def, bool, 0644);
38MODULE_PARM_DESC(ovl_index_def, 45MODULE_PARM_DESC(ovl_index_def,
@@ -232,6 +239,7 @@ static void ovl_free_fs(struct ovl_fs *ofs)
232 kfree(ofs->config.lowerdir); 239 kfree(ofs->config.lowerdir);
233 kfree(ofs->config.upperdir); 240 kfree(ofs->config.upperdir);
234 kfree(ofs->config.workdir); 241 kfree(ofs->config.workdir);
242 kfree(ofs->config.redirect_mode);
235 if (ofs->creator_cred) 243 if (ofs->creator_cred)
236 put_cred(ofs->creator_cred); 244 put_cred(ofs->creator_cred);
237 kfree(ofs); 245 kfree(ofs);
@@ -244,6 +252,7 @@ static void ovl_put_super(struct super_block *sb)
244 ovl_free_fs(ofs); 252 ovl_free_fs(ofs);
245} 253}
246 254
255/* Sync real dirty inodes in upper filesystem (if it exists) */
247static int ovl_sync_fs(struct super_block *sb, int wait) 256static int ovl_sync_fs(struct super_block *sb, int wait)
248{ 257{
249 struct ovl_fs *ofs = sb->s_fs_info; 258 struct ovl_fs *ofs = sb->s_fs_info;
@@ -252,14 +261,24 @@ static int ovl_sync_fs(struct super_block *sb, int wait)
252 261
253 if (!ofs->upper_mnt) 262 if (!ofs->upper_mnt)
254 return 0; 263 return 0;
255 upper_sb = ofs->upper_mnt->mnt_sb; 264
256 if (!upper_sb->s_op->sync_fs) 265 /*
266 * If this is a sync(2) call or an emergency sync, all the super blocks
267 * will be iterated, including upper_sb, so no need to do anything.
268 *
269 * If this is a syncfs(2) call, then we do need to call
270 * sync_filesystem() on upper_sb, but enough if we do it when being
271 * called with wait == 1.
272 */
273 if (!wait)
257 return 0; 274 return 0;
258 275
259 /* real inodes have already been synced by sync_filesystem(ovl_sb) */ 276 upper_sb = ofs->upper_mnt->mnt_sb;
277
260 down_read(&upper_sb->s_umount); 278 down_read(&upper_sb->s_umount);
261 ret = upper_sb->s_op->sync_fs(upper_sb, wait); 279 ret = sync_filesystem(upper_sb);
262 up_read(&upper_sb->s_umount); 280 up_read(&upper_sb->s_umount);
281
263 return ret; 282 return ret;
264} 283}
265 284
@@ -295,6 +314,11 @@ static bool ovl_force_readonly(struct ovl_fs *ofs)
295 return (!ofs->upper_mnt || !ofs->workdir); 314 return (!ofs->upper_mnt || !ofs->workdir);
296} 315}
297 316
317static const char *ovl_redirect_mode_def(void)
318{
319 return ovl_redirect_dir_def ? "on" : "off";
320}
321
298/** 322/**
299 * ovl_show_options 323 * ovl_show_options
300 * 324 *
@@ -313,12 +337,10 @@ static int ovl_show_options(struct seq_file *m, struct dentry *dentry)
313 } 337 }
314 if (ofs->config.default_permissions) 338 if (ofs->config.default_permissions)
315 seq_puts(m, ",default_permissions"); 339 seq_puts(m, ",default_permissions");
316 if (ofs->config.redirect_dir != ovl_redirect_dir_def) 340 if (strcmp(ofs->config.redirect_mode, ovl_redirect_mode_def()) != 0)
317 seq_printf(m, ",redirect_dir=%s", 341 seq_printf(m, ",redirect_dir=%s", ofs->config.redirect_mode);
318 ofs->config.redirect_dir ? "on" : "off");
319 if (ofs->config.index != ovl_index_def) 342 if (ofs->config.index != ovl_index_def)
320 seq_printf(m, ",index=%s", 343 seq_printf(m, ",index=%s", ofs->config.index ? "on" : "off");
321 ofs->config.index ? "on" : "off");
322 return 0; 344 return 0;
323} 345}
324 346
@@ -348,8 +370,7 @@ enum {
348 OPT_UPPERDIR, 370 OPT_UPPERDIR,
349 OPT_WORKDIR, 371 OPT_WORKDIR,
350 OPT_DEFAULT_PERMISSIONS, 372 OPT_DEFAULT_PERMISSIONS,
351 OPT_REDIRECT_DIR_ON, 373 OPT_REDIRECT_DIR,
352 OPT_REDIRECT_DIR_OFF,
353 OPT_INDEX_ON, 374 OPT_INDEX_ON,
354 OPT_INDEX_OFF, 375 OPT_INDEX_OFF,
355 OPT_ERR, 376 OPT_ERR,
@@ -360,8 +381,7 @@ static const match_table_t ovl_tokens = {
360 {OPT_UPPERDIR, "upperdir=%s"}, 381 {OPT_UPPERDIR, "upperdir=%s"},
361 {OPT_WORKDIR, "workdir=%s"}, 382 {OPT_WORKDIR, "workdir=%s"},
362 {OPT_DEFAULT_PERMISSIONS, "default_permissions"}, 383 {OPT_DEFAULT_PERMISSIONS, "default_permissions"},
363 {OPT_REDIRECT_DIR_ON, "redirect_dir=on"}, 384 {OPT_REDIRECT_DIR, "redirect_dir=%s"},
364 {OPT_REDIRECT_DIR_OFF, "redirect_dir=off"},
365 {OPT_INDEX_ON, "index=on"}, 385 {OPT_INDEX_ON, "index=on"},
366 {OPT_INDEX_OFF, "index=off"}, 386 {OPT_INDEX_OFF, "index=off"},
367 {OPT_ERR, NULL} 387 {OPT_ERR, NULL}
@@ -390,10 +410,37 @@ static char *ovl_next_opt(char **s)
390 return sbegin; 410 return sbegin;
391} 411}
392 412
413static int ovl_parse_redirect_mode(struct ovl_config *config, const char *mode)
414{
415 if (strcmp(mode, "on") == 0) {
416 config->redirect_dir = true;
417 /*
418 * Does not make sense to have redirect creation without
419 * redirect following.
420 */
421 config->redirect_follow = true;
422 } else if (strcmp(mode, "follow") == 0) {
423 config->redirect_follow = true;
424 } else if (strcmp(mode, "off") == 0) {
425 if (ovl_redirect_always_follow)
426 config->redirect_follow = true;
427 } else if (strcmp(mode, "nofollow") != 0) {
428 pr_err("overlayfs: bad mount option \"redirect_dir=%s\"\n",
429 mode);
430 return -EINVAL;
431 }
432
433 return 0;
434}
435
393static int ovl_parse_opt(char *opt, struct ovl_config *config) 436static int ovl_parse_opt(char *opt, struct ovl_config *config)
394{ 437{
395 char *p; 438 char *p;
396 439
440 config->redirect_mode = kstrdup(ovl_redirect_mode_def(), GFP_KERNEL);
441 if (!config->redirect_mode)
442 return -ENOMEM;
443
397 while ((p = ovl_next_opt(&opt)) != NULL) { 444 while ((p = ovl_next_opt(&opt)) != NULL) {
398 int token; 445 int token;
399 substring_t args[MAX_OPT_ARGS]; 446 substring_t args[MAX_OPT_ARGS];
@@ -428,12 +475,11 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
428 config->default_permissions = true; 475 config->default_permissions = true;
429 break; 476 break;
430 477
431 case OPT_REDIRECT_DIR_ON: 478 case OPT_REDIRECT_DIR:
432 config->redirect_dir = true; 479 kfree(config->redirect_mode);
433 break; 480 config->redirect_mode = match_strdup(&args[0]);
434 481 if (!config->redirect_mode)
435 case OPT_REDIRECT_DIR_OFF: 482 return -ENOMEM;
436 config->redirect_dir = false;
437 break; 483 break;
438 484
439 case OPT_INDEX_ON: 485 case OPT_INDEX_ON:
@@ -458,7 +504,7 @@ static int ovl_parse_opt(char *opt, struct ovl_config *config)
458 config->workdir = NULL; 504 config->workdir = NULL;
459 } 505 }
460 506
461 return 0; 507 return ovl_parse_redirect_mode(config, config->redirect_mode);
462} 508}
463 509
464#define OVL_WORKDIR_NAME "work" 510#define OVL_WORKDIR_NAME "work"
@@ -1160,7 +1206,6 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
1160 if (!cred) 1206 if (!cred)
1161 goto out_err; 1207 goto out_err;
1162 1208
1163 ofs->config.redirect_dir = ovl_redirect_dir_def;
1164 ofs->config.index = ovl_index_def; 1209 ofs->config.index = ovl_index_def;
1165 err = ovl_parse_opt((char *) data, &ofs->config); 1210 err = ovl_parse_opt((char *) data, &ofs->config);
1166 if (err) 1211 if (err)
diff --git a/fs/proc/array.c b/fs/proc/array.c
index 79375fc115d2..d67a72dcb92c 100644
--- a/fs/proc/array.c
+++ b/fs/proc/array.c
@@ -430,8 +430,11 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
430 * safe because the task has stopped executing permanently. 430 * safe because the task has stopped executing permanently.
431 */ 431 */
432 if (permitted && (task->flags & PF_DUMPCORE)) { 432 if (permitted && (task->flags & PF_DUMPCORE)) {
433 eip = KSTK_EIP(task); 433 if (try_get_task_stack(task)) {
434 esp = KSTK_ESP(task); 434 eip = KSTK_EIP(task);
435 esp = KSTK_ESP(task);
436 put_task_stack(task);
437 }
435 } 438 }
436 } 439 }
437 440
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 28fa85276eec..60316b52d659 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2268,7 +2268,7 @@ static int show_timer(struct seq_file *m, void *v)
2268 notify = timer->it_sigev_notify; 2268 notify = timer->it_sigev_notify;
2269 2269
2270 seq_printf(m, "ID: %d\n", timer->it_id); 2270 seq_printf(m, "ID: %d\n", timer->it_id);
2271 seq_printf(m, "signal: %d/%p\n", 2271 seq_printf(m, "signal: %d/%px\n",
2272 timer->sigq->info.si_signo, 2272 timer->sigq->info.si_signo,
2273 timer->sigq->info.si_value.sival_ptr); 2273 timer->sigq->info.si_value.sival_ptr);
2274 seq_printf(m, "notify: %s/%s.%d\n", 2274 seq_printf(m, "notify: %s/%s.%d\n",
diff --git a/fs/super.c b/fs/super.c
index d4e33e8f1e6f..06bd25d90ba5 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -191,6 +191,24 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
191 191
192 INIT_LIST_HEAD(&s->s_mounts); 192 INIT_LIST_HEAD(&s->s_mounts);
193 s->s_user_ns = get_user_ns(user_ns); 193 s->s_user_ns = get_user_ns(user_ns);
194 init_rwsem(&s->s_umount);
195 lockdep_set_class(&s->s_umount, &type->s_umount_key);
196 /*
197 * sget() can have s_umount recursion.
198 *
199 * When it cannot find a suitable sb, it allocates a new
200 * one (this one), and tries again to find a suitable old
201 * one.
202 *
203 * In case that succeeds, it will acquire the s_umount
204 * lock of the old one. Since these are clearly distrinct
205 * locks, and this object isn't exposed yet, there's no
206 * risk of deadlocks.
207 *
208 * Annotate this by putting this lock in a different
209 * subclass.
210 */
211 down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
194 212
195 if (security_sb_alloc(s)) 213 if (security_sb_alloc(s))
196 goto fail; 214 goto fail;
@@ -218,25 +236,6 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags,
218 goto fail; 236 goto fail;
219 if (list_lru_init_memcg(&s->s_inode_lru)) 237 if (list_lru_init_memcg(&s->s_inode_lru))
220 goto fail; 238 goto fail;
221
222 init_rwsem(&s->s_umount);
223 lockdep_set_class(&s->s_umount, &type->s_umount_key);
224 /*
225 * sget() can have s_umount recursion.
226 *
227 * When it cannot find a suitable sb, it allocates a new
228 * one (this one), and tries again to find a suitable old
229 * one.
230 *
231 * In case that succeeds, it will acquire the s_umount
232 * lock of the old one. Since these are clearly distrinct
233 * locks, and this object isn't exposed yet, there's no
234 * risk of deadlocks.
235 *
236 * Annotate this by putting this lock in a different
237 * subclass.
238 */
239 down_write_nested(&s->s_umount, SINGLE_DEPTH_NESTING);
240 s->s_count = 1; 239 s->s_count = 1;
241 atomic_set(&s->s_active, 1); 240 atomic_set(&s->s_active, 1);
242 mutex_init(&s->s_vfs_rename_mutex); 241 mutex_init(&s->s_vfs_rename_mutex);
@@ -518,7 +517,11 @@ retry:
518 hlist_add_head(&s->s_instances, &type->fs_supers); 517 hlist_add_head(&s->s_instances, &type->fs_supers);
519 spin_unlock(&sb_lock); 518 spin_unlock(&sb_lock);
520 get_filesystem(type); 519 get_filesystem(type);
521 register_shrinker(&s->s_shrink); 520 err = register_shrinker(&s->s_shrink);
521 if (err) {
522 deactivate_locked_super(s);
523 s = ERR_PTR(err);
524 }
522 return s; 525 return s;
523} 526}
524 527
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index ac9a4e65ca49..41a75f9f23fd 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -570,11 +570,14 @@ out:
570static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, 570static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
571 struct userfaultfd_wait_queue *ewq) 571 struct userfaultfd_wait_queue *ewq)
572{ 572{
573 struct userfaultfd_ctx *release_new_ctx;
574
573 if (WARN_ON_ONCE(current->flags & PF_EXITING)) 575 if (WARN_ON_ONCE(current->flags & PF_EXITING))
574 goto out; 576 goto out;
575 577
576 ewq->ctx = ctx; 578 ewq->ctx = ctx;
577 init_waitqueue_entry(&ewq->wq, current); 579 init_waitqueue_entry(&ewq->wq, current);
580 release_new_ctx = NULL;
578 581
579 spin_lock(&ctx->event_wqh.lock); 582 spin_lock(&ctx->event_wqh.lock);
580 /* 583 /*
@@ -601,8 +604,7 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
601 new = (struct userfaultfd_ctx *) 604 new = (struct userfaultfd_ctx *)
602 (unsigned long) 605 (unsigned long)
603 ewq->msg.arg.reserved.reserved1; 606 ewq->msg.arg.reserved.reserved1;
604 607 release_new_ctx = new;
605 userfaultfd_ctx_put(new);
606 } 608 }
607 break; 609 break;
608 } 610 }
@@ -617,6 +619,20 @@ static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx,
617 __set_current_state(TASK_RUNNING); 619 __set_current_state(TASK_RUNNING);
618 spin_unlock(&ctx->event_wqh.lock); 620 spin_unlock(&ctx->event_wqh.lock);
619 621
622 if (release_new_ctx) {
623 struct vm_area_struct *vma;
624 struct mm_struct *mm = release_new_ctx->mm;
625
626 /* the various vma->vm_userfaultfd_ctx still points to it */
627 down_write(&mm->mmap_sem);
628 for (vma = mm->mmap; vma; vma = vma->vm_next)
629 if (vma->vm_userfaultfd_ctx.ctx == release_new_ctx)
630 vma->vm_userfaultfd_ctx = NULL_VM_UFFD_CTX;
631 up_write(&mm->mmap_sem);
632
633 userfaultfd_ctx_put(release_new_ctx);
634 }
635
620 /* 636 /*
621 * ctx may go away after this if the userfault pseudo fd is 637 * ctx may go away after this if the userfault pseudo fd is
622 * already released. 638 * already released.
diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c
index 0da80019a917..83ed7715f856 100644
--- a/fs/xfs/libxfs/xfs_alloc.c
+++ b/fs/xfs/libxfs/xfs_alloc.c
@@ -702,7 +702,7 @@ xfs_alloc_ag_vextent(
702 ASSERT(args->agbno % args->alignment == 0); 702 ASSERT(args->agbno % args->alignment == 0);
703 703
704 /* if not file data, insert new block into the reverse map btree */ 704 /* if not file data, insert new block into the reverse map btree */
705 if (args->oinfo.oi_owner != XFS_RMAP_OWN_UNKNOWN) { 705 if (!xfs_rmap_should_skip_owner_update(&args->oinfo)) {
706 error = xfs_rmap_alloc(args->tp, args->agbp, args->agno, 706 error = xfs_rmap_alloc(args->tp, args->agbp, args->agno,
707 args->agbno, args->len, &args->oinfo); 707 args->agbno, args->len, &args->oinfo);
708 if (error) 708 if (error)
@@ -1682,7 +1682,7 @@ xfs_free_ag_extent(
1682 bno_cur = cnt_cur = NULL; 1682 bno_cur = cnt_cur = NULL;
1683 mp = tp->t_mountp; 1683 mp = tp->t_mountp;
1684 1684
1685 if (oinfo->oi_owner != XFS_RMAP_OWN_UNKNOWN) { 1685 if (!xfs_rmap_should_skip_owner_update(oinfo)) {
1686 error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo); 1686 error = xfs_rmap_free(tp, agbp, agno, bno, len, oinfo);
1687 if (error) 1687 if (error)
1688 goto error0; 1688 goto error0;
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index 6249c92671de..a76914db72ef 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -212,6 +212,7 @@ xfs_attr_set(
212 int flags) 212 int flags)
213{ 213{
214 struct xfs_mount *mp = dp->i_mount; 214 struct xfs_mount *mp = dp->i_mount;
215 struct xfs_buf *leaf_bp = NULL;
215 struct xfs_da_args args; 216 struct xfs_da_args args;
216 struct xfs_defer_ops dfops; 217 struct xfs_defer_ops dfops;
217 struct xfs_trans_res tres; 218 struct xfs_trans_res tres;
@@ -327,9 +328,16 @@ xfs_attr_set(
327 * GROT: another possible req'mt for a double-split btree op. 328 * GROT: another possible req'mt for a double-split btree op.
328 */ 329 */
329 xfs_defer_init(args.dfops, args.firstblock); 330 xfs_defer_init(args.dfops, args.firstblock);
330 error = xfs_attr_shortform_to_leaf(&args); 331 error = xfs_attr_shortform_to_leaf(&args, &leaf_bp);
331 if (error) 332 if (error)
332 goto out_defer_cancel; 333 goto out_defer_cancel;
334 /*
335 * Prevent the leaf buffer from being unlocked so that a
336 * concurrent AIL push cannot grab the half-baked leaf
337 * buffer and run into problems with the write verifier.
338 */
339 xfs_trans_bhold(args.trans, leaf_bp);
340 xfs_defer_bjoin(args.dfops, leaf_bp);
333 xfs_defer_ijoin(args.dfops, dp); 341 xfs_defer_ijoin(args.dfops, dp);
334 error = xfs_defer_finish(&args.trans, args.dfops); 342 error = xfs_defer_finish(&args.trans, args.dfops);
335 if (error) 343 if (error)
@@ -337,13 +345,14 @@ xfs_attr_set(
337 345
338 /* 346 /*
339 * Commit the leaf transformation. We'll need another (linked) 347 * Commit the leaf transformation. We'll need another (linked)
340 * transaction to add the new attribute to the leaf. 348 * transaction to add the new attribute to the leaf, which
349 * means that we have to hold & join the leaf buffer here too.
341 */ 350 */
342
343 error = xfs_trans_roll_inode(&args.trans, dp); 351 error = xfs_trans_roll_inode(&args.trans, dp);
344 if (error) 352 if (error)
345 goto out; 353 goto out;
346 354 xfs_trans_bjoin(args.trans, leaf_bp);
355 leaf_bp = NULL;
347 } 356 }
348 357
349 if (xfs_bmap_one_block(dp, XFS_ATTR_FORK)) 358 if (xfs_bmap_one_block(dp, XFS_ATTR_FORK))
@@ -374,8 +383,9 @@ xfs_attr_set(
374 383
375out_defer_cancel: 384out_defer_cancel:
376 xfs_defer_cancel(&dfops); 385 xfs_defer_cancel(&dfops);
377 args.trans = NULL;
378out: 386out:
387 if (leaf_bp)
388 xfs_trans_brelse(args.trans, leaf_bp);
379 if (args.trans) 389 if (args.trans)
380 xfs_trans_cancel(args.trans); 390 xfs_trans_cancel(args.trans);
381 xfs_iunlock(dp, XFS_ILOCK_EXCL); 391 xfs_iunlock(dp, XFS_ILOCK_EXCL);
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.c b/fs/xfs/libxfs/xfs_attr_leaf.c
index 53cc8b986eac..601eaa36f1ad 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.c
+++ b/fs/xfs/libxfs/xfs_attr_leaf.c
@@ -735,10 +735,13 @@ xfs_attr_shortform_getvalue(xfs_da_args_t *args)
735} 735}
736 736
737/* 737/*
738 * Convert from using the shortform to the leaf. 738 * Convert from using the shortform to the leaf. On success, return the
739 * buffer so that we can keep it locked until we're totally done with it.
739 */ 740 */
740int 741int
741xfs_attr_shortform_to_leaf(xfs_da_args_t *args) 742xfs_attr_shortform_to_leaf(
743 struct xfs_da_args *args,
744 struct xfs_buf **leaf_bp)
742{ 745{
743 xfs_inode_t *dp; 746 xfs_inode_t *dp;
744 xfs_attr_shortform_t *sf; 747 xfs_attr_shortform_t *sf;
@@ -818,7 +821,7 @@ xfs_attr_shortform_to_leaf(xfs_da_args_t *args)
818 sfe = XFS_ATTR_SF_NEXTENTRY(sfe); 821 sfe = XFS_ATTR_SF_NEXTENTRY(sfe);
819 } 822 }
820 error = 0; 823 error = 0;
821 824 *leaf_bp = bp;
822out: 825out:
823 kmem_free(tmpbuffer); 826 kmem_free(tmpbuffer);
824 return error; 827 return error;
diff --git a/fs/xfs/libxfs/xfs_attr_leaf.h b/fs/xfs/libxfs/xfs_attr_leaf.h
index f7dda0c237b0..894124efb421 100644
--- a/fs/xfs/libxfs/xfs_attr_leaf.h
+++ b/fs/xfs/libxfs/xfs_attr_leaf.h
@@ -48,7 +48,8 @@ void xfs_attr_shortform_create(struct xfs_da_args *args);
48void xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff); 48void xfs_attr_shortform_add(struct xfs_da_args *args, int forkoff);
49int xfs_attr_shortform_lookup(struct xfs_da_args *args); 49int xfs_attr_shortform_lookup(struct xfs_da_args *args);
50int xfs_attr_shortform_getvalue(struct xfs_da_args *args); 50int xfs_attr_shortform_getvalue(struct xfs_da_args *args);
51int xfs_attr_shortform_to_leaf(struct xfs_da_args *args); 51int xfs_attr_shortform_to_leaf(struct xfs_da_args *args,
52 struct xfs_buf **leaf_bp);
52int xfs_attr_shortform_remove(struct xfs_da_args *args); 53int xfs_attr_shortform_remove(struct xfs_da_args *args);
53int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp); 54int xfs_attr_shortform_allfit(struct xfs_buf *bp, struct xfs_inode *dp);
54int xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes); 55int xfs_attr_shortform_bytesfit(struct xfs_inode *dp, int bytes);
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index 1210f684d3c2..1bddbba6b80c 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -5136,7 +5136,7 @@ __xfs_bunmapi(
5136 * blowing out the transaction with a mix of EFIs and reflink 5136 * blowing out the transaction with a mix of EFIs and reflink
5137 * adjustments. 5137 * adjustments.
5138 */ 5138 */
5139 if (xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK) 5139 if (tp && xfs_is_reflink_inode(ip) && whichfork == XFS_DATA_FORK)
5140 max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res)); 5140 max_len = min(len, xfs_refcount_max_unmap(tp->t_log_res));
5141 else 5141 else
5142 max_len = len; 5142 max_len = len;
diff --git a/fs/xfs/libxfs/xfs_defer.c b/fs/xfs/libxfs/xfs_defer.c
index 072ebfe1d6ae..087fea02c389 100644
--- a/fs/xfs/libxfs/xfs_defer.c
+++ b/fs/xfs/libxfs/xfs_defer.c
@@ -249,6 +249,10 @@ xfs_defer_trans_roll(
249 for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) 249 for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
250 xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE); 250 xfs_trans_log_inode(*tp, dop->dop_inodes[i], XFS_ILOG_CORE);
251 251
252 /* Hold the (previously bjoin'd) buffer locked across the roll. */
253 for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++)
254 xfs_trans_dirty_buf(*tp, dop->dop_bufs[i]);
255
252 trace_xfs_defer_trans_roll((*tp)->t_mountp, dop); 256 trace_xfs_defer_trans_roll((*tp)->t_mountp, dop);
253 257
254 /* Roll the transaction. */ 258 /* Roll the transaction. */
@@ -264,6 +268,12 @@ xfs_defer_trans_roll(
264 for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++) 268 for (i = 0; i < XFS_DEFER_OPS_NR_INODES && dop->dop_inodes[i]; i++)
265 xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0); 269 xfs_trans_ijoin(*tp, dop->dop_inodes[i], 0);
266 270
271 /* Rejoin the buffers and dirty them so the log moves forward. */
272 for (i = 0; i < XFS_DEFER_OPS_NR_BUFS && dop->dop_bufs[i]; i++) {
273 xfs_trans_bjoin(*tp, dop->dop_bufs[i]);
274 xfs_trans_bhold(*tp, dop->dop_bufs[i]);
275 }
276
267 return error; 277 return error;
268} 278}
269 279
@@ -295,6 +305,31 @@ xfs_defer_ijoin(
295 } 305 }
296 } 306 }
297 307
308 ASSERT(0);
309 return -EFSCORRUPTED;
310}
311
312/*
313 * Add this buffer to the deferred op. Each joined buffer is relogged
314 * each time we roll the transaction.
315 */
316int
317xfs_defer_bjoin(
318 struct xfs_defer_ops *dop,
319 struct xfs_buf *bp)
320{
321 int i;
322
323 for (i = 0; i < XFS_DEFER_OPS_NR_BUFS; i++) {
324 if (dop->dop_bufs[i] == bp)
325 return 0;
326 else if (dop->dop_bufs[i] == NULL) {
327 dop->dop_bufs[i] = bp;
328 return 0;
329 }
330 }
331
332 ASSERT(0);
298 return -EFSCORRUPTED; 333 return -EFSCORRUPTED;
299} 334}
300 335
@@ -493,9 +528,7 @@ xfs_defer_init(
493 struct xfs_defer_ops *dop, 528 struct xfs_defer_ops *dop,
494 xfs_fsblock_t *fbp) 529 xfs_fsblock_t *fbp)
495{ 530{
496 dop->dop_committed = false; 531 memset(dop, 0, sizeof(struct xfs_defer_ops));
497 dop->dop_low = false;
498 memset(&dop->dop_inodes, 0, sizeof(dop->dop_inodes));
499 *fbp = NULLFSBLOCK; 532 *fbp = NULLFSBLOCK;
500 INIT_LIST_HEAD(&dop->dop_intake); 533 INIT_LIST_HEAD(&dop->dop_intake);
501 INIT_LIST_HEAD(&dop->dop_pending); 534 INIT_LIST_HEAD(&dop->dop_pending);
diff --git a/fs/xfs/libxfs/xfs_defer.h b/fs/xfs/libxfs/xfs_defer.h
index d4f046dd44bd..045beacdd37d 100644
--- a/fs/xfs/libxfs/xfs_defer.h
+++ b/fs/xfs/libxfs/xfs_defer.h
@@ -59,6 +59,7 @@ enum xfs_defer_ops_type {
59}; 59};
60 60
61#define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */ 61#define XFS_DEFER_OPS_NR_INODES 2 /* join up to two inodes */
62#define XFS_DEFER_OPS_NR_BUFS 2 /* join up to two buffers */
62 63
63struct xfs_defer_ops { 64struct xfs_defer_ops {
64 bool dop_committed; /* did any trans commit? */ 65 bool dop_committed; /* did any trans commit? */
@@ -66,8 +67,9 @@ struct xfs_defer_ops {
66 struct list_head dop_intake; /* unlogged pending work */ 67 struct list_head dop_intake; /* unlogged pending work */
67 struct list_head dop_pending; /* logged pending work */ 68 struct list_head dop_pending; /* logged pending work */
68 69
69 /* relog these inodes with each roll */ 70 /* relog these with each roll */
70 struct xfs_inode *dop_inodes[XFS_DEFER_OPS_NR_INODES]; 71 struct xfs_inode *dop_inodes[XFS_DEFER_OPS_NR_INODES];
72 struct xfs_buf *dop_bufs[XFS_DEFER_OPS_NR_BUFS];
71}; 73};
72 74
73void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type, 75void xfs_defer_add(struct xfs_defer_ops *dop, enum xfs_defer_ops_type type,
@@ -77,6 +79,7 @@ void xfs_defer_cancel(struct xfs_defer_ops *dop);
77void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp); 79void xfs_defer_init(struct xfs_defer_ops *dop, xfs_fsblock_t *fbp);
78bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop); 80bool xfs_defer_has_unfinished_work(struct xfs_defer_ops *dop);
79int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip); 81int xfs_defer_ijoin(struct xfs_defer_ops *dop, struct xfs_inode *ip);
82int xfs_defer_bjoin(struct xfs_defer_ops *dop, struct xfs_buf *bp);
80 83
81/* Description of a deferred type. */ 84/* Description of a deferred type. */
82struct xfs_defer_op_type { 85struct xfs_defer_op_type {
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index de3f04a98656..3b57ef0f2f76 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -920,8 +920,7 @@ STATIC xfs_agnumber_t
920xfs_ialloc_ag_select( 920xfs_ialloc_ag_select(
921 xfs_trans_t *tp, /* transaction pointer */ 921 xfs_trans_t *tp, /* transaction pointer */
922 xfs_ino_t parent, /* parent directory inode number */ 922 xfs_ino_t parent, /* parent directory inode number */
923 umode_t mode, /* bits set to indicate file type */ 923 umode_t mode) /* bits set to indicate file type */
924 int okalloc) /* ok to allocate more space */
925{ 924{
926 xfs_agnumber_t agcount; /* number of ag's in the filesystem */ 925 xfs_agnumber_t agcount; /* number of ag's in the filesystem */
927 xfs_agnumber_t agno; /* current ag number */ 926 xfs_agnumber_t agno; /* current ag number */
@@ -978,9 +977,6 @@ xfs_ialloc_ag_select(
978 return agno; 977 return agno;
979 } 978 }
980 979
981 if (!okalloc)
982 goto nextag;
983
984 if (!pag->pagf_init) { 980 if (!pag->pagf_init) {
985 error = xfs_alloc_pagf_init(mp, tp, agno, flags); 981 error = xfs_alloc_pagf_init(mp, tp, agno, flags);
986 if (error) 982 if (error)
@@ -1680,7 +1676,6 @@ xfs_dialloc(
1680 struct xfs_trans *tp, 1676 struct xfs_trans *tp,
1681 xfs_ino_t parent, 1677 xfs_ino_t parent,
1682 umode_t mode, 1678 umode_t mode,
1683 int okalloc,
1684 struct xfs_buf **IO_agbp, 1679 struct xfs_buf **IO_agbp,
1685 xfs_ino_t *inop) 1680 xfs_ino_t *inop)
1686{ 1681{
@@ -1692,6 +1687,7 @@ xfs_dialloc(
1692 int noroom = 0; 1687 int noroom = 0;
1693 xfs_agnumber_t start_agno; 1688 xfs_agnumber_t start_agno;
1694 struct xfs_perag *pag; 1689 struct xfs_perag *pag;
1690 int okalloc = 1;
1695 1691
1696 if (*IO_agbp) { 1692 if (*IO_agbp) {
1697 /* 1693 /*
@@ -1707,7 +1703,7 @@ xfs_dialloc(
1707 * We do not have an agbp, so select an initial allocation 1703 * We do not have an agbp, so select an initial allocation
1708 * group for inode allocation. 1704 * group for inode allocation.
1709 */ 1705 */
1710 start_agno = xfs_ialloc_ag_select(tp, parent, mode, okalloc); 1706 start_agno = xfs_ialloc_ag_select(tp, parent, mode);
1711 if (start_agno == NULLAGNUMBER) { 1707 if (start_agno == NULLAGNUMBER) {
1712 *inop = NULLFSINO; 1708 *inop = NULLFSINO;
1713 return 0; 1709 return 0;
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index d2bdcd5e7312..66a8de0b1caa 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -81,7 +81,6 @@ xfs_dialloc(
81 struct xfs_trans *tp, /* transaction pointer */ 81 struct xfs_trans *tp, /* transaction pointer */
82 xfs_ino_t parent, /* parent inode (directory) */ 82 xfs_ino_t parent, /* parent inode (directory) */
83 umode_t mode, /* mode bits for new inode */ 83 umode_t mode, /* mode bits for new inode */
84 int okalloc, /* ok to allocate more space */
85 struct xfs_buf **agbp, /* buf for a.g. inode header */ 84 struct xfs_buf **agbp, /* buf for a.g. inode header */
86 xfs_ino_t *inop); /* inode number allocated */ 85 xfs_ino_t *inop); /* inode number allocated */
87 86
diff --git a/fs/xfs/libxfs/xfs_iext_tree.c b/fs/xfs/libxfs/xfs_iext_tree.c
index 89bf16b4d937..b0f31791c7e6 100644
--- a/fs/xfs/libxfs/xfs_iext_tree.c
+++ b/fs/xfs/libxfs/xfs_iext_tree.c
@@ -632,8 +632,6 @@ xfs_iext_insert(
632 struct xfs_iext_leaf *new = NULL; 632 struct xfs_iext_leaf *new = NULL;
633 int nr_entries, i; 633 int nr_entries, i;
634 634
635 trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
636
637 if (ifp->if_height == 0) 635 if (ifp->if_height == 0)
638 xfs_iext_alloc_root(ifp, cur); 636 xfs_iext_alloc_root(ifp, cur);
639 else if (ifp->if_height == 1) 637 else if (ifp->if_height == 1)
@@ -661,6 +659,8 @@ xfs_iext_insert(
661 xfs_iext_set(cur_rec(cur), irec); 659 xfs_iext_set(cur_rec(cur), irec);
662 ifp->if_bytes += sizeof(struct xfs_iext_rec); 660 ifp->if_bytes += sizeof(struct xfs_iext_rec);
663 661
662 trace_xfs_iext_insert(ip, cur, state, _RET_IP_);
663
664 if (new) 664 if (new)
665 xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2); 665 xfs_iext_insert_node(ifp, xfs_iext_leaf_key(new, 0), new, 2);
666} 666}
diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c
index 585b35d34142..c40d26763075 100644
--- a/fs/xfs/libxfs/xfs_refcount.c
+++ b/fs/xfs/libxfs/xfs_refcount.c
@@ -1488,27 +1488,12 @@ __xfs_refcount_cow_alloc(
1488 xfs_extlen_t aglen, 1488 xfs_extlen_t aglen,
1489 struct xfs_defer_ops *dfops) 1489 struct xfs_defer_ops *dfops)
1490{ 1490{
1491 int error;
1492
1493 trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno, 1491 trace_xfs_refcount_cow_increase(rcur->bc_mp, rcur->bc_private.a.agno,
1494 agbno, aglen); 1492 agbno, aglen);
1495 1493
1496 /* Add refcount btree reservation */ 1494 /* Add refcount btree reservation */
1497 error = xfs_refcount_adjust_cow(rcur, agbno, aglen, 1495 return xfs_refcount_adjust_cow(rcur, agbno, aglen,
1498 XFS_REFCOUNT_ADJUST_COW_ALLOC, dfops); 1496 XFS_REFCOUNT_ADJUST_COW_ALLOC, dfops);
1499 if (error)
1500 return error;
1501
1502 /* Add rmap entry */
1503 if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) {
1504 error = xfs_rmap_alloc_extent(rcur->bc_mp, dfops,
1505 rcur->bc_private.a.agno,
1506 agbno, aglen, XFS_RMAP_OWN_COW);
1507 if (error)
1508 return error;
1509 }
1510
1511 return error;
1512} 1497}
1513 1498
1514/* 1499/*
@@ -1521,27 +1506,12 @@ __xfs_refcount_cow_free(
1521 xfs_extlen_t aglen, 1506 xfs_extlen_t aglen,
1522 struct xfs_defer_ops *dfops) 1507 struct xfs_defer_ops *dfops)
1523{ 1508{
1524 int error;
1525
1526 trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno, 1509 trace_xfs_refcount_cow_decrease(rcur->bc_mp, rcur->bc_private.a.agno,
1527 agbno, aglen); 1510 agbno, aglen);
1528 1511
1529 /* Remove refcount btree reservation */ 1512 /* Remove refcount btree reservation */
1530 error = xfs_refcount_adjust_cow(rcur, agbno, aglen, 1513 return xfs_refcount_adjust_cow(rcur, agbno, aglen,
1531 XFS_REFCOUNT_ADJUST_COW_FREE, dfops); 1514 XFS_REFCOUNT_ADJUST_COW_FREE, dfops);
1532 if (error)
1533 return error;
1534
1535 /* Remove rmap entry */
1536 if (xfs_sb_version_hasrmapbt(&rcur->bc_mp->m_sb)) {
1537 error = xfs_rmap_free_extent(rcur->bc_mp, dfops,
1538 rcur->bc_private.a.agno,
1539 agbno, aglen, XFS_RMAP_OWN_COW);
1540 if (error)
1541 return error;
1542 }
1543
1544 return error;
1545} 1515}
1546 1516
1547/* Record a CoW staging extent in the refcount btree. */ 1517/* Record a CoW staging extent in the refcount btree. */
@@ -1552,11 +1522,19 @@ xfs_refcount_alloc_cow_extent(
1552 xfs_fsblock_t fsb, 1522 xfs_fsblock_t fsb,
1553 xfs_extlen_t len) 1523 xfs_extlen_t len)
1554{ 1524{
1525 int error;
1526
1555 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 1527 if (!xfs_sb_version_hasreflink(&mp->m_sb))
1556 return 0; 1528 return 0;
1557 1529
1558 return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW, 1530 error = __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_ALLOC_COW,
1559 fsb, len); 1531 fsb, len);
1532 if (error)
1533 return error;
1534
1535 /* Add rmap entry */
1536 return xfs_rmap_alloc_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
1537 XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
1560} 1538}
1561 1539
1562/* Forget a CoW staging event in the refcount btree. */ 1540/* Forget a CoW staging event in the refcount btree. */
@@ -1567,9 +1545,17 @@ xfs_refcount_free_cow_extent(
1567 xfs_fsblock_t fsb, 1545 xfs_fsblock_t fsb,
1568 xfs_extlen_t len) 1546 xfs_extlen_t len)
1569{ 1547{
1548 int error;
1549
1570 if (!xfs_sb_version_hasreflink(&mp->m_sb)) 1550 if (!xfs_sb_version_hasreflink(&mp->m_sb))
1571 return 0; 1551 return 0;
1572 1552
1553 /* Remove rmap entry */
1554 error = xfs_rmap_free_extent(mp, dfops, XFS_FSB_TO_AGNO(mp, fsb),
1555 XFS_FSB_TO_AGBNO(mp, fsb), len, XFS_RMAP_OWN_COW);
1556 if (error)
1557 return error;
1558
1573 return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW, 1559 return __xfs_refcount_add(mp, dfops, XFS_REFCOUNT_FREE_COW,
1574 fsb, len); 1560 fsb, len);
1575} 1561}
diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c
index dd019cee1b3b..50db920ceeeb 100644
--- a/fs/xfs/libxfs/xfs_rmap.c
+++ b/fs/xfs/libxfs/xfs_rmap.c
@@ -368,6 +368,51 @@ xfs_rmap_lookup_le_range(
368} 368}
369 369
370/* 370/*
371 * Perform all the relevant owner checks for a removal op. If we're doing an
372 * unknown-owner removal then we have no owner information to check.
373 */
374static int
375xfs_rmap_free_check_owner(
376 struct xfs_mount *mp,
377 uint64_t ltoff,
378 struct xfs_rmap_irec *rec,
379 xfs_fsblock_t bno,
380 xfs_filblks_t len,
381 uint64_t owner,
382 uint64_t offset,
383 unsigned int flags)
384{
385 int error = 0;
386
387 if (owner == XFS_RMAP_OWN_UNKNOWN)
388 return 0;
389
390 /* Make sure the unwritten flag matches. */
391 XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) ==
392 (rec->rm_flags & XFS_RMAP_UNWRITTEN), out);
393
394 /* Make sure the owner matches what we expect to find in the tree. */
395 XFS_WANT_CORRUPTED_GOTO(mp, owner == rec->rm_owner, out);
396
397 /* Check the offset, if necessary. */
398 if (XFS_RMAP_NON_INODE_OWNER(owner))
399 goto out;
400
401 if (flags & XFS_RMAP_BMBT_BLOCK) {
402 XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_flags & XFS_RMAP_BMBT_BLOCK,
403 out);
404 } else {
405 XFS_WANT_CORRUPTED_GOTO(mp, rec->rm_offset <= offset, out);
406 XFS_WANT_CORRUPTED_GOTO(mp,
407 ltoff + rec->rm_blockcount >= offset + len,
408 out);
409 }
410
411out:
412 return error;
413}
414
415/*
371 * Find the extent in the rmap btree and remove it. 416 * Find the extent in the rmap btree and remove it.
372 * 417 *
373 * The record we find should always be an exact match for the extent that we're 418 * The record we find should always be an exact match for the extent that we're
@@ -444,33 +489,40 @@ xfs_rmap_unmap(
444 goto out_done; 489 goto out_done;
445 } 490 }
446 491
447 /* Make sure the unwritten flag matches. */ 492 /*
448 XFS_WANT_CORRUPTED_GOTO(mp, (flags & XFS_RMAP_UNWRITTEN) == 493 * If we're doing an unknown-owner removal for EFI recovery, we expect
449 (ltrec.rm_flags & XFS_RMAP_UNWRITTEN), out_error); 494 * to find the full range in the rmapbt or nothing at all. If we
495 * don't find any rmaps overlapping either end of the range, we're
496 * done. Hopefully this means that the EFI creator already queued
497 * (and finished) a RUI to remove the rmap.
498 */
499 if (owner == XFS_RMAP_OWN_UNKNOWN &&
500 ltrec.rm_startblock + ltrec.rm_blockcount <= bno) {
501 struct xfs_rmap_irec rtrec;
502
503 error = xfs_btree_increment(cur, 0, &i);
504 if (error)
505 goto out_error;
506 if (i == 0)
507 goto out_done;
508 error = xfs_rmap_get_rec(cur, &rtrec, &i);
509 if (error)
510 goto out_error;
511 XFS_WANT_CORRUPTED_GOTO(mp, i == 1, out_error);
512 if (rtrec.rm_startblock >= bno + len)
513 goto out_done;
514 }
450 515
451 /* Make sure the extent we found covers the entire freeing range. */ 516 /* Make sure the extent we found covers the entire freeing range. */
452 XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno && 517 XFS_WANT_CORRUPTED_GOTO(mp, ltrec.rm_startblock <= bno &&
453 ltrec.rm_startblock + ltrec.rm_blockcount >= 518 ltrec.rm_startblock + ltrec.rm_blockcount >=
454 bno + len, out_error); 519 bno + len, out_error);
455 520
456 /* Make sure the owner matches what we expect to find in the tree. */ 521 /* Check owner information. */
457 XFS_WANT_CORRUPTED_GOTO(mp, owner == ltrec.rm_owner || 522 error = xfs_rmap_free_check_owner(mp, ltoff, &ltrec, bno, len, owner,
458 XFS_RMAP_NON_INODE_OWNER(owner), out_error); 523 offset, flags);
459 524 if (error)
460 /* Check the offset, if necessary. */ 525 goto out_error;
461 if (!XFS_RMAP_NON_INODE_OWNER(owner)) {
462 if (flags & XFS_RMAP_BMBT_BLOCK) {
463 XFS_WANT_CORRUPTED_GOTO(mp,
464 ltrec.rm_flags & XFS_RMAP_BMBT_BLOCK,
465 out_error);
466 } else {
467 XFS_WANT_CORRUPTED_GOTO(mp,
468 ltrec.rm_offset <= offset, out_error);
469 XFS_WANT_CORRUPTED_GOTO(mp,
470 ltoff + ltrec.rm_blockcount >= offset + len,
471 out_error);
472 }
473 }
474 526
475 if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) { 527 if (ltrec.rm_startblock == bno && ltrec.rm_blockcount == len) {
476 /* exact match, simply remove the record from rmap tree */ 528 /* exact match, simply remove the record from rmap tree */
@@ -664,6 +716,7 @@ xfs_rmap_map(
664 flags |= XFS_RMAP_UNWRITTEN; 716 flags |= XFS_RMAP_UNWRITTEN;
665 trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len, 717 trace_xfs_rmap_map(mp, cur->bc_private.a.agno, bno, len,
666 unwritten, oinfo); 718 unwritten, oinfo);
719 ASSERT(!xfs_rmap_should_skip_owner_update(oinfo));
667 720
668 /* 721 /*
669 * For the initial lookup, look for an exact match or the left-adjacent 722 * For the initial lookup, look for an exact match or the left-adjacent
diff --git a/fs/xfs/libxfs/xfs_rmap.h b/fs/xfs/libxfs/xfs_rmap.h
index 466ede637080..0fcd5b1ba729 100644
--- a/fs/xfs/libxfs/xfs_rmap.h
+++ b/fs/xfs/libxfs/xfs_rmap.h
@@ -61,7 +61,21 @@ static inline void
61xfs_rmap_skip_owner_update( 61xfs_rmap_skip_owner_update(
62 struct xfs_owner_info *oi) 62 struct xfs_owner_info *oi)
63{ 63{
64 oi->oi_owner = XFS_RMAP_OWN_UNKNOWN; 64 xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_NULL);
65}
66
67static inline bool
68xfs_rmap_should_skip_owner_update(
69 struct xfs_owner_info *oi)
70{
71 return oi->oi_owner == XFS_RMAP_OWN_NULL;
72}
73
74static inline void
75xfs_rmap_any_owner_update(
76 struct xfs_owner_info *oi)
77{
78 xfs_rmap_ag_owner(oi, XFS_RMAP_OWN_UNKNOWN);
65} 79}
66 80
67/* Reverse mapping functions. */ 81/* Reverse mapping functions. */
diff --git a/fs/xfs/scrub/scrub.c b/fs/xfs/scrub/scrub.c
index 9c42c4efd01e..ab3aef2ae823 100644
--- a/fs/xfs/scrub/scrub.c
+++ b/fs/xfs/scrub/scrub.c
@@ -46,7 +46,6 @@
46#include "scrub/scrub.h" 46#include "scrub/scrub.h"
47#include "scrub/common.h" 47#include "scrub/common.h"
48#include "scrub/trace.h" 48#include "scrub/trace.h"
49#include "scrub/scrub.h"
50#include "scrub/btree.h" 49#include "scrub/btree.h"
51 50
52/* 51/*
diff --git a/fs/xfs/scrub/trace.c b/fs/xfs/scrub/trace.c
index 472080e75788..86daed0e3a45 100644
--- a/fs/xfs/scrub/trace.c
+++ b/fs/xfs/scrub/trace.c
@@ -26,7 +26,6 @@
26#include "xfs_mount.h" 26#include "xfs_mount.h"
27#include "xfs_defer.h" 27#include "xfs_defer.h"
28#include "xfs_da_format.h" 28#include "xfs_da_format.h"
29#include "xfs_defer.h"
30#include "xfs_inode.h" 29#include "xfs_inode.h"
31#include "xfs_btree.h" 30#include "xfs_btree.h"
32#include "xfs_trans.h" 31#include "xfs_trans.h"
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index 21e2d70884e1..4fc526a27a94 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -399,7 +399,7 @@ xfs_map_blocks(
399 (ip->i_df.if_flags & XFS_IFEXTENTS)); 399 (ip->i_df.if_flags & XFS_IFEXTENTS));
400 ASSERT(offset <= mp->m_super->s_maxbytes); 400 ASSERT(offset <= mp->m_super->s_maxbytes);
401 401
402 if ((xfs_ufsize_t)offset + count > mp->m_super->s_maxbytes) 402 if (offset > mp->m_super->s_maxbytes - count)
403 count = mp->m_super->s_maxbytes - offset; 403 count = mp->m_super->s_maxbytes - offset;
404 end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count); 404 end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + count);
405 offset_fsb = XFS_B_TO_FSBT(mp, offset); 405 offset_fsb = XFS_B_TO_FSBT(mp, offset);
@@ -1312,7 +1312,7 @@ xfs_get_blocks(
1312 lockmode = xfs_ilock_data_map_shared(ip); 1312 lockmode = xfs_ilock_data_map_shared(ip);
1313 1313
1314 ASSERT(offset <= mp->m_super->s_maxbytes); 1314 ASSERT(offset <= mp->m_super->s_maxbytes);
1315 if ((xfs_ufsize_t)offset + size > mp->m_super->s_maxbytes) 1315 if (offset > mp->m_super->s_maxbytes - size)
1316 size = mp->m_super->s_maxbytes - offset; 1316 size = mp->m_super->s_maxbytes - offset;
1317 end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size); 1317 end_fsb = XFS_B_TO_FSB(mp, (xfs_ufsize_t)offset + size);
1318 offset_fsb = XFS_B_TO_FSBT(mp, offset); 1318 offset_fsb = XFS_B_TO_FSBT(mp, offset);
diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c
index 44f8c5451210..64da90655e95 100644
--- a/fs/xfs/xfs_extfree_item.c
+++ b/fs/xfs/xfs_extfree_item.c
@@ -538,7 +538,7 @@ xfs_efi_recover(
538 return error; 538 return error;
539 efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents); 539 efdp = xfs_trans_get_efd(tp, efip, efip->efi_format.efi_nextents);
540 540
541 xfs_rmap_skip_owner_update(&oinfo); 541 xfs_rmap_any_owner_update(&oinfo);
542 for (i = 0; i < efip->efi_format.efi_nextents; i++) { 542 for (i = 0; i < efip->efi_format.efi_nextents; i++) {
543 extp = &efip->efi_format.efi_extents[i]; 543 extp = &efip->efi_format.efi_extents[i];
544 error = xfs_trans_free_extent(tp, efdp, extp->ext_start, 544 error = xfs_trans_free_extent(tp, efdp, extp->ext_start,
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index 8f22fc579dbb..60a2e128cb6a 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -571,6 +571,11 @@ xfs_growfs_data_private(
571 * this doesn't actually exist in the rmap btree. 571 * this doesn't actually exist in the rmap btree.
572 */ 572 */
573 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL); 573 xfs_rmap_ag_owner(&oinfo, XFS_RMAP_OWN_NULL);
574 error = xfs_rmap_free(tp, bp, agno,
575 be32_to_cpu(agf->agf_length) - new,
576 new, &oinfo);
577 if (error)
578 goto error0;
574 error = xfs_free_extent(tp, 579 error = xfs_free_extent(tp,
575 XFS_AGB_TO_FSB(mp, agno, 580 XFS_AGB_TO_FSB(mp, agno,
576 be32_to_cpu(agf->agf_length) - new), 581 be32_to_cpu(agf->agf_length) - new),
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index 43005fbe8b1e..3861d61fb265 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -870,7 +870,7 @@ xfs_eofblocks_worker(
870 * based on the 'speculative_cow_prealloc_lifetime' tunable (5m by default). 870 * based on the 'speculative_cow_prealloc_lifetime' tunable (5m by default).
871 * (We'll just piggyback on the post-EOF prealloc space workqueue.) 871 * (We'll just piggyback on the post-EOF prealloc space workqueue.)
872 */ 872 */
873STATIC void 873void
874xfs_queue_cowblocks( 874xfs_queue_cowblocks(
875 struct xfs_mount *mp) 875 struct xfs_mount *mp)
876{ 876{
@@ -1536,8 +1536,23 @@ xfs_inode_free_quota_eofblocks(
1536 return __xfs_inode_free_quota_eofblocks(ip, xfs_icache_free_eofblocks); 1536 return __xfs_inode_free_quota_eofblocks(ip, xfs_icache_free_eofblocks);
1537} 1537}
1538 1538
1539static inline unsigned long
1540xfs_iflag_for_tag(
1541 int tag)
1542{
1543 switch (tag) {
1544 case XFS_ICI_EOFBLOCKS_TAG:
1545 return XFS_IEOFBLOCKS;
1546 case XFS_ICI_COWBLOCKS_TAG:
1547 return XFS_ICOWBLOCKS;
1548 default:
1549 ASSERT(0);
1550 return 0;
1551 }
1552}
1553
1539static void 1554static void
1540__xfs_inode_set_eofblocks_tag( 1555__xfs_inode_set_blocks_tag(
1541 xfs_inode_t *ip, 1556 xfs_inode_t *ip,
1542 void (*execute)(struct xfs_mount *mp), 1557 void (*execute)(struct xfs_mount *mp),
1543 void (*set_tp)(struct xfs_mount *mp, xfs_agnumber_t agno, 1558 void (*set_tp)(struct xfs_mount *mp, xfs_agnumber_t agno,
@@ -1552,10 +1567,10 @@ __xfs_inode_set_eofblocks_tag(
1552 * Don't bother locking the AG and looking up in the radix trees 1567 * Don't bother locking the AG and looking up in the radix trees
1553 * if we already know that we have the tag set. 1568 * if we already know that we have the tag set.
1554 */ 1569 */
1555 if (ip->i_flags & XFS_IEOFBLOCKS) 1570 if (ip->i_flags & xfs_iflag_for_tag(tag))
1556 return; 1571 return;
1557 spin_lock(&ip->i_flags_lock); 1572 spin_lock(&ip->i_flags_lock);
1558 ip->i_flags |= XFS_IEOFBLOCKS; 1573 ip->i_flags |= xfs_iflag_for_tag(tag);
1559 spin_unlock(&ip->i_flags_lock); 1574 spin_unlock(&ip->i_flags_lock);
1560 1575
1561 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 1576 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
@@ -1587,13 +1602,13 @@ xfs_inode_set_eofblocks_tag(
1587 xfs_inode_t *ip) 1602 xfs_inode_t *ip)
1588{ 1603{
1589 trace_xfs_inode_set_eofblocks_tag(ip); 1604 trace_xfs_inode_set_eofblocks_tag(ip);
1590 return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_eofblocks, 1605 return __xfs_inode_set_blocks_tag(ip, xfs_queue_eofblocks,
1591 trace_xfs_perag_set_eofblocks, 1606 trace_xfs_perag_set_eofblocks,
1592 XFS_ICI_EOFBLOCKS_TAG); 1607 XFS_ICI_EOFBLOCKS_TAG);
1593} 1608}
1594 1609
1595static void 1610static void
1596__xfs_inode_clear_eofblocks_tag( 1611__xfs_inode_clear_blocks_tag(
1597 xfs_inode_t *ip, 1612 xfs_inode_t *ip,
1598 void (*clear_tp)(struct xfs_mount *mp, xfs_agnumber_t agno, 1613 void (*clear_tp)(struct xfs_mount *mp, xfs_agnumber_t agno,
1599 int error, unsigned long caller_ip), 1614 int error, unsigned long caller_ip),
@@ -1603,7 +1618,7 @@ __xfs_inode_clear_eofblocks_tag(
1603 struct xfs_perag *pag; 1618 struct xfs_perag *pag;
1604 1619
1605 spin_lock(&ip->i_flags_lock); 1620 spin_lock(&ip->i_flags_lock);
1606 ip->i_flags &= ~XFS_IEOFBLOCKS; 1621 ip->i_flags &= ~xfs_iflag_for_tag(tag);
1607 spin_unlock(&ip->i_flags_lock); 1622 spin_unlock(&ip->i_flags_lock);
1608 1623
1609 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino)); 1624 pag = xfs_perag_get(mp, XFS_INO_TO_AGNO(mp, ip->i_ino));
@@ -1630,7 +1645,7 @@ xfs_inode_clear_eofblocks_tag(
1630 xfs_inode_t *ip) 1645 xfs_inode_t *ip)
1631{ 1646{
1632 trace_xfs_inode_clear_eofblocks_tag(ip); 1647 trace_xfs_inode_clear_eofblocks_tag(ip);
1633 return __xfs_inode_clear_eofblocks_tag(ip, 1648 return __xfs_inode_clear_blocks_tag(ip,
1634 trace_xfs_perag_clear_eofblocks, XFS_ICI_EOFBLOCKS_TAG); 1649 trace_xfs_perag_clear_eofblocks, XFS_ICI_EOFBLOCKS_TAG);
1635} 1650}
1636 1651
@@ -1724,7 +1739,7 @@ xfs_inode_set_cowblocks_tag(
1724 xfs_inode_t *ip) 1739 xfs_inode_t *ip)
1725{ 1740{
1726 trace_xfs_inode_set_cowblocks_tag(ip); 1741 trace_xfs_inode_set_cowblocks_tag(ip);
1727 return __xfs_inode_set_eofblocks_tag(ip, xfs_queue_cowblocks, 1742 return __xfs_inode_set_blocks_tag(ip, xfs_queue_cowblocks,
1728 trace_xfs_perag_set_cowblocks, 1743 trace_xfs_perag_set_cowblocks,
1729 XFS_ICI_COWBLOCKS_TAG); 1744 XFS_ICI_COWBLOCKS_TAG);
1730} 1745}
@@ -1734,6 +1749,6 @@ xfs_inode_clear_cowblocks_tag(
1734 xfs_inode_t *ip) 1749 xfs_inode_t *ip)
1735{ 1750{
1736 trace_xfs_inode_clear_cowblocks_tag(ip); 1751 trace_xfs_inode_clear_cowblocks_tag(ip);
1737 return __xfs_inode_clear_eofblocks_tag(ip, 1752 return __xfs_inode_clear_blocks_tag(ip,
1738 trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG); 1753 trace_xfs_perag_clear_cowblocks, XFS_ICI_COWBLOCKS_TAG);
1739} 1754}
diff --git a/fs/xfs/xfs_icache.h b/fs/xfs/xfs_icache.h
index bff4d85e5498..d4a77588eca1 100644
--- a/fs/xfs/xfs_icache.h
+++ b/fs/xfs/xfs_icache.h
@@ -81,6 +81,7 @@ void xfs_inode_clear_cowblocks_tag(struct xfs_inode *ip);
81int xfs_icache_free_cowblocks(struct xfs_mount *, struct xfs_eofblocks *); 81int xfs_icache_free_cowblocks(struct xfs_mount *, struct xfs_eofblocks *);
82int xfs_inode_free_quota_cowblocks(struct xfs_inode *ip); 82int xfs_inode_free_quota_cowblocks(struct xfs_inode *ip);
83void xfs_cowblocks_worker(struct work_struct *); 83void xfs_cowblocks_worker(struct work_struct *);
84void xfs_queue_cowblocks(struct xfs_mount *);
84 85
85int xfs_inode_ag_iterator(struct xfs_mount *mp, 86int xfs_inode_ag_iterator(struct xfs_mount *mp,
86 int (*execute)(struct xfs_inode *ip, int flags, void *args), 87 int (*execute)(struct xfs_inode *ip, int flags, void *args),
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index 801274126648..6f95bdb408ce 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -749,7 +749,6 @@ xfs_ialloc(
749 xfs_nlink_t nlink, 749 xfs_nlink_t nlink,
750 dev_t rdev, 750 dev_t rdev,
751 prid_t prid, 751 prid_t prid,
752 int okalloc,
753 xfs_buf_t **ialloc_context, 752 xfs_buf_t **ialloc_context,
754 xfs_inode_t **ipp) 753 xfs_inode_t **ipp)
755{ 754{
@@ -765,7 +764,7 @@ xfs_ialloc(
765 * Call the space management code to pick 764 * Call the space management code to pick
766 * the on-disk inode to be allocated. 765 * the on-disk inode to be allocated.
767 */ 766 */
768 error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, okalloc, 767 error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode,
769 ialloc_context, &ino); 768 ialloc_context, &ino);
770 if (error) 769 if (error)
771 return error; 770 return error;
@@ -957,7 +956,6 @@ xfs_dir_ialloc(
957 xfs_nlink_t nlink, 956 xfs_nlink_t nlink,
958 dev_t rdev, 957 dev_t rdev,
959 prid_t prid, /* project id */ 958 prid_t prid, /* project id */
960 int okalloc, /* ok to allocate new space */
961 xfs_inode_t **ipp, /* pointer to inode; it will be 959 xfs_inode_t **ipp, /* pointer to inode; it will be
962 locked. */ 960 locked. */
963 int *committed) 961 int *committed)
@@ -988,8 +986,8 @@ xfs_dir_ialloc(
988 * transaction commit so that no other process can steal 986 * transaction commit so that no other process can steal
989 * the inode(s) that we've just allocated. 987 * the inode(s) that we've just allocated.
990 */ 988 */
991 code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, okalloc, 989 code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, &ialloc_context,
992 &ialloc_context, &ip); 990 &ip);
993 991
994 /* 992 /*
995 * Return an error if we were unable to allocate a new inode. 993 * Return an error if we were unable to allocate a new inode.
@@ -1061,7 +1059,7 @@ xfs_dir_ialloc(
1061 * this call should always succeed. 1059 * this call should always succeed.
1062 */ 1060 */
1063 code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, 1061 code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid,
1064 okalloc, &ialloc_context, &ip); 1062 &ialloc_context, &ip);
1065 1063
1066 /* 1064 /*
1067 * If we get an error at this point, return to the caller 1065 * If we get an error at this point, return to the caller
@@ -1182,11 +1180,6 @@ xfs_create(
1182 xfs_flush_inodes(mp); 1180 xfs_flush_inodes(mp);
1183 error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp); 1181 error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);
1184 } 1182 }
1185 if (error == -ENOSPC) {
1186 /* No space at all so try a "no-allocation" reservation */
1187 resblks = 0;
1188 error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp);
1189 }
1190 if (error) 1183 if (error)
1191 goto out_release_inode; 1184 goto out_release_inode;
1192 1185
@@ -1203,19 +1196,13 @@ xfs_create(
1203 if (error) 1196 if (error)
1204 goto out_trans_cancel; 1197 goto out_trans_cancel;
1205 1198
1206 if (!resblks) {
1207 error = xfs_dir_canenter(tp, dp, name);
1208 if (error)
1209 goto out_trans_cancel;
1210 }
1211
1212 /* 1199 /*
1213 * A newly created regular or special file just has one directory 1200 * A newly created regular or special file just has one directory
1214 * entry pointing to them, but a directory also the "." entry 1201 * entry pointing to them, but a directory also the "." entry
1215 * pointing to itself. 1202 * pointing to itself.
1216 */ 1203 */
1217 error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, 1204 error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, prid, &ip,
1218 prid, resblks > 0, &ip, NULL); 1205 NULL);
1219 if (error) 1206 if (error)
1220 goto out_trans_cancel; 1207 goto out_trans_cancel;
1221 1208
@@ -1340,11 +1327,6 @@ xfs_create_tmpfile(
1340 tres = &M_RES(mp)->tr_create_tmpfile; 1327 tres = &M_RES(mp)->tr_create_tmpfile;
1341 1328
1342 error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp); 1329 error = xfs_trans_alloc(mp, tres, resblks, 0, 0, &tp);
1343 if (error == -ENOSPC) {
1344 /* No space at all so try a "no-allocation" reservation */
1345 resblks = 0;
1346 error = xfs_trans_alloc(mp, tres, 0, 0, 0, &tp);
1347 }
1348 if (error) 1330 if (error)
1349 goto out_release_inode; 1331 goto out_release_inode;
1350 1332
@@ -1353,8 +1335,7 @@ xfs_create_tmpfile(
1353 if (error) 1335 if (error)
1354 goto out_trans_cancel; 1336 goto out_trans_cancel;
1355 1337
1356 error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, 1338 error = xfs_dir_ialloc(&tp, dp, mode, 1, 0, prid, &ip, NULL);
1357 prid, resblks > 0, &ip, NULL);
1358 if (error) 1339 if (error)
1359 goto out_trans_cancel; 1340 goto out_trans_cancel;
1360 1341
@@ -1506,6 +1487,24 @@ xfs_link(
1506 return error; 1487 return error;
1507} 1488}
1508 1489
1490/* Clear the reflink flag and the cowblocks tag if possible. */
1491static void
1492xfs_itruncate_clear_reflink_flags(
1493 struct xfs_inode *ip)
1494{
1495 struct xfs_ifork *dfork;
1496 struct xfs_ifork *cfork;
1497
1498 if (!xfs_is_reflink_inode(ip))
1499 return;
1500 dfork = XFS_IFORK_PTR(ip, XFS_DATA_FORK);
1501 cfork = XFS_IFORK_PTR(ip, XFS_COW_FORK);
1502 if (dfork->if_bytes == 0 && cfork->if_bytes == 0)
1503 ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
1504 if (cfork->if_bytes == 0)
1505 xfs_inode_clear_cowblocks_tag(ip);
1506}
1507
1509/* 1508/*
1510 * Free up the underlying blocks past new_size. The new size must be smaller 1509 * Free up the underlying blocks past new_size. The new size must be smaller
1511 * than the current size. This routine can be used both for the attribute and 1510 * than the current size. This routine can be used both for the attribute and
@@ -1602,15 +1601,7 @@ xfs_itruncate_extents(
1602 if (error) 1601 if (error)
1603 goto out; 1602 goto out;
1604 1603
1605 /* 1604 xfs_itruncate_clear_reflink_flags(ip);
1606 * Clear the reflink flag if there are no data fork blocks and
1607 * there are no extents staged in the cow fork.
1608 */
1609 if (xfs_is_reflink_inode(ip) && ip->i_cnextents == 0) {
1610 if (ip->i_d.di_nblocks == 0)
1611 ip->i_d.di_flags2 &= ~XFS_DIFLAG2_REFLINK;
1612 xfs_inode_clear_cowblocks_tag(ip);
1613 }
1614 1605
1615 /* 1606 /*
1616 * Always re-log the inode so that our permanent transaction can keep 1607 * Always re-log the inode so that our permanent transaction can keep
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index cc13c3763721..d383e392ec9d 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -232,6 +232,7 @@ static inline bool xfs_is_reflink_inode(struct xfs_inode *ip)
232 * log recovery to replay a bmap operation on the inode. 232 * log recovery to replay a bmap operation on the inode.
233 */ 233 */
234#define XFS_IRECOVERY (1 << 11) 234#define XFS_IRECOVERY (1 << 11)
235#define XFS_ICOWBLOCKS (1 << 12)/* has the cowblocks tag set */
235 236
236/* 237/*
237 * Per-lifetime flags need to be reset when re-using a reclaimable inode during 238 * Per-lifetime flags need to be reset when re-using a reclaimable inode during
@@ -428,7 +429,7 @@ xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip);
428xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip); 429xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip);
429 430
430int xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t, 431int xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t,
431 xfs_nlink_t, dev_t, prid_t, int, 432 xfs_nlink_t, dev_t, prid_t,
432 struct xfs_inode **, int *); 433 struct xfs_inode **, int *);
433 434
434/* from xfs_file.c */ 435/* from xfs_file.c */
diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c
index 33eb4fb2e3fd..66e1edbfb2b2 100644
--- a/fs/xfs/xfs_iomap.c
+++ b/fs/xfs/xfs_iomap.c
@@ -1006,7 +1006,7 @@ xfs_file_iomap_begin(
1006 } 1006 }
1007 1007
1008 ASSERT(offset <= mp->m_super->s_maxbytes); 1008 ASSERT(offset <= mp->m_super->s_maxbytes);
1009 if ((xfs_fsize_t)offset + length > mp->m_super->s_maxbytes) 1009 if (offset > mp->m_super->s_maxbytes - length)
1010 length = mp->m_super->s_maxbytes - offset; 1010 length = mp->m_super->s_maxbytes - offset;
1011 offset_fsb = XFS_B_TO_FSBT(mp, offset); 1011 offset_fsb = XFS_B_TO_FSBT(mp, offset);
1012 end_fsb = XFS_B_TO_FSB(mp, offset + length); 1012 end_fsb = XFS_B_TO_FSB(mp, offset + length);
@@ -1213,7 +1213,7 @@ xfs_xattr_iomap_begin(
1213 1213
1214 ASSERT(ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL); 1214 ASSERT(ip->i_d.di_aformat != XFS_DINODE_FMT_LOCAL);
1215 error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap, 1215 error = xfs_bmapi_read(ip, offset_fsb, end_fsb - offset_fsb, &imap,
1216 &nimaps, XFS_BMAPI_ENTIRE | XFS_BMAPI_ATTRFORK); 1216 &nimaps, XFS_BMAPI_ATTRFORK);
1217out_unlock: 1217out_unlock:
1218 xfs_iunlock(ip, lockmode); 1218 xfs_iunlock(ip, lockmode);
1219 1219
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 010a13a201aa..b897b11afb2c 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -48,7 +48,7 @@
48STATIC int xfs_qm_init_quotainos(xfs_mount_t *); 48STATIC int xfs_qm_init_quotainos(xfs_mount_t *);
49STATIC int xfs_qm_init_quotainfo(xfs_mount_t *); 49STATIC int xfs_qm_init_quotainfo(xfs_mount_t *);
50 50
51 51STATIC void xfs_qm_destroy_quotainos(xfs_quotainfo_t *qi);
52STATIC void xfs_qm_dqfree_one(struct xfs_dquot *dqp); 52STATIC void xfs_qm_dqfree_one(struct xfs_dquot *dqp);
53/* 53/*
54 * We use the batch lookup interface to iterate over the dquots as it 54 * We use the batch lookup interface to iterate over the dquots as it
@@ -695,9 +695,17 @@ xfs_qm_init_quotainfo(
695 qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan; 695 qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan;
696 qinf->qi_shrinker.seeks = DEFAULT_SEEKS; 696 qinf->qi_shrinker.seeks = DEFAULT_SEEKS;
697 qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE; 697 qinf->qi_shrinker.flags = SHRINKER_NUMA_AWARE;
698 register_shrinker(&qinf->qi_shrinker); 698
699 error = register_shrinker(&qinf->qi_shrinker);
700 if (error)
701 goto out_free_inos;
702
699 return 0; 703 return 0;
700 704
705out_free_inos:
706 mutex_destroy(&qinf->qi_quotaofflock);
707 mutex_destroy(&qinf->qi_tree_lock);
708 xfs_qm_destroy_quotainos(qinf);
701out_free_lru: 709out_free_lru:
702 list_lru_destroy(&qinf->qi_lru); 710 list_lru_destroy(&qinf->qi_lru);
703out_free_qinf: 711out_free_qinf:
@@ -706,7 +714,6 @@ out_free_qinf:
706 return error; 714 return error;
707} 715}
708 716
709
710/* 717/*
711 * Gets called when unmounting a filesystem or when all quotas get 718 * Gets called when unmounting a filesystem or when all quotas get
712 * turned off. 719 * turned off.
@@ -723,19 +730,8 @@ xfs_qm_destroy_quotainfo(
723 730
724 unregister_shrinker(&qi->qi_shrinker); 731 unregister_shrinker(&qi->qi_shrinker);
725 list_lru_destroy(&qi->qi_lru); 732 list_lru_destroy(&qi->qi_lru);
726 733 xfs_qm_destroy_quotainos(qi);
727 if (qi->qi_uquotaip) { 734 mutex_destroy(&qi->qi_tree_lock);
728 IRELE(qi->qi_uquotaip);
729 qi->qi_uquotaip = NULL; /* paranoia */
730 }
731 if (qi->qi_gquotaip) {
732 IRELE(qi->qi_gquotaip);
733 qi->qi_gquotaip = NULL;
734 }
735 if (qi->qi_pquotaip) {
736 IRELE(qi->qi_pquotaip);
737 qi->qi_pquotaip = NULL;
738 }
739 mutex_destroy(&qi->qi_quotaofflock); 735 mutex_destroy(&qi->qi_quotaofflock);
740 kmem_free(qi); 736 kmem_free(qi);
741 mp->m_quotainfo = NULL; 737 mp->m_quotainfo = NULL;
@@ -793,8 +789,8 @@ xfs_qm_qino_alloc(
793 return error; 789 return error;
794 790
795 if (need_alloc) { 791 if (need_alloc) {
796 error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, 1, ip, 792 error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ip,
797 &committed); 793 &committed);
798 if (error) { 794 if (error) {
799 xfs_trans_cancel(tp); 795 xfs_trans_cancel(tp);
800 return error; 796 return error;
@@ -1600,6 +1596,24 @@ error_rele:
1600} 1596}
1601 1597
1602STATIC void 1598STATIC void
1599xfs_qm_destroy_quotainos(
1600 xfs_quotainfo_t *qi)
1601{
1602 if (qi->qi_uquotaip) {
1603 IRELE(qi->qi_uquotaip);
1604 qi->qi_uquotaip = NULL; /* paranoia */
1605 }
1606 if (qi->qi_gquotaip) {
1607 IRELE(qi->qi_gquotaip);
1608 qi->qi_gquotaip = NULL;
1609 }
1610 if (qi->qi_pquotaip) {
1611 IRELE(qi->qi_pquotaip);
1612 qi->qi_pquotaip = NULL;
1613 }
1614}
1615
1616STATIC void
1603xfs_qm_dqfree_one( 1617xfs_qm_dqfree_one(
1604 struct xfs_dquot *dqp) 1618 struct xfs_dquot *dqp)
1605{ 1619{
diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c
index cc041a29eb70..47aea2e82c26 100644
--- a/fs/xfs/xfs_reflink.c
+++ b/fs/xfs/xfs_reflink.c
@@ -49,8 +49,6 @@
49#include "xfs_alloc.h" 49#include "xfs_alloc.h"
50#include "xfs_quota_defs.h" 50#include "xfs_quota_defs.h"
51#include "xfs_quota.h" 51#include "xfs_quota.h"
52#include "xfs_btree.h"
53#include "xfs_bmap_btree.h"
54#include "xfs_reflink.h" 52#include "xfs_reflink.h"
55#include "xfs_iomap.h" 53#include "xfs_iomap.h"
56#include "xfs_rmap_btree.h" 54#include "xfs_rmap_btree.h"
@@ -456,6 +454,8 @@ retry:
456 if (error) 454 if (error)
457 goto out_bmap_cancel; 455 goto out_bmap_cancel;
458 456
457 xfs_inode_set_cowblocks_tag(ip);
458
459 /* Finish up. */ 459 /* Finish up. */
460 error = xfs_defer_finish(&tp, &dfops); 460 error = xfs_defer_finish(&tp, &dfops);
461 if (error) 461 if (error)
@@ -492,8 +492,9 @@ xfs_reflink_find_cow_mapping(
492 struct xfs_iext_cursor icur; 492 struct xfs_iext_cursor icur;
493 493
494 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED)); 494 ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL | XFS_ILOCK_SHARED));
495 ASSERT(xfs_is_reflink_inode(ip));
496 495
496 if (!xfs_is_reflink_inode(ip))
497 return false;
497 offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset); 498 offset_fsb = XFS_B_TO_FSBT(ip->i_mount, offset);
498 if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got)) 499 if (!xfs_iext_lookup_extent(ip, ifp, offset_fsb, &icur, &got))
499 return false; 500 return false;
@@ -612,6 +613,9 @@ xfs_reflink_cancel_cow_blocks(
612 613
613 /* Remove the mapping from the CoW fork. */ 614 /* Remove the mapping from the CoW fork. */
614 xfs_bmap_del_extent_cow(ip, &icur, &got, &del); 615 xfs_bmap_del_extent_cow(ip, &icur, &got, &del);
616 } else {
617 /* Didn't do anything, push cursor back. */
618 xfs_iext_prev(ifp, &icur);
615 } 619 }
616next_extent: 620next_extent:
617 if (!xfs_iext_get_extent(ifp, &icur, &got)) 621 if (!xfs_iext_get_extent(ifp, &icur, &got))
@@ -727,7 +731,7 @@ xfs_reflink_end_cow(
727 (unsigned int)(end_fsb - offset_fsb), 731 (unsigned int)(end_fsb - offset_fsb),
728 XFS_DATA_FORK); 732 XFS_DATA_FORK);
729 error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write, 733 error = xfs_trans_alloc(ip->i_mount, &M_RES(ip->i_mount)->tr_write,
730 resblks, 0, 0, &tp); 734 resblks, 0, XFS_TRANS_RESERVE, &tp);
731 if (error) 735 if (error)
732 goto out; 736 goto out;
733 737
@@ -1293,6 +1297,17 @@ xfs_reflink_remap_range(
1293 1297
1294 trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out); 1298 trace_xfs_reflink_remap_range(src, pos_in, len, dest, pos_out);
1295 1299
1300 /*
1301 * Clear out post-eof preallocations because we don't have page cache
1302 * backing the delayed allocations and they'll never get freed on
1303 * their own.
1304 */
1305 if (xfs_can_free_eofblocks(dest, true)) {
1306 ret = xfs_free_eofblocks(dest);
1307 if (ret)
1308 goto out_unlock;
1309 }
1310
1296 /* Set flags and remap blocks. */ 1311 /* Set flags and remap blocks. */
1297 ret = xfs_reflink_set_inode_flag(src, dest); 1312 ret = xfs_reflink_set_inode_flag(src, dest);
1298 if (ret) 1313 if (ret)
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 5122d3021117..1dacccc367f8 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -1360,6 +1360,7 @@ xfs_fs_remount(
1360 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE); 1360 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1361 return error; 1361 return error;
1362 } 1362 }
1363 xfs_queue_cowblocks(mp);
1363 1364
1364 /* Create the per-AG metadata reservation pool .*/ 1365 /* Create the per-AG metadata reservation pool .*/
1365 error = xfs_fs_reserve_ag_blocks(mp); 1366 error = xfs_fs_reserve_ag_blocks(mp);
@@ -1369,6 +1370,14 @@ xfs_fs_remount(
1369 1370
1370 /* rw -> ro */ 1371 /* rw -> ro */
1371 if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) { 1372 if (!(mp->m_flags & XFS_MOUNT_RDONLY) && (*flags & SB_RDONLY)) {
1373 /* Get rid of any leftover CoW reservations... */
1374 cancel_delayed_work_sync(&mp->m_cowblocks_work);
1375 error = xfs_icache_free_cowblocks(mp, NULL);
1376 if (error) {
1377 xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
1378 return error;
1379 }
1380
1372 /* Free the per-AG metadata reservation pool. */ 1381 /* Free the per-AG metadata reservation pool. */
1373 error = xfs_fs_unreserve_ag_blocks(mp); 1382 error = xfs_fs_unreserve_ag_blocks(mp);
1374 if (error) { 1383 if (error) {
diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c
index 68d3ca2c4968..2e9e793a8f9d 100644
--- a/fs/xfs/xfs_symlink.c
+++ b/fs/xfs/xfs_symlink.c
@@ -232,11 +232,6 @@ xfs_symlink(
232 resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks); 232 resblks = XFS_SYMLINK_SPACE_RES(mp, link_name->len, fs_blocks);
233 233
234 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, resblks, 0, 0, &tp); 234 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, resblks, 0, 0, &tp);
235 if (error == -ENOSPC && fs_blocks == 0) {
236 resblks = 0;
237 error = xfs_trans_alloc(mp, &M_RES(mp)->tr_symlink, 0, 0, 0,
238 &tp);
239 }
240 if (error) 235 if (error)
241 goto out_release_inode; 236 goto out_release_inode;
242 237
@@ -260,14 +255,6 @@ xfs_symlink(
260 goto out_trans_cancel; 255 goto out_trans_cancel;
261 256
262 /* 257 /*
263 * Check for ability to enter directory entry, if no space reserved.
264 */
265 if (!resblks) {
266 error = xfs_dir_canenter(tp, dp, link_name);
267 if (error)
268 goto out_trans_cancel;
269 }
270 /*
271 * Initialize the bmap freelist prior to calling either 258 * Initialize the bmap freelist prior to calling either
272 * bmapi or the directory create code. 259 * bmapi or the directory create code.
273 */ 260 */
@@ -277,7 +264,7 @@ xfs_symlink(
277 * Allocate an inode for the symlink. 264 * Allocate an inode for the symlink.
278 */ 265 */
279 error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0, 266 error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
280 prid, resblks > 0, &ip, NULL); 267 prid, &ip, NULL);
281 if (error) 268 if (error)
282 goto out_trans_cancel; 269 goto out_trans_cancel;
283 270
diff --git a/fs/xfs/xfs_trace.c b/fs/xfs/xfs_trace.c
index 5d95fe348294..35f3546b6af5 100644
--- a/fs/xfs/xfs_trace.c
+++ b/fs/xfs/xfs_trace.c
@@ -24,7 +24,6 @@
24#include "xfs_mount.h" 24#include "xfs_mount.h"
25#include "xfs_defer.h" 25#include "xfs_defer.h"
26#include "xfs_da_format.h" 26#include "xfs_da_format.h"
27#include "xfs_defer.h"
28#include "xfs_inode.h" 27#include "xfs_inode.h"
29#include "xfs_btree.h" 28#include "xfs_btree.h"
30#include "xfs_da_btree.h" 29#include "xfs_da_btree.h"