summary refs log tree commit diff stats
path: root/fs
diff options
context:
space:
mode:
author: Jason Gunthorpe <jgg@mellanox.com> 2019-08-21 13:12:29 -0400
committer: Jason Gunthorpe <jgg@mellanox.com> 2019-08-21 19:58:18 -0400
commit: daa138a58c802e7b4c2fb73f9b85bb082616ef43 (patch)
tree: be913e8e3745bb367d2ba371598f447649102cfc /fs
parent: 6869b7b206595ae0e326f59719090351eb8f4f5d (diff)
parent: fba0e448a2c5b297a4ddc1ec4e48f4aa6600a1c9 (diff)
Merge branch 'odp_fixes' into hmm.git
From rdma.git

Jason Gunthorpe says:

====================
This is a collection of general cleanups for ODP to clarify some of the
flows around umem creation and use of the interval tree.
====================

The branch is based on v5.3-rc5 due to dependencies, and is being taken
into hmm.git due to dependencies in the next patches.

* odp_fixes:
  RDMA/mlx5: Use odp instead of mr->umem in pagefault_mr
  RDMA/mlx5: Use ib_umem_start instead of umem.address
  RDMA/core: Make invalidate_range a device operation
  RDMA/odp: Use kvcalloc for the dma_list and page_list
  RDMA/odp: Check for overflow when computing the umem_odp end
  RDMA/odp: Provide ib_umem_odp_release() to undo the allocs
  RDMA/odp: Split creating a umem_odp from ib_umem_get
  RDMA/odp: Make the three ways to create a umem_odp clear
  RMDA/odp: Consolidate umem_odp initialization
  RDMA/odp: Make it clearer when a umem is an implicit ODP umem
  RDMA/odp: Iterate over the whole rbtree directly
  RDMA/odp: Use the common interval tree library instead of generic
  RDMA/mlx5: Fix MR npages calculation for IB_ACCESS_HUGETLB

Signed-off-by: Jason Gunthorpe <jgg@mellanox.com>
Diffstat (limited to 'fs')
-rw-r--r-- fs/afs/cmservice.c 10
-rw-r--r-- fs/afs/dir.c 89
-rw-r--r-- fs/afs/file.c 12
-rw-r--r-- fs/afs/fsclient.c 51
-rw-r--r-- fs/afs/vlclient.c 11
-rw-r--r-- fs/afs/yfsclient.c 54
-rw-r--r-- fs/block_dev.c 86
-rw-r--r-- fs/btrfs/Kconfig 1
-rw-r--r-- fs/btrfs/backref.c 2
-rw-r--r-- fs/btrfs/ctree.h 4
-rw-r--r-- fs/btrfs/disk-io.c 3
-rw-r--r-- fs/btrfs/extent-tree.c 71
-rw-r--r-- fs/btrfs/inode.c 24
-rw-r--r-- fs/btrfs/locking.c 9
-rw-r--r-- fs/btrfs/ordered-data.c 11
-rw-r--r-- fs/btrfs/send.c 77
-rw-r--r-- fs/btrfs/transaction.c 32
-rw-r--r-- fs/btrfs/transaction.h 3
-rw-r--r-- fs/btrfs/volumes.c 23
-rw-r--r-- fs/cifs/connect.c 1
-rw-r--r-- fs/cifs/smb2ops.c 39
-rw-r--r-- fs/cifs/smb2pdu.c 7
-rw-r--r-- fs/compat_ioctl.c 3
-rw-r--r-- fs/coredump.c 44
-rw-r--r-- fs/dax.c 4
-rw-r--r-- fs/exec.c 2
-rw-r--r-- fs/f2fs/file.c 63
-rw-r--r-- fs/f2fs/gc.c 70
-rw-r--r-- fs/f2fs/super.c 48
-rw-r--r-- fs/gfs2/bmap.c 179
-rw-r--r-- fs/io_uring.c 96
-rw-r--r-- fs/iomap/Makefile 2
-rw-r--r-- fs/namespace.c 4
-rw-r--r-- fs/nfs/delegation.c 25
-rw-r--r-- fs/nfs/delegation.h 2
-rw-r--r-- fs/nfs/fscache.c 7
-rw-r--r-- fs/nfs/fscache.h 2
-rw-r--r-- fs/nfs/nfs4_fs.h 3
-rw-r--r-- fs/nfs/nfs4client.c 5
-rw-r--r-- fs/nfs/nfs4proc.c 109
-rw-r--r-- fs/nfs/nfs4state.c 49
-rw-r--r-- fs/nfs/pnfs.c 7
-rw-r--r-- fs/nfs/super.c 1
-rw-r--r-- fs/ocfs2/xattr.c 3
-rw-r--r-- fs/open.c 19
-rw-r--r-- fs/seq_file.c 2
-rw-r--r-- fs/super.c 5
-rw-r--r-- fs/xfs/libxfs/xfs_bmap.c 29
-rw-r--r-- fs/xfs/libxfs/xfs_da_btree.c 19
-rw-r--r-- fs/xfs/libxfs/xfs_dir2_node.c 3
-rw-r--r-- fs/xfs/scrub/dabtree.c 6
-rw-r--r-- fs/xfs/xfs_itable.c 3
-rw-r--r-- fs/xfs/xfs_log.c 5
53 files changed, 888 insertions, 551 deletions
diff --git a/fs/afs/cmservice.c b/fs/afs/cmservice.c
index 4f1b6f466ff5..b86195e4dc6c 100644
--- a/fs/afs/cmservice.c
+++ b/fs/afs/cmservice.c
@@ -505,18 +505,14 @@ static void SRXAFSCB_ProbeUuid(struct work_struct *work)
505 struct afs_call *call = container_of(work, struct afs_call, work); 505 struct afs_call *call = container_of(work, struct afs_call, work);
506 struct afs_uuid *r = call->request; 506 struct afs_uuid *r = call->request;
507 507
508 struct {
509 __be32 match;
510 } reply;
511
512 _enter(""); 508 _enter("");
513 509
514 if (memcmp(r, &call->net->uuid, sizeof(call->net->uuid)) == 0) 510 if (memcmp(r, &call->net->uuid, sizeof(call->net->uuid)) == 0)
515 reply.match = htonl(0); 511 afs_send_empty_reply(call);
516 else 512 else
517 reply.match = htonl(1); 513 rxrpc_kernel_abort_call(call->net->socket, call->rxcall,
514 1, 1, "K-1");
518 515
519 afs_send_simple_reply(call, &reply, sizeof(reply));
520 afs_put_call(call); 516 afs_put_call(call);
521 _leave(""); 517 _leave("");
522} 518}
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index e640d67274be..81207dc3c997 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -440,7 +440,7 @@ static int afs_dir_iterate_block(struct afs_vnode *dvnode,
440 * iterate through the data blob that lists the contents of an AFS directory 440 * iterate through the data blob that lists the contents of an AFS directory
441 */ 441 */
442static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx, 442static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
443 struct key *key) 443 struct key *key, afs_dataversion_t *_dir_version)
444{ 444{
445 struct afs_vnode *dvnode = AFS_FS_I(dir); 445 struct afs_vnode *dvnode = AFS_FS_I(dir);
446 struct afs_xdr_dir_page *dbuf; 446 struct afs_xdr_dir_page *dbuf;
@@ -460,6 +460,7 @@ static int afs_dir_iterate(struct inode *dir, struct dir_context *ctx,
460 req = afs_read_dir(dvnode, key); 460 req = afs_read_dir(dvnode, key);
461 if (IS_ERR(req)) 461 if (IS_ERR(req))
462 return PTR_ERR(req); 462 return PTR_ERR(req);
463 *_dir_version = req->data_version;
463 464
464 /* round the file position up to the next entry boundary */ 465 /* round the file position up to the next entry boundary */
465 ctx->pos += sizeof(union afs_xdr_dirent) - 1; 466 ctx->pos += sizeof(union afs_xdr_dirent) - 1;
@@ -514,7 +515,10 @@ out:
514 */ 515 */
515static int afs_readdir(struct file *file, struct dir_context *ctx) 516static int afs_readdir(struct file *file, struct dir_context *ctx)
516{ 517{
517 return afs_dir_iterate(file_inode(file), ctx, afs_file_key(file)); 518 afs_dataversion_t dir_version;
519
520 return afs_dir_iterate(file_inode(file), ctx, afs_file_key(file),
521 &dir_version);
518} 522}
519 523
520/* 524/*
@@ -555,7 +559,8 @@ static int afs_lookup_one_filldir(struct dir_context *ctx, const char *name,
555 * - just returns the FID the dentry name maps to if found 559 * - just returns the FID the dentry name maps to if found
556 */ 560 */
557static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry, 561static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry,
558 struct afs_fid *fid, struct key *key) 562 struct afs_fid *fid, struct key *key,
563 afs_dataversion_t *_dir_version)
559{ 564{
560 struct afs_super_info *as = dir->i_sb->s_fs_info; 565 struct afs_super_info *as = dir->i_sb->s_fs_info;
561 struct afs_lookup_one_cookie cookie = { 566 struct afs_lookup_one_cookie cookie = {
@@ -568,7 +573,7 @@ static int afs_do_lookup_one(struct inode *dir, struct dentry *dentry,
568 _enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry); 573 _enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry);
569 574
570 /* search the directory */ 575 /* search the directory */
571 ret = afs_dir_iterate(dir, &cookie.ctx, key); 576 ret = afs_dir_iterate(dir, &cookie.ctx, key, _dir_version);
572 if (ret < 0) { 577 if (ret < 0) {
573 _leave(" = %d [iter]", ret); 578 _leave(" = %d [iter]", ret);
574 return ret; 579 return ret;
@@ -642,6 +647,7 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
642 struct afs_server *server; 647 struct afs_server *server;
643 struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode; 648 struct afs_vnode *dvnode = AFS_FS_I(dir), *vnode;
644 struct inode *inode = NULL, *ti; 649 struct inode *inode = NULL, *ti;
650 afs_dataversion_t data_version = READ_ONCE(dvnode->status.data_version);
645 int ret, i; 651 int ret, i;
646 652
647 _enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry); 653 _enter("{%lu},%p{%pd},", dir->i_ino, dentry, dentry);
@@ -669,12 +675,14 @@ static struct inode *afs_do_lookup(struct inode *dir, struct dentry *dentry,
669 cookie->fids[i].vid = as->volume->vid; 675 cookie->fids[i].vid = as->volume->vid;
670 676
671 /* search the directory */ 677 /* search the directory */
672 ret = afs_dir_iterate(dir, &cookie->ctx, key); 678 ret = afs_dir_iterate(dir, &cookie->ctx, key, &data_version);
673 if (ret < 0) { 679 if (ret < 0) {
674 inode = ERR_PTR(ret); 680 inode = ERR_PTR(ret);
675 goto out; 681 goto out;
676 } 682 }
677 683
684 dentry->d_fsdata = (void *)(unsigned long)data_version;
685
678 inode = ERR_PTR(-ENOENT); 686 inode = ERR_PTR(-ENOENT);
679 if (!cookie->found) 687 if (!cookie->found)
680 goto out; 688 goto out;
@@ -968,7 +976,8 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
968 struct dentry *parent; 976 struct dentry *parent;
969 struct inode *inode; 977 struct inode *inode;
970 struct key *key; 978 struct key *key;
971 long dir_version, de_version; 979 afs_dataversion_t dir_version;
980 long de_version;
972 int ret; 981 int ret;
973 982
974 if (flags & LOOKUP_RCU) 983 if (flags & LOOKUP_RCU)
@@ -1014,20 +1023,20 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
1014 * on a 32-bit system, we only have 32 bits in the dentry to store the 1023 * on a 32-bit system, we only have 32 bits in the dentry to store the
1015 * version. 1024 * version.
1016 */ 1025 */
1017 dir_version = (long)dir->status.data_version; 1026 dir_version = dir->status.data_version;
1018 de_version = (long)dentry->d_fsdata; 1027 de_version = (long)dentry->d_fsdata;
1019 if (de_version == dir_version) 1028 if (de_version == (long)dir_version)
1020 goto out_valid; 1029 goto out_valid_noupdate;
1021 1030
1022 dir_version = (long)dir->invalid_before; 1031 dir_version = dir->invalid_before;
1023 if (de_version - dir_version >= 0) 1032 if (de_version - (long)dir_version >= 0)
1024 goto out_valid; 1033 goto out_valid;
1025 1034
1026 _debug("dir modified"); 1035 _debug("dir modified");
1027 afs_stat_v(dir, n_reval); 1036 afs_stat_v(dir, n_reval);
1028 1037
1029 /* search the directory for this vnode */ 1038 /* search the directory for this vnode */
1030 ret = afs_do_lookup_one(&dir->vfs_inode, dentry, &fid, key); 1039 ret = afs_do_lookup_one(&dir->vfs_inode, dentry, &fid, key, &dir_version);
1031 switch (ret) { 1040 switch (ret) {
1032 case 0: 1041 case 0:
1033 /* the filename maps to something */ 1042 /* the filename maps to something */
@@ -1080,7 +1089,8 @@ static int afs_d_revalidate(struct dentry *dentry, unsigned int flags)
1080 } 1089 }
1081 1090
1082out_valid: 1091out_valid:
1083 dentry->d_fsdata = (void *)dir_version; 1092 dentry->d_fsdata = (void *)(unsigned long)dir_version;
1093out_valid_noupdate:
1084 dput(parent); 1094 dput(parent);
1085 key_put(key); 1095 key_put(key);
1086 _leave(" = 1 [valid]"); 1096 _leave(" = 1 [valid]");
@@ -1186,6 +1196,20 @@ static void afs_prep_for_new_inode(struct afs_fs_cursor *fc,
1186} 1196}
1187 1197
1188/* 1198/*
1199 * Note that a dentry got changed. We need to set d_fsdata to the data version
1200 * number derived from the result of the operation. It doesn't matter if
1201 * d_fsdata goes backwards as we'll just revalidate.
1202 */
1203static void afs_update_dentry_version(struct afs_fs_cursor *fc,
1204 struct dentry *dentry,
1205 struct afs_status_cb *scb)
1206{
1207 if (fc->ac.error == 0)
1208 dentry->d_fsdata =
1209 (void *)(unsigned long)scb->status.data_version;
1210}
1211
1212/*
1189 * create a directory on an AFS filesystem 1213 * create a directory on an AFS filesystem
1190 */ 1214 */
1191static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode) 1215static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
@@ -1227,6 +1251,7 @@ static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
1227 afs_check_for_remote_deletion(&fc, dvnode); 1251 afs_check_for_remote_deletion(&fc, dvnode);
1228 afs_vnode_commit_status(&fc, dvnode, fc.cb_break, 1252 afs_vnode_commit_status(&fc, dvnode, fc.cb_break,
1229 &data_version, &scb[0]); 1253 &data_version, &scb[0]);
1254 afs_update_dentry_version(&fc, dentry, &scb[0]);
1230 afs_vnode_new_inode(&fc, dentry, &iget_data, &scb[1]); 1255 afs_vnode_new_inode(&fc, dentry, &iget_data, &scb[1]);
1231 ret = afs_end_vnode_operation(&fc); 1256 ret = afs_end_vnode_operation(&fc);
1232 if (ret < 0) 1257 if (ret < 0)
@@ -1319,6 +1344,7 @@ static int afs_rmdir(struct inode *dir, struct dentry *dentry)
1319 1344
1320 afs_vnode_commit_status(&fc, dvnode, fc.cb_break, 1345 afs_vnode_commit_status(&fc, dvnode, fc.cb_break,
1321 &data_version, scb); 1346 &data_version, scb);
1347 afs_update_dentry_version(&fc, dentry, scb);
1322 ret = afs_end_vnode_operation(&fc); 1348 ret = afs_end_vnode_operation(&fc);
1323 if (ret == 0) { 1349 if (ret == 0) {
1324 afs_dir_remove_subdir(dentry); 1350 afs_dir_remove_subdir(dentry);
@@ -1458,6 +1484,7 @@ static int afs_unlink(struct inode *dir, struct dentry *dentry)
1458 &data_version, &scb[0]); 1484 &data_version, &scb[0]);
1459 afs_vnode_commit_status(&fc, vnode, fc.cb_break_2, 1485 afs_vnode_commit_status(&fc, vnode, fc.cb_break_2,
1460 &data_version_2, &scb[1]); 1486 &data_version_2, &scb[1]);
1487 afs_update_dentry_version(&fc, dentry, &scb[0]);
1461 ret = afs_end_vnode_operation(&fc); 1488 ret = afs_end_vnode_operation(&fc);
1462 if (ret == 0 && !(scb[1].have_status || scb[1].have_error)) 1489 if (ret == 0 && !(scb[1].have_status || scb[1].have_error))
1463 ret = afs_dir_remove_link(dvnode, dentry, key); 1490 ret = afs_dir_remove_link(dvnode, dentry, key);
@@ -1526,6 +1553,7 @@ static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
1526 afs_check_for_remote_deletion(&fc, dvnode); 1553 afs_check_for_remote_deletion(&fc, dvnode);
1527 afs_vnode_commit_status(&fc, dvnode, fc.cb_break, 1554 afs_vnode_commit_status(&fc, dvnode, fc.cb_break,
1528 &data_version, &scb[0]); 1555 &data_version, &scb[0]);
1556 afs_update_dentry_version(&fc, dentry, &scb[0]);
1529 afs_vnode_new_inode(&fc, dentry, &iget_data, &scb[1]); 1557 afs_vnode_new_inode(&fc, dentry, &iget_data, &scb[1]);
1530 ret = afs_end_vnode_operation(&fc); 1558 ret = afs_end_vnode_operation(&fc);
1531 if (ret < 0) 1559 if (ret < 0)
@@ -1607,6 +1635,7 @@ static int afs_link(struct dentry *from, struct inode *dir,
1607 afs_vnode_commit_status(&fc, vnode, fc.cb_break_2, 1635 afs_vnode_commit_status(&fc, vnode, fc.cb_break_2,
1608 NULL, &scb[1]); 1636 NULL, &scb[1]);
1609 ihold(&vnode->vfs_inode); 1637 ihold(&vnode->vfs_inode);
1638 afs_update_dentry_version(&fc, dentry, &scb[0]);
1610 d_instantiate(dentry, &vnode->vfs_inode); 1639 d_instantiate(dentry, &vnode->vfs_inode);
1611 1640
1612 mutex_unlock(&vnode->io_lock); 1641 mutex_unlock(&vnode->io_lock);
@@ -1686,6 +1715,7 @@ static int afs_symlink(struct inode *dir, struct dentry *dentry,
1686 afs_check_for_remote_deletion(&fc, dvnode); 1715 afs_check_for_remote_deletion(&fc, dvnode);
1687 afs_vnode_commit_status(&fc, dvnode, fc.cb_break, 1716 afs_vnode_commit_status(&fc, dvnode, fc.cb_break,
1688 &data_version, &scb[0]); 1717 &data_version, &scb[0]);
1718 afs_update_dentry_version(&fc, dentry, &scb[0]);
1689 afs_vnode_new_inode(&fc, dentry, &iget_data, &scb[1]); 1719 afs_vnode_new_inode(&fc, dentry, &iget_data, &scb[1]);
1690 ret = afs_end_vnode_operation(&fc); 1720 ret = afs_end_vnode_operation(&fc);
1691 if (ret < 0) 1721 if (ret < 0)
@@ -1791,6 +1821,17 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
1791 } 1821 }
1792 } 1822 }
1793 1823
1824 /* This bit is potentially nasty as there's a potential race with
1825 * afs_d_revalidate{,_rcu}(). We have to change d_fsdata on the dentry
1826 * to reflect it's new parent's new data_version after the op, but
1827 * d_revalidate may see old_dentry between the op having taken place
1828 * and the version being updated.
1829 *
1830 * So drop the old_dentry for now to make other threads go through
1831 * lookup instead - which we hold a lock against.
1832 */
1833 d_drop(old_dentry);
1834
1794 ret = -ERESTARTSYS; 1835 ret = -ERESTARTSYS;
1795 if (afs_begin_vnode_operation(&fc, orig_dvnode, key, true)) { 1836 if (afs_begin_vnode_operation(&fc, orig_dvnode, key, true)) {
1796 afs_dataversion_t orig_data_version; 1837 afs_dataversion_t orig_data_version;
@@ -1802,9 +1843,9 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
1802 if (orig_dvnode != new_dvnode) { 1843 if (orig_dvnode != new_dvnode) {
1803 if (mutex_lock_interruptible_nested(&new_dvnode->io_lock, 1) < 0) { 1844 if (mutex_lock_interruptible_nested(&new_dvnode->io_lock, 1) < 0) {
1804 afs_end_vnode_operation(&fc); 1845 afs_end_vnode_operation(&fc);
1805 goto error_rehash; 1846 goto error_rehash_old;
1806 } 1847 }
1807 new_data_version = new_dvnode->status.data_version; 1848 new_data_version = new_dvnode->status.data_version + 1;
1808 } else { 1849 } else {
1809 new_data_version = orig_data_version; 1850 new_data_version = orig_data_version;
1810 new_scb = &scb[0]; 1851 new_scb = &scb[0];
@@ -1827,7 +1868,7 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
1827 } 1868 }
1828 ret = afs_end_vnode_operation(&fc); 1869 ret = afs_end_vnode_operation(&fc);
1829 if (ret < 0) 1870 if (ret < 0)
1830 goto error_rehash; 1871 goto error_rehash_old;
1831 } 1872 }
1832 1873
1833 if (ret == 0) { 1874 if (ret == 0) {
@@ -1853,10 +1894,26 @@ static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
1853 drop_nlink(new_inode); 1894 drop_nlink(new_inode);
1854 spin_unlock(&new_inode->i_lock); 1895 spin_unlock(&new_inode->i_lock);
1855 } 1896 }
1897
1898 /* Now we can update d_fsdata on the dentries to reflect their
1899 * new parent's data_version.
1900 *
1901 * Note that if we ever implement RENAME_EXCHANGE, we'll have
1902 * to update both dentries with opposing dir versions.
1903 */
1904 if (new_dvnode != orig_dvnode) {
1905 afs_update_dentry_version(&fc, old_dentry, &scb[1]);
1906 afs_update_dentry_version(&fc, new_dentry, &scb[1]);
1907 } else {
1908 afs_update_dentry_version(&fc, old_dentry, &scb[0]);
1909 afs_update_dentry_version(&fc, new_dentry, &scb[0]);
1910 }
1856 d_move(old_dentry, new_dentry); 1911 d_move(old_dentry, new_dentry);
1857 goto error_tmp; 1912 goto error_tmp;
1858 } 1913 }
1859 1914
1915error_rehash_old:
1916 d_rehash(new_dentry);
1860error_rehash: 1917error_rehash:
1861 if (rehash) 1918 if (rehash)
1862 d_rehash(rehash); 1919 d_rehash(rehash);
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 56b69576274d..dd3c55c9101c 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -191,11 +191,13 @@ void afs_put_read(struct afs_read *req)
191 int i; 191 int i;
192 192
193 if (refcount_dec_and_test(&req->usage)) { 193 if (refcount_dec_and_test(&req->usage)) {
194 for (i = 0; i < req->nr_pages; i++) 194 if (req->pages) {
195 if (req->pages[i]) 195 for (i = 0; i < req->nr_pages; i++)
196 put_page(req->pages[i]); 196 if (req->pages[i])
197 if (req->pages != req->array) 197 put_page(req->pages[i]);
198 kfree(req->pages); 198 if (req->pages != req->array)
199 kfree(req->pages);
200 }
199 kfree(req); 201 kfree(req);
200 } 202 }
201} 203}
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 1ce73e014139..114f281f3687 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -339,8 +339,9 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
339 call->tmp_u = htonl(0); 339 call->tmp_u = htonl(0);
340 afs_extract_to_tmp(call); 340 afs_extract_to_tmp(call);
341 } 341 }
342 /* Fall through */
342 343
343 /* Fall through - and extract the returned data length */ 344 /* extract the returned data length */
344 case 1: 345 case 1:
345 _debug("extract data length"); 346 _debug("extract data length");
346 ret = afs_extract_data(call, true); 347 ret = afs_extract_data(call, true);
@@ -366,8 +367,9 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
366 call->bvec[0].bv_page = req->pages[req->index]; 367 call->bvec[0].bv_page = req->pages[req->index];
367 iov_iter_bvec(&call->iter, READ, call->bvec, 1, size); 368 iov_iter_bvec(&call->iter, READ, call->bvec, 1, size);
368 ASSERTCMP(size, <=, PAGE_SIZE); 369 ASSERTCMP(size, <=, PAGE_SIZE);
370 /* Fall through */
369 371
370 /* Fall through - and extract the returned data */ 372 /* extract the returned data */
371 case 2: 373 case 2:
372 _debug("extract data %zu/%llu", 374 _debug("extract data %zu/%llu",
373 iov_iter_count(&call->iter), req->remain); 375 iov_iter_count(&call->iter), req->remain);
@@ -394,8 +396,8 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
394 /* Discard any excess data the server gave us */ 396 /* Discard any excess data the server gave us */
395 iov_iter_discard(&call->iter, READ, req->actual_len - req->len); 397 iov_iter_discard(&call->iter, READ, req->actual_len - req->len);
396 call->unmarshall = 3; 398 call->unmarshall = 3;
397
398 /* Fall through */ 399 /* Fall through */
400
399 case 3: 401 case 3:
400 _debug("extract discard %zu/%llu", 402 _debug("extract discard %zu/%llu",
401 iov_iter_count(&call->iter), req->actual_len - req->len); 403 iov_iter_count(&call->iter), req->actual_len - req->len);
@@ -407,8 +409,9 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
407 no_more_data: 409 no_more_data:
408 call->unmarshall = 4; 410 call->unmarshall = 4;
409 afs_extract_to_buf(call, (21 + 3 + 6) * 4); 411 afs_extract_to_buf(call, (21 + 3 + 6) * 4);
412 /* Fall through */
410 413
411 /* Fall through - and extract the metadata */ 414 /* extract the metadata */
412 case 4: 415 case 4:
413 ret = afs_extract_data(call, false); 416 ret = afs_extract_data(call, false);
414 if (ret < 0) 417 if (ret < 0)
@@ -1471,8 +1474,9 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
1471 case 0: 1474 case 0:
1472 call->unmarshall++; 1475 call->unmarshall++;
1473 afs_extract_to_buf(call, 12 * 4); 1476 afs_extract_to_buf(call, 12 * 4);
1477 /* Fall through */
1474 1478
1475 /* Fall through - and extract the returned status record */ 1479 /* extract the returned status record */
1476 case 1: 1480 case 1:
1477 _debug("extract status"); 1481 _debug("extract status");
1478 ret = afs_extract_data(call, true); 1482 ret = afs_extract_data(call, true);
@@ -1483,8 +1487,9 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
1483 xdr_decode_AFSFetchVolumeStatus(&bp, call->out_volstatus); 1487 xdr_decode_AFSFetchVolumeStatus(&bp, call->out_volstatus);
1484 call->unmarshall++; 1488 call->unmarshall++;
1485 afs_extract_to_tmp(call); 1489 afs_extract_to_tmp(call);
1490 /* Fall through */
1486 1491
1487 /* Fall through - and extract the volume name length */ 1492 /* extract the volume name length */
1488 case 2: 1493 case 2:
1489 ret = afs_extract_data(call, true); 1494 ret = afs_extract_data(call, true);
1490 if (ret < 0) 1495 if (ret < 0)
@@ -1498,8 +1503,9 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
1498 size = (call->count + 3) & ~3; /* It's padded */ 1503 size = (call->count + 3) & ~3; /* It's padded */
1499 afs_extract_to_buf(call, size); 1504 afs_extract_to_buf(call, size);
1500 call->unmarshall++; 1505 call->unmarshall++;
1506 /* Fall through */
1501 1507
1502 /* Fall through - and extract the volume name */ 1508 /* extract the volume name */
1503 case 3: 1509 case 3:
1504 _debug("extract volname"); 1510 _debug("extract volname");
1505 ret = afs_extract_data(call, true); 1511 ret = afs_extract_data(call, true);
@@ -1511,8 +1517,9 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
1511 _debug("volname '%s'", p); 1517 _debug("volname '%s'", p);
1512 afs_extract_to_tmp(call); 1518 afs_extract_to_tmp(call);
1513 call->unmarshall++; 1519 call->unmarshall++;
1520 /* Fall through */
1514 1521
1515 /* Fall through - and extract the offline message length */ 1522 /* extract the offline message length */
1516 case 4: 1523 case 4:
1517 ret = afs_extract_data(call, true); 1524 ret = afs_extract_data(call, true);
1518 if (ret < 0) 1525 if (ret < 0)
@@ -1526,8 +1533,9 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
1526 size = (call->count + 3) & ~3; /* It's padded */ 1533 size = (call->count + 3) & ~3; /* It's padded */
1527 afs_extract_to_buf(call, size); 1534 afs_extract_to_buf(call, size);
1528 call->unmarshall++; 1535 call->unmarshall++;
1536 /* Fall through */
1529 1537
1530 /* Fall through - and extract the offline message */ 1538 /* extract the offline message */
1531 case 5: 1539 case 5:
1532 _debug("extract offline"); 1540 _debug("extract offline");
1533 ret = afs_extract_data(call, true); 1541 ret = afs_extract_data(call, true);
@@ -1540,8 +1548,9 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
1540 1548
1541 afs_extract_to_tmp(call); 1549 afs_extract_to_tmp(call);
1542 call->unmarshall++; 1550 call->unmarshall++;
1551 /* Fall through */
1543 1552
1544 /* Fall through - and extract the message of the day length */ 1553 /* extract the message of the day length */
1545 case 6: 1554 case 6:
1546 ret = afs_extract_data(call, true); 1555 ret = afs_extract_data(call, true);
1547 if (ret < 0) 1556 if (ret < 0)
@@ -1555,8 +1564,9 @@ static int afs_deliver_fs_get_volume_status(struct afs_call *call)
1555 size = (call->count + 3) & ~3; /* It's padded */ 1564 size = (call->count + 3) & ~3; /* It's padded */
1556 afs_extract_to_buf(call, size); 1565 afs_extract_to_buf(call, size);
1557 call->unmarshall++; 1566 call->unmarshall++;
1567 /* Fall through */
1558 1568
1559 /* Fall through - and extract the message of the day */ 1569 /* extract the message of the day */
1560 case 7: 1570 case 7:
1561 _debug("extract motd"); 1571 _debug("extract motd");
1562 ret = afs_extract_data(call, false); 1572 ret = afs_extract_data(call, false);
@@ -1850,8 +1860,9 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call)
1850 case 0: 1860 case 0:
1851 afs_extract_to_tmp(call); 1861 afs_extract_to_tmp(call);
1852 call->unmarshall++; 1862 call->unmarshall++;
1863 /* Fall through */
1853 1864
1854 /* Fall through - and extract the capabilities word count */ 1865 /* Extract the capabilities word count */
1855 case 1: 1866 case 1:
1856 ret = afs_extract_data(call, true); 1867 ret = afs_extract_data(call, true);
1857 if (ret < 0) 1868 if (ret < 0)
@@ -1863,8 +1874,9 @@ static int afs_deliver_fs_get_capabilities(struct afs_call *call)
1863 call->count2 = count; 1874 call->count2 = count;
1864 iov_iter_discard(&call->iter, READ, count * sizeof(__be32)); 1875 iov_iter_discard(&call->iter, READ, count * sizeof(__be32));
1865 call->unmarshall++; 1876 call->unmarshall++;
1877 /* Fall through */
1866 1878
1867 /* Fall through - and extract capabilities words */ 1879 /* Extract capabilities words */
1868 case 2: 1880 case 2:
1869 ret = afs_extract_data(call, false); 1881 ret = afs_extract_data(call, false);
1870 if (ret < 0) 1882 if (ret < 0)
@@ -2020,9 +2032,9 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
2020 case 0: 2032 case 0:
2021 afs_extract_to_tmp(call); 2033 afs_extract_to_tmp(call);
2022 call->unmarshall++; 2034 call->unmarshall++;
2035 /* Fall through */
2023 2036
2024 /* Extract the file status count and array in two steps */ 2037 /* Extract the file status count and array in two steps */
2025 /* Fall through */
2026 case 1: 2038 case 1:
2027 _debug("extract status count"); 2039 _debug("extract status count");
2028 ret = afs_extract_data(call, true); 2040 ret = afs_extract_data(call, true);
@@ -2039,8 +2051,8 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
2039 call->unmarshall++; 2051 call->unmarshall++;
2040 more_counts: 2052 more_counts:
2041 afs_extract_to_buf(call, 21 * sizeof(__be32)); 2053 afs_extract_to_buf(call, 21 * sizeof(__be32));
2042
2043 /* Fall through */ 2054 /* Fall through */
2055
2044 case 2: 2056 case 2:
2045 _debug("extract status array %u", call->count); 2057 _debug("extract status array %u", call->count);
2046 ret = afs_extract_data(call, true); 2058 ret = afs_extract_data(call, true);
@@ -2060,9 +2072,9 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
2060 call->count = 0; 2072 call->count = 0;
2061 call->unmarshall++; 2073 call->unmarshall++;
2062 afs_extract_to_tmp(call); 2074 afs_extract_to_tmp(call);
2075 /* Fall through */
2063 2076
2064 /* Extract the callback count and array in two steps */ 2077 /* Extract the callback count and array in two steps */
2065 /* Fall through */
2066 case 3: 2078 case 3:
2067 _debug("extract CB count"); 2079 _debug("extract CB count");
2068 ret = afs_extract_data(call, true); 2080 ret = afs_extract_data(call, true);
@@ -2078,8 +2090,8 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
2078 call->unmarshall++; 2090 call->unmarshall++;
2079 more_cbs: 2091 more_cbs:
2080 afs_extract_to_buf(call, 3 * sizeof(__be32)); 2092 afs_extract_to_buf(call, 3 * sizeof(__be32));
2081
2082 /* Fall through */ 2093 /* Fall through */
2094
2083 case 4: 2095 case 4:
2084 _debug("extract CB array"); 2096 _debug("extract CB array");
2085 ret = afs_extract_data(call, true); 2097 ret = afs_extract_data(call, true);
@@ -2096,8 +2108,8 @@ static int afs_deliver_fs_inline_bulk_status(struct afs_call *call)
2096 2108
2097 afs_extract_to_buf(call, 6 * sizeof(__be32)); 2109 afs_extract_to_buf(call, 6 * sizeof(__be32));
2098 call->unmarshall++; 2110 call->unmarshall++;
2099
2100 /* Fall through */ 2111 /* Fall through */
2112
2101 case 5: 2113 case 5:
2102 ret = afs_extract_data(call, false); 2114 ret = afs_extract_data(call, false);
2103 if (ret < 0) 2115 if (ret < 0)
@@ -2193,6 +2205,7 @@ static int afs_deliver_fs_fetch_acl(struct afs_call *call)
2193 case 0: 2205 case 0:
2194 afs_extract_to_tmp(call); 2206 afs_extract_to_tmp(call);
2195 call->unmarshall++; 2207 call->unmarshall++;
2208 /* Fall through */
2196 2209
2197 /* extract the returned data length */ 2210 /* extract the returned data length */
2198 case 1: 2211 case 1:
@@ -2210,6 +2223,7 @@ static int afs_deliver_fs_fetch_acl(struct afs_call *call)
2210 acl->size = call->count2; 2223 acl->size = call->count2;
2211 afs_extract_begin(call, acl->data, size); 2224 afs_extract_begin(call, acl->data, size);
2212 call->unmarshall++; 2225 call->unmarshall++;
2226 /* Fall through */
2213 2227
2214 /* extract the returned data */ 2228 /* extract the returned data */
2215 case 2: 2229 case 2:
@@ -2219,6 +2233,7 @@ static int afs_deliver_fs_fetch_acl(struct afs_call *call)
2219 2233
2220 afs_extract_to_buf(call, (21 + 6) * 4); 2234 afs_extract_to_buf(call, (21 + 6) * 4);
2221 call->unmarshall++; 2235 call->unmarshall++;
2236 /* Fall through */
2222 2237
2223 /* extract the metadata */ 2238 /* extract the metadata */
2224 case 3: 2239 case 3:
diff --git a/fs/afs/vlclient.c b/fs/afs/vlclient.c
index d7e0fd3c00df..cfb0ac4bd039 100644
--- a/fs/afs/vlclient.c
+++ b/fs/afs/vlclient.c
@@ -56,23 +56,24 @@ static int afs_deliver_vl_get_entry_by_name_u(struct afs_call *call)
56 struct afs_uuid__xdr *xdr; 56 struct afs_uuid__xdr *xdr;
57 struct afs_uuid *uuid; 57 struct afs_uuid *uuid;
58 int j; 58 int j;
59 int n = entry->nr_servers;
59 60
60 tmp = ntohl(uvldb->serverFlags[i]); 61 tmp = ntohl(uvldb->serverFlags[i]);
61 if (tmp & AFS_VLSF_DONTUSE || 62 if (tmp & AFS_VLSF_DONTUSE ||
62 (new_only && !(tmp & AFS_VLSF_NEWREPSITE))) 63 (new_only && !(tmp & AFS_VLSF_NEWREPSITE)))
63 continue; 64 continue;
64 if (tmp & AFS_VLSF_RWVOL) { 65 if (tmp & AFS_VLSF_RWVOL) {
65 entry->fs_mask[i] |= AFS_VOL_VTM_RW; 66 entry->fs_mask[n] |= AFS_VOL_VTM_RW;
66 if (vlflags & AFS_VLF_BACKEXISTS) 67 if (vlflags & AFS_VLF_BACKEXISTS)
67 entry->fs_mask[i] |= AFS_VOL_VTM_BAK; 68 entry->fs_mask[n] |= AFS_VOL_VTM_BAK;
68 } 69 }
69 if (tmp & AFS_VLSF_ROVOL) 70 if (tmp & AFS_VLSF_ROVOL)
70 entry->fs_mask[i] |= AFS_VOL_VTM_RO; 71 entry->fs_mask[n] |= AFS_VOL_VTM_RO;
71 if (!entry->fs_mask[i]) 72 if (!entry->fs_mask[n])
72 continue; 73 continue;
73 74
74 xdr = &uvldb->serverNumber[i]; 75 xdr = &uvldb->serverNumber[i];
75 uuid = (struct afs_uuid *)&entry->fs_server[i]; 76 uuid = (struct afs_uuid *)&entry->fs_server[n];
76 uuid->time_low = xdr->time_low; 77 uuid->time_low = xdr->time_low;
77 uuid->time_mid = htons(ntohl(xdr->time_mid)); 78 uuid->time_mid = htons(ntohl(xdr->time_mid));
78 uuid->time_hi_and_version = htons(ntohl(xdr->time_hi_and_version)); 79 uuid->time_hi_and_version = htons(ntohl(xdr->time_hi_and_version));
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index 18722aaeda33..2575503170fc 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -450,8 +450,9 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
450 req->offset = req->pos & (PAGE_SIZE - 1); 450 req->offset = req->pos & (PAGE_SIZE - 1);
451 afs_extract_to_tmp64(call); 451 afs_extract_to_tmp64(call);
452 call->unmarshall++; 452 call->unmarshall++;
453 /* Fall through */
453 454
454 /* Fall through - and extract the returned data length */ 455 /* extract the returned data length */
455 case 1: 456 case 1:
456 _debug("extract data length"); 457 _debug("extract data length");
457 ret = afs_extract_data(call, true); 458 ret = afs_extract_data(call, true);
@@ -477,8 +478,9 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
477 call->bvec[0].bv_page = req->pages[req->index]; 478 call->bvec[0].bv_page = req->pages[req->index];
478 iov_iter_bvec(&call->iter, READ, call->bvec, 1, size); 479 iov_iter_bvec(&call->iter, READ, call->bvec, 1, size);
479 ASSERTCMP(size, <=, PAGE_SIZE); 480 ASSERTCMP(size, <=, PAGE_SIZE);
481 /* Fall through */
480 482
481 /* Fall through - and extract the returned data */ 483 /* extract the returned data */
482 case 2: 484 case 2:
483 _debug("extract data %zu/%llu", 485 _debug("extract data %zu/%llu",
484 iov_iter_count(&call->iter), req->remain); 486 iov_iter_count(&call->iter), req->remain);
@@ -505,8 +507,8 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
505 /* Discard any excess data the server gave us */ 507 /* Discard any excess data the server gave us */
506 iov_iter_discard(&call->iter, READ, req->actual_len - req->len); 508 iov_iter_discard(&call->iter, READ, req->actual_len - req->len);
507 call->unmarshall = 3; 509 call->unmarshall = 3;
508
509 /* Fall through */ 510 /* Fall through */
511
510 case 3: 512 case 3:
511 _debug("extract discard %zu/%llu", 513 _debug("extract discard %zu/%llu",
512 iov_iter_count(&call->iter), req->actual_len - req->len); 514 iov_iter_count(&call->iter), req->actual_len - req->len);
@@ -521,8 +523,9 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
521 sizeof(struct yfs_xdr_YFSFetchStatus) + 523 sizeof(struct yfs_xdr_YFSFetchStatus) +
522 sizeof(struct yfs_xdr_YFSCallBack) + 524 sizeof(struct yfs_xdr_YFSCallBack) +
523 sizeof(struct yfs_xdr_YFSVolSync)); 525 sizeof(struct yfs_xdr_YFSVolSync));
526 /* Fall through */
524 527
525 /* Fall through - and extract the metadata */ 528 /* extract the metadata */
526 case 4: 529 case 4:
527 ret = afs_extract_data(call, false); 530 ret = afs_extract_data(call, false);
528 if (ret < 0) 531 if (ret < 0)
@@ -539,8 +542,8 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
539 req->file_size = call->out_scb->status.size; 542 req->file_size = call->out_scb->status.size;
540 543
541 call->unmarshall++; 544 call->unmarshall++;
542
543 /* Fall through */ 545 /* Fall through */
546
544 case 5: 547 case 5:
545 break; 548 break;
546 } 549 }
@@ -1429,8 +1432,9 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call)
1429 case 0: 1432 case 0:
1430 call->unmarshall++; 1433 call->unmarshall++;
1431 afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSFetchVolumeStatus)); 1434 afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSFetchVolumeStatus));
1435 /* Fall through */
1432 1436
1433 /* Fall through - and extract the returned status record */ 1437 /* extract the returned status record */
1434 case 1: 1438 case 1:
1435 _debug("extract status"); 1439 _debug("extract status");
1436 ret = afs_extract_data(call, true); 1440 ret = afs_extract_data(call, true);
@@ -1441,8 +1445,9 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call)
1441 xdr_decode_YFSFetchVolumeStatus(&bp, call->out_volstatus); 1445 xdr_decode_YFSFetchVolumeStatus(&bp, call->out_volstatus);
1442 call->unmarshall++; 1446 call->unmarshall++;
1443 afs_extract_to_tmp(call); 1447 afs_extract_to_tmp(call);
1448 /* Fall through */
1444 1449
1445 /* Fall through - and extract the volume name length */ 1450 /* extract the volume name length */
1446 case 2: 1451 case 2:
1447 ret = afs_extract_data(call, true); 1452 ret = afs_extract_data(call, true);
1448 if (ret < 0) 1453 if (ret < 0)
@@ -1456,8 +1461,9 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call)
1456 size = (call->count + 3) & ~3; /* It's padded */ 1461 size = (call->count + 3) & ~3; /* It's padded */
1457 afs_extract_to_buf(call, size); 1462 afs_extract_to_buf(call, size);
1458 call->unmarshall++; 1463 call->unmarshall++;
1464 /* Fall through */
1459 1465
1460 /* Fall through - and extract the volume name */ 1466 /* extract the volume name */
1461 case 3: 1467 case 3:
1462 _debug("extract volname"); 1468 _debug("extract volname");
1463 ret = afs_extract_data(call, true); 1469 ret = afs_extract_data(call, true);
@@ -1469,8 +1475,9 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call)
1469 _debug("volname '%s'", p); 1475 _debug("volname '%s'", p);
1470 afs_extract_to_tmp(call); 1476 afs_extract_to_tmp(call);
1471 call->unmarshall++; 1477 call->unmarshall++;
1478 /* Fall through */
1472 1479
1473 /* Fall through - and extract the offline message length */ 1480 /* extract the offline message length */
1474 case 4: 1481 case 4:
1475 ret = afs_extract_data(call, true); 1482 ret = afs_extract_data(call, true);
1476 if (ret < 0) 1483 if (ret < 0)
@@ -1484,8 +1491,9 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call)
1484 size = (call->count + 3) & ~3; /* It's padded */ 1491 size = (call->count + 3) & ~3; /* It's padded */
1485 afs_extract_to_buf(call, size); 1492 afs_extract_to_buf(call, size);
1486 call->unmarshall++; 1493 call->unmarshall++;
1494 /* Fall through */
1487 1495
1488 /* Fall through - and extract the offline message */ 1496 /* extract the offline message */
1489 case 5: 1497 case 5:
1490 _debug("extract offline"); 1498 _debug("extract offline");
1491 ret = afs_extract_data(call, true); 1499 ret = afs_extract_data(call, true);
@@ -1498,8 +1506,9 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call)
1498 1506
1499 afs_extract_to_tmp(call); 1507 afs_extract_to_tmp(call);
1500 call->unmarshall++; 1508 call->unmarshall++;
1509 /* Fall through */
1501 1510
1502 /* Fall through - and extract the message of the day length */ 1511 /* extract the message of the day length */
1503 case 6: 1512 case 6:
1504 ret = afs_extract_data(call, true); 1513 ret = afs_extract_data(call, true);
1505 if (ret < 0) 1514 if (ret < 0)
@@ -1513,8 +1522,9 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call)
1513 size = (call->count + 3) & ~3; /* It's padded */ 1522 size = (call->count + 3) & ~3; /* It's padded */
1514 afs_extract_to_buf(call, size); 1523 afs_extract_to_buf(call, size);
1515 call->unmarshall++; 1524 call->unmarshall++;
1525 /* Fall through */
1516 1526
1517 /* Fall through - and extract the message of the day */ 1527 /* extract the message of the day */
1518 case 7: 1528 case 7:
1519 _debug("extract motd"); 1529 _debug("extract motd");
1520 ret = afs_extract_data(call, false); 1530 ret = afs_extract_data(call, false);
@@ -1526,8 +1536,8 @@ static int yfs_deliver_fs_get_volume_status(struct afs_call *call)
1526 _debug("motd '%s'", p); 1536 _debug("motd '%s'", p);
1527 1537
1528 call->unmarshall++; 1538 call->unmarshall++;
1529
1530 /* Fall through */ 1539 /* Fall through */
1540
1531 case 8: 1541 case 8:
1532 break; 1542 break;
1533 } 1543 }
@@ -1805,9 +1815,9 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call)
1805 case 0: 1815 case 0:
1806 afs_extract_to_tmp(call); 1816 afs_extract_to_tmp(call);
1807 call->unmarshall++; 1817 call->unmarshall++;
1818 /* Fall through */
1808 1819
1809 /* Extract the file status count and array in two steps */ 1820 /* Extract the file status count and array in two steps */
1810 /* Fall through */
1811 case 1: 1821 case 1:
1812 _debug("extract status count"); 1822 _debug("extract status count");
1813 ret = afs_extract_data(call, true); 1823 ret = afs_extract_data(call, true);
@@ -1824,8 +1834,8 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call)
1824 call->unmarshall++; 1834 call->unmarshall++;
1825 more_counts: 1835 more_counts:
1826 afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSFetchStatus)); 1836 afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSFetchStatus));
1827
1828 /* Fall through */ 1837 /* Fall through */
1838
1829 case 2: 1839 case 2:
1830 _debug("extract status array %u", call->count); 1840 _debug("extract status array %u", call->count);
1831 ret = afs_extract_data(call, true); 1841 ret = afs_extract_data(call, true);
@@ -1845,9 +1855,9 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call)
1845 call->count = 0; 1855 call->count = 0;
1846 call->unmarshall++; 1856 call->unmarshall++;
1847 afs_extract_to_tmp(call); 1857 afs_extract_to_tmp(call);
1858 /* Fall through */
1848 1859
1849 /* Extract the callback count and array in two steps */ 1860 /* Extract the callback count and array in two steps */
1850 /* Fall through */
1851 case 3: 1861 case 3:
1852 _debug("extract CB count"); 1862 _debug("extract CB count");
1853 ret = afs_extract_data(call, true); 1863 ret = afs_extract_data(call, true);
@@ -1863,8 +1873,8 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call)
1863 call->unmarshall++; 1873 call->unmarshall++;
1864 more_cbs: 1874 more_cbs:
1865 afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSCallBack)); 1875 afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSCallBack));
1866
1867 /* Fall through */ 1876 /* Fall through */
1877
1868 case 4: 1878 case 4:
1869 _debug("extract CB array"); 1879 _debug("extract CB array");
1870 ret = afs_extract_data(call, true); 1880 ret = afs_extract_data(call, true);
@@ -1881,8 +1891,8 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call)
1881 1891
1882 afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSVolSync)); 1892 afs_extract_to_buf(call, sizeof(struct yfs_xdr_YFSVolSync));
1883 call->unmarshall++; 1893 call->unmarshall++;
1884
1885 /* Fall through */ 1894 /* Fall through */
1895
1886 case 5: 1896 case 5:
1887 ret = afs_extract_data(call, false); 1897 ret = afs_extract_data(call, false);
1888 if (ret < 0) 1898 if (ret < 0)
@@ -1892,8 +1902,8 @@ static int yfs_deliver_fs_inline_bulk_status(struct afs_call *call)
1892 xdr_decode_YFSVolSync(&bp, call->out_volsync); 1902 xdr_decode_YFSVolSync(&bp, call->out_volsync);
1893 1903
1894 call->unmarshall++; 1904 call->unmarshall++;
1895
1896 /* Fall through */ 1905 /* Fall through */
1906
1897 case 6: 1907 case 6:
1898 break; 1908 break;
1899 } 1909 }
@@ -1978,6 +1988,7 @@ static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call)
1978 case 0: 1988 case 0:
1979 afs_extract_to_tmp(call); 1989 afs_extract_to_tmp(call);
1980 call->unmarshall++; 1990 call->unmarshall++;
1991 /* Fall through */
1981 1992
1982 /* Extract the file ACL length */ 1993 /* Extract the file ACL length */
1983 case 1: 1994 case 1:
@@ -1999,6 +2010,7 @@ static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call)
1999 iov_iter_discard(&call->iter, READ, size); 2010 iov_iter_discard(&call->iter, READ, size);
2000 } 2011 }
2001 call->unmarshall++; 2012 call->unmarshall++;
2013 /* Fall through */
2002 2014
2003 /* Extract the file ACL */ 2015 /* Extract the file ACL */
2004 case 2: 2016 case 2:
@@ -2008,6 +2020,7 @@ static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call)
2008 2020
2009 afs_extract_to_tmp(call); 2021 afs_extract_to_tmp(call);
2010 call->unmarshall++; 2022 call->unmarshall++;
2023 /* Fall through */
2011 2024
2012 /* Extract the volume ACL length */ 2025 /* Extract the volume ACL length */
2013 case 3: 2026 case 3:
@@ -2029,6 +2042,7 @@ static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call)
2029 iov_iter_discard(&call->iter, READ, size); 2042 iov_iter_discard(&call->iter, READ, size);
2030 } 2043 }
2031 call->unmarshall++; 2044 call->unmarshall++;
2045 /* Fall through */
2032 2046
2033 /* Extract the volume ACL */ 2047 /* Extract the volume ACL */
2034 case 4: 2048 case 4:
@@ -2041,6 +2055,7 @@ static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call)
2041 sizeof(struct yfs_xdr_YFSFetchStatus) + 2055 sizeof(struct yfs_xdr_YFSFetchStatus) +
2042 sizeof(struct yfs_xdr_YFSVolSync)); 2056 sizeof(struct yfs_xdr_YFSVolSync));
2043 call->unmarshall++; 2057 call->unmarshall++;
2058 /* Fall through */
2044 2059
2045 /* extract the metadata */ 2060 /* extract the metadata */
2046 case 5: 2061 case 5:
@@ -2057,6 +2072,7 @@ static int yfs_deliver_fs_fetch_opaque_acl(struct afs_call *call)
2057 xdr_decode_YFSVolSync(&bp, call->out_volsync); 2072 xdr_decode_YFSVolSync(&bp, call->out_volsync);
2058 2073
2059 call->unmarshall++; 2074 call->unmarshall++;
2075 /* Fall through */
2060 2076
2061 case 6: 2077 case 6:
2062 break; 2078 break;
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 4707dfff991b..677cb364d33f 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -1139,8 +1139,7 @@ static struct gendisk *bdev_get_gendisk(struct block_device *bdev, int *partno)
1139 * Pointer to the block device containing @bdev on success, ERR_PTR() 1139 * Pointer to the block device containing @bdev on success, ERR_PTR()
1140 * value on failure. 1140 * value on failure.
1141 */ 1141 */
1142static struct block_device *bd_start_claiming(struct block_device *bdev, 1142struct block_device *bd_start_claiming(struct block_device *bdev, void *holder)
1143 void *holder)
1144{ 1143{
1145 struct gendisk *disk; 1144 struct gendisk *disk;
1146 struct block_device *whole; 1145 struct block_device *whole;
@@ -1187,6 +1186,62 @@ static struct block_device *bd_start_claiming(struct block_device *bdev,
1187 return ERR_PTR(err); 1186 return ERR_PTR(err);
1188 } 1187 }
1189} 1188}
1189EXPORT_SYMBOL(bd_start_claiming);
1190
1191static void bd_clear_claiming(struct block_device *whole, void *holder)
1192{
1193 lockdep_assert_held(&bdev_lock);
1194 /* tell others that we're done */
1195 BUG_ON(whole->bd_claiming != holder);
1196 whole->bd_claiming = NULL;
1197 wake_up_bit(&whole->bd_claiming, 0);
1198}
1199
1200/**
1201 * bd_finish_claiming - finish claiming of a block device
1202 * @bdev: block device of interest
1203 * @whole: whole block device (returned from bd_start_claiming())
1204 * @holder: holder that has claimed @bdev
1205 *
1206 * Finish exclusive open of a block device. Mark the device as exclusively
1207 * open by the holder and wake up all waiters for exclusive open to finish.
1208 */
1209void bd_finish_claiming(struct block_device *bdev, struct block_device *whole,
1210 void *holder)
1211{
1212 spin_lock(&bdev_lock);
1213 BUG_ON(!bd_may_claim(bdev, whole, holder));
1214 /*
1215 * Note that for a whole device bd_holders will be incremented twice,
1216 * and bd_holder will be set to bd_may_claim before being set to holder
1217 */
1218 whole->bd_holders++;
1219 whole->bd_holder = bd_may_claim;
1220 bdev->bd_holders++;
1221 bdev->bd_holder = holder;
1222 bd_clear_claiming(whole, holder);
1223 spin_unlock(&bdev_lock);
1224}
1225EXPORT_SYMBOL(bd_finish_claiming);
1226
1227/**
1228 * bd_abort_claiming - abort claiming of a block device
1229 * @bdev: block device of interest
1230 * @whole: whole block device (returned from bd_start_claiming())
1231 * @holder: holder that has claimed @bdev
1232 *
1233 * Abort claiming of a block device when the exclusive open failed. This can be
1234 * also used when exclusive open is not actually desired and we just needed
1235 * to block other exclusive openers for a while.
1236 */
1237void bd_abort_claiming(struct block_device *bdev, struct block_device *whole,
1238 void *holder)
1239{
1240 spin_lock(&bdev_lock);
1241 bd_clear_claiming(whole, holder);
1242 spin_unlock(&bdev_lock);
1243}
1244EXPORT_SYMBOL(bd_abort_claiming);
1190 1245
1191#ifdef CONFIG_SYSFS 1246#ifdef CONFIG_SYSFS
1192struct bd_holder_disk { 1247struct bd_holder_disk {
@@ -1656,29 +1711,10 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder)
1656 1711
1657 /* finish claiming */ 1712 /* finish claiming */
1658 mutex_lock(&bdev->bd_mutex); 1713 mutex_lock(&bdev->bd_mutex);
1659 spin_lock(&bdev_lock); 1714 if (!res)
1660 1715 bd_finish_claiming(bdev, whole, holder);
1661 if (!res) { 1716 else
1662 BUG_ON(!bd_may_claim(bdev, whole, holder)); 1717 bd_abort_claiming(bdev, whole, holder);
1663 /*
1664 * Note that for a whole device bd_holders
1665 * will be incremented twice, and bd_holder
1666 * will be set to bd_may_claim before being
1667 * set to holder
1668 */
1669 whole->bd_holders++;
1670 whole->bd_holder = bd_may_claim;
1671 bdev->bd_holders++;
1672 bdev->bd_holder = holder;
1673 }
1674
1675 /* tell others that we're done */
1676 BUG_ON(whole->bd_claiming != holder);
1677 whole->bd_claiming = NULL;
1678 wake_up_bit(&whole->bd_claiming, 0);
1679
1680 spin_unlock(&bdev_lock);
1681
1682 /* 1718 /*
1683 * Block event polling for write claims if requested. Any 1719 * Block event polling for write claims if requested. Any
1684 * write holder makes the write_holder state stick until 1720 * write holder makes the write_holder state stick until
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 212b4a854f2c..38651fae7f21 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -4,6 +4,7 @@ config BTRFS_FS
4 tristate "Btrfs filesystem support" 4 tristate "Btrfs filesystem support"
5 select CRYPTO 5 select CRYPTO
6 select CRYPTO_CRC32C 6 select CRYPTO_CRC32C
7 select LIBCRC32C
7 select ZLIB_INFLATE 8 select ZLIB_INFLATE
8 select ZLIB_DEFLATE 9 select ZLIB_DEFLATE
9 select LZO_COMPRESS 10 select LZO_COMPRESS
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 89116afda7a2..e5d85311d5d5 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1483,7 +1483,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
1483 ulist_init(roots); 1483 ulist_init(roots);
1484 ulist_init(tmp); 1484 ulist_init(tmp);
1485 1485
1486 trans = btrfs_attach_transaction(root); 1486 trans = btrfs_join_transaction_nostart(root);
1487 if (IS_ERR(trans)) { 1487 if (IS_ERR(trans)) {
1488 if (PTR_ERR(trans) != -ENOENT && PTR_ERR(trans) != -EROFS) { 1488 if (PTR_ERR(trans) != -ENOENT && PTR_ERR(trans) != -EROFS) {
1489 ret = PTR_ERR(trans); 1489 ret = PTR_ERR(trans);
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 299e11e6c554..94660063a162 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -401,7 +401,6 @@ struct btrfs_dev_replace {
401struct raid_kobject { 401struct raid_kobject {
402 u64 flags; 402 u64 flags;
403 struct kobject kobj; 403 struct kobject kobj;
404 struct list_head list;
405}; 404};
406 405
407/* 406/*
@@ -915,8 +914,6 @@ struct btrfs_fs_info {
915 u32 thread_pool_size; 914 u32 thread_pool_size;
916 915
917 struct kobject *space_info_kobj; 916 struct kobject *space_info_kobj;
918 struct list_head pending_raid_kobjs;
919 spinlock_t pending_raid_kobjs_lock; /* uncontended */
920 917
921 u64 total_pinned; 918 u64 total_pinned;
922 919
@@ -2698,7 +2695,6 @@ int btrfs_can_relocate(struct btrfs_fs_info *fs_info, u64 bytenr);
2698int btrfs_make_block_group(struct btrfs_trans_handle *trans, 2695int btrfs_make_block_group(struct btrfs_trans_handle *trans,
2699 u64 bytes_used, u64 type, u64 chunk_offset, 2696 u64 bytes_used, u64 type, u64 chunk_offset,
2700 u64 size); 2697 u64 size);
2701void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info);
2702struct btrfs_trans_handle *btrfs_start_trans_remove_block_group( 2698struct btrfs_trans_handle *btrfs_start_trans_remove_block_group(
2703 struct btrfs_fs_info *fs_info, 2699 struct btrfs_fs_info *fs_info,
2704 const u64 chunk_offset); 2700 const u64 chunk_offset);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 41a2bd2e0c56..97beb351a10c 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2683,8 +2683,6 @@ int open_ctree(struct super_block *sb,
2683 INIT_LIST_HEAD(&fs_info->delayed_iputs); 2683 INIT_LIST_HEAD(&fs_info->delayed_iputs);
2684 INIT_LIST_HEAD(&fs_info->delalloc_roots); 2684 INIT_LIST_HEAD(&fs_info->delalloc_roots);
2685 INIT_LIST_HEAD(&fs_info->caching_block_groups); 2685 INIT_LIST_HEAD(&fs_info->caching_block_groups);
2686 INIT_LIST_HEAD(&fs_info->pending_raid_kobjs);
2687 spin_lock_init(&fs_info->pending_raid_kobjs_lock);
2688 spin_lock_init(&fs_info->delalloc_root_lock); 2686 spin_lock_init(&fs_info->delalloc_root_lock);
2689 spin_lock_init(&fs_info->trans_lock); 2687 spin_lock_init(&fs_info->trans_lock);
2690 spin_lock_init(&fs_info->fs_roots_radix_lock); 2688 spin_lock_init(&fs_info->fs_roots_radix_lock);
@@ -4106,6 +4104,7 @@ void close_ctree(struct btrfs_fs_info *fs_info)
4106 percpu_counter_destroy(&fs_info->dev_replace.bio_counter); 4104 percpu_counter_destroy(&fs_info->dev_replace.bio_counter);
4107 cleanup_srcu_struct(&fs_info->subvol_srcu); 4105 cleanup_srcu_struct(&fs_info->subvol_srcu);
4108 4106
4107 btrfs_free_csum_hash(fs_info);
4109 btrfs_free_stripe_hash_table(fs_info); 4108 btrfs_free_stripe_hash_table(fs_info);
4110 btrfs_free_ref_cache(fs_info); 4109 btrfs_free_ref_cache(fs_info);
4111} 4110}
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index d3b58e388535..8b7eb22d508a 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -4,6 +4,7 @@
4 */ 4 */
5 5
6#include <linux/sched.h> 6#include <linux/sched.h>
7#include <linux/sched/mm.h>
7#include <linux/sched/signal.h> 8#include <linux/sched/signal.h>
8#include <linux/pagemap.h> 9#include <linux/pagemap.h>
9#include <linux/writeback.h> 10#include <linux/writeback.h>
@@ -7888,33 +7889,6 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
7888 return 0; 7889 return 0;
7889} 7890}
7890 7891
7891/* link_block_group will queue up kobjects to add when we're reclaim-safe */
7892void btrfs_add_raid_kobjects(struct btrfs_fs_info *fs_info)
7893{
7894 struct btrfs_space_info *space_info;
7895 struct raid_kobject *rkobj;
7896 LIST_HEAD(list);
7897 int ret = 0;
7898
7899 spin_lock(&fs_info->pending_raid_kobjs_lock);
7900 list_splice_init(&fs_info->pending_raid_kobjs, &list);
7901 spin_unlock(&fs_info->pending_raid_kobjs_lock);
7902
7903 list_for_each_entry(rkobj, &list, list) {
7904 space_info = btrfs_find_space_info(fs_info, rkobj->flags);
7905
7906 ret = kobject_add(&rkobj->kobj, &space_info->kobj,
7907 "%s", btrfs_bg_type_to_raid_name(rkobj->flags));
7908 if (ret) {
7909 kobject_put(&rkobj->kobj);
7910 break;
7911 }
7912 }
7913 if (ret)
7914 btrfs_warn(fs_info,
7915 "failed to add kobject for block cache, ignoring");
7916}
7917
7918static void link_block_group(struct btrfs_block_group_cache *cache) 7892static void link_block_group(struct btrfs_block_group_cache *cache)
7919{ 7893{
7920 struct btrfs_space_info *space_info = cache->space_info; 7894 struct btrfs_space_info *space_info = cache->space_info;
@@ -7929,18 +7903,36 @@ static void link_block_group(struct btrfs_block_group_cache *cache)
7929 up_write(&space_info->groups_sem); 7903 up_write(&space_info->groups_sem);
7930 7904
7931 if (first) { 7905 if (first) {
7932 struct raid_kobject *rkobj = kzalloc(sizeof(*rkobj), GFP_NOFS); 7906 struct raid_kobject *rkobj;
7907 unsigned int nofs_flag;
7908 int ret;
7909
7910 /*
7911 * Setup a NOFS context because kobject_add(), deep in its call
7912 * chain, does GFP_KERNEL allocations, and we are often called
7913 * in a context where if reclaim is triggered we can deadlock
7914 * (we are either holding a transaction handle or some lock
7915 * required for a transaction commit).
7916 */
7917 nofs_flag = memalloc_nofs_save();
7918 rkobj = kzalloc(sizeof(*rkobj), GFP_KERNEL);
7933 if (!rkobj) { 7919 if (!rkobj) {
7920 memalloc_nofs_restore(nofs_flag);
7934 btrfs_warn(cache->fs_info, 7921 btrfs_warn(cache->fs_info,
7935 "couldn't alloc memory for raid level kobject"); 7922 "couldn't alloc memory for raid level kobject");
7936 return; 7923 return;
7937 } 7924 }
7938 rkobj->flags = cache->flags; 7925 rkobj->flags = cache->flags;
7939 kobject_init(&rkobj->kobj, &btrfs_raid_ktype); 7926 kobject_init(&rkobj->kobj, &btrfs_raid_ktype);
7940 7927 ret = kobject_add(&rkobj->kobj, &space_info->kobj, "%s",
7941 spin_lock(&fs_info->pending_raid_kobjs_lock); 7928 btrfs_bg_type_to_raid_name(rkobj->flags));
7942 list_add_tail(&rkobj->list, &fs_info->pending_raid_kobjs); 7929 memalloc_nofs_restore(nofs_flag);
7943 spin_unlock(&fs_info->pending_raid_kobjs_lock); 7930 if (ret) {
7931 kobject_put(&rkobj->kobj);
7932 btrfs_warn(fs_info,
7933 "failed to add kobject for block cache, ignoring");
7934 return;
7935 }
7944 space_info->block_group_kobjs[index] = &rkobj->kobj; 7936 space_info->block_group_kobjs[index] = &rkobj->kobj;
7945 } 7937 }
7946} 7938}
@@ -8206,7 +8198,6 @@ int btrfs_read_block_groups(struct btrfs_fs_info *info)
8206 inc_block_group_ro(cache, 1); 8198 inc_block_group_ro(cache, 1);
8207 } 8199 }
8208 8200
8209 btrfs_add_raid_kobjects(info);
8210 btrfs_init_global_block_rsv(info); 8201 btrfs_init_global_block_rsv(info);
8211 ret = check_chunk_block_group_mappings(info); 8202 ret = check_chunk_block_group_mappings(info);
8212error: 8203error:
@@ -8975,6 +8966,7 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
8975 struct btrfs_device *device; 8966 struct btrfs_device *device;
8976 struct list_head *devices; 8967 struct list_head *devices;
8977 u64 group_trimmed; 8968 u64 group_trimmed;
8969 u64 range_end = U64_MAX;
8978 u64 start; 8970 u64 start;
8979 u64 end; 8971 u64 end;
8980 u64 trimmed = 0; 8972 u64 trimmed = 0;
@@ -8984,16 +8976,23 @@ int btrfs_trim_fs(struct btrfs_fs_info *fs_info, struct fstrim_range *range)
8984 int dev_ret = 0; 8976 int dev_ret = 0;
8985 int ret = 0; 8977 int ret = 0;
8986 8978
8979 /*
8980 * Check range overflow if range->len is set.
8981 * The default range->len is U64_MAX.
8982 */
8983 if (range->len != U64_MAX &&
8984 check_add_overflow(range->start, range->len, &range_end))
8985 return -EINVAL;
8986
8987 cache = btrfs_lookup_first_block_group(fs_info, range->start); 8987 cache = btrfs_lookup_first_block_group(fs_info, range->start);
8988 for (; cache; cache = next_block_group(cache)) { 8988 for (; cache; cache = next_block_group(cache)) {
8989 if (cache->key.objectid >= (range->start + range->len)) { 8989 if (cache->key.objectid >= range_end) {
8990 btrfs_put_block_group(cache); 8990 btrfs_put_block_group(cache);
8991 break; 8991 break;
8992 } 8992 }
8993 8993
8994 start = max(range->start, cache->key.objectid); 8994 start = max(range->start, cache->key.objectid);
8995 end = min(range->start + range->len, 8995 end = min(range_end, cache->key.objectid + cache->key.offset);
8996 cache->key.objectid + cache->key.offset);
8997 8996
8998 if (end - start >= range->minlen) { 8997 if (end - start >= range->minlen) {
8999 if (!block_group_cache_done(cache)) { 8998 if (!block_group_cache_done(cache)) {
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1af069a9a0c7..ee582a36653d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -395,10 +395,31 @@ static noinline int add_async_extent(struct async_chunk *cow,
395 return 0; 395 return 0;
396} 396}
397 397
398/*
399 * Check if the inode has flags compatible with compression
400 */
401static inline bool inode_can_compress(struct inode *inode)
402{
403 if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW ||
404 BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
405 return false;
406 return true;
407}
408
409/*
410 * Check if the inode needs to be submitted to compression, based on mount
411 * options, defragmentation, properties or heuristics.
412 */
398static inline int inode_need_compress(struct inode *inode, u64 start, u64 end) 413static inline int inode_need_compress(struct inode *inode, u64 start, u64 end)
399{ 414{
400 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); 415 struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
401 416
417 if (!inode_can_compress(inode)) {
418 WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
419 KERN_ERR "BTRFS: unexpected compression for ino %llu\n",
420 btrfs_ino(BTRFS_I(inode)));
421 return 0;
422 }
402 /* force compress */ 423 /* force compress */
403 if (btrfs_test_opt(fs_info, FORCE_COMPRESS)) 424 if (btrfs_test_opt(fs_info, FORCE_COMPRESS))
404 return 1; 425 return 1;
@@ -1631,7 +1652,8 @@ int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page,
1631 } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) { 1652 } else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) {
1632 ret = run_delalloc_nocow(inode, locked_page, start, end, 1653 ret = run_delalloc_nocow(inode, locked_page, start, end,
1633 page_started, 0, nr_written); 1654 page_started, 0, nr_written);
1634 } else if (!inode_need_compress(inode, start, end)) { 1655 } else if (!inode_can_compress(inode) ||
1656 !inode_need_compress(inode, start, end)) {
1635 ret = cow_file_range(inode, locked_page, start, end, end, 1657 ret = cow_file_range(inode, locked_page, start, end, end,
1636 page_started, nr_written, 1, NULL); 1658 page_started, nr_written, 1, NULL);
1637 } else { 1659 } else {
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 98fccce4208c..393eceda57c8 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -346,9 +346,12 @@ void btrfs_tree_unlock(struct extent_buffer *eb)
346 if (blockers) { 346 if (blockers) {
347 btrfs_assert_no_spinning_writers(eb); 347 btrfs_assert_no_spinning_writers(eb);
348 eb->blocking_writers--; 348 eb->blocking_writers--;
349 /* Use the lighter barrier after atomic */ 349 /*
350 smp_mb__after_atomic(); 350 * We need to order modifying blocking_writers above with
351 cond_wake_up_nomb(&eb->write_lock_wq); 351 * actually waking up the sleepers to ensure they see the
352 * updated value of blocking_writers
353 */
354 cond_wake_up(&eb->write_lock_wq);
352 } else { 355 } else {
353 btrfs_assert_spinning_writers_put(eb); 356 btrfs_assert_spinning_writers_put(eb);
354 write_unlock(&eb->lock); 357 write_unlock(&eb->lock);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 1744ba8b2754..ae7f64a8facb 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -985,13 +985,14 @@ void btrfs_lock_and_flush_ordered_range(struct extent_io_tree *tree,
985 struct extent_state **cached_state) 985 struct extent_state **cached_state)
986{ 986{
987 struct btrfs_ordered_extent *ordered; 987 struct btrfs_ordered_extent *ordered;
988 struct extent_state *cachedp = NULL; 988 struct extent_state *cache = NULL;
989 struct extent_state **cachedp = &cache;
989 990
990 if (cached_state) 991 if (cached_state)
991 cachedp = *cached_state; 992 cachedp = cached_state;
992 993
993 while (1) { 994 while (1) {
994 lock_extent_bits(tree, start, end, &cachedp); 995 lock_extent_bits(tree, start, end, cachedp);
995 ordered = btrfs_lookup_ordered_range(inode, start, 996 ordered = btrfs_lookup_ordered_range(inode, start,
996 end - start + 1); 997 end - start + 1);
997 if (!ordered) { 998 if (!ordered) {
@@ -1001,10 +1002,10 @@ void btrfs_lock_and_flush_ordered_range(struct extent_io_tree *tree,
1001 * aren't exposing it outside of this function 1002 * aren't exposing it outside of this function
1002 */ 1003 */
1003 if (!cached_state) 1004 if (!cached_state)
1004 refcount_dec(&cachedp->refs); 1005 refcount_dec(&cache->refs);
1005 break; 1006 break;
1006 } 1007 }
1007 unlock_extent_cached(tree, start, end, &cachedp); 1008 unlock_extent_cached(tree, start, end, cachedp);
1008 btrfs_start_ordered_extent(&inode->vfs_inode, ordered, 1); 1009 btrfs_start_ordered_extent(&inode->vfs_inode, ordered, 1);
1009 btrfs_put_ordered_extent(ordered); 1010 btrfs_put_ordered_extent(ordered);
1010 } 1011 }
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 69b59bf75882..c3c0c064c25d 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -6322,68 +6322,21 @@ static int changed_extent(struct send_ctx *sctx,
6322{ 6322{
6323 int ret = 0; 6323 int ret = 0;
6324 6324
6325 if (sctx->cur_ino != sctx->cmp_key->objectid) { 6325 /*
6326 6326 * We have found an extent item that changed without the inode item
6327 if (result == BTRFS_COMPARE_TREE_CHANGED) { 6327 * having changed. This can happen either after relocation (where the
6328 struct extent_buffer *leaf_l; 6328 * disk_bytenr of an extent item is replaced at
6329 struct extent_buffer *leaf_r; 6329 * relocation.c:replace_file_extents()) or after deduplication into a
6330 struct btrfs_file_extent_item *ei_l; 6330 * file in both the parent and send snapshots (where an extent item can
6331 struct btrfs_file_extent_item *ei_r; 6331 * get modified or replaced with a new one). Note that deduplication
6332 6332 * updates the inode item, but it only changes the iversion (sequence
6333 leaf_l = sctx->left_path->nodes[0]; 6333 * field in the inode item) of the inode, so if a file is deduplicated
6334 leaf_r = sctx->right_path->nodes[0]; 6334 * the same amount of times in both the parent and send snapshots, its
6335 ei_l = btrfs_item_ptr(leaf_l, 6335 * iversion becames the same in both snapshots, whence the inode item is
6336 sctx->left_path->slots[0], 6336 * the same on both snapshots.
6337 struct btrfs_file_extent_item); 6337 */
6338 ei_r = btrfs_item_ptr(leaf_r, 6338 if (sctx->cur_ino != sctx->cmp_key->objectid)
6339 sctx->right_path->slots[0], 6339 return 0;
6340 struct btrfs_file_extent_item);
6341
6342 /*
6343 * We may have found an extent item that has changed
6344 * only its disk_bytenr field and the corresponding
6345 * inode item was not updated. This case happens due to
6346 * very specific timings during relocation when a leaf
6347 * that contains file extent items is COWed while
6348 * relocation is ongoing and its in the stage where it
6349 * updates data pointers. So when this happens we can
6350 * safely ignore it since we know it's the same extent,
6351 * but just at different logical and physical locations
6352 * (when an extent is fully replaced with a new one, we
6353 * know the generation number must have changed too,
6354 * since snapshot creation implies committing the current
6355 * transaction, and the inode item must have been updated
6356 * as well).
6357 * This replacement of the disk_bytenr happens at
6358 * relocation.c:replace_file_extents() through
6359 * relocation.c:btrfs_reloc_cow_block().
6360 */
6361 if (btrfs_file_extent_generation(leaf_l, ei_l) ==
6362 btrfs_file_extent_generation(leaf_r, ei_r) &&
6363 btrfs_file_extent_ram_bytes(leaf_l, ei_l) ==
6364 btrfs_file_extent_ram_bytes(leaf_r, ei_r) &&
6365 btrfs_file_extent_compression(leaf_l, ei_l) ==
6366 btrfs_file_extent_compression(leaf_r, ei_r) &&
6367 btrfs_file_extent_encryption(leaf_l, ei_l) ==
6368 btrfs_file_extent_encryption(leaf_r, ei_r) &&
6369 btrfs_file_extent_other_encoding(leaf_l, ei_l) ==
6370 btrfs_file_extent_other_encoding(leaf_r, ei_r) &&
6371 btrfs_file_extent_type(leaf_l, ei_l) ==
6372 btrfs_file_extent_type(leaf_r, ei_r) &&
6373 btrfs_file_extent_disk_bytenr(leaf_l, ei_l) !=
6374 btrfs_file_extent_disk_bytenr(leaf_r, ei_r) &&
6375 btrfs_file_extent_disk_num_bytes(leaf_l, ei_l) ==
6376 btrfs_file_extent_disk_num_bytes(leaf_r, ei_r) &&
6377 btrfs_file_extent_offset(leaf_l, ei_l) ==
6378 btrfs_file_extent_offset(leaf_r, ei_r) &&
6379 btrfs_file_extent_num_bytes(leaf_l, ei_l) ==
6380 btrfs_file_extent_num_bytes(leaf_r, ei_r))
6381 return 0;
6382 }
6383
6384 inconsistent_snapshot_error(sctx, result, "extent");
6385 return -EIO;
6386 }
6387 6340
6388 if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) { 6341 if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
6389 if (result != BTRFS_COMPARE_TREE_DELETED) 6342 if (result != BTRFS_COMPARE_TREE_DELETED)
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 3b8ae1a8f02d..e3adb714c04b 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -28,15 +28,18 @@ static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
28 [TRANS_STATE_COMMIT_START] = (__TRANS_START | __TRANS_ATTACH), 28 [TRANS_STATE_COMMIT_START] = (__TRANS_START | __TRANS_ATTACH),
29 [TRANS_STATE_COMMIT_DOING] = (__TRANS_START | 29 [TRANS_STATE_COMMIT_DOING] = (__TRANS_START |
30 __TRANS_ATTACH | 30 __TRANS_ATTACH |
31 __TRANS_JOIN), 31 __TRANS_JOIN |
32 __TRANS_JOIN_NOSTART),
32 [TRANS_STATE_UNBLOCKED] = (__TRANS_START | 33 [TRANS_STATE_UNBLOCKED] = (__TRANS_START |
33 __TRANS_ATTACH | 34 __TRANS_ATTACH |
34 __TRANS_JOIN | 35 __TRANS_JOIN |
35 __TRANS_JOIN_NOLOCK), 36 __TRANS_JOIN_NOLOCK |
37 __TRANS_JOIN_NOSTART),
36 [TRANS_STATE_COMPLETED] = (__TRANS_START | 38 [TRANS_STATE_COMPLETED] = (__TRANS_START |
37 __TRANS_ATTACH | 39 __TRANS_ATTACH |
38 __TRANS_JOIN | 40 __TRANS_JOIN |
39 __TRANS_JOIN_NOLOCK), 41 __TRANS_JOIN_NOLOCK |
42 __TRANS_JOIN_NOSTART),
40}; 43};
41 44
42void btrfs_put_transaction(struct btrfs_transaction *transaction) 45void btrfs_put_transaction(struct btrfs_transaction *transaction)
@@ -543,7 +546,8 @@ again:
543 ret = join_transaction(fs_info, type); 546 ret = join_transaction(fs_info, type);
544 if (ret == -EBUSY) { 547 if (ret == -EBUSY) {
545 wait_current_trans(fs_info); 548 wait_current_trans(fs_info);
546 if (unlikely(type == TRANS_ATTACH)) 549 if (unlikely(type == TRANS_ATTACH ||
550 type == TRANS_JOIN_NOSTART))
547 ret = -ENOENT; 551 ret = -ENOENT;
548 } 552 }
549 } while (ret == -EBUSY); 553 } while (ret == -EBUSY);
@@ -660,6 +664,16 @@ struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root
660} 664}
661 665
662/* 666/*
667 * Similar to regular join but it never starts a transaction when none is
668 * running or after waiting for the current one to finish.
669 */
670struct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *root)
671{
672 return start_transaction(root, 0, TRANS_JOIN_NOSTART,
673 BTRFS_RESERVE_NO_FLUSH, true);
674}
675
676/*
663 * btrfs_attach_transaction() - catch the running transaction 677 * btrfs_attach_transaction() - catch the running transaction
664 * 678 *
665 * It is used when we want to commit the current transaction, but 679 * It is used when we want to commit the current transaction, but
@@ -2037,6 +2051,16 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
2037 } 2051 }
2038 } else { 2052 } else {
2039 spin_unlock(&fs_info->trans_lock); 2053 spin_unlock(&fs_info->trans_lock);
2054 /*
2055 * The previous transaction was aborted and was already removed
2056 * from the list of transactions at fs_info->trans_list. So we
2057 * abort to prevent writing a new superblock that reflects a
2058 * corrupt state (pointing to trees with unwritten nodes/leafs).
2059 */
2060 if (test_bit(BTRFS_FS_STATE_TRANS_ABORTED, &fs_info->fs_state)) {
2061 ret = -EROFS;
2062 goto cleanup_transaction;
2063 }
2040 } 2064 }
2041 2065
2042 extwriter_counter_dec(cur_trans, trans->type); 2066 extwriter_counter_dec(cur_trans, trans->type);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 527ea94b57d9..2c5a6f6e5bb0 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -94,11 +94,13 @@ struct btrfs_transaction {
94#define __TRANS_JOIN (1U << 11) 94#define __TRANS_JOIN (1U << 11)
95#define __TRANS_JOIN_NOLOCK (1U << 12) 95#define __TRANS_JOIN_NOLOCK (1U << 12)
96#define __TRANS_DUMMY (1U << 13) 96#define __TRANS_DUMMY (1U << 13)
97#define __TRANS_JOIN_NOSTART (1U << 14)
97 98
98#define TRANS_START (__TRANS_START | __TRANS_FREEZABLE) 99#define TRANS_START (__TRANS_START | __TRANS_FREEZABLE)
99#define TRANS_ATTACH (__TRANS_ATTACH) 100#define TRANS_ATTACH (__TRANS_ATTACH)
100#define TRANS_JOIN (__TRANS_JOIN | __TRANS_FREEZABLE) 101#define TRANS_JOIN (__TRANS_JOIN | __TRANS_FREEZABLE)
101#define TRANS_JOIN_NOLOCK (__TRANS_JOIN_NOLOCK) 102#define TRANS_JOIN_NOLOCK (__TRANS_JOIN_NOLOCK)
103#define TRANS_JOIN_NOSTART (__TRANS_JOIN_NOSTART)
102 104
103#define TRANS_EXTWRITERS (__TRANS_START | __TRANS_ATTACH) 105#define TRANS_EXTWRITERS (__TRANS_START | __TRANS_ATTACH)
104 106
@@ -183,6 +185,7 @@ struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
183 int min_factor); 185 int min_factor);
184struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root); 186struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root);
185struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root); 187struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root);
188struct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *root);
186struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root); 189struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root);
187struct btrfs_trans_handle *btrfs_attach_transaction_barrier( 190struct btrfs_trans_handle *btrfs_attach_transaction_barrier(
188 struct btrfs_root *root); 191 struct btrfs_root *root);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index a13ddba1ebc3..a447d3ec48d5 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -3087,16 +3087,6 @@ static int btrfs_relocate_chunk(struct btrfs_fs_info *fs_info, u64 chunk_offset)
3087 if (ret) 3087 if (ret)
3088 return ret; 3088 return ret;
3089 3089
3090 /*
3091 * We add the kobjects here (and after forcing data chunk creation)
3092 * since relocation is the only place we'll create chunks of a new
3093 * type at runtime. The only place where we'll remove the last
3094 * chunk of a type is the call immediately below this one. Even
3095 * so, we're protected against races with the cleaner thread since
3096 * we're covered by the delete_unused_bgs_mutex.
3097 */
3098 btrfs_add_raid_kobjects(fs_info);
3099
3100 trans = btrfs_start_trans_remove_block_group(root->fs_info, 3090 trans = btrfs_start_trans_remove_block_group(root->fs_info,
3101 chunk_offset); 3091 chunk_offset);
3102 if (IS_ERR(trans)) { 3092 if (IS_ERR(trans)) {
@@ -3223,9 +3213,6 @@ static int btrfs_may_alloc_data_chunk(struct btrfs_fs_info *fs_info,
3223 btrfs_end_transaction(trans); 3213 btrfs_end_transaction(trans);
3224 if (ret < 0) 3214 if (ret < 0)
3225 return ret; 3215 return ret;
3226
3227 btrfs_add_raid_kobjects(fs_info);
3228
3229 return 1; 3216 return 1;
3230 } 3217 }
3231 } 3218 }
@@ -5941,6 +5928,7 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
5941 u64 stripe_len; 5928 u64 stripe_len;
5942 u64 raid56_full_stripe_start = (u64)-1; 5929 u64 raid56_full_stripe_start = (u64)-1;
5943 int data_stripes; 5930 int data_stripes;
5931 int ret = 0;
5944 5932
5945 ASSERT(op != BTRFS_MAP_DISCARD); 5933 ASSERT(op != BTRFS_MAP_DISCARD);
5946 5934
@@ -5961,8 +5949,8 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
5961 btrfs_crit(fs_info, 5949 btrfs_crit(fs_info,
5962"stripe math has gone wrong, stripe_offset=%llu offset=%llu start=%llu logical=%llu stripe_len=%llu", 5950"stripe math has gone wrong, stripe_offset=%llu offset=%llu start=%llu logical=%llu stripe_len=%llu",
5963 stripe_offset, offset, em->start, logical, stripe_len); 5951 stripe_offset, offset, em->start, logical, stripe_len);
5964 free_extent_map(em); 5952 ret = -EINVAL;
5965 return -EINVAL; 5953 goto out;
5966 } 5954 }
5967 5955
5968 /* stripe_offset is the offset of this block in its stripe */ 5956 /* stripe_offset is the offset of this block in its stripe */
@@ -6009,7 +5997,10 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
6009 io_geom->stripe_offset = stripe_offset; 5997 io_geom->stripe_offset = stripe_offset;
6010 io_geom->raid56_stripe_offset = raid56_full_stripe_start; 5998 io_geom->raid56_stripe_offset = raid56_full_stripe_start;
6011 5999
6012 return 0; 6000out:
6001 /* once for us */
6002 free_extent_map(em);
6003 return ret;
6013} 6004}
6014 6005
6015static int __btrfs_map_block(struct btrfs_fs_info *fs_info, 6006static int __btrfs_map_block(struct btrfs_fs_info *fs_info,
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index a4830ced0f98..a15a6e738eb5 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -1113,6 +1113,7 @@ cifs_demultiplex_thread(void *p)
1113 mempool_resize(cifs_req_poolp, length + cifs_min_rcv); 1113 mempool_resize(cifs_req_poolp, length + cifs_min_rcv);
1114 1114
1115 set_freezable(); 1115 set_freezable();
1116 allow_signal(SIGKILL);
1116 while (server->tcpStatus != CifsExiting) { 1117 while (server->tcpStatus != CifsExiting) {
1117 if (try_to_freeze()) 1118 if (try_to_freeze())
1118 continue; 1119 continue;
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index a5bc1b671c12..64a5864127be 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -3489,7 +3489,15 @@ fill_transform_hdr(struct smb2_transform_hdr *tr_hdr, unsigned int orig_len,
3489static inline void smb2_sg_set_buf(struct scatterlist *sg, const void *buf, 3489static inline void smb2_sg_set_buf(struct scatterlist *sg, const void *buf,
3490 unsigned int buflen) 3490 unsigned int buflen)
3491{ 3491{
3492 sg_set_page(sg, virt_to_page(buf), buflen, offset_in_page(buf)); 3492 void *addr;
3493 /*
3494 * VMAP_STACK (at least) puts stack into the vmalloc address space
3495 */
3496 if (is_vmalloc_addr(buf))
3497 addr = vmalloc_to_page(buf);
3498 else
3499 addr = virt_to_page(buf);
3500 sg_set_page(sg, addr, buflen, offset_in_page(buf));
3493} 3501}
3494 3502
3495/* Assumes the first rqst has a transform header as the first iov. 3503/* Assumes the first rqst has a transform header as the first iov.
@@ -4070,7 +4078,6 @@ receive_encrypted_standard(struct TCP_Server_Info *server,
4070{ 4078{
4071 int ret, length; 4079 int ret, length;
4072 char *buf = server->smallbuf; 4080 char *buf = server->smallbuf;
4073 char *tmpbuf;
4074 struct smb2_sync_hdr *shdr; 4081 struct smb2_sync_hdr *shdr;
4075 unsigned int pdu_length = server->pdu_size; 4082 unsigned int pdu_length = server->pdu_size;
4076 unsigned int buf_size; 4083 unsigned int buf_size;
@@ -4100,18 +4107,15 @@ receive_encrypted_standard(struct TCP_Server_Info *server,
4100 return length; 4107 return length;
4101 4108
4102 next_is_large = server->large_buf; 4109 next_is_large = server->large_buf;
4103 one_more: 4110one_more:
4104 shdr = (struct smb2_sync_hdr *)buf; 4111 shdr = (struct smb2_sync_hdr *)buf;
4105 if (shdr->NextCommand) { 4112 if (shdr->NextCommand) {
4106 if (next_is_large) { 4113 if (next_is_large)
4107 tmpbuf = server->bigbuf;
4108 next_buffer = (char *)cifs_buf_get(); 4114 next_buffer = (char *)cifs_buf_get();
4109 } else { 4115 else
4110 tmpbuf = server->smallbuf;
4111 next_buffer = (char *)cifs_small_buf_get(); 4116 next_buffer = (char *)cifs_small_buf_get();
4112 }
4113 memcpy(next_buffer, 4117 memcpy(next_buffer,
4114 tmpbuf + le32_to_cpu(shdr->NextCommand), 4118 buf + le32_to_cpu(shdr->NextCommand),
4115 pdu_length - le32_to_cpu(shdr->NextCommand)); 4119 pdu_length - le32_to_cpu(shdr->NextCommand));
4116 } 4120 }
4117 4121
@@ -4140,12 +4144,21 @@ receive_encrypted_standard(struct TCP_Server_Info *server,
4140 pdu_length -= le32_to_cpu(shdr->NextCommand); 4144 pdu_length -= le32_to_cpu(shdr->NextCommand);
4141 server->large_buf = next_is_large; 4145 server->large_buf = next_is_large;
4142 if (next_is_large) 4146 if (next_is_large)
4143 server->bigbuf = next_buffer; 4147 server->bigbuf = buf = next_buffer;
4144 else 4148 else
4145 server->smallbuf = next_buffer; 4149 server->smallbuf = buf = next_buffer;
4146
4147 buf += le32_to_cpu(shdr->NextCommand);
4148 goto one_more; 4150 goto one_more;
4151 } else if (ret != 0) {
4152 /*
4153 * ret != 0 here means that we didn't get to handle_mid() thus
4154 * server->smallbuf and server->bigbuf are still valid. We need
4155 * to free next_buffer because it is not going to be used
4156 * anywhere.
4157 */
4158 if (next_is_large)
4159 free_rsp_buf(CIFS_LARGE_BUFFER, next_buffer);
4160 else
4161 free_rsp_buf(CIFS_SMALL_BUFFER, next_buffer);
4149 } 4162 }
4150 4163
4151 return ret; 4164 return ret;
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index c8cd7b6cdda2..31e4a1b0b170 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -252,7 +252,7 @@ smb2_reconnect(__le16 smb2_command, struct cifs_tcon *tcon)
252 if (tcon == NULL) 252 if (tcon == NULL)
253 return 0; 253 return 0;
254 254
255 if (smb2_command == SMB2_TREE_CONNECT) 255 if (smb2_command == SMB2_TREE_CONNECT || smb2_command == SMB2_IOCTL)
256 return 0; 256 return 0;
257 257
258 if (tcon->tidStatus == CifsExiting) { 258 if (tcon->tidStatus == CifsExiting) {
@@ -1196,7 +1196,12 @@ SMB2_sess_alloc_buffer(struct SMB2_sess_data *sess_data)
1196 else 1196 else
1197 req->SecurityMode = 0; 1197 req->SecurityMode = 0;
1198 1198
1199#ifdef CONFIG_CIFS_DFS_UPCALL
1200 req->Capabilities = cpu_to_le32(SMB2_GLOBAL_CAP_DFS);
1201#else
1199 req->Capabilities = 0; 1202 req->Capabilities = 0;
1203#endif /* DFS_UPCALL */
1204
1200 req->Channel = 0; /* MBZ */ 1205 req->Channel = 0; /* MBZ */
1201 1206
1202 sess_data->iov[0].iov_base = (char *)req; 1207 sess_data->iov[0].iov_base = (char *)req;
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 6e30949d9f77..a7ec2d3dff92 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -638,9 +638,6 @@ COMPATIBLE_IOCTL(PPPIOCDISCONN)
638COMPATIBLE_IOCTL(PPPIOCATTCHAN) 638COMPATIBLE_IOCTL(PPPIOCATTCHAN)
639COMPATIBLE_IOCTL(PPPIOCGCHAN) 639COMPATIBLE_IOCTL(PPPIOCGCHAN)
640COMPATIBLE_IOCTL(PPPIOCGL2TPSTATS) 640COMPATIBLE_IOCTL(PPPIOCGL2TPSTATS)
641/* PPPOX */
642COMPATIBLE_IOCTL(PPPOEIOCSFWD)
643COMPATIBLE_IOCTL(PPPOEIOCDFWD)
644/* Big A */ 641/* Big A */
645/* sparc only */ 642/* sparc only */
646/* Big Q for sound/OSS */ 643/* Big Q for sound/OSS */
diff --git a/fs/coredump.c b/fs/coredump.c
index e42e17e55bfd..b1ea7dfbd149 100644
--- a/fs/coredump.c
+++ b/fs/coredump.c
@@ -7,6 +7,7 @@
7#include <linux/stat.h> 7#include <linux/stat.h>
8#include <linux/fcntl.h> 8#include <linux/fcntl.h>
9#include <linux/swap.h> 9#include <linux/swap.h>
10#include <linux/ctype.h>
10#include <linux/string.h> 11#include <linux/string.h>
11#include <linux/init.h> 12#include <linux/init.h>
12#include <linux/pagemap.h> 13#include <linux/pagemap.h>
@@ -187,11 +188,13 @@ put_exe_file:
187 * name into corename, which must have space for at least 188 * name into corename, which must have space for at least
188 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator. 189 * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
189 */ 190 */
190static int format_corename(struct core_name *cn, struct coredump_params *cprm) 191static int format_corename(struct core_name *cn, struct coredump_params *cprm,
192 size_t **argv, int *argc)
191{ 193{
192 const struct cred *cred = current_cred(); 194 const struct cred *cred = current_cred();
193 const char *pat_ptr = core_pattern; 195 const char *pat_ptr = core_pattern;
194 int ispipe = (*pat_ptr == '|'); 196 int ispipe = (*pat_ptr == '|');
197 bool was_space = false;
195 int pid_in_pattern = 0; 198 int pid_in_pattern = 0;
196 int err = 0; 199 int err = 0;
197 200
@@ -201,12 +204,35 @@ static int format_corename(struct core_name *cn, struct coredump_params *cprm)
201 return -ENOMEM; 204 return -ENOMEM;
202 cn->corename[0] = '\0'; 205 cn->corename[0] = '\0';
203 206
204 if (ispipe) 207 if (ispipe) {
208 int argvs = sizeof(core_pattern) / 2;
209 (*argv) = kmalloc_array(argvs, sizeof(**argv), GFP_KERNEL);
210 if (!(*argv))
211 return -ENOMEM;
212 (*argv)[(*argc)++] = 0;
205 ++pat_ptr; 213 ++pat_ptr;
214 }
206 215
207 /* Repeat as long as we have more pattern to process and more output 216 /* Repeat as long as we have more pattern to process and more output
208 space */ 217 space */
209 while (*pat_ptr) { 218 while (*pat_ptr) {
219 /*
220 * Split on spaces before doing template expansion so that
221 * %e and %E don't get split if they have spaces in them
222 */
223 if (ispipe) {
224 if (isspace(*pat_ptr)) {
225 was_space = true;
226 pat_ptr++;
227 continue;
228 } else if (was_space) {
229 was_space = false;
230 err = cn_printf(cn, "%c", '\0');
231 if (err)
232 return err;
233 (*argv)[(*argc)++] = cn->used;
234 }
235 }
210 if (*pat_ptr != '%') { 236 if (*pat_ptr != '%') {
211 err = cn_printf(cn, "%c", *pat_ptr++); 237 err = cn_printf(cn, "%c", *pat_ptr++);
212 } else { 238 } else {
@@ -546,6 +572,8 @@ void do_coredump(const kernel_siginfo_t *siginfo)
546 struct cred *cred; 572 struct cred *cred;
547 int retval = 0; 573 int retval = 0;
548 int ispipe; 574 int ispipe;
575 size_t *argv = NULL;
576 int argc = 0;
549 struct files_struct *displaced; 577 struct files_struct *displaced;
550 /* require nonrelative corefile path and be extra careful */ 578 /* require nonrelative corefile path and be extra careful */
551 bool need_suid_safe = false; 579 bool need_suid_safe = false;
@@ -592,9 +620,10 @@ void do_coredump(const kernel_siginfo_t *siginfo)
592 620
593 old_cred = override_creds(cred); 621 old_cred = override_creds(cred);
594 622
595 ispipe = format_corename(&cn, &cprm); 623 ispipe = format_corename(&cn, &cprm, &argv, &argc);
596 624
597 if (ispipe) { 625 if (ispipe) {
626 int argi;
598 int dump_count; 627 int dump_count;
599 char **helper_argv; 628 char **helper_argv;
600 struct subprocess_info *sub_info; 629 struct subprocess_info *sub_info;
@@ -637,12 +666,16 @@ void do_coredump(const kernel_siginfo_t *siginfo)
637 goto fail_dropcount; 666 goto fail_dropcount;
638 } 667 }
639 668
640 helper_argv = argv_split(GFP_KERNEL, cn.corename, NULL); 669 helper_argv = kmalloc_array(argc + 1, sizeof(*helper_argv),
670 GFP_KERNEL);
641 if (!helper_argv) { 671 if (!helper_argv) {
642 printk(KERN_WARNING "%s failed to allocate memory\n", 672 printk(KERN_WARNING "%s failed to allocate memory\n",
643 __func__); 673 __func__);
644 goto fail_dropcount; 674 goto fail_dropcount;
645 } 675 }
676 for (argi = 0; argi < argc; argi++)
677 helper_argv[argi] = cn.corename + argv[argi];
678 helper_argv[argi] = NULL;
646 679
647 retval = -ENOMEM; 680 retval = -ENOMEM;
648 sub_info = call_usermodehelper_setup(helper_argv[0], 681 sub_info = call_usermodehelper_setup(helper_argv[0],
@@ -652,7 +685,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
652 retval = call_usermodehelper_exec(sub_info, 685 retval = call_usermodehelper_exec(sub_info,
653 UMH_WAIT_EXEC); 686 UMH_WAIT_EXEC);
654 687
655 argv_free(helper_argv); 688 kfree(helper_argv);
656 if (retval) { 689 if (retval) {
657 printk(KERN_INFO "Core dump to |%s pipe failed\n", 690 printk(KERN_INFO "Core dump to |%s pipe failed\n",
658 cn.corename); 691 cn.corename);
@@ -766,6 +799,7 @@ fail_dropcount:
766 if (ispipe) 799 if (ispipe)
767 atomic_dec(&core_dump_count); 800 atomic_dec(&core_dump_count);
768fail_unlock: 801fail_unlock:
802 kfree(argv);
769 kfree(cn.corename); 803 kfree(cn.corename);
770 coredump_finish(mm, core_dumped); 804 coredump_finish(mm, core_dumped);
771 revert_creds(old_cred); 805 revert_creds(old_cred);
diff --git a/fs/dax.c b/fs/dax.c
index a237141d8787..6bf81f931de3 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -266,7 +266,7 @@ static void wait_entry_unlocked(struct xa_state *xas, void *entry)
266static void put_unlocked_entry(struct xa_state *xas, void *entry) 266static void put_unlocked_entry(struct xa_state *xas, void *entry)
267{ 267{
268 /* If we were the only waiter woken, wake the next one */ 268 /* If we were the only waiter woken, wake the next one */
269 if (entry && dax_is_conflict(entry)) 269 if (entry && !dax_is_conflict(entry))
270 dax_wake_entry(xas, entry, false); 270 dax_wake_entry(xas, entry, false);
271} 271}
272 272
@@ -600,7 +600,7 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
600 * guaranteed to either see new references or prevent new 600 * guaranteed to either see new references or prevent new
601 * references from being established. 601 * references from being established.
602 */ 602 */
603 unmap_mapping_range(mapping, 0, 0, 1); 603 unmap_mapping_range(mapping, 0, 0, 0);
604 604
605 xas_lock_irq(&xas); 605 xas_lock_irq(&xas);
606 xas_for_each(&xas, entry, ULONG_MAX) { 606 xas_for_each(&xas, entry, ULONG_MAX) {
diff --git a/fs/exec.c b/fs/exec.c
index c71cbfe6826a..f7f6a140856a 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1828,7 +1828,7 @@ static int __do_execve_file(int fd, struct filename *filename,
1828 membarrier_execve(current); 1828 membarrier_execve(current);
1829 rseq_execve(current); 1829 rseq_execve(current);
1830 acct_update_integrals(current); 1830 acct_update_integrals(current);
1831 task_numa_free(current); 1831 task_numa_free(current, false);
1832 free_bprm(bprm); 1832 free_bprm(bprm);
1833 kfree(pathbuf); 1833 kfree(pathbuf);
1834 if (filename) 1834 if (filename)
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index f8d46df8fa9e..3e58a6f697dd 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -1653,19 +1653,12 @@ static int f2fs_file_flush(struct file *file, fl_owner_t id)
1653static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask) 1653static int f2fs_setflags_common(struct inode *inode, u32 iflags, u32 mask)
1654{ 1654{
1655 struct f2fs_inode_info *fi = F2FS_I(inode); 1655 struct f2fs_inode_info *fi = F2FS_I(inode);
1656 u32 oldflags;
1657 1656
1658 /* Is it quota file? Do not allow user to mess with it */ 1657 /* Is it quota file? Do not allow user to mess with it */
1659 if (IS_NOQUOTA(inode)) 1658 if (IS_NOQUOTA(inode))
1660 return -EPERM; 1659 return -EPERM;
1661 1660
1662 oldflags = fi->i_flags; 1661 fi->i_flags = iflags | (fi->i_flags & ~mask);
1663
1664 if ((iflags ^ oldflags) & (F2FS_APPEND_FL | F2FS_IMMUTABLE_FL))
1665 if (!capable(CAP_LINUX_IMMUTABLE))
1666 return -EPERM;
1667
1668 fi->i_flags = iflags | (oldflags & ~mask);
1669 1662
1670 if (fi->i_flags & F2FS_PROJINHERIT_FL) 1663 if (fi->i_flags & F2FS_PROJINHERIT_FL)
1671 set_inode_flag(inode, FI_PROJ_INHERIT); 1664 set_inode_flag(inode, FI_PROJ_INHERIT);
@@ -1770,7 +1763,8 @@ static int f2fs_ioc_getflags(struct file *filp, unsigned long arg)
1770static int f2fs_ioc_setflags(struct file *filp, unsigned long arg) 1763static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
1771{ 1764{
1772 struct inode *inode = file_inode(filp); 1765 struct inode *inode = file_inode(filp);
1773 u32 fsflags; 1766 struct f2fs_inode_info *fi = F2FS_I(inode);
1767 u32 fsflags, old_fsflags;
1774 u32 iflags; 1768 u32 iflags;
1775 int ret; 1769 int ret;
1776 1770
@@ -1794,8 +1788,14 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
1794 1788
1795 inode_lock(inode); 1789 inode_lock(inode);
1796 1790
1791 old_fsflags = f2fs_iflags_to_fsflags(fi->i_flags);
1792 ret = vfs_ioc_setflags_prepare(inode, old_fsflags, fsflags);
1793 if (ret)
1794 goto out;
1795
1797 ret = f2fs_setflags_common(inode, iflags, 1796 ret = f2fs_setflags_common(inode, iflags,
1798 f2fs_fsflags_to_iflags(F2FS_SETTABLE_FS_FL)); 1797 f2fs_fsflags_to_iflags(F2FS_SETTABLE_FS_FL));
1798out:
1799 inode_unlock(inode); 1799 inode_unlock(inode);
1800 mnt_drop_write_file(filp); 1800 mnt_drop_write_file(filp);
1801 return ret; 1801 return ret;
@@ -2855,52 +2855,32 @@ static inline u32 f2fs_xflags_to_iflags(u32 xflags)
2855 return iflags; 2855 return iflags;
2856} 2856}
2857 2857
2858static int f2fs_ioc_fsgetxattr(struct file *filp, unsigned long arg) 2858static void f2fs_fill_fsxattr(struct inode *inode, struct fsxattr *fa)
2859{ 2859{
2860 struct inode *inode = file_inode(filp);
2861 struct f2fs_inode_info *fi = F2FS_I(inode); 2860 struct f2fs_inode_info *fi = F2FS_I(inode);
2862 struct fsxattr fa;
2863 2861
2864 memset(&fa, 0, sizeof(struct fsxattr)); 2862 simple_fill_fsxattr(fa, f2fs_iflags_to_xflags(fi->i_flags));
2865 fa.fsx_xflags = f2fs_iflags_to_xflags(fi->i_flags);
2866 2863
2867 if (f2fs_sb_has_project_quota(F2FS_I_SB(inode))) 2864 if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)))
2868 fa.fsx_projid = (__u32)from_kprojid(&init_user_ns, 2865 fa->fsx_projid = from_kprojid(&init_user_ns, fi->i_projid);
2869 fi->i_projid);
2870
2871 if (copy_to_user((struct fsxattr __user *)arg, &fa, sizeof(fa)))
2872 return -EFAULT;
2873 return 0;
2874} 2866}
2875 2867
2876static int f2fs_ioctl_check_project(struct inode *inode, struct fsxattr *fa) 2868static int f2fs_ioc_fsgetxattr(struct file *filp, unsigned long arg)
2877{ 2869{
2878 /* 2870 struct inode *inode = file_inode(filp);
2879 * Project Quota ID state is only allowed to change from within the init 2871 struct fsxattr fa;
2880 * namespace. Enforce that restriction only if we are trying to change
2881 * the quota ID state. Everything else is allowed in user namespaces.
2882 */
2883 if (current_user_ns() == &init_user_ns)
2884 return 0;
2885 2872
2886 if (__kprojid_val(F2FS_I(inode)->i_projid) != fa->fsx_projid) 2873 f2fs_fill_fsxattr(inode, &fa);
2887 return -EINVAL;
2888
2889 if (F2FS_I(inode)->i_flags & F2FS_PROJINHERIT_FL) {
2890 if (!(fa->fsx_xflags & FS_XFLAG_PROJINHERIT))
2891 return -EINVAL;
2892 } else {
2893 if (fa->fsx_xflags & FS_XFLAG_PROJINHERIT)
2894 return -EINVAL;
2895 }
2896 2874
2875 if (copy_to_user((struct fsxattr __user *)arg, &fa, sizeof(fa)))
2876 return -EFAULT;
2897 return 0; 2877 return 0;
2898} 2878}
2899 2879
2900static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg) 2880static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg)
2901{ 2881{
2902 struct inode *inode = file_inode(filp); 2882 struct inode *inode = file_inode(filp);
2903 struct fsxattr fa; 2883 struct fsxattr fa, old_fa;
2904 u32 iflags; 2884 u32 iflags;
2905 int err; 2885 int err;
2906 2886
@@ -2923,9 +2903,12 @@ static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg)
2923 return err; 2903 return err;
2924 2904
2925 inode_lock(inode); 2905 inode_lock(inode);
2926 err = f2fs_ioctl_check_project(inode, &fa); 2906
2907 f2fs_fill_fsxattr(inode, &old_fa);
2908 err = vfs_ioc_fssetxattr_check(inode, &old_fa, &fa);
2927 if (err) 2909 if (err)
2928 goto out; 2910 goto out;
2911
2929 err = f2fs_setflags_common(inode, iflags, 2912 err = f2fs_setflags_common(inode, iflags,
2930 f2fs_xflags_to_iflags(F2FS_SUPPORTED_XFLAGS)); 2913 f2fs_xflags_to_iflags(F2FS_SUPPORTED_XFLAGS));
2931 if (err) 2914 if (err)
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 6691f526fa40..8974672db78f 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -796,6 +796,29 @@ static int move_data_block(struct inode *inode, block_t bidx,
796 if (lfs_mode) 796 if (lfs_mode)
797 down_write(&fio.sbi->io_order_lock); 797 down_write(&fio.sbi->io_order_lock);
798 798
799 mpage = f2fs_grab_cache_page(META_MAPPING(fio.sbi),
800 fio.old_blkaddr, false);
801 if (!mpage)
802 goto up_out;
803
804 fio.encrypted_page = mpage;
805
806 /* read source block in mpage */
807 if (!PageUptodate(mpage)) {
808 err = f2fs_submit_page_bio(&fio);
809 if (err) {
810 f2fs_put_page(mpage, 1);
811 goto up_out;
812 }
813 lock_page(mpage);
814 if (unlikely(mpage->mapping != META_MAPPING(fio.sbi) ||
815 !PageUptodate(mpage))) {
816 err = -EIO;
817 f2fs_put_page(mpage, 1);
818 goto up_out;
819 }
820 }
821
799 f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr, 822 f2fs_allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
800 &sum, CURSEG_COLD_DATA, NULL, false); 823 &sum, CURSEG_COLD_DATA, NULL, false);
801 824
@@ -803,44 +826,18 @@ static int move_data_block(struct inode *inode, block_t bidx,
803 newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS); 826 newaddr, FGP_LOCK | FGP_CREAT, GFP_NOFS);
804 if (!fio.encrypted_page) { 827 if (!fio.encrypted_page) {
805 err = -ENOMEM; 828 err = -ENOMEM;
806 goto recover_block;
807 }
808
809 mpage = f2fs_pagecache_get_page(META_MAPPING(fio.sbi),
810 fio.old_blkaddr, FGP_LOCK, GFP_NOFS);
811 if (mpage) {
812 bool updated = false;
813
814 if (PageUptodate(mpage)) {
815 memcpy(page_address(fio.encrypted_page),
816 page_address(mpage), PAGE_SIZE);
817 updated = true;
818 }
819 f2fs_put_page(mpage, 1); 829 f2fs_put_page(mpage, 1);
820 invalidate_mapping_pages(META_MAPPING(fio.sbi), 830 goto recover_block;
821 fio.old_blkaddr, fio.old_blkaddr);
822 if (updated)
823 goto write_page;
824 }
825
826 err = f2fs_submit_page_bio(&fio);
827 if (err)
828 goto put_page_out;
829
830 /* write page */
831 lock_page(fio.encrypted_page);
832
833 if (unlikely(fio.encrypted_page->mapping != META_MAPPING(fio.sbi))) {
834 err = -EIO;
835 goto put_page_out;
836 }
837 if (unlikely(!PageUptodate(fio.encrypted_page))) {
838 err = -EIO;
839 goto put_page_out;
840 } 831 }
841 832
842write_page: 833 /* write target block */
843 f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true, true); 834 f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true, true);
835 memcpy(page_address(fio.encrypted_page),
836 page_address(mpage), PAGE_SIZE);
837 f2fs_put_page(mpage, 1);
838 invalidate_mapping_pages(META_MAPPING(fio.sbi),
839 fio.old_blkaddr, fio.old_blkaddr);
840
844 set_page_dirty(fio.encrypted_page); 841 set_page_dirty(fio.encrypted_page);
845 if (clear_page_dirty_for_io(fio.encrypted_page)) 842 if (clear_page_dirty_for_io(fio.encrypted_page))
846 dec_page_count(fio.sbi, F2FS_DIRTY_META); 843 dec_page_count(fio.sbi, F2FS_DIRTY_META);
@@ -871,11 +868,12 @@ write_page:
871put_page_out: 868put_page_out:
872 f2fs_put_page(fio.encrypted_page, 1); 869 f2fs_put_page(fio.encrypted_page, 1);
873recover_block: 870recover_block:
874 if (lfs_mode)
875 up_write(&fio.sbi->io_order_lock);
876 if (err) 871 if (err)
877 f2fs_do_replace_block(fio.sbi, &sum, newaddr, fio.old_blkaddr, 872 f2fs_do_replace_block(fio.sbi, &sum, newaddr, fio.old_blkaddr,
878 true, true); 873 true, true);
874up_out:
875 if (lfs_mode)
876 up_write(&fio.sbi->io_order_lock);
879put_out: 877put_out:
880 f2fs_put_dnode(&dn); 878 f2fs_put_dnode(&dn);
881out: 879out:
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 6de6cda44031..78a1b873e48a 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -2422,6 +2422,12 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
2422 size_t crc_offset = 0; 2422 size_t crc_offset = 0;
2423 __u32 crc = 0; 2423 __u32 crc = 0;
2424 2424
2425 if (le32_to_cpu(raw_super->magic) != F2FS_SUPER_MAGIC) {
2426 f2fs_info(sbi, "Magic Mismatch, valid(0x%x) - read(0x%x)",
2427 F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic));
2428 return -EINVAL;
2429 }
2430
2425 /* Check checksum_offset and crc in superblock */ 2431 /* Check checksum_offset and crc in superblock */
2426 if (__F2FS_HAS_FEATURE(raw_super, F2FS_FEATURE_SB_CHKSUM)) { 2432 if (__F2FS_HAS_FEATURE(raw_super, F2FS_FEATURE_SB_CHKSUM)) {
2427 crc_offset = le32_to_cpu(raw_super->checksum_offset); 2433 crc_offset = le32_to_cpu(raw_super->checksum_offset);
@@ -2429,26 +2435,20 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
2429 offsetof(struct f2fs_super_block, crc)) { 2435 offsetof(struct f2fs_super_block, crc)) {
2430 f2fs_info(sbi, "Invalid SB checksum offset: %zu", 2436 f2fs_info(sbi, "Invalid SB checksum offset: %zu",
2431 crc_offset); 2437 crc_offset);
2432 return 1; 2438 return -EFSCORRUPTED;
2433 } 2439 }
2434 crc = le32_to_cpu(raw_super->crc); 2440 crc = le32_to_cpu(raw_super->crc);
2435 if (!f2fs_crc_valid(sbi, crc, raw_super, crc_offset)) { 2441 if (!f2fs_crc_valid(sbi, crc, raw_super, crc_offset)) {
2436 f2fs_info(sbi, "Invalid SB checksum value: %u", crc); 2442 f2fs_info(sbi, "Invalid SB checksum value: %u", crc);
2437 return 1; 2443 return -EFSCORRUPTED;
2438 } 2444 }
2439 } 2445 }
2440 2446
2441 if (F2FS_SUPER_MAGIC != le32_to_cpu(raw_super->magic)) {
2442 f2fs_info(sbi, "Magic Mismatch, valid(0x%x) - read(0x%x)",
2443 F2FS_SUPER_MAGIC, le32_to_cpu(raw_super->magic));
2444 return 1;
2445 }
2446
2447 /* Currently, support only 4KB page cache size */ 2447 /* Currently, support only 4KB page cache size */
2448 if (F2FS_BLKSIZE != PAGE_SIZE) { 2448 if (F2FS_BLKSIZE != PAGE_SIZE) {
2449 f2fs_info(sbi, "Invalid page_cache_size (%lu), supports only 4KB", 2449 f2fs_info(sbi, "Invalid page_cache_size (%lu), supports only 4KB",
2450 PAGE_SIZE); 2450 PAGE_SIZE);
2451 return 1; 2451 return -EFSCORRUPTED;
2452 } 2452 }
2453 2453
2454 /* Currently, support only 4KB block size */ 2454 /* Currently, support only 4KB block size */
@@ -2456,14 +2456,14 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
2456 if (blocksize != F2FS_BLKSIZE) { 2456 if (blocksize != F2FS_BLKSIZE) {
2457 f2fs_info(sbi, "Invalid blocksize (%u), supports only 4KB", 2457 f2fs_info(sbi, "Invalid blocksize (%u), supports only 4KB",
2458 blocksize); 2458 blocksize);
2459 return 1; 2459 return -EFSCORRUPTED;
2460 } 2460 }
2461 2461
2462 /* check log blocks per segment */ 2462 /* check log blocks per segment */
2463 if (le32_to_cpu(raw_super->log_blocks_per_seg) != 9) { 2463 if (le32_to_cpu(raw_super->log_blocks_per_seg) != 9) {
2464 f2fs_info(sbi, "Invalid log blocks per segment (%u)", 2464 f2fs_info(sbi, "Invalid log blocks per segment (%u)",
2465 le32_to_cpu(raw_super->log_blocks_per_seg)); 2465 le32_to_cpu(raw_super->log_blocks_per_seg));
2466 return 1; 2466 return -EFSCORRUPTED;
2467 } 2467 }
2468 2468
2469 /* Currently, support 512/1024/2048/4096 bytes sector size */ 2469 /* Currently, support 512/1024/2048/4096 bytes sector size */
@@ -2473,7 +2473,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
2473 F2FS_MIN_LOG_SECTOR_SIZE) { 2473 F2FS_MIN_LOG_SECTOR_SIZE) {
2474 f2fs_info(sbi, "Invalid log sectorsize (%u)", 2474 f2fs_info(sbi, "Invalid log sectorsize (%u)",
2475 le32_to_cpu(raw_super->log_sectorsize)); 2475 le32_to_cpu(raw_super->log_sectorsize));
2476 return 1; 2476 return -EFSCORRUPTED;
2477 } 2477 }
2478 if (le32_to_cpu(raw_super->log_sectors_per_block) + 2478 if (le32_to_cpu(raw_super->log_sectors_per_block) +
2479 le32_to_cpu(raw_super->log_sectorsize) != 2479 le32_to_cpu(raw_super->log_sectorsize) !=
@@ -2481,7 +2481,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
2481 f2fs_info(sbi, "Invalid log sectors per block(%u) log sectorsize(%u)", 2481 f2fs_info(sbi, "Invalid log sectors per block(%u) log sectorsize(%u)",
2482 le32_to_cpu(raw_super->log_sectors_per_block), 2482 le32_to_cpu(raw_super->log_sectors_per_block),
2483 le32_to_cpu(raw_super->log_sectorsize)); 2483 le32_to_cpu(raw_super->log_sectorsize));
2484 return 1; 2484 return -EFSCORRUPTED;
2485 } 2485 }
2486 2486
2487 segment_count = le32_to_cpu(raw_super->segment_count); 2487 segment_count = le32_to_cpu(raw_super->segment_count);
@@ -2495,7 +2495,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
2495 if (segment_count > F2FS_MAX_SEGMENT || 2495 if (segment_count > F2FS_MAX_SEGMENT ||
2496 segment_count < F2FS_MIN_SEGMENTS) { 2496 segment_count < F2FS_MIN_SEGMENTS) {
2497 f2fs_info(sbi, "Invalid segment count (%u)", segment_count); 2497 f2fs_info(sbi, "Invalid segment count (%u)", segment_count);
2498 return 1; 2498 return -EFSCORRUPTED;
2499 } 2499 }
2500 2500
2501 if (total_sections > segment_count || 2501 if (total_sections > segment_count ||
@@ -2503,25 +2503,25 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
2503 segs_per_sec > segment_count || !segs_per_sec) { 2503 segs_per_sec > segment_count || !segs_per_sec) {
2504 f2fs_info(sbi, "Invalid segment/section count (%u, %u x %u)", 2504 f2fs_info(sbi, "Invalid segment/section count (%u, %u x %u)",
2505 segment_count, total_sections, segs_per_sec); 2505 segment_count, total_sections, segs_per_sec);
2506 return 1; 2506 return -EFSCORRUPTED;
2507 } 2507 }
2508 2508
2509 if ((segment_count / segs_per_sec) < total_sections) { 2509 if ((segment_count / segs_per_sec) < total_sections) {
2510 f2fs_info(sbi, "Small segment_count (%u < %u * %u)", 2510 f2fs_info(sbi, "Small segment_count (%u < %u * %u)",
2511 segment_count, segs_per_sec, total_sections); 2511 segment_count, segs_per_sec, total_sections);
2512 return 1; 2512 return -EFSCORRUPTED;
2513 } 2513 }
2514 2514
2515 if (segment_count > (le64_to_cpu(raw_super->block_count) >> 9)) { 2515 if (segment_count > (le64_to_cpu(raw_super->block_count) >> 9)) {
2516 f2fs_info(sbi, "Wrong segment_count / block_count (%u > %llu)", 2516 f2fs_info(sbi, "Wrong segment_count / block_count (%u > %llu)",
2517 segment_count, le64_to_cpu(raw_super->block_count)); 2517 segment_count, le64_to_cpu(raw_super->block_count));
2518 return 1; 2518 return -EFSCORRUPTED;
2519 } 2519 }
2520 2520
2521 if (secs_per_zone > total_sections || !secs_per_zone) { 2521 if (secs_per_zone > total_sections || !secs_per_zone) {
2522 f2fs_info(sbi, "Wrong secs_per_zone / total_sections (%u, %u)", 2522 f2fs_info(sbi, "Wrong secs_per_zone / total_sections (%u, %u)",
2523 secs_per_zone, total_sections); 2523 secs_per_zone, total_sections);
2524 return 1; 2524 return -EFSCORRUPTED;
2525 } 2525 }
2526 if (le32_to_cpu(raw_super->extension_count) > F2FS_MAX_EXTENSION || 2526 if (le32_to_cpu(raw_super->extension_count) > F2FS_MAX_EXTENSION ||
2527 raw_super->hot_ext_count > F2FS_MAX_EXTENSION || 2527 raw_super->hot_ext_count > F2FS_MAX_EXTENSION ||
@@ -2531,7 +2531,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
2531 le32_to_cpu(raw_super->extension_count), 2531 le32_to_cpu(raw_super->extension_count),
2532 raw_super->hot_ext_count, 2532 raw_super->hot_ext_count,
2533 F2FS_MAX_EXTENSION); 2533 F2FS_MAX_EXTENSION);
2534 return 1; 2534 return -EFSCORRUPTED;
2535 } 2535 }
2536 2536
2537 if (le32_to_cpu(raw_super->cp_payload) > 2537 if (le32_to_cpu(raw_super->cp_payload) >
@@ -2539,7 +2539,7 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
2539 f2fs_info(sbi, "Insane cp_payload (%u > %u)", 2539 f2fs_info(sbi, "Insane cp_payload (%u > %u)",
2540 le32_to_cpu(raw_super->cp_payload), 2540 le32_to_cpu(raw_super->cp_payload),
2541 blocks_per_seg - F2FS_CP_PACKS); 2541 blocks_per_seg - F2FS_CP_PACKS);
2542 return 1; 2542 return -EFSCORRUPTED;
2543 } 2543 }
2544 2544
2545 /* check reserved ino info */ 2545 /* check reserved ino info */
@@ -2550,12 +2550,12 @@ static int sanity_check_raw_super(struct f2fs_sb_info *sbi,
2550 le32_to_cpu(raw_super->node_ino), 2550 le32_to_cpu(raw_super->node_ino),
2551 le32_to_cpu(raw_super->meta_ino), 2551 le32_to_cpu(raw_super->meta_ino),
2552 le32_to_cpu(raw_super->root_ino)); 2552 le32_to_cpu(raw_super->root_ino));
2553 return 1; 2553 return -EFSCORRUPTED;
2554 } 2554 }
2555 2555
2556 /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */ 2556 /* check CP/SIT/NAT/SSA/MAIN_AREA area boundary */
2557 if (sanity_check_area_boundary(sbi, bh)) 2557 if (sanity_check_area_boundary(sbi, bh))
2558 return 1; 2558 return -EFSCORRUPTED;
2559 2559
2560 return 0; 2560 return 0;
2561} 2561}
@@ -2870,10 +2870,10 @@ static int read_raw_super_block(struct f2fs_sb_info *sbi,
2870 } 2870 }
2871 2871
2872 /* sanity checking of raw super */ 2872 /* sanity checking of raw super */
2873 if (sanity_check_raw_super(sbi, bh)) { 2873 err = sanity_check_raw_super(sbi, bh);
2874 if (err) {
2874 f2fs_err(sbi, "Can't find valid F2FS filesystem in %dth superblock", 2875 f2fs_err(sbi, "Can't find valid F2FS filesystem in %dth superblock",
2875 block + 1); 2876 block + 1);
2876 err = -EFSCORRUPTED;
2877 brelse(bh); 2877 brelse(bh);
2878 continue; 2878 continue;
2879 } 2879 }
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 79581b9bdebb..4f8b5fd6c81f 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -390,6 +390,19 @@ static int fillup_metapath(struct gfs2_inode *ip, struct metapath *mp, int h)
390 return mp->mp_aheight - x - 1; 390 return mp->mp_aheight - x - 1;
391} 391}
392 392
393static sector_t metapath_to_block(struct gfs2_sbd *sdp, struct metapath *mp)
394{
395 sector_t factor = 1, block = 0;
396 int hgt;
397
398 for (hgt = mp->mp_fheight - 1; hgt >= 0; hgt--) {
399 if (hgt < mp->mp_aheight)
400 block += mp->mp_list[hgt] * factor;
401 factor *= sdp->sd_inptrs;
402 }
403 return block;
404}
405
393static void release_metapath(struct metapath *mp) 406static void release_metapath(struct metapath *mp)
394{ 407{
395 int i; 408 int i;
@@ -430,60 +443,84 @@ static inline unsigned int gfs2_extent_length(struct buffer_head *bh, __be64 *pt
430 return ptr - first; 443 return ptr - first;
431} 444}
432 445
433typedef const __be64 *(*gfs2_metadata_walker)( 446enum walker_status { WALK_STOP, WALK_FOLLOW, WALK_CONTINUE };
434 struct metapath *mp, 447
435 const __be64 *start, const __be64 *end, 448/*
436 u64 factor, void *data); 449 * gfs2_metadata_walker - walk an indirect block
450 * @mp: Metapath to indirect block
451 * @ptrs: Number of pointers to look at
452 *
453 * When returning WALK_FOLLOW, the walker must update @mp to point at the right
454 * indirect block to follow.
455 */
456typedef enum walker_status (*gfs2_metadata_walker)(struct metapath *mp,
457 unsigned int ptrs);
437 458
438#define WALK_STOP ((__be64 *)0) 459/*
439#define WALK_NEXT ((__be64 *)1) 460 * gfs2_walk_metadata - walk a tree of indirect blocks
461 * @inode: The inode
462 * @mp: Starting point of walk
463 * @max_len: Maximum number of blocks to walk
464 * @walker: Called during the walk
465 *
466 * Returns 1 if the walk was stopped by @walker, 0 if we went past @max_len or
467 * past the end of metadata, and a negative error code otherwise.
468 */
440 469
441static int gfs2_walk_metadata(struct inode *inode, sector_t lblock, 470static int gfs2_walk_metadata(struct inode *inode, struct metapath *mp,
442 u64 len, struct metapath *mp, gfs2_metadata_walker walker, 471 u64 max_len, gfs2_metadata_walker walker)
443 void *data)
444{ 472{
445 struct metapath clone;
446 struct gfs2_inode *ip = GFS2_I(inode); 473 struct gfs2_inode *ip = GFS2_I(inode);
447 struct gfs2_sbd *sdp = GFS2_SB(inode); 474 struct gfs2_sbd *sdp = GFS2_SB(inode);
448 const __be64 *start, *end, *ptr;
449 u64 factor = 1; 475 u64 factor = 1;
450 unsigned int hgt; 476 unsigned int hgt;
451 int ret = 0; 477 int ret;
452 478
453 for (hgt = ip->i_height - 1; hgt >= mp->mp_aheight; hgt--) 479 /*
480 * The walk starts in the lowest allocated indirect block, which may be
481 * before the position indicated by @mp. Adjust @max_len accordingly
482 * to avoid a short walk.
483 */
484 for (hgt = mp->mp_fheight - 1; hgt >= mp->mp_aheight; hgt--) {
485 max_len += mp->mp_list[hgt] * factor;
486 mp->mp_list[hgt] = 0;
454 factor *= sdp->sd_inptrs; 487 factor *= sdp->sd_inptrs;
488 }
455 489
456 for (;;) { 490 for (;;) {
457 u64 step; 491 u16 start = mp->mp_list[hgt];
492 enum walker_status status;
493 unsigned int ptrs;
494 u64 len;
458 495
459 /* Walk indirect block. */ 496 /* Walk indirect block. */
460 start = metapointer(hgt, mp); 497 ptrs = (hgt >= 1 ? sdp->sd_inptrs : sdp->sd_diptrs) - start;
461 end = metaend(hgt, mp); 498 len = ptrs * factor;
462 499 if (len > max_len)
463 step = (end - start) * factor; 500 ptrs = DIV_ROUND_UP_ULL(max_len, factor);
464 if (step > len) 501 status = walker(mp, ptrs);
465 end = start + DIV_ROUND_UP_ULL(len, factor); 502 switch (status) {
466 503 case WALK_STOP:
467 ptr = walker(mp, start, end, factor, data); 504 return 1;
468 if (ptr == WALK_STOP) 505 case WALK_FOLLOW:
506 BUG_ON(mp->mp_aheight == mp->mp_fheight);
507 ptrs = mp->mp_list[hgt] - start;
508 len = ptrs * factor;
469 break; 509 break;
470 if (step >= len) 510 case WALK_CONTINUE:
471 break; 511 break;
472 len -= step;
473 if (ptr != WALK_NEXT) {
474 BUG_ON(!*ptr);
475 mp->mp_list[hgt] += ptr - start;
476 goto fill_up_metapath;
477 } 512 }
513 if (len >= max_len)
514 break;
515 max_len -= len;
516 if (status == WALK_FOLLOW)
517 goto fill_up_metapath;
478 518
479lower_metapath: 519lower_metapath:
480 /* Decrease height of metapath. */ 520 /* Decrease height of metapath. */
481 if (mp != &clone) {
482 clone_metapath(&clone, mp);
483 mp = &clone;
484 }
485 brelse(mp->mp_bh[hgt]); 521 brelse(mp->mp_bh[hgt]);
486 mp->mp_bh[hgt] = NULL; 522 mp->mp_bh[hgt] = NULL;
523 mp->mp_list[hgt] = 0;
487 if (!hgt) 524 if (!hgt)
488 break; 525 break;
489 hgt--; 526 hgt--;
@@ -491,10 +528,7 @@ lower_metapath:
491 528
492 /* Advance in metadata tree. */ 529 /* Advance in metadata tree. */
493 (mp->mp_list[hgt])++; 530 (mp->mp_list[hgt])++;
494 start = metapointer(hgt, mp); 531 if (mp->mp_list[hgt] >= sdp->sd_inptrs) {
495 end = metaend(hgt, mp);
496 if (start >= end) {
497 mp->mp_list[hgt] = 0;
498 if (!hgt) 532 if (!hgt)
499 break; 533 break;
500 goto lower_metapath; 534 goto lower_metapath;
@@ -502,44 +536,36 @@ lower_metapath:
502 536
503fill_up_metapath: 537fill_up_metapath:
504 /* Increase height of metapath. */ 538 /* Increase height of metapath. */
505 if (mp != &clone) {
506 clone_metapath(&clone, mp);
507 mp = &clone;
508 }
509 ret = fillup_metapath(ip, mp, ip->i_height - 1); 539 ret = fillup_metapath(ip, mp, ip->i_height - 1);
510 if (ret < 0) 540 if (ret < 0)
511 break; 541 return ret;
512 hgt += ret; 542 hgt += ret;
513 for (; ret; ret--) 543 for (; ret; ret--)
514 do_div(factor, sdp->sd_inptrs); 544 do_div(factor, sdp->sd_inptrs);
515 mp->mp_aheight = hgt + 1; 545 mp->mp_aheight = hgt + 1;
516 } 546 }
517 if (mp == &clone) 547 return 0;
518 release_metapath(mp);
519 return ret;
520} 548}
521 549
522struct gfs2_hole_walker_args { 550static enum walker_status gfs2_hole_walker(struct metapath *mp,
523 u64 blocks; 551 unsigned int ptrs)
524};
525
526static const __be64 *gfs2_hole_walker(struct metapath *mp,
527 const __be64 *start, const __be64 *end,
528 u64 factor, void *data)
529{ 552{
530 struct gfs2_hole_walker_args *args = data; 553 const __be64 *start, *ptr, *end;
531 const __be64 *ptr; 554 unsigned int hgt;
555
556 hgt = mp->mp_aheight - 1;
557 start = metapointer(hgt, mp);
558 end = start + ptrs;
532 559
533 for (ptr = start; ptr < end; ptr++) { 560 for (ptr = start; ptr < end; ptr++) {
534 if (*ptr) { 561 if (*ptr) {
535 args->blocks += (ptr - start) * factor; 562 mp->mp_list[hgt] += ptr - start;
536 if (mp->mp_aheight == mp->mp_fheight) 563 if (mp->mp_aheight == mp->mp_fheight)
537 return WALK_STOP; 564 return WALK_STOP;
538 return ptr; /* increase height */ 565 return WALK_FOLLOW;
539 } 566 }
540 } 567 }
541 args->blocks += (end - start) * factor; 568 return WALK_CONTINUE;
542 return WALK_NEXT;
543} 569}
544 570
545/** 571/**
@@ -557,12 +583,24 @@ static const __be64 *gfs2_hole_walker(struct metapath *mp,
557static int gfs2_hole_size(struct inode *inode, sector_t lblock, u64 len, 583static int gfs2_hole_size(struct inode *inode, sector_t lblock, u64 len,
558 struct metapath *mp, struct iomap *iomap) 584 struct metapath *mp, struct iomap *iomap)
559{ 585{
560 struct gfs2_hole_walker_args args = { }; 586 struct metapath clone;
561 int ret = 0; 587 u64 hole_size;
588 int ret;
562 589
563 ret = gfs2_walk_metadata(inode, lblock, len, mp, gfs2_hole_walker, &args); 590 clone_metapath(&clone, mp);
564 if (!ret) 591 ret = gfs2_walk_metadata(inode, &clone, len, gfs2_hole_walker);
565 iomap->length = args.blocks << inode->i_blkbits; 592 if (ret < 0)
593 goto out;
594
595 if (ret == 1)
596 hole_size = metapath_to_block(GFS2_SB(inode), &clone) - lblock;
597 else
598 hole_size = len;
599 iomap->length = hole_size << inode->i_blkbits;
600 ret = 0;
601
602out:
603 release_metapath(&clone);
566 return ret; 604 return ret;
567} 605}
568 606
@@ -1002,11 +1040,16 @@ static void gfs2_iomap_page_done(struct inode *inode, loff_t pos,
1002 unsigned copied, struct page *page, 1040 unsigned copied, struct page *page,
1003 struct iomap *iomap) 1041 struct iomap *iomap)
1004{ 1042{
1043 struct gfs2_trans *tr = current->journal_info;
1005 struct gfs2_inode *ip = GFS2_I(inode); 1044 struct gfs2_inode *ip = GFS2_I(inode);
1006 struct gfs2_sbd *sdp = GFS2_SB(inode); 1045 struct gfs2_sbd *sdp = GFS2_SB(inode);
1007 1046
1008 if (page && !gfs2_is_stuffed(ip)) 1047 if (page && !gfs2_is_stuffed(ip))
1009 gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied); 1048 gfs2_page_add_databufs(ip, page, offset_in_page(pos), copied);
1049
1050 if (tr->tr_num_buf_new)
1051 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
1052
1010 gfs2_trans_end(sdp); 1053 gfs2_trans_end(sdp);
1011} 1054}
1012 1055
@@ -1099,8 +1142,6 @@ static int gfs2_iomap_begin_write(struct inode *inode, loff_t pos,
1099 tr = current->journal_info; 1142 tr = current->journal_info;
1100 if (tr->tr_num_buf_new) 1143 if (tr->tr_num_buf_new)
1101 __mark_inode_dirty(inode, I_DIRTY_DATASYNC); 1144 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
1102 else
1103 gfs2_trans_add_meta(ip->i_gl, mp->mp_bh[0]);
1104 1145
1105 gfs2_trans_end(sdp); 1146 gfs2_trans_end(sdp);
1106 } 1147 }
@@ -1181,10 +1222,16 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
1181 1222
1182 if (ip->i_qadata && ip->i_qadata->qa_qd_num) 1223 if (ip->i_qadata && ip->i_qadata->qa_qd_num)
1183 gfs2_quota_unlock(ip); 1224 gfs2_quota_unlock(ip);
1225
1226 if (unlikely(!written))
1227 goto out_unlock;
1228
1184 if (iomap->flags & IOMAP_F_SIZE_CHANGED) 1229 if (iomap->flags & IOMAP_F_SIZE_CHANGED)
1185 mark_inode_dirty(inode); 1230 mark_inode_dirty(inode);
1186 gfs2_write_unlock(inode); 1231 set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
1187 1232
1233out_unlock:
1234 gfs2_write_unlock(inode);
1188out: 1235out:
1189 return 0; 1236 return 0;
1190} 1237}
diff --git a/fs/io_uring.c b/fs/io_uring.c
index e2a66e12fbc6..24bbe3cb7ad4 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -202,7 +202,7 @@ struct async_list {
202 202
203 struct file *file; 203 struct file *file;
204 off_t io_end; 204 off_t io_end;
205 size_t io_pages; 205 size_t io_len;
206}; 206};
207 207
208struct io_ring_ctx { 208struct io_ring_ctx {
@@ -333,7 +333,8 @@ struct io_kiocb {
333#define REQ_F_IO_DRAIN 16 /* drain existing IO first */ 333#define REQ_F_IO_DRAIN 16 /* drain existing IO first */
334#define REQ_F_IO_DRAINED 32 /* drain done */ 334#define REQ_F_IO_DRAINED 32 /* drain done */
335#define REQ_F_LINK 64 /* linked sqes */ 335#define REQ_F_LINK 64 /* linked sqes */
336#define REQ_F_FAIL_LINK 128 /* fail rest of links */ 336#define REQ_F_LINK_DONE 128 /* linked sqes done */
337#define REQ_F_FAIL_LINK 256 /* fail rest of links */
337 u64 user_data; 338 u64 user_data;
338 u32 result; 339 u32 result;
339 u32 sequence; 340 u32 sequence;
@@ -429,7 +430,7 @@ static inline bool io_sequence_defer(struct io_ring_ctx *ctx,
429 if ((req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN) 430 if ((req->flags & (REQ_F_IO_DRAIN|REQ_F_IO_DRAINED)) != REQ_F_IO_DRAIN)
430 return false; 431 return false;
431 432
432 return req->sequence > ctx->cached_cq_tail + ctx->sq_ring->dropped; 433 return req->sequence != ctx->cached_cq_tail + ctx->sq_ring->dropped;
433} 434}
434 435
435static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx) 436static struct io_kiocb *io_get_deferred_req(struct io_ring_ctx *ctx)
@@ -632,6 +633,7 @@ static void io_req_link_next(struct io_kiocb *req)
632 nxt->flags |= REQ_F_LINK; 633 nxt->flags |= REQ_F_LINK;
633 } 634 }
634 635
636 nxt->flags |= REQ_F_LINK_DONE;
635 INIT_WORK(&nxt->work, io_sq_wq_submit_work); 637 INIT_WORK(&nxt->work, io_sq_wq_submit_work);
636 queue_work(req->ctx->sqo_wq, &nxt->work); 638 queue_work(req->ctx->sqo_wq, &nxt->work);
637 } 639 }
@@ -1064,8 +1066,42 @@ static int io_import_fixed(struct io_ring_ctx *ctx, int rw,
1064 */ 1066 */
1065 offset = buf_addr - imu->ubuf; 1067 offset = buf_addr - imu->ubuf;
1066 iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len); 1068 iov_iter_bvec(iter, rw, imu->bvec, imu->nr_bvecs, offset + len);
1067 if (offset) 1069
1068 iov_iter_advance(iter, offset); 1070 if (offset) {
1071 /*
1072 * Don't use iov_iter_advance() here, as it's really slow for
1073 * using the latter parts of a big fixed buffer - it iterates
1074 * over each segment manually. We can cheat a bit here, because
1075 * we know that:
1076 *
1077 * 1) it's a BVEC iter, we set it up
1078 * 2) all bvecs are PAGE_SIZE in size, except potentially the
1079 * first and last bvec
1080 *
1081 * So just find our index, and adjust the iterator afterwards.
1082 * If the offset is within the first bvec (or the whole first
1083 * bvec), just use iov_iter_advance(). This makes it easier
1084 * since we can just skip the first segment, which may not
1085 * be PAGE_SIZE aligned.
1086 */
1087 const struct bio_vec *bvec = imu->bvec;
1088
1089 if (offset <= bvec->bv_len) {
1090 iov_iter_advance(iter, offset);
1091 } else {
1092 unsigned long seg_skip;
1093
1094 /* skip first vec */
1095 offset -= bvec->bv_len;
1096 seg_skip = 1 + (offset >> PAGE_SHIFT);
1097
1098 iter->bvec = bvec + seg_skip;
1099 iter->nr_segs -= seg_skip;
1100 iter->count -= bvec->bv_len + offset;
1101 iter->iov_offset = offset & ~PAGE_MASK;
1102 }
1103 }
1104
1069 return 0; 1105 return 0;
1070} 1106}
1071 1107
@@ -1120,28 +1156,26 @@ static void io_async_list_note(int rw, struct io_kiocb *req, size_t len)
1120 off_t io_end = kiocb->ki_pos + len; 1156 off_t io_end = kiocb->ki_pos + len;
1121 1157
1122 if (filp == async_list->file && kiocb->ki_pos == async_list->io_end) { 1158 if (filp == async_list->file && kiocb->ki_pos == async_list->io_end) {
1123 unsigned long max_pages; 1159 unsigned long max_bytes;
1124 1160
1125 /* Use 8x RA size as a decent limiter for both reads/writes */ 1161 /* Use 8x RA size as a decent limiter for both reads/writes */
1126 max_pages = filp->f_ra.ra_pages; 1162 max_bytes = filp->f_ra.ra_pages << (PAGE_SHIFT + 3);
1127 if (!max_pages) 1163 if (!max_bytes)
1128 max_pages = VM_READAHEAD_PAGES; 1164 max_bytes = VM_READAHEAD_PAGES << (PAGE_SHIFT + 3);
1129 max_pages *= 8; 1165
1130 1166 /* If max len is exceeded, reset the state */
1131 /* If max pages are exceeded, reset the state */ 1167 if (async_list->io_len + len <= max_bytes) {
1132 len >>= PAGE_SHIFT;
1133 if (async_list->io_pages + len <= max_pages) {
1134 req->flags |= REQ_F_SEQ_PREV; 1168 req->flags |= REQ_F_SEQ_PREV;
1135 async_list->io_pages += len; 1169 async_list->io_len += len;
1136 } else { 1170 } else {
1137 io_end = 0; 1171 io_end = 0;
1138 async_list->io_pages = 0; 1172 async_list->io_len = 0;
1139 } 1173 }
1140 } 1174 }
1141 1175
1142 /* New file? Reset state. */ 1176 /* New file? Reset state. */
1143 if (async_list->file != filp) { 1177 if (async_list->file != filp) {
1144 async_list->io_pages = 0; 1178 async_list->io_len = 0;
1145 async_list->file = filp; 1179 async_list->file = filp;
1146 } 1180 }
1147 async_list->io_end = io_end; 1181 async_list->io_end = io_end;
@@ -1630,6 +1664,8 @@ static int io_poll_add(struct io_kiocb *req, const struct io_uring_sqe *sqe)
1630 INIT_LIST_HEAD(&poll->wait.entry); 1664 INIT_LIST_HEAD(&poll->wait.entry);
1631 init_waitqueue_func_entry(&poll->wait, io_poll_wake); 1665 init_waitqueue_func_entry(&poll->wait, io_poll_wake);
1632 1666
1667 INIT_LIST_HEAD(&req->list);
1668
1633 mask = vfs_poll(poll->file, &ipt.pt) & poll->events; 1669 mask = vfs_poll(poll->file, &ipt.pt) & poll->events;
1634 1670
1635 spin_lock_irq(&ctx->completion_lock); 1671 spin_lock_irq(&ctx->completion_lock);
@@ -1800,6 +1836,7 @@ restart:
1800 do { 1836 do {
1801 struct sqe_submit *s = &req->submit; 1837 struct sqe_submit *s = &req->submit;
1802 const struct io_uring_sqe *sqe = s->sqe; 1838 const struct io_uring_sqe *sqe = s->sqe;
1839 unsigned int flags = req->flags;
1803 1840
1804 /* Ensure we clear previously set non-block flag */ 1841 /* Ensure we clear previously set non-block flag */
1805 req->rw.ki_flags &= ~IOCB_NOWAIT; 1842 req->rw.ki_flags &= ~IOCB_NOWAIT;
@@ -1844,6 +1881,10 @@ restart:
1844 /* async context always use a copy of the sqe */ 1881 /* async context always use a copy of the sqe */
1845 kfree(sqe); 1882 kfree(sqe);
1846 1883
1884 /* req from defer and link list needn't decrease async cnt */
1885 if (flags & (REQ_F_IO_DRAINED | REQ_F_LINK_DONE))
1886 goto out;
1887
1847 if (!async_list) 1888 if (!async_list)
1848 break; 1889 break;
1849 if (!list_empty(&req_list)) { 1890 if (!list_empty(&req_list)) {
@@ -1891,6 +1932,7 @@ restart:
1891 } 1932 }
1892 } 1933 }
1893 1934
1935out:
1894 if (cur_mm) { 1936 if (cur_mm) {
1895 set_fs(old_fs); 1937 set_fs(old_fs);
1896 unuse_mm(cur_mm); 1938 unuse_mm(cur_mm);
@@ -1917,6 +1959,10 @@ static bool io_add_to_prev_work(struct async_list *list, struct io_kiocb *req)
1917 ret = true; 1959 ret = true;
1918 spin_lock(&list->lock); 1960 spin_lock(&list->lock);
1919 list_add_tail(&req->list, &list->list); 1961 list_add_tail(&req->list, &list->list);
1962 /*
1963 * Ensure we see a simultaneous modification from io_sq_wq_submit_work()
1964 */
1965 smp_mb();
1920 if (!atomic_read(&list->cnt)) { 1966 if (!atomic_read(&list->cnt)) {
1921 list_del_init(&req->list); 1967 list_del_init(&req->list);
1922 ret = false; 1968 ret = false;
@@ -1977,6 +2023,15 @@ static int io_queue_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
1977{ 2023{
1978 int ret; 2024 int ret;
1979 2025
2026 ret = io_req_defer(ctx, req, s->sqe);
2027 if (ret) {
2028 if (ret != -EIOCBQUEUED) {
2029 io_free_req(req);
2030 io_cqring_add_event(ctx, s->sqe->user_data, ret);
2031 }
2032 return 0;
2033 }
2034
1980 ret = __io_submit_sqe(ctx, req, s, true); 2035 ret = __io_submit_sqe(ctx, req, s, true);
1981 if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) { 2036 if (ret == -EAGAIN && !(req->flags & REQ_F_NOWAIT)) {
1982 struct io_uring_sqe *sqe_copy; 2037 struct io_uring_sqe *sqe_copy;
@@ -2049,13 +2104,6 @@ err:
2049 return; 2104 return;
2050 } 2105 }
2051 2106
2052 ret = io_req_defer(ctx, req, s->sqe);
2053 if (ret) {
2054 if (ret != -EIOCBQUEUED)
2055 goto err_req;
2056 return;
2057 }
2058
2059 /* 2107 /*
2060 * If we already have a head request, queue this one for async 2108 * If we already have a head request, queue this one for async
2061 * submittal once the head completes. If we don't have a head but 2109 * submittal once the head completes. If we don't have a head but
diff --git a/fs/iomap/Makefile b/fs/iomap/Makefile
index 2d165388d952..93cd11938bf5 100644
--- a/fs/iomap/Makefile
+++ b/fs/iomap/Makefile
@@ -1,4 +1,4 @@
1# SPDX-License-Identifier: GPL-2.0-or-newer 1# SPDX-License-Identifier: GPL-2.0-or-later
2# 2#
3# Copyright (c) 2019 Oracle. 3# Copyright (c) 2019 Oracle.
4# All Rights Reserved. 4# All Rights Reserved.
diff --git a/fs/namespace.c b/fs/namespace.c
index 6464ea4acba9..d28d30b13043 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1463,7 +1463,6 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
1463 p->mnt.mnt_flags |= MNT_SYNC_UMOUNT; 1463 p->mnt.mnt_flags |= MNT_SYNC_UMOUNT;
1464 1464
1465 disconnect = disconnect_mount(p, how); 1465 disconnect = disconnect_mount(p, how);
1466
1467 if (mnt_has_parent(p)) { 1466 if (mnt_has_parent(p)) {
1468 mnt_add_count(p->mnt_parent, -1); 1467 mnt_add_count(p->mnt_parent, -1);
1469 if (!disconnect) { 1468 if (!disconnect) {
@@ -1471,10 +1470,11 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
1471 list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts); 1470 list_add_tail(&p->mnt_child, &p->mnt_parent->mnt_mounts);
1472 } else { 1471 } else {
1473 umount_mnt(p); 1472 umount_mnt(p);
1474 hlist_add_head(&p->mnt_umount, &unmounted);
1475 } 1473 }
1476 } 1474 }
1477 change_mnt_propagation(p, MS_PRIVATE); 1475 change_mnt_propagation(p, MS_PRIVATE);
1476 if (disconnect)
1477 hlist_add_head(&p->mnt_umount, &unmounted);
1478 } 1478 }
1479} 1479}
1480 1480
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 0ff3facf81da..071b90a45933 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -153,7 +153,7 @@ again:
153 /* Block nfs4_proc_unlck */ 153 /* Block nfs4_proc_unlck */
154 mutex_lock(&sp->so_delegreturn_mutex); 154 mutex_lock(&sp->so_delegreturn_mutex);
155 seq = raw_seqcount_begin(&sp->so_reclaim_seqcount); 155 seq = raw_seqcount_begin(&sp->so_reclaim_seqcount);
156 err = nfs4_open_delegation_recall(ctx, state, stateid, type); 156 err = nfs4_open_delegation_recall(ctx, state, stateid);
157 if (!err) 157 if (!err)
158 err = nfs_delegation_claim_locks(state, stateid); 158 err = nfs_delegation_claim_locks(state, stateid);
159 if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq)) 159 if (!err && read_seqcount_retry(&sp->so_reclaim_seqcount, seq))
@@ -1046,6 +1046,22 @@ void nfs_test_expired_all_delegations(struct nfs_client *clp)
1046 nfs4_schedule_state_manager(clp); 1046 nfs4_schedule_state_manager(clp);
1047} 1047}
1048 1048
1049static void
1050nfs_delegation_test_free_expired(struct inode *inode,
1051 nfs4_stateid *stateid,
1052 const struct cred *cred)
1053{
1054 struct nfs_server *server = NFS_SERVER(inode);
1055 const struct nfs4_minor_version_ops *ops = server->nfs_client->cl_mvops;
1056 int status;
1057
1058 if (!cred)
1059 return;
1060 status = ops->test_and_free_expired(server, stateid, cred);
1061 if (status == -NFS4ERR_EXPIRED || status == -NFS4ERR_BAD_STATEID)
1062 nfs_remove_bad_delegation(inode, stateid);
1063}
1064
1049/** 1065/**
1050 * nfs_reap_expired_delegations - reap expired delegations 1066 * nfs_reap_expired_delegations - reap expired delegations
1051 * @clp: nfs_client to process 1067 * @clp: nfs_client to process
@@ -1057,7 +1073,6 @@ void nfs_test_expired_all_delegations(struct nfs_client *clp)
1057 */ 1073 */
1058void nfs_reap_expired_delegations(struct nfs_client *clp) 1074void nfs_reap_expired_delegations(struct nfs_client *clp)
1059{ 1075{
1060 const struct nfs4_minor_version_ops *ops = clp->cl_mvops;
1061 struct nfs_delegation *delegation; 1076 struct nfs_delegation *delegation;
1062 struct nfs_server *server; 1077 struct nfs_server *server;
1063 struct inode *inode; 1078 struct inode *inode;
@@ -1088,11 +1103,7 @@ restart:
1088 nfs4_stateid_copy(&stateid, &delegation->stateid); 1103 nfs4_stateid_copy(&stateid, &delegation->stateid);
1089 clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags); 1104 clear_bit(NFS_DELEGATION_TEST_EXPIRED, &delegation->flags);
1090 rcu_read_unlock(); 1105 rcu_read_unlock();
1091 if (cred != NULL && 1106 nfs_delegation_test_free_expired(inode, &stateid, cred);
1092 ops->test_and_free_expired(server, &stateid, cred) < 0) {
1093 nfs_revoke_delegation(inode, &stateid);
1094 nfs_inode_find_state_and_recover(inode, &stateid);
1095 }
1096 put_cred(cred); 1107 put_cred(cred);
1097 if (nfs4_server_rebooted(clp)) { 1108 if (nfs4_server_rebooted(clp)) {
1098 nfs_inode_mark_test_expired_delegation(server,inode); 1109 nfs_inode_mark_test_expired_delegation(server,inode);
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 5799777df5ec..9eb87ae4c982 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -63,7 +63,7 @@ void nfs_reap_expired_delegations(struct nfs_client *clp);
63 63
64/* NFSv4 delegation-related procedures */ 64/* NFSv4 delegation-related procedures */
65int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, const nfs4_stateid *stateid, int issync); 65int nfs4_proc_delegreturn(struct inode *inode, const struct cred *cred, const nfs4_stateid *stateid, int issync);
66int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid, fmode_t type); 66int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state *state, const nfs4_stateid *stateid);
67int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid); 67int nfs4_lock_delegation_recall(struct file_lock *fl, struct nfs4_state *state, const nfs4_stateid *stateid);
68bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid *dst, const struct cred **cred); 68bool nfs4_copy_delegation_stateid(struct inode *inode, fmode_t flags, nfs4_stateid *dst, const struct cred **cred);
69bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode); 69bool nfs4_refresh_delegation_stateid(nfs4_stateid *dst, struct inode *inode);
diff --git a/fs/nfs/fscache.c b/fs/nfs/fscache.c
index 53507aa96b0b..3800ab6f08fa 100644
--- a/fs/nfs/fscache.c
+++ b/fs/nfs/fscache.c
@@ -114,6 +114,10 @@ void nfs_fscache_get_super_cookie(struct super_block *sb, const char *uniq, int
114 struct rb_node **p, *parent; 114 struct rb_node **p, *parent;
115 int diff; 115 int diff;
116 116
117 nfss->fscache_key = NULL;
118 nfss->fscache = NULL;
119 if (!(nfss->options & NFS_OPTION_FSCACHE))
120 return;
117 if (!uniq) { 121 if (!uniq) {
118 uniq = ""; 122 uniq = "";
119 ulen = 1; 123 ulen = 1;
@@ -226,10 +230,11 @@ void nfs_fscache_release_super_cookie(struct super_block *sb)
226void nfs_fscache_init_inode(struct inode *inode) 230void nfs_fscache_init_inode(struct inode *inode)
227{ 231{
228 struct nfs_fscache_inode_auxdata auxdata; 232 struct nfs_fscache_inode_auxdata auxdata;
233 struct nfs_server *nfss = NFS_SERVER(inode);
229 struct nfs_inode *nfsi = NFS_I(inode); 234 struct nfs_inode *nfsi = NFS_I(inode);
230 235
231 nfsi->fscache = NULL; 236 nfsi->fscache = NULL;
232 if (!S_ISREG(inode->i_mode)) 237 if (!(nfss->fscache && S_ISREG(inode->i_mode)))
233 return; 238 return;
234 239
235 memset(&auxdata, 0, sizeof(auxdata)); 240 memset(&auxdata, 0, sizeof(auxdata));
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
index 25a75e40d91d..ad041cfbf9ec 100644
--- a/fs/nfs/fscache.h
+++ b/fs/nfs/fscache.h
@@ -182,7 +182,7 @@ static inline void nfs_fscache_wait_on_invalidate(struct inode *inode)
182 */ 182 */
183static inline const char *nfs_server_fscache_state(struct nfs_server *server) 183static inline const char *nfs_server_fscache_state(struct nfs_server *server)
184{ 184{
185 if (server->fscache && (server->options & NFS_OPTION_FSCACHE)) 185 if (server->fscache)
186 return "yes"; 186 return "yes";
187 return "no "; 187 return "no ";
188} 188}
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index d778dad9a75e..3564da1ba8a1 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -465,7 +465,8 @@ static inline void nfs4_schedule_session_recovery(struct nfs4_session *session,
465 465
466extern struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *, const struct cred *, gfp_t); 466extern struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *, const struct cred *, gfp_t);
467extern void nfs4_put_state_owner(struct nfs4_state_owner *); 467extern void nfs4_put_state_owner(struct nfs4_state_owner *);
468extern void nfs4_purge_state_owners(struct nfs_server *); 468extern void nfs4_purge_state_owners(struct nfs_server *, struct list_head *);
469extern void nfs4_free_state_owners(struct list_head *head);
469extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *); 470extern struct nfs4_state * nfs4_get_open_state(struct inode *, struct nfs4_state_owner *);
470extern void nfs4_put_open_state(struct nfs4_state *); 471extern void nfs4_put_open_state(struct nfs4_state *);
471extern void nfs4_close_state(struct nfs4_state *, fmode_t); 472extern void nfs4_close_state(struct nfs4_state *, fmode_t);
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 616393a01c06..da6204025a2d 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -758,9 +758,12 @@ out:
758 758
759static void nfs4_destroy_server(struct nfs_server *server) 759static void nfs4_destroy_server(struct nfs_server *server)
760{ 760{
761 LIST_HEAD(freeme);
762
761 nfs_server_return_all_delegations(server); 763 nfs_server_return_all_delegations(server);
762 unset_pnfs_layoutdriver(server); 764 unset_pnfs_layoutdriver(server);
763 nfs4_purge_state_owners(server); 765 nfs4_purge_state_owners(server, &freeme);
766 nfs4_free_state_owners(&freeme);
764} 767}
765 768
766/* 769/*
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 39896afc6edf..1406858bae6c 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -1683,6 +1683,14 @@ static void nfs_state_set_open_stateid(struct nfs4_state *state,
1683 write_sequnlock(&state->seqlock); 1683 write_sequnlock(&state->seqlock);
1684} 1684}
1685 1685
1686static void nfs_state_clear_open_state_flags(struct nfs4_state *state)
1687{
1688 clear_bit(NFS_O_RDWR_STATE, &state->flags);
1689 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
1690 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
1691 clear_bit(NFS_OPEN_STATE, &state->flags);
1692}
1693
1686static void nfs_state_set_delegation(struct nfs4_state *state, 1694static void nfs_state_set_delegation(struct nfs4_state *state,
1687 const nfs4_stateid *deleg_stateid, 1695 const nfs4_stateid *deleg_stateid,
1688 fmode_t fmode) 1696 fmode_t fmode)
@@ -1907,8 +1915,9 @@ _nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data)
1907 if (data->o_res.delegation_type != 0) 1915 if (data->o_res.delegation_type != 0)
1908 nfs4_opendata_check_deleg(data, state); 1916 nfs4_opendata_check_deleg(data, state);
1909update: 1917update:
1910 update_open_stateid(state, &data->o_res.stateid, NULL, 1918 if (!update_open_stateid(state, &data->o_res.stateid,
1911 data->o_arg.fmode); 1919 NULL, data->o_arg.fmode))
1920 return ERR_PTR(-EAGAIN);
1912 refcount_inc(&state->count); 1921 refcount_inc(&state->count);
1913 1922
1914 return state; 1923 return state;
@@ -1973,8 +1982,11 @@ _nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
1973 1982
1974 if (data->o_res.delegation_type != 0) 1983 if (data->o_res.delegation_type != 0)
1975 nfs4_opendata_check_deleg(data, state); 1984 nfs4_opendata_check_deleg(data, state);
1976 update_open_stateid(state, &data->o_res.stateid, NULL, 1985 if (!update_open_stateid(state, &data->o_res.stateid,
1977 data->o_arg.fmode); 1986 NULL, data->o_arg.fmode)) {
1987 nfs4_put_open_state(state);
1988 state = ERR_PTR(-EAGAIN);
1989 }
1978out: 1990out:
1979 nfs_release_seqid(data->o_arg.seqid); 1991 nfs_release_seqid(data->o_arg.seqid);
1980 return state; 1992 return state;
@@ -2074,13 +2086,7 @@ static int nfs4_open_recover(struct nfs4_opendata *opendata, struct nfs4_state *
2074{ 2086{
2075 int ret; 2087 int ret;
2076 2088
2077 /* Don't trigger recovery in nfs_test_and_clear_all_open_stateid */
2078 clear_bit(NFS_O_RDWR_STATE, &state->flags);
2079 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
2080 clear_bit(NFS_O_RDONLY_STATE, &state->flags);
2081 /* memory barrier prior to reading state->n_* */ 2089 /* memory barrier prior to reading state->n_* */
2082 clear_bit(NFS_DELEGATED_STATE, &state->flags);
2083 clear_bit(NFS_OPEN_STATE, &state->flags);
2084 smp_rmb(); 2090 smp_rmb();
2085 ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE); 2091 ret = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE);
2086 if (ret != 0) 2092 if (ret != 0)
@@ -2156,6 +2162,8 @@ static int nfs4_open_reclaim(struct nfs4_state_owner *sp, struct nfs4_state *sta
2156 ctx = nfs4_state_find_open_context(state); 2162 ctx = nfs4_state_find_open_context(state);
2157 if (IS_ERR(ctx)) 2163 if (IS_ERR(ctx))
2158 return -EAGAIN; 2164 return -EAGAIN;
2165 clear_bit(NFS_DELEGATED_STATE, &state->flags);
2166 nfs_state_clear_open_state_flags(state);
2159 ret = nfs4_do_open_reclaim(ctx, state); 2167 ret = nfs4_do_open_reclaim(ctx, state);
2160 put_nfs_open_context(ctx); 2168 put_nfs_open_context(ctx);
2161 return ret; 2169 return ret;
@@ -2171,18 +2179,17 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct
2171 case -ENOENT: 2179 case -ENOENT:
2172 case -EAGAIN: 2180 case -EAGAIN:
2173 case -ESTALE: 2181 case -ESTALE:
2182 case -ETIMEDOUT:
2174 break; 2183 break;
2175 case -NFS4ERR_BADSESSION: 2184 case -NFS4ERR_BADSESSION:
2176 case -NFS4ERR_BADSLOT: 2185 case -NFS4ERR_BADSLOT:
2177 case -NFS4ERR_BAD_HIGH_SLOT: 2186 case -NFS4ERR_BAD_HIGH_SLOT:
2178 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 2187 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
2179 case -NFS4ERR_DEADSESSION: 2188 case -NFS4ERR_DEADSESSION:
2180 set_bit(NFS_DELEGATED_STATE, &state->flags);
2181 nfs4_schedule_session_recovery(server->nfs_client->cl_session, err); 2189 nfs4_schedule_session_recovery(server->nfs_client->cl_session, err);
2182 return -EAGAIN; 2190 return -EAGAIN;
2183 case -NFS4ERR_STALE_CLIENTID: 2191 case -NFS4ERR_STALE_CLIENTID:
2184 case -NFS4ERR_STALE_STATEID: 2192 case -NFS4ERR_STALE_STATEID:
2185 set_bit(NFS_DELEGATED_STATE, &state->flags);
2186 /* Don't recall a delegation if it was lost */ 2193 /* Don't recall a delegation if it was lost */
2187 nfs4_schedule_lease_recovery(server->nfs_client); 2194 nfs4_schedule_lease_recovery(server->nfs_client);
2188 return -EAGAIN; 2195 return -EAGAIN;
@@ -2203,7 +2210,6 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct
2203 return -EAGAIN; 2210 return -EAGAIN;
2204 case -NFS4ERR_DELAY: 2211 case -NFS4ERR_DELAY:
2205 case -NFS4ERR_GRACE: 2212 case -NFS4ERR_GRACE:
2206 set_bit(NFS_DELEGATED_STATE, &state->flags);
2207 ssleep(1); 2213 ssleep(1);
2208 return -EAGAIN; 2214 return -EAGAIN;
2209 case -ENOMEM: 2215 case -ENOMEM:
@@ -2219,8 +2225,7 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct
2219} 2225}
2220 2226
2221int nfs4_open_delegation_recall(struct nfs_open_context *ctx, 2227int nfs4_open_delegation_recall(struct nfs_open_context *ctx,
2222 struct nfs4_state *state, const nfs4_stateid *stateid, 2228 struct nfs4_state *state, const nfs4_stateid *stateid)
2223 fmode_t type)
2224{ 2229{
2225 struct nfs_server *server = NFS_SERVER(state->inode); 2230 struct nfs_server *server = NFS_SERVER(state->inode);
2226 struct nfs4_opendata *opendata; 2231 struct nfs4_opendata *opendata;
@@ -2231,20 +2236,23 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx,
2231 if (IS_ERR(opendata)) 2236 if (IS_ERR(opendata))
2232 return PTR_ERR(opendata); 2237 return PTR_ERR(opendata);
2233 nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid); 2238 nfs4_stateid_copy(&opendata->o_arg.u.delegation, stateid);
2234 nfs_state_clear_delegation(state); 2239 if (!test_bit(NFS_O_RDWR_STATE, &state->flags)) {
2235 switch (type & (FMODE_READ|FMODE_WRITE)) {
2236 case FMODE_READ|FMODE_WRITE:
2237 case FMODE_WRITE:
2238 err = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE); 2240 err = nfs4_open_recover_helper(opendata, FMODE_READ|FMODE_WRITE);
2239 if (err) 2241 if (err)
2240 break; 2242 goto out;
2243 }
2244 if (!test_bit(NFS_O_WRONLY_STATE, &state->flags)) {
2241 err = nfs4_open_recover_helper(opendata, FMODE_WRITE); 2245 err = nfs4_open_recover_helper(opendata, FMODE_WRITE);
2242 if (err) 2246 if (err)
2243 break; 2247 goto out;
2244 /* Fall through */ 2248 }
2245 case FMODE_READ: 2249 if (!test_bit(NFS_O_RDONLY_STATE, &state->flags)) {
2246 err = nfs4_open_recover_helper(opendata, FMODE_READ); 2250 err = nfs4_open_recover_helper(opendata, FMODE_READ);
2251 if (err)
2252 goto out;
2247 } 2253 }
2254 nfs_state_clear_delegation(state);
2255out:
2248 nfs4_opendata_put(opendata); 2256 nfs4_opendata_put(opendata);
2249 return nfs4_handle_delegation_recall_error(server, state, stateid, NULL, err); 2257 return nfs4_handle_delegation_recall_error(server, state, stateid, NULL, err);
2250} 2258}
@@ -2492,6 +2500,7 @@ static int nfs4_run_open_task(struct nfs4_opendata *data,
2492 if (!ctx) { 2500 if (!ctx) {
2493 nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1, 1); 2501 nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1, 1);
2494 data->is_recover = true; 2502 data->is_recover = true;
2503 task_setup_data.flags |= RPC_TASK_TIMEOUT;
2495 } else { 2504 } else {
2496 nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1, 0); 2505 nfs4_init_sequence(&o_arg->seq_args, &o_res->seq_res, 1, 0);
2497 pnfs_lgopen_prepare(data, ctx); 2506 pnfs_lgopen_prepare(data, ctx);
@@ -2698,6 +2707,7 @@ static int nfs40_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
2698{ 2707{
2699 /* NFSv4.0 doesn't allow for delegation recovery on open expire */ 2708 /* NFSv4.0 doesn't allow for delegation recovery on open expire */
2700 nfs40_clear_delegation_stateid(state); 2709 nfs40_clear_delegation_stateid(state);
2710 nfs_state_clear_open_state_flags(state);
2701 return nfs4_open_expired(sp, state); 2711 return nfs4_open_expired(sp, state);
2702} 2712}
2703 2713
@@ -2740,13 +2750,13 @@ out_free:
2740 return -NFS4ERR_EXPIRED; 2750 return -NFS4ERR_EXPIRED;
2741} 2751}
2742 2752
2743static void nfs41_check_delegation_stateid(struct nfs4_state *state) 2753static int nfs41_check_delegation_stateid(struct nfs4_state *state)
2744{ 2754{
2745 struct nfs_server *server = NFS_SERVER(state->inode); 2755 struct nfs_server *server = NFS_SERVER(state->inode);
2746 nfs4_stateid stateid; 2756 nfs4_stateid stateid;
2747 struct nfs_delegation *delegation; 2757 struct nfs_delegation *delegation;
2748 const struct cred *cred = NULL; 2758 const struct cred *cred = NULL;
2749 int status; 2759 int status, ret = NFS_OK;
2750 2760
2751 /* Get the delegation credential for use by test/free_stateid */ 2761 /* Get the delegation credential for use by test/free_stateid */
2752 rcu_read_lock(); 2762 rcu_read_lock();
@@ -2754,20 +2764,15 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state)
2754 if (delegation == NULL) { 2764 if (delegation == NULL) {
2755 rcu_read_unlock(); 2765 rcu_read_unlock();
2756 nfs_state_clear_delegation(state); 2766 nfs_state_clear_delegation(state);
2757 return; 2767 return NFS_OK;
2758 } 2768 }
2759 2769
2760 nfs4_stateid_copy(&stateid, &delegation->stateid); 2770 nfs4_stateid_copy(&stateid, &delegation->stateid);
2761 if (test_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) {
2762 rcu_read_unlock();
2763 nfs_state_clear_delegation(state);
2764 return;
2765 }
2766 2771
2767 if (!test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED, 2772 if (!test_and_clear_bit(NFS_DELEGATION_TEST_EXPIRED,
2768 &delegation->flags)) { 2773 &delegation->flags)) {
2769 rcu_read_unlock(); 2774 rcu_read_unlock();
2770 return; 2775 return NFS_OK;
2771 } 2776 }
2772 2777
2773 if (delegation->cred) 2778 if (delegation->cred)
@@ -2777,9 +2782,24 @@ static void nfs41_check_delegation_stateid(struct nfs4_state *state)
2777 trace_nfs4_test_delegation_stateid(state, NULL, status); 2782 trace_nfs4_test_delegation_stateid(state, NULL, status);
2778 if (status == -NFS4ERR_EXPIRED || status == -NFS4ERR_BAD_STATEID) 2783 if (status == -NFS4ERR_EXPIRED || status == -NFS4ERR_BAD_STATEID)
2779 nfs_finish_clear_delegation_stateid(state, &stateid); 2784 nfs_finish_clear_delegation_stateid(state, &stateid);
2785 else
2786 ret = status;
2780 2787
2781 if (delegation->cred) 2788 put_cred(cred);
2782 put_cred(cred); 2789 return ret;
2790}
2791
2792static void nfs41_delegation_recover_stateid(struct nfs4_state *state)
2793{
2794 nfs4_stateid tmp;
2795
2796 if (test_bit(NFS_DELEGATED_STATE, &state->flags) &&
2797 nfs4_copy_delegation_stateid(state->inode, state->state,
2798 &tmp, NULL) &&
2799 nfs4_stateid_match_other(&state->stateid, &tmp))
2800 nfs_state_set_delegation(state, &tmp, state->state);
2801 else
2802 nfs_state_clear_delegation(state);
2783} 2803}
2784 2804
2785/** 2805/**
@@ -2849,21 +2869,12 @@ static int nfs41_check_open_stateid(struct nfs4_state *state)
2849 const struct cred *cred = state->owner->so_cred; 2869 const struct cred *cred = state->owner->so_cred;
2850 int status; 2870 int status;
2851 2871
2852 if (test_bit(NFS_OPEN_STATE, &state->flags) == 0) { 2872 if (test_bit(NFS_OPEN_STATE, &state->flags) == 0)
2853 if (test_bit(NFS_DELEGATED_STATE, &state->flags) == 0) {
2854 if (nfs4_have_delegation(state->inode, state->state))
2855 return NFS_OK;
2856 return -NFS4ERR_OPENMODE;
2857 }
2858 return -NFS4ERR_BAD_STATEID; 2873 return -NFS4ERR_BAD_STATEID;
2859 }
2860 status = nfs41_test_and_free_expired_stateid(server, stateid, cred); 2874 status = nfs41_test_and_free_expired_stateid(server, stateid, cred);
2861 trace_nfs4_test_open_stateid(state, NULL, status); 2875 trace_nfs4_test_open_stateid(state, NULL, status);
2862 if (status == -NFS4ERR_EXPIRED || status == -NFS4ERR_BAD_STATEID) { 2876 if (status == -NFS4ERR_EXPIRED || status == -NFS4ERR_BAD_STATEID) {
2863 clear_bit(NFS_O_RDONLY_STATE, &state->flags); 2877 nfs_state_clear_open_state_flags(state);
2864 clear_bit(NFS_O_WRONLY_STATE, &state->flags);
2865 clear_bit(NFS_O_RDWR_STATE, &state->flags);
2866 clear_bit(NFS_OPEN_STATE, &state->flags);
2867 stateid->type = NFS4_INVALID_STATEID_TYPE; 2878 stateid->type = NFS4_INVALID_STATEID_TYPE;
2868 return status; 2879 return status;
2869 } 2880 }
@@ -2876,7 +2887,11 @@ static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *st
2876{ 2887{
2877 int status; 2888 int status;
2878 2889
2879 nfs41_check_delegation_stateid(state); 2890 status = nfs41_check_delegation_stateid(state);
2891 if (status != NFS_OK)
2892 return status;
2893 nfs41_delegation_recover_stateid(state);
2894
2880 status = nfs41_check_expired_locks(state); 2895 status = nfs41_check_expired_locks(state);
2881 if (status != NFS_OK) 2896 if (status != NFS_OK)
2882 return status; 2897 return status;
@@ -3201,7 +3216,7 @@ static int _nfs4_do_setattr(struct inode *inode,
3201 3216
3202 if (nfs4_copy_delegation_stateid(inode, FMODE_WRITE, &arg->stateid, &delegation_cred)) { 3217 if (nfs4_copy_delegation_stateid(inode, FMODE_WRITE, &arg->stateid, &delegation_cred)) {
3203 /* Use that stateid */ 3218 /* Use that stateid */
3204 } else if (ctx != NULL) { 3219 } else if (ctx != NULL && ctx->state) {
3205 struct nfs_lock_context *l_ctx; 3220 struct nfs_lock_context *l_ctx;
3206 if (!nfs4_valid_open_stateid(ctx->state)) 3221 if (!nfs4_valid_open_stateid(ctx->state))
3207 return -EBADF; 3222 return -EBADF;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 9afd051a4876..cad4e064b328 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -624,24 +624,39 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp)
624/** 624/**
625 * nfs4_purge_state_owners - Release all cached state owners 625 * nfs4_purge_state_owners - Release all cached state owners
626 * @server: nfs_server with cached state owners to release 626 * @server: nfs_server with cached state owners to release
627 * @head: resulting list of state owners
627 * 628 *
628 * Called at umount time. Remaining state owners will be on 629 * Called at umount time. Remaining state owners will be on
629 * the LRU with ref count of zero. 630 * the LRU with ref count of zero.
631 * Note that the state owners are not freed, but are added
632 * to the list @head, which can later be used as an argument
633 * to nfs4_free_state_owners.
630 */ 634 */
631void nfs4_purge_state_owners(struct nfs_server *server) 635void nfs4_purge_state_owners(struct nfs_server *server, struct list_head *head)
632{ 636{
633 struct nfs_client *clp = server->nfs_client; 637 struct nfs_client *clp = server->nfs_client;
634 struct nfs4_state_owner *sp, *tmp; 638 struct nfs4_state_owner *sp, *tmp;
635 LIST_HEAD(doomed);
636 639
637 spin_lock(&clp->cl_lock); 640 spin_lock(&clp->cl_lock);
638 list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) { 641 list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) {
639 list_move(&sp->so_lru, &doomed); 642 list_move(&sp->so_lru, head);
640 nfs4_remove_state_owner_locked(sp); 643 nfs4_remove_state_owner_locked(sp);
641 } 644 }
642 spin_unlock(&clp->cl_lock); 645 spin_unlock(&clp->cl_lock);
646}
643 647
644 list_for_each_entry_safe(sp, tmp, &doomed, so_lru) { 648/**
649 * nfs4_free_state_owners - Free a list of state owners
650 * @head: list of state owners to free
651 *
652 * Frees a list of state owners that was generated by
653 * nfs4_purge_state_owners
654 */
655void nfs4_free_state_owners(struct list_head *head)
656{
657 struct nfs4_state_owner *sp, *tmp;
658
659 list_for_each_entry_safe(sp, tmp, head, so_lru) {
645 list_del(&sp->so_lru); 660 list_del(&sp->so_lru);
646 nfs4_free_state_owner(sp); 661 nfs4_free_state_owner(sp);
647 } 662 }
@@ -1463,7 +1478,7 @@ void nfs_inode_find_state_and_recover(struct inode *inode,
1463 nfs4_schedule_state_manager(clp); 1478 nfs4_schedule_state_manager(clp);
1464} 1479}
1465 1480
1466static void nfs4_state_mark_open_context_bad(struct nfs4_state *state) 1481static void nfs4_state_mark_open_context_bad(struct nfs4_state *state, int err)
1467{ 1482{
1468 struct inode *inode = state->inode; 1483 struct inode *inode = state->inode;
1469 struct nfs_inode *nfsi = NFS_I(inode); 1484 struct nfs_inode *nfsi = NFS_I(inode);
@@ -1474,6 +1489,8 @@ static void nfs4_state_mark_open_context_bad(struct nfs4_state *state)
1474 if (ctx->state != state) 1489 if (ctx->state != state)
1475 continue; 1490 continue;
1476 set_bit(NFS_CONTEXT_BAD, &ctx->flags); 1491 set_bit(NFS_CONTEXT_BAD, &ctx->flags);
1492 pr_warn("NFSv4: state recovery failed for open file %pd2, "
1493 "error = %d\n", ctx->dentry, err);
1477 } 1494 }
1478 rcu_read_unlock(); 1495 rcu_read_unlock();
1479} 1496}
@@ -1481,7 +1498,7 @@ static void nfs4_state_mark_open_context_bad(struct nfs4_state *state)
1481static void nfs4_state_mark_recovery_failed(struct nfs4_state *state, int error) 1498static void nfs4_state_mark_recovery_failed(struct nfs4_state *state, int error)
1482{ 1499{
1483 set_bit(NFS_STATE_RECOVERY_FAILED, &state->flags); 1500 set_bit(NFS_STATE_RECOVERY_FAILED, &state->flags);
1484 nfs4_state_mark_open_context_bad(state); 1501 nfs4_state_mark_open_context_bad(state, error);
1485} 1502}
1486 1503
1487 1504
@@ -1512,6 +1529,7 @@ restart:
1512 switch (status) { 1529 switch (status) {
1513 case 0: 1530 case 0:
1514 break; 1531 break;
1532 case -ETIMEDOUT:
1515 case -ESTALE: 1533 case -ESTALE:
1516 case -NFS4ERR_ADMIN_REVOKED: 1534 case -NFS4ERR_ADMIN_REVOKED:
1517 case -NFS4ERR_STALE_STATEID: 1535 case -NFS4ERR_STALE_STATEID:
@@ -1605,6 +1623,7 @@ static int __nfs4_reclaim_open_state(struct nfs4_state_owner *sp, struct nfs4_st
1605static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs4_state_recovery_ops *ops) 1623static int nfs4_reclaim_open_state(struct nfs4_state_owner *sp, const struct nfs4_state_recovery_ops *ops)
1606{ 1624{
1607 struct nfs4_state *state; 1625 struct nfs4_state *state;
1626 unsigned int loop = 0;
1608 int status = 0; 1627 int status = 0;
1609 1628
1610 /* Note: we rely on the sp->so_states list being ordered 1629 /* Note: we rely on the sp->so_states list being ordered
@@ -1631,8 +1650,10 @@ restart:
1631 1650
1632 switch (status) { 1651 switch (status) {
1633 default: 1652 default:
1634 if (status >= 0) 1653 if (status >= 0) {
1654 loop = 0;
1635 break; 1655 break;
1656 }
1636 printk(KERN_ERR "NFS: %s: unhandled error %d\n", __func__, status); 1657 printk(KERN_ERR "NFS: %s: unhandled error %d\n", __func__, status);
1637 /* Fall through */ 1658 /* Fall through */
1638 case -ENOENT: 1659 case -ENOENT:
@@ -1646,6 +1667,10 @@ restart:
1646 break; 1667 break;
1647 case -EAGAIN: 1668 case -EAGAIN:
1648 ssleep(1); 1669 ssleep(1);
1670 if (loop++ < 10) {
1671 set_bit(ops->state_flag_bit, &state->flags);
1672 break;
1673 }
1649 /* Fall through */ 1674 /* Fall through */
1650 case -NFS4ERR_ADMIN_REVOKED: 1675 case -NFS4ERR_ADMIN_REVOKED:
1651 case -NFS4ERR_STALE_STATEID: 1676 case -NFS4ERR_STALE_STATEID:
@@ -1658,11 +1683,13 @@ restart:
1658 case -NFS4ERR_EXPIRED: 1683 case -NFS4ERR_EXPIRED:
1659 case -NFS4ERR_NO_GRACE: 1684 case -NFS4ERR_NO_GRACE:
1660 nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state); 1685 nfs4_state_mark_reclaim_nograce(sp->so_server->nfs_client, state);
1686 /* Fall through */
1661 case -NFS4ERR_STALE_CLIENTID: 1687 case -NFS4ERR_STALE_CLIENTID:
1662 case -NFS4ERR_BADSESSION: 1688 case -NFS4ERR_BADSESSION:
1663 case -NFS4ERR_BADSLOT: 1689 case -NFS4ERR_BADSLOT:
1664 case -NFS4ERR_BAD_HIGH_SLOT: 1690 case -NFS4ERR_BAD_HIGH_SLOT:
1665 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 1691 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1692 case -ETIMEDOUT:
1666 goto out_err; 1693 goto out_err;
1667 } 1694 }
1668 nfs4_put_open_state(state); 1695 nfs4_put_open_state(state);
@@ -1856,12 +1883,13 @@ static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recov
1856 struct nfs4_state_owner *sp; 1883 struct nfs4_state_owner *sp;
1857 struct nfs_server *server; 1884 struct nfs_server *server;
1858 struct rb_node *pos; 1885 struct rb_node *pos;
1886 LIST_HEAD(freeme);
1859 int status = 0; 1887 int status = 0;
1860 1888
1861restart: 1889restart:
1862 rcu_read_lock(); 1890 rcu_read_lock();
1863 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) { 1891 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
1864 nfs4_purge_state_owners(server); 1892 nfs4_purge_state_owners(server, &freeme);
1865 spin_lock(&clp->cl_lock); 1893 spin_lock(&clp->cl_lock);
1866 for (pos = rb_first(&server->state_owners); 1894 for (pos = rb_first(&server->state_owners);
1867 pos != NULL; 1895 pos != NULL;
@@ -1890,6 +1918,7 @@ restart:
1890 spin_unlock(&clp->cl_lock); 1918 spin_unlock(&clp->cl_lock);
1891 } 1919 }
1892 rcu_read_unlock(); 1920 rcu_read_unlock();
1921 nfs4_free_state_owners(&freeme);
1893 return 0; 1922 return 0;
1894} 1923}
1895 1924
@@ -1945,7 +1974,6 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status)
1945 return -EPERM; 1974 return -EPERM;
1946 case -EACCES: 1975 case -EACCES:
1947 case -NFS4ERR_DELAY: 1976 case -NFS4ERR_DELAY:
1948 case -ETIMEDOUT:
1949 case -EAGAIN: 1977 case -EAGAIN:
1950 ssleep(1); 1978 ssleep(1);
1951 break; 1979 break;
@@ -2574,7 +2602,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
2574 } 2602 }
2575 2603
2576 /* Now recover expired state... */ 2604 /* Now recover expired state... */
2577 if (test_and_clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) { 2605 if (test_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state)) {
2578 section = "reclaim nograce"; 2606 section = "reclaim nograce";
2579 status = nfs4_do_reclaim(clp, 2607 status = nfs4_do_reclaim(clp,
2580 clp->cl_mvops->nograce_recovery_ops); 2608 clp->cl_mvops->nograce_recovery_ops);
@@ -2582,6 +2610,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
2582 continue; 2610 continue;
2583 if (status < 0) 2611 if (status < 0)
2584 goto out_error; 2612 goto out_error;
2613 clear_bit(NFS4CLNT_RECLAIM_NOGRACE, &clp->cl_state);
2585 } 2614 }
2586 2615
2587 nfs4_end_drain_session(clp); 2616 nfs4_end_drain_session(clp);
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 75bd5b552ba4..4525d5acae38 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -1903,12 +1903,6 @@ lookup_again:
1903 goto out_unlock; 1903 goto out_unlock;
1904 } 1904 }
1905 1905
1906 if (!nfs4_valid_open_stateid(ctx->state)) {
1907 trace_pnfs_update_layout(ino, pos, count, iomode, lo, lseg,
1908 PNFS_UPDATE_LAYOUT_INVALID_OPEN);
1909 goto out_unlock;
1910 }
1911
1912 /* 1906 /*
1913 * Choose a stateid for the LAYOUTGET. If we don't have a layout 1907 * Choose a stateid for the LAYOUTGET. If we don't have a layout
1914 * stateid, or it has been invalidated, then we must use the open 1908 * stateid, or it has been invalidated, then we must use the open
@@ -1939,6 +1933,7 @@ lookup_again:
1939 iomode == IOMODE_RW ? FMODE_WRITE : FMODE_READ, 1933 iomode == IOMODE_RW ? FMODE_WRITE : FMODE_READ,
1940 NULL, &stateid, NULL); 1934 NULL, &stateid, NULL);
1941 if (status != 0) { 1935 if (status != 0) {
1936 lseg = ERR_PTR(status);
1942 trace_pnfs_update_layout(ino, pos, count, 1937 trace_pnfs_update_layout(ino, pos, count,
1943 iomode, lo, lseg, 1938 iomode, lo, lseg,
1944 PNFS_UPDATE_LAYOUT_INVALID_OPEN); 1939 PNFS_UPDATE_LAYOUT_INVALID_OPEN);
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index 628631e2e34f..703f595dce90 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -2260,6 +2260,7 @@ nfs_compare_remount_data(struct nfs_server *nfss,
2260 data->acdirmin != nfss->acdirmin / HZ || 2260 data->acdirmin != nfss->acdirmin / HZ ||
2261 data->acdirmax != nfss->acdirmax / HZ || 2261 data->acdirmax != nfss->acdirmax / HZ ||
2262 data->timeo != (10U * nfss->client->cl_timeout->to_initval / HZ) || 2262 data->timeo != (10U * nfss->client->cl_timeout->to_initval / HZ) ||
2263 (data->options & NFS_OPTION_FSCACHE) != (nfss->options & NFS_OPTION_FSCACHE) ||
2263 data->nfs_server.port != nfss->port || 2264 data->nfs_server.port != nfss->port ||
2264 data->nfs_server.addrlen != nfss->nfs_client->cl_addrlen || 2265 data->nfs_server.addrlen != nfss->nfs_client->cl_addrlen ||
2265 !rpc_cmp_addr((struct sockaddr *)&data->nfs_server.address, 2266 !rpc_cmp_addr((struct sockaddr *)&data->nfs_server.address,
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 385f3aaa2448..90c830e3758e 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -3825,7 +3825,6 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
3825 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb); 3825 u16 blk_per_bucket = ocfs2_blocks_per_xattr_bucket(inode->i_sb);
3826 int low_bucket = 0, bucket, high_bucket; 3826 int low_bucket = 0, bucket, high_bucket;
3827 struct ocfs2_xattr_bucket *search; 3827 struct ocfs2_xattr_bucket *search;
3828 u32 last_hash;
3829 u64 blkno, lower_blkno = 0; 3828 u64 blkno, lower_blkno = 0;
3830 3829
3831 search = ocfs2_xattr_bucket_new(inode); 3830 search = ocfs2_xattr_bucket_new(inode);
@@ -3869,8 +3868,6 @@ static int ocfs2_xattr_bucket_find(struct inode *inode,
3869 if (xh->xh_count) 3868 if (xh->xh_count)
3870 xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1]; 3869 xe = &xh->xh_entries[le16_to_cpu(xh->xh_count) - 1];
3871 3870
3872 last_hash = le32_to_cpu(xe->xe_name_hash);
3873
3874 /* record lower_blkno which may be the insert place. */ 3871 /* record lower_blkno which may be the insert place. */
3875 lower_blkno = blkno; 3872 lower_blkno = blkno;
3876 3873
diff --git a/fs/open.c b/fs/open.c
index b5b80469b93d..a59abe3c669a 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -374,6 +374,25 @@ long do_faccessat(int dfd, const char __user *filename, int mode)
374 override_cred->cap_permitted; 374 override_cred->cap_permitted;
375 } 375 }
376 376
377 /*
378 * The new set of credentials can *only* be used in
379 * task-synchronous circumstances, and does not need
380 * RCU freeing, unless somebody then takes a separate
381 * reference to it.
382 *
383 * NOTE! This is _only_ true because this credential
384 * is used purely for override_creds() that installs
385 * it as the subjective cred. Other threads will be
386 * accessing ->real_cred, not the subjective cred.
387 *
388 * If somebody _does_ make a copy of this (using the
389 * 'get_current_cred()' function), that will clear the
390 * non_rcu field, because now that other user may be
391 * expecting RCU freeing. But normal thread-synchronous
392 * cred accesses will keep things non-RCU.
393 */
394 override_cred->non_rcu = 1;
395
377 old_cred = override_creds(override_cred); 396 old_cred = override_creds(override_cred);
378retry: 397retry:
379 res = user_path_at(dfd, filename, lookup_flags, &path); 398 res = user_path_at(dfd, filename, lookup_flags, &path);
diff --git a/fs/seq_file.c b/fs/seq_file.c
index 04f09689cd6d..1600034a929b 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -119,6 +119,7 @@ static int traverse(struct seq_file *m, loff_t offset)
119 } 119 }
120 if (seq_has_overflowed(m)) 120 if (seq_has_overflowed(m))
121 goto Eoverflow; 121 goto Eoverflow;
122 p = m->op->next(m, p, &m->index);
122 if (pos + m->count > offset) { 123 if (pos + m->count > offset) {
123 m->from = offset - pos; 124 m->from = offset - pos;
124 m->count -= m->from; 125 m->count -= m->from;
@@ -126,7 +127,6 @@ static int traverse(struct seq_file *m, loff_t offset)
126 } 127 }
127 pos += m->count; 128 pos += m->count;
128 m->count = 0; 129 m->count = 0;
129 p = m->op->next(m, p, &m->index);
130 if (pos == offset) 130 if (pos == offset)
131 break; 131 break;
132 } 132 }
diff --git a/fs/super.c b/fs/super.c
index 113c58f19425..5960578a4076 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -478,13 +478,10 @@ EXPORT_SYMBOL(generic_shutdown_super);
478 478
479bool mount_capable(struct fs_context *fc) 479bool mount_capable(struct fs_context *fc)
480{ 480{
481 struct user_namespace *user_ns = fc->global ? &init_user_ns
482 : fc->user_ns;
483
484 if (!(fc->fs_type->fs_flags & FS_USERNS_MOUNT)) 481 if (!(fc->fs_type->fs_flags & FS_USERNS_MOUNT))
485 return capable(CAP_SYS_ADMIN); 482 return capable(CAP_SYS_ADMIN);
486 else 483 else
487 return ns_capable(user_ns, CAP_SYS_ADMIN); 484 return ns_capable(fc->user_ns, CAP_SYS_ADMIN);
488} 485}
489 486
490/** 487/**
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index baf0b72c0a37..07aad70f3931 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -3835,15 +3835,28 @@ xfs_bmapi_read(
3835 XFS_STATS_INC(mp, xs_blk_mapr); 3835 XFS_STATS_INC(mp, xs_blk_mapr);
3836 3836
3837 ifp = XFS_IFORK_PTR(ip, whichfork); 3837 ifp = XFS_IFORK_PTR(ip, whichfork);
3838 if (!ifp) {
3839 /* No CoW fork? Return a hole. */
3840 if (whichfork == XFS_COW_FORK) {
3841 mval->br_startoff = bno;
3842 mval->br_startblock = HOLESTARTBLOCK;
3843 mval->br_blockcount = len;
3844 mval->br_state = XFS_EXT_NORM;
3845 *nmap = 1;
3846 return 0;
3847 }
3838 3848
3839 /* No CoW fork? Return a hole. */ 3849 /*
3840 if (whichfork == XFS_COW_FORK && !ifp) { 3850 * A missing attr ifork implies that the inode says we're in
3841 mval->br_startoff = bno; 3851 * extents or btree format but failed to pass the inode fork
3842 mval->br_startblock = HOLESTARTBLOCK; 3852 * verifier while trying to load it. Treat that as a file
3843 mval->br_blockcount = len; 3853 * corruption too.
3844 mval->br_state = XFS_EXT_NORM; 3854 */
3845 *nmap = 1; 3855#ifdef DEBUG
3846 return 0; 3856 xfs_alert(mp, "%s: inode %llu missing fork %d",
3857 __func__, ip->i_ino, whichfork);
3858#endif /* DEBUG */
3859 return -EFSCORRUPTED;
3847 } 3860 }
3848 3861
3849 if (!(ifp->if_flags & XFS_IFEXTENTS)) { 3862 if (!(ifp->if_flags & XFS_IFEXTENTS)) {
diff --git a/fs/xfs/libxfs/xfs_da_btree.c b/fs/xfs/libxfs/xfs_da_btree.c
index d1c77fd0815d..0bf56e94bfe9 100644
--- a/fs/xfs/libxfs/xfs_da_btree.c
+++ b/fs/xfs/libxfs/xfs_da_btree.c
@@ -487,10 +487,8 @@ xfs_da3_split(
487 ASSERT(state->path.active == 0); 487 ASSERT(state->path.active == 0);
488 oldblk = &state->path.blk[0]; 488 oldblk = &state->path.blk[0];
489 error = xfs_da3_root_split(state, oldblk, addblk); 489 error = xfs_da3_root_split(state, oldblk, addblk);
490 if (error) { 490 if (error)
491 addblk->bp = NULL; 491 goto out;
492 return error; /* GROT: dir is inconsistent */
493 }
494 492
495 /* 493 /*
496 * Update pointers to the node which used to be block 0 and just got 494 * Update pointers to the node which used to be block 0 and just got
@@ -505,7 +503,10 @@ xfs_da3_split(
505 */ 503 */
506 node = oldblk->bp->b_addr; 504 node = oldblk->bp->b_addr;
507 if (node->hdr.info.forw) { 505 if (node->hdr.info.forw) {
508 ASSERT(be32_to_cpu(node->hdr.info.forw) == addblk->blkno); 506 if (be32_to_cpu(node->hdr.info.forw) != addblk->blkno) {
507 error = -EFSCORRUPTED;
508 goto out;
509 }
509 node = addblk->bp->b_addr; 510 node = addblk->bp->b_addr;
510 node->hdr.info.back = cpu_to_be32(oldblk->blkno); 511 node->hdr.info.back = cpu_to_be32(oldblk->blkno);
511 xfs_trans_log_buf(state->args->trans, addblk->bp, 512 xfs_trans_log_buf(state->args->trans, addblk->bp,
@@ -514,15 +515,19 @@ xfs_da3_split(
514 } 515 }
515 node = oldblk->bp->b_addr; 516 node = oldblk->bp->b_addr;
516 if (node->hdr.info.back) { 517 if (node->hdr.info.back) {
517 ASSERT(be32_to_cpu(node->hdr.info.back) == addblk->blkno); 518 if (be32_to_cpu(node->hdr.info.back) != addblk->blkno) {
519 error = -EFSCORRUPTED;
520 goto out;
521 }
518 node = addblk->bp->b_addr; 522 node = addblk->bp->b_addr;
519 node->hdr.info.forw = cpu_to_be32(oldblk->blkno); 523 node->hdr.info.forw = cpu_to_be32(oldblk->blkno);
520 xfs_trans_log_buf(state->args->trans, addblk->bp, 524 xfs_trans_log_buf(state->args->trans, addblk->bp,
521 XFS_DA_LOGRANGE(node, &node->hdr.info, 525 XFS_DA_LOGRANGE(node, &node->hdr.info,
522 sizeof(node->hdr.info))); 526 sizeof(node->hdr.info)));
523 } 527 }
528out:
524 addblk->bp = NULL; 529 addblk->bp = NULL;
525 return 0; 530 return error;
526} 531}
527 532
528/* 533/*
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index afcc6642690a..1fc44efc344d 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -741,7 +741,8 @@ xfs_dir2_leafn_lookup_for_entry(
741 ents = dp->d_ops->leaf_ents_p(leaf); 741 ents = dp->d_ops->leaf_ents_p(leaf);
742 742
743 xfs_dir3_leaf_check(dp, bp); 743 xfs_dir3_leaf_check(dp, bp);
744 ASSERT(leafhdr.count > 0); 744 if (leafhdr.count <= 0)
745 return -EFSCORRUPTED;
745 746
746 /* 747 /*
747 * Look up the hash value in the leaf entries. 748 * Look up the hash value in the leaf entries.
diff --git a/fs/xfs/scrub/dabtree.c b/fs/xfs/scrub/dabtree.c
index 94c4f1de1922..77ff9f97bcda 100644
--- a/fs/xfs/scrub/dabtree.c
+++ b/fs/xfs/scrub/dabtree.c
@@ -278,7 +278,11 @@ xchk_da_btree_block_check_sibling(
278 /* Compare upper level pointer to sibling pointer. */ 278 /* Compare upper level pointer to sibling pointer. */
279 if (ds->state->altpath.blk[level].blkno != sibling) 279 if (ds->state->altpath.blk[level].blkno != sibling)
280 xchk_da_set_corrupt(ds, level); 280 xchk_da_set_corrupt(ds, level);
281 xfs_trans_brelse(ds->dargs.trans, ds->state->altpath.blk[level].bp); 281 if (ds->state->altpath.blk[level].bp) {
282 xfs_trans_brelse(ds->dargs.trans,
283 ds->state->altpath.blk[level].bp);
284 ds->state->altpath.blk[level].bp = NULL;
285 }
282out: 286out:
283 return error; 287 return error;
284} 288}
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index a8a06bb78ea8..f5c955d35be4 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -272,6 +272,7 @@ xfs_bulkstat_to_bstat(
272 struct xfs_bstat *bs1, 272 struct xfs_bstat *bs1,
273 const struct xfs_bulkstat *bstat) 273 const struct xfs_bulkstat *bstat)
274{ 274{
275 /* memset is needed here because of padding holes in the structure. */
275 memset(bs1, 0, sizeof(struct xfs_bstat)); 276 memset(bs1, 0, sizeof(struct xfs_bstat));
276 bs1->bs_ino = bstat->bs_ino; 277 bs1->bs_ino = bstat->bs_ino;
277 bs1->bs_mode = bstat->bs_mode; 278 bs1->bs_mode = bstat->bs_mode;
@@ -388,6 +389,8 @@ xfs_inumbers_to_inogrp(
388 struct xfs_inogrp *ig1, 389 struct xfs_inogrp *ig1,
389 const struct xfs_inumbers *ig) 390 const struct xfs_inumbers *ig)
390{ 391{
392 /* memset is needed here because of padding holes in the structure. */
393 memset(ig1, 0, sizeof(struct xfs_inogrp));
391 ig1->xi_startino = ig->xi_startino; 394 ig1->xi_startino = ig->xi_startino;
392 ig1->xi_alloccount = ig->xi_alloccount; 395 ig1->xi_alloccount = ig->xi_alloccount;
393 ig1->xi_allocmask = ig->xi_allocmask; 396 ig1->xi_allocmask = ig->xi_allocmask;
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 00e9f5c388d3..7fc3c1ad36bc 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -429,10 +429,7 @@ xfs_log_reserve(
429 429
430 ASSERT(*ticp == NULL); 430 ASSERT(*ticp == NULL);
431 tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent, 431 tic = xlog_ticket_alloc(log, unit_bytes, cnt, client, permanent,
432 KM_SLEEP | KM_MAYFAIL); 432 KM_SLEEP);
433 if (!tic)
434 return -ENOMEM;
435
436 *ticp = tic; 433 *ticp = tic;
437 434
438 xlog_grant_push_ail(log, tic->t_cnt ? tic->t_unit_res * tic->t_cnt 435 xlog_grant_push_ail(log, tic->t_cnt ? tic->t_unit_res * tic->t_cnt