about summary refs log tree commit diff stats
path: root/fs/nfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/nfs')
-rw-r--r-- fs/nfs/callback_proc.c 2
-rw-r--r-- fs/nfs/client.c 131
-rw-r--r-- fs/nfs/dir.c 13
-rw-r--r-- fs/nfs/direct.c 8
-rw-r--r-- fs/nfs/file.c 4
-rw-r--r-- fs/nfs/getroot.c 42
-rw-r--r-- fs/nfs/idmap.c 90
-rw-r--r-- fs/nfs/inode.c 9
-rw-r--r-- fs/nfs/internal.h 43
-rw-r--r-- fs/nfs/namespace.c 66
-rw-r--r-- fs/nfs/nfs3proc.c 1
-rw-r--r-- fs/nfs/nfs4_fs.h 38
-rw-r--r-- fs/nfs/nfs4filelayout.c 361
-rw-r--r-- fs/nfs/nfs4filelayout.h 19
-rw-r--r-- fs/nfs/nfs4filelayoutdev.c 256
-rw-r--r-- fs/nfs/nfs4namespace.c 41
-rw-r--r-- fs/nfs/nfs4proc.c 214
-rw-r--r-- fs/nfs/nfs4renewd.c 6
-rw-r--r-- fs/nfs/nfs4state.c 35
-rw-r--r-- fs/nfs/nfs4xdr.c 42
-rw-r--r-- fs/nfs/nfsroot.c 29
-rw-r--r-- fs/nfs/pagelist.c 22
-rw-r--r-- fs/nfs/pnfs.c 330
-rw-r--r-- fs/nfs/pnfs.h 118
-rw-r--r-- fs/nfs/proc.c 1
-rw-r--r-- fs/nfs/read.c 127
-rw-r--r-- fs/nfs/super.c 478
-rw-r--r-- fs/nfs/unlink.c 22
-rw-r--r-- fs/nfs/write.c 155
29 files changed, 1781 insertions, 922 deletions
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 89587573fe50..2f41dccea18e 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -188,10 +188,10 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
188 rv = NFS4ERR_DELAY; 188 rv = NFS4ERR_DELAY;
189 list_del_init(&lo->plh_bulk_recall); 189 list_del_init(&lo->plh_bulk_recall);
190 spin_unlock(&ino->i_lock); 190 spin_unlock(&ino->i_lock);
191 pnfs_free_lseg_list(&free_me_list);
191 put_layout_hdr(lo); 192 put_layout_hdr(lo);
192 iput(ino); 193 iput(ino);
193 } 194 }
194 pnfs_free_lseg_list(&free_me_list);
195 return rv; 195 return rv;
196} 196}
197 197
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index bd3ca32879e7..139be9647d80 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -82,6 +82,11 @@ retry:
82#endif /* CONFIG_NFS_V4 */ 82#endif /* CONFIG_NFS_V4 */
83 83
84/* 84/*
85 * Turn off NFSv4 uid/gid mapping when using AUTH_SYS
86 */
87static int nfs4_disable_idmapping = 0;
88
89/*
85 * RPC cruft for NFS 90 * RPC cruft for NFS
86 */ 91 */
87static struct rpc_version *nfs_version[5] = { 92static struct rpc_version *nfs_version[5] = {
@@ -481,7 +486,12 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
481 * Look up a client by IP address and protocol version 486 * Look up a client by IP address and protocol version
482 * - creates a new record if one doesn't yet exist 487 * - creates a new record if one doesn't yet exist
483 */ 488 */
484static struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init) 489static struct nfs_client *
490nfs_get_client(const struct nfs_client_initdata *cl_init,
491 const struct rpc_timeout *timeparms,
492 const char *ip_addr,
493 rpc_authflavor_t authflavour,
494 int noresvport)
485{ 495{
486 struct nfs_client *clp, *new = NULL; 496 struct nfs_client *clp, *new = NULL;
487 int error; 497 int error;
@@ -512,6 +522,13 @@ install_client:
512 clp = new; 522 clp = new;
513 list_add(&clp->cl_share_link, &nfs_client_list); 523 list_add(&clp->cl_share_link, &nfs_client_list);
514 spin_unlock(&nfs_client_lock); 524 spin_unlock(&nfs_client_lock);
525
526 error = cl_init->rpc_ops->init_client(clp, timeparms, ip_addr,
527 authflavour, noresvport);
528 if (error < 0) {
529 nfs_put_client(clp);
530 return ERR_PTR(error);
531 }
515 dprintk("--> nfs_get_client() = %p [new]\n", clp); 532 dprintk("--> nfs_get_client() = %p [new]\n", clp);
516 return clp; 533 return clp;
517 534
@@ -767,9 +784,9 @@ static int nfs_init_server_rpcclient(struct nfs_server *server,
767/* 784/*
768 * Initialise an NFS2 or NFS3 client 785 * Initialise an NFS2 or NFS3 client
769 */ 786 */
770static int nfs_init_client(struct nfs_client *clp, 787int nfs_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms,
771 const struct rpc_timeout *timeparms, 788 const char *ip_addr, rpc_authflavor_t authflavour,
772 const struct nfs_parsed_mount_data *data) 789 int noresvport)
773{ 790{
774 int error; 791 int error;
775 792
@@ -784,7 +801,7 @@ static int nfs_init_client(struct nfs_client *clp,
784 * - RFC 2623, sec 2.3.2 801 * - RFC 2623, sec 2.3.2
785 */ 802 */
786 error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX, 803 error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX,
787 0, data->flags & NFS_MOUNT_NORESVPORT); 804 0, noresvport);
788 if (error < 0) 805 if (error < 0)
789 goto error; 806 goto error;
790 nfs_mark_client_ready(clp, NFS_CS_READY); 807 nfs_mark_client_ready(clp, NFS_CS_READY);
@@ -820,19 +837,17 @@ static int nfs_init_server(struct nfs_server *server,
820 cl_init.rpc_ops = &nfs_v3_clientops; 837 cl_init.rpc_ops = &nfs_v3_clientops;
821#endif 838#endif
822 839
840 nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
841 data->timeo, data->retrans);
842
823 /* Allocate or find a client reference we can use */ 843 /* Allocate or find a client reference we can use */
824 clp = nfs_get_client(&cl_init); 844 clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX,
845 data->flags & NFS_MOUNT_NORESVPORT);
825 if (IS_ERR(clp)) { 846 if (IS_ERR(clp)) {
826 dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp)); 847 dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
827 return PTR_ERR(clp); 848 return PTR_ERR(clp);
828 } 849 }
829 850
830 nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
831 data->timeo, data->retrans);
832 error = nfs_init_client(clp, &timeparms, data);
833 if (error < 0)
834 goto error;
835
836 server->nfs_client = clp; 851 server->nfs_client = clp;
837 852
838 /* Initialise the client representation from the mount data */ 853 /* Initialise the client representation from the mount data */
@@ -1009,14 +1024,19 @@ static void nfs_server_insert_lists(struct nfs_server *server)
1009 spin_lock(&nfs_client_lock); 1024 spin_lock(&nfs_client_lock);
1010 list_add_tail_rcu(&server->client_link, &clp->cl_superblocks); 1025 list_add_tail_rcu(&server->client_link, &clp->cl_superblocks);
1011 list_add_tail(&server->master_link, &nfs_volume_list); 1026 list_add_tail(&server->master_link, &nfs_volume_list);
1027 clear_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state);
1012 spin_unlock(&nfs_client_lock); 1028 spin_unlock(&nfs_client_lock);
1013 1029
1014} 1030}
1015 1031
1016static void nfs_server_remove_lists(struct nfs_server *server) 1032static void nfs_server_remove_lists(struct nfs_server *server)
1017{ 1033{
1034 struct nfs_client *clp = server->nfs_client;
1035
1018 spin_lock(&nfs_client_lock); 1036 spin_lock(&nfs_client_lock);
1019 list_del_rcu(&server->client_link); 1037 list_del_rcu(&server->client_link);
1038 if (clp && list_empty(&clp->cl_superblocks))
1039 set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state);
1020 list_del(&server->master_link); 1040 list_del(&server->master_link);
1021 spin_unlock(&nfs_client_lock); 1041 spin_unlock(&nfs_client_lock);
1022 1042
@@ -1307,11 +1327,11 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp)
1307/* 1327/*
1308 * Initialise an NFS4 client record 1328 * Initialise an NFS4 client record
1309 */ 1329 */
1310static int nfs4_init_client(struct nfs_client *clp, 1330int nfs4_init_client(struct nfs_client *clp,
1311 const struct rpc_timeout *timeparms, 1331 const struct rpc_timeout *timeparms,
1312 const char *ip_addr, 1332 const char *ip_addr,
1313 rpc_authflavor_t authflavour, 1333 rpc_authflavor_t authflavour,
1314 int flags) 1334 int noresvport)
1315{ 1335{
1316 int error; 1336 int error;
1317 1337
@@ -1325,7 +1345,7 @@ static int nfs4_init_client(struct nfs_client *clp,
1325 clp->rpc_ops = &nfs_v4_clientops; 1345 clp->rpc_ops = &nfs_v4_clientops;
1326 1346
1327 error = nfs_create_rpc_client(clp, timeparms, authflavour, 1347 error = nfs_create_rpc_client(clp, timeparms, authflavour,
1328 1, flags & NFS_MOUNT_NORESVPORT); 1348 1, noresvport);
1329 if (error < 0) 1349 if (error < 0)
1330 goto error; 1350 goto error;
1331 strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr)); 1351 strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr));
@@ -1378,27 +1398,71 @@ static int nfs4_set_client(struct nfs_server *server,
1378 dprintk("--> nfs4_set_client()\n"); 1398 dprintk("--> nfs4_set_client()\n");
1379 1399
1380 /* Allocate or find a client reference we can use */ 1400 /* Allocate or find a client reference we can use */
1381 clp = nfs_get_client(&cl_init); 1401 clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour,
1402 server->flags & NFS_MOUNT_NORESVPORT);
1382 if (IS_ERR(clp)) { 1403 if (IS_ERR(clp)) {
1383 error = PTR_ERR(clp); 1404 error = PTR_ERR(clp);
1384 goto error; 1405 goto error;
1385 } 1406 }
1386 error = nfs4_init_client(clp, timeparms, ip_addr, authflavour, 1407
1387 server->flags); 1408 /*
1388 if (error < 0) 1409 * Query for the lease time on clientid setup or renewal
1389 goto error_put; 1410 *
1411 * Note that this will be set on nfs_clients that were created
1412 * only for the DS role and did not set this bit, but now will
1413 * serve a dual role.
1414 */
1415 set_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state);
1390 1416
1391 server->nfs_client = clp; 1417 server->nfs_client = clp;
1392 dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp); 1418 dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp);
1393 return 0; 1419 return 0;
1394
1395error_put:
1396 nfs_put_client(clp);
1397error: 1420error:
1398 dprintk("<-- nfs4_set_client() = xerror %d\n", error); 1421 dprintk("<-- nfs4_set_client() = xerror %d\n", error);
1399 return error; 1422 return error;
1400} 1423}
1401 1424
1425/*
1426 * Set up a pNFS Data Server client.
1427 *
1428 * Return any existing nfs_client that matches server address,port,version
1429 * and minorversion.
1430 *
1431 * For a new nfs_client, use a soft mount (default), a low retrans and a
1432 * low timeout interval so that if a connection is lost, we retry through
1433 * the MDS.
1434 */
1435struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
1436 const struct sockaddr *ds_addr,
1437 int ds_addrlen, int ds_proto)
1438{
1439 struct nfs_client_initdata cl_init = {
1440 .addr = ds_addr,
1441 .addrlen = ds_addrlen,
1442 .rpc_ops = &nfs_v4_clientops,
1443 .proto = ds_proto,
1444 .minorversion = mds_clp->cl_minorversion,
1445 };
1446 struct rpc_timeout ds_timeout = {
1447 .to_initval = 15 * HZ,
1448 .to_maxval = 15 * HZ,
1449 .to_retries = 1,
1450 .to_exponential = 1,
1451 };
1452 struct nfs_client *clp;
1453
1454 /*
1455 * Set an authflavor equal to the MDS value. Use the MDS nfs_client
1456 * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS
1457 * (section 13.1 RFC 5661).
1458 */
1459 clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr,
1460 mds_clp->cl_rpcclient->cl_auth->au_flavor, 0);
1461
1462 dprintk("<-- %s %p\n", __func__, clp);
1463 return clp;
1464}
1465EXPORT_SYMBOL(nfs4_set_ds_client);
1402 1466
1403/* 1467/*
1404 * Session has been established, and the client marked ready. 1468 * Session has been established, and the client marked ready.
@@ -1435,6 +1499,10 @@ static int nfs4_server_common_setup(struct nfs_server *server,
1435 BUG_ON(!server->nfs_client->rpc_ops); 1499 BUG_ON(!server->nfs_client->rpc_ops);
1436 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); 1500 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
1437 1501
1502 /* data servers support only a subset of NFSv4.1 */
1503 if (is_ds_only_client(server->nfs_client))
1504 return -EPROTONOSUPPORT;
1505
1438 fattr = nfs_alloc_fattr(); 1506 fattr = nfs_alloc_fattr();
1439 if (fattr == NULL) 1507 if (fattr == NULL)
1440 return -ENOMEM; 1508 return -ENOMEM;
@@ -1504,6 +1572,13 @@ static int nfs4_init_server(struct nfs_server *server,
1504 if (error < 0) 1572 if (error < 0)
1505 goto error; 1573 goto error;
1506 1574
1575 /*
1576 * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower
1577 * authentication.
1578 */
1579 if (nfs4_disable_idmapping && data->auth_flavors[0] == RPC_AUTH_UNIX)
1580 server->caps |= NFS_CAP_UIDGID_NOMAP;
1581
1507 if (data->rsize) 1582 if (data->rsize)
1508 server->rsize = nfs_block_size(data->rsize, NULL); 1583 server->rsize = nfs_block_size(data->rsize, NULL);
1509 if (data->wsize) 1584 if (data->wsize)
@@ -1921,3 +1996,7 @@ void nfs_fs_proc_exit(void)
1921} 1996}
1922 1997
1923#endif /* CONFIG_PROC_FS */ 1998#endif /* CONFIG_PROC_FS */
1999
2000module_param(nfs4_disable_idmapping, bool, 0644);
2001MODULE_PARM_DESC(nfs4_disable_idmapping,
2002 "Turn off NFSv4 idmapping when using 'sec=sys'");
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2c3eb33b904d..abdf38d5971d 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1169,11 +1169,23 @@ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode)
1169 iput(inode); 1169 iput(inode);
1170} 1170}
1171 1171
1172static void nfs_d_release(struct dentry *dentry)
1173{
1174 /* free cached devname value, if it survived that far */
1175 if (unlikely(dentry->d_fsdata)) {
1176 if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
1177 WARN_ON(1);
1178 else
1179 kfree(dentry->d_fsdata);
1180 }
1181}
1182
1172const struct dentry_operations nfs_dentry_operations = { 1183const struct dentry_operations nfs_dentry_operations = {
1173 .d_revalidate = nfs_lookup_revalidate, 1184 .d_revalidate = nfs_lookup_revalidate,
1174 .d_delete = nfs_dentry_delete, 1185 .d_delete = nfs_dentry_delete,
1175 .d_iput = nfs_dentry_iput, 1186 .d_iput = nfs_dentry_iput,
1176 .d_automount = nfs_d_automount, 1187 .d_automount = nfs_d_automount,
1188 .d_release = nfs_d_release,
1177}; 1189};
1178 1190
1179static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 1191static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
@@ -1248,6 +1260,7 @@ const struct dentry_operations nfs4_dentry_operations = {
1248 .d_delete = nfs_dentry_delete, 1260 .d_delete = nfs_dentry_delete,
1249 .d_iput = nfs_dentry_iput, 1261 .d_iput = nfs_dentry_iput,
1250 .d_automount = nfs_d_automount, 1262 .d_automount = nfs_d_automount,
1263 .d_release = nfs_d_release,
1251}; 1264};
1252 1265
1253/* 1266/*
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 9943a75bb6d1..8eea25366717 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -45,6 +45,7 @@
45#include <linux/pagemap.h> 45#include <linux/pagemap.h>
46#include <linux/kref.h> 46#include <linux/kref.h>
47#include <linux/slab.h> 47#include <linux/slab.h>
48#include <linux/task_io_accounting_ops.h>
48 49
49#include <linux/nfs_fs.h> 50#include <linux/nfs_fs.h>
50#include <linux/nfs_page.h> 51#include <linux/nfs_page.h>
@@ -649,8 +650,7 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
649{ 650{
650 struct nfs_write_data *data = calldata; 651 struct nfs_write_data *data = calldata;
651 652
652 if (nfs_writeback_done(task, data) != 0) 653 nfs_writeback_done(task, data);
653 return;
654} 654}
655 655
656/* 656/*
@@ -938,6 +938,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
938 if (retval) 938 if (retval)
939 goto out; 939 goto out;
940 940
941 task_io_account_read(count);
942
941 retval = nfs_direct_read(iocb, iov, nr_segs, pos); 943 retval = nfs_direct_read(iocb, iov, nr_segs, pos);
942 if (retval > 0) 944 if (retval > 0)
943 iocb->ki_pos = pos + retval; 945 iocb->ki_pos = pos + retval;
@@ -999,6 +1001,8 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
999 if (retval) 1001 if (retval)
1000 goto out; 1002 goto out;
1001 1003
1004 task_io_account_write(count);
1005
1002 retval = nfs_direct_write(iocb, iov, nr_segs, pos, count); 1006 retval = nfs_direct_write(iocb, iov, nr_segs, pos, count);
1003 1007
1004 if (retval > 0) 1008 if (retval > 0)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 7bf029ef4084..d85a534b15cd 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -387,10 +387,6 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
387 file->f_path.dentry->d_name.name, 387 file->f_path.dentry->d_name.name,
388 mapping->host->i_ino, len, (long long) pos); 388 mapping->host->i_ino, len, (long long) pos);
389 389
390 pnfs_update_layout(mapping->host,
391 nfs_file_open_context(file),
392 IOMODE_RW);
393
394start: 390start:
395 /* 391 /*
396 * Prevent starvation issues if someone is doing a consistency 392 * Prevent starvation issues if someone is doing a consistency
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index b5ffe8fa291f..1084792bc0fe 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -75,18 +75,25 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
75/* 75/*
76 * get an NFS2/NFS3 root dentry from the root filehandle 76 * get an NFS2/NFS3 root dentry from the root filehandle
77 */ 77 */
78struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh) 78struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh,
79 const char *devname)
79{ 80{
80 struct nfs_server *server = NFS_SB(sb); 81 struct nfs_server *server = NFS_SB(sb);
81 struct nfs_fsinfo fsinfo; 82 struct nfs_fsinfo fsinfo;
82 struct dentry *ret; 83 struct dentry *ret;
83 struct inode *inode; 84 struct inode *inode;
85 void *name = kstrdup(devname, GFP_KERNEL);
84 int error; 86 int error;
85 87
88 if (!name)
89 return ERR_PTR(-ENOMEM);
90
86 /* get the actual root for this mount */ 91 /* get the actual root for this mount */
87 fsinfo.fattr = nfs_alloc_fattr(); 92 fsinfo.fattr = nfs_alloc_fattr();
88 if (fsinfo.fattr == NULL) 93 if (fsinfo.fattr == NULL) {
94 kfree(name);
89 return ERR_PTR(-ENOMEM); 95 return ERR_PTR(-ENOMEM);
96 }
90 97
91 error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo); 98 error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
92 if (error < 0) { 99 if (error < 0) {
@@ -119,7 +126,15 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh)
119 } 126 }
120 127
121 security_d_instantiate(ret, inode); 128 security_d_instantiate(ret, inode);
129 spin_lock(&ret->d_lock);
130 if (IS_ROOT(ret) && !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
131 ret->d_fsdata = name;
132 name = NULL;
133 }
134 spin_unlock(&ret->d_lock);
122out: 135out:
136 if (name)
137 kfree(name);
123 nfs_free_fattr(fsinfo.fattr); 138 nfs_free_fattr(fsinfo.fattr);
124 return ret; 139 return ret;
125} 140}
@@ -169,27 +184,35 @@ out:
169/* 184/*
170 * get an NFS4 root dentry from the root filehandle 185 * get an NFS4 root dentry from the root filehandle
171 */ 186 */
172struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh) 187struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh,
188 const char *devname)
173{ 189{
174 struct nfs_server *server = NFS_SB(sb); 190 struct nfs_server *server = NFS_SB(sb);
175 struct nfs_fattr *fattr = NULL; 191 struct nfs_fattr *fattr = NULL;
176 struct dentry *ret; 192 struct dentry *ret;
177 struct inode *inode; 193 struct inode *inode;
194 void *name = kstrdup(devname, GFP_KERNEL);
178 int error; 195 int error;
179 196
180 dprintk("--> nfs4_get_root()\n"); 197 dprintk("--> nfs4_get_root()\n");
181 198
199 if (!name)
200 return ERR_PTR(-ENOMEM);
201
182 /* get the info about the server and filesystem */ 202 /* get the info about the server and filesystem */
183 error = nfs4_server_capabilities(server, mntfh); 203 error = nfs4_server_capabilities(server, mntfh);
184 if (error < 0) { 204 if (error < 0) {
185 dprintk("nfs_get_root: getcaps error = %d\n", 205 dprintk("nfs_get_root: getcaps error = %d\n",
186 -error); 206 -error);
207 kfree(name);
187 return ERR_PTR(error); 208 return ERR_PTR(error);
188 } 209 }
189 210
190 fattr = nfs_alloc_fattr(); 211 fattr = nfs_alloc_fattr();
191 if (fattr == NULL) 212 if (fattr == NULL) {
192 return ERR_PTR(-ENOMEM);; 213 kfree(name);
214 return ERR_PTR(-ENOMEM);
215 }
193 216
194 /* get the actual root for this mount */ 217 /* get the actual root for this mount */
195 error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr); 218 error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr);
@@ -223,8 +246,15 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
223 } 246 }
224 247
225 security_d_instantiate(ret, inode); 248 security_d_instantiate(ret, inode);
226 249 spin_lock(&ret->d_lock);
250 if (IS_ROOT(ret) && !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
251 ret->d_fsdata = name;
252 name = NULL;
253 }
254 spin_unlock(&ret->d_lock);
227out: 255out:
256 if (name)
257 kfree(name);
228 nfs_free_fattr(fattr); 258 nfs_free_fattr(fattr);
229 dprintk("<-- nfs4_get_root()\n"); 259 dprintk("<-- nfs4_get_root()\n");
230 return ret; 260 return ret;
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 18696882f1c6..79664a1025af 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -33,16 +33,41 @@
33 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 */ 35 */
36#include <linux/types.h>
37#include <linux/string.h>
38#include <linux/kernel.h>
39
40static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res)
41{
42 unsigned long val;
43 char buf[16];
44
45 if (memchr(name, '@', namelen) != NULL || namelen >= sizeof(buf))
46 return 0;
47 memcpy(buf, name, namelen);
48 buf[namelen] = '\0';
49 if (strict_strtoul(buf, 0, &val) != 0)
50 return 0;
51 *res = val;
52 return 1;
53}
54
55static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen)
56{
57 return snprintf(buf, buflen, "%u", id);
58}
36 59
37#ifdef CONFIG_NFS_USE_NEW_IDMAPPER 60#ifdef CONFIG_NFS_USE_NEW_IDMAPPER
38 61
39#include <linux/slab.h> 62#include <linux/slab.h>
40#include <linux/cred.h> 63#include <linux/cred.h>
64#include <linux/sunrpc/sched.h>
65#include <linux/nfs4.h>
66#include <linux/nfs_fs_sb.h>
41#include <linux/nfs_idmap.h> 67#include <linux/nfs_idmap.h>
42#include <linux/keyctl.h> 68#include <linux/keyctl.h>
43#include <linux/key-type.h> 69#include <linux/key-type.h>
44#include <linux/rcupdate.h> 70#include <linux/rcupdate.h>
45#include <linux/kernel.h>
46#include <linux/err.h> 71#include <linux/err.h>
47 72
48#include <keys/user-type.h> 73#include <keys/user-type.h>
@@ -219,23 +244,39 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen,
219 return ret; 244 return ret;
220} 245}
221 246
222int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) 247int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
223{ 248{
249 if (nfs_map_string_to_numeric(name, namelen, uid))
250 return 0;
224 return nfs_idmap_lookup_id(name, namelen, "uid", uid); 251 return nfs_idmap_lookup_id(name, namelen, "uid", uid);
225} 252}
226 253
227int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *gid) 254int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid)
228{ 255{
256 if (nfs_map_string_to_numeric(name, namelen, gid))
257 return 0;
229 return nfs_idmap_lookup_id(name, namelen, "gid", gid); 258 return nfs_idmap_lookup_id(name, namelen, "gid", gid);
230} 259}
231 260
232int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen) 261int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
233{ 262{
234 return nfs_idmap_lookup_name(uid, "user", buf, buflen); 263 int ret = -EINVAL;
264
265 if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
266 ret = nfs_idmap_lookup_name(uid, "user", buf, buflen);
267 if (ret < 0)
268 ret = nfs_map_numeric_to_string(uid, buf, buflen);
269 return ret;
235} 270}
236int nfs_map_gid_to_group(struct nfs_client *clp, __u32 gid, char *buf, size_t buflen) 271int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen)
237{ 272{
238 return nfs_idmap_lookup_name(gid, "group", buf, buflen); 273 int ret = -EINVAL;
274
275 if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
276 ret = nfs_idmap_lookup_name(gid, "group", buf, buflen);
277 if (ret < 0)
278 ret = nfs_map_numeric_to_string(gid, buf, buflen);
279 return ret;
239} 280}
240 281
241#else /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */ 282#else /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */
@@ -243,7 +284,6 @@ int nfs_map_gid_to_group(struct nfs_client *clp, __u32 gid, char *buf, size_t bu
243#include <linux/module.h> 284#include <linux/module.h>
244#include <linux/mutex.h> 285#include <linux/mutex.h>
245#include <linux/init.h> 286#include <linux/init.h>
246#include <linux/types.h>
247#include <linux/slab.h> 287#include <linux/slab.h>
248#include <linux/socket.h> 288#include <linux/socket.h>
249#include <linux/in.h> 289#include <linux/in.h>
@@ -695,31 +735,45 @@ static unsigned int fnvhash32(const void *buf, size_t buflen)
695 return hash; 735 return hash;
696} 736}
697 737
698int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) 738int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
699{ 739{
700 struct idmap *idmap = clp->cl_idmap; 740 struct idmap *idmap = server->nfs_client->cl_idmap;
701 741
742 if (nfs_map_string_to_numeric(name, namelen, uid))
743 return 0;
702 return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid); 744 return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid);
703} 745}
704 746
705int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) 747int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
706{ 748{
707 struct idmap *idmap = clp->cl_idmap; 749 struct idmap *idmap = server->nfs_client->cl_idmap;
708 750
751 if (nfs_map_string_to_numeric(name, namelen, uid))
752 return 0;
709 return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); 753 return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid);
710} 754}
711 755
712int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen) 756int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
713{ 757{
714 struct idmap *idmap = clp->cl_idmap; 758 struct idmap *idmap = server->nfs_client->cl_idmap;
759 int ret = -EINVAL;
715 760
716 return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); 761 if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
762 ret = nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
763 if (ret < 0)
764 ret = nfs_map_numeric_to_string(uid, buf, buflen);
765 return ret;
717} 766}
718int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen) 767int nfs_map_gid_to_group(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
719{ 768{
720 struct idmap *idmap = clp->cl_idmap; 769 struct idmap *idmap = server->nfs_client->cl_idmap;
770 int ret = -EINVAL;
721 771
722 return nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf); 772 if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
773 ret = nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf);
774 if (ret < 0)
775 ret = nfs_map_numeric_to_string(uid, buf, buflen);
776 return ret;
723} 777}
724 778
725#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */ 779#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 1cc600e77bb4..01768e5e2c9b 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -37,6 +37,7 @@
37#include <linux/inet.h> 37#include <linux/inet.h>
38#include <linux/nfs_xdr.h> 38#include <linux/nfs_xdr.h>
39#include <linux/slab.h> 39#include <linux/slab.h>
40#include <linux/compat.h>
40 41
41#include <asm/system.h> 42#include <asm/system.h>
42#include <asm/uaccess.h> 43#include <asm/uaccess.h>
@@ -89,7 +90,11 @@ int nfs_wait_bit_killable(void *word)
89 */ 90 */
90u64 nfs_compat_user_ino64(u64 fileid) 91u64 nfs_compat_user_ino64(u64 fileid)
91{ 92{
92 int ino; 93#ifdef CONFIG_COMPAT
94 compat_ulong_t ino;
95#else
96 unsigned long ino;
97#endif
93 98
94 if (enable_ino64) 99 if (enable_ino64)
95 return fileid; 100 return fileid;
@@ -1513,7 +1518,7 @@ static int nfsiod_start(void)
1513{ 1518{
1514 struct workqueue_struct *wq; 1519 struct workqueue_struct *wq;
1515 dprintk("RPC: creating workqueue nfsiod\n"); 1520 dprintk("RPC: creating workqueue nfsiod\n");
1516 wq = alloc_workqueue("nfsiod", WQ_RESCUER, 0); 1521 wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM, 0);
1517 if (wq == NULL) 1522 if (wq == NULL)
1518 return -ENOMEM; 1523 return -ENOMEM;
1519 nfsiod_workqueue = wq; 1524 nfsiod_workqueue = wq;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index cf9fdbdabc67..72e0bddf7a2f 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -148,6 +148,9 @@ extern struct nfs_server *nfs_clone_server(struct nfs_server *,
148 struct nfs_fattr *); 148 struct nfs_fattr *);
149extern void nfs_mark_client_ready(struct nfs_client *clp, int state); 149extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
150extern int nfs4_check_client_ready(struct nfs_client *clp); 150extern int nfs4_check_client_ready(struct nfs_client *clp);
151extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
152 const struct sockaddr *ds_addr,
153 int ds_addrlen, int ds_proto);
151#ifdef CONFIG_PROC_FS 154#ifdef CONFIG_PROC_FS
152extern int __init nfs_fs_proc_init(void); 155extern int __init nfs_fs_proc_init(void);
153extern void nfs_fs_proc_exit(void); 156extern void nfs_fs_proc_exit(void);
@@ -163,10 +166,10 @@ static inline void nfs_fs_proc_exit(void)
163 166
164/* nfs4namespace.c */ 167/* nfs4namespace.c */
165#ifdef CONFIG_NFS_V4 168#ifdef CONFIG_NFS_V4
166extern struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry); 169extern struct vfsmount *nfs_do_refmount(struct dentry *dentry);
167#else 170#else
168static inline 171static inline
169struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry) 172struct vfsmount *nfs_do_refmount(struct dentry *dentry)
170{ 173{
171 return ERR_PTR(-ENOENT); 174 return ERR_PTR(-ENOENT);
172} 175}
@@ -213,8 +216,14 @@ extern const u32 nfs41_maxwrite_overhead;
213extern struct rpc_procinfo nfs4_procedures[]; 216extern struct rpc_procinfo nfs4_procedures[];
214#endif 217#endif
215 218
219extern int nfs4_init_ds_session(struct nfs_client *clp);
220
216/* proc.c */ 221/* proc.c */
217void nfs_close_context(struct nfs_open_context *ctx, int is_sync); 222void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
223extern int nfs_init_client(struct nfs_client *clp,
224 const struct rpc_timeout *timeparms,
225 const char *ip_addr, rpc_authflavor_t authflavour,
226 int noresvport);
218 227
219/* dir.c */ 228/* dir.c */
220extern int nfs_access_cache_shrinker(struct shrinker *shrink, 229extern int nfs_access_cache_shrinker(struct shrinker *shrink,
@@ -247,24 +256,30 @@ extern void nfs_sb_active(struct super_block *sb);
247extern void nfs_sb_deactive(struct super_block *sb); 256extern void nfs_sb_deactive(struct super_block *sb);
248 257
249/* namespace.c */ 258/* namespace.c */
250extern char *nfs_path(const char *base, 259extern char *nfs_path(char **p, struct dentry *dentry,
251 const struct dentry *droot,
252 const struct dentry *dentry,
253 char *buffer, ssize_t buflen); 260 char *buffer, ssize_t buflen);
254extern struct vfsmount *nfs_d_automount(struct path *path); 261extern struct vfsmount *nfs_d_automount(struct path *path);
255 262
256/* getroot.c */ 263/* getroot.c */
257extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *); 264extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *,
265 const char *);
258#ifdef CONFIG_NFS_V4 266#ifdef CONFIG_NFS_V4
259extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *); 267extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
268 const char *);
260 269
261extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); 270extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
262#endif 271#endif
263 272
264/* read.c */ 273/* read.c */
274extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
275 const struct rpc_call_ops *call_ops);
265extern void nfs_read_prepare(struct rpc_task *task, void *calldata); 276extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
266 277
267/* write.c */ 278/* write.c */
279extern int nfs_initiate_write(struct nfs_write_data *data,
280 struct rpc_clnt *clnt,
281 const struct rpc_call_ops *call_ops,
282 int how);
268extern void nfs_write_prepare(struct rpc_task *task, void *calldata); 283extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
269#ifdef CONFIG_MIGRATION 284#ifdef CONFIG_MIGRATION
270extern int nfs_migrate_page(struct address_space *, 285extern int nfs_migrate_page(struct address_space *,
@@ -274,6 +289,13 @@ extern int nfs_migrate_page(struct address_space *,
274#endif 289#endif
275 290
276/* nfs4proc.c */ 291/* nfs4proc.c */
292extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data);
293extern int nfs4_init_client(struct nfs_client *clp,
294 const struct rpc_timeout *timeparms,
295 const char *ip_addr,
296 rpc_authflavor_t authflavour,
297 int noresvport);
298extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data);
277extern int _nfs4_call_sync(struct nfs_server *server, 299extern int _nfs4_call_sync(struct nfs_server *server,
278 struct rpc_message *msg, 300 struct rpc_message *msg,
279 struct nfs4_sequence_args *args, 301 struct nfs4_sequence_args *args,
@@ -288,12 +310,11 @@ extern int _nfs4_call_sync_session(struct nfs_server *server,
288/* 310/*
289 * Determine the device name as a string 311 * Determine the device name as a string
290 */ 312 */
291static inline char *nfs_devname(const struct vfsmount *mnt_parent, 313static inline char *nfs_devname(struct dentry *dentry,
292 const struct dentry *dentry,
293 char *buffer, ssize_t buflen) 314 char *buffer, ssize_t buflen)
294{ 315{
295 return nfs_path(mnt_parent->mnt_devname, mnt_parent->mnt_root, 316 char *dummy;
296 dentry, buffer, buflen); 317 return nfs_path(&dummy, dentry, buffer, buflen);
297} 318}
298 319
299/* 320/*
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index f32b8603dca8..bf1c68009ffd 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -25,33 +25,30 @@ static LIST_HEAD(nfs_automount_list);
25static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts); 25static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts);
26int nfs_mountpoint_expiry_timeout = 500 * HZ; 26int nfs_mountpoint_expiry_timeout = 500 * HZ;
27 27
28static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, 28static struct vfsmount *nfs_do_submount(struct dentry *dentry,
29 const struct dentry *dentry,
30 struct nfs_fh *fh, 29 struct nfs_fh *fh,
31 struct nfs_fattr *fattr); 30 struct nfs_fattr *fattr);
32 31
33/* 32/*
34 * nfs_path - reconstruct the path given an arbitrary dentry 33 * nfs_path - reconstruct the path given an arbitrary dentry
35 * @base - arbitrary string to prepend to the path 34 * @p - used to return pointer to the end of devname part of path
36 * @droot - pointer to root dentry for mountpoint
37 * @dentry - pointer to dentry 35 * @dentry - pointer to dentry
38 * @buffer - result buffer 36 * @buffer - result buffer
39 * @buflen - length of buffer 37 * @buflen - length of buffer
40 * 38 *
41 * Helper function for constructing the path from the 39 * Helper function for constructing the server pathname
42 * root dentry to an arbitrary hashed dentry. 40 * for an arbitrary hashed dentry.
43 * 41 *
44 * This is mainly for use in figuring out the path on the 42 * This is mainly for use in figuring out the path on the
45 * server side when automounting on top of an existing partition. 43 * server side when automounting on top of an existing partition
44 * and in generating /proc/mounts and friends.
46 */ 45 */
47char *nfs_path(const char *base, 46char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen)
48 const struct dentry *droot,
49 const struct dentry *dentry,
50 char *buffer, ssize_t buflen)
51{ 47{
52 char *end; 48 char *end;
53 int namelen; 49 int namelen;
54 unsigned seq; 50 unsigned seq;
51 const char *base;
55 52
56rename_retry: 53rename_retry:
57 end = buffer+buflen; 54 end = buffer+buflen;
@@ -60,7 +57,10 @@ rename_retry:
60 57
61 seq = read_seqbegin(&rename_lock); 58 seq = read_seqbegin(&rename_lock);
62 rcu_read_lock(); 59 rcu_read_lock();
63 while (!IS_ROOT(dentry) && dentry != droot) { 60 while (1) {
61 spin_lock(&dentry->d_lock);
62 if (IS_ROOT(dentry))
63 break;
64 namelen = dentry->d_name.len; 64 namelen = dentry->d_name.len;
65 buflen -= namelen + 1; 65 buflen -= namelen + 1;
66 if (buflen < 0) 66 if (buflen < 0)
@@ -68,27 +68,47 @@ rename_retry:
68 end -= namelen; 68 end -= namelen;
69 memcpy(end, dentry->d_name.name, namelen); 69 memcpy(end, dentry->d_name.name, namelen);
70 *--end = '/'; 70 *--end = '/';
71 spin_unlock(&dentry->d_lock);
71 dentry = dentry->d_parent; 72 dentry = dentry->d_parent;
72 } 73 }
73 rcu_read_unlock(); 74 if (read_seqretry(&rename_lock, seq)) {
74 if (read_seqretry(&rename_lock, seq)) 75 spin_unlock(&dentry->d_lock);
76 rcu_read_unlock();
75 goto rename_retry; 77 goto rename_retry;
78 }
76 if (*end != '/') { 79 if (*end != '/') {
77 if (--buflen < 0) 80 if (--buflen < 0) {
81 spin_unlock(&dentry->d_lock);
82 rcu_read_unlock();
78 goto Elong; 83 goto Elong;
84 }
79 *--end = '/'; 85 *--end = '/';
80 } 86 }
87 *p = end;
88 base = dentry->d_fsdata;
89 if (!base) {
90 spin_unlock(&dentry->d_lock);
91 rcu_read_unlock();
92 WARN_ON(1);
93 return end;
94 }
81 namelen = strlen(base); 95 namelen = strlen(base);
82 /* Strip off excess slashes in base string */ 96 /* Strip off excess slashes in base string */
83 while (namelen > 0 && base[namelen - 1] == '/') 97 while (namelen > 0 && base[namelen - 1] == '/')
84 namelen--; 98 namelen--;
85 buflen -= namelen; 99 buflen -= namelen;
86 if (buflen < 0) 100 if (buflen < 0) {
101 spin_unlock(&dentry->d_lock);
102 rcu_read_unlock();
87 goto Elong; 103 goto Elong;
104 }
88 end -= namelen; 105 end -= namelen;
89 memcpy(end, base, namelen); 106 memcpy(end, base, namelen);
107 spin_unlock(&dentry->d_lock);
108 rcu_read_unlock();
90 return end; 109 return end;
91Elong_unlock: 110Elong_unlock:
111 spin_unlock(&dentry->d_lock);
92 rcu_read_unlock(); 112 rcu_read_unlock();
93 if (read_seqretry(&rename_lock, seq)) 113 if (read_seqretry(&rename_lock, seq))
94 goto rename_retry; 114 goto rename_retry;
@@ -143,9 +163,9 @@ struct vfsmount *nfs_d_automount(struct path *path)
143 } 163 }
144 164
145 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) 165 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
146 mnt = nfs_do_refmount(path->mnt, path->dentry); 166 mnt = nfs_do_refmount(path->dentry);
147 else 167 else
148 mnt = nfs_do_submount(path->mnt, path->dentry, fh, fattr); 168 mnt = nfs_do_submount(path->dentry, fh, fattr);
149 if (IS_ERR(mnt)) 169 if (IS_ERR(mnt))
150 goto out; 170 goto out;
151 171
@@ -209,19 +229,17 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
209 229
210/** 230/**
211 * nfs_do_submount - set up mountpoint when crossing a filesystem boundary 231 * nfs_do_submount - set up mountpoint when crossing a filesystem boundary
212 * @mnt_parent - mountpoint of parent directory
213 * @dentry - parent directory 232 * @dentry - parent directory
214 * @fh - filehandle for new root dentry 233 * @fh - filehandle for new root dentry
215 * @fattr - attributes for new root inode 234 * @fattr - attributes for new root inode
216 * 235 *
217 */ 236 */
218static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, 237static struct vfsmount *nfs_do_submount(struct dentry *dentry,
219 const struct dentry *dentry,
220 struct nfs_fh *fh, 238 struct nfs_fh *fh,
221 struct nfs_fattr *fattr) 239 struct nfs_fattr *fattr)
222{ 240{
223 struct nfs_clone_mount mountdata = { 241 struct nfs_clone_mount mountdata = {
224 .sb = mnt_parent->mnt_sb, 242 .sb = dentry->d_sb,
225 .dentry = dentry, 243 .dentry = dentry,
226 .fh = fh, 244 .fh = fh,
227 .fattr = fattr, 245 .fattr = fattr,
@@ -237,11 +255,11 @@ static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
237 dentry->d_name.name); 255 dentry->d_name.name);
238 if (page == NULL) 256 if (page == NULL)
239 goto out; 257 goto out;
240 devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE); 258 devname = nfs_devname(dentry, page, PAGE_SIZE);
241 mnt = (struct vfsmount *)devname; 259 mnt = (struct vfsmount *)devname;
242 if (IS_ERR(devname)) 260 if (IS_ERR(devname))
243 goto free_page; 261 goto free_page;
244 mnt = nfs_do_clone_mount(NFS_SB(mnt_parent->mnt_sb), devname, &mountdata); 262 mnt = nfs_do_clone_mount(NFS_SB(dentry->d_sb), devname, &mountdata);
245free_page: 263free_page:
246 free_page((unsigned long)page); 264 free_page((unsigned long)page);
247out: 265out:
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index ce939c062a52..d0c80d8b3f96 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -885,4 +885,5 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
885 .lock = nfs3_proc_lock, 885 .lock = nfs3_proc_lock,
886 .clear_acl_cache = nfs3_forget_cached_acls, 886 .clear_acl_cache = nfs3_forget_cached_acls,
887 .close_context = nfs_close_context, 887 .close_context = nfs_close_context,
888 .init_client = nfs_init_client,
888}; 889};
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 7a7474073148..c64be1cff080 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -252,6 +252,9 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser
252extern int nfs4_setup_sequence(const struct nfs_server *server, 252extern int nfs4_setup_sequence(const struct nfs_server *server,
253 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, 253 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
254 int cache_reply, struct rpc_task *task); 254 int cache_reply, struct rpc_task *task);
255extern int nfs41_setup_sequence(struct nfs4_session *session,
256 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
257 int cache_reply, struct rpc_task *task);
255extern void nfs4_destroy_session(struct nfs4_session *session); 258extern void nfs4_destroy_session(struct nfs4_session *session);
256extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); 259extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
257extern int nfs4_proc_create_session(struct nfs_client *); 260extern int nfs4_proc_create_session(struct nfs_client *);
@@ -259,6 +262,19 @@ extern int nfs4_proc_destroy_session(struct nfs4_session *);
259extern int nfs4_init_session(struct nfs_server *server); 262extern int nfs4_init_session(struct nfs_server *server);
260extern int nfs4_proc_get_lease_time(struct nfs_client *clp, 263extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
261 struct nfs_fsinfo *fsinfo); 264 struct nfs_fsinfo *fsinfo);
265
266static inline bool
267is_ds_only_client(struct nfs_client *clp)
268{
269 return (clp->cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS) ==
270 EXCHGID4_FLAG_USE_PNFS_DS;
271}
272
273static inline bool
274is_ds_client(struct nfs_client *clp)
275{
276 return clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS;
277}
262#else /* CONFIG_NFS_V4_1 */ 278#else /* CONFIG_NFS_V4_1 */
263static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) 279static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
264{ 280{
@@ -276,6 +292,18 @@ static inline int nfs4_init_session(struct nfs_server *server)
276{ 292{
277 return 0; 293 return 0;
278} 294}
295
296static inline bool
297is_ds_only_client(struct nfs_client *clp)
298{
299 return false;
300}
301
302static inline bool
303is_ds_client(struct nfs_client *clp)
304{
305 return false;
306}
279#endif /* CONFIG_NFS_V4_1 */ 307#endif /* CONFIG_NFS_V4_1 */
280 308
281extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[]; 309extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[];
@@ -298,6 +326,11 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);
298#if defined(CONFIG_NFS_V4_1) 326#if defined(CONFIG_NFS_V4_1)
299struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp); 327struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp);
300struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp); 328struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp);
329extern void nfs4_schedule_session_recovery(struct nfs4_session *);
330#else
331static inline void nfs4_schedule_session_recovery(struct nfs4_session *session)
332{
333}
301#endif /* CONFIG_NFS_V4_1 */ 334#endif /* CONFIG_NFS_V4_1 */
302 335
303extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); 336extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
@@ -307,10 +340,9 @@ extern void nfs4_put_open_state(struct nfs4_state *);
307extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t); 340extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t);
308extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t); 341extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t);
309extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); 342extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
310extern void nfs4_schedule_state_recovery(struct nfs_client *); 343extern void nfs4_schedule_lease_recovery(struct nfs_client *);
311extern void nfs4_schedule_state_manager(struct nfs_client *); 344extern void nfs4_schedule_state_manager(struct nfs_client *);
312extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state); 345extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
313extern int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state);
314extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); 346extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
315extern void nfs41_handle_recall_slot(struct nfs_client *clp); 347extern void nfs41_handle_recall_slot(struct nfs_client *clp);
316extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); 348extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 23f930caf1e2..428558464817 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -40,32 +40,309 @@ MODULE_LICENSE("GPL");
40MODULE_AUTHOR("Dean Hildebrand <dhildebz@umich.edu>"); 40MODULE_AUTHOR("Dean Hildebrand <dhildebz@umich.edu>");
41MODULE_DESCRIPTION("The NFSv4 file layout driver"); 41MODULE_DESCRIPTION("The NFSv4 file layout driver");
42 42
43static int 43#define FILELAYOUT_POLL_RETRY_MAX (15*HZ)
44filelayout_set_layoutdriver(struct nfs_server *nfss) 44
45static loff_t
46filelayout_get_dense_offset(struct nfs4_filelayout_segment *flseg,
47 loff_t offset)
45{ 48{
46 int status = pnfs_alloc_init_deviceid_cache(nfss->nfs_client, 49 u32 stripe_width = flseg->stripe_unit * flseg->dsaddr->stripe_count;
47 nfs4_fl_free_deviceid_callback); 50 u64 tmp;
48 if (status) { 51
49 printk(KERN_WARNING "%s: deviceid cache could not be " 52 offset -= flseg->pattern_offset;
50 "initialized\n", __func__); 53 tmp = offset;
51 return status; 54 do_div(tmp, stripe_width);
55
56 return tmp * flseg->stripe_unit + do_div(offset, flseg->stripe_unit);
57}
58
59/* This function is used by the layout driver to calculate the
60 * offset of the file on the dserver based on whether the
61 * layout type is STRIPE_DENSE or STRIPE_SPARSE
62 */
63static loff_t
64filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
65{
66 struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
67
68 switch (flseg->stripe_type) {
69 case STRIPE_SPARSE:
70 return offset;
71
72 case STRIPE_DENSE:
73 return filelayout_get_dense_offset(flseg, offset);
52 } 74 }
53 dprintk("%s: deviceid cache has been initialized successfully\n", 75
54 __func__); 76 BUG();
77}
78
79/* For data server errors we don't recover from */
80static void
81filelayout_set_lo_fail(struct pnfs_layout_segment *lseg)
82{
83 if (lseg->pls_range.iomode == IOMODE_RW) {
84 dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__);
85 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
86 } else {
87 dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__);
88 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
89 }
90}
91
92static int filelayout_async_handle_error(struct rpc_task *task,
93 struct nfs4_state *state,
94 struct nfs_client *clp,
95 int *reset)
96{
97 if (task->tk_status >= 0)
98 return 0;
99
100 *reset = 0;
101
102 switch (task->tk_status) {
103 case -NFS4ERR_BADSESSION:
104 case -NFS4ERR_BADSLOT:
105 case -NFS4ERR_BAD_HIGH_SLOT:
106 case -NFS4ERR_DEADSESSION:
107 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
108 case -NFS4ERR_SEQ_FALSE_RETRY:
109 case -NFS4ERR_SEQ_MISORDERED:
110 dprintk("%s ERROR %d, Reset session. Exchangeid "
111 "flags 0x%x\n", __func__, task->tk_status,
112 clp->cl_exchange_flags);
113 nfs4_schedule_session_recovery(clp->cl_session);
114 break;
115 case -NFS4ERR_DELAY:
116 case -NFS4ERR_GRACE:
117 case -EKEYEXPIRED:
118 rpc_delay(task, FILELAYOUT_POLL_RETRY_MAX);
119 break;
120 default:
121 dprintk("%s DS error. Retry through MDS %d\n", __func__,
122 task->tk_status);
123 *reset = 1;
124 break;
125 }
126 task->tk_status = 0;
127 return -EAGAIN;
128}
129
130/* NFS_PROTO call done callback routines */
131
132static int filelayout_read_done_cb(struct rpc_task *task,
133 struct nfs_read_data *data)
134{
135 struct nfs_client *clp = data->ds_clp;
136 int reset = 0;
137
138 dprintk("%s DS read\n", __func__);
139
140 if (filelayout_async_handle_error(task, data->args.context->state,
141 data->ds_clp, &reset) == -EAGAIN) {
142 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
143 __func__, data->ds_clp, data->ds_clp->cl_session);
144 if (reset) {
145 filelayout_set_lo_fail(data->lseg);
146 nfs4_reset_read(task, data);
147 clp = NFS_SERVER(data->inode)->nfs_client;
148 }
149 nfs_restart_rpc(task, clp);
150 return -EAGAIN;
151 }
152
55 return 0; 153 return 0;
56} 154}
57 155
58/* Clear out the layout by destroying its device list */ 156/*
59static int 157 * Call ops for the async read/write cases
60filelayout_clear_layoutdriver(struct nfs_server *nfss) 158 * In the case of dense layouts, the offset needs to be reset to its
159 * original value.
160 */
161static void filelayout_read_prepare(struct rpc_task *task, void *data)
61{ 162{
62 dprintk("--> %s\n", __func__); 163 struct nfs_read_data *rdata = (struct nfs_read_data *)data;
164
165 rdata->read_done_cb = filelayout_read_done_cb;
166
167 if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
168 &rdata->args.seq_args, &rdata->res.seq_res,
169 0, task))
170 return;
171
172 rpc_call_start(task);
173}
174
175static void filelayout_read_call_done(struct rpc_task *task, void *data)
176{
177 struct nfs_read_data *rdata = (struct nfs_read_data *)data;
178
179 dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
180
181 /* Note this may cause RPC to be resent */
182 rdata->mds_ops->rpc_call_done(task, data);
183}
184
185static void filelayout_read_release(void *data)
186{
187 struct nfs_read_data *rdata = (struct nfs_read_data *)data;
188
189 rdata->mds_ops->rpc_release(data);
190}
191
192static int filelayout_write_done_cb(struct rpc_task *task,
193 struct nfs_write_data *data)
194{
195 int reset = 0;
196
197 if (filelayout_async_handle_error(task, data->args.context->state,
198 data->ds_clp, &reset) == -EAGAIN) {
199 struct nfs_client *clp;
200
201 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
202 __func__, data->ds_clp, data->ds_clp->cl_session);
203 if (reset) {
204 filelayout_set_lo_fail(data->lseg);
205 nfs4_reset_write(task, data);
206 clp = NFS_SERVER(data->inode)->nfs_client;
207 } else
208 clp = data->ds_clp;
209 nfs_restart_rpc(task, clp);
210 return -EAGAIN;
211 }
63 212
64 if (nfss->nfs_client->cl_devid_cache)
65 pnfs_put_deviceid_cache(nfss->nfs_client);
66 return 0; 213 return 0;
67} 214}
68 215
216static void filelayout_write_prepare(struct rpc_task *task, void *data)
217{
218 struct nfs_write_data *wdata = (struct nfs_write_data *)data;
219
220 if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
221 &wdata->args.seq_args, &wdata->res.seq_res,
222 0, task))
223 return;
224
225 rpc_call_start(task);
226}
227
228static void filelayout_write_call_done(struct rpc_task *task, void *data)
229{
230 struct nfs_write_data *wdata = (struct nfs_write_data *)data;
231
232 /* Note this may cause RPC to be resent */
233 wdata->mds_ops->rpc_call_done(task, data);
234}
235
236static void filelayout_write_release(void *data)
237{
238 struct nfs_write_data *wdata = (struct nfs_write_data *)data;
239
240 wdata->mds_ops->rpc_release(data);
241}
242
243struct rpc_call_ops filelayout_read_call_ops = {
244 .rpc_call_prepare = filelayout_read_prepare,
245 .rpc_call_done = filelayout_read_call_done,
246 .rpc_release = filelayout_read_release,
247};
248
249struct rpc_call_ops filelayout_write_call_ops = {
250 .rpc_call_prepare = filelayout_write_prepare,
251 .rpc_call_done = filelayout_write_call_done,
252 .rpc_release = filelayout_write_release,
253};
254
255static enum pnfs_try_status
256filelayout_read_pagelist(struct nfs_read_data *data)
257{
258 struct pnfs_layout_segment *lseg = data->lseg;
259 struct nfs4_pnfs_ds *ds;
260 loff_t offset = data->args.offset;
261 u32 j, idx;
262 struct nfs_fh *fh;
263 int status;
264
265 dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
266 __func__, data->inode->i_ino,
267 data->args.pgbase, (size_t)data->args.count, offset);
268
269 /* Retrieve the correct rpc_client for the byte range */
270 j = nfs4_fl_calc_j_index(lseg, offset);
271 idx = nfs4_fl_calc_ds_index(lseg, j);
272 ds = nfs4_fl_prepare_ds(lseg, idx);
273 if (!ds) {
274 /* Either layout fh index faulty, or ds connect failed */
275 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
276 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
277 return PNFS_NOT_ATTEMPTED;
278 }
279 dprintk("%s USE DS:ip %x %hu\n", __func__,
280 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
281
282 /* No multipath support. Use first DS */
283 data->ds_clp = ds->ds_clp;
284 fh = nfs4_fl_select_ds_fh(lseg, j);
285 if (fh)
286 data->args.fh = fh;
287
288 data->args.offset = filelayout_get_dserver_offset(lseg, offset);
289 data->mds_offset = offset;
290
291 /* Perform an asynchronous read to ds */
292 status = nfs_initiate_read(data, ds->ds_clp->cl_rpcclient,
293 &filelayout_read_call_ops);
294 BUG_ON(status != 0);
295 return PNFS_ATTEMPTED;
296}
297
298/* Perform async writes. */
299static enum pnfs_try_status
300filelayout_write_pagelist(struct nfs_write_data *data, int sync)
301{
302 struct pnfs_layout_segment *lseg = data->lseg;
303 struct nfs4_pnfs_ds *ds;
304 loff_t offset = data->args.offset;
305 u32 j, idx;
306 struct nfs_fh *fh;
307 int status;
308
309 /* Retrieve the correct rpc_client for the byte range */
310 j = nfs4_fl_calc_j_index(lseg, offset);
311 idx = nfs4_fl_calc_ds_index(lseg, j);
312 ds = nfs4_fl_prepare_ds(lseg, idx);
313 if (!ds) {
314 printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
315 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
316 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
317 return PNFS_NOT_ATTEMPTED;
318 }
319 dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__,
320 data->inode->i_ino, sync, (size_t) data->args.count, offset,
321 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
322
323 /* We can't handle commit to ds yet */
324 if (!FILELAYOUT_LSEG(lseg)->commit_through_mds)
325 data->args.stable = NFS_FILE_SYNC;
326
327 data->write_done_cb = filelayout_write_done_cb;
328 data->ds_clp = ds->ds_clp;
329 fh = nfs4_fl_select_ds_fh(lseg, j);
330 if (fh)
331 data->args.fh = fh;
332 /*
333 * Get the file offset on the dserver. Set the write offset to
334 * this offset and save the original offset.
335 */
336 data->args.offset = filelayout_get_dserver_offset(lseg, offset);
337 data->mds_offset = offset;
338
339 /* Perform an asynchronous write */
340 status = nfs_initiate_write(data, ds->ds_clp->cl_rpcclient,
341 &filelayout_write_call_ops, sync);
342 BUG_ON(status != 0);
343 return PNFS_ATTEMPTED;
344}
345
69/* 346/*
70 * filelayout_check_layout() 347 * filelayout_check_layout()
71 * 348 *
@@ -92,14 +369,14 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
92 goto out; 369 goto out;
93 } 370 }
94 371
95 if (fl->stripe_unit % PAGE_SIZE) { 372 if (!fl->stripe_unit || fl->stripe_unit % PAGE_SIZE) {
96 dprintk("%s Stripe unit (%u) not page aligned\n", 373 dprintk("%s Invalid stripe unit (%u)\n",
97 __func__, fl->stripe_unit); 374 __func__, fl->stripe_unit);
98 goto out; 375 goto out;
99 } 376 }
100 377
101 /* find and reference the deviceid */ 378 /* find and reference the deviceid */
102 dsaddr = nfs4_fl_find_get_deviceid(nfss->nfs_client, id); 379 dsaddr = nfs4_fl_find_get_deviceid(id);
103 if (dsaddr == NULL) { 380 if (dsaddr == NULL) {
104 dsaddr = get_device_info(lo->plh_inode, id); 381 dsaddr = get_device_info(lo->plh_inode, id);
105 if (dsaddr == NULL) 382 if (dsaddr == NULL)
@@ -134,7 +411,7 @@ out:
134 dprintk("--> %s returns %d\n", __func__, status); 411 dprintk("--> %s returns %d\n", __func__, status);
135 return status; 412 return status;
136out_put: 413out_put:
137 pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache, &dsaddr->deviceid); 414 nfs4_fl_put_deviceid(dsaddr);
138 goto out; 415 goto out;
139} 416}
140 417
@@ -243,23 +520,47 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
243static void 520static void
244filelayout_free_lseg(struct pnfs_layout_segment *lseg) 521filelayout_free_lseg(struct pnfs_layout_segment *lseg)
245{ 522{
246 struct nfs_server *nfss = NFS_SERVER(lseg->pls_layout->plh_inode);
247 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); 523 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
248 524
249 dprintk("--> %s\n", __func__); 525 dprintk("--> %s\n", __func__);
250 pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache, 526 nfs4_fl_put_deviceid(fl->dsaddr);
251 &fl->dsaddr->deviceid);
252 _filelayout_free_lseg(fl); 527 _filelayout_free_lseg(fl);
253} 528}
254 529
530/*
531 * filelayout_pg_test(). Called by nfs_can_coalesce_requests()
532 *
533 * return 1 : coalesce page
534 * return 0 : don't coalesce page
535 */
536int
537filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
538 struct nfs_page *req)
539{
540 u64 p_stripe, r_stripe;
541 u32 stripe_unit;
542
543 if (!pgio->pg_lseg)
544 return 1;
545 p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT;
546 r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT;
547 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
548
549 do_div(p_stripe, stripe_unit);
550 do_div(r_stripe, stripe_unit);
551
552 return (p_stripe == r_stripe);
553}
554
255static struct pnfs_layoutdriver_type filelayout_type = { 555static struct pnfs_layoutdriver_type filelayout_type = {
256 .id = LAYOUT_NFSV4_1_FILES, 556 .id = LAYOUT_NFSV4_1_FILES,
257 .name = "LAYOUT_NFSV4_1_FILES", 557 .name = "LAYOUT_NFSV4_1_FILES",
258 .owner = THIS_MODULE, 558 .owner = THIS_MODULE,
259 .set_layoutdriver = filelayout_set_layoutdriver, 559 .alloc_lseg = filelayout_alloc_lseg,
260 .clear_layoutdriver = filelayout_clear_layoutdriver, 560 .free_lseg = filelayout_free_lseg,
261 .alloc_lseg = filelayout_alloc_lseg, 561 .pg_test = filelayout_pg_test,
262 .free_lseg = filelayout_free_lseg, 562 .read_pagelist = filelayout_read_pagelist,
563 .write_pagelist = filelayout_write_pagelist,
263}; 564};
264 565
265static int __init nfs4filelayout_init(void) 566static int __init nfs4filelayout_init(void)
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index bbf60dd2ab9d..ee0c907742b5 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -55,8 +55,14 @@ struct nfs4_pnfs_ds {
55 atomic_t ds_count; 55 atomic_t ds_count;
56}; 56};
57 57
58/* nfs4_file_layout_dsaddr flags */
59#define NFS4_DEVICE_ID_NEG_ENTRY 0x00000001
60
58struct nfs4_file_layout_dsaddr { 61struct nfs4_file_layout_dsaddr {
59 struct pnfs_deviceid_node deviceid; 62 struct hlist_node node;
63 struct nfs4_deviceid deviceid;
64 atomic_t ref;
65 unsigned long flags;
60 u32 stripe_count; 66 u32 stripe_count;
61 u8 *stripe_indices; 67 u8 *stripe_indices;
62 u32 ds_num; 68 u32 ds_num;
@@ -83,11 +89,18 @@ FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg)
83 generic_hdr); 89 generic_hdr);
84} 90}
85 91
86extern void nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *); 92extern struct nfs_fh *
93nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j);
94
87extern void print_ds(struct nfs4_pnfs_ds *ds); 95extern void print_ds(struct nfs4_pnfs_ds *ds);
88extern void print_deviceid(struct nfs4_deviceid *dev_id); 96extern void print_deviceid(struct nfs4_deviceid *dev_id);
97u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset);
98u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j);
99struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg,
100 u32 ds_idx);
89extern struct nfs4_file_layout_dsaddr * 101extern struct nfs4_file_layout_dsaddr *
90nfs4_fl_find_get_deviceid(struct nfs_client *, struct nfs4_deviceid *dev_id); 102nfs4_fl_find_get_deviceid(struct nfs4_deviceid *dev_id);
103extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
91struct nfs4_file_layout_dsaddr * 104struct nfs4_file_layout_dsaddr *
92get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id); 105get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id);
93 106
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index f5c9b125e8cc..68143c162e3b 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -37,6 +37,30 @@
37#define NFSDBG_FACILITY NFSDBG_PNFS_LD 37#define NFSDBG_FACILITY NFSDBG_PNFS_LD
38 38
39/* 39/*
40 * Device ID RCU cache. A device ID is unique per client ID and layout type.
41 */
42#define NFS4_FL_DEVICE_ID_HASH_BITS 5
43#define NFS4_FL_DEVICE_ID_HASH_SIZE (1 << NFS4_FL_DEVICE_ID_HASH_BITS)
44#define NFS4_FL_DEVICE_ID_HASH_MASK (NFS4_FL_DEVICE_ID_HASH_SIZE - 1)
45
46static inline u32
47nfs4_fl_deviceid_hash(struct nfs4_deviceid *id)
48{
49 unsigned char *cptr = (unsigned char *)id->data;
50 unsigned int nbytes = NFS4_DEVICEID4_SIZE;
51 u32 x = 0;
52
53 while (nbytes--) {
54 x *= 37;
55 x += *cptr++;
56 }
57 return x & NFS4_FL_DEVICE_ID_HASH_MASK;
58}
59
60static struct hlist_head filelayout_deviceid_cache[NFS4_FL_DEVICE_ID_HASH_SIZE];
61static DEFINE_SPINLOCK(filelayout_deviceid_lock);
62
63/*
40 * Data server cache 64 * Data server cache
41 * 65 *
42 * Data servers can be mapped to different device ids. 66 * Data servers can be mapped to different device ids.
@@ -104,6 +128,67 @@ _data_server_lookup_locked(u32 ip_addr, u32 port)
104 return NULL; 128 return NULL;
105} 129}
106 130
131/*
132 * Create an rpc connection to the nfs4_pnfs_ds data server
133 * Currently only support IPv4
134 */
135static int
136nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
137{
138 struct nfs_client *clp;
139 struct sockaddr_in sin;
140 int status = 0;
141
142 dprintk("--> %s ip:port %x:%hu au_flavor %d\n", __func__,
143 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
144 mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor);
145
146 sin.sin_family = AF_INET;
147 sin.sin_addr.s_addr = ds->ds_ip_addr;
148 sin.sin_port = ds->ds_port;
149
150 clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&sin,
151 sizeof(sin), IPPROTO_TCP);
152 if (IS_ERR(clp)) {
153 status = PTR_ERR(clp);
154 goto out;
155 }
156
157 if ((clp->cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS) != 0) {
158 if (!is_ds_client(clp)) {
159 status = -ENODEV;
160 goto out_put;
161 }
162 ds->ds_clp = clp;
163 dprintk("%s [existing] ip=%x, port=%hu\n", __func__,
164 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
165 goto out;
166 }
167
168 /*
169 * Do not set NFS_CS_CHECK_LEASE_TIME instead set the DS lease to
170 * be equal to the MDS lease. Renewal is scheduled in create_session.
171 */
172 spin_lock(&mds_srv->nfs_client->cl_lock);
173 clp->cl_lease_time = mds_srv->nfs_client->cl_lease_time;
174 spin_unlock(&mds_srv->nfs_client->cl_lock);
175 clp->cl_last_renewal = jiffies;
176
177 /* New nfs_client */
178 status = nfs4_init_ds_session(clp);
179 if (status)
180 goto out_put;
181
182 ds->ds_clp = clp;
183 dprintk("%s [new] ip=%x, port=%hu\n", __func__, ntohl(ds->ds_ip_addr),
184 ntohs(ds->ds_port));
185out:
186 return status;
187out_put:
188 nfs_put_client(clp);
189 goto out;
190}
191
107static void 192static void
108destroy_ds(struct nfs4_pnfs_ds *ds) 193destroy_ds(struct nfs4_pnfs_ds *ds)
109{ 194{
@@ -122,7 +207,7 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
122 struct nfs4_pnfs_ds *ds; 207 struct nfs4_pnfs_ds *ds;
123 int i; 208 int i;
124 209
125 print_deviceid(&dsaddr->deviceid.de_id); 210 print_deviceid(&dsaddr->deviceid);
126 211
127 for (i = 0; i < dsaddr->ds_num; i++) { 212 for (i = 0; i < dsaddr->ds_num; i++) {
128 ds = dsaddr->ds_list[i]; 213 ds = dsaddr->ds_list[i];
@@ -139,15 +224,6 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
139 kfree(dsaddr); 224 kfree(dsaddr);
140} 225}
141 226
142void
143nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *device)
144{
145 struct nfs4_file_layout_dsaddr *dsaddr =
146 container_of(device, struct nfs4_file_layout_dsaddr, deviceid);
147
148 nfs4_fl_free_deviceid(dsaddr);
149}
150
151static struct nfs4_pnfs_ds * 227static struct nfs4_pnfs_ds *
152nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port) 228nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port)
153{ 229{
@@ -219,6 +295,10 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
219 goto out_err; 295 goto out_err;
220 } 296 }
221 buf = kmalloc(rlen + 1, GFP_KERNEL); 297 buf = kmalloc(rlen + 1, GFP_KERNEL);
298 if (!buf) {
299 dprintk("%s: Not enough memory\n", __func__);
300 goto out_err;
301 }
222 buf[rlen] = '\0'; 302 buf[rlen] = '\0';
223 memcpy(buf, r_addr, rlen); 303 memcpy(buf, r_addr, rlen);
224 304
@@ -296,7 +376,7 @@ decode_device(struct inode *ino, struct pnfs_device *pdev)
296 dsaddr->stripe_count = cnt; 376 dsaddr->stripe_count = cnt;
297 dsaddr->ds_num = num; 377 dsaddr->ds_num = num;
298 378
299 memcpy(&dsaddr->deviceid.de_id, &pdev->dev_id, sizeof(pdev->dev_id)); 379 memcpy(&dsaddr->deviceid, &pdev->dev_id, sizeof(pdev->dev_id));
300 380
301 /* Go back an read stripe indices */ 381 /* Go back an read stripe indices */
302 p = indicesp; 382 p = indicesp;
@@ -346,28 +426,37 @@ out_err:
346} 426}
347 427
348/* 428/*
349 * Decode the opaque device specified in 'dev' 429 * Decode the opaque device specified in 'dev' and add it to the cache of
350 * and add it to the list of available devices. 430 * available devices.
351 * If the deviceid is already cached, nfs4_add_deviceid will return
352 * a pointer to the cached struct and throw away the new.
353 */ 431 */
354static struct nfs4_file_layout_dsaddr* 432static struct nfs4_file_layout_dsaddr *
355decode_and_add_device(struct inode *inode, struct pnfs_device *dev) 433decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
356{ 434{
357 struct nfs4_file_layout_dsaddr *dsaddr; 435 struct nfs4_file_layout_dsaddr *d, *new;
358 struct pnfs_deviceid_node *d; 436 long hash;
359 437
360 dsaddr = decode_device(inode, dev); 438 new = decode_device(inode, dev);
361 if (!dsaddr) { 439 if (!new) {
362 printk(KERN_WARNING "%s: Could not decode or add device\n", 440 printk(KERN_WARNING "%s: Could not decode or add device\n",
363 __func__); 441 __func__);
364 return NULL; 442 return NULL;
365 } 443 }
366 444
367 d = pnfs_add_deviceid(NFS_SERVER(inode)->nfs_client->cl_devid_cache, 445 spin_lock(&filelayout_deviceid_lock);
368 &dsaddr->deviceid); 446 d = nfs4_fl_find_get_deviceid(&new->deviceid);
447 if (d) {
448 spin_unlock(&filelayout_deviceid_lock);
449 nfs4_fl_free_deviceid(new);
450 return d;
451 }
452
453 INIT_HLIST_NODE(&new->node);
454 atomic_set(&new->ref, 1);
455 hash = nfs4_fl_deviceid_hash(&new->deviceid);
456 hlist_add_head_rcu(&new->node, &filelayout_deviceid_cache[hash]);
457 spin_unlock(&filelayout_deviceid_lock);
369 458
370 return container_of(d, struct nfs4_file_layout_dsaddr, deviceid); 459 return new;
371} 460}
372 461
373/* 462/*
@@ -442,12 +531,123 @@ out_free:
442 return dsaddr; 531 return dsaddr;
443} 532}
444 533
534void
535nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
536{
537 if (atomic_dec_and_lock(&dsaddr->ref, &filelayout_deviceid_lock)) {
538 hlist_del_rcu(&dsaddr->node);
539 spin_unlock(&filelayout_deviceid_lock);
540
541 synchronize_rcu();
542 nfs4_fl_free_deviceid(dsaddr);
543 }
544}
545
445struct nfs4_file_layout_dsaddr * 546struct nfs4_file_layout_dsaddr *
446nfs4_fl_find_get_deviceid(struct nfs_client *clp, struct nfs4_deviceid *id) 547nfs4_fl_find_get_deviceid(struct nfs4_deviceid *id)
548{
549 struct nfs4_file_layout_dsaddr *d;
550 struct hlist_node *n;
551 long hash = nfs4_fl_deviceid_hash(id);
552
553
554 rcu_read_lock();
555 hlist_for_each_entry_rcu(d, n, &filelayout_deviceid_cache[hash], node) {
556 if (!memcmp(&d->deviceid, id, sizeof(*id))) {
557 if (!atomic_inc_not_zero(&d->ref))
558 goto fail;
559 rcu_read_unlock();
560 return d;
561 }
562 }
563fail:
564 rcu_read_unlock();
565 return NULL;
566}
567
568/*
569 * Want res = (offset - layout->pattern_offset)/ layout->stripe_unit
570 * Then: ((res + fsi) % dsaddr->stripe_count)
571 */
572u32
573nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset)
574{
575 struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
576 u64 tmp;
577
578 tmp = offset - flseg->pattern_offset;
579 do_div(tmp, flseg->stripe_unit);
580 tmp += flseg->first_stripe_index;
581 return do_div(tmp, flseg->dsaddr->stripe_count);
582}
583
584u32
585nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j)
447{ 586{
448 struct pnfs_deviceid_node *d; 587 return FILELAYOUT_LSEG(lseg)->dsaddr->stripe_indices[j];
588}
449 589
450 d = pnfs_find_get_deviceid(clp->cl_devid_cache, id); 590struct nfs_fh *
451 return (d == NULL) ? NULL : 591nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
452 container_of(d, struct nfs4_file_layout_dsaddr, deviceid); 592{
593 struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
594 u32 i;
595
596 if (flseg->stripe_type == STRIPE_SPARSE) {
597 if (flseg->num_fh == 1)
598 i = 0;
599 else if (flseg->num_fh == 0)
600 /* Use the MDS OPEN fh set in nfs_read_rpcsetup */
601 return NULL;
602 else
603 i = nfs4_fl_calc_ds_index(lseg, j);
604 } else
605 i = j;
606 return flseg->fh_array[i];
607}
608
609static void
610filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
611 int err, u32 ds_addr)
612{
613 u32 *p = (u32 *)&dsaddr->deviceid;
614
615 printk(KERN_ERR "NFS: data server %x connection error %d."
616 " Deviceid [%x%x%x%x] marked out of use.\n",
617 ds_addr, err, p[0], p[1], p[2], p[3]);
618
619 spin_lock(&filelayout_deviceid_lock);
620 dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY;
621 spin_unlock(&filelayout_deviceid_lock);
622}
623
624struct nfs4_pnfs_ds *
625nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
626{
627 struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;
628 struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
629
630 if (ds == NULL) {
631 printk(KERN_ERR "%s: No data server for offset index %d\n",
632 __func__, ds_idx);
633 return NULL;
634 }
635
636 if (!ds->ds_clp) {
637 struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
638 int err;
639
640 if (dsaddr->flags & NFS4_DEVICE_ID_NEG_ENTRY) {
641 /* Already tried to connect, don't try again */
642 dprintk("%s Deviceid marked out of use\n", __func__);
643 return NULL;
644 }
645 err = nfs4_ds_connect(s, ds);
646 if (err) {
647 filelayout_mark_devid_negative(dsaddr, err,
648 ntohl(ds->ds_ip_addr));
649 return NULL;
650 }
651 }
652 return ds;
453} 653}
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 3c2a1724fbd2..bb80c49b6533 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -54,33 +54,29 @@ Elong:
54/* 54/*
55 * Determine the mount path as a string 55 * Determine the mount path as a string
56 */ 56 */
57static char *nfs4_path(const struct vfsmount *mnt_parent, 57static char *nfs4_path(struct dentry *dentry, char *buffer, ssize_t buflen)
58 const struct dentry *dentry,
59 char *buffer, ssize_t buflen)
60{ 58{
61 const char *srvpath; 59 char *limit;
62 60 char *path = nfs_path(&limit, dentry, buffer, buflen);
63 srvpath = strchr(mnt_parent->mnt_devname, ':'); 61 if (!IS_ERR(path)) {
64 if (srvpath) 62 char *colon = strchr(path, ':');
65 srvpath++; 63 if (colon && colon < limit)
66 else 64 path = colon + 1;
67 srvpath = mnt_parent->mnt_devname; 65 }
68 66 return path;
69 return nfs_path(srvpath, mnt_parent->mnt_root, dentry, buffer, buflen);
70} 67}
71 68
72/* 69/*
73 * Check that fs_locations::fs_root [RFC3530 6.3] is a prefix for what we 70 * Check that fs_locations::fs_root [RFC3530 6.3] is a prefix for what we
74 * believe to be the server path to this dentry 71 * believe to be the server path to this dentry
75 */ 72 */
76static int nfs4_validate_fspath(const struct vfsmount *mnt_parent, 73static int nfs4_validate_fspath(struct dentry *dentry,
77 const struct dentry *dentry,
78 const struct nfs4_fs_locations *locations, 74 const struct nfs4_fs_locations *locations,
79 char *page, char *page2) 75 char *page, char *page2)
80{ 76{
81 const char *path, *fs_path; 77 const char *path, *fs_path;
82 78
83 path = nfs4_path(mnt_parent, dentry, page, PAGE_SIZE); 79 path = nfs4_path(dentry, page, PAGE_SIZE);
84 if (IS_ERR(path)) 80 if (IS_ERR(path))
85 return PTR_ERR(path); 81 return PTR_ERR(path);
86 82
@@ -165,20 +161,18 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
165 161
166/** 162/**
167 * nfs_follow_referral - set up mountpoint when hitting a referral on moved error 163 * nfs_follow_referral - set up mountpoint when hitting a referral on moved error
168 * @mnt_parent - mountpoint of parent directory
169 * @dentry - parent directory 164 * @dentry - parent directory
170 * @locations - array of NFSv4 server location information 165 * @locations - array of NFSv4 server location information
171 * 166 *
172 */ 167 */
173static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, 168static struct vfsmount *nfs_follow_referral(struct dentry *dentry,
174 const struct dentry *dentry,
175 const struct nfs4_fs_locations *locations) 169 const struct nfs4_fs_locations *locations)
176{ 170{
177 struct vfsmount *mnt = ERR_PTR(-ENOENT); 171 struct vfsmount *mnt = ERR_PTR(-ENOENT);
178 struct nfs_clone_mount mountdata = { 172 struct nfs_clone_mount mountdata = {
179 .sb = mnt_parent->mnt_sb, 173 .sb = dentry->d_sb,
180 .dentry = dentry, 174 .dentry = dentry,
181 .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor, 175 .authflavor = NFS_SB(dentry->d_sb)->client->cl_auth->au_flavor,
182 }; 176 };
183 char *page = NULL, *page2 = NULL; 177 char *page = NULL, *page2 = NULL;
184 int loc, error; 178 int loc, error;
@@ -198,7 +192,7 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
198 goto out; 192 goto out;
199 193
200 /* Ensure fs path is a prefix of current dentry path */ 194 /* Ensure fs path is a prefix of current dentry path */
201 error = nfs4_validate_fspath(mnt_parent, dentry, locations, page, page2); 195 error = nfs4_validate_fspath(dentry, locations, page, page2);
202 if (error < 0) { 196 if (error < 0) {
203 mnt = ERR_PTR(error); 197 mnt = ERR_PTR(error);
204 goto out; 198 goto out;
@@ -225,11 +219,10 @@ out:
225 219
226/* 220/*
227 * nfs_do_refmount - handle crossing a referral on server 221 * nfs_do_refmount - handle crossing a referral on server
228 * @mnt_parent - mountpoint of referral
229 * @dentry - dentry of referral 222 * @dentry - dentry of referral
230 * 223 *
231 */ 224 */
232struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry) 225struct vfsmount *nfs_do_refmount(struct dentry *dentry)
233{ 226{
234 struct vfsmount *mnt = ERR_PTR(-ENOMEM); 227 struct vfsmount *mnt = ERR_PTR(-ENOMEM);
235 struct dentry *parent; 228 struct dentry *parent;
@@ -262,7 +255,7 @@ struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentr
262 fs_locations->fs_path.ncomponents <= 0) 255 fs_locations->fs_path.ncomponents <= 0)
263 goto out_free; 256 goto out_free;
264 257
265 mnt = nfs_follow_referral(mnt_parent, dentry, fs_locations); 258 mnt = nfs_follow_referral(dentry, fs_locations);
266out_free: 259out_free:
267 __free_page(page); 260 __free_page(page);
268 kfree(fs_locations); 261 kfree(fs_locations);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 1ff76acc7e98..1d84e7088af9 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -51,7 +51,6 @@
51#include <linux/sunrpc/bc_xprt.h> 51#include <linux/sunrpc/bc_xprt.h>
52#include <linux/xattr.h> 52#include <linux/xattr.h>
53#include <linux/utsname.h> 53#include <linux/utsname.h>
54#include <linux/mm.h>
55 54
56#include "nfs4_fs.h" 55#include "nfs4_fs.h"
57#include "delegation.h" 56#include "delegation.h"
@@ -86,6 +85,9 @@ static int nfs4_map_errors(int err)
86 switch (err) { 85 switch (err) {
87 case -NFS4ERR_RESOURCE: 86 case -NFS4ERR_RESOURCE:
88 return -EREMOTEIO; 87 return -EREMOTEIO;
88 case -NFS4ERR_BADOWNER:
89 case -NFS4ERR_BADNAME:
90 return -EINVAL;
89 default: 91 default:
90 dprintk("%s could not handle NFSv4 error %d\n", 92 dprintk("%s could not handle NFSv4 error %d\n",
91 __func__, -err); 93 __func__, -err);
@@ -242,7 +244,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
242/* This is the error handling routine for processes that are allowed 244/* This is the error handling routine for processes that are allowed
243 * to sleep. 245 * to sleep.
244 */ 246 */
245static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception) 247static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
246{ 248{
247 struct nfs_client *clp = server->nfs_client; 249 struct nfs_client *clp = server->nfs_client;
248 struct nfs4_state *state = exception->state; 250 struct nfs4_state *state = exception->state;
@@ -257,12 +259,13 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
257 case -NFS4ERR_OPENMODE: 259 case -NFS4ERR_OPENMODE:
258 if (state == NULL) 260 if (state == NULL)
259 break; 261 break;
260 nfs4_state_mark_reclaim_nograce(clp, state); 262 nfs4_schedule_stateid_recovery(server, state);
261 goto do_state_recovery; 263 goto wait_on_recovery;
262 case -NFS4ERR_STALE_STATEID: 264 case -NFS4ERR_STALE_STATEID:
263 case -NFS4ERR_STALE_CLIENTID: 265 case -NFS4ERR_STALE_CLIENTID:
264 case -NFS4ERR_EXPIRED: 266 case -NFS4ERR_EXPIRED:
265 goto do_state_recovery; 267 nfs4_schedule_lease_recovery(clp);
268 goto wait_on_recovery;
266#if defined(CONFIG_NFS_V4_1) 269#if defined(CONFIG_NFS_V4_1)
267 case -NFS4ERR_BADSESSION: 270 case -NFS4ERR_BADSESSION:
268 case -NFS4ERR_BADSLOT: 271 case -NFS4ERR_BADSLOT:
@@ -273,7 +276,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
273 case -NFS4ERR_SEQ_MISORDERED: 276 case -NFS4ERR_SEQ_MISORDERED:
274 dprintk("%s ERROR: %d Reset session\n", __func__, 277 dprintk("%s ERROR: %d Reset session\n", __func__,
275 errorcode); 278 errorcode);
276 nfs4_schedule_state_recovery(clp); 279 nfs4_schedule_session_recovery(clp->cl_session);
277 exception->retry = 1; 280 exception->retry = 1;
278 break; 281 break;
279#endif /* defined(CONFIG_NFS_V4_1) */ 282#endif /* defined(CONFIG_NFS_V4_1) */
@@ -293,11 +296,23 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
293 break; 296 break;
294 case -NFS4ERR_OLD_STATEID: 297 case -NFS4ERR_OLD_STATEID:
295 exception->retry = 1; 298 exception->retry = 1;
299 break;
300 case -NFS4ERR_BADOWNER:
301 /* The following works around a Linux server bug! */
302 case -NFS4ERR_BADNAME:
303 if (server->caps & NFS_CAP_UIDGID_NOMAP) {
304 server->caps &= ~NFS_CAP_UIDGID_NOMAP;
305 exception->retry = 1;
306 printk(KERN_WARNING "NFS: v4 server %s "
307 "does not accept raw "
308 "uid/gids. "
309 "Reenabling the idmapper.\n",
310 server->nfs_client->cl_hostname);
311 }
296 } 312 }
297 /* We failed to handle the error */ 313 /* We failed to handle the error */
298 return nfs4_map_errors(ret); 314 return nfs4_map_errors(ret);
299do_state_recovery: 315wait_on_recovery:
300 nfs4_schedule_state_recovery(clp);
301 ret = nfs4_wait_clnt_recover(clp); 316 ret = nfs4_wait_clnt_recover(clp);
302 if (ret == 0) 317 if (ret == 0)
303 exception->retry = 1; 318 exception->retry = 1;
@@ -436,8 +451,8 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
436 clp = res->sr_session->clp; 451 clp = res->sr_session->clp;
437 do_renew_lease(clp, timestamp); 452 do_renew_lease(clp, timestamp);
438 /* Check sequence flags */ 453 /* Check sequence flags */
439 if (atomic_read(&clp->cl_count) > 1) 454 if (res->sr_status_flags != 0)
440 nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags); 455 nfs4_schedule_lease_recovery(clp);
441 break; 456 break;
442 case -NFS4ERR_DELAY: 457 case -NFS4ERR_DELAY:
443 /* The server detected a resend of the RPC call and 458 /* The server detected a resend of the RPC call and
@@ -506,7 +521,7 @@ out:
506 return ret_id; 521 return ret_id;
507} 522}
508 523
509static int nfs41_setup_sequence(struct nfs4_session *session, 524int nfs41_setup_sequence(struct nfs4_session *session,
510 struct nfs4_sequence_args *args, 525 struct nfs4_sequence_args *args,
511 struct nfs4_sequence_res *res, 526 struct nfs4_sequence_res *res,
512 int cache_reply, 527 int cache_reply,
@@ -572,6 +587,7 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
572 res->sr_status = 1; 587 res->sr_status = 1;
573 return 0; 588 return 0;
574} 589}
590EXPORT_SYMBOL_GPL(nfs41_setup_sequence);
575 591
576int nfs4_setup_sequence(const struct nfs_server *server, 592int nfs4_setup_sequence(const struct nfs_server *server,
577 struct nfs4_sequence_args *args, 593 struct nfs4_sequence_args *args,
@@ -1256,14 +1272,13 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1256 case -NFS4ERR_BAD_HIGH_SLOT: 1272 case -NFS4ERR_BAD_HIGH_SLOT:
1257 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 1273 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1258 case -NFS4ERR_DEADSESSION: 1274 case -NFS4ERR_DEADSESSION:
1259 nfs4_schedule_state_recovery( 1275 nfs4_schedule_session_recovery(server->nfs_client->cl_session);
1260 server->nfs_client);
1261 goto out; 1276 goto out;
1262 case -NFS4ERR_STALE_CLIENTID: 1277 case -NFS4ERR_STALE_CLIENTID:
1263 case -NFS4ERR_STALE_STATEID: 1278 case -NFS4ERR_STALE_STATEID:
1264 case -NFS4ERR_EXPIRED: 1279 case -NFS4ERR_EXPIRED:
1265 /* Don't recall a delegation if it was lost */ 1280 /* Don't recall a delegation if it was lost */
1266 nfs4_schedule_state_recovery(server->nfs_client); 1281 nfs4_schedule_lease_recovery(server->nfs_client);
1267 goto out; 1282 goto out;
1268 case -ERESTARTSYS: 1283 case -ERESTARTSYS:
1269 /* 1284 /*
@@ -1272,7 +1287,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1272 */ 1287 */
1273 case -NFS4ERR_ADMIN_REVOKED: 1288 case -NFS4ERR_ADMIN_REVOKED:
1274 case -NFS4ERR_BAD_STATEID: 1289 case -NFS4ERR_BAD_STATEID:
1275 nfs4_state_mark_reclaim_nograce(server->nfs_client, state); 1290 nfs4_schedule_stateid_recovery(server, state);
1276 case -EKEYEXPIRED: 1291 case -EKEYEXPIRED:
1277 /* 1292 /*
1278 * User RPCSEC_GSS context has expired. 1293 * User RPCSEC_GSS context has expired.
@@ -1575,9 +1590,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
1575 return 0; 1590 return 0;
1576} 1591}
1577 1592
1578static int nfs4_recover_expired_lease(struct nfs_server *server) 1593static int nfs4_client_recover_expired_lease(struct nfs_client *clp)
1579{ 1594{
1580 struct nfs_client *clp = server->nfs_client;
1581 unsigned int loop; 1595 unsigned int loop;
1582 int ret; 1596 int ret;
1583 1597
@@ -1588,12 +1602,17 @@ static int nfs4_recover_expired_lease(struct nfs_server *server)
1588 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) && 1602 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
1589 !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state)) 1603 !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state))
1590 break; 1604 break;
1591 nfs4_schedule_state_recovery(clp); 1605 nfs4_schedule_state_manager(clp);
1592 ret = -EIO; 1606 ret = -EIO;
1593 } 1607 }
1594 return ret; 1608 return ret;
1595} 1609}
1596 1610
1611static int nfs4_recover_expired_lease(struct nfs_server *server)
1612{
1613 return nfs4_client_recover_expired_lease(server->nfs_client);
1614}
1615
1597/* 1616/*
1598 * OPEN_EXPIRED: 1617 * OPEN_EXPIRED:
1599 * reclaim state on the server after a network partition. 1618 * reclaim state on the server after a network partition.
@@ -3071,15 +3090,10 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
3071 return err; 3090 return err;
3072} 3091}
3073 3092
3074static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) 3093static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
3075{ 3094{
3076 struct nfs_server *server = NFS_SERVER(data->inode); 3095 struct nfs_server *server = NFS_SERVER(data->inode);
3077 3096
3078 dprintk("--> %s\n", __func__);
3079
3080 if (!nfs4_sequence_done(task, &data->res.seq_res))
3081 return -EAGAIN;
3082
3083 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { 3097 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
3084 nfs_restart_rpc(task, server->nfs_client); 3098 nfs_restart_rpc(task, server->nfs_client);
3085 return -EAGAIN; 3099 return -EAGAIN;
@@ -3091,19 +3105,44 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
3091 return 0; 3105 return 0;
3092} 3106}
3093 3107
3108static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
3109{
3110
3111 dprintk("--> %s\n", __func__);
3112
3113 if (!nfs4_sequence_done(task, &data->res.seq_res))
3114 return -EAGAIN;
3115
3116 return data->read_done_cb(task, data);
3117}
3118
3094static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) 3119static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
3095{ 3120{
3096 data->timestamp = jiffies; 3121 data->timestamp = jiffies;
3122 data->read_done_cb = nfs4_read_done_cb;
3097 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; 3123 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
3098} 3124}
3099 3125
3100static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) 3126/* Reset the the nfs_read_data to send the read to the MDS. */
3127void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data)
3128{
3129 dprintk("%s Reset task for i/o through\n", __func__);
3130 put_lseg(data->lseg);
3131 data->lseg = NULL;
3132 /* offsets will differ in the dense stripe case */
3133 data->args.offset = data->mds_offset;
3134 data->ds_clp = NULL;
3135 data->args.fh = NFS_FH(data->inode);
3136 data->read_done_cb = nfs4_read_done_cb;
3137 task->tk_ops = data->mds_ops;
3138 rpc_task_reset_client(task, NFS_CLIENT(data->inode));
3139}
3140EXPORT_SYMBOL_GPL(nfs4_reset_read);
3141
3142static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data)
3101{ 3143{
3102 struct inode *inode = data->inode; 3144 struct inode *inode = data->inode;
3103 3145
3104 if (!nfs4_sequence_done(task, &data->res.seq_res))
3105 return -EAGAIN;
3106
3107 if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { 3146 if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
3108 nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); 3147 nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
3109 return -EAGAIN; 3148 return -EAGAIN;
@@ -3115,11 +3154,41 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
3115 return 0; 3154 return 0;
3116} 3155}
3117 3156
3157static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
3158{
3159 if (!nfs4_sequence_done(task, &data->res.seq_res))
3160 return -EAGAIN;
3161 return data->write_done_cb(task, data);
3162}
3163
3164/* Reset the the nfs_write_data to send the write to the MDS. */
3165void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data)
3166{
3167 dprintk("%s Reset task for i/o through\n", __func__);
3168 put_lseg(data->lseg);
3169 data->lseg = NULL;
3170 data->ds_clp = NULL;
3171 data->write_done_cb = nfs4_write_done_cb;
3172 data->args.fh = NFS_FH(data->inode);
3173 data->args.bitmask = data->res.server->cache_consistency_bitmask;
3174 data->args.offset = data->mds_offset;
3175 data->res.fattr = &data->fattr;
3176 task->tk_ops = data->mds_ops;
3177 rpc_task_reset_client(task, NFS_CLIENT(data->inode));
3178}
3179EXPORT_SYMBOL_GPL(nfs4_reset_write);
3180
3118static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) 3181static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
3119{ 3182{
3120 struct nfs_server *server = NFS_SERVER(data->inode); 3183 struct nfs_server *server = NFS_SERVER(data->inode);
3121 3184
3122 data->args.bitmask = server->cache_consistency_bitmask; 3185 if (data->lseg) {
3186 data->args.bitmask = NULL;
3187 data->res.fattr = NULL;
3188 } else
3189 data->args.bitmask = server->cache_consistency_bitmask;
3190 if (!data->write_done_cb)
3191 data->write_done_cb = nfs4_write_done_cb;
3123 data->res.server = server; 3192 data->res.server = server;
3124 data->timestamp = jiffies; 3193 data->timestamp = jiffies;
3125 3194
@@ -3179,7 +3248,7 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata)
3179 if (task->tk_status < 0) { 3248 if (task->tk_status < 0) {
3180 /* Unless we're shutting down, schedule state recovery! */ 3249 /* Unless we're shutting down, schedule state recovery! */
3181 if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0) 3250 if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0)
3182 nfs4_schedule_state_recovery(clp); 3251 nfs4_schedule_lease_recovery(clp);
3183 return; 3252 return;
3184 } 3253 }
3185 do_renew_lease(clp, timestamp); 3254 do_renew_lease(clp, timestamp);
@@ -3262,7 +3331,7 @@ static int buf_to_pages_noslab(const void *buf, size_t buflen,
3262 spages = pages; 3331 spages = pages;
3263 3332
3264 do { 3333 do {
3265 len = min(PAGE_CACHE_SIZE, buflen); 3334 len = min_t(size_t, PAGE_CACHE_SIZE, buflen);
3266 newpage = alloc_page(GFP_KERNEL); 3335 newpage = alloc_page(GFP_KERNEL);
3267 3336
3268 if (newpage == NULL) 3337 if (newpage == NULL)
@@ -3504,12 +3573,13 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3504 case -NFS4ERR_OPENMODE: 3573 case -NFS4ERR_OPENMODE:
3505 if (state == NULL) 3574 if (state == NULL)
3506 break; 3575 break;
3507 nfs4_state_mark_reclaim_nograce(clp, state); 3576 nfs4_schedule_stateid_recovery(server, state);
3508 goto do_state_recovery; 3577 goto wait_on_recovery;
3509 case -NFS4ERR_STALE_STATEID: 3578 case -NFS4ERR_STALE_STATEID:
3510 case -NFS4ERR_STALE_CLIENTID: 3579 case -NFS4ERR_STALE_CLIENTID:
3511 case -NFS4ERR_EXPIRED: 3580 case -NFS4ERR_EXPIRED:
3512 goto do_state_recovery; 3581 nfs4_schedule_lease_recovery(clp);
3582 goto wait_on_recovery;
3513#if defined(CONFIG_NFS_V4_1) 3583#if defined(CONFIG_NFS_V4_1)
3514 case -NFS4ERR_BADSESSION: 3584 case -NFS4ERR_BADSESSION:
3515 case -NFS4ERR_BADSLOT: 3585 case -NFS4ERR_BADSLOT:
@@ -3520,7 +3590,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3520 case -NFS4ERR_SEQ_MISORDERED: 3590 case -NFS4ERR_SEQ_MISORDERED:
3521 dprintk("%s ERROR %d, Reset session\n", __func__, 3591 dprintk("%s ERROR %d, Reset session\n", __func__,
3522 task->tk_status); 3592 task->tk_status);
3523 nfs4_schedule_state_recovery(clp); 3593 nfs4_schedule_session_recovery(clp->cl_session);
3524 task->tk_status = 0; 3594 task->tk_status = 0;
3525 return -EAGAIN; 3595 return -EAGAIN;
3526#endif /* CONFIG_NFS_V4_1 */ 3596#endif /* CONFIG_NFS_V4_1 */
@@ -3537,9 +3607,8 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3537 } 3607 }
3538 task->tk_status = nfs4_map_errors(task->tk_status); 3608 task->tk_status = nfs4_map_errors(task->tk_status);
3539 return 0; 3609 return 0;
3540do_state_recovery: 3610wait_on_recovery:
3541 rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); 3611 rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
3542 nfs4_schedule_state_recovery(clp);
3543 if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) 3612 if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
3544 rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); 3613 rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
3545 task->tk_status = 0; 3614 task->tk_status = 0;
@@ -4150,7 +4219,7 @@ static void nfs4_lock_release(void *calldata)
4150 task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp, 4219 task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
4151 data->arg.lock_seqid); 4220 data->arg.lock_seqid);
4152 if (!IS_ERR(task)) 4221 if (!IS_ERR(task))
4153 rpc_put_task(task); 4222 rpc_put_task_async(task);
4154 dprintk("%s: cancelling lock!\n", __func__); 4223 dprintk("%s: cancelling lock!\n", __func__);
4155 } else 4224 } else
4156 nfs_free_seqid(data->arg.lock_seqid); 4225 nfs_free_seqid(data->arg.lock_seqid);
@@ -4174,23 +4243,18 @@ static const struct rpc_call_ops nfs4_recover_lock_ops = {
4174 4243
4175static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error) 4244static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error)
4176{ 4245{
4177 struct nfs_client *clp = server->nfs_client;
4178 struct nfs4_state *state = lsp->ls_state;
4179
4180 switch (error) { 4246 switch (error) {
4181 case -NFS4ERR_ADMIN_REVOKED: 4247 case -NFS4ERR_ADMIN_REVOKED:
4182 case -NFS4ERR_BAD_STATEID: 4248 case -NFS4ERR_BAD_STATEID:
4183 case -NFS4ERR_EXPIRED: 4249 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
4184 if (new_lock_owner != 0 || 4250 if (new_lock_owner != 0 ||
4185 (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) 4251 (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
4186 nfs4_state_mark_reclaim_nograce(clp, state); 4252 nfs4_schedule_stateid_recovery(server, lsp->ls_state);
4187 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
4188 break; 4253 break;
4189 case -NFS4ERR_STALE_STATEID: 4254 case -NFS4ERR_STALE_STATEID:
4190 if (new_lock_owner != 0 ||
4191 (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
4192 nfs4_state_mark_reclaim_reboot(clp, state);
4193 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; 4255 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
4256 case -NFS4ERR_EXPIRED:
4257 nfs4_schedule_lease_recovery(server->nfs_client);
4194 }; 4258 };
4195} 4259}
4196 4260
@@ -4406,12 +4470,14 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4406 case -NFS4ERR_EXPIRED: 4470 case -NFS4ERR_EXPIRED:
4407 case -NFS4ERR_STALE_CLIENTID: 4471 case -NFS4ERR_STALE_CLIENTID:
4408 case -NFS4ERR_STALE_STATEID: 4472 case -NFS4ERR_STALE_STATEID:
4473 nfs4_schedule_lease_recovery(server->nfs_client);
4474 goto out;
4409 case -NFS4ERR_BADSESSION: 4475 case -NFS4ERR_BADSESSION:
4410 case -NFS4ERR_BADSLOT: 4476 case -NFS4ERR_BADSLOT:
4411 case -NFS4ERR_BAD_HIGH_SLOT: 4477 case -NFS4ERR_BAD_HIGH_SLOT:
4412 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 4478 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
4413 case -NFS4ERR_DEADSESSION: 4479 case -NFS4ERR_DEADSESSION:
4414 nfs4_schedule_state_recovery(server->nfs_client); 4480 nfs4_schedule_session_recovery(server->nfs_client->cl_session);
4415 goto out; 4481 goto out;
4416 case -ERESTARTSYS: 4482 case -ERESTARTSYS:
4417 /* 4483 /*
@@ -4421,7 +4487,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4421 case -NFS4ERR_ADMIN_REVOKED: 4487 case -NFS4ERR_ADMIN_REVOKED:
4422 case -NFS4ERR_BAD_STATEID: 4488 case -NFS4ERR_BAD_STATEID:
4423 case -NFS4ERR_OPENMODE: 4489 case -NFS4ERR_OPENMODE:
4424 nfs4_state_mark_reclaim_nograce(server->nfs_client, state); 4490 nfs4_schedule_stateid_recovery(server, state);
4425 err = 0; 4491 err = 0;
4426 goto out; 4492 goto out;
4427 case -EKEYEXPIRED: 4493 case -EKEYEXPIRED:
@@ -5028,10 +5094,20 @@ int nfs4_proc_create_session(struct nfs_client *clp)
5028 int status; 5094 int status;
5029 unsigned *ptr; 5095 unsigned *ptr;
5030 struct nfs4_session *session = clp->cl_session; 5096 struct nfs4_session *session = clp->cl_session;
5097 long timeout = 0;
5098 int err;
5031 5099
5032 dprintk("--> %s clp=%p session=%p\n", __func__, clp, session); 5100 dprintk("--> %s clp=%p session=%p\n", __func__, clp, session);
5033 5101
5034 status = _nfs4_proc_create_session(clp); 5102 do {
5103 status = _nfs4_proc_create_session(clp);
5104 if (status == -NFS4ERR_DELAY) {
5105 err = nfs4_delay(clp->cl_rpcclient, &timeout);
5106 if (err)
5107 status = err;
5108 }
5109 } while (status == -NFS4ERR_DELAY);
5110
5035 if (status) 5111 if (status)
5036 goto out; 5112 goto out;
5037 5113
@@ -5113,6 +5189,27 @@ int nfs4_init_session(struct nfs_server *server)
5113 return ret; 5189 return ret;
5114} 5190}
5115 5191
5192int nfs4_init_ds_session(struct nfs_client *clp)
5193{
5194 struct nfs4_session *session = clp->cl_session;
5195 int ret;
5196
5197 if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state))
5198 return 0;
5199
5200 ret = nfs4_client_recover_expired_lease(clp);
5201 if (!ret)
5202 /* Test for the DS role */
5203 if (!is_ds_client(clp))
5204 ret = -ENODEV;
5205 if (!ret)
5206 ret = nfs4_check_client_ready(clp);
5207 return ret;
5208
5209}
5210EXPORT_SYMBOL_GPL(nfs4_init_ds_session);
5211
5212
5116/* 5213/*
5117 * Renew the cl_session lease. 5214 * Renew the cl_session lease.
5118 */ 5215 */
@@ -5140,7 +5237,7 @@ static int nfs41_sequence_handle_errors(struct rpc_task *task, struct nfs_client
5140 rpc_delay(task, NFS4_POLL_RETRY_MAX); 5237 rpc_delay(task, NFS4_POLL_RETRY_MAX);
5141 return -EAGAIN; 5238 return -EAGAIN;
5142 default: 5239 default:
5143 nfs4_schedule_state_recovery(clp); 5240 nfs4_schedule_lease_recovery(clp);
5144 } 5241 }
5145 return 0; 5242 return 0;
5146} 5243}
@@ -5227,7 +5324,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
5227 if (IS_ERR(task)) 5324 if (IS_ERR(task))
5228 ret = PTR_ERR(task); 5325 ret = PTR_ERR(task);
5229 else 5326 else
5230 rpc_put_task(task); 5327 rpc_put_task_async(task);
5231 dprintk("<-- %s status=%d\n", __func__, ret); 5328 dprintk("<-- %s status=%d\n", __func__, ret);
5232 return ret; 5329 return ret;
5233} 5330}
@@ -5243,8 +5340,13 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
5243 goto out; 5340 goto out;
5244 } 5341 }
5245 ret = rpc_wait_for_completion_task(task); 5342 ret = rpc_wait_for_completion_task(task);
5246 if (!ret) 5343 if (!ret) {
5344 struct nfs4_sequence_res *res = task->tk_msg.rpc_resp;
5345
5346 if (task->tk_status == 0)
5347 nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
5247 ret = task->tk_status; 5348 ret = task->tk_status;
5349 }
5248 rpc_put_task(task); 5350 rpc_put_task(task);
5249out: 5351out:
5250 dprintk("<-- %s status=%d\n", __func__, ret); 5352 dprintk("<-- %s status=%d\n", __func__, ret);
@@ -5281,7 +5383,7 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
5281 rpc_delay(task, NFS4_POLL_RETRY_MAX); 5383 rpc_delay(task, NFS4_POLL_RETRY_MAX);
5282 return -EAGAIN; 5384 return -EAGAIN;
5283 default: 5385 default:
5284 nfs4_schedule_state_recovery(clp); 5386 nfs4_schedule_lease_recovery(clp);
5285 } 5387 }
5286 return 0; 5388 return 0;
5287} 5389}
@@ -5349,6 +5451,9 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
5349 status = PTR_ERR(task); 5451 status = PTR_ERR(task);
5350 goto out; 5452 goto out;
5351 } 5453 }
5454 status = nfs4_wait_for_completion_rpc_task(task);
5455 if (status == 0)
5456 status = task->tk_status;
5352 rpc_put_task(task); 5457 rpc_put_task(task);
5353 return 0; 5458 return 0;
5354out: 5459out:
@@ -5635,6 +5740,7 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
5635 .clear_acl_cache = nfs4_zap_acl_attr, 5740 .clear_acl_cache = nfs4_zap_acl_attr,
5636 .close_context = nfs4_close_context, 5741 .close_context = nfs4_close_context,
5637 .open_context = nfs4_atomic_open, 5742 .open_context = nfs4_atomic_open,
5743 .init_client = nfs4_init_client,
5638}; 5744};
5639 5745
5640static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { 5746static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 402143d75fc5..df8e7f3ca56d 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -64,12 +64,8 @@ nfs4_renew_state(struct work_struct *work)
64 ops = clp->cl_mvops->state_renewal_ops; 64 ops = clp->cl_mvops->state_renewal_ops;
65 dprintk("%s: start\n", __func__); 65 dprintk("%s: start\n", __func__);
66 66
67 rcu_read_lock(); 67 if (test_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state))
68 if (list_empty(&clp->cl_superblocks)) {
69 rcu_read_unlock();
70 goto out; 68 goto out;
71 }
72 rcu_read_unlock();
73 69
74 spin_lock(&clp->cl_lock); 70 spin_lock(&clp->cl_lock);
75 lease = clp->cl_lease_time; 71 lease = clp->cl_lease_time;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index e6742b57a04c..ab1bf5bb021f 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -153,6 +153,11 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp)
153 int status; 153 int status;
154 struct nfs_fsinfo fsinfo; 154 struct nfs_fsinfo fsinfo;
155 155
156 if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) {
157 nfs4_schedule_state_renewal(clp);
158 return 0;
159 }
160
156 status = nfs4_proc_get_lease_time(clp, &fsinfo); 161 status = nfs4_proc_get_lease_time(clp, &fsinfo);
157 if (status == 0) { 162 if (status == 0) {
158 /* Update lease time and schedule renewal */ 163 /* Update lease time and schedule renewal */
@@ -1007,9 +1012,9 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
1007} 1012}
1008 1013
1009/* 1014/*
1010 * Schedule a state recovery attempt 1015 * Schedule a lease recovery attempt
1011 */ 1016 */
1012void nfs4_schedule_state_recovery(struct nfs_client *clp) 1017void nfs4_schedule_lease_recovery(struct nfs_client *clp)
1013{ 1018{
1014 if (!clp) 1019 if (!clp)
1015 return; 1020 return;
@@ -1018,7 +1023,7 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp)
1018 nfs4_schedule_state_manager(clp); 1023 nfs4_schedule_state_manager(clp);
1019} 1024}
1020 1025
1021int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) 1026static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
1022{ 1027{
1023 1028
1024 set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); 1029 set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1032,7 +1037,7 @@ int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *st
1032 return 1; 1037 return 1;
1033} 1038}
1034 1039
1035int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state) 1040static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
1036{ 1041{
1037 set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags); 1042 set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags);
1038 clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); 1043 clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1041,6 +1046,14 @@ int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *s
1041 return 1; 1046 return 1;
1042} 1047}
1043 1048
1049void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state)
1050{
1051 struct nfs_client *clp = server->nfs_client;
1052
1053 nfs4_state_mark_reclaim_nograce(clp, state);
1054 nfs4_schedule_state_manager(clp);
1055}
1056
1044static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops) 1057static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops)
1045{ 1058{
1046 struct inode *inode = state->inode; 1059 struct inode *inode = state->inode;
@@ -1436,10 +1449,16 @@ static int nfs4_reclaim_lease(struct nfs_client *clp)
1436} 1449}
1437 1450
1438#ifdef CONFIG_NFS_V4_1 1451#ifdef CONFIG_NFS_V4_1
1452void nfs4_schedule_session_recovery(struct nfs4_session *session)
1453{
1454 nfs4_schedule_lease_recovery(session->clp);
1455}
1456EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery);
1457
1439void nfs41_handle_recall_slot(struct nfs_client *clp) 1458void nfs41_handle_recall_slot(struct nfs_client *clp)
1440{ 1459{
1441 set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); 1460 set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
1442 nfs4_schedule_state_recovery(clp); 1461 nfs4_schedule_state_manager(clp);
1443} 1462}
1444 1463
1445static void nfs4_reset_all_state(struct nfs_client *clp) 1464static void nfs4_reset_all_state(struct nfs_client *clp)
@@ -1447,7 +1466,7 @@ static void nfs4_reset_all_state(struct nfs_client *clp)
1447 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { 1466 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
1448 clp->cl_boot_time = CURRENT_TIME; 1467 clp->cl_boot_time = CURRENT_TIME;
1449 nfs4_state_start_reclaim_nograce(clp); 1468 nfs4_state_start_reclaim_nograce(clp);
1450 nfs4_schedule_state_recovery(clp); 1469 nfs4_schedule_state_manager(clp);
1451 } 1470 }
1452} 1471}
1453 1472
@@ -1455,7 +1474,7 @@ static void nfs41_handle_server_reboot(struct nfs_client *clp)
1455{ 1474{
1456 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { 1475 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
1457 nfs4_state_start_reclaim_reboot(clp); 1476 nfs4_state_start_reclaim_reboot(clp);
1458 nfs4_schedule_state_recovery(clp); 1477 nfs4_schedule_state_manager(clp);
1459 } 1478 }
1460} 1479}
1461 1480
@@ -1475,7 +1494,7 @@ static void nfs41_handle_cb_path_down(struct nfs_client *clp)
1475{ 1494{
1476 nfs_expire_all_delegations(clp); 1495 nfs_expire_all_delegations(clp);
1477 if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0) 1496 if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0)
1478 nfs4_schedule_state_recovery(clp); 1497 nfs4_schedule_state_manager(clp);
1479} 1498}
1480 1499
1481void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) 1500void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4e2c168b6ee9..0cf560f77884 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -844,7 +844,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
844 if (iap->ia_valid & ATTR_MODE) 844 if (iap->ia_valid & ATTR_MODE)
845 len += 4; 845 len += 4;
846 if (iap->ia_valid & ATTR_UID) { 846 if (iap->ia_valid & ATTR_UID) {
847 owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name, IDMAP_NAMESZ); 847 owner_namelen = nfs_map_uid_to_name(server, iap->ia_uid, owner_name, IDMAP_NAMESZ);
848 if (owner_namelen < 0) { 848 if (owner_namelen < 0) {
849 dprintk("nfs: couldn't resolve uid %d to string\n", 849 dprintk("nfs: couldn't resolve uid %d to string\n",
850 iap->ia_uid); 850 iap->ia_uid);
@@ -856,7 +856,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
856 len += 4 + (XDR_QUADLEN(owner_namelen) << 2); 856 len += 4 + (XDR_QUADLEN(owner_namelen) << 2);
857 } 857 }
858 if (iap->ia_valid & ATTR_GID) { 858 if (iap->ia_valid & ATTR_GID) {
859 owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group, IDMAP_NAMESZ); 859 owner_grouplen = nfs_map_gid_to_group(server, iap->ia_gid, owner_group, IDMAP_NAMESZ);
860 if (owner_grouplen < 0) { 860 if (owner_grouplen < 0) {
861 dprintk("nfs: couldn't resolve gid %d to string\n", 861 dprintk("nfs: couldn't resolve gid %d to string\n",
862 iap->ia_gid); 862 iap->ia_gid);
@@ -1384,7 +1384,7 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1384 hdr->replen += decode_putrootfh_maxsz; 1384 hdr->replen += decode_putrootfh_maxsz;
1385} 1385}
1386 1386
1387static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx) 1387static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx, int zero_seqid)
1388{ 1388{
1389 nfs4_stateid stateid; 1389 nfs4_stateid stateid;
1390 __be32 *p; 1390 __be32 *p;
@@ -1392,6 +1392,8 @@ static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context
1392 p = reserve_space(xdr, NFS4_STATEID_SIZE); 1392 p = reserve_space(xdr, NFS4_STATEID_SIZE);
1393 if (ctx->state != NULL) { 1393 if (ctx->state != NULL) {
1394 nfs4_copy_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid); 1394 nfs4_copy_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid);
1395 if (zero_seqid)
1396 stateid.stateid.seqid = 0;
1395 xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE); 1397 xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE);
1396 } else 1398 } else
1397 xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE); 1399 xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
@@ -1404,7 +1406,8 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args,
1404 p = reserve_space(xdr, 4); 1406 p = reserve_space(xdr, 4);
1405 *p = cpu_to_be32(OP_READ); 1407 *p = cpu_to_be32(OP_READ);
1406 1408
1407 encode_stateid(xdr, args->context, args->lock_context); 1409 encode_stateid(xdr, args->context, args->lock_context,
1410 hdr->minorversion);
1408 1411
1409 p = reserve_space(xdr, 12); 1412 p = reserve_space(xdr, 12);
1410 p = xdr_encode_hyper(p, args->offset); 1413 p = xdr_encode_hyper(p, args->offset);
@@ -1592,7 +1595,8 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg
1592 p = reserve_space(xdr, 4); 1595 p = reserve_space(xdr, 4);
1593 *p = cpu_to_be32(OP_WRITE); 1596 *p = cpu_to_be32(OP_WRITE);
1594 1597
1595 encode_stateid(xdr, args->context, args->lock_context); 1598 encode_stateid(xdr, args->context, args->lock_context,
1599 hdr->minorversion);
1596 1600
1597 p = reserve_space(xdr, 16); 1601 p = reserve_space(xdr, 16);
1598 p = xdr_encode_hyper(p, args->offset); 1602 p = xdr_encode_hyper(p, args->offset);
@@ -1660,7 +1664,7 @@ static void encode_create_session(struct xdr_stream *xdr,
1660 1664
1661 p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12); 1665 p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12);
1662 *p++ = cpu_to_be32(OP_CREATE_SESSION); 1666 *p++ = cpu_to_be32(OP_CREATE_SESSION);
1663 p = xdr_encode_hyper(p, clp->cl_ex_clid); 1667 p = xdr_encode_hyper(p, clp->cl_clientid);
1664 *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */ 1668 *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */
1665 *p++ = cpu_to_be32(args->flags); /*flags */ 1669 *p++ = cpu_to_be32(args->flags); /*flags */
1666 1670
@@ -2271,7 +2275,8 @@ static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
2271 encode_putfh(xdr, args->fh, &hdr); 2275 encode_putfh(xdr, args->fh, &hdr);
2272 encode_write(xdr, args, &hdr); 2276 encode_write(xdr, args, &hdr);
2273 req->rq_snd_buf.flags |= XDRBUF_WRITE; 2277 req->rq_snd_buf.flags |= XDRBUF_WRITE;
2274 encode_getfattr(xdr, args->bitmask, &hdr); 2278 if (args->bitmask)
2279 encode_getfattr(xdr, args->bitmask, &hdr);
2275 encode_nops(&hdr); 2280 encode_nops(&hdr);
2276} 2281}
2277 2282
@@ -3382,7 +3387,7 @@ out_overflow:
3382} 3387}
3383 3388
3384static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, 3389static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
3385 struct nfs_client *clp, uint32_t *uid, int may_sleep) 3390 const struct nfs_server *server, uint32_t *uid, int may_sleep)
3386{ 3391{
3387 uint32_t len; 3392 uint32_t len;
3388 __be32 *p; 3393 __be32 *p;
@@ -3402,7 +3407,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
3402 if (!may_sleep) { 3407 if (!may_sleep) {
3403 /* do nothing */ 3408 /* do nothing */
3404 } else if (len < XDR_MAX_NETOBJ) { 3409 } else if (len < XDR_MAX_NETOBJ) {
3405 if (nfs_map_name_to_uid(clp, (char *)p, len, uid) == 0) 3410 if (nfs_map_name_to_uid(server, (char *)p, len, uid) == 0)
3406 ret = NFS_ATTR_FATTR_OWNER; 3411 ret = NFS_ATTR_FATTR_OWNER;
3407 else 3412 else
3408 dprintk("%s: nfs_map_name_to_uid failed!\n", 3413 dprintk("%s: nfs_map_name_to_uid failed!\n",
@@ -3420,7 +3425,7 @@ out_overflow:
3420} 3425}
3421 3426
3422static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, 3427static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
3423 struct nfs_client *clp, uint32_t *gid, int may_sleep) 3428 const struct nfs_server *server, uint32_t *gid, int may_sleep)
3424{ 3429{
3425 uint32_t len; 3430 uint32_t len;
3426 __be32 *p; 3431 __be32 *p;
@@ -3440,7 +3445,7 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
3440 if (!may_sleep) { 3445 if (!may_sleep) {
3441 /* do nothing */ 3446 /* do nothing */
3442 } else if (len < XDR_MAX_NETOBJ) { 3447 } else if (len < XDR_MAX_NETOBJ) {
3443 if (nfs_map_group_to_gid(clp, (char *)p, len, gid) == 0) 3448 if (nfs_map_group_to_gid(server, (char *)p, len, gid) == 0)
3444 ret = NFS_ATTR_FATTR_GROUP; 3449 ret = NFS_ATTR_FATTR_GROUP;
3445 else 3450 else
3446 dprintk("%s: nfs_map_group_to_gid failed!\n", 3451 dprintk("%s: nfs_map_group_to_gid failed!\n",
@@ -3939,14 +3944,12 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
3939 goto xdr_error; 3944 goto xdr_error;
3940 fattr->valid |= status; 3945 fattr->valid |= status;
3941 3946
3942 status = decode_attr_owner(xdr, bitmap, server->nfs_client, 3947 status = decode_attr_owner(xdr, bitmap, server, &fattr->uid, may_sleep);
3943 &fattr->uid, may_sleep);
3944 if (status < 0) 3948 if (status < 0)
3945 goto xdr_error; 3949 goto xdr_error;
3946 fattr->valid |= status; 3950 fattr->valid |= status;
3947 3951
3948 status = decode_attr_group(xdr, bitmap, server->nfs_client, 3952 status = decode_attr_group(xdr, bitmap, server, &fattr->gid, may_sleep);
3949 &fattr->gid, may_sleep);
3950 if (status < 0) 3953 if (status < 0)
3951 goto xdr_error; 3954 goto xdr_error;
3952 fattr->valid |= status; 3955 fattr->valid |= status;
@@ -4694,7 +4697,7 @@ static int decode_exchange_id(struct xdr_stream *xdr,
4694 p = xdr_inline_decode(xdr, 8); 4697 p = xdr_inline_decode(xdr, 8);
4695 if (unlikely(!p)) 4698 if (unlikely(!p))
4696 goto out_overflow; 4699 goto out_overflow;
4697 xdr_decode_hyper(p, &clp->cl_ex_clid); 4700 xdr_decode_hyper(p, &clp->cl_clientid);
4698 p = xdr_inline_decode(xdr, 12); 4701 p = xdr_inline_decode(xdr, 12);
4699 if (unlikely(!p)) 4702 if (unlikely(!p))
4700 goto out_overflow; 4703 goto out_overflow;
@@ -5690,8 +5693,9 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5690 status = decode_write(xdr, res); 5693 status = decode_write(xdr, res);
5691 if (status) 5694 if (status)
5692 goto out; 5695 goto out;
5693 decode_getfattr(xdr, res->fattr, res->server, 5696 if (res->fattr)
5694 !RPC_IS_ASYNC(rqstp->rq_task)); 5697 decode_getfattr(xdr, res->fattr, res->server,
5698 !RPC_IS_ASYNC(rqstp->rq_task));
5695 if (!status) 5699 if (!status)
5696 status = res->count; 5700 status = res->count;
5697out: 5701out:
@@ -6167,8 +6171,6 @@ static struct {
6167 { NFS4ERR_DQUOT, -EDQUOT }, 6171 { NFS4ERR_DQUOT, -EDQUOT },
6168 { NFS4ERR_STALE, -ESTALE }, 6172 { NFS4ERR_STALE, -ESTALE },
6169 { NFS4ERR_BADHANDLE, -EBADHANDLE }, 6173 { NFS4ERR_BADHANDLE, -EBADHANDLE },
6170 { NFS4ERR_BADOWNER, -EINVAL },
6171 { NFS4ERR_BADNAME, -EINVAL },
6172 { NFS4ERR_BAD_COOKIE, -EBADCOOKIE }, 6174 { NFS4ERR_BAD_COOKIE, -EBADCOOKIE },
6173 { NFS4ERR_NOTSUPP, -ENOTSUPP }, 6175 { NFS4ERR_NOTSUPP, -ENOTSUPP },
6174 { NFS4ERR_TOOSMALL, -ETOOSMALL }, 6176 { NFS4ERR_TOOSMALL, -ETOOSMALL },
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 903908a20023..c541093a5bf2 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -86,11 +86,14 @@
86/* Default path we try to mount. "%s" gets replaced by our IP address */ 86/* Default path we try to mount. "%s" gets replaced by our IP address */
87#define NFS_ROOT "/tftpboot/%s" 87#define NFS_ROOT "/tftpboot/%s"
88 88
89/* Default NFSROOT mount options. */
90#define NFS_DEF_OPTIONS "udp"
91
89/* Parameters passed from the kernel command line */ 92/* Parameters passed from the kernel command line */
90static char nfs_root_parms[256] __initdata = ""; 93static char nfs_root_parms[256] __initdata = "";
91 94
92/* Text-based mount options passed to super.c */ 95/* Text-based mount options passed to super.c */
93static char nfs_root_options[256] __initdata = ""; 96static char nfs_root_options[256] __initdata = NFS_DEF_OPTIONS;
94 97
95/* Address of NFS server */ 98/* Address of NFS server */
96static __be32 servaddr __initdata = htonl(INADDR_NONE); 99static __be32 servaddr __initdata = htonl(INADDR_NONE);
@@ -160,8 +163,14 @@ static int __init root_nfs_copy(char *dest, const char *src,
160} 163}
161 164
162static int __init root_nfs_cat(char *dest, const char *src, 165static int __init root_nfs_cat(char *dest, const char *src,
163 const size_t destlen) 166 const size_t destlen)
164{ 167{
168 size_t len = strlen(dest);
169
170 if (len && dest[len - 1] != ',')
171 if (strlcat(dest, ",", destlen) > destlen)
172 return -1;
173
165 if (strlcat(dest, src, destlen) > destlen) 174 if (strlcat(dest, src, destlen) > destlen)
166 return -1; 175 return -1;
167 return 0; 176 return 0;
@@ -194,16 +203,6 @@ static int __init root_nfs_parse_options(char *incoming, char *exppath,
194 if (root_nfs_cat(nfs_root_options, incoming, 203 if (root_nfs_cat(nfs_root_options, incoming,
195 sizeof(nfs_root_options))) 204 sizeof(nfs_root_options)))
196 return -1; 205 return -1;
197
198 /*
199 * Possibly prepare for more options to be appended
200 */
201 if (nfs_root_options[0] != '\0' &&
202 nfs_root_options[strlen(nfs_root_options)] != ',')
203 if (root_nfs_cat(nfs_root_options, ",",
204 sizeof(nfs_root_options)))
205 return -1;
206
207 return 0; 206 return 0;
208} 207}
209 208
@@ -217,7 +216,7 @@ static int __init root_nfs_parse_options(char *incoming, char *exppath,
217 */ 216 */
218static int __init root_nfs_data(char *cmdline) 217static int __init root_nfs_data(char *cmdline)
219{ 218{
220 char addr_option[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1]; 219 char mand_options[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1];
221 int len, retval = -1; 220 int len, retval = -1;
222 char *tmp = NULL; 221 char *tmp = NULL;
223 const size_t tmplen = sizeof(nfs_export_path); 222 const size_t tmplen = sizeof(nfs_export_path);
@@ -244,9 +243,9 @@ static int __init root_nfs_data(char *cmdline)
244 * Append mandatory options for nfsroot so they override 243 * Append mandatory options for nfsroot so they override
245 * what has come before 244 * what has come before
246 */ 245 */
247 snprintf(addr_option, sizeof(addr_option), "nolock,addr=%pI4", 246 snprintf(mand_options, sizeof(mand_options), "nolock,addr=%pI4",
248 &servaddr); 247 &servaddr);
249 if (root_nfs_cat(nfs_root_options, addr_option, 248 if (root_nfs_cat(nfs_root_options, mand_options,
250 sizeof(nfs_root_options))) 249 sizeof(nfs_root_options)))
251 goto out_optionstoolong; 250 goto out_optionstoolong;
252 251
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index e1164e3f9e69..23e794410669 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -20,6 +20,7 @@
20#include <linux/nfs_mount.h> 20#include <linux/nfs_mount.h>
21 21
22#include "internal.h" 22#include "internal.h"
23#include "pnfs.h"
23 24
24static struct kmem_cache *nfs_page_cachep; 25static struct kmem_cache *nfs_page_cachep;
25 26
@@ -213,7 +214,7 @@ nfs_wait_on_request(struct nfs_page *req)
213 */ 214 */
214void nfs_pageio_init(struct nfs_pageio_descriptor *desc, 215void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
215 struct inode *inode, 216 struct inode *inode,
216 int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int), 217 int (*doio)(struct nfs_pageio_descriptor *),
217 size_t bsize, 218 size_t bsize,
218 int io_flags) 219 int io_flags)
219{ 220{
@@ -226,6 +227,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
226 desc->pg_doio = doio; 227 desc->pg_doio = doio;
227 desc->pg_ioflags = io_flags; 228 desc->pg_ioflags = io_flags;
228 desc->pg_error = 0; 229 desc->pg_error = 0;
230 desc->pg_lseg = NULL;
229} 231}
230 232
231/** 233/**
@@ -240,7 +242,8 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
240 * Return 'true' if this is the case, else return 'false'. 242 * Return 'true' if this is the case, else return 'false'.
241 */ 243 */
242static int nfs_can_coalesce_requests(struct nfs_page *prev, 244static int nfs_can_coalesce_requests(struct nfs_page *prev,
243 struct nfs_page *req) 245 struct nfs_page *req,
246 struct nfs_pageio_descriptor *pgio)
244{ 247{
245 if (req->wb_context->cred != prev->wb_context->cred) 248 if (req->wb_context->cred != prev->wb_context->cred)
246 return 0; 249 return 0;
@@ -254,6 +257,12 @@ static int nfs_can_coalesce_requests(struct nfs_page *prev,
254 return 0; 257 return 0;
255 if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) 258 if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
256 return 0; 259 return 0;
260 /*
261 * Non-whole file layouts need to check that req is inside of
262 * pgio->pg_lseg.
263 */
264 if (pgio->pg_test && !pgio->pg_test(pgio, prev, req))
265 return 0;
257 return 1; 266 return 1;
258} 267}
259 268
@@ -286,7 +295,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
286 if (newlen > desc->pg_bsize) 295 if (newlen > desc->pg_bsize)
287 return 0; 296 return 0;
288 prev = nfs_list_entry(desc->pg_list.prev); 297 prev = nfs_list_entry(desc->pg_list.prev);
289 if (!nfs_can_coalesce_requests(prev, req)) 298 if (!nfs_can_coalesce_requests(prev, req, desc))
290 return 0; 299 return 0;
291 } else 300 } else
292 desc->pg_base = req->wb_pgbase; 301 desc->pg_base = req->wb_pgbase;
@@ -302,12 +311,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
302static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) 311static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
303{ 312{
304 if (!list_empty(&desc->pg_list)) { 313 if (!list_empty(&desc->pg_list)) {
305 int error = desc->pg_doio(desc->pg_inode, 314 int error = desc->pg_doio(desc);
306 &desc->pg_list,
307 nfs_page_array_len(desc->pg_base,
308 desc->pg_count),
309 desc->pg_count,
310 desc->pg_ioflags);
311 if (error < 0) 315 if (error < 0)
312 desc->pg_error = error; 316 desc->pg_error = error;
313 else 317 else
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 1b1bc1a0fb0a..f38813a0a295 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -30,6 +30,7 @@
30#include <linux/nfs_fs.h> 30#include <linux/nfs_fs.h>
31#include "internal.h" 31#include "internal.h"
32#include "pnfs.h" 32#include "pnfs.h"
33#include "iostat.h"
33 34
34#define NFSDBG_FACILITY NFSDBG_PNFS 35#define NFSDBG_FACILITY NFSDBG_PNFS
35 36
@@ -74,10 +75,8 @@ find_pnfs_driver(u32 id)
74void 75void
75unset_pnfs_layoutdriver(struct nfs_server *nfss) 76unset_pnfs_layoutdriver(struct nfs_server *nfss)
76{ 77{
77 if (nfss->pnfs_curr_ld) { 78 if (nfss->pnfs_curr_ld)
78 nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
79 module_put(nfss->pnfs_curr_ld->owner); 79 module_put(nfss->pnfs_curr_ld->owner);
80 }
81 nfss->pnfs_curr_ld = NULL; 80 nfss->pnfs_curr_ld = NULL;
82} 81}
83 82
@@ -115,13 +114,7 @@ set_pnfs_layoutdriver(struct nfs_server *server, u32 id)
115 goto out_no_driver; 114 goto out_no_driver;
116 } 115 }
117 server->pnfs_curr_ld = ld_type; 116 server->pnfs_curr_ld = ld_type;
118 if (ld_type->set_layoutdriver(server)) { 117
119 printk(KERN_ERR
120 "%s: Error initializing mount point for layout driver %u.\n",
121 __func__, id);
122 module_put(ld_type->owner);
123 goto out_no_driver;
124 }
125 dprintk("%s: pNFS module for %u set\n", __func__, id); 118 dprintk("%s: pNFS module for %u set\n", __func__, id);
126 return; 119 return;
127 120
@@ -230,37 +223,41 @@ static void free_lseg(struct pnfs_layout_segment *lseg)
230 put_layout_hdr(NFS_I(ino)->layout); 223 put_layout_hdr(NFS_I(ino)->layout);
231} 224}
232 225
233/* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg 226static void
234 * could sleep, so must be called outside of the lock. 227put_lseg_common(struct pnfs_layout_segment *lseg)
235 * Returns 1 if object was removed, otherwise return 0. 228{
236 */ 229 struct inode *inode = lseg->pls_layout->plh_inode;
237static int 230
238put_lseg_locked(struct pnfs_layout_segment *lseg, 231 BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
239 struct list_head *tmp_list) 232 list_del_init(&lseg->pls_list);
233 if (list_empty(&lseg->pls_layout->plh_segs)) {
234 set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags);
235 /* Matched by initial refcount set in alloc_init_layout_hdr */
236 put_layout_hdr_locked(lseg->pls_layout);
237 }
238 rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
239}
240
241void
242put_lseg(struct pnfs_layout_segment *lseg)
240{ 243{
244 struct inode *inode;
245
246 if (!lseg)
247 return;
248
241 dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, 249 dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
242 atomic_read(&lseg->pls_refcount), 250 atomic_read(&lseg->pls_refcount),
243 test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); 251 test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
244 if (atomic_dec_and_test(&lseg->pls_refcount)) { 252 inode = lseg->pls_layout->plh_inode;
245 struct inode *ino = lseg->pls_layout->plh_inode; 253 if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
254 LIST_HEAD(free_me);
246 255
247 BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); 256 put_lseg_common(lseg);
248 list_del(&lseg->pls_list); 257 list_add(&lseg->pls_list, &free_me);
249 if (list_empty(&lseg->pls_layout->plh_segs)) { 258 spin_unlock(&inode->i_lock);
250 struct nfs_client *clp; 259 pnfs_free_lseg_list(&free_me);
251
252 clp = NFS_SERVER(ino)->nfs_client;
253 spin_lock(&clp->cl_lock);
254 /* List does not take a reference, so no need for put here */
255 list_del_init(&lseg->pls_layout->plh_layouts);
256 spin_unlock(&clp->cl_lock);
257 clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags);
258 }
259 rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq);
260 list_add(&lseg->pls_list, tmp_list);
261 return 1;
262 } 260 }
263 return 0;
264} 261}
265 262
266static bool 263static bool
@@ -281,7 +278,13 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
281 * list. It will now be removed when all 278 * list. It will now be removed when all
282 * outstanding io is finished. 279 * outstanding io is finished.
283 */ 280 */
284 rv = put_lseg_locked(lseg, tmp_list); 281 dprintk("%s: lseg %p ref %d\n", __func__, lseg,
282 atomic_read(&lseg->pls_refcount));
283 if (atomic_dec_and_test(&lseg->pls_refcount)) {
284 put_lseg_common(lseg);
285 list_add(&lseg->pls_list, tmp_list);
286 rv = 1;
287 }
285 } 288 }
286 return rv; 289 return rv;
287} 290}
@@ -299,6 +302,11 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
299 302
300 dprintk("%s:Begin lo %p\n", __func__, lo); 303 dprintk("%s:Begin lo %p\n", __func__, lo);
301 304
305 if (list_empty(&lo->plh_segs)) {
306 if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
307 put_layout_hdr_locked(lo);
308 return 0;
309 }
302 list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) 310 list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
303 if (should_free_lseg(lseg->pls_range.iomode, iomode)) { 311 if (should_free_lseg(lseg->pls_range.iomode, iomode)) {
304 dprintk("%s: freeing lseg %p iomode %d " 312 dprintk("%s: freeing lseg %p iomode %d "
@@ -312,11 +320,27 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
312 return invalid - removed; 320 return invalid - removed;
313} 321}
314 322
323/* note free_me must contain lsegs from a single layout_hdr */
315void 324void
316pnfs_free_lseg_list(struct list_head *free_me) 325pnfs_free_lseg_list(struct list_head *free_me)
317{ 326{
318 struct pnfs_layout_segment *lseg, *tmp; 327 struct pnfs_layout_segment *lseg, *tmp;
328 struct pnfs_layout_hdr *lo;
329
330 if (list_empty(free_me))
331 return;
319 332
333 lo = list_first_entry(free_me, struct pnfs_layout_segment,
334 pls_list)->pls_layout;
335
336 if (test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) {
337 struct nfs_client *clp;
338
339 clp = NFS_SERVER(lo->plh_inode)->nfs_client;
340 spin_lock(&clp->cl_lock);
341 list_del_init(&lo->plh_layouts);
342 spin_unlock(&clp->cl_lock);
343 }
320 list_for_each_entry_safe(lseg, tmp, free_me, pls_list) { 344 list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
321 list_del(&lseg->pls_list); 345 list_del(&lseg->pls_list);
322 free_lseg(lseg); 346 free_lseg(lseg);
@@ -332,10 +356,8 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
332 spin_lock(&nfsi->vfs_inode.i_lock); 356 spin_lock(&nfsi->vfs_inode.i_lock);
333 lo = nfsi->layout; 357 lo = nfsi->layout;
334 if (lo) { 358 if (lo) {
335 set_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags); 359 lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
336 mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY); 360 mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY);
337 /* Matched by refcount set to 1 in alloc_init_layout_hdr */
338 put_layout_hdr_locked(lo);
339 } 361 }
340 spin_unlock(&nfsi->vfs_inode.i_lock); 362 spin_unlock(&nfsi->vfs_inode.i_lock);
341 pnfs_free_lseg_list(&tmp_list); 363 pnfs_free_lseg_list(&tmp_list);
@@ -403,6 +425,7 @@ pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
403 (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0) 425 (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0)
404 return true; 426 return true;
405 return lo->plh_block_lgets || 427 return lo->plh_block_lgets ||
428 test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) ||
406 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || 429 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
407 (list_empty(&lo->plh_segs) && 430 (list_empty(&lo->plh_segs) &&
408 (atomic_read(&lo->plh_outstanding) > lget)); 431 (atomic_read(&lo->plh_outstanding) > lget));
@@ -674,7 +697,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
674 list_for_each_entry(lseg, &lo->plh_segs, pls_list) { 697 list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
675 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && 698 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
676 is_matching_lseg(lseg, iomode)) { 699 is_matching_lseg(lseg, iomode)) {
677 ret = lseg; 700 ret = get_lseg(lseg);
678 break; 701 break;
679 } 702 }
680 if (cmp_layout(iomode, lseg->pls_range.iomode) > 0) 703 if (cmp_layout(iomode, lseg->pls_range.iomode) > 0)
@@ -699,6 +722,7 @@ pnfs_update_layout(struct inode *ino,
699 struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; 722 struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
700 struct pnfs_layout_hdr *lo; 723 struct pnfs_layout_hdr *lo;
701 struct pnfs_layout_segment *lseg = NULL; 724 struct pnfs_layout_segment *lseg = NULL;
725 bool first = false;
702 726
703 if (!pnfs_enabled_sb(NFS_SERVER(ino))) 727 if (!pnfs_enabled_sb(NFS_SERVER(ino)))
704 return NULL; 728 return NULL;
@@ -715,21 +739,25 @@ pnfs_update_layout(struct inode *ino,
715 dprintk("%s matches recall, use MDS\n", __func__); 739 dprintk("%s matches recall, use MDS\n", __func__);
716 goto out_unlock; 740 goto out_unlock;
717 } 741 }
718 /* Check to see if the layout for the given range already exists */
719 lseg = pnfs_find_lseg(lo, iomode);
720 if (lseg)
721 goto out_unlock;
722 742
723 /* if LAYOUTGET already failed once we don't try again */ 743 /* if LAYOUTGET already failed once we don't try again */
724 if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags)) 744 if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
725 goto out_unlock; 745 goto out_unlock;
726 746
747 /* Check to see if the layout for the given range already exists */
748 lseg = pnfs_find_lseg(lo, iomode);
749 if (lseg)
750 goto out_unlock;
751
727 if (pnfs_layoutgets_blocked(lo, NULL, 0)) 752 if (pnfs_layoutgets_blocked(lo, NULL, 0))
728 goto out_unlock; 753 goto out_unlock;
729 atomic_inc(&lo->plh_outstanding); 754 atomic_inc(&lo->plh_outstanding);
730 755
731 get_layout_hdr(lo); 756 get_layout_hdr(lo);
732 if (list_empty(&lo->plh_segs)) { 757 if (list_empty(&lo->plh_segs))
758 first = true;
759 spin_unlock(&ino->i_lock);
760 if (first) {
733 /* The lo must be on the clp list if there is any 761 /* The lo must be on the clp list if there is any
734 * chance of a CB_LAYOUTRECALL(FILE) coming in. 762 * chance of a CB_LAYOUTRECALL(FILE) coming in.
735 */ 763 */
@@ -738,24 +766,18 @@ pnfs_update_layout(struct inode *ino,
738 list_add_tail(&lo->plh_layouts, &clp->cl_layouts); 766 list_add_tail(&lo->plh_layouts, &clp->cl_layouts);
739 spin_unlock(&clp->cl_lock); 767 spin_unlock(&clp->cl_lock);
740 } 768 }
741 spin_unlock(&ino->i_lock);
742 769
743 lseg = send_layoutget(lo, ctx, iomode); 770 lseg = send_layoutget(lo, ctx, iomode);
744 if (!lseg) { 771 if (!lseg && first) {
745 spin_lock(&ino->i_lock); 772 spin_lock(&clp->cl_lock);
746 if (list_empty(&lo->plh_segs)) { 773 list_del_init(&lo->plh_layouts);
747 spin_lock(&clp->cl_lock); 774 spin_unlock(&clp->cl_lock);
748 list_del_init(&lo->plh_layouts);
749 spin_unlock(&clp->cl_lock);
750 clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
751 }
752 spin_unlock(&ino->i_lock);
753 } 775 }
754 atomic_dec(&lo->plh_outstanding); 776 atomic_dec(&lo->plh_outstanding);
755 put_layout_hdr(lo); 777 put_layout_hdr(lo);
756out: 778out:
757 dprintk("%s end, state 0x%lx lseg %p\n", __func__, 779 dprintk("%s end, state 0x%lx lseg %p\n", __func__,
758 nfsi->layout->plh_flags, lseg); 780 nfsi->layout ? nfsi->layout->plh_flags : -1, lseg);
759 return lseg; 781 return lseg;
760out_unlock: 782out_unlock:
761 spin_unlock(&ino->i_lock); 783 spin_unlock(&ino->i_lock);
@@ -808,7 +830,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
808 } 830 }
809 init_lseg(lo, lseg); 831 init_lseg(lo, lseg);
810 lseg->pls_range = res->range; 832 lseg->pls_range = res->range;
811 *lgp->lsegpp = lseg; 833 *lgp->lsegpp = get_lseg(lseg);
812 pnfs_insert_layout(lo, lseg); 834 pnfs_insert_layout(lo, lseg);
813 835
814 if (res->return_on_close) { 836 if (res->return_on_close) {
@@ -829,137 +851,97 @@ out_forget_reply:
829 goto out; 851 goto out;
830} 852}
831 853
832/* 854static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio,
833 * Device ID cache. Currently supports one layout type per struct nfs_client. 855 struct nfs_page *prev,
834 * Add layout type to the lookup key to expand to support multiple types. 856 struct nfs_page *req)
835 */
836int
837pnfs_alloc_init_deviceid_cache(struct nfs_client *clp,
838 void (*free_callback)(struct pnfs_deviceid_node *))
839{ 857{
840 struct pnfs_deviceid_cache *c; 858 if (pgio->pg_count == prev->wb_bytes) {
841 859 /* This is the first coalesce call for a series of nfs_pages */
842 c = kzalloc(sizeof(struct pnfs_deviceid_cache), GFP_KERNEL); 860 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
843 if (!c) 861 prev->wb_context,
844 return -ENOMEM; 862 IOMODE_READ);
845 spin_lock(&clp->cl_lock);
846 if (clp->cl_devid_cache != NULL) {
847 atomic_inc(&clp->cl_devid_cache->dc_ref);
848 dprintk("%s [kref [%d]]\n", __func__,
849 atomic_read(&clp->cl_devid_cache->dc_ref));
850 kfree(c);
851 } else {
852 /* kzalloc initializes hlists */
853 spin_lock_init(&c->dc_lock);
854 atomic_set(&c->dc_ref, 1);
855 c->dc_free_callback = free_callback;
856 clp->cl_devid_cache = c;
857 dprintk("%s [new]\n", __func__);
858 } 863 }
859 spin_unlock(&clp->cl_lock); 864 return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
860 return 0;
861} 865}
862EXPORT_SYMBOL_GPL(pnfs_alloc_init_deviceid_cache);
863 866
864/*
865 * Called from pnfs_layoutdriver_type->free_lseg
866 * last layout segment reference frees deviceid
867 */
868void 867void
869pnfs_put_deviceid(struct pnfs_deviceid_cache *c, 868pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
870 struct pnfs_deviceid_node *devid)
871{ 869{
872 struct nfs4_deviceid *id = &devid->de_id; 870 struct pnfs_layoutdriver_type *ld;
873 struct pnfs_deviceid_node *d;
874 struct hlist_node *n;
875 long h = nfs4_deviceid_hash(id);
876 871
877 dprintk("%s [%d]\n", __func__, atomic_read(&devid->de_ref)); 872 ld = NFS_SERVER(inode)->pnfs_curr_ld;
878 if (!atomic_dec_and_lock(&devid->de_ref, &c->dc_lock)) 873 pgio->pg_test = (ld && ld->pg_test) ? pnfs_read_pg_test : NULL;
879 return; 874}
880 875
881 hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[h], de_node) 876static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio,
882 if (!memcmp(&d->de_id, id, sizeof(*id))) { 877 struct nfs_page *prev,
883 hlist_del_rcu(&d->de_node); 878 struct nfs_page *req)
884 spin_unlock(&c->dc_lock); 879{
885 synchronize_rcu(); 880 if (pgio->pg_count == prev->wb_bytes) {
886 c->dc_free_callback(devid); 881 /* This is the first coalesce call for a series of nfs_pages */
887 return; 882 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
888 } 883 prev->wb_context,
889 spin_unlock(&c->dc_lock); 884 IOMODE_RW);
890 /* Why wasn't it found in the list? */
891 BUG();
892}
893EXPORT_SYMBOL_GPL(pnfs_put_deviceid);
894
895/* Find and reference a deviceid */
896struct pnfs_deviceid_node *
897pnfs_find_get_deviceid(struct pnfs_deviceid_cache *c, struct nfs4_deviceid *id)
898{
899 struct pnfs_deviceid_node *d;
900 struct hlist_node *n;
901 long hash = nfs4_deviceid_hash(id);
902
903 dprintk("--> %s hash %ld\n", __func__, hash);
904 rcu_read_lock();
905 hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
906 if (!memcmp(&d->de_id, id, sizeof(*id))) {
907 if (!atomic_inc_not_zero(&d->de_ref)) {
908 goto fail;
909 } else {
910 rcu_read_unlock();
911 return d;
912 }
913 }
914 } 885 }
915fail: 886 return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
916 rcu_read_unlock(); 887}
917 return NULL; 888
889void
890pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode)
891{
892 struct pnfs_layoutdriver_type *ld;
893
894 ld = NFS_SERVER(inode)->pnfs_curr_ld;
895 pgio->pg_test = (ld && ld->pg_test) ? pnfs_write_pg_test : NULL;
896}
897
898enum pnfs_try_status
899pnfs_try_to_write_data(struct nfs_write_data *wdata,
900 const struct rpc_call_ops *call_ops, int how)
901{
902 struct inode *inode = wdata->inode;
903 enum pnfs_try_status trypnfs;
904 struct nfs_server *nfss = NFS_SERVER(inode);
905
906 wdata->mds_ops = call_ops;
907
908 dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
909 inode->i_ino, wdata->args.count, wdata->args.offset, how);
910
911 trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how);
912 if (trypnfs == PNFS_NOT_ATTEMPTED) {
913 put_lseg(wdata->lseg);
914 wdata->lseg = NULL;
915 } else
916 nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
917
918 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
919 return trypnfs;
918} 920}
919EXPORT_SYMBOL_GPL(pnfs_find_get_deviceid);
920 921
921/* 922/*
922 * Add a deviceid to the cache. 923 * Call the appropriate parallel I/O subsystem read function.
923 * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new
924 */ 924 */
925struct pnfs_deviceid_node * 925enum pnfs_try_status
926pnfs_add_deviceid(struct pnfs_deviceid_cache *c, struct pnfs_deviceid_node *new) 926pnfs_try_to_read_data(struct nfs_read_data *rdata,
927{ 927 const struct rpc_call_ops *call_ops)
928 struct pnfs_deviceid_node *d;
929 long hash = nfs4_deviceid_hash(&new->de_id);
930
931 dprintk("--> %s hash %ld\n", __func__, hash);
932 spin_lock(&c->dc_lock);
933 d = pnfs_find_get_deviceid(c, &new->de_id);
934 if (d) {
935 spin_unlock(&c->dc_lock);
936 dprintk("%s [discard]\n", __func__);
937 c->dc_free_callback(new);
938 return d;
939 }
940 INIT_HLIST_NODE(&new->de_node);
941 atomic_set(&new->de_ref, 1);
942 hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
943 spin_unlock(&c->dc_lock);
944 dprintk("%s [new]\n", __func__);
945 return new;
946}
947EXPORT_SYMBOL_GPL(pnfs_add_deviceid);
948
949void
950pnfs_put_deviceid_cache(struct nfs_client *clp)
951{ 928{
952 struct pnfs_deviceid_cache *local = clp->cl_devid_cache; 929 struct inode *inode = rdata->inode;
930 struct nfs_server *nfss = NFS_SERVER(inode);
931 enum pnfs_try_status trypnfs;
953 932
954 dprintk("--> %s ({%d})\n", __func__, atomic_read(&local->dc_ref)); 933 rdata->mds_ops = call_ops;
955 if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) { 934
956 int i; 935 dprintk("%s: Reading ino:%lu %u@%llu\n",
957 /* Verify cache is empty */ 936 __func__, inode->i_ino, rdata->args.count, rdata->args.offset);
958 for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++) 937
959 BUG_ON(!hlist_empty(&local->dc_deviceids[i])); 938 trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata);
960 clp->cl_devid_cache = NULL; 939 if (trypnfs == PNFS_NOT_ATTEMPTED) {
961 spin_unlock(&clp->cl_lock); 940 put_lseg(rdata->lseg);
962 kfree(local); 941 rdata->lseg = NULL;
942 } else {
943 nfs_inc_stats(inode, NFSIOS_PNFS_READ);
963 } 944 }
945 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
946 return trypnfs;
964} 947}
965EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index e2612ea0cbed..6380b9405bcd 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -30,6 +30,8 @@
30#ifndef FS_NFS_PNFS_H 30#ifndef FS_NFS_PNFS_H
31#define FS_NFS_PNFS_H 31#define FS_NFS_PNFS_H
32 32
33#include <linux/nfs_page.h>
34
33enum { 35enum {
34 NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */ 36 NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */
35 NFS_LSEG_ROC, /* roc bit received from server */ 37 NFS_LSEG_ROC, /* roc bit received from server */
@@ -43,6 +45,11 @@ struct pnfs_layout_segment {
43 struct pnfs_layout_hdr *pls_layout; 45 struct pnfs_layout_hdr *pls_layout;
44}; 46};
45 47
48enum pnfs_try_status {
49 PNFS_ATTEMPTED = 0,
50 PNFS_NOT_ATTEMPTED = 1,
51};
52
46#ifdef CONFIG_NFS_V4_1 53#ifdef CONFIG_NFS_V4_1
47 54
48#define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4" 55#define LAYOUT_NFSV4_1_MODULE_PREFIX "nfs-layouttype4"
@@ -61,10 +68,18 @@ struct pnfs_layoutdriver_type {
61 const u32 id; 68 const u32 id;
62 const char *name; 69 const char *name;
63 struct module *owner; 70 struct module *owner;
64 int (*set_layoutdriver) (struct nfs_server *);
65 int (*clear_layoutdriver) (struct nfs_server *);
66 struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr); 71 struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr);
67 void (*free_lseg) (struct pnfs_layout_segment *lseg); 72 void (*free_lseg) (struct pnfs_layout_segment *lseg);
73
74 /* test for nfs page cache coalescing */
75 int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
76
77 /*
78 * Return PNFS_ATTEMPTED to indicate the layout code has attempted
79 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
80 */
81 enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data);
82 enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how);
68}; 83};
69 84
70struct pnfs_layout_hdr { 85struct pnfs_layout_hdr {
@@ -90,52 +105,6 @@ struct pnfs_device {
90 unsigned int pglen; 105 unsigned int pglen;
91}; 106};
92 107
93/*
94 * Device ID RCU cache. A device ID is unique per client ID and layout type.
95 */
96#define NFS4_DEVICE_ID_HASH_BITS 5
97#define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS)
98#define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1)
99
100static inline u32
101nfs4_deviceid_hash(struct nfs4_deviceid *id)
102{
103 unsigned char *cptr = (unsigned char *)id->data;
104 unsigned int nbytes = NFS4_DEVICEID4_SIZE;
105 u32 x = 0;
106
107 while (nbytes--) {
108 x *= 37;
109 x += *cptr++;
110 }
111 return x & NFS4_DEVICE_ID_HASH_MASK;
112}
113
114struct pnfs_deviceid_node {
115 struct hlist_node de_node;
116 struct nfs4_deviceid de_id;
117 atomic_t de_ref;
118};
119
120struct pnfs_deviceid_cache {
121 spinlock_t dc_lock;
122 atomic_t dc_ref;
123 void (*dc_free_callback)(struct pnfs_deviceid_node *);
124 struct hlist_head dc_deviceids[NFS4_DEVICE_ID_HASH_SIZE];
125};
126
127extern int pnfs_alloc_init_deviceid_cache(struct nfs_client *,
128 void (*free_callback)(struct pnfs_deviceid_node *));
129extern void pnfs_put_deviceid_cache(struct nfs_client *);
130extern struct pnfs_deviceid_node *pnfs_find_get_deviceid(
131 struct pnfs_deviceid_cache *,
132 struct nfs4_deviceid *);
133extern struct pnfs_deviceid_node *pnfs_add_deviceid(
134 struct pnfs_deviceid_cache *,
135 struct pnfs_deviceid_node *);
136extern void pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
137 struct pnfs_deviceid_node *devid);
138
139extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); 108extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *);
140extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); 109extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
141 110
@@ -146,11 +115,18 @@ extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
146 115
147/* pnfs.c */ 116/* pnfs.c */
148void get_layout_hdr(struct pnfs_layout_hdr *lo); 117void get_layout_hdr(struct pnfs_layout_hdr *lo);
118void put_lseg(struct pnfs_layout_segment *lseg);
149struct pnfs_layout_segment * 119struct pnfs_layout_segment *
150pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, 120pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
151 enum pnfs_iomode access_type); 121 enum pnfs_iomode access_type);
152void set_pnfs_layoutdriver(struct nfs_server *, u32 id); 122void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
153void unset_pnfs_layoutdriver(struct nfs_server *); 123void unset_pnfs_layoutdriver(struct nfs_server *);
124enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
125 const struct rpc_call_ops *, int);
126enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
127 const struct rpc_call_ops *);
128void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *);
129void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *);
154int pnfs_layout_process(struct nfs4_layoutget *lgp); 130int pnfs_layout_process(struct nfs4_layoutget *lgp);
155void pnfs_free_lseg_list(struct list_head *tmp_list); 131void pnfs_free_lseg_list(struct list_head *tmp_list);
156void pnfs_destroy_layout(struct nfs_inode *); 132void pnfs_destroy_layout(struct nfs_inode *);
@@ -177,6 +153,16 @@ static inline int lo_fail_bit(u32 iomode)
177 NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED; 153 NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED;
178} 154}
179 155
156static inline struct pnfs_layout_segment *
157get_lseg(struct pnfs_layout_segment *lseg)
158{
159 if (lseg) {
160 atomic_inc(&lseg->pls_refcount);
161 smp_mb__after_atomic_inc();
162 }
163 return lseg;
164}
165
180/* Return true if a layout driver is being used for this mountpoint */ 166/* Return true if a layout driver is being used for this mountpoint */
181static inline int pnfs_enabled_sb(struct nfs_server *nfss) 167static inline int pnfs_enabled_sb(struct nfs_server *nfss)
182{ 168{
@@ -194,12 +180,36 @@ static inline void pnfs_destroy_layout(struct nfs_inode *nfsi)
194} 180}
195 181
196static inline struct pnfs_layout_segment * 182static inline struct pnfs_layout_segment *
183get_lseg(struct pnfs_layout_segment *lseg)
184{
185 return NULL;
186}
187
188static inline void put_lseg(struct pnfs_layout_segment *lseg)
189{
190}
191
192static inline struct pnfs_layout_segment *
197pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, 193pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
198 enum pnfs_iomode access_type) 194 enum pnfs_iomode access_type)
199{ 195{
200 return NULL; 196 return NULL;
201} 197}
202 198
199static inline enum pnfs_try_status
200pnfs_try_to_read_data(struct nfs_read_data *data,
201 const struct rpc_call_ops *call_ops)
202{
203 return PNFS_NOT_ATTEMPTED;
204}
205
206static inline enum pnfs_try_status
207pnfs_try_to_write_data(struct nfs_write_data *data,
208 const struct rpc_call_ops *call_ops, int how)
209{
210 return PNFS_NOT_ATTEMPTED;
211}
212
203static inline bool 213static inline bool
204pnfs_roc(struct inode *ino) 214pnfs_roc(struct inode *ino)
205{ 215{
@@ -230,6 +240,18 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
230{ 240{
231} 241}
232 242
243static inline void
244pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *ino)
245{
246 pgio->pg_test = NULL;
247}
248
249static inline void
250pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *ino)
251{
252 pgio->pg_test = NULL;
253}
254
233#endif /* CONFIG_NFS_V4_1 */ 255#endif /* CONFIG_NFS_V4_1 */
234 256
235#endif /* FS_NFS_PNFS_H */ 257#endif /* FS_NFS_PNFS_H */
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 77d5e21c4ad6..b8ec170f2a0f 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -741,4 +741,5 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
741 .lock = nfs_proc_lock, 741 .lock = nfs_proc_lock,
742 .lock_check_bounds = nfs_lock_check_bounds, 742 .lock_check_bounds = nfs_lock_check_bounds,
743 .close_context = nfs_close_context, 743 .close_context = nfs_close_context,
744 .init_client = nfs_init_client,
744}; 745};
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index aedcaa7f291f..7cded2b12a05 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -18,19 +18,20 @@
18#include <linux/sunrpc/clnt.h> 18#include <linux/sunrpc/clnt.h>
19#include <linux/nfs_fs.h> 19#include <linux/nfs_fs.h>
20#include <linux/nfs_page.h> 20#include <linux/nfs_page.h>
21#include <linux/module.h>
21 22
22#include <asm/system.h> 23#include <asm/system.h>
24#include "pnfs.h"
23 25
24#include "nfs4_fs.h" 26#include "nfs4_fs.h"
25#include "internal.h" 27#include "internal.h"
26#include "iostat.h" 28#include "iostat.h"
27#include "fscache.h" 29#include "fscache.h"
28#include "pnfs.h"
29 30
30#define NFSDBG_FACILITY NFSDBG_PAGECACHE 31#define NFSDBG_FACILITY NFSDBG_PAGECACHE
31 32
32static int nfs_pagein_multi(struct inode *, struct list_head *, unsigned int, size_t, int); 33static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc);
33static int nfs_pagein_one(struct inode *, struct list_head *, unsigned int, size_t, int); 34static int nfs_pagein_one(struct nfs_pageio_descriptor *desc);
34static const struct rpc_call_ops nfs_read_partial_ops; 35static const struct rpc_call_ops nfs_read_partial_ops;
35static const struct rpc_call_ops nfs_read_full_ops; 36static const struct rpc_call_ops nfs_read_full_ops;
36 37
@@ -69,6 +70,7 @@ void nfs_readdata_free(struct nfs_read_data *p)
69 70
70static void nfs_readdata_release(struct nfs_read_data *rdata) 71static void nfs_readdata_release(struct nfs_read_data *rdata)
71{ 72{
73 put_lseg(rdata->lseg);
72 put_nfs_open_context(rdata->args.context); 74 put_nfs_open_context(rdata->args.context);
73 nfs_readdata_free(rdata); 75 nfs_readdata_free(rdata);
74} 76}
@@ -114,14 +116,13 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
114int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, 116int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
115 struct page *page) 117 struct page *page)
116{ 118{
117 LIST_HEAD(one_request);
118 struct nfs_page *new; 119 struct nfs_page *new;
119 unsigned int len; 120 unsigned int len;
121 struct nfs_pageio_descriptor pgio;
120 122
121 len = nfs_page_length(page); 123 len = nfs_page_length(page);
122 if (len == 0) 124 if (len == 0)
123 return nfs_return_empty_page(page); 125 return nfs_return_empty_page(page);
124 pnfs_update_layout(inode, ctx, IOMODE_READ);
125 new = nfs_create_request(ctx, inode, page, 0, len); 126 new = nfs_create_request(ctx, inode, page, 0, len);
126 if (IS_ERR(new)) { 127 if (IS_ERR(new)) {
127 unlock_page(page); 128 unlock_page(page);
@@ -130,11 +131,14 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
130 if (len < PAGE_CACHE_SIZE) 131 if (len < PAGE_CACHE_SIZE)
131 zero_user_segment(page, len, PAGE_CACHE_SIZE); 132 zero_user_segment(page, len, PAGE_CACHE_SIZE);
132 133
133 nfs_list_add_request(new, &one_request); 134 nfs_pageio_init(&pgio, inode, NULL, 0, 0);
135 nfs_list_add_request(new, &pgio.pg_list);
136 pgio.pg_count = len;
137
134 if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) 138 if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
135 nfs_pagein_multi(inode, &one_request, 1, len, 0); 139 nfs_pagein_multi(&pgio);
136 else 140 else
137 nfs_pagein_one(inode, &one_request, 1, len, 0); 141 nfs_pagein_one(&pgio);
138 return 0; 142 return 0;
139} 143}
140 144
@@ -155,24 +159,20 @@ static void nfs_readpage_release(struct nfs_page *req)
155 nfs_release_request(req); 159 nfs_release_request(req);
156} 160}
157 161
158/* 162int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
159 * Set up the NFS read request struct 163 const struct rpc_call_ops *call_ops)
160 */
161static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
162 const struct rpc_call_ops *call_ops,
163 unsigned int count, unsigned int offset)
164{ 164{
165 struct inode *inode = req->wb_context->path.dentry->d_inode; 165 struct inode *inode = data->inode;
166 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; 166 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
167 struct rpc_task *task; 167 struct rpc_task *task;
168 struct rpc_message msg = { 168 struct rpc_message msg = {
169 .rpc_argp = &data->args, 169 .rpc_argp = &data->args,
170 .rpc_resp = &data->res, 170 .rpc_resp = &data->res,
171 .rpc_cred = req->wb_context->cred, 171 .rpc_cred = data->cred,
172 }; 172 };
173 struct rpc_task_setup task_setup_data = { 173 struct rpc_task_setup task_setup_data = {
174 .task = &data->task, 174 .task = &data->task,
175 .rpc_client = NFS_CLIENT(inode), 175 .rpc_client = clnt,
176 .rpc_message = &msg, 176 .rpc_message = &msg,
177 .callback_ops = call_ops, 177 .callback_ops = call_ops,
178 .callback_data = data, 178 .callback_data = data,
@@ -180,9 +180,39 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
180 .flags = RPC_TASK_ASYNC | swap_flags, 180 .flags = RPC_TASK_ASYNC | swap_flags,
181 }; 181 };
182 182
183 /* Set up the initial task struct. */
184 NFS_PROTO(inode)->read_setup(data, &msg);
185
186 dprintk("NFS: %5u initiated read call (req %s/%lld, %u bytes @ "
187 "offset %llu)\n",
188 data->task.tk_pid,
189 inode->i_sb->s_id,
190 (long long)NFS_FILEID(inode),
191 data->args.count,
192 (unsigned long long)data->args.offset);
193
194 task = rpc_run_task(&task_setup_data);
195 if (IS_ERR(task))
196 return PTR_ERR(task);
197 rpc_put_task(task);
198 return 0;
199}
200EXPORT_SYMBOL_GPL(nfs_initiate_read);
201
202/*
203 * Set up the NFS read request struct
204 */
205static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
206 const struct rpc_call_ops *call_ops,
207 unsigned int count, unsigned int offset,
208 struct pnfs_layout_segment *lseg)
209{
210 struct inode *inode = req->wb_context->path.dentry->d_inode;
211
183 data->req = req; 212 data->req = req;
184 data->inode = inode; 213 data->inode = inode;
185 data->cred = msg.rpc_cred; 214 data->cred = req->wb_context->cred;
215 data->lseg = get_lseg(lseg);
186 216
187 data->args.fh = NFS_FH(inode); 217 data->args.fh = NFS_FH(inode);
188 data->args.offset = req_offset(req) + offset; 218 data->args.offset = req_offset(req) + offset;
@@ -197,21 +227,11 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
197 data->res.eof = 0; 227 data->res.eof = 0;
198 nfs_fattr_init(&data->fattr); 228 nfs_fattr_init(&data->fattr);
199 229
200 /* Set up the initial task struct. */ 230 if (data->lseg &&
201 NFS_PROTO(inode)->read_setup(data, &msg); 231 (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED))
202 232 return 0;
203 dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
204 data->task.tk_pid,
205 inode->i_sb->s_id,
206 (long long)NFS_FILEID(inode),
207 count,
208 (unsigned long long)data->args.offset);
209 233
210 task = rpc_run_task(&task_setup_data); 234 return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
211 if (IS_ERR(task))
212 return PTR_ERR(task);
213 rpc_put_task(task);
214 return 0;
215} 235}
216 236
217static void 237static void
@@ -240,20 +260,21 @@ nfs_async_read_error(struct list_head *head)
240 * won't see the new data until our attribute cache is updated. This is more 260 * won't see the new data until our attribute cache is updated. This is more
241 * or less conventional NFS client behavior. 261 * or less conventional NFS client behavior.
242 */ 262 */
243static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags) 263static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
244{ 264{
245 struct nfs_page *req = nfs_list_entry(head->next); 265 struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
246 struct page *page = req->wb_page; 266 struct page *page = req->wb_page;
247 struct nfs_read_data *data; 267 struct nfs_read_data *data;
248 size_t rsize = NFS_SERVER(inode)->rsize, nbytes; 268 size_t rsize = NFS_SERVER(desc->pg_inode)->rsize, nbytes;
249 unsigned int offset; 269 unsigned int offset;
250 int requests = 0; 270 int requests = 0;
251 int ret = 0; 271 int ret = 0;
272 struct pnfs_layout_segment *lseg;
252 LIST_HEAD(list); 273 LIST_HEAD(list);
253 274
254 nfs_list_remove_request(req); 275 nfs_list_remove_request(req);
255 276
256 nbytes = count; 277 nbytes = desc->pg_count;
257 do { 278 do {
258 size_t len = min(nbytes,rsize); 279 size_t len = min(nbytes,rsize);
259 280
@@ -266,9 +287,11 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne
266 } while(nbytes != 0); 287 } while(nbytes != 0);
267 atomic_set(&req->wb_complete, requests); 288 atomic_set(&req->wb_complete, requests);
268 289
290 BUG_ON(desc->pg_lseg != NULL);
291 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ);
269 ClearPageError(page); 292 ClearPageError(page);
270 offset = 0; 293 offset = 0;
271 nbytes = count; 294 nbytes = desc->pg_count;
272 do { 295 do {
273 int ret2; 296 int ret2;
274 297
@@ -280,12 +303,14 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne
280 if (nbytes < rsize) 303 if (nbytes < rsize)
281 rsize = nbytes; 304 rsize = nbytes;
282 ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, 305 ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
283 rsize, offset); 306 rsize, offset, lseg);
284 if (ret == 0) 307 if (ret == 0)
285 ret = ret2; 308 ret = ret2;
286 offset += rsize; 309 offset += rsize;
287 nbytes -= rsize; 310 nbytes -= rsize;
288 } while (nbytes != 0); 311 } while (nbytes != 0);
312 put_lseg(lseg);
313 desc->pg_lseg = NULL;
289 314
290 return ret; 315 return ret;
291 316
@@ -300,16 +325,21 @@ out_bad:
300 return -ENOMEM; 325 return -ENOMEM;
301} 326}
302 327
303static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags) 328static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
304{ 329{
305 struct nfs_page *req; 330 struct nfs_page *req;
306 struct page **pages; 331 struct page **pages;
307 struct nfs_read_data *data; 332 struct nfs_read_data *data;
333 struct list_head *head = &desc->pg_list;
334 struct pnfs_layout_segment *lseg = desc->pg_lseg;
308 int ret = -ENOMEM; 335 int ret = -ENOMEM;
309 336
310 data = nfs_readdata_alloc(npages); 337 data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base,
311 if (!data) 338 desc->pg_count));
312 goto out_bad; 339 if (!data) {
340 nfs_async_read_error(head);
341 goto out;
342 }
313 343
314 pages = data->pagevec; 344 pages = data->pagevec;
315 while (!list_empty(head)) { 345 while (!list_empty(head)) {
@@ -320,10 +350,14 @@ static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned
320 *pages++ = req->wb_page; 350 *pages++ = req->wb_page;
321 } 351 }
322 req = nfs_list_entry(data->pages.next); 352 req = nfs_list_entry(data->pages.next);
353 if ((!lseg) && list_is_singular(&data->pages))
354 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ);
323 355
324 return nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0); 356 ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count,
325out_bad: 357 0, lseg);
326 nfs_async_read_error(head); 358out:
359 put_lseg(lseg);
360 desc->pg_lseg = NULL;
327 return ret; 361 return ret;
328} 362}
329 363
@@ -366,6 +400,7 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
366 return; 400 return;
367 401
368 /* Yes, so retry the read at the end of the data */ 402 /* Yes, so retry the read at the end of the data */
403 data->mds_offset += resp->count;
369 argp->offset += resp->count; 404 argp->offset += resp->count;
370 argp->pgbase += resp->count; 405 argp->pgbase += resp->count;
371 argp->count -= resp->count; 406 argp->count -= resp->count;
@@ -625,7 +660,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
625 if (ret == 0) 660 if (ret == 0)
626 goto read_complete; /* all pages were read */ 661 goto read_complete; /* all pages were read */
627 662
628 pnfs_update_layout(inode, desc.ctx, IOMODE_READ); 663 pnfs_pageio_init_read(&pgio, inode);
629 if (rsize < PAGE_CACHE_SIZE) 664 if (rsize < PAGE_CACHE_SIZE)
630 nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); 665 nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
631 else 666 else
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index b68c8607770f..2b8e9a5e366a 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -263,8 +263,11 @@ static match_table_t nfs_local_lock_tokens = {
263static void nfs_umount_begin(struct super_block *); 263static void nfs_umount_begin(struct super_block *);
264static int nfs_statfs(struct dentry *, struct kstatfs *); 264static int nfs_statfs(struct dentry *, struct kstatfs *);
265static int nfs_show_options(struct seq_file *, struct vfsmount *); 265static int nfs_show_options(struct seq_file *, struct vfsmount *);
266static int nfs_show_devname(struct seq_file *, struct vfsmount *);
267static int nfs_show_path(struct seq_file *, struct vfsmount *);
266static int nfs_show_stats(struct seq_file *, struct vfsmount *); 268static int nfs_show_stats(struct seq_file *, struct vfsmount *);
267static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *); 269static struct dentry *nfs_fs_mount(struct file_system_type *,
270 int, const char *, void *);
268static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type, 271static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type,
269 int flags, const char *dev_name, void *raw_data); 272 int flags, const char *dev_name, void *raw_data);
270static void nfs_put_super(struct super_block *); 273static void nfs_put_super(struct super_block *);
@@ -274,7 +277,7 @@ static int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
274static struct file_system_type nfs_fs_type = { 277static struct file_system_type nfs_fs_type = {
275 .owner = THIS_MODULE, 278 .owner = THIS_MODULE,
276 .name = "nfs", 279 .name = "nfs",
277 .get_sb = nfs_get_sb, 280 .mount = nfs_fs_mount,
278 .kill_sb = nfs_kill_super, 281 .kill_sb = nfs_kill_super,
279 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 282 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
280}; 283};
@@ -296,6 +299,8 @@ static const struct super_operations nfs_sops = {
296 .evict_inode = nfs_evict_inode, 299 .evict_inode = nfs_evict_inode,
297 .umount_begin = nfs_umount_begin, 300 .umount_begin = nfs_umount_begin,
298 .show_options = nfs_show_options, 301 .show_options = nfs_show_options,
302 .show_devname = nfs_show_devname,
303 .show_path = nfs_show_path,
299 .show_stats = nfs_show_stats, 304 .show_stats = nfs_show_stats,
300 .remount_fs = nfs_remount, 305 .remount_fs = nfs_remount,
301}; 306};
@@ -303,16 +308,16 @@ static const struct super_operations nfs_sops = {
303#ifdef CONFIG_NFS_V4 308#ifdef CONFIG_NFS_V4
304static int nfs4_validate_text_mount_data(void *options, 309static int nfs4_validate_text_mount_data(void *options,
305 struct nfs_parsed_mount_data *args, const char *dev_name); 310 struct nfs_parsed_mount_data *args, const char *dev_name);
306static int nfs4_try_mount(int flags, const char *dev_name, 311static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
307 struct nfs_parsed_mount_data *data, struct vfsmount *mnt); 312 struct nfs_parsed_mount_data *data);
308static int nfs4_get_sb(struct file_system_type *fs_type, 313static struct dentry *nfs4_mount(struct file_system_type *fs_type,
309 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 314 int flags, const char *dev_name, void *raw_data);
310static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type, 315static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type,
311 int flags, const char *dev_name, void *raw_data); 316 int flags, const char *dev_name, void *raw_data);
312static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type, 317static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type,
313 int flags, const char *dev_name, void *raw_data); 318 int flags, const char *dev_name, void *raw_data);
314static int nfs4_referral_get_sb(struct file_system_type *fs_type, 319static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
315 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 320 int flags, const char *dev_name, void *raw_data);
316static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type, 321static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type,
317 int flags, const char *dev_name, void *raw_data); 322 int flags, const char *dev_name, void *raw_data);
318static void nfs4_kill_super(struct super_block *sb); 323static void nfs4_kill_super(struct super_block *sb);
@@ -320,7 +325,7 @@ static void nfs4_kill_super(struct super_block *sb);
320static struct file_system_type nfs4_fs_type = { 325static struct file_system_type nfs4_fs_type = {
321 .owner = THIS_MODULE, 326 .owner = THIS_MODULE,
322 .name = "nfs4", 327 .name = "nfs4",
323 .get_sb = nfs4_get_sb, 328 .mount = nfs4_mount,
324 .kill_sb = nfs4_kill_super, 329 .kill_sb = nfs4_kill_super,
325 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 330 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
326}; 331};
@@ -352,7 +357,7 @@ static struct file_system_type nfs4_remote_referral_fs_type = {
352struct file_system_type nfs4_referral_fs_type = { 357struct file_system_type nfs4_referral_fs_type = {
353 .owner = THIS_MODULE, 358 .owner = THIS_MODULE,
354 .name = "nfs4", 359 .name = "nfs4",
355 .get_sb = nfs4_referral_get_sb, 360 .mount = nfs4_referral_mount,
356 .kill_sb = nfs4_kill_super, 361 .kill_sb = nfs4_kill_super,
357 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 362 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
358}; 363};
@@ -366,6 +371,8 @@ static const struct super_operations nfs4_sops = {
366 .evict_inode = nfs4_evict_inode, 371 .evict_inode = nfs4_evict_inode,
367 .umount_begin = nfs_umount_begin, 372 .umount_begin = nfs_umount_begin,
368 .show_options = nfs_show_options, 373 .show_options = nfs_show_options,
374 .show_devname = nfs_show_devname,
375 .show_path = nfs_show_path,
369 .show_stats = nfs_show_stats, 376 .show_stats = nfs_show_stats,
370 .remount_fs = nfs_remount, 377 .remount_fs = nfs_remount,
371}; 378};
@@ -726,6 +733,28 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
726 return 0; 733 return 0;
727} 734}
728 735
736static int nfs_show_devname(struct seq_file *m, struct vfsmount *mnt)
737{
738 char *page = (char *) __get_free_page(GFP_KERNEL);
739 char *devname, *dummy;
740 int err = 0;
741 if (!page)
742 return -ENOMEM;
743 devname = nfs_path(&dummy, mnt->mnt_root, page, PAGE_SIZE);
744 if (IS_ERR(devname))
745 err = PTR_ERR(devname);
746 else
747 seq_escape(m, devname, " \t\n\\");
748 free_page((unsigned long)page);
749 return err;
750}
751
/*
 * Emit the mount's path component into the seq_file.
 *
 * This always writes the single string "/" and ignores @mnt.
 *
 * Always returns 0.
 */
static int nfs_show_path(struct seq_file *m, struct vfsmount *mnt)
{
	seq_puts(m, "/");

	return 0;
}
757
729/* 758/*
730 * Present statistical information for this VFS mountpoint 759 * Present statistical information for this VFS mountpoint
731 */ 760 */
@@ -979,6 +1008,27 @@ static int nfs_parse_security_flavors(char *value,
979 return 1; 1008 return 1;
980} 1009}
981 1010
1011static int nfs_get_option_str(substring_t args[], char **option)
1012{
1013 kfree(*option);
1014 *option = match_strdup(args);
1015 return !option;
1016}
1017
1018static int nfs_get_option_ul(substring_t args[], unsigned long *option)
1019{
1020 int rc;
1021 char *string;
1022
1023 string = match_strdup(args);
1024 if (string == NULL)
1025 return -ENOMEM;
1026 rc = strict_strtoul(string, 10, option);
1027 kfree(string);
1028
1029 return rc;
1030}
1031
982/* 1032/*
983 * Error-check and convert a string of mount options from user space into 1033 * Error-check and convert a string of mount options from user space into
984 * a data structure. The whole mount string is processed; bad options are 1034 * a data structure. The whole mount string is processed; bad options are
@@ -1127,155 +1177,82 @@ static int nfs_parse_mount_options(char *raw,
1127 * options that take numeric values 1177 * options that take numeric values
1128 */ 1178 */
1129 case Opt_port: 1179 case Opt_port:
1130 string = match_strdup(args); 1180 if (nfs_get_option_ul(args, &option) ||
1131 if (string == NULL) 1181 option > USHRT_MAX)
1132 goto out_nomem;
1133 rc = strict_strtoul(string, 10, &option);
1134 kfree(string);
1135 if (rc != 0 || option > USHRT_MAX)
1136 goto out_invalid_value; 1182 goto out_invalid_value;
1137 mnt->nfs_server.port = option; 1183 mnt->nfs_server.port = option;
1138 break; 1184 break;
1139 case Opt_rsize: 1185 case Opt_rsize:
1140 string = match_strdup(args); 1186 if (nfs_get_option_ul(args, &option))
1141 if (string == NULL)
1142 goto out_nomem;
1143 rc = strict_strtoul(string, 10, &option);
1144 kfree(string);
1145 if (rc != 0)
1146 goto out_invalid_value; 1187 goto out_invalid_value;
1147 mnt->rsize = option; 1188 mnt->rsize = option;
1148 break; 1189 break;
1149 case Opt_wsize: 1190 case Opt_wsize:
1150 string = match_strdup(args); 1191 if (nfs_get_option_ul(args, &option))
1151 if (string == NULL)
1152 goto out_nomem;
1153 rc = strict_strtoul(string, 10, &option);
1154 kfree(string);
1155 if (rc != 0)
1156 goto out_invalid_value; 1192 goto out_invalid_value;
1157 mnt->wsize = option; 1193 mnt->wsize = option;
1158 break; 1194 break;
1159 case Opt_bsize: 1195 case Opt_bsize:
1160 string = match_strdup(args); 1196 if (nfs_get_option_ul(args, &option))
1161 if (string == NULL)
1162 goto out_nomem;
1163 rc = strict_strtoul(string, 10, &option);
1164 kfree(string);
1165 if (rc != 0)
1166 goto out_invalid_value; 1197 goto out_invalid_value;
1167 mnt->bsize = option; 1198 mnt->bsize = option;
1168 break; 1199 break;
1169 case Opt_timeo: 1200 case Opt_timeo:
1170 string = match_strdup(args); 1201 if (nfs_get_option_ul(args, &option) || option == 0)
1171 if (string == NULL)
1172 goto out_nomem;
1173 rc = strict_strtoul(string, 10, &option);
1174 kfree(string);
1175 if (rc != 0 || option == 0)
1176 goto out_invalid_value; 1202 goto out_invalid_value;
1177 mnt->timeo = option; 1203 mnt->timeo = option;
1178 break; 1204 break;
1179 case Opt_retrans: 1205 case Opt_retrans:
1180 string = match_strdup(args); 1206 if (nfs_get_option_ul(args, &option) || option == 0)
1181 if (string == NULL)
1182 goto out_nomem;
1183 rc = strict_strtoul(string, 10, &option);
1184 kfree(string);
1185 if (rc != 0 || option == 0)
1186 goto out_invalid_value; 1207 goto out_invalid_value;
1187 mnt->retrans = option; 1208 mnt->retrans = option;
1188 break; 1209 break;
1189 case Opt_acregmin: 1210 case Opt_acregmin:
1190 string = match_strdup(args); 1211 if (nfs_get_option_ul(args, &option))
1191 if (string == NULL)
1192 goto out_nomem;
1193 rc = strict_strtoul(string, 10, &option);
1194 kfree(string);
1195 if (rc != 0)
1196 goto out_invalid_value; 1212 goto out_invalid_value;
1197 mnt->acregmin = option; 1213 mnt->acregmin = option;
1198 break; 1214 break;
1199 case Opt_acregmax: 1215 case Opt_acregmax:
1200 string = match_strdup(args); 1216 if (nfs_get_option_ul(args, &option))
1201 if (string == NULL)
1202 goto out_nomem;
1203 rc = strict_strtoul(string, 10, &option);
1204 kfree(string);
1205 if (rc != 0)
1206 goto out_invalid_value; 1217 goto out_invalid_value;
1207 mnt->acregmax = option; 1218 mnt->acregmax = option;
1208 break; 1219 break;
1209 case Opt_acdirmin: 1220 case Opt_acdirmin:
1210 string = match_strdup(args); 1221 if (nfs_get_option_ul(args, &option))
1211 if (string == NULL)
1212 goto out_nomem;
1213 rc = strict_strtoul(string, 10, &option);
1214 kfree(string);
1215 if (rc != 0)
1216 goto out_invalid_value; 1222 goto out_invalid_value;
1217 mnt->acdirmin = option; 1223 mnt->acdirmin = option;
1218 break; 1224 break;
1219 case Opt_acdirmax: 1225 case Opt_acdirmax:
1220 string = match_strdup(args); 1226 if (nfs_get_option_ul(args, &option))
1221 if (string == NULL)
1222 goto out_nomem;
1223 rc = strict_strtoul(string, 10, &option);
1224 kfree(string);
1225 if (rc != 0)
1226 goto out_invalid_value; 1227 goto out_invalid_value;
1227 mnt->acdirmax = option; 1228 mnt->acdirmax = option;
1228 break; 1229 break;
1229 case Opt_actimeo: 1230 case Opt_actimeo:
1230 string = match_strdup(args); 1231 if (nfs_get_option_ul(args, &option))
1231 if (string == NULL)
1232 goto out_nomem;
1233 rc = strict_strtoul(string, 10, &option);
1234 kfree(string);
1235 if (rc != 0)
1236 goto out_invalid_value; 1232 goto out_invalid_value;
1237 mnt->acregmin = mnt->acregmax = 1233 mnt->acregmin = mnt->acregmax =
1238 mnt->acdirmin = mnt->acdirmax = option; 1234 mnt->acdirmin = mnt->acdirmax = option;
1239 break; 1235 break;
1240 case Opt_namelen: 1236 case Opt_namelen:
1241 string = match_strdup(args); 1237 if (nfs_get_option_ul(args, &option))
1242 if (string == NULL)
1243 goto out_nomem;
1244 rc = strict_strtoul(string, 10, &option);
1245 kfree(string);
1246 if (rc != 0)
1247 goto out_invalid_value; 1238 goto out_invalid_value;
1248 mnt->namlen = option; 1239 mnt->namlen = option;
1249 break; 1240 break;
1250 case Opt_mountport: 1241 case Opt_mountport:
1251 string = match_strdup(args); 1242 if (nfs_get_option_ul(args, &option) ||
1252 if (string == NULL) 1243 option > USHRT_MAX)
1253 goto out_nomem;
1254 rc = strict_strtoul(string, 10, &option);
1255 kfree(string);
1256 if (rc != 0 || option > USHRT_MAX)
1257 goto out_invalid_value; 1244 goto out_invalid_value;
1258 mnt->mount_server.port = option; 1245 mnt->mount_server.port = option;
1259 break; 1246 break;
1260 case Opt_mountvers: 1247 case Opt_mountvers:
1261 string = match_strdup(args); 1248 if (nfs_get_option_ul(args, &option) ||
1262 if (string == NULL)
1263 goto out_nomem;
1264 rc = strict_strtoul(string, 10, &option);
1265 kfree(string);
1266 if (rc != 0 ||
1267 option < NFS_MNT_VERSION || 1249 option < NFS_MNT_VERSION ||
1268 option > NFS_MNT3_VERSION) 1250 option > NFS_MNT3_VERSION)
1269 goto out_invalid_value; 1251 goto out_invalid_value;
1270 mnt->mount_server.version = option; 1252 mnt->mount_server.version = option;
1271 break; 1253 break;
1272 case Opt_nfsvers: 1254 case Opt_nfsvers:
1273 string = match_strdup(args); 1255 if (nfs_get_option_ul(args, &option))
1274 if (string == NULL)
1275 goto out_nomem;
1276 rc = strict_strtoul(string, 10, &option);
1277 kfree(string);
1278 if (rc != 0)
1279 goto out_invalid_value; 1256 goto out_invalid_value;
1280 switch (option) { 1257 switch (option) {
1281 case NFS2_VERSION: 1258 case NFS2_VERSION:
@@ -1295,12 +1272,7 @@ static int nfs_parse_mount_options(char *raw,
1295 } 1272 }
1296 break; 1273 break;
1297 case Opt_minorversion: 1274 case Opt_minorversion:
1298 string = match_strdup(args); 1275 if (nfs_get_option_ul(args, &option))
1299 if (string == NULL)
1300 goto out_nomem;
1301 rc = strict_strtoul(string, 10, &option);
1302 kfree(string);
1303 if (rc != 0)
1304 goto out_invalid_value; 1276 goto out_invalid_value;
1305 if (option > NFS4_MAX_MINOR_VERSION) 1277 if (option > NFS4_MAX_MINOR_VERSION)
1306 goto out_invalid_value; 1278 goto out_invalid_value;
@@ -1336,21 +1308,18 @@ static int nfs_parse_mount_options(char *raw,
1336 case Opt_xprt_udp: 1308 case Opt_xprt_udp:
1337 mnt->flags &= ~NFS_MOUNT_TCP; 1309 mnt->flags &= ~NFS_MOUNT_TCP;
1338 mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; 1310 mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
1339 kfree(string);
1340 break; 1311 break;
1341 case Opt_xprt_tcp6: 1312 case Opt_xprt_tcp6:
1342 protofamily = AF_INET6; 1313 protofamily = AF_INET6;
1343 case Opt_xprt_tcp: 1314 case Opt_xprt_tcp:
1344 mnt->flags |= NFS_MOUNT_TCP; 1315 mnt->flags |= NFS_MOUNT_TCP;
1345 mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; 1316 mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
1346 kfree(string);
1347 break; 1317 break;
1348 case Opt_xprt_rdma: 1318 case Opt_xprt_rdma:
1349 /* vector side protocols to TCP */ 1319 /* vector side protocols to TCP */
1350 mnt->flags |= NFS_MOUNT_TCP; 1320 mnt->flags |= NFS_MOUNT_TCP;
1351 mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; 1321 mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
1352 xprt_load_transport(string); 1322 xprt_load_transport(string);
1353 kfree(string);
1354 break; 1323 break;
1355 default: 1324 default:
1356 dfprintk(MOUNT, "NFS: unrecognized " 1325 dfprintk(MOUNT, "NFS: unrecognized "
@@ -1358,6 +1327,7 @@ static int nfs_parse_mount_options(char *raw,
1358 kfree(string); 1327 kfree(string);
1359 return 0; 1328 return 0;
1360 } 1329 }
1330 kfree(string);
1361 break; 1331 break;
1362 case Opt_mountproto: 1332 case Opt_mountproto:
1363 string = match_strdup(args); 1333 string = match_strdup(args);
@@ -1400,18 +1370,13 @@ static int nfs_parse_mount_options(char *raw,
1400 goto out_invalid_address; 1370 goto out_invalid_address;
1401 break; 1371 break;
1402 case Opt_clientaddr: 1372 case Opt_clientaddr:
1403 string = match_strdup(args); 1373 if (nfs_get_option_str(args, &mnt->client_address))
1404 if (string == NULL)
1405 goto out_nomem; 1374 goto out_nomem;
1406 kfree(mnt->client_address);
1407 mnt->client_address = string;
1408 break; 1375 break;
1409 case Opt_mounthost: 1376 case Opt_mounthost:
1410 string = match_strdup(args); 1377 if (nfs_get_option_str(args,
1411 if (string == NULL) 1378 &mnt->mount_server.hostname))
1412 goto out_nomem; 1379 goto out_nomem;
1413 kfree(mnt->mount_server.hostname);
1414 mnt->mount_server.hostname = string;
1415 break; 1380 break;
1416 case Opt_mountaddr: 1381 case Opt_mountaddr:
1417 string = match_strdup(args); 1382 string = match_strdup(args);
@@ -1451,11 +1416,8 @@ static int nfs_parse_mount_options(char *raw,
1451 }; 1416 };
1452 break; 1417 break;
1453 case Opt_fscache_uniq: 1418 case Opt_fscache_uniq:
1454 string = match_strdup(args); 1419 if (nfs_get_option_str(args, &mnt->fscache_uniq))
1455 if (string == NULL)
1456 goto out_nomem; 1420 goto out_nomem;
1457 kfree(mnt->fscache_uniq);
1458 mnt->fscache_uniq = string;
1459 mnt->options |= NFS_OPTION_FSCACHE; 1421 mnt->options |= NFS_OPTION_FSCACHE;
1460 break; 1422 break;
1461 case Opt_local_lock: 1423 case Opt_local_lock:
@@ -1665,99 +1627,59 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
1665 return nfs_walk_authlist(args, &request); 1627 return nfs_walk_authlist(args, &request);
1666} 1628}
1667 1629
1668static int nfs_parse_simple_hostname(const char *dev_name, 1630/*
1669 char **hostname, size_t maxnamlen, 1631 * Split "dev_name" into "hostname:export_path".
1670 char **export_path, size_t maxpathlen) 1632 *
1633 * The leftmost colon demarks the split between the server's hostname
1634 * and the export path. If the hostname starts with a left square
1635 * bracket, then it may contain colons.
1636 *
1637 * Note: caller frees hostname and export path, even on error.
1638 */
1639static int nfs_parse_devname(const char *dev_name,
1640 char **hostname, size_t maxnamlen,
1641 char **export_path, size_t maxpathlen)
1671{ 1642{
1672 size_t len; 1643 size_t len;
1673 char *colon, *comma; 1644 char *end;
1674
1675 colon = strchr(dev_name, ':');
1676 if (colon == NULL)
1677 goto out_bad_devname;
1678
1679 len = colon - dev_name;
1680 if (len > maxnamlen)
1681 goto out_hostname;
1682 1645
1683 /* N.B. caller will free nfs_server.hostname in all cases */ 1646 /* Is the host name protected with square brackets? */
1684 *hostname = kstrndup(dev_name, len, GFP_KERNEL); 1647 if (*dev_name == '[') {
1685 if (!*hostname) 1648 end = strchr(++dev_name, ']');
1686 goto out_nomem; 1649 if (end == NULL || end[1] != ':')
1687
1688 /* kill possible hostname list: not supported */
1689 comma = strchr(*hostname, ',');
1690 if (comma != NULL) {
1691 if (comma == *hostname)
1692 goto out_bad_devname; 1650 goto out_bad_devname;
1693 *comma = '\0';
1694 }
1695 1651
1696 colon++; 1652 len = end - dev_name;
1697 len = strlen(colon); 1653 end++;
1698 if (len > maxpathlen) 1654 } else {
1699 goto out_path; 1655 char *comma;
1700 *export_path = kstrndup(colon, len, GFP_KERNEL);
1701 if (!*export_path)
1702 goto out_nomem;
1703
1704 dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *export_path);
1705 return 0;
1706
1707out_bad_devname:
1708 dfprintk(MOUNT, "NFS: device name not in host:path format\n");
1709 return -EINVAL;
1710
1711out_nomem:
1712 dfprintk(MOUNT, "NFS: not enough memory to parse device name\n");
1713 return -ENOMEM;
1714
1715out_hostname:
1716 dfprintk(MOUNT, "NFS: server hostname too long\n");
1717 return -ENAMETOOLONG;
1718
1719out_path:
1720 dfprintk(MOUNT, "NFS: export pathname too long\n");
1721 return -ENAMETOOLONG;
1722}
1723
1724/*
1725 * Hostname has square brackets around it because it contains one or
1726 * more colons. We look for the first closing square bracket, and a
1727 * colon must follow it.
1728 */
1729static int nfs_parse_protected_hostname(const char *dev_name,
1730 char **hostname, size_t maxnamlen,
1731 char **export_path, size_t maxpathlen)
1732{
1733 size_t len;
1734 char *start, *end;
1735 1656
1736 start = (char *)(dev_name + 1); 1657 end = strchr(dev_name, ':');
1658 if (end == NULL)
1659 goto out_bad_devname;
1660 len = end - dev_name;
1737 1661
1738 end = strchr(start, ']'); 1662 /* kill possible hostname list: not supported */
1739 if (end == NULL) 1663 comma = strchr(dev_name, ',');
1740 goto out_bad_devname; 1664 if (comma != NULL && comma < end)
1741 if (*(end + 1) != ':') 1665 *comma = 0;
1742 goto out_bad_devname; 1666 }
1743 1667
1744 len = end - start;
1745 if (len > maxnamlen) 1668 if (len > maxnamlen)
1746 goto out_hostname; 1669 goto out_hostname;
1747 1670
1748 /* N.B. caller will free nfs_server.hostname in all cases */ 1671 /* N.B. caller will free nfs_server.hostname in all cases */
1749 *hostname = kstrndup(start, len, GFP_KERNEL); 1672 *hostname = kstrndup(dev_name, len, GFP_KERNEL);
1750 if (*hostname == NULL) 1673 if (*hostname == NULL)
1751 goto out_nomem; 1674 goto out_nomem;
1752 1675 len = strlen(++end);
1753 end += 2;
1754 len = strlen(end);
1755 if (len > maxpathlen) 1676 if (len > maxpathlen)
1756 goto out_path; 1677 goto out_path;
1757 *export_path = kstrndup(end, len, GFP_KERNEL); 1678 *export_path = kstrndup(end, len, GFP_KERNEL);
1758 if (!*export_path) 1679 if (!*export_path)
1759 goto out_nomem; 1680 goto out_nomem;
1760 1681
1682 dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *export_path);
1761 return 0; 1683 return 0;
1762 1684
1763out_bad_devname: 1685out_bad_devname:
@@ -1778,29 +1700,6 @@ out_path:
1778} 1700}
1779 1701
1780/* 1702/*
1781 * Split "dev_name" into "hostname:export_path".
1782 *
1783 * The leftmost colon demarks the split between the server's hostname
1784 * and the export path. If the hostname starts with a left square
1785 * bracket, then it may contain colons.
1786 *
1787 * Note: caller frees hostname and export path, even on error.
1788 */
1789static int nfs_parse_devname(const char *dev_name,
1790 char **hostname, size_t maxnamlen,
1791 char **export_path, size_t maxpathlen)
1792{
1793 if (*dev_name == '[')
1794 return nfs_parse_protected_hostname(dev_name,
1795 hostname, maxnamlen,
1796 export_path, maxpathlen);
1797
1798 return nfs_parse_simple_hostname(dev_name,
1799 hostname, maxnamlen,
1800 export_path, maxpathlen);
1801}
1802
1803/*
1804 * Validate the NFS2/NFS3 mount data 1703 * Validate the NFS2/NFS3 mount data
1805 * - fills in the mount root filehandle 1704 * - fills in the mount root filehandle
1806 * 1705 *
@@ -2267,19 +2166,19 @@ static int nfs_bdi_register(struct nfs_server *server)
2267 return bdi_register_dev(&server->backing_dev_info, server->s_dev); 2166 return bdi_register_dev(&server->backing_dev_info, server->s_dev);
2268} 2167}
2269 2168
2270static int nfs_get_sb(struct file_system_type *fs_type, 2169static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
2271 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 2170 int flags, const char *dev_name, void *raw_data)
2272{ 2171{
2273 struct nfs_server *server = NULL; 2172 struct nfs_server *server = NULL;
2274 struct super_block *s; 2173 struct super_block *s;
2275 struct nfs_parsed_mount_data *data; 2174 struct nfs_parsed_mount_data *data;
2276 struct nfs_fh *mntfh; 2175 struct nfs_fh *mntfh;
2277 struct dentry *mntroot; 2176 struct dentry *mntroot = ERR_PTR(-ENOMEM);
2278 int (*compare_super)(struct super_block *, void *) = nfs_compare_super; 2177 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
2279 struct nfs_sb_mountdata sb_mntdata = { 2178 struct nfs_sb_mountdata sb_mntdata = {
2280 .mntflags = flags, 2179 .mntflags = flags,
2281 }; 2180 };
2282 int error = -ENOMEM; 2181 int error;
2283 2182
2284 data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION); 2183 data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION);
2285 mntfh = nfs_alloc_fhandle(); 2184 mntfh = nfs_alloc_fhandle();
@@ -2290,12 +2189,14 @@ static int nfs_get_sb(struct file_system_type *fs_type,
2290 2189
2291 /* Validate the mount data */ 2190 /* Validate the mount data */
2292 error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name); 2191 error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name);
2293 if (error < 0) 2192 if (error < 0) {
2193 mntroot = ERR_PTR(error);
2294 goto out; 2194 goto out;
2195 }
2295 2196
2296#ifdef CONFIG_NFS_V4 2197#ifdef CONFIG_NFS_V4
2297 if (data->version == 4) { 2198 if (data->version == 4) {
2298 error = nfs4_try_mount(flags, dev_name, data, mnt); 2199 mntroot = nfs4_try_mount(flags, dev_name, data);
2299 kfree(data->client_address); 2200 kfree(data->client_address);
2300 kfree(data->nfs_server.export_path); 2201 kfree(data->nfs_server.export_path);
2301 goto out; 2202 goto out;
@@ -2305,7 +2206,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
2305 /* Get a volume representation */ 2206 /* Get a volume representation */
2306 server = nfs_create_server(data, mntfh); 2207 server = nfs_create_server(data, mntfh);
2307 if (IS_ERR(server)) { 2208 if (IS_ERR(server)) {
2308 error = PTR_ERR(server); 2209 mntroot = ERR_CAST(server);
2309 goto out; 2210 goto out;
2310 } 2211 }
2311 sb_mntdata.server = server; 2212 sb_mntdata.server = server;
@@ -2316,7 +2217,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
2316 /* Get a superblock - note that we may end up sharing one that already exists */ 2217 /* Get a superblock - note that we may end up sharing one that already exists */
2317 s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata); 2218 s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata);
2318 if (IS_ERR(s)) { 2219 if (IS_ERR(s)) {
2319 error = PTR_ERR(s); 2220 mntroot = ERR_CAST(s);
2320 goto out_err_nosb; 2221 goto out_err_nosb;
2321 } 2222 }
2322 2223
@@ -2325,8 +2226,10 @@ static int nfs_get_sb(struct file_system_type *fs_type,
2325 server = NULL; 2226 server = NULL;
2326 } else { 2227 } else {
2327 error = nfs_bdi_register(server); 2228 error = nfs_bdi_register(server);
2328 if (error) 2229 if (error) {
2230 mntroot = ERR_PTR(error);
2329 goto error_splat_bdi; 2231 goto error_splat_bdi;
2232 }
2330 } 2233 }
2331 2234
2332 if (!s->s_root) { 2235 if (!s->s_root) {
@@ -2336,20 +2239,15 @@ static int nfs_get_sb(struct file_system_type *fs_type,
2336 s, data ? data->fscache_uniq : NULL, NULL); 2239 s, data ? data->fscache_uniq : NULL, NULL);
2337 } 2240 }
2338 2241
2339 mntroot = nfs_get_root(s, mntfh); 2242 mntroot = nfs_get_root(s, mntfh, dev_name);
2340 if (IS_ERR(mntroot)) { 2243 if (IS_ERR(mntroot))
2341 error = PTR_ERR(mntroot);
2342 goto error_splat_super; 2244 goto error_splat_super;
2343 }
2344 2245
2345 error = security_sb_set_mnt_opts(s, &data->lsm_opts); 2246 error = security_sb_set_mnt_opts(s, &data->lsm_opts);
2346 if (error) 2247 if (error)
2347 goto error_splat_root; 2248 goto error_splat_root;
2348 2249
2349 s->s_flags |= MS_ACTIVE; 2250 s->s_flags |= MS_ACTIVE;
2350 mnt->mnt_sb = s;
2351 mnt->mnt_root = mntroot;
2352 error = 0;
2353 2251
2354out: 2252out:
2355 kfree(data->nfs_server.hostname); 2253 kfree(data->nfs_server.hostname);
@@ -2359,7 +2257,7 @@ out:
2359out_free_fh: 2257out_free_fh:
2360 nfs_free_fhandle(mntfh); 2258 nfs_free_fhandle(mntfh);
2361 kfree(data); 2259 kfree(data);
2362 return error; 2260 return mntroot;
2363 2261
2364out_err_nosb: 2262out_err_nosb:
2365 nfs_free_server(server); 2263 nfs_free_server(server);
@@ -2367,6 +2265,7 @@ out_err_nosb:
2367 2265
2368error_splat_root: 2266error_splat_root:
2369 dput(mntroot); 2267 dput(mntroot);
2268 mntroot = ERR_PTR(error);
2370error_splat_super: 2269error_splat_super:
2371 if (server && !s->s_root) 2270 if (server && !s->s_root)
2372 bdi_unregister(&server->backing_dev_info); 2271 bdi_unregister(&server->backing_dev_info);
@@ -2450,7 +2349,7 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags,
2450 nfs_fscache_get_super_cookie(s, NULL, data); 2349 nfs_fscache_get_super_cookie(s, NULL, data);
2451 } 2350 }
2452 2351
2453 mntroot = nfs_get_root(s, data->fh); 2352 mntroot = nfs_get_root(s, data->fh, dev_name);
2454 if (IS_ERR(mntroot)) { 2353 if (IS_ERR(mntroot)) {
2455 error = PTR_ERR(mntroot); 2354 error = PTR_ERR(mntroot);
2456 goto error_splat_super; 2355 goto error_splat_super;
@@ -2718,7 +2617,7 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags,
2718 s, data ? data->fscache_uniq : NULL, NULL); 2617 s, data ? data->fscache_uniq : NULL, NULL);
2719 } 2618 }
2720 2619
2721 mntroot = nfs4_get_root(s, mntfh); 2620 mntroot = nfs4_get_root(s, mntfh, dev_name);
2722 if (IS_ERR(mntroot)) { 2621 if (IS_ERR(mntroot)) {
2723 error = PTR_ERR(mntroot); 2622 error = PTR_ERR(mntroot);
2724 goto error_splat_super; 2623 goto error_splat_super;
@@ -2771,27 +2670,6 @@ static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type,
2771 return root_mnt; 2670 return root_mnt;
2772} 2671}
2773 2672
2774static void nfs_fix_devname(const struct path *path, struct vfsmount *mnt)
2775{
2776 char *page = (char *) __get_free_page(GFP_KERNEL);
2777 char *devname, *tmp;
2778
2779 if (page == NULL)
2780 return;
2781 devname = nfs_path(path->mnt->mnt_devname,
2782 path->mnt->mnt_root, path->dentry,
2783 page, PAGE_SIZE);
2784 if (IS_ERR(devname))
2785 goto out_freepage;
2786 tmp = kstrdup(devname, GFP_KERNEL);
2787 if (tmp == NULL)
2788 goto out_freepage;
2789 kfree(mnt->mnt_devname);
2790 mnt->mnt_devname = tmp;
2791out_freepage:
2792 free_page((unsigned long)page);
2793}
2794
2795struct nfs_referral_count { 2673struct nfs_referral_count {
2796 struct list_head list; 2674 struct list_head list;
2797 const struct task_struct *task; 2675 const struct task_struct *task;
@@ -2858,17 +2736,18 @@ static void nfs_referral_loop_unprotect(void)
2858 kfree(p); 2736 kfree(p);
2859} 2737}
2860 2738
2861static int nfs_follow_remote_path(struct vfsmount *root_mnt, 2739static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
2862 const char *export_path, struct vfsmount *mnt_target) 2740 const char *export_path)
2863{ 2741{
2864 struct nameidata *nd = NULL; 2742 struct nameidata *nd = NULL;
2865 struct mnt_namespace *ns_private; 2743 struct mnt_namespace *ns_private;
2866 struct super_block *s; 2744 struct super_block *s;
2745 struct dentry *dentry;
2867 int ret; 2746 int ret;
2868 2747
2869 nd = kmalloc(sizeof(*nd), GFP_KERNEL); 2748 nd = kmalloc(sizeof(*nd), GFP_KERNEL);
2870 if (nd == NULL) 2749 if (nd == NULL)
2871 return -ENOMEM; 2750 return ERR_PTR(-ENOMEM);
2872 2751
2873 ns_private = create_mnt_ns(root_mnt); 2752 ns_private = create_mnt_ns(root_mnt);
2874 ret = PTR_ERR(ns_private); 2753 ret = PTR_ERR(ns_private);
@@ -2890,32 +2769,27 @@ static int nfs_follow_remote_path(struct vfsmount *root_mnt,
2890 2769
2891 s = nd->path.mnt->mnt_sb; 2770 s = nd->path.mnt->mnt_sb;
2892 atomic_inc(&s->s_active); 2771 atomic_inc(&s->s_active);
2893 mnt_target->mnt_sb = s; 2772 dentry = dget(nd->path.dentry);
2894 mnt_target->mnt_root = dget(nd->path.dentry);
2895
2896 /* Correct the device pathname */
2897 nfs_fix_devname(&nd->path, mnt_target);
2898 2773
2899 path_put(&nd->path); 2774 path_put(&nd->path);
2900 kfree(nd); 2775 kfree(nd);
2901 down_write(&s->s_umount); 2776 down_write(&s->s_umount);
2902 return 0; 2777 return dentry;
2903out_put_mnt_ns: 2778out_put_mnt_ns:
2904 put_mnt_ns(ns_private); 2779 put_mnt_ns(ns_private);
2905out_mntput: 2780out_mntput:
2906 mntput(root_mnt); 2781 mntput(root_mnt);
2907out_err: 2782out_err:
2908 kfree(nd); 2783 kfree(nd);
2909 return ret; 2784 return ERR_PTR(ret);
2910} 2785}
2911 2786
2912static int nfs4_try_mount(int flags, const char *dev_name, 2787static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
2913 struct nfs_parsed_mount_data *data, 2788 struct nfs_parsed_mount_data *data)
2914 struct vfsmount *mnt)
2915{ 2789{
2916 char *export_path; 2790 char *export_path;
2917 struct vfsmount *root_mnt; 2791 struct vfsmount *root_mnt;
2918 int error; 2792 struct dentry *res;
2919 2793
2920 dfprintk(MOUNT, "--> nfs4_try_mount()\n"); 2794 dfprintk(MOUNT, "--> nfs4_try_mount()\n");
2921 2795
@@ -2925,26 +2799,25 @@ static int nfs4_try_mount(int flags, const char *dev_name,
2925 data->nfs_server.hostname); 2799 data->nfs_server.hostname);
2926 data->nfs_server.export_path = export_path; 2800 data->nfs_server.export_path = export_path;
2927 2801
2928 error = PTR_ERR(root_mnt); 2802 res = ERR_CAST(root_mnt);
2929 if (IS_ERR(root_mnt)) 2803 if (!IS_ERR(root_mnt))
2930 goto out; 2804 res = nfs_follow_remote_path(root_mnt, export_path);
2931
2932 error = nfs_follow_remote_path(root_mnt, export_path, mnt);
2933 2805
2934out: 2806 dfprintk(MOUNT, "<-- nfs4_try_mount() = %ld%s\n",
2935 dfprintk(MOUNT, "<-- nfs4_try_mount() = %d%s\n", error, 2807 IS_ERR(res) ? PTR_ERR(res) : 0,
2936 error != 0 ? " [error]" : ""); 2808 IS_ERR(res) ? " [error]" : "");
2937 return error; 2809 return res;
2938} 2810}
2939 2811
2940/* 2812/*
2941 * Get the superblock for an NFS4 mountpoint 2813 * Get the superblock for an NFS4 mountpoint
2942 */ 2814 */
2943static int nfs4_get_sb(struct file_system_type *fs_type, 2815static struct dentry *nfs4_mount(struct file_system_type *fs_type,
2944 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 2816 int flags, const char *dev_name, void *raw_data)
2945{ 2817{
2946 struct nfs_parsed_mount_data *data; 2818 struct nfs_parsed_mount_data *data;
2947 int error = -ENOMEM; 2819 int error = -ENOMEM;
2820 struct dentry *res = ERR_PTR(-ENOMEM);
2948 2821
2949 data = nfs_alloc_parsed_mount_data(4); 2822 data = nfs_alloc_parsed_mount_data(4);
2950 if (data == NULL) 2823 if (data == NULL)
@@ -2952,10 +2825,14 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
2952 2825
2953 /* Validate the mount data */ 2826 /* Validate the mount data */
2954 error = nfs4_validate_mount_data(raw_data, data, dev_name); 2827 error = nfs4_validate_mount_data(raw_data, data, dev_name);
2955 if (error < 0) 2828 if (error < 0) {
2829 res = ERR_PTR(error);
2956 goto out; 2830 goto out;
2831 }
2957 2832
2958 error = nfs4_try_mount(flags, dev_name, data, mnt); 2833 res = nfs4_try_mount(flags, dev_name, data);
2834 if (IS_ERR(res))
2835 error = PTR_ERR(res);
2959 2836
2960out: 2837out:
2961 kfree(data->client_address); 2838 kfree(data->client_address);
@@ -2964,9 +2841,9 @@ out:
2964 kfree(data->fscache_uniq); 2841 kfree(data->fscache_uniq);
2965out_free_data: 2842out_free_data:
2966 kfree(data); 2843 kfree(data);
2967 dprintk("<-- nfs4_get_sb() = %d%s\n", error, 2844 dprintk("<-- nfs4_mount() = %d%s\n", error,
2968 error != 0 ? " [error]" : ""); 2845 error != 0 ? " [error]" : "");
2969 return error; 2846 return res;
2970} 2847}
2971 2848
2972static void nfs4_kill_super(struct super_block *sb) 2849static void nfs4_kill_super(struct super_block *sb)
@@ -3033,7 +2910,7 @@ nfs4_xdev_mount(struct file_system_type *fs_type, int flags,
3033 nfs_fscache_get_super_cookie(s, NULL, data); 2910 nfs_fscache_get_super_cookie(s, NULL, data);
3034 } 2911 }
3035 2912
3036 mntroot = nfs4_get_root(s, data->fh); 2913 mntroot = nfs4_get_root(s, data->fh, dev_name);
3037 if (IS_ERR(mntroot)) { 2914 if (IS_ERR(mntroot)) {
3038 error = PTR_ERR(mntroot); 2915 error = PTR_ERR(mntroot);
3039 goto error_splat_super; 2916 goto error_splat_super;
@@ -3120,7 +2997,7 @@ nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags,
3120 nfs_fscache_get_super_cookie(s, NULL, data); 2997 nfs_fscache_get_super_cookie(s, NULL, data);
3121 } 2998 }
3122 2999
3123 mntroot = nfs4_get_root(s, mntfh); 3000 mntroot = nfs4_get_root(s, mntfh, dev_name);
3124 if (IS_ERR(mntroot)) { 3001 if (IS_ERR(mntroot)) {
3125 error = PTR_ERR(mntroot); 3002 error = PTR_ERR(mntroot);
3126 goto error_splat_super; 3003 goto error_splat_super;
@@ -3160,16 +3037,15 @@ error_splat_bdi:
3160/* 3037/*
3161 * Create an NFS4 server record on referral traversal 3038 * Create an NFS4 server record on referral traversal
3162 */ 3039 */
3163static int nfs4_referral_get_sb(struct file_system_type *fs_type, 3040static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
3164 int flags, const char *dev_name, void *raw_data, 3041 int flags, const char *dev_name, void *raw_data)
3165 struct vfsmount *mnt)
3166{ 3042{
3167 struct nfs_clone_mount *data = raw_data; 3043 struct nfs_clone_mount *data = raw_data;
3168 char *export_path; 3044 char *export_path;
3169 struct vfsmount *root_mnt; 3045 struct vfsmount *root_mnt;
3170 int error; 3046 struct dentry *res;
3171 3047
3172 dprintk("--> nfs4_referral_get_sb()\n"); 3048 dprintk("--> nfs4_referral_mount()\n");
3173 3049
3174 export_path = data->mnt_path; 3050 export_path = data->mnt_path;
3175 data->mnt_path = "/"; 3051 data->mnt_path = "/";
@@ -3178,15 +3054,13 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type,
3178 flags, data, data->hostname); 3054 flags, data, data->hostname);
3179 data->mnt_path = export_path; 3055 data->mnt_path = export_path;
3180 3056
3181 error = PTR_ERR(root_mnt); 3057 res = ERR_CAST(root_mnt);
3182 if (IS_ERR(root_mnt)) 3058 if (!IS_ERR(root_mnt))
3183 goto out; 3059 res = nfs_follow_remote_path(root_mnt, export_path);
3184 3060 dprintk("<-- nfs4_referral_mount() = %ld%s\n",
3185 error = nfs_follow_remote_path(root_mnt, export_path, mnt); 3061 IS_ERR(res) ? PTR_ERR(res) : 0,
3186out: 3062 IS_ERR(res) ? " [error]" : "");
3187 dprintk("<-- nfs4_referral_get_sb() = %d%s\n", error, 3063 return res;
3188 error != 0 ? " [error]" : "");
3189 return error;
3190} 3064}
3191 3065
3192#endif /* CONFIG_NFS_V4 */ 3066#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index e313a51acdd1..8d6864c2a5fa 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -148,6 +148,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
148 alias = d_lookup(parent, &data->args.name); 148 alias = d_lookup(parent, &data->args.name);
149 if (alias != NULL) { 149 if (alias != NULL) {
150 int ret = 0; 150 int ret = 0;
151 void *devname_garbage = NULL;
151 152
152 /* 153 /*
153 * Hey, we raced with lookup... See if we need to transfer 154 * Hey, we raced with lookup... See if we need to transfer
@@ -157,6 +158,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
157 spin_lock(&alias->d_lock); 158 spin_lock(&alias->d_lock);
158 if (alias->d_inode != NULL && 159 if (alias->d_inode != NULL &&
159 !(alias->d_flags & DCACHE_NFSFS_RENAMED)) { 160 !(alias->d_flags & DCACHE_NFSFS_RENAMED)) {
161 devname_garbage = alias->d_fsdata;
160 alias->d_fsdata = data; 162 alias->d_fsdata = data;
161 alias->d_flags |= DCACHE_NFSFS_RENAMED; 163 alias->d_flags |= DCACHE_NFSFS_RENAMED;
162 ret = 1; 164 ret = 1;
@@ -164,6 +166,13 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
164 spin_unlock(&alias->d_lock); 166 spin_unlock(&alias->d_lock);
165 nfs_dec_sillycount(dir); 167 nfs_dec_sillycount(dir);
166 dput(alias); 168 dput(alias);
169 /*
170 * If we'd displaced old cached devname, free it. At that
171 * point dentry is definitely not a root, so we won't need
172 * that anymore.
173 */
174 if (devname_garbage)
175 kfree(devname_garbage);
167 return ret; 176 return ret;
168 } 177 }
169 data->dir = igrab(dir); 178 data->dir = igrab(dir);
@@ -180,7 +189,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
180 task_setup_data.rpc_client = NFS_CLIENT(dir); 189 task_setup_data.rpc_client = NFS_CLIENT(dir);
181 task = rpc_run_task(&task_setup_data); 190 task = rpc_run_task(&task_setup_data);
182 if (!IS_ERR(task)) 191 if (!IS_ERR(task))
183 rpc_put_task(task); 192 rpc_put_task_async(task);
184 return 1; 193 return 1;
185} 194}
186 195
@@ -252,6 +261,7 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
252{ 261{
253 struct nfs_unlinkdata *data; 262 struct nfs_unlinkdata *data;
254 int status = -ENOMEM; 263 int status = -ENOMEM;
264 void *devname_garbage = NULL;
255 265
256 data = kzalloc(sizeof(*data), GFP_KERNEL); 266 data = kzalloc(sizeof(*data), GFP_KERNEL);
257 if (data == NULL) 267 if (data == NULL)
@@ -269,8 +279,16 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
269 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) 279 if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
270 goto out_unlock; 280 goto out_unlock;
271 dentry->d_flags |= DCACHE_NFSFS_RENAMED; 281 dentry->d_flags |= DCACHE_NFSFS_RENAMED;
282 devname_garbage = dentry->d_fsdata;
272 dentry->d_fsdata = data; 283 dentry->d_fsdata = data;
273 spin_unlock(&dentry->d_lock); 284 spin_unlock(&dentry->d_lock);
285 /*
286 * If we'd displaced old cached devname, free it. At that
287 * point dentry is definitely not a root, so we won't need
288 * that anymore.
289 */
290 if (devname_garbage)
291 kfree(devname_garbage);
274 return 0; 292 return 0;
275out_unlock: 293out_unlock:
276 spin_unlock(&dentry->d_lock); 294 spin_unlock(&dentry->d_lock);
@@ -299,6 +317,7 @@ nfs_complete_unlink(struct dentry *dentry, struct inode *inode)
299 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { 317 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
300 dentry->d_flags &= ~DCACHE_NFSFS_RENAMED; 318 dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
301 data = dentry->d_fsdata; 319 data = dentry->d_fsdata;
320 dentry->d_fsdata = NULL;
302 } 321 }
303 spin_unlock(&dentry->d_lock); 322 spin_unlock(&dentry->d_lock);
304 323
@@ -315,6 +334,7 @@ nfs_cancel_async_unlink(struct dentry *dentry)
315 struct nfs_unlinkdata *data = dentry->d_fsdata; 334 struct nfs_unlinkdata *data = dentry->d_fsdata;
316 335
317 dentry->d_flags &= ~DCACHE_NFSFS_RENAMED; 336 dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
337 dentry->d_fsdata = NULL;
318 spin_unlock(&dentry->d_lock); 338 spin_unlock(&dentry->d_lock);
319 nfs_free_unlinkdata(data); 339 nfs_free_unlinkdata(data);
320 return; 340 return;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c8278f4046cb..47a3ad63e0d5 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -28,6 +28,7 @@
28#include "iostat.h" 28#include "iostat.h"
29#include "nfs4_fs.h" 29#include "nfs4_fs.h"
30#include "fscache.h" 30#include "fscache.h"
31#include "pnfs.h"
31 32
32#define NFSDBG_FACILITY NFSDBG_PAGECACHE 33#define NFSDBG_FACILITY NFSDBG_PAGECACHE
33 34
@@ -96,6 +97,7 @@ void nfs_writedata_free(struct nfs_write_data *p)
96 97
97static void nfs_writedata_release(struct nfs_write_data *wdata) 98static void nfs_writedata_release(struct nfs_write_data *wdata)
98{ 99{
100 put_lseg(wdata->lseg);
99 put_nfs_open_context(wdata->args.context); 101 put_nfs_open_context(wdata->args.context);
100 nfs_writedata_free(wdata); 102 nfs_writedata_free(wdata);
101} 103}
@@ -781,25 +783,21 @@ static int flush_task_priority(int how)
781 return RPC_PRIORITY_NORMAL; 783 return RPC_PRIORITY_NORMAL;
782} 784}
783 785
784/* 786int nfs_initiate_write(struct nfs_write_data *data,
785 * Set up the argument/result storage required for the RPC call. 787 struct rpc_clnt *clnt,
786 */ 788 const struct rpc_call_ops *call_ops,
787static int nfs_write_rpcsetup(struct nfs_page *req, 789 int how)
788 struct nfs_write_data *data,
789 const struct rpc_call_ops *call_ops,
790 unsigned int count, unsigned int offset,
791 int how)
792{ 790{
793 struct inode *inode = req->wb_context->path.dentry->d_inode; 791 struct inode *inode = data->inode;
794 int priority = flush_task_priority(how); 792 int priority = flush_task_priority(how);
795 struct rpc_task *task; 793 struct rpc_task *task;
796 struct rpc_message msg = { 794 struct rpc_message msg = {
797 .rpc_argp = &data->args, 795 .rpc_argp = &data->args,
798 .rpc_resp = &data->res, 796 .rpc_resp = &data->res,
799 .rpc_cred = req->wb_context->cred, 797 .rpc_cred = data->cred,
800 }; 798 };
801 struct rpc_task_setup task_setup_data = { 799 struct rpc_task_setup task_setup_data = {
802 .rpc_client = NFS_CLIENT(inode), 800 .rpc_client = clnt,
803 .task = &data->task, 801 .task = &data->task,
804 .rpc_message = &msg, 802 .rpc_message = &msg,
805 .callback_ops = call_ops, 803 .callback_ops = call_ops,
@@ -810,12 +808,52 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
810 }; 808 };
811 int ret = 0; 809 int ret = 0;
812 810
811 /* Set up the initial task struct. */
812 NFS_PROTO(inode)->write_setup(data, &msg);
813
814 dprintk("NFS: %5u initiated write call "
815 "(req %s/%lld, %u bytes @ offset %llu)\n",
816 data->task.tk_pid,
817 inode->i_sb->s_id,
818 (long long)NFS_FILEID(inode),
819 data->args.count,
820 (unsigned long long)data->args.offset);
821
822 task = rpc_run_task(&task_setup_data);
823 if (IS_ERR(task)) {
824 ret = PTR_ERR(task);
825 goto out;
826 }
827 if (how & FLUSH_SYNC) {
828 ret = rpc_wait_for_completion_task(task);
829 if (ret == 0)
830 ret = task->tk_status;
831 }
832 rpc_put_task(task);
833out:
834 return ret;
835}
836EXPORT_SYMBOL_GPL(nfs_initiate_write);
837
838/*
839 * Set up the argument/result storage required for the RPC call.
840 */
841static int nfs_write_rpcsetup(struct nfs_page *req,
842 struct nfs_write_data *data,
843 const struct rpc_call_ops *call_ops,
844 unsigned int count, unsigned int offset,
845 struct pnfs_layout_segment *lseg,
846 int how)
847{
848 struct inode *inode = req->wb_context->path.dentry->d_inode;
849
813 /* Set up the RPC argument and reply structs 850 /* Set up the RPC argument and reply structs
814 * NB: take care not to mess about with data->commit et al. */ 851 * NB: take care not to mess about with data->commit et al. */
815 852
816 data->req = req; 853 data->req = req;
817 data->inode = inode = req->wb_context->path.dentry->d_inode; 854 data->inode = inode = req->wb_context->path.dentry->d_inode;
818 data->cred = msg.rpc_cred; 855 data->cred = req->wb_context->cred;
856 data->lseg = get_lseg(lseg);
819 857
820 data->args.fh = NFS_FH(inode); 858 data->args.fh = NFS_FH(inode);
821 data->args.offset = req_offset(req) + offset; 859 data->args.offset = req_offset(req) + offset;
@@ -836,30 +874,11 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
836 data->res.verf = &data->verf; 874 data->res.verf = &data->verf;
837 nfs_fattr_init(&data->fattr); 875 nfs_fattr_init(&data->fattr);
838 876
839 /* Set up the initial task struct. */ 877 if (data->lseg &&
840 NFS_PROTO(inode)->write_setup(data, &msg); 878 (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED))
841 879 return 0;
842 dprintk("NFS: %5u initiated write call "
843 "(req %s/%lld, %u bytes @ offset %llu)\n",
844 data->task.tk_pid,
845 inode->i_sb->s_id,
846 (long long)NFS_FILEID(inode),
847 count,
848 (unsigned long long)data->args.offset);
849 880
850 task = rpc_run_task(&task_setup_data); 881 return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
851 if (IS_ERR(task)) {
852 ret = PTR_ERR(task);
853 goto out;
854 }
855 if (how & FLUSH_SYNC) {
856 ret = rpc_wait_for_completion_task(task);
857 if (ret == 0)
858 ret = task->tk_status;
859 }
860 rpc_put_task(task);
861out:
862 return ret;
863} 882}
864 883
865/* If a nfs_flush_* function fails, it should remove reqs from @head and 884/* If a nfs_flush_* function fails, it should remove reqs from @head and
@@ -879,20 +898,21 @@ static void nfs_redirty_request(struct nfs_page *req)
879 * Generate multiple small requests to write out a single 898 * Generate multiple small requests to write out a single
880 * contiguous dirty area on one page. 899 * contiguous dirty area on one page.
881 */ 900 */
882static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how) 901static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
883{ 902{
884 struct nfs_page *req = nfs_list_entry(head->next); 903 struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
885 struct page *page = req->wb_page; 904 struct page *page = req->wb_page;
886 struct nfs_write_data *data; 905 struct nfs_write_data *data;
887 size_t wsize = NFS_SERVER(inode)->wsize, nbytes; 906 size_t wsize = NFS_SERVER(desc->pg_inode)->wsize, nbytes;
888 unsigned int offset; 907 unsigned int offset;
889 int requests = 0; 908 int requests = 0;
890 int ret = 0; 909 int ret = 0;
910 struct pnfs_layout_segment *lseg;
891 LIST_HEAD(list); 911 LIST_HEAD(list);
892 912
893 nfs_list_remove_request(req); 913 nfs_list_remove_request(req);
894 914
895 nbytes = count; 915 nbytes = desc->pg_count;
896 do { 916 do {
897 size_t len = min(nbytes, wsize); 917 size_t len = min(nbytes, wsize);
898 918
@@ -905,9 +925,11 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
905 } while (nbytes != 0); 925 } while (nbytes != 0);
906 atomic_set(&req->wb_complete, requests); 926 atomic_set(&req->wb_complete, requests);
907 927
928 BUG_ON(desc->pg_lseg);
929 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW);
908 ClearPageError(page); 930 ClearPageError(page);
909 offset = 0; 931 offset = 0;
910 nbytes = count; 932 nbytes = desc->pg_count;
911 do { 933 do {
912 int ret2; 934 int ret2;
913 935
@@ -919,13 +941,15 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
919 if (nbytes < wsize) 941 if (nbytes < wsize)
920 wsize = nbytes; 942 wsize = nbytes;
921 ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, 943 ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
922 wsize, offset, how); 944 wsize, offset, lseg, desc->pg_ioflags);
923 if (ret == 0) 945 if (ret == 0)
924 ret = ret2; 946 ret = ret2;
925 offset += wsize; 947 offset += wsize;
926 nbytes -= wsize; 948 nbytes -= wsize;
927 } while (nbytes != 0); 949 } while (nbytes != 0);
928 950
951 put_lseg(lseg);
952 desc->pg_lseg = NULL;
929 return ret; 953 return ret;
930 954
931out_bad: 955out_bad:
@@ -946,16 +970,26 @@ out_bad:
946 * This is the case if nfs_updatepage detects a conflicting request 970 * This is the case if nfs_updatepage detects a conflicting request
947 * that has been written but not committed. 971 * that has been written but not committed.
948 */ 972 */
949static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how) 973static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
950{ 974{
951 struct nfs_page *req; 975 struct nfs_page *req;
952 struct page **pages; 976 struct page **pages;
953 struct nfs_write_data *data; 977 struct nfs_write_data *data;
978 struct list_head *head = &desc->pg_list;
979 struct pnfs_layout_segment *lseg = desc->pg_lseg;
980 int ret;
954 981
955 data = nfs_writedata_alloc(npages); 982 data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base,
956 if (!data) 983 desc->pg_count));
957 goto out_bad; 984 if (!data) {
958 985 while (!list_empty(head)) {
986 req = nfs_list_entry(head->next);
987 nfs_list_remove_request(req);
988 nfs_redirty_request(req);
989 }
990 ret = -ENOMEM;
991 goto out;
992 }
959 pages = data->pagevec; 993 pages = data->pagevec;
960 while (!list_empty(head)) { 994 while (!list_empty(head)) {
961 req = nfs_list_entry(head->next); 995 req = nfs_list_entry(head->next);
@@ -965,16 +999,15 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
965 *pages++ = req->wb_page; 999 *pages++ = req->wb_page;
966 } 1000 }
967 req = nfs_list_entry(data->pages.next); 1001 req = nfs_list_entry(data->pages.next);
1002 if ((!lseg) && list_is_singular(&data->pages))
1003 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW);
968 1004
969 /* Set up the argument struct */ 1005 /* Set up the argument struct */
970 return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how); 1006 ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags);
971 out_bad: 1007out:
972 while (!list_empty(head)) { 1008 put_lseg(lseg); /* Cleans any gotten in ->pg_test */
973 req = nfs_list_entry(head->next); 1009 desc->pg_lseg = NULL;
974 nfs_list_remove_request(req); 1010 return ret;
975 nfs_redirty_request(req);
976 }
977 return -ENOMEM;
978} 1011}
979 1012
980static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, 1013static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
@@ -982,6 +1015,8 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
982{ 1015{
983 size_t wsize = NFS_SERVER(inode)->wsize; 1016 size_t wsize = NFS_SERVER(inode)->wsize;
984 1017
1018 pnfs_pageio_init_write(pgio, inode);
1019
985 if (wsize < PAGE_CACHE_SIZE) 1020 if (wsize < PAGE_CACHE_SIZE)
986 nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags); 1021 nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
987 else 1022 else
@@ -1132,7 +1167,7 @@ static const struct rpc_call_ops nfs_write_full_ops = {
1132/* 1167/*
1133 * This function is called when the WRITE call is complete. 1168 * This function is called when the WRITE call is complete.
1134 */ 1169 */
1135int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) 1170void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1136{ 1171{
1137 struct nfs_writeargs *argp = &data->args; 1172 struct nfs_writeargs *argp = &data->args;
1138 struct nfs_writeres *resp = &data->res; 1173 struct nfs_writeres *resp = &data->res;
@@ -1151,7 +1186,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1151 */ 1186 */
1152 status = NFS_PROTO(data->inode)->write_done(task, data); 1187 status = NFS_PROTO(data->inode)->write_done(task, data);
1153 if (status != 0) 1188 if (status != 0)
1154 return status; 1189 return;
1155 nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count); 1190 nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
1156 1191
1157#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 1192#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
@@ -1166,6 +1201,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1166 */ 1201 */
1167 static unsigned long complain; 1202 static unsigned long complain;
1168 1203
1204 /* Note this will print the MDS for a DS write */
1169 if (time_before(complain, jiffies)) { 1205 if (time_before(complain, jiffies)) {
1170 dprintk("NFS: faulty NFS server %s:" 1206 dprintk("NFS: faulty NFS server %s:"
1171 " (committed = %d) != (stable = %d)\n", 1207 " (committed = %d) != (stable = %d)\n",
@@ -1186,6 +1222,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1186 /* Was this an NFSv2 write or an NFSv3 stable write? */ 1222 /* Was this an NFSv2 write or an NFSv3 stable write? */
1187 if (resp->verf->committed != NFS_UNSTABLE) { 1223 if (resp->verf->committed != NFS_UNSTABLE) {
1188 /* Resend from where the server left off */ 1224 /* Resend from where the server left off */
1225 data->mds_offset += resp->count;
1189 argp->offset += resp->count; 1226 argp->offset += resp->count;
1190 argp->pgbase += resp->count; 1227 argp->pgbase += resp->count;
1191 argp->count -= resp->count; 1228 argp->count -= resp->count;
@@ -1196,7 +1233,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1196 argp->stable = NFS_FILE_SYNC; 1233 argp->stable = NFS_FILE_SYNC;
1197 } 1234 }
1198 nfs_restart_rpc(task, server->nfs_client); 1235 nfs_restart_rpc(task, server->nfs_client);
1199 return -EAGAIN; 1236 return;
1200 } 1237 }
1201 if (time_before(complain, jiffies)) { 1238 if (time_before(complain, jiffies)) {
1202 printk(KERN_WARNING 1239 printk(KERN_WARNING
@@ -1207,7 +1244,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1207 /* Can't do anything about it except throw an error. */ 1244 /* Can't do anything about it except throw an error. */
1208 task->tk_status = -EIO; 1245 task->tk_status = -EIO;
1209 } 1246 }
1210 return 0; 1247 return;
1211} 1248}
1212 1249
1213 1250
@@ -1292,6 +1329,8 @@ static int nfs_commit_rpcsetup(struct list_head *head,
1292 task = rpc_run_task(&task_setup_data); 1329 task = rpc_run_task(&task_setup_data);
1293 if (IS_ERR(task)) 1330 if (IS_ERR(task))
1294 return PTR_ERR(task); 1331 return PTR_ERR(task);
1332 if (how & FLUSH_SYNC)
1333 rpc_wait_for_completion_task(task);
1295 rpc_put_task(task); 1334 rpc_put_task(task);
1296 return 0; 1335 return 0;
1297} 1336}