aboutsummaryrefslogtreecommitdiffstats
path: root/fs/nfs
diff options
context:
space:
mode:
author Thomas Gleixner <tglx@linutronix.de> 2011-05-14 06:06:36 -0400
committer Thomas Gleixner <tglx@linutronix.de> 2011-05-14 06:06:36 -0400
commit a18f22a968de17b29f2310cdb7ba69163e65ec15 (patch)
tree a7d56d88fad5e444d7661484109758a2f436129e /fs/nfs
parent a1c57e0fec53defe745e64417eacdbd3618c3e66 (diff)
parent 798778b8653f64b7b2162ac70eca10367cff6ce8 (diff)
Merge branch 'consolidate-clksrc-i8253' of master.kernel.org:~rmk/linux-2.6-arm into timers/clocksource
Conflicts:
	arch/ia64/kernel/cyclone.c
	arch/mips/kernel/i8253.c
	arch/x86/kernel/i8253.c

Reason: Resolve conflicts so further cleanups do not conflict further

Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Diffstat (limited to 'fs/nfs')
-rw-r--r-- fs/nfs/callback_proc.c 2
-rw-r--r-- fs/nfs/callback_xdr.c 2
-rw-r--r-- fs/nfs/client.c 131
-rw-r--r-- fs/nfs/dir.c 102
-rw-r--r-- fs/nfs/direct.c 8
-rw-r--r-- fs/nfs/file.c 9
-rw-r--r-- fs/nfs/getroot.c 46
-rw-r--r-- fs/nfs/idmap.c 90
-rw-r--r-- fs/nfs/inode.c 19
-rw-r--r-- fs/nfs/internal.h 70
-rw-r--r-- fs/nfs/namespace.c 173
-rw-r--r-- fs/nfs/nfs3proc.c 3
-rw-r--r-- fs/nfs/nfs4_fs.h 44
-rw-r--r-- fs/nfs/nfs4filelayout.c 695
-rw-r--r-- fs/nfs/nfs4filelayout.h 23
-rw-r--r-- fs/nfs/nfs4filelayoutdev.c 434
-rw-r--r-- fs/nfs/nfs4namespace.c 41
-rw-r--r-- fs/nfs/nfs4proc.c 628
-rw-r--r-- fs/nfs/nfs4renewd.c 6
-rw-r--r-- fs/nfs/nfs4state.c 89
-rw-r--r-- fs/nfs/nfs4xdr.c 392
-rw-r--r-- fs/nfs/nfsroot.c 29
-rw-r--r-- fs/nfs/pagelist.c 34
-rw-r--r-- fs/nfs/pnfs.c 468
-rw-r--r-- fs/nfs/pnfs.h 201
-rw-r--r-- fs/nfs/proc.c 3
-rw-r--r-- fs/nfs/read.c 127
-rw-r--r-- fs/nfs/super.c 491
-rw-r--r-- fs/nfs/unlink.c 22
-rw-r--r-- fs/nfs/write.c 388
30 files changed, 3519 insertions, 1251 deletions
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 89587573fe50..2f41dccea18e 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -188,10 +188,10 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
188 rv = NFS4ERR_DELAY; 188 rv = NFS4ERR_DELAY;
189 list_del_init(&lo->plh_bulk_recall); 189 list_del_init(&lo->plh_bulk_recall);
190 spin_unlock(&ino->i_lock); 190 spin_unlock(&ino->i_lock);
191 pnfs_free_lseg_list(&free_me_list);
191 put_layout_hdr(lo); 192 put_layout_hdr(lo);
192 iput(ino); 193 iput(ino);
193 } 194 }
194 pnfs_free_lseg_list(&free_me_list);
195 return rv; 195 return rv;
196} 196}
197 197
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 14e0f9371d14..00ecf62ce7c1 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -241,7 +241,7 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
241 241
242 args->cbl_layout_type = ntohl(*p++); 242 args->cbl_layout_type = ntohl(*p++);
243 /* Depite the spec's xdr, iomode really belongs in the FILE switch, 243 /* Depite the spec's xdr, iomode really belongs in the FILE switch,
244 * as it is unuseable and ignored with the other types. 244 * as it is unusable and ignored with the other types.
245 */ 245 */
246 iomode = ntohl(*p++); 246 iomode = ntohl(*p++);
247 args->cbl_layoutchanged = ntohl(*p++); 247 args->cbl_layoutchanged = ntohl(*p++);
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index bd3ca32879e7..139be9647d80 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -82,6 +82,11 @@ retry:
82#endif /* CONFIG_NFS_V4 */ 82#endif /* CONFIG_NFS_V4 */
83 83
84/* 84/*
85 * Turn off NFSv4 uid/gid mapping when using AUTH_SYS
86 */
87static int nfs4_disable_idmapping = 0;
88
89/*
85 * RPC cruft for NFS 90 * RPC cruft for NFS
86 */ 91 */
87static struct rpc_version *nfs_version[5] = { 92static struct rpc_version *nfs_version[5] = {
@@ -481,7 +486,12 @@ static struct nfs_client *nfs_match_client(const struct nfs_client_initdata *dat
481 * Look up a client by IP address and protocol version 486 * Look up a client by IP address and protocol version
482 * - creates a new record if one doesn't yet exist 487 * - creates a new record if one doesn't yet exist
483 */ 488 */
484static struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init) 489static struct nfs_client *
490nfs_get_client(const struct nfs_client_initdata *cl_init,
491 const struct rpc_timeout *timeparms,
492 const char *ip_addr,
493 rpc_authflavor_t authflavour,
494 int noresvport)
485{ 495{
486 struct nfs_client *clp, *new = NULL; 496 struct nfs_client *clp, *new = NULL;
487 int error; 497 int error;
@@ -512,6 +522,13 @@ install_client:
512 clp = new; 522 clp = new;
513 list_add(&clp->cl_share_link, &nfs_client_list); 523 list_add(&clp->cl_share_link, &nfs_client_list);
514 spin_unlock(&nfs_client_lock); 524 spin_unlock(&nfs_client_lock);
525
526 error = cl_init->rpc_ops->init_client(clp, timeparms, ip_addr,
527 authflavour, noresvport);
528 if (error < 0) {
529 nfs_put_client(clp);
530 return ERR_PTR(error);
531 }
515 dprintk("--> nfs_get_client() = %p [new]\n", clp); 532 dprintk("--> nfs_get_client() = %p [new]\n", clp);
516 return clp; 533 return clp;
517 534
@@ -767,9 +784,9 @@ static int nfs_init_server_rpcclient(struct nfs_server *server,
767/* 784/*
768 * Initialise an NFS2 or NFS3 client 785 * Initialise an NFS2 or NFS3 client
769 */ 786 */
770static int nfs_init_client(struct nfs_client *clp, 787int nfs_init_client(struct nfs_client *clp, const struct rpc_timeout *timeparms,
771 const struct rpc_timeout *timeparms, 788 const char *ip_addr, rpc_authflavor_t authflavour,
772 const struct nfs_parsed_mount_data *data) 789 int noresvport)
773{ 790{
774 int error; 791 int error;
775 792
@@ -784,7 +801,7 @@ static int nfs_init_client(struct nfs_client *clp,
784 * - RFC 2623, sec 2.3.2 801 * - RFC 2623, sec 2.3.2
785 */ 802 */
786 error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX, 803 error = nfs_create_rpc_client(clp, timeparms, RPC_AUTH_UNIX,
787 0, data->flags & NFS_MOUNT_NORESVPORT); 804 0, noresvport);
788 if (error < 0) 805 if (error < 0)
789 goto error; 806 goto error;
790 nfs_mark_client_ready(clp, NFS_CS_READY); 807 nfs_mark_client_ready(clp, NFS_CS_READY);
@@ -820,19 +837,17 @@ static int nfs_init_server(struct nfs_server *server,
820 cl_init.rpc_ops = &nfs_v3_clientops; 837 cl_init.rpc_ops = &nfs_v3_clientops;
821#endif 838#endif
822 839
840 nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
841 data->timeo, data->retrans);
842
823 /* Allocate or find a client reference we can use */ 843 /* Allocate or find a client reference we can use */
824 clp = nfs_get_client(&cl_init); 844 clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX,
845 data->flags & NFS_MOUNT_NORESVPORT);
825 if (IS_ERR(clp)) { 846 if (IS_ERR(clp)) {
826 dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp)); 847 dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
827 return PTR_ERR(clp); 848 return PTR_ERR(clp);
828 } 849 }
829 850
830 nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
831 data->timeo, data->retrans);
832 error = nfs_init_client(clp, &timeparms, data);
833 if (error < 0)
834 goto error;
835
836 server->nfs_client = clp; 851 server->nfs_client = clp;
837 852
838 /* Initialise the client representation from the mount data */ 853 /* Initialise the client representation from the mount data */
@@ -1009,14 +1024,19 @@ static void nfs_server_insert_lists(struct nfs_server *server)
1009 spin_lock(&nfs_client_lock); 1024 spin_lock(&nfs_client_lock);
1010 list_add_tail_rcu(&server->client_link, &clp->cl_superblocks); 1025 list_add_tail_rcu(&server->client_link, &clp->cl_superblocks);
1011 list_add_tail(&server->master_link, &nfs_volume_list); 1026 list_add_tail(&server->master_link, &nfs_volume_list);
1027 clear_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state);
1012 spin_unlock(&nfs_client_lock); 1028 spin_unlock(&nfs_client_lock);
1013 1029
1014} 1030}
1015 1031
1016static void nfs_server_remove_lists(struct nfs_server *server) 1032static void nfs_server_remove_lists(struct nfs_server *server)
1017{ 1033{
1034 struct nfs_client *clp = server->nfs_client;
1035
1018 spin_lock(&nfs_client_lock); 1036 spin_lock(&nfs_client_lock);
1019 list_del_rcu(&server->client_link); 1037 list_del_rcu(&server->client_link);
1038 if (clp && list_empty(&clp->cl_superblocks))
1039 set_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state);
1020 list_del(&server->master_link); 1040 list_del(&server->master_link);
1021 spin_unlock(&nfs_client_lock); 1041 spin_unlock(&nfs_client_lock);
1022 1042
@@ -1307,11 +1327,11 @@ static int nfs4_init_client_minor_version(struct nfs_client *clp)
1307/* 1327/*
1308 * Initialise an NFS4 client record 1328 * Initialise an NFS4 client record
1309 */ 1329 */
1310static int nfs4_init_client(struct nfs_client *clp, 1330int nfs4_init_client(struct nfs_client *clp,
1311 const struct rpc_timeout *timeparms, 1331 const struct rpc_timeout *timeparms,
1312 const char *ip_addr, 1332 const char *ip_addr,
1313 rpc_authflavor_t authflavour, 1333 rpc_authflavor_t authflavour,
1314 int flags) 1334 int noresvport)
1315{ 1335{
1316 int error; 1336 int error;
1317 1337
@@ -1325,7 +1345,7 @@ static int nfs4_init_client(struct nfs_client *clp,
1325 clp->rpc_ops = &nfs_v4_clientops; 1345 clp->rpc_ops = &nfs_v4_clientops;
1326 1346
1327 error = nfs_create_rpc_client(clp, timeparms, authflavour, 1347 error = nfs_create_rpc_client(clp, timeparms, authflavour,
1328 1, flags & NFS_MOUNT_NORESVPORT); 1348 1, noresvport);
1329 if (error < 0) 1349 if (error < 0)
1330 goto error; 1350 goto error;
1331 strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr)); 1351 strlcpy(clp->cl_ipaddr, ip_addr, sizeof(clp->cl_ipaddr));
@@ -1378,27 +1398,71 @@ static int nfs4_set_client(struct nfs_server *server,
1378 dprintk("--> nfs4_set_client()\n"); 1398 dprintk("--> nfs4_set_client()\n");
1379 1399
1380 /* Allocate or find a client reference we can use */ 1400 /* Allocate or find a client reference we can use */
1381 clp = nfs_get_client(&cl_init); 1401 clp = nfs_get_client(&cl_init, timeparms, ip_addr, authflavour,
1402 server->flags & NFS_MOUNT_NORESVPORT);
1382 if (IS_ERR(clp)) { 1403 if (IS_ERR(clp)) {
1383 error = PTR_ERR(clp); 1404 error = PTR_ERR(clp);
1384 goto error; 1405 goto error;
1385 } 1406 }
1386 error = nfs4_init_client(clp, timeparms, ip_addr, authflavour, 1407
1387 server->flags); 1408 /*
1388 if (error < 0) 1409 * Query for the lease time on clientid setup or renewal
1389 goto error_put; 1410 *
1411 * Note that this will be set on nfs_clients that were created
1412 * only for the DS role and did not set this bit, but now will
1413 * serve a dual role.
1414 */
1415 set_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state);
1390 1416
1391 server->nfs_client = clp; 1417 server->nfs_client = clp;
1392 dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp); 1418 dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp);
1393 return 0; 1419 return 0;
1394
1395error_put:
1396 nfs_put_client(clp);
1397error: 1420error:
1398 dprintk("<-- nfs4_set_client() = xerror %d\n", error); 1421 dprintk("<-- nfs4_set_client() = xerror %d\n", error);
1399 return error; 1422 return error;
1400} 1423}
1401 1424
1425/*
1426 * Set up a pNFS Data Server client.
1427 *
1428 * Return any existing nfs_client that matches server address,port,version
1429 * and minorversion.
1430 *
1431 * For a new nfs_client, use a soft mount (default), a low retrans and a
1432 * low timeout interval so that if a connection is lost, we retry through
1433 * the MDS.
1434 */
1435struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
1436 const struct sockaddr *ds_addr,
1437 int ds_addrlen, int ds_proto)
1438{
1439 struct nfs_client_initdata cl_init = {
1440 .addr = ds_addr,
1441 .addrlen = ds_addrlen,
1442 .rpc_ops = &nfs_v4_clientops,
1443 .proto = ds_proto,
1444 .minorversion = mds_clp->cl_minorversion,
1445 };
1446 struct rpc_timeout ds_timeout = {
1447 .to_initval = 15 * HZ,
1448 .to_maxval = 15 * HZ,
1449 .to_retries = 1,
1450 .to_exponential = 1,
1451 };
1452 struct nfs_client *clp;
1453
1454 /*
1455 * Set an authflavor equual to the MDS value. Use the MDS nfs_client
1456 * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS
1457 * (section 13.1 RFC 5661).
1458 */
1459 clp = nfs_get_client(&cl_init, &ds_timeout, mds_clp->cl_ipaddr,
1460 mds_clp->cl_rpcclient->cl_auth->au_flavor, 0);
1461
1462 dprintk("<-- %s %p\n", __func__, clp);
1463 return clp;
1464}
1465EXPORT_SYMBOL(nfs4_set_ds_client);
1402 1466
1403/* 1467/*
1404 * Session has been established, and the client marked ready. 1468 * Session has been established, and the client marked ready.
@@ -1435,6 +1499,10 @@ static int nfs4_server_common_setup(struct nfs_server *server,
1435 BUG_ON(!server->nfs_client->rpc_ops); 1499 BUG_ON(!server->nfs_client->rpc_ops);
1436 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops); 1500 BUG_ON(!server->nfs_client->rpc_ops->file_inode_ops);
1437 1501
1502 /* data servers support only a subset of NFSv4.1 */
1503 if (is_ds_only_client(server->nfs_client))
1504 return -EPROTONOSUPPORT;
1505
1438 fattr = nfs_alloc_fattr(); 1506 fattr = nfs_alloc_fattr();
1439 if (fattr == NULL) 1507 if (fattr == NULL)
1440 return -ENOMEM; 1508 return -ENOMEM;
@@ -1504,6 +1572,13 @@ static int nfs4_init_server(struct nfs_server *server,
1504 if (error < 0) 1572 if (error < 0)
1505 goto error; 1573 goto error;
1506 1574
1575 /*
1576 * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower
1577 * authentication.
1578 */
1579 if (nfs4_disable_idmapping && data->auth_flavors[0] == RPC_AUTH_UNIX)
1580 server->caps |= NFS_CAP_UIDGID_NOMAP;
1581
1507 if (data->rsize) 1582 if (data->rsize)
1508 server->rsize = nfs_block_size(data->rsize, NULL); 1583 server->rsize = nfs_block_size(data->rsize, NULL);
1509 if (data->wsize) 1584 if (data->wsize)
@@ -1921,3 +1996,7 @@ void nfs_fs_proc_exit(void)
1921} 1996}
1922 1997
1923#endif /* CONFIG_PROC_FS */ 1998#endif /* CONFIG_PROC_FS */
1999
2000module_param(nfs4_disable_idmapping, bool, 0644);
2001MODULE_PARM_DESC(nfs4_disable_idmapping,
2002 "Turn off NFSv4 idmapping when using 'sec=sys'");
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 2c3eb33b904d..7237672216c8 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -44,6 +44,7 @@
44/* #define NFS_DEBUG_VERBOSE 1 */ 44/* #define NFS_DEBUG_VERBOSE 1 */
45 45
46static int nfs_opendir(struct inode *, struct file *); 46static int nfs_opendir(struct inode *, struct file *);
47static int nfs_closedir(struct inode *, struct file *);
47static int nfs_readdir(struct file *, void *, filldir_t); 48static int nfs_readdir(struct file *, void *, filldir_t);
48static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *); 49static struct dentry *nfs_lookup(struct inode *, struct dentry *, struct nameidata *);
49static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *); 50static int nfs_create(struct inode *, struct dentry *, int, struct nameidata *);
@@ -64,7 +65,7 @@ const struct file_operations nfs_dir_operations = {
64 .read = generic_read_dir, 65 .read = generic_read_dir,
65 .readdir = nfs_readdir, 66 .readdir = nfs_readdir,
66 .open = nfs_opendir, 67 .open = nfs_opendir,
67 .release = nfs_release, 68 .release = nfs_closedir,
68 .fsync = nfs_fsync_dir, 69 .fsync = nfs_fsync_dir,
69}; 70};
70 71
@@ -133,13 +134,35 @@ const struct inode_operations nfs4_dir_inode_operations = {
133 134
134#endif /* CONFIG_NFS_V4 */ 135#endif /* CONFIG_NFS_V4 */
135 136
137static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct rpc_cred *cred)
138{
139 struct nfs_open_dir_context *ctx;
140 ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
141 if (ctx != NULL) {
142 ctx->duped = 0;
143 ctx->dir_cookie = 0;
144 ctx->dup_cookie = 0;
145 ctx->cred = get_rpccred(cred);
146 } else
147 ctx = ERR_PTR(-ENOMEM);
148 return ctx;
149}
150
151static void put_nfs_open_dir_context(struct nfs_open_dir_context *ctx)
152{
153 put_rpccred(ctx->cred);
154 kfree(ctx);
155}
156
136/* 157/*
137 * Open file 158 * Open file
138 */ 159 */
139static int 160static int
140nfs_opendir(struct inode *inode, struct file *filp) 161nfs_opendir(struct inode *inode, struct file *filp)
141{ 162{
142 int res; 163 int res = 0;
164 struct nfs_open_dir_context *ctx;
165 struct rpc_cred *cred;
143 166
144 dfprintk(FILE, "NFS: open dir(%s/%s)\n", 167 dfprintk(FILE, "NFS: open dir(%s/%s)\n",
145 filp->f_path.dentry->d_parent->d_name.name, 168 filp->f_path.dentry->d_parent->d_name.name,
@@ -147,8 +170,15 @@ nfs_opendir(struct inode *inode, struct file *filp)
147 170
148 nfs_inc_stats(inode, NFSIOS_VFSOPEN); 171 nfs_inc_stats(inode, NFSIOS_VFSOPEN);
149 172
150 /* Call generic open code in order to cache credentials */ 173 cred = rpc_lookup_cred();
151 res = nfs_open(inode, filp); 174 if (IS_ERR(cred))
175 return PTR_ERR(cred);
176 ctx = alloc_nfs_open_dir_context(cred);
177 if (IS_ERR(ctx)) {
178 res = PTR_ERR(ctx);
179 goto out;
180 }
181 filp->private_data = ctx;
152 if (filp->f_path.dentry == filp->f_path.mnt->mnt_root) { 182 if (filp->f_path.dentry == filp->f_path.mnt->mnt_root) {
153 /* This is a mountpoint, so d_revalidate will never 183 /* This is a mountpoint, so d_revalidate will never
154 * have been called, so we need to refresh the 184 * have been called, so we need to refresh the
@@ -156,9 +186,18 @@ nfs_opendir(struct inode *inode, struct file *filp)
156 */ 186 */
157 __nfs_revalidate_inode(NFS_SERVER(inode), inode); 187 __nfs_revalidate_inode(NFS_SERVER(inode), inode);
158 } 188 }
189out:
190 put_rpccred(cred);
159 return res; 191 return res;
160} 192}
161 193
194static int
195nfs_closedir(struct inode *inode, struct file *filp)
196{
197 put_nfs_open_dir_context(filp->private_data);
198 return 0;
199}
200
162struct nfs_cache_array_entry { 201struct nfs_cache_array_entry {
163 u64 cookie; 202 u64 cookie;
164 u64 ino; 203 u64 ino;
@@ -284,19 +323,20 @@ int nfs_readdir_search_for_pos(struct nfs_cache_array *array, nfs_readdir_descri
284{ 323{
285 loff_t diff = desc->file->f_pos - desc->current_index; 324 loff_t diff = desc->file->f_pos - desc->current_index;
286 unsigned int index; 325 unsigned int index;
326 struct nfs_open_dir_context *ctx = desc->file->private_data;
287 327
288 if (diff < 0) 328 if (diff < 0)
289 goto out_eof; 329 goto out_eof;
290 if (diff >= array->size) { 330 if (diff >= array->size) {
291 if (array->eof_index >= 0) 331 if (array->eof_index >= 0)
292 goto out_eof; 332 goto out_eof;
293 desc->current_index += array->size;
294 return -EAGAIN; 333 return -EAGAIN;
295 } 334 }
296 335
297 index = (unsigned int)diff; 336 index = (unsigned int)diff;
298 *desc->dir_cookie = array->array[index].cookie; 337 *desc->dir_cookie = array->array[index].cookie;
299 desc->cache_entry_index = index; 338 desc->cache_entry_index = index;
339 ctx->duped = 0;
300 return 0; 340 return 0;
301out_eof: 341out_eof:
302 desc->eof = 1; 342 desc->eof = 1;
@@ -307,10 +347,18 @@ static
307int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc) 347int nfs_readdir_search_for_cookie(struct nfs_cache_array *array, nfs_readdir_descriptor_t *desc)
308{ 348{
309 int i; 349 int i;
350 loff_t new_pos;
310 int status = -EAGAIN; 351 int status = -EAGAIN;
352 struct nfs_open_dir_context *ctx = desc->file->private_data;
311 353
312 for (i = 0; i < array->size; i++) { 354 for (i = 0; i < array->size; i++) {
313 if (array->array[i].cookie == *desc->dir_cookie) { 355 if (array->array[i].cookie == *desc->dir_cookie) {
356 new_pos = desc->current_index + i;
357 if (new_pos < desc->file->f_pos) {
358 ctx->dup_cookie = *desc->dir_cookie;
359 ctx->duped = 1;
360 }
361 desc->file->f_pos = new_pos;
314 desc->cache_entry_index = i; 362 desc->cache_entry_index = i;
315 return 0; 363 return 0;
316 } 364 }
@@ -342,6 +390,7 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
342 390
343 if (status == -EAGAIN) { 391 if (status == -EAGAIN) {
344 desc->last_cookie = array->last_cookie; 392 desc->last_cookie = array->last_cookie;
393 desc->current_index += array->size;
345 desc->page_index++; 394 desc->page_index++;
346 } 395 }
347 nfs_readdir_release_array(desc->page); 396 nfs_readdir_release_array(desc->page);
@@ -354,7 +403,8 @@ static
354int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc, 403int nfs_readdir_xdr_filler(struct page **pages, nfs_readdir_descriptor_t *desc,
355 struct nfs_entry *entry, struct file *file, struct inode *inode) 404 struct nfs_entry *entry, struct file *file, struct inode *inode)
356{ 405{
357 struct rpc_cred *cred = nfs_file_cred(file); 406 struct nfs_open_dir_context *ctx = file->private_data;
407 struct rpc_cred *cred = ctx->cred;
358 unsigned long timestamp, gencount; 408 unsigned long timestamp, gencount;
359 int error; 409 int error;
360 410
@@ -693,6 +743,20 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc, void *dirent,
693 int i = 0; 743 int i = 0;
694 int res = 0; 744 int res = 0;
695 struct nfs_cache_array *array = NULL; 745 struct nfs_cache_array *array = NULL;
746 struct nfs_open_dir_context *ctx = file->private_data;
747
748 if (ctx->duped != 0 && ctx->dup_cookie == *desc->dir_cookie) {
749 if (printk_ratelimit()) {
750 pr_notice("NFS: directory %s/%s contains a readdir loop. "
751 "Please contact your server vendor. "
752 "Offending cookie: %llu\n",
753 file->f_dentry->d_parent->d_name.name,
754 file->f_dentry->d_name.name,
755 *desc->dir_cookie);
756 }
757 res = -ELOOP;
758 goto out;
759 }
696 760
697 array = nfs_readdir_get_array(desc->page); 761 array = nfs_readdir_get_array(desc->page);
698 if (IS_ERR(array)) { 762 if (IS_ERR(array)) {
@@ -785,6 +849,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
785 struct inode *inode = dentry->d_inode; 849 struct inode *inode = dentry->d_inode;
786 nfs_readdir_descriptor_t my_desc, 850 nfs_readdir_descriptor_t my_desc,
787 *desc = &my_desc; 851 *desc = &my_desc;
852 struct nfs_open_dir_context *dir_ctx = filp->private_data;
788 int res; 853 int res;
789 854
790 dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n", 855 dfprintk(FILE, "NFS: readdir(%s/%s) starting at cookie %llu\n",
@@ -801,7 +866,7 @@ static int nfs_readdir(struct file *filp, void *dirent, filldir_t filldir)
801 memset(desc, 0, sizeof(*desc)); 866 memset(desc, 0, sizeof(*desc));
802 867
803 desc->file = filp; 868 desc->file = filp;
804 desc->dir_cookie = &nfs_file_open_context(filp)->dir_cookie; 869 desc->dir_cookie = &dir_ctx->dir_cookie;
805 desc->decode = NFS_PROTO(inode)->decode_dirent; 870 desc->decode = NFS_PROTO(inode)->decode_dirent;
806 desc->plus = NFS_USE_READDIRPLUS(inode); 871 desc->plus = NFS_USE_READDIRPLUS(inode);
807 872
@@ -853,6 +918,7 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
853{ 918{
854 struct dentry *dentry = filp->f_path.dentry; 919 struct dentry *dentry = filp->f_path.dentry;
855 struct inode *inode = dentry->d_inode; 920 struct inode *inode = dentry->d_inode;
921 struct nfs_open_dir_context *dir_ctx = filp->private_data;
856 922
857 dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n", 923 dfprintk(FILE, "NFS: llseek dir(%s/%s, %lld, %d)\n",
858 dentry->d_parent->d_name.name, 924 dentry->d_parent->d_name.name,
@@ -872,7 +938,8 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int origin)
872 } 938 }
873 if (offset != filp->f_pos) { 939 if (offset != filp->f_pos) {
874 filp->f_pos = offset; 940 filp->f_pos = offset;
875 nfs_file_open_context(filp)->dir_cookie = 0; 941 dir_ctx->dir_cookie = 0;
942 dir_ctx->duped = 0;
876 } 943 }
877out: 944out:
878 mutex_unlock(&inode->i_mutex); 945 mutex_unlock(&inode->i_mutex);
@@ -1068,7 +1135,7 @@ static int nfs_lookup_revalidate(struct dentry *dentry, struct nameidata *nd)
1068 if (fhandle == NULL || fattr == NULL) 1135 if (fhandle == NULL || fattr == NULL)
1069 goto out_error; 1136 goto out_error;
1070 1137
1071 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); 1138 error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr);
1072 if (error) 1139 if (error)
1073 goto out_bad; 1140 goto out_bad;
1074 if (nfs_compare_fh(NFS_FH(inode), fhandle)) 1141 if (nfs_compare_fh(NFS_FH(inode), fhandle))
@@ -1169,11 +1236,23 @@ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode)
1169 iput(inode); 1236 iput(inode);
1170} 1237}
1171 1238
1239static void nfs_d_release(struct dentry *dentry)
1240{
1241 /* free cached devname value, if it survived that far */
1242 if (unlikely(dentry->d_fsdata)) {
1243 if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
1244 WARN_ON(1);
1245 else
1246 kfree(dentry->d_fsdata);
1247 }
1248}
1249
1172const struct dentry_operations nfs_dentry_operations = { 1250const struct dentry_operations nfs_dentry_operations = {
1173 .d_revalidate = nfs_lookup_revalidate, 1251 .d_revalidate = nfs_lookup_revalidate,
1174 .d_delete = nfs_dentry_delete, 1252 .d_delete = nfs_dentry_delete,
1175 .d_iput = nfs_dentry_iput, 1253 .d_iput = nfs_dentry_iput,
1176 .d_automount = nfs_d_automount, 1254 .d_automount = nfs_d_automount,
1255 .d_release = nfs_d_release,
1177}; 1256};
1178 1257
1179static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd) 1258static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, struct nameidata *nd)
@@ -1212,7 +1291,7 @@ static struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, stru
1212 parent = dentry->d_parent; 1291 parent = dentry->d_parent;
1213 /* Protect against concurrent sillydeletes */ 1292 /* Protect against concurrent sillydeletes */
1214 nfs_block_sillyrename(parent); 1293 nfs_block_sillyrename(parent);
1215 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); 1294 error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr);
1216 if (error == -ENOENT) 1295 if (error == -ENOENT)
1217 goto no_entry; 1296 goto no_entry;
1218 if (error < 0) { 1297 if (error < 0) {
@@ -1248,6 +1327,7 @@ const struct dentry_operations nfs4_dentry_operations = {
1248 .d_delete = nfs_dentry_delete, 1327 .d_delete = nfs_dentry_delete,
1249 .d_iput = nfs_dentry_iput, 1328 .d_iput = nfs_dentry_iput,
1250 .d_automount = nfs_d_automount, 1329 .d_automount = nfs_d_automount,
1330 .d_release = nfs_d_release,
1251}; 1331};
1252 1332
1253/* 1333/*
@@ -1549,7 +1629,7 @@ int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
1549 if (dentry->d_inode) 1629 if (dentry->d_inode)
1550 goto out; 1630 goto out;
1551 if (fhandle->size == 0) { 1631 if (fhandle->size == 0) {
1552 error = NFS_PROTO(dir)->lookup(dir, &dentry->d_name, fhandle, fattr); 1632 error = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, &dentry->d_name, fhandle, fattr);
1553 if (error) 1633 if (error)
1554 goto out_error; 1634 goto out_error;
1555 } 1635 }
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 9943a75bb6d1..8eea25366717 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -45,6 +45,7 @@
45#include <linux/pagemap.h> 45#include <linux/pagemap.h>
46#include <linux/kref.h> 46#include <linux/kref.h>
47#include <linux/slab.h> 47#include <linux/slab.h>
48#include <linux/task_io_accounting_ops.h>
48 49
49#include <linux/nfs_fs.h> 50#include <linux/nfs_fs.h>
50#include <linux/nfs_page.h> 51#include <linux/nfs_page.h>
@@ -649,8 +650,7 @@ static void nfs_direct_write_result(struct rpc_task *task, void *calldata)
649{ 650{
650 struct nfs_write_data *data = calldata; 651 struct nfs_write_data *data = calldata;
651 652
652 if (nfs_writeback_done(task, data) != 0) 653 nfs_writeback_done(task, data);
653 return;
654} 654}
655 655
656/* 656/*
@@ -938,6 +938,8 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov,
938 if (retval) 938 if (retval)
939 goto out; 939 goto out;
940 940
941 task_io_account_read(count);
942
941 retval = nfs_direct_read(iocb, iov, nr_segs, pos); 943 retval = nfs_direct_read(iocb, iov, nr_segs, pos);
942 if (retval > 0) 944 if (retval > 0)
943 iocb->ki_pos = pos + retval; 945 iocb->ki_pos = pos + retval;
@@ -999,6 +1001,8 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov,
999 if (retval) 1001 if (retval)
1000 goto out; 1002 goto out;
1001 1003
1004 task_io_account_write(count);
1005
1002 retval = nfs_direct_write(iocb, iov, nr_segs, pos, count); 1006 retval = nfs_direct_write(iocb, iov, nr_segs, pos, count);
1003 1007
1004 if (retval > 0) 1008 if (retval > 0)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 7bf029ef4084..2f093ed16980 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -301,7 +301,7 @@ nfs_file_mmap(struct file * file, struct vm_area_struct * vma)
301 * disk, but it retrieves and clears ctx->error after synching, despite 301 * disk, but it retrieves and clears ctx->error after synching, despite
302 * the two being set at the same time in nfs_context_set_write_error(). 302 * the two being set at the same time in nfs_context_set_write_error().
303 * This is because the former is used to notify the _next_ call to 303 * This is because the former is used to notify the _next_ call to
304 * nfs_file_write() that a write error occured, and hence cause it to 304 * nfs_file_write() that a write error occurred, and hence cause it to
305 * fall back to doing a synchronous write. 305 * fall back to doing a synchronous write.
306 */ 306 */
307static int 307static int
@@ -326,6 +326,9 @@ nfs_file_fsync(struct file *file, int datasync)
326 ret = xchg(&ctx->error, 0); 326 ret = xchg(&ctx->error, 0);
327 if (!ret && status < 0) 327 if (!ret && status < 0)
328 ret = status; 328 ret = status;
329 if (!ret && !datasync)
330 /* application has asked for meta-data sync */
331 ret = pnfs_layoutcommit_inode(inode, true);
329 return ret; 332 return ret;
330} 333}
331 334
@@ -387,10 +390,6 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
387 file->f_path.dentry->d_name.name, 390 file->f_path.dentry->d_name.name,
388 mapping->host->i_ino, len, (long long) pos); 391 mapping->host->i_ino, len, (long long) pos);
389 392
390 pnfs_update_layout(mapping->host,
391 nfs_file_open_context(file),
392 IOMODE_RW);
393
394start: 393start:
395 /* 394 /*
396 * Prevent starvation issues if someone is doing a consistency 395 * Prevent starvation issues if someone is doing a consistency
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index b5ffe8fa291f..dcb61548887f 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -75,18 +75,25 @@ static int nfs_superblock_set_dummy_root(struct super_block *sb, struct inode *i
75/* 75/*
76 * get an NFS2/NFS3 root dentry from the root filehandle 76 * get an NFS2/NFS3 root dentry from the root filehandle
77 */ 77 */
78struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh) 78struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh,
79 const char *devname)
79{ 80{
80 struct nfs_server *server = NFS_SB(sb); 81 struct nfs_server *server = NFS_SB(sb);
81 struct nfs_fsinfo fsinfo; 82 struct nfs_fsinfo fsinfo;
82 struct dentry *ret; 83 struct dentry *ret;
83 struct inode *inode; 84 struct inode *inode;
85 void *name = kstrdup(devname, GFP_KERNEL);
84 int error; 86 int error;
85 87
88 if (!name)
89 return ERR_PTR(-ENOMEM);
90
86 /* get the actual root for this mount */ 91 /* get the actual root for this mount */
87 fsinfo.fattr = nfs_alloc_fattr(); 92 fsinfo.fattr = nfs_alloc_fattr();
88 if (fsinfo.fattr == NULL) 93 if (fsinfo.fattr == NULL) {
94 kfree(name);
89 return ERR_PTR(-ENOMEM); 95 return ERR_PTR(-ENOMEM);
96 }
90 97
91 error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo); 98 error = server->nfs_client->rpc_ops->getroot(server, mntfh, &fsinfo);
92 if (error < 0) { 99 if (error < 0) {
@@ -119,7 +126,15 @@ struct dentry *nfs_get_root(struct super_block *sb, struct nfs_fh *mntfh)
119 } 126 }
120 127
121 security_d_instantiate(ret, inode); 128 security_d_instantiate(ret, inode);
129 spin_lock(&ret->d_lock);
130 if (IS_ROOT(ret) && !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
131 ret->d_fsdata = name;
132 name = NULL;
133 }
134 spin_unlock(&ret->d_lock);
122out: 135out:
136 if (name)
137 kfree(name);
123 nfs_free_fattr(fsinfo.fattr); 138 nfs_free_fattr(fsinfo.fattr);
124 return ret; 139 return ret;
125} 140}
@@ -169,27 +184,35 @@ out:
169/* 184/*
170 * get an NFS4 root dentry from the root filehandle 185 * get an NFS4 root dentry from the root filehandle
171 */ 186 */
172struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh) 187struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh,
188 const char *devname)
173{ 189{
174 struct nfs_server *server = NFS_SB(sb); 190 struct nfs_server *server = NFS_SB(sb);
175 struct nfs_fattr *fattr = NULL; 191 struct nfs_fattr *fattr = NULL;
176 struct dentry *ret; 192 struct dentry *ret;
177 struct inode *inode; 193 struct inode *inode;
194 void *name = kstrdup(devname, GFP_KERNEL);
178 int error; 195 int error;
179 196
180 dprintk("--> nfs4_get_root()\n"); 197 dprintk("--> nfs4_get_root()\n");
181 198
199 if (!name)
200 return ERR_PTR(-ENOMEM);
201
182 /* get the info about the server and filesystem */ 202 /* get the info about the server and filesystem */
183 error = nfs4_server_capabilities(server, mntfh); 203 error = nfs4_server_capabilities(server, mntfh);
184 if (error < 0) { 204 if (error < 0) {
185 dprintk("nfs_get_root: getcaps error = %d\n", 205 dprintk("nfs_get_root: getcaps error = %d\n",
186 -error); 206 -error);
207 kfree(name);
187 return ERR_PTR(error); 208 return ERR_PTR(error);
188 } 209 }
189 210
190 fattr = nfs_alloc_fattr(); 211 fattr = nfs_alloc_fattr();
191 if (fattr == NULL) 212 if (fattr == NULL) {
192 return ERR_PTR(-ENOMEM);; 213 kfree(name);
214 return ERR_PTR(-ENOMEM);
215 }
193 216
194 /* get the actual root for this mount */ 217 /* get the actual root for this mount */
195 error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr); 218 error = server->nfs_client->rpc_ops->getattr(server, mntfh, fattr);
@@ -199,6 +222,10 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
199 goto out; 222 goto out;
200 } 223 }
201 224
225 if (fattr->valid & NFS_ATTR_FATTR_FSID &&
226 !nfs_fsid_equal(&server->fsid, &fattr->fsid))
227 memcpy(&server->fsid, &fattr->fsid, sizeof(server->fsid));
228
202 inode = nfs_fhget(sb, mntfh, fattr); 229 inode = nfs_fhget(sb, mntfh, fattr);
203 if (IS_ERR(inode)) { 230 if (IS_ERR(inode)) {
204 dprintk("nfs_get_root: get root inode failed\n"); 231 dprintk("nfs_get_root: get root inode failed\n");
@@ -223,8 +250,15 @@ struct dentry *nfs4_get_root(struct super_block *sb, struct nfs_fh *mntfh)
223 } 250 }
224 251
225 security_d_instantiate(ret, inode); 252 security_d_instantiate(ret, inode);
226 253 spin_lock(&ret->d_lock);
254 if (IS_ROOT(ret) && !(ret->d_flags & DCACHE_NFSFS_RENAMED)) {
255 ret->d_fsdata = name;
256 name = NULL;
257 }
258 spin_unlock(&ret->d_lock);
227out: 259out:
260 if (name)
261 kfree(name);
228 nfs_free_fattr(fattr); 262 nfs_free_fattr(fattr);
229 dprintk("<-- nfs4_get_root()\n"); 263 dprintk("<-- nfs4_get_root()\n");
230 return ret; 264 return ret;
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index 18696882f1c6..79664a1025af 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -33,16 +33,41 @@
33 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
35 */ 35 */
36#include <linux/types.h>
37#include <linux/string.h>
38#include <linux/kernel.h>
39
40static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res)
41{
42 unsigned long val;
43 char buf[16];
44
45 if (memchr(name, '@', namelen) != NULL || namelen >= sizeof(buf))
46 return 0;
47 memcpy(buf, name, namelen);
48 buf[namelen] = '\0';
49 if (strict_strtoul(buf, 0, &val) != 0)
50 return 0;
51 *res = val;
52 return 1;
53}
54
55static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen)
56{
57 return snprintf(buf, buflen, "%u", id);
58}
36 59
37#ifdef CONFIG_NFS_USE_NEW_IDMAPPER 60#ifdef CONFIG_NFS_USE_NEW_IDMAPPER
38 61
39#include <linux/slab.h> 62#include <linux/slab.h>
40#include <linux/cred.h> 63#include <linux/cred.h>
64#include <linux/sunrpc/sched.h>
65#include <linux/nfs4.h>
66#include <linux/nfs_fs_sb.h>
41#include <linux/nfs_idmap.h> 67#include <linux/nfs_idmap.h>
42#include <linux/keyctl.h> 68#include <linux/keyctl.h>
43#include <linux/key-type.h> 69#include <linux/key-type.h>
44#include <linux/rcupdate.h> 70#include <linux/rcupdate.h>
45#include <linux/kernel.h>
46#include <linux/err.h> 71#include <linux/err.h>
47 72
48#include <keys/user-type.h> 73#include <keys/user-type.h>
@@ -219,23 +244,39 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen,
219 return ret; 244 return ret;
220} 245}
221 246
222int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) 247int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
223{ 248{
249 if (nfs_map_string_to_numeric(name, namelen, uid))
250 return 0;
224 return nfs_idmap_lookup_id(name, namelen, "uid", uid); 251 return nfs_idmap_lookup_id(name, namelen, "uid", uid);
225} 252}
226 253
227int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *gid) 254int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *gid)
228{ 255{
256 if (nfs_map_string_to_numeric(name, namelen, gid))
257 return 0;
229 return nfs_idmap_lookup_id(name, namelen, "gid", gid); 258 return nfs_idmap_lookup_id(name, namelen, "gid", gid);
230} 259}
231 260
232int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen) 261int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
233{ 262{
234 return nfs_idmap_lookup_name(uid, "user", buf, buflen); 263 int ret = -EINVAL;
264
265 if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
266 ret = nfs_idmap_lookup_name(uid, "user", buf, buflen);
267 if (ret < 0)
268 ret = nfs_map_numeric_to_string(uid, buf, buflen);
269 return ret;
235} 270}
236int nfs_map_gid_to_group(struct nfs_client *clp, __u32 gid, char *buf, size_t buflen) 271int nfs_map_gid_to_group(const struct nfs_server *server, __u32 gid, char *buf, size_t buflen)
237{ 272{
238 return nfs_idmap_lookup_name(gid, "group", buf, buflen); 273 int ret = -EINVAL;
274
275 if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
276 ret = nfs_idmap_lookup_name(gid, "group", buf, buflen);
277 if (ret < 0)
278 ret = nfs_map_numeric_to_string(gid, buf, buflen);
279 return ret;
239} 280}
240 281
241#else /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */ 282#else /* CONFIG_NFS_USE_NEW_IDMAPPER not defined */
@@ -243,7 +284,6 @@ int nfs_map_gid_to_group(struct nfs_client *clp, __u32 gid, char *buf, size_t bu
243#include <linux/module.h> 284#include <linux/module.h>
244#include <linux/mutex.h> 285#include <linux/mutex.h>
245#include <linux/init.h> 286#include <linux/init.h>
246#include <linux/types.h>
247#include <linux/slab.h> 287#include <linux/slab.h>
248#include <linux/socket.h> 288#include <linux/socket.h>
249#include <linux/in.h> 289#include <linux/in.h>
@@ -695,31 +735,45 @@ static unsigned int fnvhash32(const void *buf, size_t buflen)
695 return hash; 735 return hash;
696} 736}
697 737
698int nfs_map_name_to_uid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) 738int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
699{ 739{
700 struct idmap *idmap = clp->cl_idmap; 740 struct idmap *idmap = server->nfs_client->cl_idmap;
701 741
742 if (nfs_map_string_to_numeric(name, namelen, uid))
743 return 0;
702 return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid); 744 return nfs_idmap_id(idmap, &idmap->idmap_user_hash, name, namelen, uid);
703} 745}
704 746
705int nfs_map_group_to_gid(struct nfs_client *clp, const char *name, size_t namelen, __u32 *uid) 747int nfs_map_group_to_gid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
706{ 748{
707 struct idmap *idmap = clp->cl_idmap; 749 struct idmap *idmap = server->nfs_client->cl_idmap;
708 750
751 if (nfs_map_string_to_numeric(name, namelen, uid))
752 return 0;
709 return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid); 753 return nfs_idmap_id(idmap, &idmap->idmap_group_hash, name, namelen, uid);
710} 754}
711 755
712int nfs_map_uid_to_name(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen) 756int nfs_map_uid_to_name(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
713{ 757{
714 struct idmap *idmap = clp->cl_idmap; 758 struct idmap *idmap = server->nfs_client->cl_idmap;
759 int ret = -EINVAL;
715 760
716 return nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf); 761 if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
762 ret = nfs_idmap_name(idmap, &idmap->idmap_user_hash, uid, buf);
763 if (ret < 0)
764 ret = nfs_map_numeric_to_string(uid, buf, buflen);
765 return ret;
717} 766}
718int nfs_map_gid_to_group(struct nfs_client *clp, __u32 uid, char *buf, size_t buflen) 767int nfs_map_gid_to_group(const struct nfs_server *server, __u32 uid, char *buf, size_t buflen)
719{ 768{
720 struct idmap *idmap = clp->cl_idmap; 769 struct idmap *idmap = server->nfs_client->cl_idmap;
770 int ret = -EINVAL;
721 771
722 return nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf); 772 if (!(server->caps & NFS_CAP_UIDGID_NOMAP))
773 ret = nfs_idmap_name(idmap, &idmap->idmap_group_hash, uid, buf);
774 if (ret < 0)
775 ret = nfs_map_numeric_to_string(uid, buf, buflen);
776 return ret;
723} 777}
724 778
725#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */ 779#endif /* CONFIG_NFS_USE_NEW_IDMAPPER */
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 1cc600e77bb4..57bb31ad7a5e 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -37,6 +37,7 @@
37#include <linux/inet.h> 37#include <linux/inet.h>
38#include <linux/nfs_xdr.h> 38#include <linux/nfs_xdr.h>
39#include <linux/slab.h> 39#include <linux/slab.h>
40#include <linux/compat.h>
40 41
41#include <asm/system.h> 42#include <asm/system.h>
42#include <asm/uaccess.h> 43#include <asm/uaccess.h>
@@ -89,7 +90,11 @@ int nfs_wait_bit_killable(void *word)
89 */ 90 */
90u64 nfs_compat_user_ino64(u64 fileid) 91u64 nfs_compat_user_ino64(u64 fileid)
91{ 92{
92 int ino; 93#ifdef CONFIG_COMPAT
94 compat_ulong_t ino;
95#else
96 unsigned long ino;
97#endif
93 98
94 if (enable_ino64) 99 if (enable_ino64)
95 return fileid; 100 return fileid;
@@ -249,7 +254,9 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
249 struct inode *inode = ERR_PTR(-ENOENT); 254 struct inode *inode = ERR_PTR(-ENOENT);
250 unsigned long hash; 255 unsigned long hash;
251 256
252 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0) 257 nfs_attr_check_mountpoint(sb, fattr);
258
259 if ((fattr->valid & NFS_ATTR_FATTR_FILEID) == 0 && (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT) == 0)
253 goto out_no_inode; 260 goto out_no_inode;
254 if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0) 261 if ((fattr->valid & NFS_ATTR_FATTR_TYPE) == 0)
255 goto out_no_inode; 262 goto out_no_inode;
@@ -293,8 +300,8 @@ nfs_fhget(struct super_block *sb, struct nfs_fh *fh, struct nfs_fattr *fattr)
293 if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS)) 300 if (nfs_server_capable(inode, NFS_CAP_READDIRPLUS))
294 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags); 301 set_bit(NFS_INO_ADVISE_RDPLUS, &NFS_I(inode)->flags);
295 /* Deal with crossing mountpoints */ 302 /* Deal with crossing mountpoints */
296 if ((fattr->valid & NFS_ATTR_FATTR_FSID) 303 if (fattr->valid & NFS_ATTR_FATTR_MOUNTPOINT ||
297 && !nfs_fsid_equal(&NFS_SB(sb)->fsid, &fattr->fsid)) { 304 fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) {
298 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) 305 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
299 inode->i_op = &nfs_referral_inode_operations; 306 inode->i_op = &nfs_referral_inode_operations;
300 else 307 else
@@ -634,7 +641,6 @@ struct nfs_open_context *alloc_nfs_open_context(struct path *path, struct rpc_cr
634 ctx->mode = f_mode; 641 ctx->mode = f_mode;
635 ctx->flags = 0; 642 ctx->flags = 0;
636 ctx->error = 0; 643 ctx->error = 0;
637 ctx->dir_cookie = 0;
638 nfs_init_lock_context(&ctx->lock_context); 644 nfs_init_lock_context(&ctx->lock_context);
639 ctx->lock_context.open_context = ctx; 645 ctx->lock_context.open_context = ctx;
640 INIT_LIST_HEAD(&ctx->list); 646 INIT_LIST_HEAD(&ctx->list);
@@ -1466,6 +1472,7 @@ static inline void nfs4_init_once(struct nfs_inode *nfsi)
1466 nfsi->delegation_state = 0; 1472 nfsi->delegation_state = 0;
1467 init_rwsem(&nfsi->rwsem); 1473 init_rwsem(&nfsi->rwsem);
1468 nfsi->layout = NULL; 1474 nfsi->layout = NULL;
1475 atomic_set(&nfsi->commits_outstanding, 0);
1469#endif 1476#endif
1470} 1477}
1471 1478
@@ -1513,7 +1520,7 @@ static int nfsiod_start(void)
1513{ 1520{
1514 struct workqueue_struct *wq; 1521 struct workqueue_struct *wq;
1515 dprintk("RPC: creating workqueue nfsiod\n"); 1522 dprintk("RPC: creating workqueue nfsiod\n");
1516 wq = alloc_workqueue("nfsiod", WQ_RESCUER, 0); 1523 wq = alloc_workqueue("nfsiod", WQ_MEM_RECLAIM, 0);
1517 if (wq == NULL) 1524 if (wq == NULL)
1518 return -ENOMEM; 1525 return -ENOMEM;
1519 nfsiod_workqueue = wq; 1526 nfsiod_workqueue = wq;
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index cf9fdbdabc67..ce118ce885dd 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -39,6 +39,12 @@ static inline int nfs4_has_persistent_session(const struct nfs_client *clp)
39 return 0; 39 return 0;
40} 40}
41 41
42static inline void nfs_attr_check_mountpoint(struct super_block *parent, struct nfs_fattr *fattr)
43{
44 if (!nfs_fsid_equal(&NFS_SB(parent)->fsid, &fattr->fsid))
45 fattr->valid |= NFS_ATTR_FATTR_MOUNTPOINT;
46}
47
42struct nfs_clone_mount { 48struct nfs_clone_mount {
43 const struct super_block *sb; 49 const struct super_block *sb;
44 const struct dentry *dentry; 50 const struct dentry *dentry;
@@ -148,6 +154,9 @@ extern struct nfs_server *nfs_clone_server(struct nfs_server *,
148 struct nfs_fattr *); 154 struct nfs_fattr *);
149extern void nfs_mark_client_ready(struct nfs_client *clp, int state); 155extern void nfs_mark_client_ready(struct nfs_client *clp, int state);
150extern int nfs4_check_client_ready(struct nfs_client *clp); 156extern int nfs4_check_client_ready(struct nfs_client *clp);
157extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
158 const struct sockaddr *ds_addr,
159 int ds_addrlen, int ds_proto);
151#ifdef CONFIG_PROC_FS 160#ifdef CONFIG_PROC_FS
152extern int __init nfs_fs_proc_init(void); 161extern int __init nfs_fs_proc_init(void);
153extern void nfs_fs_proc_exit(void); 162extern void nfs_fs_proc_exit(void);
@@ -163,10 +172,10 @@ static inline void nfs_fs_proc_exit(void)
163 172
164/* nfs4namespace.c */ 173/* nfs4namespace.c */
165#ifdef CONFIG_NFS_V4 174#ifdef CONFIG_NFS_V4
166extern struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry); 175extern struct vfsmount *nfs_do_refmount(struct dentry *dentry);
167#else 176#else
168static inline 177static inline
169struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry) 178struct vfsmount *nfs_do_refmount(struct dentry *dentry)
170{ 179{
171 return ERR_PTR(-ENOENT); 180 return ERR_PTR(-ENOENT);
172} 181}
@@ -211,10 +220,17 @@ extern const u32 nfs41_maxwrite_overhead;
211/* nfs4proc.c */ 220/* nfs4proc.c */
212#ifdef CONFIG_NFS_V4 221#ifdef CONFIG_NFS_V4
213extern struct rpc_procinfo nfs4_procedures[]; 222extern struct rpc_procinfo nfs4_procedures[];
223void nfs_fixup_secinfo_attributes(struct nfs_fattr *, struct nfs_fh *);
214#endif 224#endif
215 225
226extern int nfs4_init_ds_session(struct nfs_client *clp);
227
216/* proc.c */ 228/* proc.c */
217void nfs_close_context(struct nfs_open_context *ctx, int is_sync); 229void nfs_close_context(struct nfs_open_context *ctx, int is_sync);
230extern int nfs_init_client(struct nfs_client *clp,
231 const struct rpc_timeout *timeparms,
232 const char *ip_addr, rpc_authflavor_t authflavour,
233 int noresvport);
218 234
219/* dir.c */ 235/* dir.c */
220extern int nfs_access_cache_shrinker(struct shrinker *shrink, 236extern int nfs_access_cache_shrinker(struct shrinker *shrink,
@@ -247,25 +263,45 @@ extern void nfs_sb_active(struct super_block *sb);
247extern void nfs_sb_deactive(struct super_block *sb); 263extern void nfs_sb_deactive(struct super_block *sb);
248 264
249/* namespace.c */ 265/* namespace.c */
250extern char *nfs_path(const char *base, 266extern char *nfs_path(char **p, struct dentry *dentry,
251 const struct dentry *droot,
252 const struct dentry *dentry,
253 char *buffer, ssize_t buflen); 267 char *buffer, ssize_t buflen);
254extern struct vfsmount *nfs_d_automount(struct path *path); 268extern struct vfsmount *nfs_d_automount(struct path *path);
255 269
256/* getroot.c */ 270/* getroot.c */
257extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *); 271extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *,
272 const char *);
258#ifdef CONFIG_NFS_V4 273#ifdef CONFIG_NFS_V4
259extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *); 274extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
275 const char *);
260 276
261extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); 277extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
262#endif 278#endif
263 279
264/* read.c */ 280/* read.c */
281extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
282 const struct rpc_call_ops *call_ops);
265extern void nfs_read_prepare(struct rpc_task *task, void *calldata); 283extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
266 284
267/* write.c */ 285/* write.c */
286extern void nfs_commit_free(struct nfs_write_data *p);
287extern int nfs_initiate_write(struct nfs_write_data *data,
288 struct rpc_clnt *clnt,
289 const struct rpc_call_ops *call_ops,
290 int how);
268extern void nfs_write_prepare(struct rpc_task *task, void *calldata); 291extern void nfs_write_prepare(struct rpc_task *task, void *calldata);
292extern int nfs_initiate_commit(struct nfs_write_data *data,
293 struct rpc_clnt *clnt,
294 const struct rpc_call_ops *call_ops,
295 int how);
296extern void nfs_init_commit(struct nfs_write_data *data,
297 struct list_head *head,
298 struct pnfs_layout_segment *lseg);
299void nfs_retry_commit(struct list_head *page_list,
300 struct pnfs_layout_segment *lseg);
301void nfs_commit_clear_lock(struct nfs_inode *nfsi);
302void nfs_commitdata_release(void *data);
303void nfs_commit_release_pages(struct nfs_write_data *data);
304
269#ifdef CONFIG_MIGRATION 305#ifdef CONFIG_MIGRATION
270extern int nfs_migrate_page(struct address_space *, 306extern int nfs_migrate_page(struct address_space *,
271 struct page *, struct page *); 307 struct page *, struct page *);
@@ -274,12 +310,21 @@ extern int nfs_migrate_page(struct address_space *,
274#endif 310#endif
275 311
276/* nfs4proc.c */ 312/* nfs4proc.c */
277extern int _nfs4_call_sync(struct nfs_server *server, 313extern void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data);
314extern int nfs4_init_client(struct nfs_client *clp,
315 const struct rpc_timeout *timeparms,
316 const char *ip_addr,
317 rpc_authflavor_t authflavour,
318 int noresvport);
319extern void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data);
320extern int _nfs4_call_sync(struct rpc_clnt *clnt,
321 struct nfs_server *server,
278 struct rpc_message *msg, 322 struct rpc_message *msg,
279 struct nfs4_sequence_args *args, 323 struct nfs4_sequence_args *args,
280 struct nfs4_sequence_res *res, 324 struct nfs4_sequence_res *res,
281 int cache_reply); 325 int cache_reply);
282extern int _nfs4_call_sync_session(struct nfs_server *server, 326extern int _nfs4_call_sync_session(struct rpc_clnt *clnt,
327 struct nfs_server *server,
283 struct rpc_message *msg, 328 struct rpc_message *msg,
284 struct nfs4_sequence_args *args, 329 struct nfs4_sequence_args *args,
285 struct nfs4_sequence_res *res, 330 struct nfs4_sequence_res *res,
@@ -288,12 +333,11 @@ extern int _nfs4_call_sync_session(struct nfs_server *server,
288/* 333/*
289 * Determine the device name as a string 334 * Determine the device name as a string
290 */ 335 */
291static inline char *nfs_devname(const struct vfsmount *mnt_parent, 336static inline char *nfs_devname(struct dentry *dentry,
292 const struct dentry *dentry,
293 char *buffer, ssize_t buflen) 337 char *buffer, ssize_t buflen)
294{ 338{
295 return nfs_path(mnt_parent->mnt_devname, mnt_parent->mnt_root, 339 char *dummy;
296 dentry, buffer, buflen); 340 return nfs_path(&dummy, dentry, buffer, buflen);
297} 341}
298 342
299/* 343/*
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index f32b8603dca8..1f063bacd285 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -15,6 +15,7 @@
15#include <linux/string.h> 15#include <linux/string.h>
16#include <linux/sunrpc/clnt.h> 16#include <linux/sunrpc/clnt.h>
17#include <linux/vfs.h> 17#include <linux/vfs.h>
18#include <linux/sunrpc/gss_api.h>
18#include "internal.h" 19#include "internal.h"
19 20
20#define NFSDBG_FACILITY NFSDBG_VFS 21#define NFSDBG_FACILITY NFSDBG_VFS
@@ -25,33 +26,31 @@ static LIST_HEAD(nfs_automount_list);
25static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts); 26static DECLARE_DELAYED_WORK(nfs_automount_task, nfs_expire_automounts);
26int nfs_mountpoint_expiry_timeout = 500 * HZ; 27int nfs_mountpoint_expiry_timeout = 500 * HZ;
27 28
28static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, 29static struct vfsmount *nfs_do_submount(struct dentry *dentry,
29 const struct dentry *dentry,
30 struct nfs_fh *fh, 30 struct nfs_fh *fh,
31 struct nfs_fattr *fattr); 31 struct nfs_fattr *fattr,
32 rpc_authflavor_t authflavor);
32 33
33/* 34/*
34 * nfs_path - reconstruct the path given an arbitrary dentry 35 * nfs_path - reconstruct the path given an arbitrary dentry
35 * @base - arbitrary string to prepend to the path 36 * @base - used to return pointer to the end of devname part of path
36 * @droot - pointer to root dentry for mountpoint
37 * @dentry - pointer to dentry 37 * @dentry - pointer to dentry
38 * @buffer - result buffer 38 * @buffer - result buffer
39 * @buflen - length of buffer 39 * @buflen - length of buffer
40 * 40 *
41 * Helper function for constructing the path from the 41 * Helper function for constructing the server pathname
42 * root dentry to an arbitrary hashed dentry. 42 * by arbitrary hashed dentry.
43 * 43 *
44 * This is mainly for use in figuring out the path on the 44 * This is mainly for use in figuring out the path on the
45 * server side when automounting on top of an existing partition. 45 * server side when automounting on top of an existing partition
46 * and in generating /proc/mounts and friends.
46 */ 47 */
47char *nfs_path(const char *base, 48char *nfs_path(char **p, struct dentry *dentry, char *buffer, ssize_t buflen)
48 const struct dentry *droot,
49 const struct dentry *dentry,
50 char *buffer, ssize_t buflen)
51{ 49{
52 char *end; 50 char *end;
53 int namelen; 51 int namelen;
54 unsigned seq; 52 unsigned seq;
53 const char *base;
55 54
56rename_retry: 55rename_retry:
57 end = buffer+buflen; 56 end = buffer+buflen;
@@ -60,7 +59,10 @@ rename_retry:
60 59
61 seq = read_seqbegin(&rename_lock); 60 seq = read_seqbegin(&rename_lock);
62 rcu_read_lock(); 61 rcu_read_lock();
63 while (!IS_ROOT(dentry) && dentry != droot) { 62 while (1) {
63 spin_lock(&dentry->d_lock);
64 if (IS_ROOT(dentry))
65 break;
64 namelen = dentry->d_name.len; 66 namelen = dentry->d_name.len;
65 buflen -= namelen + 1; 67 buflen -= namelen + 1;
66 if (buflen < 0) 68 if (buflen < 0)
@@ -68,27 +70,47 @@ rename_retry:
68 end -= namelen; 70 end -= namelen;
69 memcpy(end, dentry->d_name.name, namelen); 71 memcpy(end, dentry->d_name.name, namelen);
70 *--end = '/'; 72 *--end = '/';
73 spin_unlock(&dentry->d_lock);
71 dentry = dentry->d_parent; 74 dentry = dentry->d_parent;
72 } 75 }
73 rcu_read_unlock(); 76 if (read_seqretry(&rename_lock, seq)) {
74 if (read_seqretry(&rename_lock, seq)) 77 spin_unlock(&dentry->d_lock);
78 rcu_read_unlock();
75 goto rename_retry; 79 goto rename_retry;
80 }
76 if (*end != '/') { 81 if (*end != '/') {
77 if (--buflen < 0) 82 if (--buflen < 0) {
83 spin_unlock(&dentry->d_lock);
84 rcu_read_unlock();
78 goto Elong; 85 goto Elong;
86 }
79 *--end = '/'; 87 *--end = '/';
80 } 88 }
89 *p = end;
90 base = dentry->d_fsdata;
91 if (!base) {
92 spin_unlock(&dentry->d_lock);
93 rcu_read_unlock();
94 WARN_ON(1);
95 return end;
96 }
81 namelen = strlen(base); 97 namelen = strlen(base);
82 /* Strip off excess slashes in base string */ 98 /* Strip off excess slashes in base string */
83 while (namelen > 0 && base[namelen - 1] == '/') 99 while (namelen > 0 && base[namelen - 1] == '/')
84 namelen--; 100 namelen--;
85 buflen -= namelen; 101 buflen -= namelen;
86 if (buflen < 0) 102 if (buflen < 0) {
103 spin_unlock(&dentry->d_lock);
104 rcu_read_unlock();
87 goto Elong; 105 goto Elong;
106 }
88 end -= namelen; 107 end -= namelen;
89 memcpy(end, base, namelen); 108 memcpy(end, base, namelen);
109 spin_unlock(&dentry->d_lock);
110 rcu_read_unlock();
90 return end; 111 return end;
91Elong_unlock: 112Elong_unlock:
113 spin_unlock(&dentry->d_lock);
92 rcu_read_unlock(); 114 rcu_read_unlock();
93 if (read_seqretry(&rename_lock, seq)) 115 if (read_seqretry(&rename_lock, seq))
94 goto rename_retry; 116 goto rename_retry;
@@ -96,6 +118,99 @@ Elong:
96 return ERR_PTR(-ENAMETOOLONG); 118 return ERR_PTR(-ENAMETOOLONG);
97} 119}
98 120
121#ifdef CONFIG_NFS_V4
122static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
123{
124 struct gss_api_mech *mech;
125 struct xdr_netobj oid;
126 int i;
127 rpc_authflavor_t pseudoflavor = RPC_AUTH_UNIX;
128
129 for (i = 0; i < flavors->num_flavors; i++) {
130 struct nfs4_secinfo_flavor *flavor;
131 flavor = &flavors->flavors[i];
132
133 if (flavor->flavor == RPC_AUTH_NULL || flavor->flavor == RPC_AUTH_UNIX) {
134 pseudoflavor = flavor->flavor;
135 break;
136 } else if (flavor->flavor == RPC_AUTH_GSS) {
137 oid.len = flavor->gss.sec_oid4.len;
138 oid.data = flavor->gss.sec_oid4.data;
139 mech = gss_mech_get_by_OID(&oid);
140 if (!mech)
141 continue;
142 pseudoflavor = gss_svc_to_pseudoflavor(mech, flavor->gss.service);
143 gss_mech_put(mech);
144 break;
145 }
146 }
147
148 return pseudoflavor;
149}
150
151static int nfs_negotiate_security(const struct dentry *parent,
152 const struct dentry *dentry,
153 rpc_authflavor_t *flavor)
154{
155 struct page *page;
156 struct nfs4_secinfo_flavors *flavors;
157 int (*secinfo)(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *);
158 int ret = -EPERM;
159
160 secinfo = NFS_PROTO(parent->d_inode)->secinfo;
161 if (secinfo != NULL) {
162 page = alloc_page(GFP_KERNEL);
163 if (!page) {
164 ret = -ENOMEM;
165 goto out;
166 }
167 flavors = page_address(page);
168 ret = secinfo(parent->d_inode, &dentry->d_name, flavors);
169 *flavor = nfs_find_best_sec(flavors);
170 put_page(page);
171 }
172
173out:
174 return ret;
175}
176
177static int nfs_lookup_with_sec(struct nfs_server *server, struct dentry *parent,
178 struct dentry *dentry, struct path *path,
179 struct nfs_fh *fh, struct nfs_fattr *fattr,
180 rpc_authflavor_t *flavor)
181{
182 struct rpc_clnt *clone;
183 struct rpc_auth *auth;
184 int err;
185
186 err = nfs_negotiate_security(parent, path->dentry, flavor);
187 if (err < 0)
188 goto out;
189 clone = rpc_clone_client(server->client);
190 auth = rpcauth_create(*flavor, clone);
191 if (!auth) {
192 err = -EIO;
193 goto out_shutdown;
194 }
195 err = server->nfs_client->rpc_ops->lookup(clone, parent->d_inode,
196 &path->dentry->d_name,
197 fh, fattr);
198out_shutdown:
199 rpc_shutdown_client(clone);
200out:
201 return err;
202}
203#else /* CONFIG_NFS_V4 */
204static inline int nfs_lookup_with_sec(struct nfs_server *server,
205 struct dentry *parent, struct dentry *dentry,
206 struct path *path, struct nfs_fh *fh,
207 struct nfs_fattr *fattr,
208 rpc_authflavor_t *flavor)
209{
210 return -EPERM;
211}
212#endif /* CONFIG_NFS_V4 */
213
99/* 214/*
100 * nfs_d_automount - Handle crossing a mountpoint on the server 215 * nfs_d_automount - Handle crossing a mountpoint on the server
101 * @path - The mountpoint 216 * @path - The mountpoint
@@ -116,6 +231,7 @@ struct vfsmount *nfs_d_automount(struct path *path)
116 struct nfs_fh *fh = NULL; 231 struct nfs_fh *fh = NULL;
117 struct nfs_fattr *fattr = NULL; 232 struct nfs_fattr *fattr = NULL;
118 int err; 233 int err;
234 rpc_authflavor_t flavor = RPC_AUTH_UNIX;
119 235
120 dprintk("--> nfs_d_automount()\n"); 236 dprintk("--> nfs_d_automount()\n");
121 237
@@ -133,9 +249,11 @@ struct vfsmount *nfs_d_automount(struct path *path)
133 249
134 /* Look it up again to get its attributes */ 250 /* Look it up again to get its attributes */
135 parent = dget_parent(path->dentry); 251 parent = dget_parent(path->dentry);
136 err = server->nfs_client->rpc_ops->lookup(parent->d_inode, 252 err = server->nfs_client->rpc_ops->lookup(server->client, parent->d_inode,
137 &path->dentry->d_name, 253 &path->dentry->d_name,
138 fh, fattr); 254 fh, fattr);
255 if (err == -EPERM && NFS_PROTO(parent->d_inode)->secinfo != NULL)
256 err = nfs_lookup_with_sec(server, parent, path->dentry, path, fh, fattr, &flavor);
139 dput(parent); 257 dput(parent);
140 if (err != 0) { 258 if (err != 0) {
141 mnt = ERR_PTR(err); 259 mnt = ERR_PTR(err);
@@ -143,9 +261,9 @@ struct vfsmount *nfs_d_automount(struct path *path)
143 } 261 }
144 262
145 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) 263 if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL)
146 mnt = nfs_do_refmount(path->mnt, path->dentry); 264 mnt = nfs_do_refmount(path->dentry);
147 else 265 else
148 mnt = nfs_do_submount(path->mnt, path->dentry, fh, fattr); 266 mnt = nfs_do_submount(path->dentry, fh, fattr, flavor);
149 if (IS_ERR(mnt)) 267 if (IS_ERR(mnt))
150 goto out; 268 goto out;
151 269
@@ -209,22 +327,23 @@ static struct vfsmount *nfs_do_clone_mount(struct nfs_server *server,
209 327
210/** 328/**
211 * nfs_do_submount - set up mountpoint when crossing a filesystem boundary 329 * nfs_do_submount - set up mountpoint when crossing a filesystem boundary
212 * @mnt_parent - mountpoint of parent directory
213 * @dentry - parent directory 330 * @dentry - parent directory
214 * @fh - filehandle for new root dentry 331 * @fh - filehandle for new root dentry
215 * @fattr - attributes for new root inode 332 * @fattr - attributes for new root inode
333 * @authflavor - security flavor to use when performing the mount
216 * 334 *
217 */ 335 */
218static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent, 336static struct vfsmount *nfs_do_submount(struct dentry *dentry,
219 const struct dentry *dentry,
220 struct nfs_fh *fh, 337 struct nfs_fh *fh,
221 struct nfs_fattr *fattr) 338 struct nfs_fattr *fattr,
339 rpc_authflavor_t authflavor)
222{ 340{
223 struct nfs_clone_mount mountdata = { 341 struct nfs_clone_mount mountdata = {
224 .sb = mnt_parent->mnt_sb, 342 .sb = dentry->d_sb,
225 .dentry = dentry, 343 .dentry = dentry,
226 .fh = fh, 344 .fh = fh,
227 .fattr = fattr, 345 .fattr = fattr,
346 .authflavor = authflavor,
228 }; 347 };
229 struct vfsmount *mnt = ERR_PTR(-ENOMEM); 348 struct vfsmount *mnt = ERR_PTR(-ENOMEM);
230 char *page = (char *) __get_free_page(GFP_USER); 349 char *page = (char *) __get_free_page(GFP_USER);
@@ -237,11 +356,11 @@ static struct vfsmount *nfs_do_submount(const struct vfsmount *mnt_parent,
237 dentry->d_name.name); 356 dentry->d_name.name);
238 if (page == NULL) 357 if (page == NULL)
239 goto out; 358 goto out;
240 devname = nfs_devname(mnt_parent, dentry, page, PAGE_SIZE); 359 devname = nfs_devname(dentry, page, PAGE_SIZE);
241 mnt = (struct vfsmount *)devname; 360 mnt = (struct vfsmount *)devname;
242 if (IS_ERR(devname)) 361 if (IS_ERR(devname))
243 goto free_page; 362 goto free_page;
244 mnt = nfs_do_clone_mount(NFS_SB(mnt_parent->mnt_sb), devname, &mountdata); 363 mnt = nfs_do_clone_mount(NFS_SB(dentry->d_sb), devname, &mountdata);
245free_page: 364free_page:
246 free_page((unsigned long)page); 365 free_page((unsigned long)page);
247out: 366out:
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index ce939c062a52..38053d823eb0 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -141,7 +141,7 @@ nfs3_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
141} 141}
142 142
143static int 143static int
144nfs3_proc_lookup(struct inode *dir, struct qstr *name, 144nfs3_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name,
145 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 145 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
146{ 146{
147 struct nfs3_diropargs arg = { 147 struct nfs3_diropargs arg = {
@@ -885,4 +885,5 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
885 .lock = nfs3_proc_lock, 885 .lock = nfs3_proc_lock,
886 .clear_acl_cache = nfs3_forget_cached_acls, 886 .clear_acl_cache = nfs3_forget_cached_acls,
887 .close_context = nfs_close_context, 887 .close_context = nfs_close_context,
888 .init_client = nfs_init_client,
888}; 889};
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index 7a7474073148..c4a69833dd0d 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -47,6 +47,7 @@ enum nfs4_client_state {
47 NFS4CLNT_LAYOUTRECALL, 47 NFS4CLNT_LAYOUTRECALL,
48 NFS4CLNT_SESSION_RESET, 48 NFS4CLNT_SESSION_RESET,
49 NFS4CLNT_RECALL_SLOT, 49 NFS4CLNT_RECALL_SLOT,
50 NFS4CLNT_LEASE_CONFIRM,
50}; 51};
51 52
52enum nfs4_session_state { 53enum nfs4_session_state {
@@ -57,7 +58,8 @@ enum nfs4_session_state {
57struct nfs4_minor_version_ops { 58struct nfs4_minor_version_ops {
58 u32 minor_version; 59 u32 minor_version;
59 60
60 int (*call_sync)(struct nfs_server *server, 61 int (*call_sync)(struct rpc_clnt *clnt,
62 struct nfs_server *server,
61 struct rpc_message *msg, 63 struct rpc_message *msg,
62 struct nfs4_sequence_args *args, 64 struct nfs4_sequence_args *args,
63 struct nfs4_sequence_res *res, 65 struct nfs4_sequence_res *res,
@@ -252,6 +254,9 @@ static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *ser
252extern int nfs4_setup_sequence(const struct nfs_server *server, 254extern int nfs4_setup_sequence(const struct nfs_server *server,
253 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res, 255 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
254 int cache_reply, struct rpc_task *task); 256 int cache_reply, struct rpc_task *task);
257extern int nfs41_setup_sequence(struct nfs4_session *session,
258 struct nfs4_sequence_args *args, struct nfs4_sequence_res *res,
259 int cache_reply, struct rpc_task *task);
255extern void nfs4_destroy_session(struct nfs4_session *session); 260extern void nfs4_destroy_session(struct nfs4_session *session);
256extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp); 261extern struct nfs4_session *nfs4_alloc_session(struct nfs_client *clp);
257extern int nfs4_proc_create_session(struct nfs_client *); 262extern int nfs4_proc_create_session(struct nfs_client *);
@@ -259,6 +264,21 @@ extern int nfs4_proc_destroy_session(struct nfs4_session *);
259extern int nfs4_init_session(struct nfs_server *server); 264extern int nfs4_init_session(struct nfs_server *server);
260extern int nfs4_proc_get_lease_time(struct nfs_client *clp, 265extern int nfs4_proc_get_lease_time(struct nfs_client *clp,
261 struct nfs_fsinfo *fsinfo); 266 struct nfs_fsinfo *fsinfo);
267extern int nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data,
268 bool sync);
269
270static inline bool
271is_ds_only_client(struct nfs_client *clp)
272{
273 return (clp->cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS) ==
274 EXCHGID4_FLAG_USE_PNFS_DS;
275}
276
277static inline bool
278is_ds_client(struct nfs_client *clp)
279{
280 return clp->cl_exchange_flags & EXCHGID4_FLAG_USE_PNFS_DS;
281}
262#else /* CONFIG_NFS_v4_1 */ 282#else /* CONFIG_NFS_v4_1 */
263static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server) 283static inline struct nfs4_session *nfs4_get_session(const struct nfs_server *server)
264{ 284{
@@ -276,6 +296,18 @@ static inline int nfs4_init_session(struct nfs_server *server)
276{ 296{
277 return 0; 297 return 0;
278} 298}
299
300static inline bool
301is_ds_only_client(struct nfs_client *clp)
302{
303 return false;
304}
305
306static inline bool
307is_ds_client(struct nfs_client *clp)
308{
309 return false;
310}
279#endif /* CONFIG_NFS_V4_1 */ 311#endif /* CONFIG_NFS_V4_1 */
280 312
281extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[]; 313extern const struct nfs4_minor_version_ops *nfs_v4_minor_ops[];
@@ -298,6 +330,11 @@ struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);
298#if defined(CONFIG_NFS_V4_1) 330#if defined(CONFIG_NFS_V4_1)
299struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp); 331struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp);
300struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp); 332struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp);
333extern void nfs4_schedule_session_recovery(struct nfs4_session *);
334#else
335static inline void nfs4_schedule_session_recovery(struct nfs4_session *session)
336{
337}
301#endif /* CONFIG_NFS_V4_1 */ 338#endif /* CONFIG_NFS_V4_1 */
302 339
303extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *); 340extern struct nfs4_state_owner * nfs4_get_state_owner(struct nfs_server *, struct rpc_cred *);
@@ -307,10 +344,9 @@ extern void nfs4_put_open_state(struct nfs4_state *);
307extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t); 344extern void nfs4_close_state(struct path *, struct nfs4_state *, fmode_t);
308extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t); 345extern void nfs4_close_sync(struct path *, struct nfs4_state *, fmode_t);
309extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t); 346extern void nfs4_state_set_mode_locked(struct nfs4_state *, fmode_t);
310extern void nfs4_schedule_state_recovery(struct nfs_client *); 347extern void nfs4_schedule_lease_recovery(struct nfs_client *);
311extern void nfs4_schedule_state_manager(struct nfs_client *); 348extern void nfs4_schedule_state_manager(struct nfs_client *);
312extern int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state); 349extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
313extern int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state);
314extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); 350extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
315extern void nfs41_handle_recall_slot(struct nfs_client *clp); 351extern void nfs41_handle_recall_slot(struct nfs_client *clp);
316extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); 352extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 23f930caf1e2..6f8192f4cfc7 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -40,32 +40,370 @@ MODULE_LICENSE("GPL");
40MODULE_AUTHOR("Dean Hildebrand <dhildebz@umich.edu>"); 40MODULE_AUTHOR("Dean Hildebrand <dhildebz@umich.edu>");
41MODULE_DESCRIPTION("The NFSv4 file layout driver"); 41MODULE_DESCRIPTION("The NFSv4 file layout driver");
42 42
43static int 43#define FILELAYOUT_POLL_RETRY_MAX (15*HZ)
44filelayout_set_layoutdriver(struct nfs_server *nfss) 44
45{ 45static loff_t
46 int status = pnfs_alloc_init_deviceid_cache(nfss->nfs_client, 46filelayout_get_dense_offset(struct nfs4_filelayout_segment *flseg,
47 nfs4_fl_free_deviceid_callback); 47 loff_t offset)
48 if (status) { 48{
49 printk(KERN_WARNING "%s: deviceid cache could not be " 49 u32 stripe_width = flseg->stripe_unit * flseg->dsaddr->stripe_count;
50 "initialized\n", __func__); 50 u64 tmp;
51 return status; 51
52 offset -= flseg->pattern_offset;
53 tmp = offset;
54 do_div(tmp, stripe_width);
55
56 return tmp * flseg->stripe_unit + do_div(offset, flseg->stripe_unit);
57}
58
59/* This function is used by the layout driver to calculate the
60 * offset of the file on the dserver based on whether the
61 * layout type is STRIPE_DENSE or STRIPE_SPARSE
62 */
63static loff_t
64filelayout_get_dserver_offset(struct pnfs_layout_segment *lseg, loff_t offset)
65{
66 struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
67
68 switch (flseg->stripe_type) {
69 case STRIPE_SPARSE:
70 return offset;
71
72 case STRIPE_DENSE:
73 return filelayout_get_dense_offset(flseg, offset);
74 }
75
76 BUG();
77}
78
79/* For data server errors we don't recover from */
80static void
81filelayout_set_lo_fail(struct pnfs_layout_segment *lseg)
82{
83 if (lseg->pls_range.iomode == IOMODE_RW) {
84 dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__);
85 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
86 } else {
87 dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__);
88 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
89 }
90}
91
92static int filelayout_async_handle_error(struct rpc_task *task,
93 struct nfs4_state *state,
94 struct nfs_client *clp,
95 int *reset)
96{
97 if (task->tk_status >= 0)
98 return 0;
99
100 *reset = 0;
101
102 switch (task->tk_status) {
103 case -NFS4ERR_BADSESSION:
104 case -NFS4ERR_BADSLOT:
105 case -NFS4ERR_BAD_HIGH_SLOT:
106 case -NFS4ERR_DEADSESSION:
107 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
108 case -NFS4ERR_SEQ_FALSE_RETRY:
109 case -NFS4ERR_SEQ_MISORDERED:
110 dprintk("%s ERROR %d, Reset session. Exchangeid "
111 "flags 0x%x\n", __func__, task->tk_status,
112 clp->cl_exchange_flags);
113 nfs4_schedule_session_recovery(clp->cl_session);
114 break;
115 case -NFS4ERR_DELAY:
116 case -NFS4ERR_GRACE:
117 case -EKEYEXPIRED:
118 rpc_delay(task, FILELAYOUT_POLL_RETRY_MAX);
119 break;
120 default:
121 dprintk("%s DS error. Retry through MDS %d\n", __func__,
122 task->tk_status);
123 *reset = 1;
124 break;
125 }
126 task->tk_status = 0;
127 return -EAGAIN;
128}
129
130/* NFS_PROTO call done callback routines */
131
132static int filelayout_read_done_cb(struct rpc_task *task,
133 struct nfs_read_data *data)
134{
135 struct nfs_client *clp = data->ds_clp;
136 int reset = 0;
137
138 dprintk("%s DS read\n", __func__);
139
140 if (filelayout_async_handle_error(task, data->args.context->state,
141 data->ds_clp, &reset) == -EAGAIN) {
142 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
143 __func__, data->ds_clp, data->ds_clp->cl_session);
144 if (reset) {
145 filelayout_set_lo_fail(data->lseg);
146 nfs4_reset_read(task, data);
147 clp = NFS_SERVER(data->inode)->nfs_client;
148 }
149 nfs_restart_rpc(task, clp);
150 return -EAGAIN;
52 } 151 }
53 dprintk("%s: deviceid cache has been initialized successfully\n", 152
54 __func__);
55 return 0; 153 return 0;
56} 154}
57 155
58/* Clear out the layout by destroying its device list */ 156/*
59static int 157 * We reference the rpc_cred of the first WRITE that triggers the need for
60filelayout_clear_layoutdriver(struct nfs_server *nfss) 158 * a LAYOUTCOMMIT, and use it to send the layoutcommit compound.
159 * rfc5661 is not clear about which credential should be used.
160 */
161static void
162filelayout_set_layoutcommit(struct nfs_write_data *wdata)
61{ 163{
62 dprintk("--> %s\n", __func__); 164 if (FILELAYOUT_LSEG(wdata->lseg)->commit_through_mds ||
165 wdata->res.verf->committed == NFS_FILE_SYNC)
166 return;
167
168 pnfs_set_layoutcommit(wdata);
169 dprintk("%s ionde %lu pls_end_pos %lu\n", __func__, wdata->inode->i_ino,
170 (unsigned long) wdata->lseg->pls_end_pos);
171}
172
173/*
174 * Call ops for the async read/write cases
175 * In the case of dense layouts, the offset needs to be reset to its
176 * original value.
177 */
178static void filelayout_read_prepare(struct rpc_task *task, void *data)
179{
180 struct nfs_read_data *rdata = (struct nfs_read_data *)data;
181
182 rdata->read_done_cb = filelayout_read_done_cb;
183
184 if (nfs41_setup_sequence(rdata->ds_clp->cl_session,
185 &rdata->args.seq_args, &rdata->res.seq_res,
186 0, task))
187 return;
188
189 rpc_call_start(task);
190}
191
192static void filelayout_read_call_done(struct rpc_task *task, void *data)
193{
194 struct nfs_read_data *rdata = (struct nfs_read_data *)data;
195
196 dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status);
197
198 /* Note this may cause RPC to be resent */
199 rdata->mds_ops->rpc_call_done(task, data);
200}
201
202static void filelayout_read_release(void *data)
203{
204 struct nfs_read_data *rdata = (struct nfs_read_data *)data;
205
206 rdata->mds_ops->rpc_release(data);
207}
208
209static int filelayout_write_done_cb(struct rpc_task *task,
210 struct nfs_write_data *data)
211{
212 int reset = 0;
213
214 if (filelayout_async_handle_error(task, data->args.context->state,
215 data->ds_clp, &reset) == -EAGAIN) {
216 struct nfs_client *clp;
217
218 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
219 __func__, data->ds_clp, data->ds_clp->cl_session);
220 if (reset) {
221 filelayout_set_lo_fail(data->lseg);
222 nfs4_reset_write(task, data);
223 clp = NFS_SERVER(data->inode)->nfs_client;
224 } else
225 clp = data->ds_clp;
226 nfs_restart_rpc(task, clp);
227 return -EAGAIN;
228 }
63 229
64 if (nfss->nfs_client->cl_devid_cache) 230 filelayout_set_layoutcommit(data);
65 pnfs_put_deviceid_cache(nfss->nfs_client);
66 return 0; 231 return 0;
67} 232}
68 233
234/* Fake up some data that will cause nfs_commit_release to retry the writes. */
235static void prepare_to_resend_writes(struct nfs_write_data *data)
236{
237 struct nfs_page *first = nfs_list_entry(data->pages.next);
238
239 data->task.tk_status = 0;
240 memcpy(data->verf.verifier, first->wb_verf.verifier,
241 sizeof(first->wb_verf.verifier));
242 data->verf.verifier[0]++; /* ensure verifier mismatch */
243}
244
245static int filelayout_commit_done_cb(struct rpc_task *task,
246 struct nfs_write_data *data)
247{
248 int reset = 0;
249
250 if (filelayout_async_handle_error(task, data->args.context->state,
251 data->ds_clp, &reset) == -EAGAIN) {
252 dprintk("%s calling restart ds_clp %p ds_clp->cl_session %p\n",
253 __func__, data->ds_clp, data->ds_clp->cl_session);
254 if (reset) {
255 prepare_to_resend_writes(data);
256 filelayout_set_lo_fail(data->lseg);
257 } else
258 nfs_restart_rpc(task, data->ds_clp);
259 return -EAGAIN;
260 }
261
262 return 0;
263}
264
265static void filelayout_write_prepare(struct rpc_task *task, void *data)
266{
267 struct nfs_write_data *wdata = (struct nfs_write_data *)data;
268
269 if (nfs41_setup_sequence(wdata->ds_clp->cl_session,
270 &wdata->args.seq_args, &wdata->res.seq_res,
271 0, task))
272 return;
273
274 rpc_call_start(task);
275}
276
277static void filelayout_write_call_done(struct rpc_task *task, void *data)
278{
279 struct nfs_write_data *wdata = (struct nfs_write_data *)data;
280
281 /* Note this may cause RPC to be resent */
282 wdata->mds_ops->rpc_call_done(task, data);
283}
284
285static void filelayout_write_release(void *data)
286{
287 struct nfs_write_data *wdata = (struct nfs_write_data *)data;
288
289 wdata->mds_ops->rpc_release(data);
290}
291
292static void filelayout_commit_release(void *data)
293{
294 struct nfs_write_data *wdata = (struct nfs_write_data *)data;
295
296 nfs_commit_release_pages(wdata);
297 if (atomic_dec_and_test(&NFS_I(wdata->inode)->commits_outstanding))
298 nfs_commit_clear_lock(NFS_I(wdata->inode));
299 nfs_commitdata_release(wdata);
300}
301
302struct rpc_call_ops filelayout_read_call_ops = {
303 .rpc_call_prepare = filelayout_read_prepare,
304 .rpc_call_done = filelayout_read_call_done,
305 .rpc_release = filelayout_read_release,
306};
307
308struct rpc_call_ops filelayout_write_call_ops = {
309 .rpc_call_prepare = filelayout_write_prepare,
310 .rpc_call_done = filelayout_write_call_done,
311 .rpc_release = filelayout_write_release,
312};
313
314struct rpc_call_ops filelayout_commit_call_ops = {
315 .rpc_call_prepare = filelayout_write_prepare,
316 .rpc_call_done = filelayout_write_call_done,
317 .rpc_release = filelayout_commit_release,
318};
319
320static enum pnfs_try_status
321filelayout_read_pagelist(struct nfs_read_data *data)
322{
323 struct pnfs_layout_segment *lseg = data->lseg;
324 struct nfs4_pnfs_ds *ds;
325 loff_t offset = data->args.offset;
326 u32 j, idx;
327 struct nfs_fh *fh;
328 int status;
329
330 dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n",
331 __func__, data->inode->i_ino,
332 data->args.pgbase, (size_t)data->args.count, offset);
333
334 /* Retrieve the correct rpc_client for the byte range */
335 j = nfs4_fl_calc_j_index(lseg, offset);
336 idx = nfs4_fl_calc_ds_index(lseg, j);
337 ds = nfs4_fl_prepare_ds(lseg, idx);
338 if (!ds) {
339 /* Either layout fh index faulty, or ds connect failed */
340 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
341 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
342 return PNFS_NOT_ATTEMPTED;
343 }
344 dprintk("%s USE DS:ip %x %hu\n", __func__,
345 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
346
347 /* No multipath support. Use first DS */
348 data->ds_clp = ds->ds_clp;
349 fh = nfs4_fl_select_ds_fh(lseg, j);
350 if (fh)
351 data->args.fh = fh;
352
353 data->args.offset = filelayout_get_dserver_offset(lseg, offset);
354 data->mds_offset = offset;
355
356 /* Perform an asynchronous read to ds */
357 status = nfs_initiate_read(data, ds->ds_clp->cl_rpcclient,
358 &filelayout_read_call_ops);
359 BUG_ON(status != 0);
360 return PNFS_ATTEMPTED;
361}
362
363/* Perform async writes. */
364static enum pnfs_try_status
365filelayout_write_pagelist(struct nfs_write_data *data, int sync)
366{
367 struct pnfs_layout_segment *lseg = data->lseg;
368 struct nfs4_pnfs_ds *ds;
369 loff_t offset = data->args.offset;
370 u32 j, idx;
371 struct nfs_fh *fh;
372 int status;
373
374 /* Retrieve the correct rpc_client for the byte range */
375 j = nfs4_fl_calc_j_index(lseg, offset);
376 idx = nfs4_fl_calc_ds_index(lseg, j);
377 ds = nfs4_fl_prepare_ds(lseg, idx);
378 if (!ds) {
379 printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
380 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
381 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
382 return PNFS_NOT_ATTEMPTED;
383 }
384 dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__,
385 data->inode->i_ino, sync, (size_t) data->args.count, offset,
386 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
387
388 data->write_done_cb = filelayout_write_done_cb;
389 data->ds_clp = ds->ds_clp;
390 fh = nfs4_fl_select_ds_fh(lseg, j);
391 if (fh)
392 data->args.fh = fh;
393 /*
394 * Get the file offset on the dserver. Set the write offset to
395 * this offset and save the original offset.
396 */
397 data->args.offset = filelayout_get_dserver_offset(lseg, offset);
398 data->mds_offset = offset;
399
400 /* Perform an asynchronous write */
401 status = nfs_initiate_write(data, ds->ds_clp->cl_rpcclient,
402 &filelayout_write_call_ops, sync);
403 BUG_ON(status != 0);
404 return PNFS_ATTEMPTED;
405}
406
69/* 407/*
70 * filelayout_check_layout() 408 * filelayout_check_layout()
71 * 409 *
@@ -92,14 +430,14 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
92 goto out; 430 goto out;
93 } 431 }
94 432
95 if (fl->stripe_unit % PAGE_SIZE) { 433 if (!fl->stripe_unit || fl->stripe_unit % PAGE_SIZE) {
96 dprintk("%s Stripe unit (%u) not page aligned\n", 434 dprintk("%s Invalid stripe unit (%u)\n",
97 __func__, fl->stripe_unit); 435 __func__, fl->stripe_unit);
98 goto out; 436 goto out;
99 } 437 }
100 438
101 /* find and reference the deviceid */ 439 /* find and reference the deviceid */
102 dsaddr = nfs4_fl_find_get_deviceid(nfss->nfs_client, id); 440 dsaddr = nfs4_fl_find_get_deviceid(id);
103 if (dsaddr == NULL) { 441 if (dsaddr == NULL) {
104 dsaddr = get_device_info(lo->plh_inode, id); 442 dsaddr = get_device_info(lo->plh_inode, id);
105 if (dsaddr == NULL) 443 if (dsaddr == NULL)
@@ -134,7 +472,7 @@ out:
134 dprintk("--> %s returns %d\n", __func__, status); 472 dprintk("--> %s returns %d\n", __func__, status);
135 return status; 473 return status;
136out_put: 474out_put:
137 pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache, &dsaddr->deviceid); 475 nfs4_fl_put_deviceid(dsaddr);
138 goto out; 476 goto out;
139} 477}
140 478
@@ -164,12 +502,33 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
164 struct nfs4_layoutget_res *lgr, 502 struct nfs4_layoutget_res *lgr,
165 struct nfs4_deviceid *id) 503 struct nfs4_deviceid *id)
166{ 504{
167 uint32_t *p = (uint32_t *)lgr->layout.buf; 505 struct xdr_stream stream;
506 struct xdr_buf buf = {
507 .pages = lgr->layoutp->pages,
508 .page_len = lgr->layoutp->len,
509 .buflen = lgr->layoutp->len,
510 .len = lgr->layoutp->len,
511 };
512 struct page *scratch;
513 __be32 *p;
168 uint32_t nfl_util; 514 uint32_t nfl_util;
169 int i; 515 int i;
170 516
171 dprintk("%s: set_layout_map Begin\n", __func__); 517 dprintk("%s: set_layout_map Begin\n", __func__);
172 518
519 scratch = alloc_page(GFP_KERNEL);
520 if (!scratch)
521 return -ENOMEM;
522
523 xdr_init_decode(&stream, &buf, NULL);
524 xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
525
526 /* 20 = ufl_util (4), first_stripe_index (4), pattern_offset (8),
527 * num_fh (4) */
528 p = xdr_inline_decode(&stream, NFS4_DEVICEID4_SIZE + 20);
529 if (unlikely(!p))
530 goto out_err;
531
173 memcpy(id, p, sizeof(*id)); 532 memcpy(id, p, sizeof(*id));
174 p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE); 533 p += XDR_QUADLEN(NFS4_DEVICEID4_SIZE);
175 print_deviceid(id); 534 print_deviceid(id);
@@ -191,32 +550,57 @@ filelayout_decode_layout(struct pnfs_layout_hdr *flo,
191 __func__, nfl_util, fl->num_fh, fl->first_stripe_index, 550 __func__, nfl_util, fl->num_fh, fl->first_stripe_index,
192 fl->pattern_offset); 551 fl->pattern_offset);
193 552
553 if (!fl->num_fh)
554 goto out_err;
555
194 fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *), 556 fl->fh_array = kzalloc(fl->num_fh * sizeof(struct nfs_fh *),
195 GFP_KERNEL); 557 GFP_KERNEL);
196 if (!fl->fh_array) 558 if (!fl->fh_array)
197 return -ENOMEM; 559 goto out_err;
198 560
199 for (i = 0; i < fl->num_fh; i++) { 561 for (i = 0; i < fl->num_fh; i++) {
200 /* Do we want to use a mempool here? */ 562 /* Do we want to use a mempool here? */
201 fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL); 563 fl->fh_array[i] = kmalloc(sizeof(struct nfs_fh), GFP_KERNEL);
202 if (!fl->fh_array[i]) { 564 if (!fl->fh_array[i])
203 filelayout_free_fh_array(fl); 565 goto out_err_free;
204 return -ENOMEM; 566
205 } 567 p = xdr_inline_decode(&stream, 4);
568 if (unlikely(!p))
569 goto out_err_free;
206 fl->fh_array[i]->size = be32_to_cpup(p++); 570 fl->fh_array[i]->size = be32_to_cpup(p++);
207 if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) { 571 if (sizeof(struct nfs_fh) < fl->fh_array[i]->size) {
208 printk(KERN_ERR "Too big fh %d received %d\n", 572 printk(KERN_ERR "Too big fh %d received %d\n",
209 i, fl->fh_array[i]->size); 573 i, fl->fh_array[i]->size);
210 filelayout_free_fh_array(fl); 574 goto out_err_free;
211 return -EIO;
212 } 575 }
576
577 p = xdr_inline_decode(&stream, fl->fh_array[i]->size);
578 if (unlikely(!p))
579 goto out_err_free;
213 memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size); 580 memcpy(fl->fh_array[i]->data, p, fl->fh_array[i]->size);
214 p += XDR_QUADLEN(fl->fh_array[i]->size);
215 dprintk("DEBUG: %s: fh len %d\n", __func__, 581 dprintk("DEBUG: %s: fh len %d\n", __func__,
216 fl->fh_array[i]->size); 582 fl->fh_array[i]->size);
217 } 583 }
218 584
585 __free_page(scratch);
219 return 0; 586 return 0;
587
588out_err_free:
589 filelayout_free_fh_array(fl);
590out_err:
591 __free_page(scratch);
592 return -EIO;
593}
594
595static void
596filelayout_free_lseg(struct pnfs_layout_segment *lseg)
597{
598 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
599
600 dprintk("--> %s\n", __func__);
601 nfs4_fl_put_deviceid(fl->dsaddr);
602 kfree(fl->commit_buckets);
603 _filelayout_free_lseg(fl);
220} 604}
221 605
222static struct pnfs_layout_segment * 606static struct pnfs_layout_segment *
@@ -237,29 +621,252 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
237 _filelayout_free_lseg(fl); 621 _filelayout_free_lseg(fl);
238 return NULL; 622 return NULL;
239 } 623 }
624
625 /* This assumes there is only one IOMODE_RW lseg. What
626 * we really want to do is have a layout_hdr level
627 * dictionary of <multipath_list4, fh> keys, each
628 * associated with a struct list_head, populated by calls
629 * to filelayout_write_pagelist().
630 * */
631 if ((!fl->commit_through_mds) && (lgr->range.iomode == IOMODE_RW)) {
632 int i;
633 int size = (fl->stripe_type == STRIPE_SPARSE) ?
634 fl->dsaddr->ds_num : fl->dsaddr->stripe_count;
635
636 fl->commit_buckets = kcalloc(size, sizeof(struct list_head), GFP_KERNEL);
637 if (!fl->commit_buckets) {
638 filelayout_free_lseg(&fl->generic_hdr);
639 return NULL;
640 }
641 fl->number_of_buckets = size;
642 for (i = 0; i < size; i++)
643 INIT_LIST_HEAD(&fl->commit_buckets[i]);
644 }
240 return &fl->generic_hdr; 645 return &fl->generic_hdr;
241} 646}
242 647
243static void 648/*
244filelayout_free_lseg(struct pnfs_layout_segment *lseg) 649 * filelayout_pg_test(). Called by nfs_can_coalesce_requests()
650 *
651 * return 1 : coalesce page
652 * return 0 : don't coalesce page
653 */
654int
655filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
656 struct nfs_page *req)
657{
658 u64 p_stripe, r_stripe;
659 u32 stripe_unit;
660
661 if (!pgio->pg_lseg)
662 return 1;
663 p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT;
664 r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT;
665 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
666
667 do_div(p_stripe, stripe_unit);
668 do_div(r_stripe, stripe_unit);
669
670 return (p_stripe == r_stripe);
671}
672
673static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg)
674{
675 return !FILELAYOUT_LSEG(lseg)->commit_through_mds;
676}
677
678static u32 select_bucket_index(struct nfs4_filelayout_segment *fl, u32 j)
679{
680 if (fl->stripe_type == STRIPE_SPARSE)
681 return nfs4_fl_calc_ds_index(&fl->generic_hdr, j);
682 else
683 return j;
684}
685
686struct list_head *filelayout_choose_commit_list(struct nfs_page *req)
245{ 687{
246 struct nfs_server *nfss = NFS_SERVER(lseg->pls_layout->plh_inode); 688 struct pnfs_layout_segment *lseg = req->wb_commit_lseg;
247 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg); 689 struct nfs4_filelayout_segment *fl = FILELAYOUT_LSEG(lseg);
690 u32 i, j;
691 struct list_head *list;
692
693 /* Note that we are calling nfs4_fl_calc_j_index on each page
694 * that ends up being committed to a data server. An attractive
695 * alternative is to add a field to nfs_write_data and nfs_page
696 * to store the value calculated in filelayout_write_pagelist
697 * and just use that here.
698 */
699 j = nfs4_fl_calc_j_index(lseg,
700 (loff_t)req->wb_index << PAGE_CACHE_SHIFT);
701 i = select_bucket_index(fl, j);
702 list = &fl->commit_buckets[i];
703 if (list_empty(list)) {
704 /* Non-empty buckets hold a reference on the lseg */
705 get_lseg(lseg);
706 }
707 return list;
708}
248 709
249 dprintk("--> %s\n", __func__); 710static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
250 pnfs_put_deviceid(nfss->nfs_client->cl_devid_cache, 711{
251 &fl->dsaddr->deviceid); 712 struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
252 _filelayout_free_lseg(fl); 713
714 if (flseg->stripe_type == STRIPE_SPARSE)
715 return i;
716 else
717 return nfs4_fl_calc_ds_index(lseg, i);
718}
719
720static struct nfs_fh *
721select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i)
722{
723 struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
724
725 if (flseg->stripe_type == STRIPE_SPARSE) {
726 if (flseg->num_fh == 1)
727 i = 0;
728 else if (flseg->num_fh == 0)
729 /* Use the MDS OPEN fh set in nfs_read_rpcsetup */
730 return NULL;
731 }
732 return flseg->fh_array[i];
733}
734
735static int filelayout_initiate_commit(struct nfs_write_data *data, int how)
736{
737 struct pnfs_layout_segment *lseg = data->lseg;
738 struct nfs4_pnfs_ds *ds;
739 u32 idx;
740 struct nfs_fh *fh;
741
742 idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
743 ds = nfs4_fl_prepare_ds(lseg, idx);
744 if (!ds) {
745 printk(KERN_ERR "%s: prepare_ds failed, use MDS\n", __func__);
746 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
747 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
748 prepare_to_resend_writes(data);
749 data->mds_ops->rpc_release(data);
750 return -EAGAIN;
751 }
752 dprintk("%s ino %lu, how %d\n", __func__, data->inode->i_ino, how);
753 data->write_done_cb = filelayout_commit_done_cb;
754 data->ds_clp = ds->ds_clp;
755 fh = select_ds_fh_from_commit(lseg, data->ds_commit_index);
756 if (fh)
757 data->args.fh = fh;
758 return nfs_initiate_commit(data, ds->ds_clp->cl_rpcclient,
759 &filelayout_commit_call_ops, how);
760}
761
762/*
763 * This is only useful while we are using whole file layouts.
764 */
765static struct pnfs_layout_segment *find_only_write_lseg(struct inode *inode)
766{
767 struct pnfs_layout_segment *lseg, *rv = NULL;
768
769 spin_lock(&inode->i_lock);
770 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
771 if (lseg->pls_range.iomode == IOMODE_RW)
772 rv = get_lseg(lseg);
773 spin_unlock(&inode->i_lock);
774 return rv;
775}
776
777static int alloc_ds_commits(struct inode *inode, struct list_head *list)
778{
779 struct pnfs_layout_segment *lseg;
780 struct nfs4_filelayout_segment *fl;
781 struct nfs_write_data *data;
782 int i, j;
783
784 /* Won't need this when non-whole file layout segments are supported
785 * instead we will use a pnfs_layout_hdr structure */
786 lseg = find_only_write_lseg(inode);
787 if (!lseg)
788 return 0;
789 fl = FILELAYOUT_LSEG(lseg);
790 for (i = 0; i < fl->number_of_buckets; i++) {
791 if (list_empty(&fl->commit_buckets[i]))
792 continue;
793 data = nfs_commitdata_alloc();
794 if (!data)
795 goto out_bad;
796 data->ds_commit_index = i;
797 data->lseg = lseg;
798 list_add(&data->pages, list);
799 }
800 put_lseg(lseg);
801 return 0;
802
803out_bad:
804 for (j = i; j < fl->number_of_buckets; j++) {
805 if (list_empty(&fl->commit_buckets[i]))
806 continue;
807 nfs_retry_commit(&fl->commit_buckets[i], lseg);
808 put_lseg(lseg); /* associated with emptying bucket */
809 }
810 put_lseg(lseg);
811 /* Caller will clean up entries put on list */
812 return -ENOMEM;
813}
814
815/* This follows nfs_commit_list pretty closely */
816static int
817filelayout_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
818 int how)
819{
820 struct nfs_write_data *data, *tmp;
821 LIST_HEAD(list);
822
823 if (!list_empty(mds_pages)) {
824 data = nfs_commitdata_alloc();
825 if (!data)
826 goto out_bad;
827 data->lseg = NULL;
828 list_add(&data->pages, &list);
829 }
830
831 if (alloc_ds_commits(inode, &list))
832 goto out_bad;
833
834 list_for_each_entry_safe(data, tmp, &list, pages) {
835 list_del_init(&data->pages);
836 atomic_inc(&NFS_I(inode)->commits_outstanding);
837 if (!data->lseg) {
838 nfs_init_commit(data, mds_pages, NULL);
839 nfs_initiate_commit(data, NFS_CLIENT(inode),
840 data->mds_ops, how);
841 } else {
842 nfs_init_commit(data, &FILELAYOUT_LSEG(data->lseg)->commit_buckets[data->ds_commit_index], data->lseg);
843 filelayout_initiate_commit(data, how);
844 }
845 }
846 return 0;
847 out_bad:
848 list_for_each_entry_safe(data, tmp, &list, pages) {
849 nfs_retry_commit(&data->pages, data->lseg);
850 list_del_init(&data->pages);
851 nfs_commit_free(data);
852 }
853 nfs_retry_commit(mds_pages, NULL);
854 nfs_commit_clear_lock(NFS_I(inode));
855 return -ENOMEM;
253} 856}
254 857
255static struct pnfs_layoutdriver_type filelayout_type = { 858static struct pnfs_layoutdriver_type filelayout_type = {
256 .id = LAYOUT_NFSV4_1_FILES, 859 .id = LAYOUT_NFSV4_1_FILES,
257 .name = "LAYOUT_NFSV4_1_FILES", 860 .name = "LAYOUT_NFSV4_1_FILES",
258 .owner = THIS_MODULE, 861 .owner = THIS_MODULE,
259 .set_layoutdriver = filelayout_set_layoutdriver, 862 .alloc_lseg = filelayout_alloc_lseg,
260 .clear_layoutdriver = filelayout_clear_layoutdriver, 863 .free_lseg = filelayout_free_lseg,
261 .alloc_lseg = filelayout_alloc_lseg, 864 .pg_test = filelayout_pg_test,
262 .free_lseg = filelayout_free_lseg, 865 .mark_pnfs_commit = filelayout_mark_pnfs_commit,
866 .choose_commit_list = filelayout_choose_commit_list,
867 .commit_pagelist = filelayout_commit_pagelist,
868 .read_pagelist = filelayout_read_pagelist,
869 .write_pagelist = filelayout_write_pagelist,
263}; 870};
264 871
265static int __init nfs4filelayout_init(void) 872static int __init nfs4filelayout_init(void)
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index bbf60dd2ab9d..7c44579f5832 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -33,7 +33,7 @@
33#include "pnfs.h" 33#include "pnfs.h"
34 34
35/* 35/*
36 * Field testing shows we need to support upto 4096 stripe indices. 36 * Field testing shows we need to support up to 4096 stripe indices.
37 * We store each index as a u8 (u32 on the wire) to keep the memory footprint 37 * We store each index as a u8 (u32 on the wire) to keep the memory footprint
38 * reasonable. This in turn means we support a maximum of 256 38 * reasonable. This in turn means we support a maximum of 256
39 * RFC 5661 multipath_list4 structures. 39 * RFC 5661 multipath_list4 structures.
@@ -55,8 +55,14 @@ struct nfs4_pnfs_ds {
55 atomic_t ds_count; 55 atomic_t ds_count;
56}; 56};
57 57
58/* nfs4_file_layout_dsaddr flags */
59#define NFS4_DEVICE_ID_NEG_ENTRY 0x00000001
60
58struct nfs4_file_layout_dsaddr { 61struct nfs4_file_layout_dsaddr {
59 struct pnfs_deviceid_node deviceid; 62 struct hlist_node node;
63 struct nfs4_deviceid deviceid;
64 atomic_t ref;
65 unsigned long flags;
60 u32 stripe_count; 66 u32 stripe_count;
61 u8 *stripe_indices; 67 u8 *stripe_indices;
62 u32 ds_num; 68 u32 ds_num;
@@ -73,6 +79,8 @@ struct nfs4_filelayout_segment {
73 struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */ 79 struct nfs4_file_layout_dsaddr *dsaddr; /* Point to GETDEVINFO data */
74 unsigned int num_fh; 80 unsigned int num_fh;
75 struct nfs_fh **fh_array; 81 struct nfs_fh **fh_array;
82 struct list_head *commit_buckets; /* Sort commits to ds */
83 int number_of_buckets;
76}; 84};
77 85
78static inline struct nfs4_filelayout_segment * 86static inline struct nfs4_filelayout_segment *
@@ -83,11 +91,18 @@ FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg)
83 generic_hdr); 91 generic_hdr);
84} 92}
85 93
86extern void nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *); 94extern struct nfs_fh *
95nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j);
96
87extern void print_ds(struct nfs4_pnfs_ds *ds); 97extern void print_ds(struct nfs4_pnfs_ds *ds);
88extern void print_deviceid(struct nfs4_deviceid *dev_id); 98extern void print_deviceid(struct nfs4_deviceid *dev_id);
99u32 nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset);
100u32 nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j);
101struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg,
102 u32 ds_idx);
89extern struct nfs4_file_layout_dsaddr * 103extern struct nfs4_file_layout_dsaddr *
90nfs4_fl_find_get_deviceid(struct nfs_client *, struct nfs4_deviceid *dev_id); 104nfs4_fl_find_get_deviceid(struct nfs4_deviceid *dev_id);
105extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
91struct nfs4_file_layout_dsaddr * 106struct nfs4_file_layout_dsaddr *
92get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id); 107get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id);
93 108
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index f5c9b125e8cc..de5350f2b249 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -37,6 +37,30 @@
37#define NFSDBG_FACILITY NFSDBG_PNFS_LD 37#define NFSDBG_FACILITY NFSDBG_PNFS_LD
38 38
39/* 39/*
40 * Device ID RCU cache. A device ID is unique per client ID and layout type.
41 */
42#define NFS4_FL_DEVICE_ID_HASH_BITS 5
43#define NFS4_FL_DEVICE_ID_HASH_SIZE (1 << NFS4_FL_DEVICE_ID_HASH_BITS)
44#define NFS4_FL_DEVICE_ID_HASH_MASK (NFS4_FL_DEVICE_ID_HASH_SIZE - 1)
45
46static inline u32
47nfs4_fl_deviceid_hash(struct nfs4_deviceid *id)
48{
49 unsigned char *cptr = (unsigned char *)id->data;
50 unsigned int nbytes = NFS4_DEVICEID4_SIZE;
51 u32 x = 0;
52
53 while (nbytes--) {
54 x *= 37;
55 x += *cptr++;
56 }
57 return x & NFS4_FL_DEVICE_ID_HASH_MASK;
58}
59
60static struct hlist_head filelayout_deviceid_cache[NFS4_FL_DEVICE_ID_HASH_SIZE];
61static DEFINE_SPINLOCK(filelayout_deviceid_lock);
62
63/*
40 * Data server cache 64 * Data server cache
41 * 65 *
42 * Data servers can be mapped to different device ids. 66 * Data servers can be mapped to different device ids.
@@ -104,6 +128,67 @@ _data_server_lookup_locked(u32 ip_addr, u32 port)
104 return NULL; 128 return NULL;
105} 129}
106 130
131/*
132 * Create an rpc connection to the nfs4_pnfs_ds data server
133 * Currently only support IPv4
134 */
135static int
136nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
137{
138 struct nfs_client *clp;
139 struct sockaddr_in sin;
140 int status = 0;
141
142 dprintk("--> %s ip:port %x:%hu au_flavor %d\n", __func__,
143 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
144 mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor);
145
146 sin.sin_family = AF_INET;
147 sin.sin_addr.s_addr = ds->ds_ip_addr;
148 sin.sin_port = ds->ds_port;
149
150 clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&sin,
151 sizeof(sin), IPPROTO_TCP);
152 if (IS_ERR(clp)) {
153 status = PTR_ERR(clp);
154 goto out;
155 }
156
157 if ((clp->cl_exchange_flags & EXCHGID4_FLAG_MASK_PNFS) != 0) {
158 if (!is_ds_client(clp)) {
159 status = -ENODEV;
160 goto out_put;
161 }
162 ds->ds_clp = clp;
163 dprintk("%s [existing] ip=%x, port=%hu\n", __func__,
164 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
165 goto out;
166 }
167
168 /*
169 * Do not set NFS_CS_CHECK_LEASE_TIME instead set the DS lease to
170 * be equal to the MDS lease. Renewal is scheduled in create_session.
171 */
172 spin_lock(&mds_srv->nfs_client->cl_lock);
173 clp->cl_lease_time = mds_srv->nfs_client->cl_lease_time;
174 spin_unlock(&mds_srv->nfs_client->cl_lock);
175 clp->cl_last_renewal = jiffies;
176
177 /* New nfs_client */
178 status = nfs4_init_ds_session(clp);
179 if (status)
180 goto out_put;
181
182 ds->ds_clp = clp;
183 dprintk("%s [new] ip=%x, port=%hu\n", __func__, ntohl(ds->ds_ip_addr),
184 ntohs(ds->ds_port));
185out:
186 return status;
187out_put:
188 nfs_put_client(clp);
189 goto out;
190}
191
107static void 192static void
108destroy_ds(struct nfs4_pnfs_ds *ds) 193destroy_ds(struct nfs4_pnfs_ds *ds)
109{ 194{
@@ -122,7 +207,7 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
122 struct nfs4_pnfs_ds *ds; 207 struct nfs4_pnfs_ds *ds;
123 int i; 208 int i;
124 209
125 print_deviceid(&dsaddr->deviceid.de_id); 210 print_deviceid(&dsaddr->deviceid);
126 211
127 for (i = 0; i < dsaddr->ds_num; i++) { 212 for (i = 0; i < dsaddr->ds_num; i++) {
128 ds = dsaddr->ds_list[i]; 213 ds = dsaddr->ds_list[i];
@@ -139,15 +224,6 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
139 kfree(dsaddr); 224 kfree(dsaddr);
140} 225}
141 226
142void
143nfs4_fl_free_deviceid_callback(struct pnfs_deviceid_node *device)
144{
145 struct nfs4_file_layout_dsaddr *dsaddr =
146 container_of(device, struct nfs4_file_layout_dsaddr, deviceid);
147
148 nfs4_fl_free_deviceid(dsaddr);
149}
150
151static struct nfs4_pnfs_ds * 227static struct nfs4_pnfs_ds *
152nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port) 228nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port)
153{ 229{
@@ -185,7 +261,7 @@ out:
185 * Currently only support ipv4, and one multi-path address. 261 * Currently only support ipv4, and one multi-path address.
186 */ 262 */
187static struct nfs4_pnfs_ds * 263static struct nfs4_pnfs_ds *
188decode_and_add_ds(__be32 **pp, struct inode *inode) 264decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode)
189{ 265{
190 struct nfs4_pnfs_ds *ds = NULL; 266 struct nfs4_pnfs_ds *ds = NULL;
191 char *buf; 267 char *buf;
@@ -193,25 +269,34 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
193 u32 ip_addr, port; 269 u32 ip_addr, port;
194 int nlen, rlen, i; 270 int nlen, rlen, i;
195 int tmp[2]; 271 int tmp[2];
196 __be32 *r_netid, *r_addr, *p = *pp; 272 __be32 *p;
197 273
198 /* r_netid */ 274 /* r_netid */
275 p = xdr_inline_decode(streamp, 4);
276 if (unlikely(!p))
277 goto out_err;
199 nlen = be32_to_cpup(p++); 278 nlen = be32_to_cpup(p++);
200 r_netid = p;
201 p += XDR_QUADLEN(nlen);
202 279
203 /* r_addr */ 280 p = xdr_inline_decode(streamp, nlen);
204 rlen = be32_to_cpup(p++); 281 if (unlikely(!p))
205 r_addr = p; 282 goto out_err;
206 p += XDR_QUADLEN(rlen);
207 *pp = p;
208 283
209 /* Check that netid is "tcp" */ 284 /* Check that netid is "tcp" */
210 if (nlen != 3 || memcmp((char *)r_netid, "tcp", 3)) { 285 if (nlen != 3 || memcmp((char *)p, "tcp", 3)) {
211 dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__); 286 dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__);
212 goto out_err; 287 goto out_err;
213 } 288 }
214 289
290 /* r_addr */
291 p = xdr_inline_decode(streamp, 4);
292 if (unlikely(!p))
293 goto out_err;
294 rlen = be32_to_cpup(p);
295
296 p = xdr_inline_decode(streamp, rlen);
297 if (unlikely(!p))
298 goto out_err;
299
215 /* ipv6 length plus port is legal */ 300 /* ipv6 length plus port is legal */
216 if (rlen > INET6_ADDRSTRLEN + 8) { 301 if (rlen > INET6_ADDRSTRLEN + 8) {
217 dprintk("%s: Invalid address, length %d\n", __func__, 302 dprintk("%s: Invalid address, length %d\n", __func__,
@@ -219,8 +304,12 @@ decode_and_add_ds(__be32 **pp, struct inode *inode)
219 goto out_err; 304 goto out_err;
220 } 305 }
221 buf = kmalloc(rlen + 1, GFP_KERNEL); 306 buf = kmalloc(rlen + 1, GFP_KERNEL);
307 if (!buf) {
308 dprintk("%s: Not enough memory\n", __func__);
309 goto out_err;
310 }
222 buf[rlen] = '\0'; 311 buf[rlen] = '\0';
223 memcpy(buf, r_addr, rlen); 312 memcpy(buf, p, rlen);
224 313
225 /* replace the port dots with dashes for the in4_pton() delimiter*/ 314 /* replace the port dots with dashes for the in4_pton() delimiter*/
226 for (i = 0; i < 2; i++) { 315 for (i = 0; i < 2; i++) {
@@ -256,118 +345,191 @@ out_err:
256static struct nfs4_file_layout_dsaddr* 345static struct nfs4_file_layout_dsaddr*
257decode_device(struct inode *ino, struct pnfs_device *pdev) 346decode_device(struct inode *ino, struct pnfs_device *pdev)
258{ 347{
259 int i, dummy; 348 int i;
260 u32 cnt, num; 349 u32 cnt, num;
261 u8 *indexp; 350 u8 *indexp;
262 __be32 *p = (__be32 *)pdev->area, *indicesp; 351 __be32 *p;
263 struct nfs4_file_layout_dsaddr *dsaddr; 352 u8 *stripe_indices;
353 u8 max_stripe_index;
354 struct nfs4_file_layout_dsaddr *dsaddr = NULL;
355 struct xdr_stream stream;
356 struct xdr_buf buf = {
357 .pages = pdev->pages,
358 .page_len = pdev->pglen,
359 .buflen = pdev->pglen,
360 .len = pdev->pglen,
361 };
362 struct page *scratch;
363
364 /* set up xdr stream */
365 scratch = alloc_page(GFP_KERNEL);
366 if (!scratch)
367 goto out_err;
368
369 xdr_init_decode(&stream, &buf, NULL);
370 xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
264 371
265 /* Get the stripe count (number of stripe index) */ 372 /* Get the stripe count (number of stripe index) */
266 cnt = be32_to_cpup(p++); 373 p = xdr_inline_decode(&stream, 4);
374 if (unlikely(!p))
375 goto out_err_free_scratch;
376
377 cnt = be32_to_cpup(p);
267 dprintk("%s stripe count %d\n", __func__, cnt); 378 dprintk("%s stripe count %d\n", __func__, cnt);
268 if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) { 379 if (cnt > NFS4_PNFS_MAX_STRIPE_CNT) {
269 printk(KERN_WARNING "%s: stripe count %d greater than " 380 printk(KERN_WARNING "%s: stripe count %d greater than "
270 "supported maximum %d\n", __func__, 381 "supported maximum %d\n", __func__,
271 cnt, NFS4_PNFS_MAX_STRIPE_CNT); 382 cnt, NFS4_PNFS_MAX_STRIPE_CNT);
272 goto out_err; 383 goto out_err_free_scratch;
384 }
385
386 /* read stripe indices */
387 stripe_indices = kcalloc(cnt, sizeof(u8), GFP_KERNEL);
388 if (!stripe_indices)
389 goto out_err_free_scratch;
390
391 p = xdr_inline_decode(&stream, cnt << 2);
392 if (unlikely(!p))
393 goto out_err_free_stripe_indices;
394
395 indexp = &stripe_indices[0];
396 max_stripe_index = 0;
397 for (i = 0; i < cnt; i++) {
398 *indexp = be32_to_cpup(p++);
399 max_stripe_index = max(max_stripe_index, *indexp);
400 indexp++;
273 } 401 }
274 402
275 /* Check the multipath list count */ 403 /* Check the multipath list count */
276 indicesp = p; 404 p = xdr_inline_decode(&stream, 4);
277 p += XDR_QUADLEN(cnt << 2); 405 if (unlikely(!p))
278 num = be32_to_cpup(p++); 406 goto out_err_free_stripe_indices;
407
408 num = be32_to_cpup(p);
279 dprintk("%s ds_num %u\n", __func__, num); 409 dprintk("%s ds_num %u\n", __func__, num);
280 if (num > NFS4_PNFS_MAX_MULTI_CNT) { 410 if (num > NFS4_PNFS_MAX_MULTI_CNT) {
281 printk(KERN_WARNING "%s: multipath count %d greater than " 411 printk(KERN_WARNING "%s: multipath count %d greater than "
282 "supported maximum %d\n", __func__, 412 "supported maximum %d\n", __func__,
283 num, NFS4_PNFS_MAX_MULTI_CNT); 413 num, NFS4_PNFS_MAX_MULTI_CNT);
284 goto out_err; 414 goto out_err_free_stripe_indices;
415 }
416
417 /* validate stripe indices are all < num */
418 if (max_stripe_index >= num) {
419 printk(KERN_WARNING "%s: stripe index %u >= num ds %u\n",
420 __func__, max_stripe_index, num);
421 goto out_err_free_stripe_indices;
285 } 422 }
423
286 dsaddr = kzalloc(sizeof(*dsaddr) + 424 dsaddr = kzalloc(sizeof(*dsaddr) +
287 (sizeof(struct nfs4_pnfs_ds *) * (num - 1)), 425 (sizeof(struct nfs4_pnfs_ds *) * (num - 1)),
288 GFP_KERNEL); 426 GFP_KERNEL);
289 if (!dsaddr) 427 if (!dsaddr)
290 goto out_err; 428 goto out_err_free_stripe_indices;
291
292 dsaddr->stripe_indices = kzalloc(sizeof(u8) * cnt, GFP_KERNEL);
293 if (!dsaddr->stripe_indices)
294 goto out_err_free;
295 429
296 dsaddr->stripe_count = cnt; 430 dsaddr->stripe_count = cnt;
431 dsaddr->stripe_indices = stripe_indices;
432 stripe_indices = NULL;
297 dsaddr->ds_num = num; 433 dsaddr->ds_num = num;
298 434
299 memcpy(&dsaddr->deviceid.de_id, &pdev->dev_id, sizeof(pdev->dev_id)); 435 memcpy(&dsaddr->deviceid, &pdev->dev_id, sizeof(pdev->dev_id));
300
301 /* Go back an read stripe indices */
302 p = indicesp;
303 indexp = &dsaddr->stripe_indices[0];
304 for (i = 0; i < dsaddr->stripe_count; i++) {
305 *indexp = be32_to_cpup(p++);
306 if (*indexp >= num)
307 goto out_err_free;
308 indexp++;
309 }
310 /* Skip already read multipath list count */
311 p++;
312 436
313 for (i = 0; i < dsaddr->ds_num; i++) { 437 for (i = 0; i < dsaddr->ds_num; i++) {
314 int j; 438 int j;
439 u32 mp_count;
315 440
316 dummy = be32_to_cpup(p++); /* multipath count */ 441 p = xdr_inline_decode(&stream, 4);
317 if (dummy > 1) { 442 if (unlikely(!p))
443 goto out_err_free_deviceid;
444
445 mp_count = be32_to_cpup(p); /* multipath count */
446 if (mp_count > 1) {
318 printk(KERN_WARNING 447 printk(KERN_WARNING
319 "%s: Multipath count %d not supported, " 448 "%s: Multipath count %d not supported, "
320 "skipping all greater than 1\n", __func__, 449 "skipping all greater than 1\n", __func__,
321 dummy); 450 mp_count);
322 } 451 }
323 for (j = 0; j < dummy; j++) { 452 for (j = 0; j < mp_count; j++) {
324 if (j == 0) { 453 if (j == 0) {
325 dsaddr->ds_list[i] = decode_and_add_ds(&p, ino); 454 dsaddr->ds_list[i] = decode_and_add_ds(&stream,
455 ino);
326 if (dsaddr->ds_list[i] == NULL) 456 if (dsaddr->ds_list[i] == NULL)
327 goto out_err_free; 457 goto out_err_free_deviceid;
328 } else { 458 } else {
329 u32 len; 459 u32 len;
330 /* skip extra multipath */ 460 /* skip extra multipath */
331 len = be32_to_cpup(p++); 461
332 p += XDR_QUADLEN(len); 462 /* read len, skip */
333 len = be32_to_cpup(p++); 463 p = xdr_inline_decode(&stream, 4);
334 p += XDR_QUADLEN(len); 464 if (unlikely(!p))
335 continue; 465 goto out_err_free_deviceid;
466 len = be32_to_cpup(p);
467
468 p = xdr_inline_decode(&stream, len);
469 if (unlikely(!p))
470 goto out_err_free_deviceid;
471
472 /* read len, skip */
473 p = xdr_inline_decode(&stream, 4);
474 if (unlikely(!p))
475 goto out_err_free_deviceid;
476 len = be32_to_cpup(p);
477
478 p = xdr_inline_decode(&stream, len);
479 if (unlikely(!p))
480 goto out_err_free_deviceid;
336 } 481 }
337 } 482 }
338 } 483 }
484
485 __free_page(scratch);
339 return dsaddr; 486 return dsaddr;
340 487
341out_err_free: 488out_err_free_deviceid:
342 nfs4_fl_free_deviceid(dsaddr); 489 nfs4_fl_free_deviceid(dsaddr);
490 /* stripe_indicies was part of dsaddr */
491 goto out_err_free_scratch;
492out_err_free_stripe_indices:
493 kfree(stripe_indices);
494out_err_free_scratch:
495 __free_page(scratch);
343out_err: 496out_err:
344 dprintk("%s ERROR: returning NULL\n", __func__); 497 dprintk("%s ERROR: returning NULL\n", __func__);
345 return NULL; 498 return NULL;
346} 499}
347 500
348/* 501/*
349 * Decode the opaque device specified in 'dev' 502 * Decode the opaque device specified in 'dev' and add it to the cache of
350 * and add it to the list of available devices. 503 * available devices.
351 * If the deviceid is already cached, nfs4_add_deviceid will return
352 * a pointer to the cached struct and throw away the new.
353 */ 504 */
354static struct nfs4_file_layout_dsaddr* 505static struct nfs4_file_layout_dsaddr *
355decode_and_add_device(struct inode *inode, struct pnfs_device *dev) 506decode_and_add_device(struct inode *inode, struct pnfs_device *dev)
356{ 507{
357 struct nfs4_file_layout_dsaddr *dsaddr; 508 struct nfs4_file_layout_dsaddr *d, *new;
358 struct pnfs_deviceid_node *d; 509 long hash;
359 510
360 dsaddr = decode_device(inode, dev); 511 new = decode_device(inode, dev);
361 if (!dsaddr) { 512 if (!new) {
362 printk(KERN_WARNING "%s: Could not decode or add device\n", 513 printk(KERN_WARNING "%s: Could not decode or add device\n",
363 __func__); 514 __func__);
364 return NULL; 515 return NULL;
365 } 516 }
366 517
367 d = pnfs_add_deviceid(NFS_SERVER(inode)->nfs_client->cl_devid_cache, 518 spin_lock(&filelayout_deviceid_lock);
368 &dsaddr->deviceid); 519 d = nfs4_fl_find_get_deviceid(&new->deviceid);
520 if (d) {
521 spin_unlock(&filelayout_deviceid_lock);
522 nfs4_fl_free_deviceid(new);
523 return d;
524 }
369 525
370 return container_of(d, struct nfs4_file_layout_dsaddr, deviceid); 526 INIT_HLIST_NODE(&new->node);
527 atomic_set(&new->ref, 1);
528 hash = nfs4_fl_deviceid_hash(&new->deviceid);
529 hlist_add_head_rcu(&new->node, &filelayout_deviceid_cache[hash]);
530 spin_unlock(&filelayout_deviceid_lock);
531
532 return new;
371} 533}
372 534
373/* 535/*
@@ -409,11 +571,6 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
409 goto out_free; 571 goto out_free;
410 } 572 }
411 573
412 /* set pdev->area */
413 pdev->area = vmap(pages, max_pages, VM_MAP, PAGE_KERNEL);
414 if (!pdev->area)
415 goto out_free;
416
417 memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id)); 574 memcpy(&pdev->dev_id, dev_id, sizeof(*dev_id));
418 pdev->layout_type = LAYOUT_NFSV4_1_FILES; 575 pdev->layout_type = LAYOUT_NFSV4_1_FILES;
419 pdev->pages = pages; 576 pdev->pages = pages;
@@ -432,8 +589,6 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id)
432 */ 589 */
433 dsaddr = decode_and_add_device(inode, pdev); 590 dsaddr = decode_and_add_device(inode, pdev);
434out_free: 591out_free:
435 if (pdev->area != NULL)
436 vunmap(pdev->area);
437 for (i = 0; i < max_pages; i++) 592 for (i = 0; i < max_pages; i++)
438 __free_page(pages[i]); 593 __free_page(pages[i]);
439 kfree(pages); 594 kfree(pages);
@@ -442,12 +597,123 @@ out_free:
442 return dsaddr; 597 return dsaddr;
443} 598}
444 599
600void
601nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
602{
603 if (atomic_dec_and_lock(&dsaddr->ref, &filelayout_deviceid_lock)) {
604 hlist_del_rcu(&dsaddr->node);
605 spin_unlock(&filelayout_deviceid_lock);
606
607 synchronize_rcu();
608 nfs4_fl_free_deviceid(dsaddr);
609 }
610}
611
445struct nfs4_file_layout_dsaddr * 612struct nfs4_file_layout_dsaddr *
446nfs4_fl_find_get_deviceid(struct nfs_client *clp, struct nfs4_deviceid *id) 613nfs4_fl_find_get_deviceid(struct nfs4_deviceid *id)
447{ 614{
448 struct pnfs_deviceid_node *d; 615 struct nfs4_file_layout_dsaddr *d;
616 struct hlist_node *n;
617 long hash = nfs4_fl_deviceid_hash(id);
618
619
620 rcu_read_lock();
621 hlist_for_each_entry_rcu(d, n, &filelayout_deviceid_cache[hash], node) {
622 if (!memcmp(&d->deviceid, id, sizeof(*id))) {
623 if (!atomic_inc_not_zero(&d->ref))
624 goto fail;
625 rcu_read_unlock();
626 return d;
627 }
628 }
629fail:
630 rcu_read_unlock();
631 return NULL;
632}
449 633
450 d = pnfs_find_get_deviceid(clp->cl_devid_cache, id); 634/*
451 return (d == NULL) ? NULL : 635 * Want res = (offset - layout->pattern_offset)/ layout->stripe_unit
452 container_of(d, struct nfs4_file_layout_dsaddr, deviceid); 636 * Then: ((res + fsi) % dsaddr->stripe_count)
637 */
638u32
639nfs4_fl_calc_j_index(struct pnfs_layout_segment *lseg, loff_t offset)
640{
641 struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
642 u64 tmp;
643
644 tmp = offset - flseg->pattern_offset;
645 do_div(tmp, flseg->stripe_unit);
646 tmp += flseg->first_stripe_index;
647 return do_div(tmp, flseg->dsaddr->stripe_count);
648}
649
650u32
651nfs4_fl_calc_ds_index(struct pnfs_layout_segment *lseg, u32 j)
652{
653 return FILELAYOUT_LSEG(lseg)->dsaddr->stripe_indices[j];
654}
655
656struct nfs_fh *
657nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
658{
659 struct nfs4_filelayout_segment *flseg = FILELAYOUT_LSEG(lseg);
660 u32 i;
661
662 if (flseg->stripe_type == STRIPE_SPARSE) {
663 if (flseg->num_fh == 1)
664 i = 0;
665 else if (flseg->num_fh == 0)
666 /* Use the MDS OPEN fh set in nfs_read_rpcsetup */
667 return NULL;
668 else
669 i = nfs4_fl_calc_ds_index(lseg, j);
670 } else
671 i = j;
672 return flseg->fh_array[i];
673}
674
675static void
676filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
677 int err, u32 ds_addr)
678{
679 u32 *p = (u32 *)&dsaddr->deviceid;
680
681 printk(KERN_ERR "NFS: data server %x connection error %d."
682 " Deviceid [%x%x%x%x] marked out of use.\n",
683 ds_addr, err, p[0], p[1], p[2], p[3]);
684
685 spin_lock(&filelayout_deviceid_lock);
686 dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY;
687 spin_unlock(&filelayout_deviceid_lock);
688}
689
690struct nfs4_pnfs_ds *
691nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
692{
693 struct nfs4_file_layout_dsaddr *dsaddr = FILELAYOUT_LSEG(lseg)->dsaddr;
694 struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
695
696 if (ds == NULL) {
697 printk(KERN_ERR "%s: No data server for offset index %d\n",
698 __func__, ds_idx);
699 return NULL;
700 }
701
702 if (!ds->ds_clp) {
703 struct nfs_server *s = NFS_SERVER(lseg->pls_layout->plh_inode);
704 int err;
705
706 if (dsaddr->flags & NFS4_DEVICE_ID_NEG_ENTRY) {
707 /* Already tried to connect, don't try again */
708 dprintk("%s Deviceid marked out of use\n", __func__);
709 return NULL;
710 }
711 err = nfs4_ds_connect(s, ds);
712 if (err) {
713 filelayout_mark_devid_negative(dsaddr, err,
714 ntohl(ds->ds_ip_addr));
715 return NULL;
716 }
717 }
718 return ds;
453} 719}
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 3c2a1724fbd2..bb80c49b6533 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -54,33 +54,29 @@ Elong:
54/* 54/*
55 * Determine the mount path as a string 55 * Determine the mount path as a string
56 */ 56 */
57static char *nfs4_path(const struct vfsmount *mnt_parent, 57static char *nfs4_path(struct dentry *dentry, char *buffer, ssize_t buflen)
58 const struct dentry *dentry,
59 char *buffer, ssize_t buflen)
60{ 58{
61 const char *srvpath; 59 char *limit;
62 60 char *path = nfs_path(&limit, dentry, buffer, buflen);
63 srvpath = strchr(mnt_parent->mnt_devname, ':'); 61 if (!IS_ERR(path)) {
64 if (srvpath) 62 char *colon = strchr(path, ':');
65 srvpath++; 63 if (colon && colon < limit)
66 else 64 path = colon + 1;
67 srvpath = mnt_parent->mnt_devname; 65 }
68 66 return path;
69 return nfs_path(srvpath, mnt_parent->mnt_root, dentry, buffer, buflen);
70} 67}
71 68
72/* 69/*
73 * Check that fs_locations::fs_root [RFC3530 6.3] is a prefix for what we 70 * Check that fs_locations::fs_root [RFC3530 6.3] is a prefix for what we
74 * believe to be the server path to this dentry 71 * believe to be the server path to this dentry
75 */ 72 */
76static int nfs4_validate_fspath(const struct vfsmount *mnt_parent, 73static int nfs4_validate_fspath(struct dentry *dentry,
77 const struct dentry *dentry,
78 const struct nfs4_fs_locations *locations, 74 const struct nfs4_fs_locations *locations,
79 char *page, char *page2) 75 char *page, char *page2)
80{ 76{
81 const char *path, *fs_path; 77 const char *path, *fs_path;
82 78
83 path = nfs4_path(mnt_parent, dentry, page, PAGE_SIZE); 79 path = nfs4_path(dentry, page, PAGE_SIZE);
84 if (IS_ERR(path)) 80 if (IS_ERR(path))
85 return PTR_ERR(path); 81 return PTR_ERR(path);
86 82
@@ -165,20 +161,18 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
165 161
166/** 162/**
167 * nfs_follow_referral - set up mountpoint when hitting a referral on moved error 163 * nfs_follow_referral - set up mountpoint when hitting a referral on moved error
168 * @mnt_parent - mountpoint of parent directory
169 * @dentry - parent directory 164 * @dentry - parent directory
170 * @locations - array of NFSv4 server location information 165 * @locations - array of NFSv4 server location information
171 * 166 *
172 */ 167 */
173static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent, 168static struct vfsmount *nfs_follow_referral(struct dentry *dentry,
174 const struct dentry *dentry,
175 const struct nfs4_fs_locations *locations) 169 const struct nfs4_fs_locations *locations)
176{ 170{
177 struct vfsmount *mnt = ERR_PTR(-ENOENT); 171 struct vfsmount *mnt = ERR_PTR(-ENOENT);
178 struct nfs_clone_mount mountdata = { 172 struct nfs_clone_mount mountdata = {
179 .sb = mnt_parent->mnt_sb, 173 .sb = dentry->d_sb,
180 .dentry = dentry, 174 .dentry = dentry,
181 .authflavor = NFS_SB(mnt_parent->mnt_sb)->client->cl_auth->au_flavor, 175 .authflavor = NFS_SB(dentry->d_sb)->client->cl_auth->au_flavor,
182 }; 176 };
183 char *page = NULL, *page2 = NULL; 177 char *page = NULL, *page2 = NULL;
184 int loc, error; 178 int loc, error;
@@ -198,7 +192,7 @@ static struct vfsmount *nfs_follow_referral(const struct vfsmount *mnt_parent,
198 goto out; 192 goto out;
199 193
200 /* Ensure fs path is a prefix of current dentry path */ 194 /* Ensure fs path is a prefix of current dentry path */
201 error = nfs4_validate_fspath(mnt_parent, dentry, locations, page, page2); 195 error = nfs4_validate_fspath(dentry, locations, page, page2);
202 if (error < 0) { 196 if (error < 0) {
203 mnt = ERR_PTR(error); 197 mnt = ERR_PTR(error);
204 goto out; 198 goto out;
@@ -225,11 +219,10 @@ out:
225 219
226/* 220/*
227 * nfs_do_refmount - handle crossing a referral on server 221 * nfs_do_refmount - handle crossing a referral on server
228 * @mnt_parent - mountpoint of referral
229 * @dentry - dentry of referral 222 * @dentry - dentry of referral
230 * 223 *
231 */ 224 */
232struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentry *dentry) 225struct vfsmount *nfs_do_refmount(struct dentry *dentry)
233{ 226{
234 struct vfsmount *mnt = ERR_PTR(-ENOMEM); 227 struct vfsmount *mnt = ERR_PTR(-ENOMEM);
235 struct dentry *parent; 228 struct dentry *parent;
@@ -262,7 +255,7 @@ struct vfsmount *nfs_do_refmount(const struct vfsmount *mnt_parent, struct dentr
262 fs_locations->fs_path.ncomponents <= 0) 255 fs_locations->fs_path.ncomponents <= 0)
263 goto out_free; 256 goto out_free;
264 257
265 mnt = nfs_follow_referral(mnt_parent, dentry, fs_locations); 258 mnt = nfs_follow_referral(dentry, fs_locations);
266out_free: 259out_free:
267 __free_page(page); 260 __free_page(page);
268 kfree(fs_locations); 261 kfree(fs_locations);
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 78936a8f40ab..69c0f3c5ee7a 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -41,10 +41,12 @@
41#include <linux/string.h> 41#include <linux/string.h>
42#include <linux/slab.h> 42#include <linux/slab.h>
43#include <linux/sunrpc/clnt.h> 43#include <linux/sunrpc/clnt.h>
44#include <linux/sunrpc/gss_api.h>
44#include <linux/nfs.h> 45#include <linux/nfs.h>
45#include <linux/nfs4.h> 46#include <linux/nfs4.h>
46#include <linux/nfs_fs.h> 47#include <linux/nfs_fs.h>
47#include <linux/nfs_page.h> 48#include <linux/nfs_page.h>
49#include <linux/nfs_mount.h>
48#include <linux/namei.h> 50#include <linux/namei.h>
49#include <linux/mount.h> 51#include <linux/mount.h>
50#include <linux/module.h> 52#include <linux/module.h>
@@ -71,7 +73,9 @@ static int _nfs4_proc_open(struct nfs4_opendata *data);
71static int _nfs4_recover_proc_open(struct nfs4_opendata *data); 73static int _nfs4_recover_proc_open(struct nfs4_opendata *data);
72static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); 74static int nfs4_do_fsinfo(struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *);
73static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *); 75static int nfs4_async_handle_error(struct rpc_task *, const struct nfs_server *, struct nfs4_state *);
74static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr); 76static int _nfs4_proc_lookup(struct rpc_clnt *client, struct inode *dir,
77 const struct qstr *name, struct nfs_fh *fhandle,
78 struct nfs_fattr *fattr);
75static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr); 79static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr);
76static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, 80static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
77 struct nfs_fattr *fattr, struct iattr *sattr, 81 struct nfs_fattr *fattr, struct iattr *sattr,
@@ -85,6 +89,11 @@ static int nfs4_map_errors(int err)
85 switch (err) { 89 switch (err) {
86 case -NFS4ERR_RESOURCE: 90 case -NFS4ERR_RESOURCE:
87 return -EREMOTEIO; 91 return -EREMOTEIO;
92 case -NFS4ERR_WRONGSEC:
93 return -EPERM;
94 case -NFS4ERR_BADOWNER:
95 case -NFS4ERR_BADNAME:
96 return -EINVAL;
88 default: 97 default:
89 dprintk("%s could not handle NFSv4 error %d\n", 98 dprintk("%s could not handle NFSv4 error %d\n",
90 __func__, -err); 99 __func__, -err);
@@ -241,7 +250,7 @@ static int nfs4_delay(struct rpc_clnt *clnt, long *timeout)
241/* This is the error handling routine for processes that are allowed 250/* This is the error handling routine for processes that are allowed
242 * to sleep. 251 * to sleep.
243 */ 252 */
244static int nfs4_handle_exception(const struct nfs_server *server, int errorcode, struct nfs4_exception *exception) 253static int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_exception *exception)
245{ 254{
246 struct nfs_client *clp = server->nfs_client; 255 struct nfs_client *clp = server->nfs_client;
247 struct nfs4_state *state = exception->state; 256 struct nfs4_state *state = exception->state;
@@ -256,12 +265,13 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
256 case -NFS4ERR_OPENMODE: 265 case -NFS4ERR_OPENMODE:
257 if (state == NULL) 266 if (state == NULL)
258 break; 267 break;
259 nfs4_state_mark_reclaim_nograce(clp, state); 268 nfs4_schedule_stateid_recovery(server, state);
260 goto do_state_recovery; 269 goto wait_on_recovery;
261 case -NFS4ERR_STALE_STATEID: 270 case -NFS4ERR_STALE_STATEID:
262 case -NFS4ERR_STALE_CLIENTID: 271 case -NFS4ERR_STALE_CLIENTID:
263 case -NFS4ERR_EXPIRED: 272 case -NFS4ERR_EXPIRED:
264 goto do_state_recovery; 273 nfs4_schedule_lease_recovery(clp);
274 goto wait_on_recovery;
265#if defined(CONFIG_NFS_V4_1) 275#if defined(CONFIG_NFS_V4_1)
266 case -NFS4ERR_BADSESSION: 276 case -NFS4ERR_BADSESSION:
267 case -NFS4ERR_BADSLOT: 277 case -NFS4ERR_BADSLOT:
@@ -272,7 +282,7 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
272 case -NFS4ERR_SEQ_MISORDERED: 282 case -NFS4ERR_SEQ_MISORDERED:
273 dprintk("%s ERROR: %d Reset session\n", __func__, 283 dprintk("%s ERROR: %d Reset session\n", __func__,
274 errorcode); 284 errorcode);
275 nfs4_schedule_state_recovery(clp); 285 nfs4_schedule_session_recovery(clp->cl_session);
276 exception->retry = 1; 286 exception->retry = 1;
277 break; 287 break;
278#endif /* defined(CONFIG_NFS_V4_1) */ 288#endif /* defined(CONFIG_NFS_V4_1) */
@@ -292,11 +302,23 @@ static int nfs4_handle_exception(const struct nfs_server *server, int errorcode,
292 break; 302 break;
293 case -NFS4ERR_OLD_STATEID: 303 case -NFS4ERR_OLD_STATEID:
294 exception->retry = 1; 304 exception->retry = 1;
305 break;
306 case -NFS4ERR_BADOWNER:
307 /* The following works around a Linux server bug! */
308 case -NFS4ERR_BADNAME:
309 if (server->caps & NFS_CAP_UIDGID_NOMAP) {
310 server->caps &= ~NFS_CAP_UIDGID_NOMAP;
311 exception->retry = 1;
312 printk(KERN_WARNING "NFS: v4 server %s "
313 "does not accept raw "
314 "uid/gids. "
315 "Reenabling the idmapper.\n",
316 server->nfs_client->cl_hostname);
317 }
295 } 318 }
296 /* We failed to handle the error */ 319 /* We failed to handle the error */
297 return nfs4_map_errors(ret); 320 return nfs4_map_errors(ret);
298do_state_recovery: 321wait_on_recovery:
299 nfs4_schedule_state_recovery(clp);
300 ret = nfs4_wait_clnt_recover(clp); 322 ret = nfs4_wait_clnt_recover(clp);
301 if (ret == 0) 323 if (ret == 0)
302 exception->retry = 1; 324 exception->retry = 1;
@@ -422,8 +444,8 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
422 if (res->sr_status == 1) 444 if (res->sr_status == 1)
423 res->sr_status = NFS_OK; 445 res->sr_status = NFS_OK;
424 446
425 /* -ERESTARTSYS can result in skipping nfs41_sequence_setup */ 447 /* don't increment the sequence number if the task wasn't sent */
426 if (!res->sr_slot) 448 if (!RPC_WAS_SENT(task))
427 goto out; 449 goto out;
428 450
429 /* Check the SEQUENCE operation status */ 451 /* Check the SEQUENCE operation status */
@@ -435,8 +457,8 @@ static int nfs41_sequence_done(struct rpc_task *task, struct nfs4_sequence_res *
435 clp = res->sr_session->clp; 457 clp = res->sr_session->clp;
436 do_renew_lease(clp, timestamp); 458 do_renew_lease(clp, timestamp);
437 /* Check sequence flags */ 459 /* Check sequence flags */
438 if (atomic_read(&clp->cl_count) > 1) 460 if (res->sr_status_flags != 0)
439 nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags); 461 nfs4_schedule_lease_recovery(clp);
440 break; 462 break;
441 case -NFS4ERR_DELAY: 463 case -NFS4ERR_DELAY:
442 /* The server detected a resend of the RPC call and 464 /* The server detected a resend of the RPC call and
@@ -505,7 +527,7 @@ out:
505 return ret_id; 527 return ret_id;
506} 528}
507 529
508static int nfs41_setup_sequence(struct nfs4_session *session, 530int nfs41_setup_sequence(struct nfs4_session *session,
509 struct nfs4_sequence_args *args, 531 struct nfs4_sequence_args *args,
510 struct nfs4_sequence_res *res, 532 struct nfs4_sequence_res *res,
511 int cache_reply, 533 int cache_reply,
@@ -571,6 +593,7 @@ static int nfs41_setup_sequence(struct nfs4_session *session,
571 res->sr_status = 1; 593 res->sr_status = 1;
572 return 0; 594 return 0;
573} 595}
596EXPORT_SYMBOL_GPL(nfs41_setup_sequence);
574 597
575int nfs4_setup_sequence(const struct nfs_server *server, 598int nfs4_setup_sequence(const struct nfs_server *server,
576 struct nfs4_sequence_args *args, 599 struct nfs4_sequence_args *args,
@@ -640,7 +663,8 @@ struct rpc_call_ops nfs41_call_priv_sync_ops = {
640 .rpc_call_done = nfs41_call_sync_done, 663 .rpc_call_done = nfs41_call_sync_done,
641}; 664};
642 665
643static int nfs4_call_sync_sequence(struct nfs_server *server, 666static int nfs4_call_sync_sequence(struct rpc_clnt *clnt,
667 struct nfs_server *server,
644 struct rpc_message *msg, 668 struct rpc_message *msg,
645 struct nfs4_sequence_args *args, 669 struct nfs4_sequence_args *args,
646 struct nfs4_sequence_res *res, 670 struct nfs4_sequence_res *res,
@@ -656,7 +680,7 @@ static int nfs4_call_sync_sequence(struct nfs_server *server,
656 .cache_reply = cache_reply, 680 .cache_reply = cache_reply,
657 }; 681 };
658 struct rpc_task_setup task_setup = { 682 struct rpc_task_setup task_setup = {
659 .rpc_client = server->client, 683 .rpc_client = clnt,
660 .rpc_message = msg, 684 .rpc_message = msg,
661 .callback_ops = &nfs41_call_sync_ops, 685 .callback_ops = &nfs41_call_sync_ops,
662 .callback_data = &data 686 .callback_data = &data
@@ -675,13 +699,14 @@ static int nfs4_call_sync_sequence(struct nfs_server *server,
675 return ret; 699 return ret;
676} 700}
677 701
678int _nfs4_call_sync_session(struct nfs_server *server, 702int _nfs4_call_sync_session(struct rpc_clnt *clnt,
703 struct nfs_server *server,
679 struct rpc_message *msg, 704 struct rpc_message *msg,
680 struct nfs4_sequence_args *args, 705 struct nfs4_sequence_args *args,
681 struct nfs4_sequence_res *res, 706 struct nfs4_sequence_res *res,
682 int cache_reply) 707 int cache_reply)
683{ 708{
684 return nfs4_call_sync_sequence(server, msg, args, res, cache_reply, 0); 709 return nfs4_call_sync_sequence(clnt, server, msg, args, res, cache_reply, 0);
685} 710}
686 711
687#else 712#else
@@ -692,19 +717,28 @@ static int nfs4_sequence_done(struct rpc_task *task,
692} 717}
693#endif /* CONFIG_NFS_V4_1 */ 718#endif /* CONFIG_NFS_V4_1 */
694 719
695int _nfs4_call_sync(struct nfs_server *server, 720int _nfs4_call_sync(struct rpc_clnt *clnt,
721 struct nfs_server *server,
696 struct rpc_message *msg, 722 struct rpc_message *msg,
697 struct nfs4_sequence_args *args, 723 struct nfs4_sequence_args *args,
698 struct nfs4_sequence_res *res, 724 struct nfs4_sequence_res *res,
699 int cache_reply) 725 int cache_reply)
700{ 726{
701 args->sa_session = res->sr_session = NULL; 727 args->sa_session = res->sr_session = NULL;
702 return rpc_call_sync(server->client, msg, 0); 728 return rpc_call_sync(clnt, msg, 0);
703} 729}
704 730
705#define nfs4_call_sync(server, msg, args, res, cache_reply) \ 731static inline
706 (server)->nfs_client->cl_mvops->call_sync((server), (msg), &(args)->seq_args, \ 732int nfs4_call_sync(struct rpc_clnt *clnt,
707 &(res)->seq_res, (cache_reply)) 733 struct nfs_server *server,
734 struct rpc_message *msg,
735 struct nfs4_sequence_args *args,
736 struct nfs4_sequence_res *res,
737 int cache_reply)
738{
739 return server->nfs_client->cl_mvops->call_sync(clnt, server, msg,
740 args, res, cache_reply);
741}
708 742
709static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo) 743static void update_changeattr(struct inode *dir, struct nfs4_change_info *cinfo)
710{ 744{
@@ -1255,14 +1289,13 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1255 case -NFS4ERR_BAD_HIGH_SLOT: 1289 case -NFS4ERR_BAD_HIGH_SLOT:
1256 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 1290 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1257 case -NFS4ERR_DEADSESSION: 1291 case -NFS4ERR_DEADSESSION:
1258 nfs4_schedule_state_recovery( 1292 nfs4_schedule_session_recovery(server->nfs_client->cl_session);
1259 server->nfs_client);
1260 goto out; 1293 goto out;
1261 case -NFS4ERR_STALE_CLIENTID: 1294 case -NFS4ERR_STALE_CLIENTID:
1262 case -NFS4ERR_STALE_STATEID: 1295 case -NFS4ERR_STALE_STATEID:
1263 case -NFS4ERR_EXPIRED: 1296 case -NFS4ERR_EXPIRED:
1264 /* Don't recall a delegation if it was lost */ 1297 /* Don't recall a delegation if it was lost */
1265 nfs4_schedule_state_recovery(server->nfs_client); 1298 nfs4_schedule_lease_recovery(server->nfs_client);
1266 goto out; 1299 goto out;
1267 case -ERESTARTSYS: 1300 case -ERESTARTSYS:
1268 /* 1301 /*
@@ -1271,7 +1304,7 @@ int nfs4_open_delegation_recall(struct nfs_open_context *ctx, struct nfs4_state
1271 */ 1304 */
1272 case -NFS4ERR_ADMIN_REVOKED: 1305 case -NFS4ERR_ADMIN_REVOKED:
1273 case -NFS4ERR_BAD_STATEID: 1306 case -NFS4ERR_BAD_STATEID:
1274 nfs4_state_mark_reclaim_nograce(server->nfs_client, state); 1307 nfs4_schedule_stateid_recovery(server, state);
1275 case -EKEYEXPIRED: 1308 case -EKEYEXPIRED:
1276 /* 1309 /*
1277 * User RPCSEC_GSS context has expired. 1310 * User RPCSEC_GSS context has expired.
@@ -1574,9 +1607,8 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
1574 return 0; 1607 return 0;
1575} 1608}
1576 1609
1577static int nfs4_recover_expired_lease(struct nfs_server *server) 1610static int nfs4_client_recover_expired_lease(struct nfs_client *clp)
1578{ 1611{
1579 struct nfs_client *clp = server->nfs_client;
1580 unsigned int loop; 1612 unsigned int loop;
1581 int ret; 1613 int ret;
1582 1614
@@ -1587,12 +1619,17 @@ static int nfs4_recover_expired_lease(struct nfs_server *server)
1587 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) && 1619 if (!test_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) &&
1588 !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state)) 1620 !test_bit(NFS4CLNT_CHECK_LEASE,&clp->cl_state))
1589 break; 1621 break;
1590 nfs4_schedule_state_recovery(clp); 1622 nfs4_schedule_state_manager(clp);
1591 ret = -EIO; 1623 ret = -EIO;
1592 } 1624 }
1593 return ret; 1625 return ret;
1594} 1626}
1595 1627
1628static int nfs4_recover_expired_lease(struct nfs_server *server)
1629{
1630 return nfs4_client_recover_expired_lease(server->nfs_client);
1631}
1632
1596/* 1633/*
1597 * OPEN_EXPIRED: 1634 * OPEN_EXPIRED:
1598 * reclaim state on the server after a network partition. 1635 * reclaim state on the server after a network partition.
@@ -1811,7 +1848,7 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
1811 } else 1848 } else
1812 memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid)); 1849 memcpy(&arg.stateid, &zero_stateid, sizeof(arg.stateid));
1813 1850
1814 status = nfs4_call_sync(server, &msg, &arg, &res, 1); 1851 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
1815 if (status == 0 && state != NULL) 1852 if (status == 0 && state != NULL)
1816 renew_lease(server, timestamp); 1853 renew_lease(server, timestamp);
1817 return status; 1854 return status;
@@ -2070,7 +2107,7 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
2070 }; 2107 };
2071 int status; 2108 int status;
2072 2109
2073 status = nfs4_call_sync(server, &msg, &args, &res, 0); 2110 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
2074 if (status == 0) { 2111 if (status == 0) {
2075 memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask)); 2112 memcpy(server->attr_bitmask, res.attr_bitmask, sizeof(server->attr_bitmask));
2076 server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS| 2113 server->caps &= ~(NFS_CAP_ACLS|NFS_CAP_HARDLINKS|
@@ -2140,7 +2177,7 @@ static int _nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
2140 }; 2177 };
2141 2178
2142 nfs_fattr_init(info->fattr); 2179 nfs_fattr_init(info->fattr);
2143 return nfs4_call_sync(server, &msg, &args, &res, 0); 2180 return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
2144} 2181}
2145 2182
2146static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle, 2183static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
@@ -2149,22 +2186,75 @@ static int nfs4_lookup_root(struct nfs_server *server, struct nfs_fh *fhandle,
2149 struct nfs4_exception exception = { }; 2186 struct nfs4_exception exception = { };
2150 int err; 2187 int err;
2151 do { 2188 do {
2152 err = nfs4_handle_exception(server, 2189 err = _nfs4_lookup_root(server, fhandle, info);
2153 _nfs4_lookup_root(server, fhandle, info), 2190 switch (err) {
2154 &exception); 2191 case 0:
2192 case -NFS4ERR_WRONGSEC:
2193 break;
2194 default:
2195 err = nfs4_handle_exception(server, err, &exception);
2196 }
2155 } while (exception.retry); 2197 } while (exception.retry);
2156 return err; 2198 return err;
2157} 2199}
2158 2200
2201static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
2202 struct nfs_fsinfo *info, rpc_authflavor_t flavor)
2203{
2204 struct rpc_auth *auth;
2205 int ret;
2206
2207 auth = rpcauth_create(flavor, server->client);
2208 if (!auth) {
2209 ret = -EIO;
2210 goto out;
2211 }
2212 ret = nfs4_lookup_root(server, fhandle, info);
2213out:
2214 return ret;
2215}
2216
2217static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
2218 struct nfs_fsinfo *info)
2219{
2220 int i, len, status = 0;
2221 rpc_authflavor_t flav_array[NFS_MAX_SECFLAVORS];
2222
2223 len = gss_mech_list_pseudoflavors(&flav_array[0]);
2224 flav_array[len] = RPC_AUTH_NULL;
2225 len += 1;
2226
2227 for (i = 0; i < len; i++) {
2228 status = nfs4_lookup_root_sec(server, fhandle, info, flav_array[i]);
2229 if (status == -NFS4ERR_WRONGSEC || status == -EACCES)
2230 continue;
2231 break;
2232 }
2233 /*
2234 * -EACCES could mean that the user doesn't have correct permissions
2235 * to access the mount. It could also mean that we tried to mount
2236 * with a gss auth flavor, but rpc.gssd isn't running. Either way,
2237 * existing mount programs don't handle -EACCES very well so it should
2238 * be mapped to -EPERM instead.
2239 */
2240 if (status == -EACCES)
2241 status = -EPERM;
2242 return status;
2243}
2244
2159/* 2245/*
2160 * get the file handle for the "/" directory on the server 2246 * get the file handle for the "/" directory on the server
2161 */ 2247 */
2162static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, 2248static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
2163 struct nfs_fsinfo *info) 2249 struct nfs_fsinfo *info)
2164{ 2250{
2165 int status; 2251 int status = nfs4_lookup_root(server, fhandle, info);
2166 2252 if ((status == -NFS4ERR_WRONGSEC) && !(server->flags & NFS_MOUNT_SECFLAVOUR))
2167 status = nfs4_lookup_root(server, fhandle, info); 2253 /*
2254 * A status of -NFS4ERR_WRONGSEC will be mapped to -EPERM
2255 * by nfs4_map_errors() as this function exits.
2256 */
2257 status = nfs4_find_root_sec(server, fhandle, info);
2168 if (status == 0) 2258 if (status == 0)
2169 status = nfs4_server_capabilities(server, fhandle); 2259 status = nfs4_server_capabilities(server, fhandle);
2170 if (status == 0) 2260 if (status == 0)
@@ -2229,7 +2319,7 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
2229 }; 2319 };
2230 2320
2231 nfs_fattr_init(fattr); 2321 nfs_fattr_init(fattr);
2232 return nfs4_call_sync(server, &msg, &args, &res, 0); 2322 return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
2233} 2323}
2234 2324
2235static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr) 2325static int nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fattr *fattr)
@@ -2289,9 +2379,9 @@ nfs4_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
2289 return status; 2379 return status;
2290} 2380}
2291 2381
2292static int _nfs4_proc_lookupfh(struct nfs_server *server, const struct nfs_fh *dirfh, 2382static int _nfs4_proc_lookupfh(struct rpc_clnt *clnt, struct nfs_server *server,
2293 const struct qstr *name, struct nfs_fh *fhandle, 2383 const struct nfs_fh *dirfh, const struct qstr *name,
2294 struct nfs_fattr *fattr) 2384 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
2295{ 2385{
2296 int status; 2386 int status;
2297 struct nfs4_lookup_arg args = { 2387 struct nfs4_lookup_arg args = {
@@ -2313,7 +2403,7 @@ static int _nfs4_proc_lookupfh(struct nfs_server *server, const struct nfs_fh *d
2313 nfs_fattr_init(fattr); 2403 nfs_fattr_init(fattr);
2314 2404
2315 dprintk("NFS call lookupfh %s\n", name->name); 2405 dprintk("NFS call lookupfh %s\n", name->name);
2316 status = nfs4_call_sync(server, &msg, &args, &res, 0); 2406 status = nfs4_call_sync(clnt, server, &msg, &args.seq_args, &res.seq_res, 0);
2317 dprintk("NFS reply lookupfh: %d\n", status); 2407 dprintk("NFS reply lookupfh: %d\n", status);
2318 return status; 2408 return status;
2319} 2409}
@@ -2325,7 +2415,7 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
2325 struct nfs4_exception exception = { }; 2415 struct nfs4_exception exception = { };
2326 int err; 2416 int err;
2327 do { 2417 do {
2328 err = _nfs4_proc_lookupfh(server, dirfh, name, fhandle, fattr); 2418 err = _nfs4_proc_lookupfh(server->client, server, dirfh, name, fhandle, fattr);
2329 /* FIXME: !!!! */ 2419 /* FIXME: !!!! */
2330 if (err == -NFS4ERR_MOVED) { 2420 if (err == -NFS4ERR_MOVED) {
2331 err = -EREMOTE; 2421 err = -EREMOTE;
@@ -2336,27 +2426,41 @@ static int nfs4_proc_lookupfh(struct nfs_server *server, struct nfs_fh *dirfh,
2336 return err; 2426 return err;
2337} 2427}
2338 2428
2339static int _nfs4_proc_lookup(struct inode *dir, const struct qstr *name, 2429static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir,
2340 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 2430 const struct qstr *name, struct nfs_fh *fhandle,
2431 struct nfs_fattr *fattr)
2341{ 2432{
2342 int status; 2433 int status;
2343 2434
2344 dprintk("NFS call lookup %s\n", name->name); 2435 dprintk("NFS call lookup %s\n", name->name);
2345 status = _nfs4_proc_lookupfh(NFS_SERVER(dir), NFS_FH(dir), name, fhandle, fattr); 2436 status = _nfs4_proc_lookupfh(clnt, NFS_SERVER(dir), NFS_FH(dir), name, fhandle, fattr);
2346 if (status == -NFS4ERR_MOVED) 2437 if (status == -NFS4ERR_MOVED)
2347 status = nfs4_get_referral(dir, name, fattr, fhandle); 2438 status = nfs4_get_referral(dir, name, fattr, fhandle);
2348 dprintk("NFS reply lookup: %d\n", status); 2439 dprintk("NFS reply lookup: %d\n", status);
2349 return status; 2440 return status;
2350} 2441}
2351 2442
2352static int nfs4_proc_lookup(struct inode *dir, struct qstr *name, struct nfs_fh *fhandle, struct nfs_fattr *fattr) 2443void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr, struct nfs_fh *fh)
2444{
2445 memset(fh, 0, sizeof(struct nfs_fh));
2446 fattr->fsid.major = 1;
2447 fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE |
2448 NFS_ATTR_FATTR_NLINK | NFS_ATTR_FATTR_FSID | NFS_ATTR_FATTR_MOUNTPOINT;
2449 fattr->mode = S_IFDIR | S_IRUGO | S_IXUGO;
2450 fattr->nlink = 2;
2451}
2452
2453static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name,
2454 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
2353{ 2455{
2354 struct nfs4_exception exception = { }; 2456 struct nfs4_exception exception = { };
2355 int err; 2457 int err;
2356 do { 2458 do {
2357 err = nfs4_handle_exception(NFS_SERVER(dir), 2459 err = nfs4_handle_exception(NFS_SERVER(dir),
2358 _nfs4_proc_lookup(dir, name, fhandle, fattr), 2460 _nfs4_proc_lookup(clnt, dir, name, fhandle, fattr),
2359 &exception); 2461 &exception);
2462 if (err == -EPERM)
2463 nfs_fixup_secinfo_attributes(fattr, fhandle);
2360 } while (exception.retry); 2464 } while (exception.retry);
2361 return err; 2465 return err;
2362} 2466}
@@ -2401,7 +2505,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
2401 if (res.fattr == NULL) 2505 if (res.fattr == NULL)
2402 return -ENOMEM; 2506 return -ENOMEM;
2403 2507
2404 status = nfs4_call_sync(server, &msg, &args, &res, 0); 2508 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
2405 if (!status) { 2509 if (!status) {
2406 entry->mask = 0; 2510 entry->mask = 0;
2407 if (res.access & NFS4_ACCESS_READ) 2511 if (res.access & NFS4_ACCESS_READ)
@@ -2468,7 +2572,7 @@ static int _nfs4_proc_readlink(struct inode *inode, struct page *page,
2468 .rpc_resp = &res, 2572 .rpc_resp = &res,
2469 }; 2573 };
2470 2574
2471 return nfs4_call_sync(NFS_SERVER(inode), &msg, &args, &res, 0); 2575 return nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), &msg, &args.seq_args, &res.seq_res, 0);
2472} 2576}
2473 2577
2474static int nfs4_proc_readlink(struct inode *inode, struct page *page, 2578static int nfs4_proc_readlink(struct inode *inode, struct page *page,
@@ -2557,7 +2661,7 @@ static int _nfs4_proc_remove(struct inode *dir, struct qstr *name)
2557 if (res.dir_attr == NULL) 2661 if (res.dir_attr == NULL)
2558 goto out; 2662 goto out;
2559 2663
2560 status = nfs4_call_sync(server, &msg, &args, &res, 1); 2664 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 1);
2561 if (status == 0) { 2665 if (status == 0) {
2562 update_changeattr(dir, &res.cinfo); 2666 update_changeattr(dir, &res.cinfo);
2563 nfs_post_op_update_inode(dir, res.dir_attr); 2667 nfs_post_op_update_inode(dir, res.dir_attr);
@@ -2658,7 +2762,7 @@ static int _nfs4_proc_rename(struct inode *old_dir, struct qstr *old_name,
2658 if (res.old_fattr == NULL || res.new_fattr == NULL) 2762 if (res.old_fattr == NULL || res.new_fattr == NULL)
2659 goto out; 2763 goto out;
2660 2764
2661 status = nfs4_call_sync(server, &msg, &arg, &res, 1); 2765 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
2662 if (!status) { 2766 if (!status) {
2663 update_changeattr(old_dir, &res.old_cinfo); 2767 update_changeattr(old_dir, &res.old_cinfo);
2664 nfs_post_op_update_inode(old_dir, res.old_fattr); 2768 nfs_post_op_update_inode(old_dir, res.old_fattr);
@@ -2709,7 +2813,7 @@ static int _nfs4_proc_link(struct inode *inode, struct inode *dir, struct qstr *
2709 if (res.fattr == NULL || res.dir_attr == NULL) 2813 if (res.fattr == NULL || res.dir_attr == NULL)
2710 goto out; 2814 goto out;
2711 2815
2712 status = nfs4_call_sync(server, &msg, &arg, &res, 1); 2816 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
2713 if (!status) { 2817 if (!status) {
2714 update_changeattr(dir, &res.cinfo); 2818 update_changeattr(dir, &res.cinfo);
2715 nfs_post_op_update_inode(dir, res.dir_attr); 2819 nfs_post_op_update_inode(dir, res.dir_attr);
@@ -2772,8 +2876,8 @@ static struct nfs4_createdata *nfs4_alloc_createdata(struct inode *dir,
2772 2876
2773static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_createdata *data) 2877static int nfs4_do_create(struct inode *dir, struct dentry *dentry, struct nfs4_createdata *data)
2774{ 2878{
2775 int status = nfs4_call_sync(NFS_SERVER(dir), &data->msg, 2879 int status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &data->msg,
2776 &data->arg, &data->res, 1); 2880 &data->arg.seq_args, &data->res.seq_res, 1);
2777 if (status == 0) { 2881 if (status == 0) {
2778 update_changeattr(dir, &data->res.dir_cinfo); 2882 update_changeattr(dir, &data->res.dir_cinfo);
2779 nfs_post_op_update_inode(dir, data->res.dir_fattr); 2883 nfs_post_op_update_inode(dir, data->res.dir_fattr);
@@ -2885,7 +2989,7 @@ static int _nfs4_proc_readdir(struct dentry *dentry, struct rpc_cred *cred,
2885 (unsigned long long)cookie); 2989 (unsigned long long)cookie);
2886 nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args); 2990 nfs4_setup_readdir(cookie, NFS_COOKIEVERF(dir), dentry, &args);
2887 res.pgbase = args.pgbase; 2991 res.pgbase = args.pgbase;
2888 status = nfs4_call_sync(NFS_SERVER(dir), &msg, &args, &res, 0); 2992 status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0);
2889 if (status >= 0) { 2993 if (status >= 0) {
2890 memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE); 2994 memcpy(NFS_COOKIEVERF(dir), res.verifier.data, NFS4_VERIFIER_SIZE);
2891 status += args.pgbase; 2995 status += args.pgbase;
@@ -2977,7 +3081,7 @@ static int _nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle,
2977 }; 3081 };
2978 3082
2979 nfs_fattr_init(fsstat->fattr); 3083 nfs_fattr_init(fsstat->fattr);
2980 return nfs4_call_sync(server, &msg, &args, &res, 0); 3084 return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
2981} 3085}
2982 3086
2983static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat) 3087static int nfs4_proc_statfs(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsstat *fsstat)
@@ -3008,7 +3112,7 @@ static int _nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle,
3008 .rpc_resp = &res, 3112 .rpc_resp = &res,
3009 }; 3113 };
3010 3114
3011 return nfs4_call_sync(server, &msg, &args, &res, 0); 3115 return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
3012} 3116}
3013 3117
3014static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo) 3118static int nfs4_do_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, struct nfs_fsinfo *fsinfo)
@@ -3053,7 +3157,7 @@ static int _nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle
3053 } 3157 }
3054 3158
3055 nfs_fattr_init(pathconf->fattr); 3159 nfs_fattr_init(pathconf->fattr);
3056 return nfs4_call_sync(server, &msg, &args, &res, 0); 3160 return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
3057} 3161}
3058 3162
3059static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle, 3163static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
@@ -3070,15 +3174,10 @@ static int nfs4_proc_pathconf(struct nfs_server *server, struct nfs_fh *fhandle,
3070 return err; 3174 return err;
3071} 3175}
3072 3176
3073static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data) 3177static int nfs4_read_done_cb(struct rpc_task *task, struct nfs_read_data *data)
3074{ 3178{
3075 struct nfs_server *server = NFS_SERVER(data->inode); 3179 struct nfs_server *server = NFS_SERVER(data->inode);
3076 3180
3077 dprintk("--> %s\n", __func__);
3078
3079 if (!nfs4_sequence_done(task, &data->res.seq_res))
3080 return -EAGAIN;
3081
3082 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) { 3181 if (nfs4_async_handle_error(task, server, data->args.context->state) == -EAGAIN) {
3083 nfs_restart_rpc(task, server->nfs_client); 3182 nfs_restart_rpc(task, server->nfs_client);
3084 return -EAGAIN; 3183 return -EAGAIN;
@@ -3090,19 +3189,44 @@ static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
3090 return 0; 3189 return 0;
3091} 3190}
3092 3191
3192static int nfs4_read_done(struct rpc_task *task, struct nfs_read_data *data)
3193{
3194
3195 dprintk("--> %s\n", __func__);
3196
3197 if (!nfs4_sequence_done(task, &data->res.seq_res))
3198 return -EAGAIN;
3199
3200 return data->read_done_cb(task, data);
3201}
3202
3093static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg) 3203static void nfs4_proc_read_setup(struct nfs_read_data *data, struct rpc_message *msg)
3094{ 3204{
3095 data->timestamp = jiffies; 3205 data->timestamp = jiffies;
3206 data->read_done_cb = nfs4_read_done_cb;
3096 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; 3207 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ];
3097} 3208}
3098 3209
3099static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data) 3210/* Reset the nfs_read_data to send the read to the MDS. */
3211void nfs4_reset_read(struct rpc_task *task, struct nfs_read_data *data)
3212{
3213 dprintk("%s Reset task for i/o through\n", __func__);
3214 put_lseg(data->lseg);
3215 data->lseg = NULL;
3216 /* offsets will differ in the dense stripe case */
3217 data->args.offset = data->mds_offset;
3218 data->ds_clp = NULL;
3219 data->args.fh = NFS_FH(data->inode);
3220 data->read_done_cb = nfs4_read_done_cb;
3221 task->tk_ops = data->mds_ops;
3222 rpc_task_reset_client(task, NFS_CLIENT(data->inode));
3223}
3224EXPORT_SYMBOL_GPL(nfs4_reset_read);
3225
3226static int nfs4_write_done_cb(struct rpc_task *task, struct nfs_write_data *data)
3100{ 3227{
3101 struct inode *inode = data->inode; 3228 struct inode *inode = data->inode;
3102 3229
3103 if (!nfs4_sequence_done(task, &data->res.seq_res))
3104 return -EAGAIN;
3105
3106 if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) { 3230 if (nfs4_async_handle_error(task, NFS_SERVER(inode), data->args.context->state) == -EAGAIN) {
3107 nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); 3231 nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
3108 return -EAGAIN; 3232 return -EAGAIN;
@@ -3114,23 +3238,50 @@ static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
3114 return 0; 3238 return 0;
3115} 3239}
3116 3240
3241static int nfs4_write_done(struct rpc_task *task, struct nfs_write_data *data)
3242{
3243 if (!nfs4_sequence_done(task, &data->res.seq_res))
3244 return -EAGAIN;
3245 return data->write_done_cb(task, data);
3246}
3247
3248/* Reset the nfs_write_data to send the write to the MDS. */
3249void nfs4_reset_write(struct rpc_task *task, struct nfs_write_data *data)
3250{
3251 dprintk("%s Reset task for i/o through\n", __func__);
3252 put_lseg(data->lseg);
3253 data->lseg = NULL;
3254 data->ds_clp = NULL;
3255 data->write_done_cb = nfs4_write_done_cb;
3256 data->args.fh = NFS_FH(data->inode);
3257 data->args.bitmask = data->res.server->cache_consistency_bitmask;
3258 data->args.offset = data->mds_offset;
3259 data->res.fattr = &data->fattr;
3260 task->tk_ops = data->mds_ops;
3261 rpc_task_reset_client(task, NFS_CLIENT(data->inode));
3262}
3263EXPORT_SYMBOL_GPL(nfs4_reset_write);
3264
3117static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg) 3265static void nfs4_proc_write_setup(struct nfs_write_data *data, struct rpc_message *msg)
3118{ 3266{
3119 struct nfs_server *server = NFS_SERVER(data->inode); 3267 struct nfs_server *server = NFS_SERVER(data->inode);
3120 3268
3121 data->args.bitmask = server->cache_consistency_bitmask; 3269 if (data->lseg) {
3270 data->args.bitmask = NULL;
3271 data->res.fattr = NULL;
3272 } else
3273 data->args.bitmask = server->cache_consistency_bitmask;
3274 if (!data->write_done_cb)
3275 data->write_done_cb = nfs4_write_done_cb;
3122 data->res.server = server; 3276 data->res.server = server;
3123 data->timestamp = jiffies; 3277 data->timestamp = jiffies;
3124 3278
3125 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE]; 3279 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_WRITE];
3126} 3280}
3127 3281
3128static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data) 3282static int nfs4_commit_done_cb(struct rpc_task *task, struct nfs_write_data *data)
3129{ 3283{
3130 struct inode *inode = data->inode; 3284 struct inode *inode = data->inode;
3131
3132 if (!nfs4_sequence_done(task, &data->res.seq_res))
3133 return -EAGAIN;
3134 3285
3135 if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) { 3286 if (nfs4_async_handle_error(task, NFS_SERVER(inode), NULL) == -EAGAIN) {
3136 nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client); 3287 nfs_restart_rpc(task, NFS_SERVER(inode)->nfs_client);
@@ -3140,11 +3291,24 @@ static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
3140 return 0; 3291 return 0;
3141} 3292}
3142 3293
3294static int nfs4_commit_done(struct rpc_task *task, struct nfs_write_data *data)
3295{
3296 if (!nfs4_sequence_done(task, &data->res.seq_res))
3297 return -EAGAIN;
3298 return data->write_done_cb(task, data);
3299}
3300
3143static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg) 3301static void nfs4_proc_commit_setup(struct nfs_write_data *data, struct rpc_message *msg)
3144{ 3302{
3145 struct nfs_server *server = NFS_SERVER(data->inode); 3303 struct nfs_server *server = NFS_SERVER(data->inode);
3146 3304
3147 data->args.bitmask = server->cache_consistency_bitmask; 3305 if (data->lseg) {
3306 data->args.bitmask = NULL;
3307 data->res.fattr = NULL;
3308 } else
3309 data->args.bitmask = server->cache_consistency_bitmask;
3310 if (!data->write_done_cb)
3311 data->write_done_cb = nfs4_commit_done_cb;
3148 data->res.server = server; 3312 data->res.server = server;
3149 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT]; 3313 msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_COMMIT];
3150} 3314}
@@ -3178,7 +3342,7 @@ static void nfs4_renew_done(struct rpc_task *task, void *calldata)
3178 if (task->tk_status < 0) { 3342 if (task->tk_status < 0) {
3179 /* Unless we're shutting down, schedule state recovery! */ 3343 /* Unless we're shutting down, schedule state recovery! */
3180 if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0) 3344 if (test_bit(NFS_CS_RENEWD, &clp->cl_res_state) != 0)
3181 nfs4_schedule_state_recovery(clp); 3345 nfs4_schedule_lease_recovery(clp);
3182 return; 3346 return;
3183 } 3347 }
3184 do_renew_lease(clp, timestamp); 3348 do_renew_lease(clp, timestamp);
@@ -3252,6 +3416,35 @@ static void buf_to_pages(const void *buf, size_t buflen,
3252 } 3416 }
3253} 3417}
3254 3418
3419static int buf_to_pages_noslab(const void *buf, size_t buflen,
3420 struct page **pages, unsigned int *pgbase)
3421{
3422 struct page *newpage, **spages;
3423 int rc = 0;
3424 size_t len;
3425 spages = pages;
3426
3427 do {
3428 len = min_t(size_t, PAGE_CACHE_SIZE, buflen);
3429 newpage = alloc_page(GFP_KERNEL);
3430
3431 if (newpage == NULL)
3432 goto unwind;
3433 memcpy(page_address(newpage), buf, len);
3434 buf += len;
3435 buflen -= len;
3436 *pages++ = newpage;
3437 rc++;
3438 } while (buflen != 0);
3439
3440 return rc;
3441
3442unwind:
3443 for(; rc > 0; rc--)
3444 __free_page(spages[rc-1]);
3445 return -ENOMEM;
3446}
3447
3255struct nfs4_cached_acl { 3448struct nfs4_cached_acl {
3256 int cached; 3449 int cached;
3257 size_t len; 3450 size_t len;
@@ -3353,7 +3546,7 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu
3353 resp_buf = buf; 3546 resp_buf = buf;
3354 buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase); 3547 buf_to_pages(buf, buflen, args.acl_pages, &args.acl_pgbase);
3355 } 3548 }
3356 ret = nfs4_call_sync(NFS_SERVER(inode), &msg, &args, &res, 0); 3549 ret = nfs4_call_sync(NFS_SERVER(inode)->client, NFS_SERVER(inode), &msg, &args.seq_args, &res.seq_res, 0);
3357 if (ret) 3550 if (ret)
3358 goto out_free; 3551 goto out_free;
3359 if (res.acl_len > args.acl_len) 3552 if (res.acl_len > args.acl_len)
@@ -3420,13 +3613,23 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
3420 .rpc_argp = &arg, 3613 .rpc_argp = &arg,
3421 .rpc_resp = &res, 3614 .rpc_resp = &res,
3422 }; 3615 };
3423 int ret; 3616 int ret, i;
3424 3617
3425 if (!nfs4_server_supports_acls(server)) 3618 if (!nfs4_server_supports_acls(server))
3426 return -EOPNOTSUPP; 3619 return -EOPNOTSUPP;
3620 i = buf_to_pages_noslab(buf, buflen, arg.acl_pages, &arg.acl_pgbase);
3621 if (i < 0)
3622 return i;
3427 nfs_inode_return_delegation(inode); 3623 nfs_inode_return_delegation(inode);
3428 buf_to_pages(buf, buflen, arg.acl_pages, &arg.acl_pgbase); 3624 ret = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
3429 ret = nfs4_call_sync(server, &msg, &arg, &res, 1); 3625
3626 /*
3627 * Free each page after tx, so the only ref left is
3628 * held by the network stack
3629 */
3630 for (; i > 0; i--)
3631 put_page(pages[i-1]);
3632
3430 /* 3633 /*
3431 * Acl update can result in inode attribute update. 3634 * Acl update can result in inode attribute update.
3432 * so mark the attribute cache invalid. 3635 * so mark the attribute cache invalid.
@@ -3464,12 +3667,13 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3464 case -NFS4ERR_OPENMODE: 3667 case -NFS4ERR_OPENMODE:
3465 if (state == NULL) 3668 if (state == NULL)
3466 break; 3669 break;
3467 nfs4_state_mark_reclaim_nograce(clp, state); 3670 nfs4_schedule_stateid_recovery(server, state);
3468 goto do_state_recovery; 3671 goto wait_on_recovery;
3469 case -NFS4ERR_STALE_STATEID: 3672 case -NFS4ERR_STALE_STATEID:
3470 case -NFS4ERR_STALE_CLIENTID: 3673 case -NFS4ERR_STALE_CLIENTID:
3471 case -NFS4ERR_EXPIRED: 3674 case -NFS4ERR_EXPIRED:
3472 goto do_state_recovery; 3675 nfs4_schedule_lease_recovery(clp);
3676 goto wait_on_recovery;
3473#if defined(CONFIG_NFS_V4_1) 3677#if defined(CONFIG_NFS_V4_1)
3474 case -NFS4ERR_BADSESSION: 3678 case -NFS4ERR_BADSESSION:
3475 case -NFS4ERR_BADSLOT: 3679 case -NFS4ERR_BADSLOT:
@@ -3480,7 +3684,7 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3480 case -NFS4ERR_SEQ_MISORDERED: 3684 case -NFS4ERR_SEQ_MISORDERED:
3481 dprintk("%s ERROR %d, Reset session\n", __func__, 3685 dprintk("%s ERROR %d, Reset session\n", __func__,
3482 task->tk_status); 3686 task->tk_status);
3483 nfs4_schedule_state_recovery(clp); 3687 nfs4_schedule_session_recovery(clp->cl_session);
3484 task->tk_status = 0; 3688 task->tk_status = 0;
3485 return -EAGAIN; 3689 return -EAGAIN;
3486#endif /* CONFIG_NFS_V4_1 */ 3690#endif /* CONFIG_NFS_V4_1 */
@@ -3497,9 +3701,8 @@ nfs4_async_handle_error(struct rpc_task *task, const struct nfs_server *server,
3497 } 3701 }
3498 task->tk_status = nfs4_map_errors(task->tk_status); 3702 task->tk_status = nfs4_map_errors(task->tk_status);
3499 return 0; 3703 return 0;
3500do_state_recovery: 3704wait_on_recovery:
3501 rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL); 3705 rpc_sleep_on(&clp->cl_rpcwaitq, task, NULL);
3502 nfs4_schedule_state_recovery(clp);
3503 if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0) 3706 if (test_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) == 0)
3504 rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task); 3707 rpc_wake_up_queued_task(&clp->cl_rpcwaitq, task);
3505 task->tk_status = 0; 3708 task->tk_status = 0;
@@ -3548,21 +3751,20 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
3548 sizeof(setclientid.sc_uaddr), "%s.%u.%u", 3751 sizeof(setclientid.sc_uaddr), "%s.%u.%u",
3549 clp->cl_ipaddr, port >> 8, port & 255); 3752 clp->cl_ipaddr, port >> 8, port & 255);
3550 3753
3551 status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); 3754 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
3552 if (status != -NFS4ERR_CLID_INUSE) 3755 if (status != -NFS4ERR_CLID_INUSE)
3553 break; 3756 break;
3554 if (signalled()) 3757 if (loop != 0) {
3758 ++clp->cl_id_uniquifier;
3555 break; 3759 break;
3556 if (loop++ & 1) 3760 }
3557 ssleep(clp->cl_lease_time / HZ + 1); 3761 ++loop;
3558 else 3762 ssleep(clp->cl_lease_time / HZ + 1);
3559 if (++clp->cl_id_uniquifier == 0)
3560 break;
3561 } 3763 }
3562 return status; 3764 return status;
3563} 3765}
3564 3766
3565static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp, 3767int nfs4_proc_setclientid_confirm(struct nfs_client *clp,
3566 struct nfs4_setclientid_res *arg, 3768 struct nfs4_setclientid_res *arg,
3567 struct rpc_cred *cred) 3769 struct rpc_cred *cred)
3568{ 3770{
@@ -3577,7 +3779,7 @@ static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp,
3577 int status; 3779 int status;
3578 3780
3579 now = jiffies; 3781 now = jiffies;
3580 status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); 3782 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
3581 if (status == 0) { 3783 if (status == 0) {
3582 spin_lock(&clp->cl_lock); 3784 spin_lock(&clp->cl_lock);
3583 clp->cl_lease_time = fsinfo.lease_time * HZ; 3785 clp->cl_lease_time = fsinfo.lease_time * HZ;
@@ -3587,26 +3789,6 @@ static int _nfs4_proc_setclientid_confirm(struct nfs_client *clp,
3587 return status; 3789 return status;
3588} 3790}
3589 3791
3590int nfs4_proc_setclientid_confirm(struct nfs_client *clp,
3591 struct nfs4_setclientid_res *arg,
3592 struct rpc_cred *cred)
3593{
3594 long timeout = 0;
3595 int err;
3596 do {
3597 err = _nfs4_proc_setclientid_confirm(clp, arg, cred);
3598 switch (err) {
3599 case 0:
3600 return err;
3601 case -NFS4ERR_RESOURCE:
3602 /* The IBM lawyers misread another document! */
3603 case -NFS4ERR_DELAY:
3604 err = nfs4_delay(clp->cl_rpcclient, &timeout);
3605 }
3606 } while (err == 0);
3607 return err;
3608}
3609
3610struct nfs4_delegreturndata { 3792struct nfs4_delegreturndata {
3611 struct nfs4_delegreturnargs args; 3793 struct nfs4_delegreturnargs args;
3612 struct nfs4_delegreturnres res; 3794 struct nfs4_delegreturnres res;
@@ -3781,7 +3963,7 @@ static int _nfs4_proc_getlk(struct nfs4_state *state, int cmd, struct file_lock
3781 lsp = request->fl_u.nfs4_fl.owner; 3963 lsp = request->fl_u.nfs4_fl.owner;
3782 arg.lock_owner.id = lsp->ls_id.id; 3964 arg.lock_owner.id = lsp->ls_id.id;
3783 arg.lock_owner.s_dev = server->s_dev; 3965 arg.lock_owner.s_dev = server->s_dev;
3784 status = nfs4_call_sync(server, &msg, &arg, &res, 1); 3966 status = nfs4_call_sync(server->client, server, &msg, &arg.seq_args, &res.seq_res, 1);
3785 switch (status) { 3967 switch (status) {
3786 case 0: 3968 case 0:
3787 request->fl_type = F_UNLCK; 3969 request->fl_type = F_UNLCK;
@@ -4110,7 +4292,7 @@ static void nfs4_lock_release(void *calldata)
4110 task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp, 4292 task = nfs4_do_unlck(&data->fl, data->ctx, data->lsp,
4111 data->arg.lock_seqid); 4293 data->arg.lock_seqid);
4112 if (!IS_ERR(task)) 4294 if (!IS_ERR(task))
4113 rpc_put_task(task); 4295 rpc_put_task_async(task);
4114 dprintk("%s: cancelling lock!\n", __func__); 4296 dprintk("%s: cancelling lock!\n", __func__);
4115 } else 4297 } else
4116 nfs_free_seqid(data->arg.lock_seqid); 4298 nfs_free_seqid(data->arg.lock_seqid);
@@ -4134,23 +4316,18 @@ static const struct rpc_call_ops nfs4_recover_lock_ops = {
4134 4316
4135static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error) 4317static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_state *lsp, int new_lock_owner, int error)
4136{ 4318{
4137 struct nfs_client *clp = server->nfs_client;
4138 struct nfs4_state *state = lsp->ls_state;
4139
4140 switch (error) { 4319 switch (error) {
4141 case -NFS4ERR_ADMIN_REVOKED: 4320 case -NFS4ERR_ADMIN_REVOKED:
4142 case -NFS4ERR_BAD_STATEID: 4321 case -NFS4ERR_BAD_STATEID:
4143 case -NFS4ERR_EXPIRED: 4322 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
4144 if (new_lock_owner != 0 || 4323 if (new_lock_owner != 0 ||
4145 (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) 4324 (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
4146 nfs4_state_mark_reclaim_nograce(clp, state); 4325 nfs4_schedule_stateid_recovery(server, lsp->ls_state);
4147 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
4148 break; 4326 break;
4149 case -NFS4ERR_STALE_STATEID: 4327 case -NFS4ERR_STALE_STATEID:
4150 if (new_lock_owner != 0 ||
4151 (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0)
4152 nfs4_state_mark_reclaim_reboot(clp, state);
4153 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; 4328 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
4329 case -NFS4ERR_EXPIRED:
4330 nfs4_schedule_lease_recovery(server->nfs_client);
4154 }; 4331 };
4155} 4332}
4156 4333
@@ -4366,12 +4543,14 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4366 case -NFS4ERR_EXPIRED: 4543 case -NFS4ERR_EXPIRED:
4367 case -NFS4ERR_STALE_CLIENTID: 4544 case -NFS4ERR_STALE_CLIENTID:
4368 case -NFS4ERR_STALE_STATEID: 4545 case -NFS4ERR_STALE_STATEID:
4546 nfs4_schedule_lease_recovery(server->nfs_client);
4547 goto out;
4369 case -NFS4ERR_BADSESSION: 4548 case -NFS4ERR_BADSESSION:
4370 case -NFS4ERR_BADSLOT: 4549 case -NFS4ERR_BADSLOT:
4371 case -NFS4ERR_BAD_HIGH_SLOT: 4550 case -NFS4ERR_BAD_HIGH_SLOT:
4372 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: 4551 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
4373 case -NFS4ERR_DEADSESSION: 4552 case -NFS4ERR_DEADSESSION:
4374 nfs4_schedule_state_recovery(server->nfs_client); 4553 nfs4_schedule_session_recovery(server->nfs_client->cl_session);
4375 goto out; 4554 goto out;
4376 case -ERESTARTSYS: 4555 case -ERESTARTSYS:
4377 /* 4556 /*
@@ -4381,7 +4560,7 @@ int nfs4_lock_delegation_recall(struct nfs4_state *state, struct file_lock *fl)
4381 case -NFS4ERR_ADMIN_REVOKED: 4560 case -NFS4ERR_ADMIN_REVOKED:
4382 case -NFS4ERR_BAD_STATEID: 4561 case -NFS4ERR_BAD_STATEID:
4383 case -NFS4ERR_OPENMODE: 4562 case -NFS4ERR_OPENMODE:
4384 nfs4_state_mark_reclaim_nograce(server->nfs_client, state); 4563 nfs4_schedule_stateid_recovery(server, state);
4385 err = 0; 4564 err = 0;
4386 goto out; 4565 goto out;
4387 case -EKEYEXPIRED: 4566 case -EKEYEXPIRED:
@@ -4512,12 +4691,46 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name,
4512 nfs_fattr_init(&fs_locations->fattr); 4691 nfs_fattr_init(&fs_locations->fattr);
4513 fs_locations->server = server; 4692 fs_locations->server = server;
4514 fs_locations->nlocations = 0; 4693 fs_locations->nlocations = 0;
4515 status = nfs4_call_sync(server, &msg, &args, &res, 0); 4694 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
4516 nfs_fixup_referral_attributes(&fs_locations->fattr); 4695 nfs_fixup_referral_attributes(&fs_locations->fattr);
4517 dprintk("%s: returned status = %d\n", __func__, status); 4696 dprintk("%s: returned status = %d\n", __func__, status);
4518 return status; 4697 return status;
4519} 4698}
4520 4699
4700static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors)
4701{
4702 int status;
4703 struct nfs4_secinfo_arg args = {
4704 .dir_fh = NFS_FH(dir),
4705 .name = name,
4706 };
4707 struct nfs4_secinfo_res res = {
4708 .flavors = flavors,
4709 };
4710 struct rpc_message msg = {
4711 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SECINFO],
4712 .rpc_argp = &args,
4713 .rpc_resp = &res,
4714 };
4715
4716 dprintk("NFS call secinfo %s\n", name->name);
4717 status = nfs4_call_sync(NFS_SERVER(dir)->client, NFS_SERVER(dir), &msg, &args.seq_args, &res.seq_res, 0);
4718 dprintk("NFS reply secinfo: %d\n", status);
4719 return status;
4720}
4721
4722int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors)
4723{
4724 struct nfs4_exception exception = { };
4725 int err;
4726 do {
4727 err = nfs4_handle_exception(NFS_SERVER(dir),
4728 _nfs4_proc_secinfo(dir, name, flavors),
4729 &exception);
4730 } while (exception.retry);
4731 return err;
4732}
4733
4521#ifdef CONFIG_NFS_V4_1 4734#ifdef CONFIG_NFS_V4_1
4522/* 4735/*
4523 * Check the exchange flags returned by the server for invalid flags, having 4736 * Check the exchange flags returned by the server for invalid flags, having
@@ -4580,7 +4793,7 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
4580 init_utsname()->domainname, 4793 init_utsname()->domainname,
4581 clp->cl_rpcclient->cl_auth->au_flavor); 4794 clp->cl_rpcclient->cl_auth->au_flavor);
4582 4795
4583 status = rpc_call_sync(clp->cl_rpcclient, &msg, 0); 4796 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
4584 if (!status) 4797 if (!status)
4585 status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); 4798 status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
4586 dprintk("<-- %s status= %d\n", __func__, status); 4799 dprintk("<-- %s status= %d\n", __func__, status);
@@ -4663,7 +4876,8 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
4663 .rpc_client = clp->cl_rpcclient, 4876 .rpc_client = clp->cl_rpcclient,
4664 .rpc_message = &msg, 4877 .rpc_message = &msg,
4665 .callback_ops = &nfs4_get_lease_time_ops, 4878 .callback_ops = &nfs4_get_lease_time_ops,
4666 .callback_data = &data 4879 .callback_data = &data,
4880 .flags = RPC_TASK_TIMEOUT,
4667 }; 4881 };
4668 int status; 4882 int status;
4669 4883
@@ -4965,7 +5179,7 @@ static int _nfs4_proc_create_session(struct nfs_client *clp)
4965 nfs4_init_channel_attrs(&args); 5179 nfs4_init_channel_attrs(&args);
4966 args.flags = (SESSION4_PERSIST | SESSION4_BACK_CHAN); 5180 args.flags = (SESSION4_PERSIST | SESSION4_BACK_CHAN);
4967 5181
4968 status = rpc_call_sync(session->clp->cl_rpcclient, &msg, 0); 5182 status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
4969 5183
4970 if (!status) 5184 if (!status)
4971 /* Verify the session's negotiated channel_attrs values */ 5185 /* Verify the session's negotiated channel_attrs values */
@@ -5032,7 +5246,7 @@ int nfs4_proc_destroy_session(struct nfs4_session *session)
5032 msg.rpc_argp = session; 5246 msg.rpc_argp = session;
5033 msg.rpc_resp = NULL; 5247 msg.rpc_resp = NULL;
5034 msg.rpc_cred = NULL; 5248 msg.rpc_cred = NULL;
5035 status = rpc_call_sync(session->clp->cl_rpcclient, &msg, 0); 5249 status = rpc_call_sync(session->clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
5036 5250
5037 if (status) 5251 if (status)
5038 printk(KERN_WARNING 5252 printk(KERN_WARNING
@@ -5073,6 +5287,27 @@ int nfs4_init_session(struct nfs_server *server)
5073 return ret; 5287 return ret;
5074} 5288}
5075 5289
5290int nfs4_init_ds_session(struct nfs_client *clp)
5291{
5292 struct nfs4_session *session = clp->cl_session;
5293 int ret;
5294
5295 if (!test_and_clear_bit(NFS4_SESSION_INITING, &session->session_state))
5296 return 0;
5297
5298 ret = nfs4_client_recover_expired_lease(clp);
5299 if (!ret)
5300 /* Test for the DS role */
5301 if (!is_ds_client(clp))
5302 ret = -ENODEV;
5303 if (!ret)
5304 ret = nfs4_check_client_ready(clp);
5305 return ret;
5306
5307}
5308EXPORT_SYMBOL_GPL(nfs4_init_ds_session);
5309
5310
5076/* 5311/*
5077 * Renew the cl_session lease. 5312 * Renew the cl_session lease.
5078 */ 5313 */
@@ -5100,7 +5335,7 @@ static int nfs41_sequence_handle_errors(struct rpc_task *task, struct nfs_client
5100 rpc_delay(task, NFS4_POLL_RETRY_MAX); 5335 rpc_delay(task, NFS4_POLL_RETRY_MAX);
5101 return -EAGAIN; 5336 return -EAGAIN;
5102 default: 5337 default:
5103 nfs4_schedule_state_recovery(clp); 5338 nfs4_schedule_lease_recovery(clp);
5104 } 5339 }
5105 return 0; 5340 return 0;
5106} 5341}
@@ -5187,7 +5422,7 @@ static int nfs41_proc_async_sequence(struct nfs_client *clp, struct rpc_cred *cr
5187 if (IS_ERR(task)) 5422 if (IS_ERR(task))
5188 ret = PTR_ERR(task); 5423 ret = PTR_ERR(task);
5189 else 5424 else
5190 rpc_put_task(task); 5425 rpc_put_task_async(task);
5191 dprintk("<-- %s status=%d\n", __func__, ret); 5426 dprintk("<-- %s status=%d\n", __func__, ret);
5192 return ret; 5427 return ret;
5193} 5428}
@@ -5203,8 +5438,13 @@ static int nfs4_proc_sequence(struct nfs_client *clp, struct rpc_cred *cred)
5203 goto out; 5438 goto out;
5204 } 5439 }
5205 ret = rpc_wait_for_completion_task(task); 5440 ret = rpc_wait_for_completion_task(task);
5206 if (!ret) 5441 if (!ret) {
5442 struct nfs4_sequence_res *res = task->tk_msg.rpc_resp;
5443
5444 if (task->tk_status == 0)
5445 nfs41_handle_sequence_flag_errors(clp, res->sr_status_flags);
5207 ret = task->tk_status; 5446 ret = task->tk_status;
5447 }
5208 rpc_put_task(task); 5448 rpc_put_task(task);
5209out: 5449out:
5210 dprintk("<-- %s status=%d\n", __func__, ret); 5450 dprintk("<-- %s status=%d\n", __func__, ret);
@@ -5241,7 +5481,7 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
5241 rpc_delay(task, NFS4_POLL_RETRY_MAX); 5481 rpc_delay(task, NFS4_POLL_RETRY_MAX);
5242 return -EAGAIN; 5482 return -EAGAIN;
5243 default: 5483 default:
5244 nfs4_schedule_state_recovery(clp); 5484 nfs4_schedule_lease_recovery(clp);
5245 } 5485 }
5246 return 0; 5486 return 0;
5247} 5487}
@@ -5309,6 +5549,9 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp)
5309 status = PTR_ERR(task); 5549 status = PTR_ERR(task);
5310 goto out; 5550 goto out;
5311 } 5551 }
5552 status = nfs4_wait_for_completion_rpc_task(task);
5553 if (status == 0)
5554 status = task->tk_status;
5312 rpc_put_task(task); 5555 rpc_put_task(task);
5313 return 0; 5556 return 0;
5314out: 5557out:
@@ -5371,8 +5614,6 @@ static void nfs4_layoutget_release(void *calldata)
5371 struct nfs4_layoutget *lgp = calldata; 5614 struct nfs4_layoutget *lgp = calldata;
5372 5615
5373 dprintk("--> %s\n", __func__); 5616 dprintk("--> %s\n", __func__);
5374 if (lgp->res.layout.buf != NULL)
5375 free_page((unsigned long) lgp->res.layout.buf);
5376 put_nfs_open_context(lgp->args.ctx); 5617 put_nfs_open_context(lgp->args.ctx);
5377 kfree(calldata); 5618 kfree(calldata);
5378 dprintk("<-- %s\n", __func__); 5619 dprintk("<-- %s\n", __func__);
@@ -5404,12 +5645,7 @@ int nfs4_proc_layoutget(struct nfs4_layoutget *lgp)
5404 5645
5405 dprintk("--> %s\n", __func__); 5646 dprintk("--> %s\n", __func__);
5406 5647
5407 lgp->res.layout.buf = (void *)__get_free_page(GFP_NOFS); 5648 lgp->res.layoutp = &lgp->args.layout;
5408 if (lgp->res.layout.buf == NULL) {
5409 nfs4_layoutget_release(lgp);
5410 return -ENOMEM;
5411 }
5412
5413 lgp->res.seq_res.sr_slot = NULL; 5649 lgp->res.seq_res.sr_slot = NULL;
5414 task = rpc_run_task(&task_setup_data); 5650 task = rpc_run_task(&task_setup_data);
5415 if (IS_ERR(task)) 5651 if (IS_ERR(task))
@@ -5441,7 +5677,7 @@ _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
5441 int status; 5677 int status;
5442 5678
5443 dprintk("--> %s\n", __func__); 5679 dprintk("--> %s\n", __func__);
5444 status = nfs4_call_sync(server, &msg, &args, &res, 0); 5680 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
5445 dprintk("<-- %s status=%d\n", __func__, status); 5681 dprintk("<-- %s status=%d\n", __func__, status);
5446 5682
5447 return status; 5683 return status;
@@ -5461,6 +5697,100 @@ int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev)
5461} 5697}
5462EXPORT_SYMBOL_GPL(nfs4_proc_getdeviceinfo); 5698EXPORT_SYMBOL_GPL(nfs4_proc_getdeviceinfo);
5463 5699
5700static void nfs4_layoutcommit_prepare(struct rpc_task *task, void *calldata)
5701{
5702 struct nfs4_layoutcommit_data *data = calldata;
5703 struct nfs_server *server = NFS_SERVER(data->args.inode);
5704
5705 if (nfs4_setup_sequence(server, &data->args.seq_args,
5706 &data->res.seq_res, 1, task))
5707 return;
5708 rpc_call_start(task);
5709}
5710
5711static void
5712nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
5713{
5714 struct nfs4_layoutcommit_data *data = calldata;
5715 struct nfs_server *server = NFS_SERVER(data->args.inode);
5716
5717 if (!nfs4_sequence_done(task, &data->res.seq_res))
5718 return;
5719
5720 switch (task->tk_status) { /* Just ignore these failures */
5721 case NFS4ERR_DELEG_REVOKED: /* layout was recalled */
5722 case NFS4ERR_BADIOMODE: /* no IOMODE_RW layout for range */
5723 case NFS4ERR_BADLAYOUT: /* no layout */
5724 case NFS4ERR_GRACE: /* loca_recalim always false */
5725 task->tk_status = 0;
5726 }
5727
5728 if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) {
5729 nfs_restart_rpc(task, server->nfs_client);
5730 return;
5731 }
5732
5733 if (task->tk_status == 0)
5734 nfs_post_op_update_inode_force_wcc(data->args.inode,
5735 data->res.fattr);
5736}
5737
5738static void nfs4_layoutcommit_release(void *calldata)
5739{
5740 struct nfs4_layoutcommit_data *data = calldata;
5741
5742 /* Matched by references in pnfs_set_layoutcommit */
5743 put_lseg(data->lseg);
5744 put_rpccred(data->cred);
5745 kfree(data);
5746}
5747
5748static const struct rpc_call_ops nfs4_layoutcommit_ops = {
5749 .rpc_call_prepare = nfs4_layoutcommit_prepare,
5750 .rpc_call_done = nfs4_layoutcommit_done,
5751 .rpc_release = nfs4_layoutcommit_release,
5752};
5753
5754int
5755nfs4_proc_layoutcommit(struct nfs4_layoutcommit_data *data, bool sync)
5756{
5757 struct rpc_message msg = {
5758 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_LAYOUTCOMMIT],
5759 .rpc_argp = &data->args,
5760 .rpc_resp = &data->res,
5761 .rpc_cred = data->cred,
5762 };
5763 struct rpc_task_setup task_setup_data = {
5764 .task = &data->task,
5765 .rpc_client = NFS_CLIENT(data->args.inode),
5766 .rpc_message = &msg,
5767 .callback_ops = &nfs4_layoutcommit_ops,
5768 .callback_data = data,
5769 .flags = RPC_TASK_ASYNC,
5770 };
5771 struct rpc_task *task;
5772 int status = 0;
5773
5774 dprintk("NFS: %4d initiating layoutcommit call. sync %d "
5775 "lbw: %llu inode %lu\n",
5776 data->task.tk_pid, sync,
5777 data->args.lastbytewritten,
5778 data->args.inode->i_ino);
5779
5780 task = rpc_run_task(&task_setup_data);
5781 if (IS_ERR(task))
5782 return PTR_ERR(task);
5783 if (sync == false)
5784 goto out;
5785 status = nfs4_wait_for_completion_rpc_task(task);
5786 if (status != 0)
5787 goto out;
5788 status = task->tk_status;
5789out:
5790 dprintk("%s: status %d\n", __func__, status);
5791 rpc_put_task(task);
5792 return status;
5793}
5464#endif /* CONFIG_NFS_V4_1 */ 5794#endif /* CONFIG_NFS_V4_1 */
5465 5795
5466struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { 5796struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
@@ -5595,6 +5925,8 @@ const struct nfs_rpc_ops nfs_v4_clientops = {
5595 .clear_acl_cache = nfs4_zap_acl_attr, 5925 .clear_acl_cache = nfs4_zap_acl_attr,
5596 .close_context = nfs4_close_context, 5926 .close_context = nfs4_close_context,
5597 .open_context = nfs4_atomic_open, 5927 .open_context = nfs4_atomic_open,
5928 .init_client = nfs4_init_client,
5929 .secinfo = nfs4_proc_secinfo,
5598}; 5930};
5599 5931
5600static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = { 5932static const struct xattr_handler nfs4_xattr_nfs4_acl_handler = {
diff --git a/fs/nfs/nfs4renewd.c b/fs/nfs/nfs4renewd.c
index 402143d75fc5..df8e7f3ca56d 100644
--- a/fs/nfs/nfs4renewd.c
+++ b/fs/nfs/nfs4renewd.c
@@ -64,12 +64,8 @@ nfs4_renew_state(struct work_struct *work)
64 ops = clp->cl_mvops->state_renewal_ops; 64 ops = clp->cl_mvops->state_renewal_ops;
65 dprintk("%s: start\n", __func__); 65 dprintk("%s: start\n", __func__);
66 66
67 rcu_read_lock(); 67 if (test_bit(NFS_CS_STOP_RENEW, &clp->cl_res_state))
68 if (list_empty(&clp->cl_superblocks)) {
69 rcu_read_unlock();
70 goto out; 68 goto out;
71 }
72 rcu_read_unlock();
73 69
74 spin_lock(&clp->cl_lock); 70 spin_lock(&clp->cl_lock);
75 lease = clp->cl_lease_time; 71 lease = clp->cl_lease_time;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index e6742b57a04c..036f5adc9e1f 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -64,10 +64,15 @@ static LIST_HEAD(nfs4_clientid_list);
64 64
65int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) 65int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
66{ 66{
67 struct nfs4_setclientid_res clid; 67 struct nfs4_setclientid_res clid = {
68 .clientid = clp->cl_clientid,
69 .confirm = clp->cl_confirm,
70 };
68 unsigned short port; 71 unsigned short port;
69 int status; 72 int status;
70 73
74 if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state))
75 goto do_confirm;
71 port = nfs_callback_tcpport; 76 port = nfs_callback_tcpport;
72 if (clp->cl_addr.ss_family == AF_INET6) 77 if (clp->cl_addr.ss_family == AF_INET6)
73 port = nfs_callback_tcpport6; 78 port = nfs_callback_tcpport6;
@@ -75,10 +80,14 @@ int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
75 status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid); 80 status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid);
76 if (status != 0) 81 if (status != 0)
77 goto out; 82 goto out;
83 clp->cl_clientid = clid.clientid;
84 clp->cl_confirm = clid.confirm;
85 set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
86do_confirm:
78 status = nfs4_proc_setclientid_confirm(clp, &clid, cred); 87 status = nfs4_proc_setclientid_confirm(clp, &clid, cred);
79 if (status != 0) 88 if (status != 0)
80 goto out; 89 goto out;
81 clp->cl_clientid = clid.clientid; 90 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
82 nfs4_schedule_state_renewal(clp); 91 nfs4_schedule_state_renewal(clp);
83out: 92out:
84 return status; 93 return status;
@@ -153,6 +162,11 @@ static int nfs41_setup_state_renewal(struct nfs_client *clp)
153 int status; 162 int status;
154 struct nfs_fsinfo fsinfo; 163 struct nfs_fsinfo fsinfo;
155 164
165 if (!test_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state)) {
166 nfs4_schedule_state_renewal(clp);
167 return 0;
168 }
169
156 status = nfs4_proc_get_lease_time(clp, &fsinfo); 170 status = nfs4_proc_get_lease_time(clp, &fsinfo);
157 if (status == 0) { 171 if (status == 0) {
158 /* Update lease time and schedule renewal */ 172 /* Update lease time and schedule renewal */
@@ -225,13 +239,18 @@ int nfs41_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
225{ 239{
226 int status; 240 int status;
227 241
242 if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state))
243 goto do_confirm;
228 nfs4_begin_drain_session(clp); 244 nfs4_begin_drain_session(clp);
229 status = nfs4_proc_exchange_id(clp, cred); 245 status = nfs4_proc_exchange_id(clp, cred);
230 if (status != 0) 246 if (status != 0)
231 goto out; 247 goto out;
248 set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
249do_confirm:
232 status = nfs4_proc_create_session(clp); 250 status = nfs4_proc_create_session(clp);
233 if (status != 0) 251 if (status != 0)
234 goto out; 252 goto out;
253 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
235 nfs41_setup_state_renewal(clp); 254 nfs41_setup_state_renewal(clp);
236 nfs_mark_client_ready(clp, NFS_CS_READY); 255 nfs_mark_client_ready(clp, NFS_CS_READY);
237out: 256out:
@@ -585,7 +604,8 @@ nfs4_get_open_state(struct inode *inode, struct nfs4_state_owner *owner)
585 state->owner = owner; 604 state->owner = owner;
586 atomic_inc(&owner->so_count); 605 atomic_inc(&owner->so_count);
587 list_add(&state->inode_states, &nfsi->open_states); 606 list_add(&state->inode_states, &nfsi->open_states);
588 state->inode = igrab(inode); 607 ihold(inode);
608 state->inode = inode;
589 spin_unlock(&inode->i_lock); 609 spin_unlock(&inode->i_lock);
590 /* Note: The reclaim code dictates that we add stateless 610 /* Note: The reclaim code dictates that we add stateless
591 * and read-only stateids to the end of the list */ 611 * and read-only stateids to the end of the list */
@@ -1007,9 +1027,9 @@ void nfs4_schedule_state_manager(struct nfs_client *clp)
1007} 1027}
1008 1028
1009/* 1029/*
1010 * Schedule a state recovery attempt 1030 * Schedule a lease recovery attempt
1011 */ 1031 */
1012void nfs4_schedule_state_recovery(struct nfs_client *clp) 1032void nfs4_schedule_lease_recovery(struct nfs_client *clp)
1013{ 1033{
1014 if (!clp) 1034 if (!clp)
1015 return; 1035 return;
@@ -1018,7 +1038,7 @@ void nfs4_schedule_state_recovery(struct nfs_client *clp)
1018 nfs4_schedule_state_manager(clp); 1038 nfs4_schedule_state_manager(clp);
1019} 1039}
1020 1040
1021int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state) 1041static int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *state)
1022{ 1042{
1023 1043
1024 set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); 1044 set_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1032,7 +1052,7 @@ int nfs4_state_mark_reclaim_reboot(struct nfs_client *clp, struct nfs4_state *st
1032 return 1; 1052 return 1;
1033} 1053}
1034 1054
1035int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state) 1055static int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *state)
1036{ 1056{
1037 set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags); 1057 set_bit(NFS_STATE_RECLAIM_NOGRACE, &state->flags);
1038 clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags); 1058 clear_bit(NFS_STATE_RECLAIM_REBOOT, &state->flags);
@@ -1041,6 +1061,14 @@ int nfs4_state_mark_reclaim_nograce(struct nfs_client *clp, struct nfs4_state *s
1041 return 1; 1061 return 1;
1042} 1062}
1043 1063
1064void nfs4_schedule_stateid_recovery(const struct nfs_server *server, struct nfs4_state *state)
1065{
1066 struct nfs_client *clp = server->nfs_client;
1067
1068 nfs4_state_mark_reclaim_nograce(clp, state);
1069 nfs4_schedule_state_manager(clp);
1070}
1071
1044static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops) 1072static int nfs4_reclaim_locks(struct nfs4_state *state, const struct nfs4_state_recovery_ops *ops)
1045{ 1073{
1046 struct inode *inode = state->inode; 1074 struct inode *inode = state->inode;
@@ -1436,10 +1464,16 @@ static int nfs4_reclaim_lease(struct nfs_client *clp)
1436} 1464}
1437 1465
1438#ifdef CONFIG_NFS_V4_1 1466#ifdef CONFIG_NFS_V4_1
1467void nfs4_schedule_session_recovery(struct nfs4_session *session)
1468{
1469 nfs4_schedule_lease_recovery(session->clp);
1470}
1471EXPORT_SYMBOL_GPL(nfs4_schedule_session_recovery);
1472
1439void nfs41_handle_recall_slot(struct nfs_client *clp) 1473void nfs41_handle_recall_slot(struct nfs_client *clp)
1440{ 1474{
1441 set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state); 1475 set_bit(NFS4CLNT_RECALL_SLOT, &clp->cl_state);
1442 nfs4_schedule_state_recovery(clp); 1476 nfs4_schedule_state_manager(clp);
1443} 1477}
1444 1478
1445static void nfs4_reset_all_state(struct nfs_client *clp) 1479static void nfs4_reset_all_state(struct nfs_client *clp)
@@ -1447,7 +1481,7 @@ static void nfs4_reset_all_state(struct nfs_client *clp)
1447 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { 1481 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
1448 clp->cl_boot_time = CURRENT_TIME; 1482 clp->cl_boot_time = CURRENT_TIME;
1449 nfs4_state_start_reclaim_nograce(clp); 1483 nfs4_state_start_reclaim_nograce(clp);
1450 nfs4_schedule_state_recovery(clp); 1484 nfs4_schedule_state_manager(clp);
1451 } 1485 }
1452} 1486}
1453 1487
@@ -1455,7 +1489,7 @@ static void nfs41_handle_server_reboot(struct nfs_client *clp)
1455{ 1489{
1456 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) { 1490 if (test_and_set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state) == 0) {
1457 nfs4_state_start_reclaim_reboot(clp); 1491 nfs4_state_start_reclaim_reboot(clp);
1458 nfs4_schedule_state_recovery(clp); 1492 nfs4_schedule_state_manager(clp);
1459 } 1493 }
1460} 1494}
1461 1495
@@ -1475,7 +1509,7 @@ static void nfs41_handle_cb_path_down(struct nfs_client *clp)
1475{ 1509{
1476 nfs_expire_all_delegations(clp); 1510 nfs_expire_all_delegations(clp);
1477 if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0) 1511 if (test_and_set_bit(NFS4CLNT_SESSION_RESET, &clp->cl_state) == 0)
1478 nfs4_schedule_state_recovery(clp); 1512 nfs4_schedule_state_manager(clp);
1479} 1513}
1480 1514
1481void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags) 1515void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags)
@@ -1564,20 +1598,23 @@ static int nfs4_recall_slot(struct nfs_client *clp) { return 0; }
1564 */ 1598 */
1565static void nfs4_set_lease_expired(struct nfs_client *clp, int status) 1599static void nfs4_set_lease_expired(struct nfs_client *clp, int status)
1566{ 1600{
1567 if (nfs4_has_session(clp)) { 1601 switch (status) {
1568 switch (status) { 1602 case -NFS4ERR_CLID_INUSE:
1569 case -NFS4ERR_DELAY: 1603 case -NFS4ERR_STALE_CLIENTID:
1570 case -NFS4ERR_CLID_INUSE: 1604 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
1571 case -EAGAIN: 1605 break;
1572 break; 1606 case -NFS4ERR_DELAY:
1607 case -ETIMEDOUT:
1608 case -EAGAIN:
1609 ssleep(1);
1610 break;
1573 1611
1574 case -EKEYEXPIRED: 1612 case -EKEYEXPIRED:
1575 nfs4_warn_keyexpired(clp->cl_hostname); 1613 nfs4_warn_keyexpired(clp->cl_hostname);
1576 case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery 1614 case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
1577 * in nfs4_exchange_id */ 1615 * in nfs4_exchange_id */
1578 default: 1616 default:
1579 return; 1617 return;
1580 }
1581 } 1618 }
1582 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state); 1619 set_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state);
1583} 1620}
@@ -1587,7 +1624,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
1587 int status = 0; 1624 int status = 0;
1588 1625
1589 /* Ensure exclusive access to NFSv4 state */ 1626 /* Ensure exclusive access to NFSv4 state */
1590 for(;;) { 1627 do {
1591 if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) { 1628 if (test_and_clear_bit(NFS4CLNT_LEASE_EXPIRED, &clp->cl_state)) {
1592 /* We're going to have to re-establish a clientid */ 1629 /* We're going to have to re-establish a clientid */
1593 status = nfs4_reclaim_lease(clp); 1630 status = nfs4_reclaim_lease(clp);
@@ -1671,7 +1708,7 @@ static void nfs4_state_manager(struct nfs_client *clp)
1671 break; 1708 break;
1672 if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0) 1709 if (test_and_set_bit(NFS4CLNT_MANAGER_RUNNING, &clp->cl_state) != 0)
1673 break; 1710 break;
1674 } 1711 } while (atomic_read(&clp->cl_count) > 1);
1675 return; 1712 return;
1676out_error: 1713out_error:
1677 printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s" 1714 printk(KERN_WARNING "Error: state manager failed on NFSv4 server %s"
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4e2c168b6ee9..c3ccd2c46834 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -46,6 +46,7 @@
46#include <linux/kdev_t.h> 46#include <linux/kdev_t.h>
47#include <linux/sunrpc/clnt.h> 47#include <linux/sunrpc/clnt.h>
48#include <linux/sunrpc/msg_prot.h> 48#include <linux/sunrpc/msg_prot.h>
49#include <linux/sunrpc/gss_api.h>
49#include <linux/nfs.h> 50#include <linux/nfs.h>
50#include <linux/nfs4.h> 51#include <linux/nfs4.h>
51#include <linux/nfs_fs.h> 52#include <linux/nfs_fs.h>
@@ -112,7 +113,7 @@ static int nfs4_stat_to_errno(int);
112#define encode_restorefh_maxsz (op_encode_hdr_maxsz) 113#define encode_restorefh_maxsz (op_encode_hdr_maxsz)
113#define decode_restorefh_maxsz (op_decode_hdr_maxsz) 114#define decode_restorefh_maxsz (op_decode_hdr_maxsz)
114#define encode_fsinfo_maxsz (encode_getattr_maxsz) 115#define encode_fsinfo_maxsz (encode_getattr_maxsz)
115#define decode_fsinfo_maxsz (op_decode_hdr_maxsz + 11) 116#define decode_fsinfo_maxsz (op_decode_hdr_maxsz + 15)
116#define encode_renew_maxsz (op_encode_hdr_maxsz + 3) 117#define encode_renew_maxsz (op_encode_hdr_maxsz + 3)
117#define decode_renew_maxsz (op_decode_hdr_maxsz) 118#define decode_renew_maxsz (op_decode_hdr_maxsz)
118#define encode_setclientid_maxsz \ 119#define encode_setclientid_maxsz \
@@ -253,6 +254,8 @@ static int nfs4_stat_to_errno(int);
253 (encode_getattr_maxsz) 254 (encode_getattr_maxsz)
254#define decode_fs_locations_maxsz \ 255#define decode_fs_locations_maxsz \
255 (0) 256 (0)
257#define encode_secinfo_maxsz (op_encode_hdr_maxsz + nfs4_name_maxsz)
258#define decode_secinfo_maxsz (op_decode_hdr_maxsz + 4 + (NFS_MAX_SECFLAVORS * (16 + GSS_OID_MAX_LEN)))
256 259
257#if defined(CONFIG_NFS_V4_1) 260#if defined(CONFIG_NFS_V4_1)
258#define NFS4_MAX_MACHINE_NAME_LEN (64) 261#define NFS4_MAX_MACHINE_NAME_LEN (64)
@@ -324,6 +327,18 @@ static int nfs4_stat_to_errno(int);
324#define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \ 327#define decode_layoutget_maxsz (op_decode_hdr_maxsz + 8 + \
325 decode_stateid_maxsz + \ 328 decode_stateid_maxsz + \
326 XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE)) 329 XDR_QUADLEN(PNFS_LAYOUT_MAXSIZE))
330#define encode_layoutcommit_maxsz (op_encode_hdr_maxsz + \
331 2 /* offset */ + \
332 2 /* length */ + \
333 1 /* reclaim */ + \
334 encode_stateid_maxsz + \
335 1 /* new offset (true) */ + \
336 2 /* last byte written */ + \
337 1 /* nt_timechanged (false) */ + \
338 1 /* layoutupdate4 layout type */ + \
339 1 /* NULL filelayout layoutupdate4 payload */)
340#define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3)
341
327#else /* CONFIG_NFS_V4_1 */ 342#else /* CONFIG_NFS_V4_1 */
328#define encode_sequence_maxsz 0 343#define encode_sequence_maxsz 0
329#define decode_sequence_maxsz 0 344#define decode_sequence_maxsz 0
@@ -676,6 +691,14 @@ static int nfs4_stat_to_errno(int);
676 decode_putfh_maxsz + \ 691 decode_putfh_maxsz + \
677 decode_lookup_maxsz + \ 692 decode_lookup_maxsz + \
678 decode_fs_locations_maxsz) 693 decode_fs_locations_maxsz)
694#define NFS4_enc_secinfo_sz (compound_encode_hdr_maxsz + \
695 encode_sequence_maxsz + \
696 encode_putfh_maxsz + \
697 encode_secinfo_maxsz)
698#define NFS4_dec_secinfo_sz (compound_decode_hdr_maxsz + \
699 decode_sequence_maxsz + \
700 decode_putfh_maxsz + \
701 decode_secinfo_maxsz)
679#if defined(CONFIG_NFS_V4_1) 702#if defined(CONFIG_NFS_V4_1)
680#define NFS4_enc_exchange_id_sz \ 703#define NFS4_enc_exchange_id_sz \
681 (compound_encode_hdr_maxsz + \ 704 (compound_encode_hdr_maxsz + \
@@ -727,6 +750,17 @@ static int nfs4_stat_to_errno(int);
727 decode_sequence_maxsz + \ 750 decode_sequence_maxsz + \
728 decode_putfh_maxsz + \ 751 decode_putfh_maxsz + \
729 decode_layoutget_maxsz) 752 decode_layoutget_maxsz)
753#define NFS4_enc_layoutcommit_sz (compound_encode_hdr_maxsz + \
754 encode_sequence_maxsz +\
755 encode_putfh_maxsz + \
756 encode_layoutcommit_maxsz + \
757 encode_getattr_maxsz)
758#define NFS4_dec_layoutcommit_sz (compound_decode_hdr_maxsz + \
759 decode_sequence_maxsz + \
760 decode_putfh_maxsz + \
761 decode_layoutcommit_maxsz + \
762 decode_getattr_maxsz)
763
730 764
731const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + 765const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
732 compound_encode_hdr_maxsz + 766 compound_encode_hdr_maxsz +
@@ -844,7 +878,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
844 if (iap->ia_valid & ATTR_MODE) 878 if (iap->ia_valid & ATTR_MODE)
845 len += 4; 879 len += 4;
846 if (iap->ia_valid & ATTR_UID) { 880 if (iap->ia_valid & ATTR_UID) {
847 owner_namelen = nfs_map_uid_to_name(server->nfs_client, iap->ia_uid, owner_name, IDMAP_NAMESZ); 881 owner_namelen = nfs_map_uid_to_name(server, iap->ia_uid, owner_name, IDMAP_NAMESZ);
848 if (owner_namelen < 0) { 882 if (owner_namelen < 0) {
849 dprintk("nfs: couldn't resolve uid %d to string\n", 883 dprintk("nfs: couldn't resolve uid %d to string\n",
850 iap->ia_uid); 884 iap->ia_uid);
@@ -856,7 +890,7 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, const
856 len += 4 + (XDR_QUADLEN(owner_namelen) << 2); 890 len += 4 + (XDR_QUADLEN(owner_namelen) << 2);
857 } 891 }
858 if (iap->ia_valid & ATTR_GID) { 892 if (iap->ia_valid & ATTR_GID) {
859 owner_grouplen = nfs_map_gid_to_group(server->nfs_client, iap->ia_gid, owner_group, IDMAP_NAMESZ); 893 owner_grouplen = nfs_map_gid_to_group(server, iap->ia_gid, owner_group, IDMAP_NAMESZ);
860 if (owner_grouplen < 0) { 894 if (owner_grouplen < 0) {
861 dprintk("nfs: couldn't resolve gid %d to string\n", 895 dprintk("nfs: couldn't resolve gid %d to string\n",
862 iap->ia_gid); 896 iap->ia_gid);
@@ -1384,7 +1418,7 @@ static void encode_putrootfh(struct xdr_stream *xdr, struct compound_hdr *hdr)
1384 hdr->replen += decode_putrootfh_maxsz; 1418 hdr->replen += decode_putrootfh_maxsz;
1385} 1419}
1386 1420
1387static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx) 1421static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context *ctx, const struct nfs_lock_context *l_ctx, int zero_seqid)
1388{ 1422{
1389 nfs4_stateid stateid; 1423 nfs4_stateid stateid;
1390 __be32 *p; 1424 __be32 *p;
@@ -1392,6 +1426,8 @@ static void encode_stateid(struct xdr_stream *xdr, const struct nfs_open_context
1392 p = reserve_space(xdr, NFS4_STATEID_SIZE); 1426 p = reserve_space(xdr, NFS4_STATEID_SIZE);
1393 if (ctx->state != NULL) { 1427 if (ctx->state != NULL) {
1394 nfs4_copy_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid); 1428 nfs4_copy_stateid(&stateid, ctx->state, l_ctx->lockowner, l_ctx->pid);
1429 if (zero_seqid)
1430 stateid.stateid.seqid = 0;
1395 xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE); 1431 xdr_encode_opaque_fixed(p, stateid.data, NFS4_STATEID_SIZE);
1396 } else 1432 } else
1397 xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE); 1433 xdr_encode_opaque_fixed(p, zero_stateid.data, NFS4_STATEID_SIZE);
@@ -1404,7 +1440,8 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args,
1404 p = reserve_space(xdr, 4); 1440 p = reserve_space(xdr, 4);
1405 *p = cpu_to_be32(OP_READ); 1441 *p = cpu_to_be32(OP_READ);
1406 1442
1407 encode_stateid(xdr, args->context, args->lock_context); 1443 encode_stateid(xdr, args->context, args->lock_context,
1444 hdr->minorversion);
1408 1445
1409 p = reserve_space(xdr, 12); 1446 p = reserve_space(xdr, 12);
1410 p = xdr_encode_hyper(p, args->offset); 1447 p = xdr_encode_hyper(p, args->offset);
@@ -1415,26 +1452,25 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args,
1415 1452
1416static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr) 1453static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr)
1417{ 1454{
1418 uint32_t attrs[2] = {0, 0}; 1455 uint32_t attrs[2] = {
1456 FATTR4_WORD0_RDATTR_ERROR,
1457 FATTR4_WORD1_MOUNTED_ON_FILEID,
1458 };
1419 uint32_t dircount = readdir->count >> 1; 1459 uint32_t dircount = readdir->count >> 1;
1420 __be32 *p; 1460 __be32 *p;
1421 1461
1422 if (readdir->plus) { 1462 if (readdir->plus) {
1423 attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE| 1463 attrs[0] |= FATTR4_WORD0_TYPE|FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE|
1424 FATTR4_WORD0_FSID|FATTR4_WORD0_FILEHANDLE; 1464 FATTR4_WORD0_FSID|FATTR4_WORD0_FILEHANDLE|FATTR4_WORD0_FILEID;
1425 attrs[1] |= FATTR4_WORD1_MODE|FATTR4_WORD1_NUMLINKS|FATTR4_WORD1_OWNER| 1465 attrs[1] |= FATTR4_WORD1_MODE|FATTR4_WORD1_NUMLINKS|FATTR4_WORD1_OWNER|
1426 FATTR4_WORD1_OWNER_GROUP|FATTR4_WORD1_RAWDEV| 1466 FATTR4_WORD1_OWNER_GROUP|FATTR4_WORD1_RAWDEV|
1427 FATTR4_WORD1_SPACE_USED|FATTR4_WORD1_TIME_ACCESS| 1467 FATTR4_WORD1_SPACE_USED|FATTR4_WORD1_TIME_ACCESS|
1428 FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; 1468 FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
1429 dircount >>= 1; 1469 dircount >>= 1;
1430 } 1470 }
1431 attrs[0] |= FATTR4_WORD0_RDATTR_ERROR|FATTR4_WORD0_FILEID; 1471 /* Use mounted_on_fileid only if the server supports it */
1432 attrs[1] |= FATTR4_WORD1_MOUNTED_ON_FILEID; 1472 if (!(readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID))
1433 /* Switch to mounted_on_fileid if the server supports it */ 1473 attrs[0] |= FATTR4_WORD0_FILEID;
1434 if (readdir->bitmask[1] & FATTR4_WORD1_MOUNTED_ON_FILEID)
1435 attrs[0] &= ~FATTR4_WORD0_FILEID;
1436 else
1437 attrs[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
1438 1474
1439 p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20); 1475 p = reserve_space(xdr, 12+NFS4_VERIFIER_SIZE+20);
1440 *p++ = cpu_to_be32(OP_READDIR); 1476 *p++ = cpu_to_be32(OP_READDIR);
@@ -1592,7 +1628,8 @@ static void encode_write(struct xdr_stream *xdr, const struct nfs_writeargs *arg
1592 p = reserve_space(xdr, 4); 1628 p = reserve_space(xdr, 4);
1593 *p = cpu_to_be32(OP_WRITE); 1629 *p = cpu_to_be32(OP_WRITE);
1594 1630
1595 encode_stateid(xdr, args->context, args->lock_context); 1631 encode_stateid(xdr, args->context, args->lock_context,
1632 hdr->minorversion);
1596 1633
1597 p = reserve_space(xdr, 16); 1634 p = reserve_space(xdr, 16);
1598 p = xdr_encode_hyper(p, args->offset); 1635 p = xdr_encode_hyper(p, args->offset);
@@ -1616,6 +1653,18 @@ static void encode_delegreturn(struct xdr_stream *xdr, const nfs4_stateid *state
1616 hdr->replen += decode_delegreturn_maxsz; 1653 hdr->replen += decode_delegreturn_maxsz;
1617} 1654}
1618 1655
1656static void encode_secinfo(struct xdr_stream *xdr, const struct qstr *name, struct compound_hdr *hdr)
1657{
1658 int len = name->len;
1659 __be32 *p;
1660
1661 p = reserve_space(xdr, 8 + len);
1662 *p++ = cpu_to_be32(OP_SECINFO);
1663 xdr_encode_opaque(p, name->name, len);
1664 hdr->nops++;
1665 hdr->replen += decode_secinfo_maxsz;
1666}
1667
1619#if defined(CONFIG_NFS_V4_1) 1668#if defined(CONFIG_NFS_V4_1)
1620/* NFSv4.1 operations */ 1669/* NFSv4.1 operations */
1621static void encode_exchange_id(struct xdr_stream *xdr, 1670static void encode_exchange_id(struct xdr_stream *xdr,
@@ -1660,7 +1709,7 @@ static void encode_create_session(struct xdr_stream *xdr,
1660 1709
1661 p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12); 1710 p = reserve_space(xdr, 20 + 2*28 + 20 + len + 12);
1662 *p++ = cpu_to_be32(OP_CREATE_SESSION); 1711 *p++ = cpu_to_be32(OP_CREATE_SESSION);
1663 p = xdr_encode_hyper(p, clp->cl_ex_clid); 1712 p = xdr_encode_hyper(p, clp->cl_clientid);
1664 *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */ 1713 *p++ = cpu_to_be32(clp->cl_seqid); /*Sequence id */
1665 *p++ = cpu_to_be32(args->flags); /*flags */ 1714 *p++ = cpu_to_be32(args->flags); /*flags */
1666 1715
@@ -1812,6 +1861,34 @@ encode_layoutget(struct xdr_stream *xdr,
1812 hdr->nops++; 1861 hdr->nops++;
1813 hdr->replen += decode_layoutget_maxsz; 1862 hdr->replen += decode_layoutget_maxsz;
1814} 1863}
1864
1865static int
1866encode_layoutcommit(struct xdr_stream *xdr,
1867 const struct nfs4_layoutcommit_args *args,
1868 struct compound_hdr *hdr)
1869{
1870 __be32 *p;
1871
1872 dprintk("%s: lbw: %llu type: %d\n", __func__, args->lastbytewritten,
1873 NFS_SERVER(args->inode)->pnfs_curr_ld->id);
1874
1875 p = reserve_space(xdr, 48 + NFS4_STATEID_SIZE);
1876 *p++ = cpu_to_be32(OP_LAYOUTCOMMIT);
1877 /* Only whole file layouts */
1878 p = xdr_encode_hyper(p, 0); /* offset */
1879 p = xdr_encode_hyper(p, NFS4_MAX_UINT64); /* length */
1880 *p++ = cpu_to_be32(0); /* reclaim */
1881 p = xdr_encode_opaque_fixed(p, args->stateid.data, NFS4_STATEID_SIZE);
1882 *p++ = cpu_to_be32(1); /* newoffset = TRUE */
1883 p = xdr_encode_hyper(p, args->lastbytewritten);
1884 *p++ = cpu_to_be32(0); /* Never send time_modify_changed */
1885 *p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */
1886 *p++ = cpu_to_be32(0); /* no file layout payload */
1887
1888 hdr->nops++;
1889 hdr->replen += decode_layoutcommit_maxsz;
1890 return 0;
1891}
1815#endif /* CONFIG_NFS_V4_1 */ 1892#endif /* CONFIG_NFS_V4_1 */
1816 1893
1817/* 1894/*
@@ -2271,7 +2348,8 @@ static void nfs4_xdr_enc_write(struct rpc_rqst *req, struct xdr_stream *xdr,
2271 encode_putfh(xdr, args->fh, &hdr); 2348 encode_putfh(xdr, args->fh, &hdr);
2272 encode_write(xdr, args, &hdr); 2349 encode_write(xdr, args, &hdr);
2273 req->rq_snd_buf.flags |= XDRBUF_WRITE; 2350 req->rq_snd_buf.flags |= XDRBUF_WRITE;
2274 encode_getfattr(xdr, args->bitmask, &hdr); 2351 if (args->bitmask)
2352 encode_getfattr(xdr, args->bitmask, &hdr);
2275 encode_nops(&hdr); 2353 encode_nops(&hdr);
2276} 2354}
2277 2355
@@ -2289,7 +2367,8 @@ static void nfs4_xdr_enc_commit(struct rpc_rqst *req, struct xdr_stream *xdr,
2289 encode_sequence(xdr, &args->seq_args, &hdr); 2367 encode_sequence(xdr, &args->seq_args, &hdr);
2290 encode_putfh(xdr, args->fh, &hdr); 2368 encode_putfh(xdr, args->fh, &hdr);
2291 encode_commit(xdr, args, &hdr); 2369 encode_commit(xdr, args, &hdr);
2292 encode_getfattr(xdr, args->bitmask, &hdr); 2370 if (args->bitmask)
2371 encode_getfattr(xdr, args->bitmask, &hdr);
2293 encode_nops(&hdr); 2372 encode_nops(&hdr);
2294} 2373}
2295 2374
@@ -2460,6 +2539,24 @@ static void nfs4_xdr_enc_fs_locations(struct rpc_rqst *req,
2460 encode_nops(&hdr); 2539 encode_nops(&hdr);
2461} 2540}
2462 2541
2542/*
2543 * Encode SECINFO request
2544 */
2545static void nfs4_xdr_enc_secinfo(struct rpc_rqst *req,
2546 struct xdr_stream *xdr,
2547 struct nfs4_secinfo_arg *args)
2548{
2549 struct compound_hdr hdr = {
2550 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2551 };
2552
2553 encode_compound_hdr(xdr, req, &hdr);
2554 encode_sequence(xdr, &args->seq_args, &hdr);
2555 encode_putfh(xdr, args->dir_fh, &hdr);
2556 encode_secinfo(xdr, args->name, &hdr);
2557 encode_nops(&hdr);
2558}
2559
2463#if defined(CONFIG_NFS_V4_1) 2560#if defined(CONFIG_NFS_V4_1)
2464/* 2561/*
2465 * EXCHANGE_ID request 2562 * EXCHANGE_ID request
@@ -2599,7 +2696,31 @@ static void nfs4_xdr_enc_layoutget(struct rpc_rqst *req,
2599 encode_sequence(xdr, &args->seq_args, &hdr); 2696 encode_sequence(xdr, &args->seq_args, &hdr);
2600 encode_putfh(xdr, NFS_FH(args->inode), &hdr); 2697 encode_putfh(xdr, NFS_FH(args->inode), &hdr);
2601 encode_layoutget(xdr, args, &hdr); 2698 encode_layoutget(xdr, args, &hdr);
2699
2700 xdr_inline_pages(&req->rq_rcv_buf, hdr.replen << 2,
2701 args->layout.pages, 0, args->layout.pglen);
2702
2703 encode_nops(&hdr);
2704}
2705
2706/*
2707 * Encode LAYOUTCOMMIT request
2708 */
2709static int nfs4_xdr_enc_layoutcommit(struct rpc_rqst *req,
2710 struct xdr_stream *xdr,
2711 struct nfs4_layoutcommit_args *args)
2712{
2713 struct compound_hdr hdr = {
2714 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2715 };
2716
2717 encode_compound_hdr(xdr, req, &hdr);
2718 encode_sequence(xdr, &args->seq_args, &hdr);
2719 encode_putfh(xdr, NFS_FH(args->inode), &hdr);
2720 encode_layoutcommit(xdr, args, &hdr);
2721 encode_getfattr(xdr, args->bitmask, &hdr);
2602 encode_nops(&hdr); 2722 encode_nops(&hdr);
2723 return 0;
2603} 2724}
2604#endif /* CONFIG_NFS_V4_1 */ 2725#endif /* CONFIG_NFS_V4_1 */
2605 2726
@@ -2920,6 +3041,7 @@ static int decode_attr_error(struct xdr_stream *xdr, uint32_t *bitmap)
2920 if (unlikely(!p)) 3041 if (unlikely(!p))
2921 goto out_overflow; 3042 goto out_overflow;
2922 bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR; 3043 bitmap[0] &= ~FATTR4_WORD0_RDATTR_ERROR;
3044 return -be32_to_cpup(p);
2923 } 3045 }
2924 return 0; 3046 return 0;
2925out_overflow: 3047out_overflow:
@@ -3017,7 +3139,7 @@ static int decode_attr_mounted_on_fileid(struct xdr_stream *xdr, uint32_t *bitma
3017 goto out_overflow; 3139 goto out_overflow;
3018 xdr_decode_hyper(p, fileid); 3140 xdr_decode_hyper(p, fileid);
3019 bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID; 3141 bitmap[1] &= ~FATTR4_WORD1_MOUNTED_ON_FILEID;
3020 ret = NFS_ATTR_FATTR_FILEID; 3142 ret = NFS_ATTR_FATTR_MOUNTED_ON_FILEID;
3021 } 3143 }
3022 dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid); 3144 dprintk("%s: fileid=%Lu\n", __func__, (unsigned long long)*fileid);
3023 return ret; 3145 return ret;
@@ -3382,7 +3504,7 @@ out_overflow:
3382} 3504}
3383 3505
3384static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap, 3506static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
3385 struct nfs_client *clp, uint32_t *uid, int may_sleep) 3507 const struct nfs_server *server, uint32_t *uid, int may_sleep)
3386{ 3508{
3387 uint32_t len; 3509 uint32_t len;
3388 __be32 *p; 3510 __be32 *p;
@@ -3402,7 +3524,7 @@ static int decode_attr_owner(struct xdr_stream *xdr, uint32_t *bitmap,
3402 if (!may_sleep) { 3524 if (!may_sleep) {
3403 /* do nothing */ 3525 /* do nothing */
3404 } else if (len < XDR_MAX_NETOBJ) { 3526 } else if (len < XDR_MAX_NETOBJ) {
3405 if (nfs_map_name_to_uid(clp, (char *)p, len, uid) == 0) 3527 if (nfs_map_name_to_uid(server, (char *)p, len, uid) == 0)
3406 ret = NFS_ATTR_FATTR_OWNER; 3528 ret = NFS_ATTR_FATTR_OWNER;
3407 else 3529 else
3408 dprintk("%s: nfs_map_name_to_uid failed!\n", 3530 dprintk("%s: nfs_map_name_to_uid failed!\n",
@@ -3420,7 +3542,7 @@ out_overflow:
3420} 3542}
3421 3543
3422static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap, 3544static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
3423 struct nfs_client *clp, uint32_t *gid, int may_sleep) 3545 const struct nfs_server *server, uint32_t *gid, int may_sleep)
3424{ 3546{
3425 uint32_t len; 3547 uint32_t len;
3426 __be32 *p; 3548 __be32 *p;
@@ -3440,7 +3562,7 @@ static int decode_attr_group(struct xdr_stream *xdr, uint32_t *bitmap,
3440 if (!may_sleep) { 3562 if (!may_sleep) {
3441 /* do nothing */ 3563 /* do nothing */
3442 } else if (len < XDR_MAX_NETOBJ) { 3564 } else if (len < XDR_MAX_NETOBJ) {
3443 if (nfs_map_group_to_gid(clp, (char *)p, len, gid) == 0) 3565 if (nfs_map_group_to_gid(server, (char *)p, len, gid) == 0)
3444 ret = NFS_ATTR_FATTR_GROUP; 3566 ret = NFS_ATTR_FATTR_GROUP;
3445 else 3567 else
3446 dprintk("%s: nfs_map_group_to_gid failed!\n", 3568 dprintk("%s: nfs_map_group_to_gid failed!\n",
@@ -3879,7 +4001,6 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
3879{ 4001{
3880 int status; 4002 int status;
3881 umode_t fmode = 0; 4003 umode_t fmode = 0;
3882 uint64_t fileid;
3883 uint32_t type; 4004 uint32_t type;
3884 4005
3885 status = decode_attr_type(xdr, bitmap, &type); 4006 status = decode_attr_type(xdr, bitmap, &type);
@@ -3907,6 +4028,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
3907 fattr->valid |= status; 4028 fattr->valid |= status;
3908 4029
3909 status = decode_attr_error(xdr, bitmap); 4030 status = decode_attr_error(xdr, bitmap);
4031 if (status == -NFS4ERR_WRONGSEC) {
4032 nfs_fixup_secinfo_attributes(fattr, fh);
4033 status = 0;
4034 }
3910 if (status < 0) 4035 if (status < 0)
3911 goto xdr_error; 4036 goto xdr_error;
3912 4037
@@ -3939,14 +4064,12 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
3939 goto xdr_error; 4064 goto xdr_error;
3940 fattr->valid |= status; 4065 fattr->valid |= status;
3941 4066
3942 status = decode_attr_owner(xdr, bitmap, server->nfs_client, 4067 status = decode_attr_owner(xdr, bitmap, server, &fattr->uid, may_sleep);
3943 &fattr->uid, may_sleep);
3944 if (status < 0) 4068 if (status < 0)
3945 goto xdr_error; 4069 goto xdr_error;
3946 fattr->valid |= status; 4070 fattr->valid |= status;
3947 4071
3948 status = decode_attr_group(xdr, bitmap, server->nfs_client, 4072 status = decode_attr_group(xdr, bitmap, server, &fattr->gid, may_sleep);
3949 &fattr->gid, may_sleep);
3950 if (status < 0) 4073 if (status < 0)
3951 goto xdr_error; 4074 goto xdr_error;
3952 fattr->valid |= status; 4075 fattr->valid |= status;
@@ -3976,13 +4099,10 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap,
3976 goto xdr_error; 4099 goto xdr_error;
3977 fattr->valid |= status; 4100 fattr->valid |= status;
3978 4101
3979 status = decode_attr_mounted_on_fileid(xdr, bitmap, &fileid); 4102 status = decode_attr_mounted_on_fileid(xdr, bitmap, &fattr->mounted_on_fileid);
3980 if (status < 0) 4103 if (status < 0)
3981 goto xdr_error; 4104 goto xdr_error;
3982 if (status != 0 && !(fattr->valid & status)) { 4105 fattr->valid |= status;
3983 fattr->fileid = fileid;
3984 fattr->valid |= status;
3985 }
3986 4106
3987xdr_error: 4107xdr_error:
3988 dprintk("%s: xdr returned %d\n", __func__, -status); 4108 dprintk("%s: xdr returned %d\n", __func__, -status);
@@ -4677,6 +4797,79 @@ static int decode_delegreturn(struct xdr_stream *xdr)
4677 return decode_op_hdr(xdr, OP_DELEGRETURN); 4797 return decode_op_hdr(xdr, OP_DELEGRETURN);
4678} 4798}
4679 4799
4800static int decode_secinfo_gss(struct xdr_stream *xdr, struct nfs4_secinfo_flavor *flavor)
4801{
4802 __be32 *p;
4803
4804 p = xdr_inline_decode(xdr, 4);
4805 if (unlikely(!p))
4806 goto out_overflow;
4807 flavor->gss.sec_oid4.len = be32_to_cpup(p);
4808 if (flavor->gss.sec_oid4.len > GSS_OID_MAX_LEN)
4809 goto out_err;
4810
4811 p = xdr_inline_decode(xdr, flavor->gss.sec_oid4.len);
4812 if (unlikely(!p))
4813 goto out_overflow;
4814 memcpy(flavor->gss.sec_oid4.data, p, flavor->gss.sec_oid4.len);
4815
4816 p = xdr_inline_decode(xdr, 8);
4817 if (unlikely(!p))
4818 goto out_overflow;
4819 flavor->gss.qop4 = be32_to_cpup(p++);
4820 flavor->gss.service = be32_to_cpup(p);
4821
4822 return 0;
4823
4824out_overflow:
4825 print_overflow_msg(__func__, xdr);
4826 return -EIO;
4827out_err:
4828 return -EINVAL;
4829}
4830
4831static int decode_secinfo(struct xdr_stream *xdr, struct nfs4_secinfo_res *res)
4832{
4833 struct nfs4_secinfo_flavor *sec_flavor;
4834 int status;
4835 __be32 *p;
4836 int i, num_flavors;
4837
4838 status = decode_op_hdr(xdr, OP_SECINFO);
4839 if (status)
4840 goto out;
4841 p = xdr_inline_decode(xdr, 4);
4842 if (unlikely(!p))
4843 goto out_overflow;
4844
4845 res->flavors->num_flavors = 0;
4846 num_flavors = be32_to_cpup(p);
4847
4848 for (i = 0; i < num_flavors; i++) {
4849 sec_flavor = &res->flavors->flavors[i];
4850 if ((char *)&sec_flavor[1] - (char *)res->flavors > PAGE_SIZE)
4851 break;
4852
4853 p = xdr_inline_decode(xdr, 4);
4854 if (unlikely(!p))
4855 goto out_overflow;
4856 sec_flavor->flavor = be32_to_cpup(p);
4857
4858 if (sec_flavor->flavor == RPC_AUTH_GSS) {
4859 status = decode_secinfo_gss(xdr, sec_flavor);
4860 if (status)
4861 goto out;
4862 }
4863 res->flavors->num_flavors++;
4864 }
4865
4866out:
4867 return status;
4868out_overflow:
4869 print_overflow_msg(__func__, xdr);
4870 return -EIO;
4871}
4872
4680#if defined(CONFIG_NFS_V4_1) 4873#if defined(CONFIG_NFS_V4_1)
4681static int decode_exchange_id(struct xdr_stream *xdr, 4874static int decode_exchange_id(struct xdr_stream *xdr,
4682 struct nfs41_exchange_id_res *res) 4875 struct nfs41_exchange_id_res *res)
@@ -4694,7 +4887,7 @@ static int decode_exchange_id(struct xdr_stream *xdr,
4694 p = xdr_inline_decode(xdr, 8); 4887 p = xdr_inline_decode(xdr, 8);
4695 if (unlikely(!p)) 4888 if (unlikely(!p))
4696 goto out_overflow; 4889 goto out_overflow;
4697 xdr_decode_hyper(p, &clp->cl_ex_clid); 4890 xdr_decode_hyper(p, &clp->cl_clientid);
4698 p = xdr_inline_decode(xdr, 12); 4891 p = xdr_inline_decode(xdr, 12);
4699 if (unlikely(!p)) 4892 if (unlikely(!p))
4700 goto out_overflow; 4893 goto out_overflow;
@@ -4947,6 +5140,9 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
4947 __be32 *p; 5140 __be32 *p;
4948 int status; 5141 int status;
4949 u32 layout_count; 5142 u32 layout_count;
5143 struct xdr_buf *rcvbuf = &req->rq_rcv_buf;
5144 struct kvec *iov = rcvbuf->head;
5145 u32 hdrlen, recvd;
4950 5146
4951 status = decode_op_hdr(xdr, OP_LAYOUTGET); 5147 status = decode_op_hdr(xdr, OP_LAYOUTGET);
4952 if (status) 5148 if (status)
@@ -4963,17 +5159,14 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
4963 return -EINVAL; 5159 return -EINVAL;
4964 } 5160 }
4965 5161
4966 p = xdr_inline_decode(xdr, 24); 5162 p = xdr_inline_decode(xdr, 28);
4967 if (unlikely(!p)) 5163 if (unlikely(!p))
4968 goto out_overflow; 5164 goto out_overflow;
4969 p = xdr_decode_hyper(p, &res->range.offset); 5165 p = xdr_decode_hyper(p, &res->range.offset);
4970 p = xdr_decode_hyper(p, &res->range.length); 5166 p = xdr_decode_hyper(p, &res->range.length);
4971 res->range.iomode = be32_to_cpup(p++); 5167 res->range.iomode = be32_to_cpup(p++);
4972 res->type = be32_to_cpup(p++); 5168 res->type = be32_to_cpup(p++);
4973 5169 res->layoutp->len = be32_to_cpup(p);
4974 status = decode_opaque_inline(xdr, &res->layout.len, (char **)&p);
4975 if (unlikely(status))
4976 return status;
4977 5170
4978 dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n", 5171 dprintk("%s roff:%lu rlen:%lu riomode:%d, lo_type:0x%x, lo.len:%d\n",
4979 __func__, 5172 __func__,
@@ -4981,12 +5174,18 @@ static int decode_layoutget(struct xdr_stream *xdr, struct rpc_rqst *req,
4981 (unsigned long)res->range.length, 5174 (unsigned long)res->range.length,
4982 res->range.iomode, 5175 res->range.iomode,
4983 res->type, 5176 res->type,
4984 res->layout.len); 5177 res->layoutp->len);
4985 5178
4986 /* nfs4_proc_layoutget allocated a single page */ 5179 hdrlen = (u8 *) xdr->p - (u8 *) iov->iov_base;
4987 if (res->layout.len > PAGE_SIZE) 5180 recvd = req->rq_rcv_buf.len - hdrlen;
4988 return -ENOMEM; 5181 if (res->layoutp->len > recvd) {
4989 memcpy(res->layout.buf, p, res->layout.len); 5182 dprintk("NFS: server cheating in layoutget reply: "
5183 "layout len %u > recvd %u\n",
5184 res->layoutp->len, recvd);
5185 return -EINVAL;
5186 }
5187
5188 xdr_read_pages(xdr, res->layoutp->len);
4990 5189
4991 if (layout_count > 1) { 5190 if (layout_count > 1) {
4992 /* We only handle a length one array at the moment. Any 5191 /* We only handle a length one array at the moment. Any
@@ -5003,6 +5202,35 @@ out_overflow:
5003 print_overflow_msg(__func__, xdr); 5202 print_overflow_msg(__func__, xdr);
5004 return -EIO; 5203 return -EIO;
5005} 5204}
5205
5206static int decode_layoutcommit(struct xdr_stream *xdr,
5207 struct rpc_rqst *req,
5208 struct nfs4_layoutcommit_res *res)
5209{
5210 __be32 *p;
5211 __u32 sizechanged;
5212 int status;
5213
5214 status = decode_op_hdr(xdr, OP_LAYOUTCOMMIT);
5215 if (status)
5216 return status;
5217
5218 p = xdr_inline_decode(xdr, 4);
5219 if (unlikely(!p))
5220 goto out_overflow;
5221 sizechanged = be32_to_cpup(p);
5222
5223 if (sizechanged) {
5224 /* throw away new size */
5225 p = xdr_inline_decode(xdr, 8);
5226 if (unlikely(!p))
5227 goto out_overflow;
5228 }
5229 return 0;
5230out_overflow:
5231 print_overflow_msg(__func__, xdr);
5232 return -EIO;
5233}
5006#endif /* CONFIG_NFS_V4_1 */ 5234#endif /* CONFIG_NFS_V4_1 */
5007 5235
5008/* 5236/*
@@ -5690,8 +5918,9 @@ static int nfs4_xdr_dec_write(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5690 status = decode_write(xdr, res); 5918 status = decode_write(xdr, res);
5691 if (status) 5919 if (status)
5692 goto out; 5920 goto out;
5693 decode_getfattr(xdr, res->fattr, res->server, 5921 if (res->fattr)
5694 !RPC_IS_ASYNC(rqstp->rq_task)); 5922 decode_getfattr(xdr, res->fattr, res->server,
5923 !RPC_IS_ASYNC(rqstp->rq_task));
5695 if (!status) 5924 if (!status)
5696 status = res->count; 5925 status = res->count;
5697out: 5926out:
@@ -5719,8 +5948,9 @@ static int nfs4_xdr_dec_commit(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5719 status = decode_commit(xdr, res); 5948 status = decode_commit(xdr, res);
5720 if (status) 5949 if (status)
5721 goto out; 5950 goto out;
5722 decode_getfattr(xdr, res->fattr, res->server, 5951 if (res->fattr)
5723 !RPC_IS_ASYNC(rqstp->rq_task)); 5952 decode_getfattr(xdr, res->fattr, res->server,
5953 !RPC_IS_ASYNC(rqstp->rq_task));
5724out: 5954out:
5725 return status; 5955 return status;
5726} 5956}
@@ -5915,6 +6145,32 @@ out:
5915 return status; 6145 return status;
5916} 6146}
5917 6147
6148/*
6149 * Decode SECINFO response
6150 */
6151static int nfs4_xdr_dec_secinfo(struct rpc_rqst *rqstp,
6152 struct xdr_stream *xdr,
6153 struct nfs4_secinfo_res *res)
6154{
6155 struct compound_hdr hdr;
6156 int status;
6157
6158 status = decode_compound_hdr(xdr, &hdr);
6159 if (status)
6160 goto out;
6161 status = decode_sequence(xdr, &res->seq_res, rqstp);
6162 if (status)
6163 goto out;
6164 status = decode_putfh(xdr);
6165 if (status)
6166 goto out;
6167 status = decode_secinfo(xdr, res);
6168 if (status)
6169 goto out;
6170out:
6171 return status;
6172}
6173
5918#if defined(CONFIG_NFS_V4_1) 6174#if defined(CONFIG_NFS_V4_1)
5919/* 6175/*
5920 * Decode EXCHANGE_ID response 6176 * Decode EXCHANGE_ID response
@@ -6062,6 +6318,34 @@ static int nfs4_xdr_dec_layoutget(struct rpc_rqst *rqstp,
6062out: 6318out:
6063 return status; 6319 return status;
6064} 6320}
6321
6322/*
6323 * Decode LAYOUTCOMMIT response
6324 */
6325static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp,
6326 struct xdr_stream *xdr,
6327 struct nfs4_layoutcommit_res *res)
6328{
6329 struct compound_hdr hdr;
6330 int status;
6331
6332 status = decode_compound_hdr(xdr, &hdr);
6333 if (status)
6334 goto out;
6335 status = decode_sequence(xdr, &res->seq_res, rqstp);
6336 if (status)
6337 goto out;
6338 status = decode_putfh(xdr);
6339 if (status)
6340 goto out;
6341 status = decode_layoutcommit(xdr, rqstp, res);
6342 if (status)
6343 goto out;
6344 decode_getfattr(xdr, res->fattr, res->server,
6345 !RPC_IS_ASYNC(rqstp->rq_task));
6346out:
6347 return status;
6348}
6065#endif /* CONFIG_NFS_V4_1 */ 6349#endif /* CONFIG_NFS_V4_1 */
6066 6350
6067/** 6351/**
@@ -6125,7 +6409,9 @@ int nfs4_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry,
6125 if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh, 6409 if (decode_getfattr_attrs(xdr, bitmap, entry->fattr, entry->fh,
6126 entry->server, 1) < 0) 6410 entry->server, 1) < 0)
6127 goto out_overflow; 6411 goto out_overflow;
6128 if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID) 6412 if (entry->fattr->valid & NFS_ATTR_FATTR_MOUNTED_ON_FILEID)
6413 entry->ino = entry->fattr->mounted_on_fileid;
6414 else if (entry->fattr->valid & NFS_ATTR_FATTR_FILEID)
6129 entry->ino = entry->fattr->fileid; 6415 entry->ino = entry->fattr->fileid;
6130 6416
6131 entry->d_type = DT_UNKNOWN; 6417 entry->d_type = DT_UNKNOWN;
@@ -6167,8 +6453,6 @@ static struct {
6167 { NFS4ERR_DQUOT, -EDQUOT }, 6453 { NFS4ERR_DQUOT, -EDQUOT },
6168 { NFS4ERR_STALE, -ESTALE }, 6454 { NFS4ERR_STALE, -ESTALE },
6169 { NFS4ERR_BADHANDLE, -EBADHANDLE }, 6455 { NFS4ERR_BADHANDLE, -EBADHANDLE },
6170 { NFS4ERR_BADOWNER, -EINVAL },
6171 { NFS4ERR_BADNAME, -EINVAL },
6172 { NFS4ERR_BAD_COOKIE, -EBADCOOKIE }, 6456 { NFS4ERR_BAD_COOKIE, -EBADCOOKIE },
6173 { NFS4ERR_NOTSUPP, -ENOTSUPP }, 6457 { NFS4ERR_NOTSUPP, -ENOTSUPP },
6174 { NFS4ERR_TOOSMALL, -ETOOSMALL }, 6458 { NFS4ERR_TOOSMALL, -ETOOSMALL },
@@ -6178,10 +6462,6 @@ static struct {
6178 { NFS4ERR_SYMLINK, -ELOOP }, 6462 { NFS4ERR_SYMLINK, -ELOOP },
6179 { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP }, 6463 { NFS4ERR_OP_ILLEGAL, -EOPNOTSUPP },
6180 { NFS4ERR_DEADLOCK, -EDEADLK }, 6464 { NFS4ERR_DEADLOCK, -EDEADLK },
6181 { NFS4ERR_WRONGSEC, -EPERM }, /* FIXME: this needs
6182 * to be handled by a
6183 * middle-layer.
6184 */
6185 { -1, -EIO } 6465 { -1, -EIO }
6186}; 6466};
6187 6467
@@ -6256,6 +6536,7 @@ struct rpc_procinfo nfs4_procedures[] = {
6256 PROC(SETACL, enc_setacl, dec_setacl), 6536 PROC(SETACL, enc_setacl, dec_setacl),
6257 PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations), 6537 PROC(FS_LOCATIONS, enc_fs_locations, dec_fs_locations),
6258 PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner), 6538 PROC(RELEASE_LOCKOWNER, enc_release_lockowner, dec_release_lockowner),
6539 PROC(SECINFO, enc_secinfo, dec_secinfo),
6259#if defined(CONFIG_NFS_V4_1) 6540#if defined(CONFIG_NFS_V4_1)
6260 PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id), 6541 PROC(EXCHANGE_ID, enc_exchange_id, dec_exchange_id),
6261 PROC(CREATE_SESSION, enc_create_session, dec_create_session), 6542 PROC(CREATE_SESSION, enc_create_session, dec_create_session),
@@ -6265,6 +6546,7 @@ struct rpc_procinfo nfs4_procedures[] = {
6265 PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete), 6546 PROC(RECLAIM_COMPLETE, enc_reclaim_complete, dec_reclaim_complete),
6266 PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo), 6547 PROC(GETDEVICEINFO, enc_getdeviceinfo, dec_getdeviceinfo),
6267 PROC(LAYOUTGET, enc_layoutget, dec_layoutget), 6548 PROC(LAYOUTGET, enc_layoutget, dec_layoutget),
6549 PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit),
6268#endif /* CONFIG_NFS_V4_1 */ 6550#endif /* CONFIG_NFS_V4_1 */
6269}; 6551};
6270 6552
diff --git a/fs/nfs/nfsroot.c b/fs/nfs/nfsroot.c
index 903908a20023..c541093a5bf2 100644
--- a/fs/nfs/nfsroot.c
+++ b/fs/nfs/nfsroot.c
@@ -86,11 +86,14 @@
86/* Default path we try to mount. "%s" gets replaced by our IP address */ 86/* Default path we try to mount. "%s" gets replaced by our IP address */
87#define NFS_ROOT "/tftpboot/%s" 87#define NFS_ROOT "/tftpboot/%s"
88 88
89/* Default NFSROOT mount options. */
90#define NFS_DEF_OPTIONS "udp"
91
89/* Parameters passed from the kernel command line */ 92/* Parameters passed from the kernel command line */
90static char nfs_root_parms[256] __initdata = ""; 93static char nfs_root_parms[256] __initdata = "";
91 94
92/* Text-based mount options passed to super.c */ 95/* Text-based mount options passed to super.c */
93static char nfs_root_options[256] __initdata = ""; 96static char nfs_root_options[256] __initdata = NFS_DEF_OPTIONS;
94 97
95/* Address of NFS server */ 98/* Address of NFS server */
96static __be32 servaddr __initdata = htonl(INADDR_NONE); 99static __be32 servaddr __initdata = htonl(INADDR_NONE);
@@ -160,8 +163,14 @@ static int __init root_nfs_copy(char *dest, const char *src,
160} 163}
161 164
162static int __init root_nfs_cat(char *dest, const char *src, 165static int __init root_nfs_cat(char *dest, const char *src,
163 const size_t destlen) 166 const size_t destlen)
164{ 167{
168 size_t len = strlen(dest);
169
170 if (len && dest[len - 1] != ',')
171 if (strlcat(dest, ",", destlen) > destlen)
172 return -1;
173
165 if (strlcat(dest, src, destlen) > destlen) 174 if (strlcat(dest, src, destlen) > destlen)
166 return -1; 175 return -1;
167 return 0; 176 return 0;
@@ -194,16 +203,6 @@ static int __init root_nfs_parse_options(char *incoming, char *exppath,
194 if (root_nfs_cat(nfs_root_options, incoming, 203 if (root_nfs_cat(nfs_root_options, incoming,
195 sizeof(nfs_root_options))) 204 sizeof(nfs_root_options)))
196 return -1; 205 return -1;
197
198 /*
199 * Possibly prepare for more options to be appended
200 */
201 if (nfs_root_options[0] != '\0' &&
202 nfs_root_options[strlen(nfs_root_options)] != ',')
203 if (root_nfs_cat(nfs_root_options, ",",
204 sizeof(nfs_root_options)))
205 return -1;
206
207 return 0; 206 return 0;
208} 207}
209 208
@@ -217,7 +216,7 @@ static int __init root_nfs_parse_options(char *incoming, char *exppath,
217 */ 216 */
218static int __init root_nfs_data(char *cmdline) 217static int __init root_nfs_data(char *cmdline)
219{ 218{
220 char addr_option[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1]; 219 char mand_options[sizeof("nolock,addr=") + INET_ADDRSTRLEN + 1];
221 int len, retval = -1; 220 int len, retval = -1;
222 char *tmp = NULL; 221 char *tmp = NULL;
223 const size_t tmplen = sizeof(nfs_export_path); 222 const size_t tmplen = sizeof(nfs_export_path);
@@ -244,9 +243,9 @@ static int __init root_nfs_data(char *cmdline)
244 * Append mandatory options for nfsroot so they override 243 * Append mandatory options for nfsroot so they override
245 * what has come before 244 * what has come before
246 */ 245 */
247 snprintf(addr_option, sizeof(addr_option), "nolock,addr=%pI4", 246 snprintf(mand_options, sizeof(mand_options), "nolock,addr=%pI4",
248 &servaddr); 247 &servaddr);
249 if (root_nfs_cat(nfs_root_options, addr_option, 248 if (root_nfs_cat(nfs_root_options, mand_options,
250 sizeof(nfs_root_options))) 249 sizeof(nfs_root_options)))
251 goto out_optionstoolong; 250 goto out_optionstoolong;
252 251
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index e1164e3f9e69..c80add6e2213 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -20,6 +20,7 @@
20#include <linux/nfs_mount.h> 20#include <linux/nfs_mount.h>
21 21
22#include "internal.h" 22#include "internal.h"
23#include "pnfs.h"
23 24
24static struct kmem_cache *nfs_page_cachep; 25static struct kmem_cache *nfs_page_cachep;
25 26
@@ -134,14 +135,14 @@ void nfs_clear_page_tag_locked(struct nfs_page *req)
134 nfs_unlock_request(req); 135 nfs_unlock_request(req);
135} 136}
136 137
137/** 138/*
138 * nfs_clear_request - Free up all resources allocated to the request 139 * nfs_clear_request - Free up all resources allocated to the request
139 * @req: 140 * @req:
140 * 141 *
141 * Release page and open context resources associated with a read/write 142 * Release page and open context resources associated with a read/write
142 * request after it has completed. 143 * request after it has completed.
143 */ 144 */
144void nfs_clear_request(struct nfs_page *req) 145static void nfs_clear_request(struct nfs_page *req)
145{ 146{
146 struct page *page = req->wb_page; 147 struct page *page = req->wb_page;
147 struct nfs_open_context *ctx = req->wb_context; 148 struct nfs_open_context *ctx = req->wb_context;
@@ -213,7 +214,7 @@ nfs_wait_on_request(struct nfs_page *req)
213 */ 214 */
214void nfs_pageio_init(struct nfs_pageio_descriptor *desc, 215void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
215 struct inode *inode, 216 struct inode *inode,
216 int (*doio)(struct inode *, struct list_head *, unsigned int, size_t, int), 217 int (*doio)(struct nfs_pageio_descriptor *),
217 size_t bsize, 218 size_t bsize,
218 int io_flags) 219 int io_flags)
219{ 220{
@@ -222,10 +223,12 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
222 desc->pg_count = 0; 223 desc->pg_count = 0;
223 desc->pg_bsize = bsize; 224 desc->pg_bsize = bsize;
224 desc->pg_base = 0; 225 desc->pg_base = 0;
226 desc->pg_moreio = 0;
225 desc->pg_inode = inode; 227 desc->pg_inode = inode;
226 desc->pg_doio = doio; 228 desc->pg_doio = doio;
227 desc->pg_ioflags = io_flags; 229 desc->pg_ioflags = io_flags;
228 desc->pg_error = 0; 230 desc->pg_error = 0;
231 desc->pg_lseg = NULL;
229} 232}
230 233
231/** 234/**
@@ -240,7 +243,8 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
240 * Return 'true' if this is the case, else return 'false'. 243 * Return 'true' if this is the case, else return 'false'.
241 */ 244 */
242static int nfs_can_coalesce_requests(struct nfs_page *prev, 245static int nfs_can_coalesce_requests(struct nfs_page *prev,
243 struct nfs_page *req) 246 struct nfs_page *req,
247 struct nfs_pageio_descriptor *pgio)
244{ 248{
245 if (req->wb_context->cred != prev->wb_context->cred) 249 if (req->wb_context->cred != prev->wb_context->cred)
246 return 0; 250 return 0;
@@ -254,6 +258,12 @@ static int nfs_can_coalesce_requests(struct nfs_page *prev,
254 return 0; 258 return 0;
255 if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) 259 if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
256 return 0; 260 return 0;
261 /*
262 * Non-whole file layouts need to check that req is inside of
263 * pgio->pg_lseg.
264 */
265 if (pgio->pg_test && !pgio->pg_test(pgio, prev, req))
266 return 0;
257 return 1; 267 return 1;
258} 268}
259 269
@@ -286,7 +296,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
286 if (newlen > desc->pg_bsize) 296 if (newlen > desc->pg_bsize)
287 return 0; 297 return 0;
288 prev = nfs_list_entry(desc->pg_list.prev); 298 prev = nfs_list_entry(desc->pg_list.prev);
289 if (!nfs_can_coalesce_requests(prev, req)) 299 if (!nfs_can_coalesce_requests(prev, req, desc))
290 return 0; 300 return 0;
291 } else 301 } else
292 desc->pg_base = req->wb_pgbase; 302 desc->pg_base = req->wb_pgbase;
@@ -302,12 +312,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
302static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) 312static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
303{ 313{
304 if (!list_empty(&desc->pg_list)) { 314 if (!list_empty(&desc->pg_list)) {
305 int error = desc->pg_doio(desc->pg_inode, 315 int error = desc->pg_doio(desc);
306 &desc->pg_list,
307 nfs_page_array_len(desc->pg_base,
308 desc->pg_count),
309 desc->pg_count,
310 desc->pg_ioflags);
311 if (error < 0) 316 if (error < 0)
312 desc->pg_error = error; 317 desc->pg_error = error;
313 else 318 else
@@ -331,9 +336,11 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
331 struct nfs_page *req) 336 struct nfs_page *req)
332{ 337{
333 while (!nfs_pageio_do_add_request(desc, req)) { 338 while (!nfs_pageio_do_add_request(desc, req)) {
339 desc->pg_moreio = 1;
334 nfs_pageio_doio(desc); 340 nfs_pageio_doio(desc);
335 if (desc->pg_error < 0) 341 if (desc->pg_error < 0)
336 return 0; 342 return 0;
343 desc->pg_moreio = 0;
337 } 344 }
338 return 1; 345 return 1;
339} 346}
@@ -391,6 +398,7 @@ int nfs_scan_list(struct nfs_inode *nfsi,
391 pgoff_t idx_end; 398 pgoff_t idx_end;
392 int found, i; 399 int found, i;
393 int res; 400 int res;
401 struct list_head *list;
394 402
395 res = 0; 403 res = 0;
396 if (npages == 0) 404 if (npages == 0)
@@ -411,10 +419,10 @@ int nfs_scan_list(struct nfs_inode *nfsi,
411 idx_start = req->wb_index + 1; 419 idx_start = req->wb_index + 1;
412 if (nfs_set_page_tag_locked(req)) { 420 if (nfs_set_page_tag_locked(req)) {
413 kref_get(&req->wb_kref); 421 kref_get(&req->wb_kref);
414 nfs_list_remove_request(req);
415 radix_tree_tag_clear(&nfsi->nfs_page_tree, 422 radix_tree_tag_clear(&nfsi->nfs_page_tree,
416 req->wb_index, tag); 423 req->wb_index, tag);
417 nfs_list_add_request(req, dst); 424 list = pnfs_choose_commit_list(req, dst);
425 nfs_list_add_request(req, list);
418 res++; 426 res++;
419 if (res == INT_MAX) 427 if (res == INT_MAX)
420 goto out; 428 goto out;
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 1b1bc1a0fb0a..ff681ab65d31 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -30,6 +30,7 @@
30#include <linux/nfs_fs.h> 30#include <linux/nfs_fs.h>
31#include "internal.h" 31#include "internal.h"
32#include "pnfs.h" 32#include "pnfs.h"
33#include "iostat.h"
33 34
34#define NFSDBG_FACILITY NFSDBG_PNFS 35#define NFSDBG_FACILITY NFSDBG_PNFS
35 36
@@ -74,10 +75,8 @@ find_pnfs_driver(u32 id)
74void 75void
75unset_pnfs_layoutdriver(struct nfs_server *nfss) 76unset_pnfs_layoutdriver(struct nfs_server *nfss)
76{ 77{
77 if (nfss->pnfs_curr_ld) { 78 if (nfss->pnfs_curr_ld)
78 nfss->pnfs_curr_ld->clear_layoutdriver(nfss);
79 module_put(nfss->pnfs_curr_ld->owner); 79 module_put(nfss->pnfs_curr_ld->owner);
80 }
81 nfss->pnfs_curr_ld = NULL; 80 nfss->pnfs_curr_ld = NULL;
82} 81}
83 82
@@ -115,13 +114,7 @@ set_pnfs_layoutdriver(struct nfs_server *server, u32 id)
115 goto out_no_driver; 114 goto out_no_driver;
116 } 115 }
117 server->pnfs_curr_ld = ld_type; 116 server->pnfs_curr_ld = ld_type;
118 if (ld_type->set_layoutdriver(server)) { 117
119 printk(KERN_ERR
120 "%s: Error initializing mount point for layout driver %u.\n",
121 __func__, id);
122 module_put(ld_type->owner);
123 goto out_no_driver;
124 }
125 dprintk("%s: pNFS module for %u set\n", __func__, id); 118 dprintk("%s: pNFS module for %u set\n", __func__, id);
126 return; 119 return;
127 120
@@ -230,38 +223,43 @@ static void free_lseg(struct pnfs_layout_segment *lseg)
230 put_layout_hdr(NFS_I(ino)->layout); 223 put_layout_hdr(NFS_I(ino)->layout);
231} 224}
232 225
233/* The use of tmp_list is necessary because pnfs_curr_ld->free_lseg 226static void
234 * could sleep, so must be called outside of the lock. 227put_lseg_common(struct pnfs_layout_segment *lseg)
235 * Returns 1 if object was removed, otherwise return 0.
236 */
237static int
238put_lseg_locked(struct pnfs_layout_segment *lseg,
239 struct list_head *tmp_list)
240{ 228{
229 struct inode *inode = lseg->pls_layout->plh_inode;
230
231 BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
232 list_del_init(&lseg->pls_list);
233 if (list_empty(&lseg->pls_layout->plh_segs)) {
234 set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags);
235 /* Matched by initial refcount set in alloc_init_layout_hdr */
236 put_layout_hdr_locked(lseg->pls_layout);
237 }
238 rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
239}
240
241void
242put_lseg(struct pnfs_layout_segment *lseg)
243{
244 struct inode *inode;
245
246 if (!lseg)
247 return;
248
241 dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, 249 dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
242 atomic_read(&lseg->pls_refcount), 250 atomic_read(&lseg->pls_refcount),
243 test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); 251 test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
244 if (atomic_dec_and_test(&lseg->pls_refcount)) { 252 inode = lseg->pls_layout->plh_inode;
245 struct inode *ino = lseg->pls_layout->plh_inode; 253 if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
254 LIST_HEAD(free_me);
246 255
247 BUG_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); 256 put_lseg_common(lseg);
248 list_del(&lseg->pls_list); 257 list_add(&lseg->pls_list, &free_me);
249 if (list_empty(&lseg->pls_layout->plh_segs)) { 258 spin_unlock(&inode->i_lock);
250 struct nfs_client *clp; 259 pnfs_free_lseg_list(&free_me);
251
252 clp = NFS_SERVER(ino)->nfs_client;
253 spin_lock(&clp->cl_lock);
254 /* List does not take a reference, so no need for put here */
255 list_del_init(&lseg->pls_layout->plh_layouts);
256 spin_unlock(&clp->cl_lock);
257 clear_bit(NFS_LAYOUT_BULK_RECALL, &lseg->pls_layout->plh_flags);
258 }
259 rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq);
260 list_add(&lseg->pls_list, tmp_list);
261 return 1;
262 } 260 }
263 return 0;
264} 261}
262EXPORT_SYMBOL_GPL(put_lseg);
265 263
266static bool 264static bool
267should_free_lseg(u32 lseg_iomode, u32 recall_iomode) 265should_free_lseg(u32 lseg_iomode, u32 recall_iomode)
@@ -281,7 +279,13 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
281 * list. It will now be removed when all 279 * list. It will now be removed when all
282 * outstanding io is finished. 280 * outstanding io is finished.
283 */ 281 */
284 rv = put_lseg_locked(lseg, tmp_list); 282 dprintk("%s: lseg %p ref %d\n", __func__, lseg,
283 atomic_read(&lseg->pls_refcount));
284 if (atomic_dec_and_test(&lseg->pls_refcount)) {
285 put_lseg_common(lseg);
286 list_add(&lseg->pls_list, tmp_list);
287 rv = 1;
288 }
285 } 289 }
286 return rv; 290 return rv;
287} 291}
@@ -299,6 +303,11 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
299 303
300 dprintk("%s:Begin lo %p\n", __func__, lo); 304 dprintk("%s:Begin lo %p\n", __func__, lo);
301 305
306 if (list_empty(&lo->plh_segs)) {
307 if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
308 put_layout_hdr_locked(lo);
309 return 0;
310 }
302 list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) 311 list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
303 if (should_free_lseg(lseg->pls_range.iomode, iomode)) { 312 if (should_free_lseg(lseg->pls_range.iomode, iomode)) {
304 dprintk("%s: freeing lseg %p iomode %d " 313 dprintk("%s: freeing lseg %p iomode %d "
@@ -312,11 +321,27 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
312 return invalid - removed; 321 return invalid - removed;
313} 322}
314 323
324/* note free_me must contain lsegs from a single layout_hdr */
315void 325void
316pnfs_free_lseg_list(struct list_head *free_me) 326pnfs_free_lseg_list(struct list_head *free_me)
317{ 327{
318 struct pnfs_layout_segment *lseg, *tmp; 328 struct pnfs_layout_segment *lseg, *tmp;
329 struct pnfs_layout_hdr *lo;
319 330
331 if (list_empty(free_me))
332 return;
333
334 lo = list_first_entry(free_me, struct pnfs_layout_segment,
335 pls_list)->pls_layout;
336
337 if (test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) {
338 struct nfs_client *clp;
339
340 clp = NFS_SERVER(lo->plh_inode)->nfs_client;
341 spin_lock(&clp->cl_lock);
342 list_del_init(&lo->plh_layouts);
343 spin_unlock(&clp->cl_lock);
344 }
320 list_for_each_entry_safe(lseg, tmp, free_me, pls_list) { 345 list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
321 list_del(&lseg->pls_list); 346 list_del(&lseg->pls_list);
322 free_lseg(lseg); 347 free_lseg(lseg);
@@ -332,10 +357,8 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
332 spin_lock(&nfsi->vfs_inode.i_lock); 357 spin_lock(&nfsi->vfs_inode.i_lock);
333 lo = nfsi->layout; 358 lo = nfsi->layout;
334 if (lo) { 359 if (lo) {
335 set_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags); 360 lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
336 mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY); 361 mark_matching_lsegs_invalid(lo, &tmp_list, IOMODE_ANY);
337 /* Matched by refcount set to 1 in alloc_init_layout_hdr */
338 put_layout_hdr_locked(lo);
339 } 362 }
340 spin_unlock(&nfsi->vfs_inode.i_lock); 363 spin_unlock(&nfsi->vfs_inode.i_lock);
341 pnfs_free_lseg_list(&tmp_list); 364 pnfs_free_lseg_list(&tmp_list);
@@ -403,6 +426,7 @@ pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid,
403 (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0) 426 (int)(lo->plh_barrier - be32_to_cpu(stateid->stateid.seqid)) >= 0)
404 return true; 427 return true;
405 return lo->plh_block_lgets || 428 return lo->plh_block_lgets ||
429 test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) ||
406 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || 430 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
407 (list_empty(&lo->plh_segs) && 431 (list_empty(&lo->plh_segs) &&
408 (atomic_read(&lo->plh_outstanding) > lget)); 432 (atomic_read(&lo->plh_outstanding) > lget));
@@ -448,6 +472,9 @@ send_layoutget(struct pnfs_layout_hdr *lo,
448 struct nfs_server *server = NFS_SERVER(ino); 472 struct nfs_server *server = NFS_SERVER(ino);
449 struct nfs4_layoutget *lgp; 473 struct nfs4_layoutget *lgp;
450 struct pnfs_layout_segment *lseg = NULL; 474 struct pnfs_layout_segment *lseg = NULL;
475 struct page **pages = NULL;
476 int i;
477 u32 max_resp_sz, max_pages;
451 478
452 dprintk("--> %s\n", __func__); 479 dprintk("--> %s\n", __func__);
453 480
@@ -455,6 +482,21 @@ send_layoutget(struct pnfs_layout_hdr *lo,
455 lgp = kzalloc(sizeof(*lgp), GFP_KERNEL); 482 lgp = kzalloc(sizeof(*lgp), GFP_KERNEL);
456 if (lgp == NULL) 483 if (lgp == NULL)
457 return NULL; 484 return NULL;
485
486 /* allocate pages for xdr post processing */
487 max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz;
488 max_pages = max_resp_sz >> PAGE_SHIFT;
489
490 pages = kzalloc(max_pages * sizeof(struct page *), GFP_KERNEL);
491 if (!pages)
492 goto out_err_free;
493
494 for (i = 0; i < max_pages; i++) {
495 pages[i] = alloc_page(GFP_KERNEL);
496 if (!pages[i])
497 goto out_err_free;
498 }
499
458 lgp->args.minlength = NFS4_MAX_UINT64; 500 lgp->args.minlength = NFS4_MAX_UINT64;
459 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; 501 lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE;
460 lgp->args.range.iomode = iomode; 502 lgp->args.range.iomode = iomode;
@@ -463,6 +505,8 @@ send_layoutget(struct pnfs_layout_hdr *lo,
463 lgp->args.type = server->pnfs_curr_ld->id; 505 lgp->args.type = server->pnfs_curr_ld->id;
464 lgp->args.inode = ino; 506 lgp->args.inode = ino;
465 lgp->args.ctx = get_nfs_open_context(ctx); 507 lgp->args.ctx = get_nfs_open_context(ctx);
508 lgp->args.layout.pages = pages;
509 lgp->args.layout.pglen = max_pages * PAGE_SIZE;
466 lgp->lsegpp = &lseg; 510 lgp->lsegpp = &lseg;
467 511
468 /* Synchronously retrieve layout information from server and 512 /* Synchronously retrieve layout information from server and
@@ -473,7 +517,26 @@ send_layoutget(struct pnfs_layout_hdr *lo,
473 /* remember that LAYOUTGET failed and suspend trying */ 517 /* remember that LAYOUTGET failed and suspend trying */
474 set_bit(lo_fail_bit(iomode), &lo->plh_flags); 518 set_bit(lo_fail_bit(iomode), &lo->plh_flags);
475 } 519 }
520
521 /* free xdr pages */
522 for (i = 0; i < max_pages; i++)
523 __free_page(pages[i]);
524 kfree(pages);
525
476 return lseg; 526 return lseg;
527
528out_err_free:
529 /* free any allocated xdr pages, lgp as it's not used */
530 if (pages) {
531 for (i = 0; i < max_pages; i++) {
532 if (!pages[i])
533 break;
534 __free_page(pages[i]);
535 }
536 kfree(pages);
537 }
538 kfree(lgp);
539 return NULL;
477} 540}
478 541
479bool pnfs_roc(struct inode *ino) 542bool pnfs_roc(struct inode *ino)
@@ -674,7 +737,7 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo, u32 iomode)
674 list_for_each_entry(lseg, &lo->plh_segs, pls_list) { 737 list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
675 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && 738 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
676 is_matching_lseg(lseg, iomode)) { 739 is_matching_lseg(lseg, iomode)) {
677 ret = lseg; 740 ret = get_lseg(lseg);
678 break; 741 break;
679 } 742 }
680 if (cmp_layout(iomode, lseg->pls_range.iomode) > 0) 743 if (cmp_layout(iomode, lseg->pls_range.iomode) > 0)
@@ -699,6 +762,7 @@ pnfs_update_layout(struct inode *ino,
699 struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; 762 struct nfs_client *clp = NFS_SERVER(ino)->nfs_client;
700 struct pnfs_layout_hdr *lo; 763 struct pnfs_layout_hdr *lo;
701 struct pnfs_layout_segment *lseg = NULL; 764 struct pnfs_layout_segment *lseg = NULL;
765 bool first = false;
702 766
703 if (!pnfs_enabled_sb(NFS_SERVER(ino))) 767 if (!pnfs_enabled_sb(NFS_SERVER(ino)))
704 return NULL; 768 return NULL;
@@ -715,21 +779,25 @@ pnfs_update_layout(struct inode *ino,
715 dprintk("%s matches recall, use MDS\n", __func__); 779 dprintk("%s matches recall, use MDS\n", __func__);
716 goto out_unlock; 780 goto out_unlock;
717 } 781 }
718 /* Check to see if the layout for the given range already exists */
719 lseg = pnfs_find_lseg(lo, iomode);
720 if (lseg)
721 goto out_unlock;
722 782
723 /* if LAYOUTGET already failed once we don't try again */ 783 /* if LAYOUTGET already failed once we don't try again */
724 if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags)) 784 if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags))
725 goto out_unlock; 785 goto out_unlock;
726 786
787 /* Check to see if the layout for the given range already exists */
788 lseg = pnfs_find_lseg(lo, iomode);
789 if (lseg)
790 goto out_unlock;
791
727 if (pnfs_layoutgets_blocked(lo, NULL, 0)) 792 if (pnfs_layoutgets_blocked(lo, NULL, 0))
728 goto out_unlock; 793 goto out_unlock;
729 atomic_inc(&lo->plh_outstanding); 794 atomic_inc(&lo->plh_outstanding);
730 795
731 get_layout_hdr(lo); 796 get_layout_hdr(lo);
732 if (list_empty(&lo->plh_segs)) { 797 if (list_empty(&lo->plh_segs))
798 first = true;
799 spin_unlock(&ino->i_lock);
800 if (first) {
733 /* The lo must be on the clp list if there is any 801 /* The lo must be on the clp list if there is any
734 * chance of a CB_LAYOUTRECALL(FILE) coming in. 802 * chance of a CB_LAYOUTRECALL(FILE) coming in.
735 */ 803 */
@@ -738,24 +806,18 @@ pnfs_update_layout(struct inode *ino,
738 list_add_tail(&lo->plh_layouts, &clp->cl_layouts); 806 list_add_tail(&lo->plh_layouts, &clp->cl_layouts);
739 spin_unlock(&clp->cl_lock); 807 spin_unlock(&clp->cl_lock);
740 } 808 }
741 spin_unlock(&ino->i_lock);
742 809
743 lseg = send_layoutget(lo, ctx, iomode); 810 lseg = send_layoutget(lo, ctx, iomode);
744 if (!lseg) { 811 if (!lseg && first) {
745 spin_lock(&ino->i_lock); 812 spin_lock(&clp->cl_lock);
746 if (list_empty(&lo->plh_segs)) { 813 list_del_init(&lo->plh_layouts);
747 spin_lock(&clp->cl_lock); 814 spin_unlock(&clp->cl_lock);
748 list_del_init(&lo->plh_layouts);
749 spin_unlock(&clp->cl_lock);
750 clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
751 }
752 spin_unlock(&ino->i_lock);
753 } 815 }
754 atomic_dec(&lo->plh_outstanding); 816 atomic_dec(&lo->plh_outstanding);
755 put_layout_hdr(lo); 817 put_layout_hdr(lo);
756out: 818out:
757 dprintk("%s end, state 0x%lx lseg %p\n", __func__, 819 dprintk("%s end, state 0x%lx lseg %p\n", __func__,
758 nfsi->layout->plh_flags, lseg); 820 nfsi->layout ? nfsi->layout->plh_flags : -1, lseg);
759 return lseg; 821 return lseg;
760out_unlock: 822out_unlock:
761 spin_unlock(&ino->i_lock); 823 spin_unlock(&ino->i_lock);
@@ -808,7 +870,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
808 } 870 }
809 init_lseg(lo, lseg); 871 init_lseg(lo, lseg);
810 lseg->pls_range = res->range; 872 lseg->pls_range = res->range;
811 *lgp->lsegpp = lseg; 873 *lgp->lsegpp = get_lseg(lseg);
812 pnfs_insert_layout(lo, lseg); 874 pnfs_insert_layout(lo, lseg);
813 875
814 if (res->return_on_close) { 876 if (res->return_on_close) {
@@ -829,137 +891,205 @@ out_forget_reply:
829 goto out; 891 goto out;
830} 892}
831 893
894static int pnfs_read_pg_test(struct nfs_pageio_descriptor *pgio,
895 struct nfs_page *prev,
896 struct nfs_page *req)
897{
898 if (pgio->pg_count == prev->wb_bytes) {
899 /* This is the first coalesce call for a series of nfs_pages */
900 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
901 prev->wb_context,
902 IOMODE_READ);
903 }
904 return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
905}
906
907void
908pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
909{
910 struct pnfs_layoutdriver_type *ld;
911
912 ld = NFS_SERVER(inode)->pnfs_curr_ld;
913 pgio->pg_test = (ld && ld->pg_test) ? pnfs_read_pg_test : NULL;
914}
915
916static int pnfs_write_pg_test(struct nfs_pageio_descriptor *pgio,
917 struct nfs_page *prev,
918 struct nfs_page *req)
919{
920 if (pgio->pg_count == prev->wb_bytes) {
921 /* This is the first coalesce call for a series of nfs_pages */
922 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
923 prev->wb_context,
924 IOMODE_RW);
925 }
926 return NFS_SERVER(pgio->pg_inode)->pnfs_curr_ld->pg_test(pgio, prev, req);
927}
928
929void
930pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode)
931{
932 struct pnfs_layoutdriver_type *ld;
933
934 ld = NFS_SERVER(inode)->pnfs_curr_ld;
935 pgio->pg_test = (ld && ld->pg_test) ? pnfs_write_pg_test : NULL;
936}
937
938enum pnfs_try_status
939pnfs_try_to_write_data(struct nfs_write_data *wdata,
940 const struct rpc_call_ops *call_ops, int how)
941{
942 struct inode *inode = wdata->inode;
943 enum pnfs_try_status trypnfs;
944 struct nfs_server *nfss = NFS_SERVER(inode);
945
946 wdata->mds_ops = call_ops;
947
948 dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
949 inode->i_ino, wdata->args.count, wdata->args.offset, how);
950
951 trypnfs = nfss->pnfs_curr_ld->write_pagelist(wdata, how);
952 if (trypnfs == PNFS_NOT_ATTEMPTED) {
953 put_lseg(wdata->lseg);
954 wdata->lseg = NULL;
955 } else
956 nfs_inc_stats(inode, NFSIOS_PNFS_WRITE);
957
958 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
959 return trypnfs;
960}
961
832/* 962/*
833 * Device ID cache. Currently supports one layout type per struct nfs_client. 963 * Call the appropriate parallel I/O subsystem read function.
834 * Add layout type to the lookup key to expand to support multiple types.
835 */ 964 */
836int 965enum pnfs_try_status
837pnfs_alloc_init_deviceid_cache(struct nfs_client *clp, 966pnfs_try_to_read_data(struct nfs_read_data *rdata,
838 void (*free_callback)(struct pnfs_deviceid_node *)) 967 const struct rpc_call_ops *call_ops)
839{ 968{
840 struct pnfs_deviceid_cache *c; 969 struct inode *inode = rdata->inode;
970 struct nfs_server *nfss = NFS_SERVER(inode);
971 enum pnfs_try_status trypnfs;
841 972
842 c = kzalloc(sizeof(struct pnfs_deviceid_cache), GFP_KERNEL); 973 rdata->mds_ops = call_ops;
843 if (!c) 974
844 return -ENOMEM; 975 dprintk("%s: Reading ino:%lu %u@%llu\n",
845 spin_lock(&clp->cl_lock); 976 __func__, inode->i_ino, rdata->args.count, rdata->args.offset);
846 if (clp->cl_devid_cache != NULL) { 977
847 atomic_inc(&clp->cl_devid_cache->dc_ref); 978 trypnfs = nfss->pnfs_curr_ld->read_pagelist(rdata);
848 dprintk("%s [kref [%d]]\n", __func__, 979 if (trypnfs == PNFS_NOT_ATTEMPTED) {
849 atomic_read(&clp->cl_devid_cache->dc_ref)); 980 put_lseg(rdata->lseg);
850 kfree(c); 981 rdata->lseg = NULL;
851 } else { 982 } else {
852 /* kzalloc initializes hlists */ 983 nfs_inc_stats(inode, NFSIOS_PNFS_READ);
853 spin_lock_init(&c->dc_lock);
854 atomic_set(&c->dc_ref, 1);
855 c->dc_free_callback = free_callback;
856 clp->cl_devid_cache = c;
857 dprintk("%s [new]\n", __func__);
858 } 984 }
859 spin_unlock(&clp->cl_lock); 985 dprintk("%s End (trypnfs:%d)\n", __func__, trypnfs);
860 return 0; 986 return trypnfs;
861} 987}
862EXPORT_SYMBOL_GPL(pnfs_alloc_init_deviceid_cache);
863 988
864/* 989/*
865 * Called from pnfs_layoutdriver_type->free_lseg 990 * Currently there is only one (whole file) write lseg.
866 * last layout segment reference frees deviceid
867 */ 991 */
868void 992static struct pnfs_layout_segment *pnfs_list_write_lseg(struct inode *inode)
869pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
870 struct pnfs_deviceid_node *devid)
871{ 993{
872 struct nfs4_deviceid *id = &devid->de_id; 994 struct pnfs_layout_segment *lseg, *rv = NULL;
873 struct pnfs_deviceid_node *d;
874 struct hlist_node *n;
875 long h = nfs4_deviceid_hash(id);
876 995
877 dprintk("%s [%d]\n", __func__, atomic_read(&devid->de_ref)); 996 list_for_each_entry(lseg, &NFS_I(inode)->layout->plh_segs, pls_list)
878 if (!atomic_dec_and_lock(&devid->de_ref, &c->dc_lock)) 997 if (lseg->pls_range.iomode == IOMODE_RW)
879 return; 998 rv = lseg;
999 return rv;
1000}
880 1001
881 hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[h], de_node) 1002void
882 if (!memcmp(&d->de_id, id, sizeof(*id))) { 1003pnfs_set_layoutcommit(struct nfs_write_data *wdata)
883 hlist_del_rcu(&d->de_node); 1004{
884 spin_unlock(&c->dc_lock); 1005 struct nfs_inode *nfsi = NFS_I(wdata->inode);
885 synchronize_rcu(); 1006 loff_t end_pos = wdata->args.offset + wdata->res.count;
886 c->dc_free_callback(devid); 1007 bool mark_as_dirty = false;
887 return; 1008
888 } 1009 spin_lock(&nfsi->vfs_inode.i_lock);
889 spin_unlock(&c->dc_lock); 1010 if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
890 /* Why wasn't it found in the list? */ 1011 /* references matched in nfs4_layoutcommit_release */
891 BUG(); 1012 get_lseg(wdata->lseg);
892} 1013 wdata->lseg->pls_lc_cred =
893EXPORT_SYMBOL_GPL(pnfs_put_deviceid); 1014 get_rpccred(wdata->args.context->state->owner->so_cred);
894 1015 mark_as_dirty = true;
895/* Find and reference a deviceid */ 1016 dprintk("%s: Set layoutcommit for inode %lu ",
896struct pnfs_deviceid_node * 1017 __func__, wdata->inode->i_ino);
897pnfs_find_get_deviceid(struct pnfs_deviceid_cache *c, struct nfs4_deviceid *id)
898{
899 struct pnfs_deviceid_node *d;
900 struct hlist_node *n;
901 long hash = nfs4_deviceid_hash(id);
902
903 dprintk("--> %s hash %ld\n", __func__, hash);
904 rcu_read_lock();
905 hlist_for_each_entry_rcu(d, n, &c->dc_deviceids[hash], de_node) {
906 if (!memcmp(&d->de_id, id, sizeof(*id))) {
907 if (!atomic_inc_not_zero(&d->de_ref)) {
908 goto fail;
909 } else {
910 rcu_read_unlock();
911 return d;
912 }
913 }
914 } 1018 }
915fail: 1019 if (end_pos > wdata->lseg->pls_end_pos)
916 rcu_read_unlock(); 1020 wdata->lseg->pls_end_pos = end_pos;
917 return NULL; 1021 spin_unlock(&nfsi->vfs_inode.i_lock);
1022
1023 /* if pnfs_layoutcommit_inode() runs between inode locks, the next one
1024 * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */
1025 if (mark_as_dirty)
1026 mark_inode_dirty_sync(wdata->inode);
918} 1027}
919EXPORT_SYMBOL_GPL(pnfs_find_get_deviceid); 1028EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit);
920 1029
921/* 1030/*
922 * Add a deviceid to the cache. 1031 * For the LAYOUT4_NFSV4_1_FILES layout type, NFS_DATA_SYNC WRITEs and
923 * GETDEVICEINFOs for same deviceid can race. If deviceid is found, discard new 1032 * NFS_UNSTABLE WRITEs with a COMMIT to data servers must store enough
1033 * data to disk to allow the server to recover the data if it crashes.
1034 * LAYOUTCOMMIT is only needed when the NFL4_UFLG_COMMIT_THRU_MDS flag
1035 * is off, and a COMMIT is sent to a data server, or
1036 * if WRITEs to a data server return NFS_DATA_SYNC.
924 */ 1037 */
925struct pnfs_deviceid_node * 1038int
926pnfs_add_deviceid(struct pnfs_deviceid_cache *c, struct pnfs_deviceid_node *new) 1039pnfs_layoutcommit_inode(struct inode *inode, bool sync)
927{
928 struct pnfs_deviceid_node *d;
929 long hash = nfs4_deviceid_hash(&new->de_id);
930
931 dprintk("--> %s hash %ld\n", __func__, hash);
932 spin_lock(&c->dc_lock);
933 d = pnfs_find_get_deviceid(c, &new->de_id);
934 if (d) {
935 spin_unlock(&c->dc_lock);
936 dprintk("%s [discard]\n", __func__);
937 c->dc_free_callback(new);
938 return d;
939 }
940 INIT_HLIST_NODE(&new->de_node);
941 atomic_set(&new->de_ref, 1);
942 hlist_add_head_rcu(&new->de_node, &c->dc_deviceids[hash]);
943 spin_unlock(&c->dc_lock);
944 dprintk("%s [new]\n", __func__);
945 return new;
946}
947EXPORT_SYMBOL_GPL(pnfs_add_deviceid);
948
949void
950pnfs_put_deviceid_cache(struct nfs_client *clp)
951{ 1040{
952 struct pnfs_deviceid_cache *local = clp->cl_devid_cache; 1041 struct nfs4_layoutcommit_data *data;
1042 struct nfs_inode *nfsi = NFS_I(inode);
1043 struct pnfs_layout_segment *lseg;
1044 struct rpc_cred *cred;
1045 loff_t end_pos;
1046 int status = 0;
953 1047
954 dprintk("--> %s ({%d})\n", __func__, atomic_read(&local->dc_ref)); 1048 dprintk("--> %s inode %lu\n", __func__, inode->i_ino);
955 if (atomic_dec_and_lock(&local->dc_ref, &clp->cl_lock)) { 1049
956 int i; 1050 if (!test_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags))
957 /* Verify cache is empty */ 1051 return 0;
958 for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i++) 1052
959 BUG_ON(!hlist_empty(&local->dc_deviceids[i])); 1053 /* Note kzalloc ensures data->res.seq_res.sr_slot == NULL */
960 clp->cl_devid_cache = NULL; 1054 data = kzalloc(sizeof(*data), GFP_NOFS);
961 spin_unlock(&clp->cl_lock); 1055 if (!data) {
962 kfree(local); 1056 mark_inode_dirty_sync(inode);
1057 status = -ENOMEM;
1058 goto out;
963 } 1059 }
1060
1061 spin_lock(&inode->i_lock);
1062 if (!test_and_clear_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) {
1063 spin_unlock(&inode->i_lock);
1064 kfree(data);
1065 goto out;
1066 }
1067 /*
1068 * Currently only one (whole file) write lseg which is referenced
1069 * in pnfs_set_layoutcommit and will be found.
1070 */
1071 lseg = pnfs_list_write_lseg(inode);
1072
1073 end_pos = lseg->pls_end_pos;
1074 cred = lseg->pls_lc_cred;
1075 lseg->pls_end_pos = 0;
1076 lseg->pls_lc_cred = NULL;
1077
1078 memcpy(&data->args.stateid.data, nfsi->layout->plh_stateid.data,
1079 sizeof(nfsi->layout->plh_stateid.data));
1080 spin_unlock(&inode->i_lock);
1081
1082 data->args.inode = inode;
1083 data->lseg = lseg;
1084 data->cred = cred;
1085 nfs_fattr_init(&data->fattr);
1086 data->args.bitmask = NFS_SERVER(inode)->cache_consistency_bitmask;
1087 data->res.fattr = &data->fattr;
1088 data->args.lastbytewritten = end_pos - 1;
1089 data->res.server = NFS_SERVER(inode);
1090
1091 status = nfs4_proc_layoutcommit(data, sync);
1092out:
1093 dprintk("<-- %s status %d\n", __func__, status);
1094 return status;
964} 1095}
965EXPORT_SYMBOL_GPL(pnfs_put_deviceid_cache);
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index e2612ea0cbed..bc4827202e7a 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -30,6 +30,8 @@
30#ifndef FS_NFS_PNFS_H 30#ifndef FS_NFS_PNFS_H
31#define FS_NFS_PNFS_H 31#define FS_NFS_PNFS_H
32 32
33#include <linux/nfs_page.h>
34
33enum { 35enum {
34 NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */ 36 NFS_LSEG_VALID = 0, /* cleared when lseg is recalled/returned */
35 NFS_LSEG_ROC, /* roc bit received from server */ 37 NFS_LSEG_ROC, /* roc bit received from server */
@@ -41,6 +43,13 @@ struct pnfs_layout_segment {
41 atomic_t pls_refcount; 43 atomic_t pls_refcount;
42 unsigned long pls_flags; 44 unsigned long pls_flags;
43 struct pnfs_layout_hdr *pls_layout; 45 struct pnfs_layout_hdr *pls_layout;
46 struct rpc_cred *pls_lc_cred; /* LAYOUTCOMMIT credential */
47 loff_t pls_end_pos; /* LAYOUTCOMMIT write end */
48};
49
50enum pnfs_try_status {
51 PNFS_ATTEMPTED = 0,
52 PNFS_NOT_ATTEMPTED = 1,
44}; 53};
45 54
46#ifdef CONFIG_NFS_V4_1 55#ifdef CONFIG_NFS_V4_1
@@ -61,10 +70,25 @@ struct pnfs_layoutdriver_type {
61 const u32 id; 70 const u32 id;
62 const char *name; 71 const char *name;
63 struct module *owner; 72 struct module *owner;
64 int (*set_layoutdriver) (struct nfs_server *);
65 int (*clear_layoutdriver) (struct nfs_server *);
66 struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr); 73 struct pnfs_layout_segment * (*alloc_lseg) (struct pnfs_layout_hdr *layoutid, struct nfs4_layoutget_res *lgr);
67 void (*free_lseg) (struct pnfs_layout_segment *lseg); 74 void (*free_lseg) (struct pnfs_layout_segment *lseg);
75
76 /* test for nfs page cache coalescing */
77 int (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
78
79 /* Returns true if layoutdriver wants to divert this request to
80 * driver's commit routine.
81 */
82 bool (*mark_pnfs_commit)(struct pnfs_layout_segment *lseg);
83 struct list_head * (*choose_commit_list) (struct nfs_page *req);
84 int (*commit_pagelist)(struct inode *inode, struct list_head *mds_pages, int how);
85
86 /*
87 * Return PNFS_ATTEMPTED to indicate the layout code has attempted
88 * I/O, else return PNFS_NOT_ATTEMPTED to fall back to normal NFS
89 */
90 enum pnfs_try_status (*read_pagelist) (struct nfs_read_data *nfs_data);
91 enum pnfs_try_status (*write_pagelist) (struct nfs_write_data *nfs_data, int how);
68}; 92};
69 93
70struct pnfs_layout_hdr { 94struct pnfs_layout_hdr {
@@ -85,57 +109,10 @@ struct pnfs_device {
85 unsigned int layout_type; 109 unsigned int layout_type;
86 unsigned int mincount; 110 unsigned int mincount;
87 struct page **pages; 111 struct page **pages;
88 void *area;
89 unsigned int pgbase; 112 unsigned int pgbase;
90 unsigned int pglen; 113 unsigned int pglen;
91}; 114};
92 115
93/*
94 * Device ID RCU cache. A device ID is unique per client ID and layout type.
95 */
96#define NFS4_DEVICE_ID_HASH_BITS 5
97#define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS)
98#define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1)
99
100static inline u32
101nfs4_deviceid_hash(struct nfs4_deviceid *id)
102{
103 unsigned char *cptr = (unsigned char *)id->data;
104 unsigned int nbytes = NFS4_DEVICEID4_SIZE;
105 u32 x = 0;
106
107 while (nbytes--) {
108 x *= 37;
109 x += *cptr++;
110 }
111 return x & NFS4_DEVICE_ID_HASH_MASK;
112}
113
114struct pnfs_deviceid_node {
115 struct hlist_node de_node;
116 struct nfs4_deviceid de_id;
117 atomic_t de_ref;
118};
119
120struct pnfs_deviceid_cache {
121 spinlock_t dc_lock;
122 atomic_t dc_ref;
123 void (*dc_free_callback)(struct pnfs_deviceid_node *);
124 struct hlist_head dc_deviceids[NFS4_DEVICE_ID_HASH_SIZE];
125};
126
127extern int pnfs_alloc_init_deviceid_cache(struct nfs_client *,
128 void (*free_callback)(struct pnfs_deviceid_node *));
129extern void pnfs_put_deviceid_cache(struct nfs_client *);
130extern struct pnfs_deviceid_node *pnfs_find_get_deviceid(
131 struct pnfs_deviceid_cache *,
132 struct nfs4_deviceid *);
133extern struct pnfs_deviceid_node *pnfs_add_deviceid(
134 struct pnfs_deviceid_cache *,
135 struct pnfs_deviceid_node *);
136extern void pnfs_put_deviceid(struct pnfs_deviceid_cache *c,
137 struct pnfs_deviceid_node *devid);
138
139extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); 116extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *);
140extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); 117extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *);
141 118
@@ -146,11 +123,18 @@ extern int nfs4_proc_layoutget(struct nfs4_layoutget *lgp);
146 123
147/* pnfs.c */ 124/* pnfs.c */
148void get_layout_hdr(struct pnfs_layout_hdr *lo); 125void get_layout_hdr(struct pnfs_layout_hdr *lo);
126void put_lseg(struct pnfs_layout_segment *lseg);
149struct pnfs_layout_segment * 127struct pnfs_layout_segment *
150pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, 128pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
151 enum pnfs_iomode access_type); 129 enum pnfs_iomode access_type);
152void set_pnfs_layoutdriver(struct nfs_server *, u32 id); 130void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
153void unset_pnfs_layoutdriver(struct nfs_server *); 131void unset_pnfs_layoutdriver(struct nfs_server *);
132enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *,
133 const struct rpc_call_ops *, int);
134enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *,
135 const struct rpc_call_ops *);
136void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *);
137void pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *);
154int pnfs_layout_process(struct nfs4_layoutget *lgp); 138int pnfs_layout_process(struct nfs4_layoutget *lgp);
155void pnfs_free_lseg_list(struct list_head *tmp_list); 139void pnfs_free_lseg_list(struct list_head *tmp_list);
156void pnfs_destroy_layout(struct nfs_inode *); 140void pnfs_destroy_layout(struct nfs_inode *);
@@ -169,7 +153,8 @@ bool pnfs_roc(struct inode *ino);
169void pnfs_roc_release(struct inode *ino); 153void pnfs_roc_release(struct inode *ino);
170void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); 154void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
171bool pnfs_roc_drain(struct inode *ino, u32 *barrier); 155bool pnfs_roc_drain(struct inode *ino, u32 *barrier);
172 156void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
157int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
173 158
174static inline int lo_fail_bit(u32 iomode) 159static inline int lo_fail_bit(u32 iomode)
175{ 160{
@@ -177,12 +162,67 @@ static inline int lo_fail_bit(u32 iomode)
177 NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED; 162 NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED;
178} 163}
179 164
165static inline struct pnfs_layout_segment *
166get_lseg(struct pnfs_layout_segment *lseg)
167{
168 if (lseg) {
169 atomic_inc(&lseg->pls_refcount);
170 smp_mb__after_atomic_inc();
171 }
172 return lseg;
173}
174
180/* Return true if a layout driver is being used for this mountpoint */ 175/* Return true if a layout driver is being used for this mountpoint */
181static inline int pnfs_enabled_sb(struct nfs_server *nfss) 176static inline int pnfs_enabled_sb(struct nfs_server *nfss)
182{ 177{
183 return nfss->pnfs_curr_ld != NULL; 178 return nfss->pnfs_curr_ld != NULL;
184} 179}
185 180
181static inline void
182pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
183{
184 if (lseg) {
185 struct pnfs_layoutdriver_type *ld;
186
187 ld = NFS_SERVER(req->wb_page->mapping->host)->pnfs_curr_ld;
188 if (ld->mark_pnfs_commit && ld->mark_pnfs_commit(lseg)) {
189 set_bit(PG_PNFS_COMMIT, &req->wb_flags);
190 req->wb_commit_lseg = get_lseg(lseg);
191 }
192 }
193}
194
195static inline int
196pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
197{
198 if (!test_and_clear_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags))
199 return PNFS_NOT_ATTEMPTED;
200 return NFS_SERVER(inode)->pnfs_curr_ld->commit_pagelist(inode, mds_pages, how);
201}
202
203static inline struct list_head *
204pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds)
205{
206 struct list_head *rv;
207
208 if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags)) {
209 struct inode *inode = req->wb_commit_lseg->pls_layout->plh_inode;
210
211 set_bit(NFS_INO_PNFS_COMMIT, &NFS_I(inode)->flags);
212 rv = NFS_SERVER(inode)->pnfs_curr_ld->choose_commit_list(req);
213 /* matched by ref taken when PG_PNFS_COMMIT is set */
214 put_lseg(req->wb_commit_lseg);
215 } else
216 rv = mds;
217 return rv;
218}
219
220static inline void pnfs_clear_request_commit(struct nfs_page *req)
221{
222 if (test_and_clear_bit(PG_PNFS_COMMIT, &req->wb_flags))
223 put_lseg(req->wb_commit_lseg);
224}
225
186#else /* CONFIG_NFS_V4_1 */ 226#else /* CONFIG_NFS_V4_1 */
187 227
188static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) 228static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
@@ -194,12 +234,36 @@ static inline void pnfs_destroy_layout(struct nfs_inode *nfsi)
194} 234}
195 235
196static inline struct pnfs_layout_segment * 236static inline struct pnfs_layout_segment *
237get_lseg(struct pnfs_layout_segment *lseg)
238{
239 return NULL;
240}
241
242static inline void put_lseg(struct pnfs_layout_segment *lseg)
243{
244}
245
246static inline struct pnfs_layout_segment *
197pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, 247pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
198 enum pnfs_iomode access_type) 248 enum pnfs_iomode access_type)
199{ 249{
200 return NULL; 250 return NULL;
201} 251}
202 252
253static inline enum pnfs_try_status
254pnfs_try_to_read_data(struct nfs_read_data *data,
255 const struct rpc_call_ops *call_ops)
256{
257 return PNFS_NOT_ATTEMPTED;
258}
259
260static inline enum pnfs_try_status
261pnfs_try_to_write_data(struct nfs_write_data *data,
262 const struct rpc_call_ops *call_ops, int how)
263{
264 return PNFS_NOT_ATTEMPTED;
265}
266
203static inline bool 267static inline bool
204pnfs_roc(struct inode *ino) 268pnfs_roc(struct inode *ino)
205{ 269{
@@ -230,6 +294,43 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
230{ 294{
231} 295}
232 296
297static inline void
298pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *ino)
299{
300 pgio->pg_test = NULL;
301}
302
303static inline void
304pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *ino)
305{
306 pgio->pg_test = NULL;
307}
308
309static inline void
310pnfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
311{
312}
313
314static inline int
315pnfs_commit_list(struct inode *inode, struct list_head *mds_pages, int how)
316{
317 return PNFS_NOT_ATTEMPTED;
318}
319
320static inline struct list_head *
321pnfs_choose_commit_list(struct nfs_page *req, struct list_head *mds)
322{
323 return mds;
324}
325
326static inline void pnfs_clear_request_commit(struct nfs_page *req)
327{
328}
329
330static inline int pnfs_layoutcommit_inode(struct inode *inode, bool sync)
331{
332 return 0;
333}
233#endif /* CONFIG_NFS_V4_1 */ 334#endif /* CONFIG_NFS_V4_1 */
234 335
235#endif /* FS_NFS_PNFS_H */ 336#endif /* FS_NFS_PNFS_H */
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index 77d5e21c4ad6..ac40b8535d7e 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -177,7 +177,7 @@ nfs_proc_setattr(struct dentry *dentry, struct nfs_fattr *fattr,
177} 177}
178 178
179static int 179static int
180nfs_proc_lookup(struct inode *dir, struct qstr *name, 180nfs_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name,
181 struct nfs_fh *fhandle, struct nfs_fattr *fattr) 181 struct nfs_fh *fhandle, struct nfs_fattr *fattr)
182{ 182{
183 struct nfs_diropargs arg = { 183 struct nfs_diropargs arg = {
@@ -741,4 +741,5 @@ const struct nfs_rpc_ops nfs_v2_clientops = {
741 .lock = nfs_proc_lock, 741 .lock = nfs_proc_lock,
742 .lock_check_bounds = nfs_lock_check_bounds, 742 .lock_check_bounds = nfs_lock_check_bounds,
743 .close_context = nfs_close_context, 743 .close_context = nfs_close_context,
744 .init_client = nfs_init_client,
744}; 745};
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index aedcaa7f291f..7cded2b12a05 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -18,19 +18,20 @@
18#include <linux/sunrpc/clnt.h> 18#include <linux/sunrpc/clnt.h>
19#include <linux/nfs_fs.h> 19#include <linux/nfs_fs.h>
20#include <linux/nfs_page.h> 20#include <linux/nfs_page.h>
21#include <linux/module.h>
21 22
22#include <asm/system.h> 23#include <asm/system.h>
24#include "pnfs.h"
23 25
24#include "nfs4_fs.h" 26#include "nfs4_fs.h"
25#include "internal.h" 27#include "internal.h"
26#include "iostat.h" 28#include "iostat.h"
27#include "fscache.h" 29#include "fscache.h"
28#include "pnfs.h"
29 30
30#define NFSDBG_FACILITY NFSDBG_PAGECACHE 31#define NFSDBG_FACILITY NFSDBG_PAGECACHE
31 32
32static int nfs_pagein_multi(struct inode *, struct list_head *, unsigned int, size_t, int); 33static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc);
33static int nfs_pagein_one(struct inode *, struct list_head *, unsigned int, size_t, int); 34static int nfs_pagein_one(struct nfs_pageio_descriptor *desc);
34static const struct rpc_call_ops nfs_read_partial_ops; 35static const struct rpc_call_ops nfs_read_partial_ops;
35static const struct rpc_call_ops nfs_read_full_ops; 36static const struct rpc_call_ops nfs_read_full_ops;
36 37
@@ -69,6 +70,7 @@ void nfs_readdata_free(struct nfs_read_data *p)
69 70
70static void nfs_readdata_release(struct nfs_read_data *rdata) 71static void nfs_readdata_release(struct nfs_read_data *rdata)
71{ 72{
73 put_lseg(rdata->lseg);
72 put_nfs_open_context(rdata->args.context); 74 put_nfs_open_context(rdata->args.context);
73 nfs_readdata_free(rdata); 75 nfs_readdata_free(rdata);
74} 76}
@@ -114,14 +116,13 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
114int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, 116int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
115 struct page *page) 117 struct page *page)
116{ 118{
117 LIST_HEAD(one_request);
118 struct nfs_page *new; 119 struct nfs_page *new;
119 unsigned int len; 120 unsigned int len;
121 struct nfs_pageio_descriptor pgio;
120 122
121 len = nfs_page_length(page); 123 len = nfs_page_length(page);
122 if (len == 0) 124 if (len == 0)
123 return nfs_return_empty_page(page); 125 return nfs_return_empty_page(page);
124 pnfs_update_layout(inode, ctx, IOMODE_READ);
125 new = nfs_create_request(ctx, inode, page, 0, len); 126 new = nfs_create_request(ctx, inode, page, 0, len);
126 if (IS_ERR(new)) { 127 if (IS_ERR(new)) {
127 unlock_page(page); 128 unlock_page(page);
@@ -130,11 +131,14 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
130 if (len < PAGE_CACHE_SIZE) 131 if (len < PAGE_CACHE_SIZE)
131 zero_user_segment(page, len, PAGE_CACHE_SIZE); 132 zero_user_segment(page, len, PAGE_CACHE_SIZE);
132 133
133 nfs_list_add_request(new, &one_request); 134 nfs_pageio_init(&pgio, inode, NULL, 0, 0);
135 nfs_list_add_request(new, &pgio.pg_list);
136 pgio.pg_count = len;
137
134 if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) 138 if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
135 nfs_pagein_multi(inode, &one_request, 1, len, 0); 139 nfs_pagein_multi(&pgio);
136 else 140 else
137 nfs_pagein_one(inode, &one_request, 1, len, 0); 141 nfs_pagein_one(&pgio);
138 return 0; 142 return 0;
139} 143}
140 144
@@ -155,24 +159,20 @@ static void nfs_readpage_release(struct nfs_page *req)
155 nfs_release_request(req); 159 nfs_release_request(req);
156} 160}
157 161
158/* 162int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
159 * Set up the NFS read request struct 163 const struct rpc_call_ops *call_ops)
160 */
161static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
162 const struct rpc_call_ops *call_ops,
163 unsigned int count, unsigned int offset)
164{ 164{
165 struct inode *inode = req->wb_context->path.dentry->d_inode; 165 struct inode *inode = data->inode;
166 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; 166 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
167 struct rpc_task *task; 167 struct rpc_task *task;
168 struct rpc_message msg = { 168 struct rpc_message msg = {
169 .rpc_argp = &data->args, 169 .rpc_argp = &data->args,
170 .rpc_resp = &data->res, 170 .rpc_resp = &data->res,
171 .rpc_cred = req->wb_context->cred, 171 .rpc_cred = data->cred,
172 }; 172 };
173 struct rpc_task_setup task_setup_data = { 173 struct rpc_task_setup task_setup_data = {
174 .task = &data->task, 174 .task = &data->task,
175 .rpc_client = NFS_CLIENT(inode), 175 .rpc_client = clnt,
176 .rpc_message = &msg, 176 .rpc_message = &msg,
177 .callback_ops = call_ops, 177 .callback_ops = call_ops,
178 .callback_data = data, 178 .callback_data = data,
@@ -180,9 +180,39 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
180 .flags = RPC_TASK_ASYNC | swap_flags, 180 .flags = RPC_TASK_ASYNC | swap_flags,
181 }; 181 };
182 182
183 /* Set up the initial task struct. */
184 NFS_PROTO(inode)->read_setup(data, &msg);
185
186 dprintk("NFS: %5u initiated read call (req %s/%lld, %u bytes @ "
187 "offset %llu)\n",
188 data->task.tk_pid,
189 inode->i_sb->s_id,
190 (long long)NFS_FILEID(inode),
191 data->args.count,
192 (unsigned long long)data->args.offset);
193
194 task = rpc_run_task(&task_setup_data);
195 if (IS_ERR(task))
196 return PTR_ERR(task);
197 rpc_put_task(task);
198 return 0;
199}
200EXPORT_SYMBOL_GPL(nfs_initiate_read);
201
202/*
203 * Set up the NFS read request struct
204 */
205static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
206 const struct rpc_call_ops *call_ops,
207 unsigned int count, unsigned int offset,
208 struct pnfs_layout_segment *lseg)
209{
210 struct inode *inode = req->wb_context->path.dentry->d_inode;
211
183 data->req = req; 212 data->req = req;
184 data->inode = inode; 213 data->inode = inode;
185 data->cred = msg.rpc_cred; 214 data->cred = req->wb_context->cred;
215 data->lseg = get_lseg(lseg);
186 216
187 data->args.fh = NFS_FH(inode); 217 data->args.fh = NFS_FH(inode);
188 data->args.offset = req_offset(req) + offset; 218 data->args.offset = req_offset(req) + offset;
@@ -197,21 +227,11 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
197 data->res.eof = 0; 227 data->res.eof = 0;
198 nfs_fattr_init(&data->fattr); 228 nfs_fattr_init(&data->fattr);
199 229
200 /* Set up the initial task struct. */ 230 if (data->lseg &&
201 NFS_PROTO(inode)->read_setup(data, &msg); 231 (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED))
202 232 return 0;
203 dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
204 data->task.tk_pid,
205 inode->i_sb->s_id,
206 (long long)NFS_FILEID(inode),
207 count,
208 (unsigned long long)data->args.offset);
209 233
210 task = rpc_run_task(&task_setup_data); 234 return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
211 if (IS_ERR(task))
212 return PTR_ERR(task);
213 rpc_put_task(task);
214 return 0;
215} 235}
216 236
217static void 237static void
@@ -240,20 +260,21 @@ nfs_async_read_error(struct list_head *head)
240 * won't see the new data until our attribute cache is updated. This is more 260 * won't see the new data until our attribute cache is updated. This is more
241 * or less conventional NFS client behavior. 261 * or less conventional NFS client behavior.
242 */ 262 */
243static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags) 263static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
244{ 264{
245 struct nfs_page *req = nfs_list_entry(head->next); 265 struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
246 struct page *page = req->wb_page; 266 struct page *page = req->wb_page;
247 struct nfs_read_data *data; 267 struct nfs_read_data *data;
248 size_t rsize = NFS_SERVER(inode)->rsize, nbytes; 268 size_t rsize = NFS_SERVER(desc->pg_inode)->rsize, nbytes;
249 unsigned int offset; 269 unsigned int offset;
250 int requests = 0; 270 int requests = 0;
251 int ret = 0; 271 int ret = 0;
272 struct pnfs_layout_segment *lseg;
252 LIST_HEAD(list); 273 LIST_HEAD(list);
253 274
254 nfs_list_remove_request(req); 275 nfs_list_remove_request(req);
255 276
256 nbytes = count; 277 nbytes = desc->pg_count;
257 do { 278 do {
258 size_t len = min(nbytes,rsize); 279 size_t len = min(nbytes,rsize);
259 280
@@ -266,9 +287,11 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne
266 } while(nbytes != 0); 287 } while(nbytes != 0);
267 atomic_set(&req->wb_complete, requests); 288 atomic_set(&req->wb_complete, requests);
268 289
290 BUG_ON(desc->pg_lseg != NULL);
291 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ);
269 ClearPageError(page); 292 ClearPageError(page);
270 offset = 0; 293 offset = 0;
271 nbytes = count; 294 nbytes = desc->pg_count;
272 do { 295 do {
273 int ret2; 296 int ret2;
274 297
@@ -280,12 +303,14 @@ static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigne
280 if (nbytes < rsize) 303 if (nbytes < rsize)
281 rsize = nbytes; 304 rsize = nbytes;
282 ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, 305 ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
283 rsize, offset); 306 rsize, offset, lseg);
284 if (ret == 0) 307 if (ret == 0)
285 ret = ret2; 308 ret = ret2;
286 offset += rsize; 309 offset += rsize;
287 nbytes -= rsize; 310 nbytes -= rsize;
288 } while (nbytes != 0); 311 } while (nbytes != 0);
312 put_lseg(lseg);
313 desc->pg_lseg = NULL;
289 314
290 return ret; 315 return ret;
291 316
@@ -300,16 +325,21 @@ out_bad:
300 return -ENOMEM; 325 return -ENOMEM;
301} 326}
302 327
303static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags) 328static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
304{ 329{
305 struct nfs_page *req; 330 struct nfs_page *req;
306 struct page **pages; 331 struct page **pages;
307 struct nfs_read_data *data; 332 struct nfs_read_data *data;
333 struct list_head *head = &desc->pg_list;
334 struct pnfs_layout_segment *lseg = desc->pg_lseg;
308 int ret = -ENOMEM; 335 int ret = -ENOMEM;
309 336
310 data = nfs_readdata_alloc(npages); 337 data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base,
311 if (!data) 338 desc->pg_count));
312 goto out_bad; 339 if (!data) {
340 nfs_async_read_error(head);
341 goto out;
342 }
313 343
314 pages = data->pagevec; 344 pages = data->pagevec;
315 while (!list_empty(head)) { 345 while (!list_empty(head)) {
@@ -320,10 +350,14 @@ static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned
320 *pages++ = req->wb_page; 350 *pages++ = req->wb_page;
321 } 351 }
322 req = nfs_list_entry(data->pages.next); 352 req = nfs_list_entry(data->pages.next);
353 if ((!lseg) && list_is_singular(&data->pages))
354 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_READ);
323 355
324 return nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0); 356 ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count,
325out_bad: 357 0, lseg);
326 nfs_async_read_error(head); 358out:
359 put_lseg(lseg);
360 desc->pg_lseg = NULL;
327 return ret; 361 return ret;
328} 362}
329 363
@@ -366,6 +400,7 @@ static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data
366 return; 400 return;
367 401
368 /* Yes, so retry the read at the end of the data */ 402 /* Yes, so retry the read at the end of the data */
403 data->mds_offset += resp->count;
369 argp->offset += resp->count; 404 argp->offset += resp->count;
370 argp->pgbase += resp->count; 405 argp->pgbase += resp->count;
371 argp->count -= resp->count; 406 argp->count -= resp->count;
@@ -625,7 +660,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
625 if (ret == 0) 660 if (ret == 0)
626 goto read_complete; /* all pages were read */ 661 goto read_complete; /* all pages were read */
627 662
628 pnfs_update_layout(inode, desc.ctx, IOMODE_READ); 663 pnfs_pageio_init_read(&pgio, inode);
629 if (rsize < PAGE_CACHE_SIZE) 664 if (rsize < PAGE_CACHE_SIZE)
630 nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); 665 nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
631 else 666 else
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index b68c8607770f..e288f06d3fa7 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -263,8 +263,11 @@ static match_table_t nfs_local_lock_tokens = {
263static void nfs_umount_begin(struct super_block *); 263static void nfs_umount_begin(struct super_block *);
264static int nfs_statfs(struct dentry *, struct kstatfs *); 264static int nfs_statfs(struct dentry *, struct kstatfs *);
265static int nfs_show_options(struct seq_file *, struct vfsmount *); 265static int nfs_show_options(struct seq_file *, struct vfsmount *);
266static int nfs_show_devname(struct seq_file *, struct vfsmount *);
267static int nfs_show_path(struct seq_file *, struct vfsmount *);
266static int nfs_show_stats(struct seq_file *, struct vfsmount *); 268static int nfs_show_stats(struct seq_file *, struct vfsmount *);
267static int nfs_get_sb(struct file_system_type *, int, const char *, void *, struct vfsmount *); 269static struct dentry *nfs_fs_mount(struct file_system_type *,
270 int, const char *, void *);
268static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type, 271static struct dentry *nfs_xdev_mount(struct file_system_type *fs_type,
269 int flags, const char *dev_name, void *raw_data); 272 int flags, const char *dev_name, void *raw_data);
270static void nfs_put_super(struct super_block *); 273static void nfs_put_super(struct super_block *);
@@ -274,7 +277,7 @@ static int nfs_remount(struct super_block *sb, int *flags, char *raw_data);
274static struct file_system_type nfs_fs_type = { 277static struct file_system_type nfs_fs_type = {
275 .owner = THIS_MODULE, 278 .owner = THIS_MODULE,
276 .name = "nfs", 279 .name = "nfs",
277 .get_sb = nfs_get_sb, 280 .mount = nfs_fs_mount,
278 .kill_sb = nfs_kill_super, 281 .kill_sb = nfs_kill_super,
279 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 282 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
280}; 283};
@@ -296,6 +299,8 @@ static const struct super_operations nfs_sops = {
296 .evict_inode = nfs_evict_inode, 299 .evict_inode = nfs_evict_inode,
297 .umount_begin = nfs_umount_begin, 300 .umount_begin = nfs_umount_begin,
298 .show_options = nfs_show_options, 301 .show_options = nfs_show_options,
302 .show_devname = nfs_show_devname,
303 .show_path = nfs_show_path,
299 .show_stats = nfs_show_stats, 304 .show_stats = nfs_show_stats,
300 .remount_fs = nfs_remount, 305 .remount_fs = nfs_remount,
301}; 306};
@@ -303,16 +308,16 @@ static const struct super_operations nfs_sops = {
303#ifdef CONFIG_NFS_V4 308#ifdef CONFIG_NFS_V4
304static int nfs4_validate_text_mount_data(void *options, 309static int nfs4_validate_text_mount_data(void *options,
305 struct nfs_parsed_mount_data *args, const char *dev_name); 310 struct nfs_parsed_mount_data *args, const char *dev_name);
306static int nfs4_try_mount(int flags, const char *dev_name, 311static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
307 struct nfs_parsed_mount_data *data, struct vfsmount *mnt); 312 struct nfs_parsed_mount_data *data);
308static int nfs4_get_sb(struct file_system_type *fs_type, 313static struct dentry *nfs4_mount(struct file_system_type *fs_type,
309 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 314 int flags, const char *dev_name, void *raw_data);
310static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type, 315static struct dentry *nfs4_remote_mount(struct file_system_type *fs_type,
311 int flags, const char *dev_name, void *raw_data); 316 int flags, const char *dev_name, void *raw_data);
312static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type, 317static struct dentry *nfs4_xdev_mount(struct file_system_type *fs_type,
313 int flags, const char *dev_name, void *raw_data); 318 int flags, const char *dev_name, void *raw_data);
314static int nfs4_referral_get_sb(struct file_system_type *fs_type, 319static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
315 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt); 320 int flags, const char *dev_name, void *raw_data);
316static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type, 321static struct dentry *nfs4_remote_referral_mount(struct file_system_type *fs_type,
317 int flags, const char *dev_name, void *raw_data); 322 int flags, const char *dev_name, void *raw_data);
318static void nfs4_kill_super(struct super_block *sb); 323static void nfs4_kill_super(struct super_block *sb);
@@ -320,7 +325,7 @@ static void nfs4_kill_super(struct super_block *sb);
320static struct file_system_type nfs4_fs_type = { 325static struct file_system_type nfs4_fs_type = {
321 .owner = THIS_MODULE, 326 .owner = THIS_MODULE,
322 .name = "nfs4", 327 .name = "nfs4",
323 .get_sb = nfs4_get_sb, 328 .mount = nfs4_mount,
324 .kill_sb = nfs4_kill_super, 329 .kill_sb = nfs4_kill_super,
325 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 330 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
326}; 331};
@@ -352,7 +357,7 @@ static struct file_system_type nfs4_remote_referral_fs_type = {
352struct file_system_type nfs4_referral_fs_type = { 357struct file_system_type nfs4_referral_fs_type = {
353 .owner = THIS_MODULE, 358 .owner = THIS_MODULE,
354 .name = "nfs4", 359 .name = "nfs4",
355 .get_sb = nfs4_referral_get_sb, 360 .mount = nfs4_referral_mount,
356 .kill_sb = nfs4_kill_super, 361 .kill_sb = nfs4_kill_super,
357 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA, 362 .fs_flags = FS_RENAME_DOES_D_MOVE|FS_REVAL_DOT|FS_BINARY_MOUNTDATA,
358}; 363};
@@ -366,6 +371,8 @@ static const struct super_operations nfs4_sops = {
366 .evict_inode = nfs4_evict_inode, 371 .evict_inode = nfs4_evict_inode,
367 .umount_begin = nfs_umount_begin, 372 .umount_begin = nfs_umount_begin,
368 .show_options = nfs_show_options, 373 .show_options = nfs_show_options,
374 .show_devname = nfs_show_devname,
375 .show_path = nfs_show_path,
369 .show_stats = nfs_show_stats, 376 .show_stats = nfs_show_stats,
370 .remount_fs = nfs_remount, 377 .remount_fs = nfs_remount,
371}; 378};
@@ -726,6 +733,28 @@ static int nfs_show_options(struct seq_file *m, struct vfsmount *mnt)
726 return 0; 733 return 0;
727} 734}
728 735
736static int nfs_show_devname(struct seq_file *m, struct vfsmount *mnt)
737{
738 char *page = (char *) __get_free_page(GFP_KERNEL);
739 char *devname, *dummy;
740 int err = 0;
741 if (!page)
742 return -ENOMEM;
743 devname = nfs_path(&dummy, mnt->mnt_root, page, PAGE_SIZE);
744 if (IS_ERR(devname))
745 err = PTR_ERR(devname);
746 else
747 seq_escape(m, devname, " \t\n\\");
748 free_page((unsigned long)page);
749 return err;
750}
751
752static int nfs_show_path(struct seq_file *m, struct vfsmount *mnt)
753{
754 seq_puts(m, "/");
755 return 0;
756}
757
729/* 758/*
730 * Present statistical information for this VFS mountpoint 759 * Present statistical information for this VFS mountpoint
731 */ 760 */
@@ -975,10 +1004,32 @@ static int nfs_parse_security_flavors(char *value,
975 return 0; 1004 return 0;
976 } 1005 }
977 1006
1007 mnt->flags |= NFS_MOUNT_SECFLAVOUR;
978 mnt->auth_flavor_len = 1; 1008 mnt->auth_flavor_len = 1;
979 return 1; 1009 return 1;
980} 1010}
981 1011
1012static int nfs_get_option_str(substring_t args[], char **option)
1013{
1014 kfree(*option);
1015 *option = match_strdup(args);
1016 return !option;
1017}
1018
1019static int nfs_get_option_ul(substring_t args[], unsigned long *option)
1020{
1021 int rc;
1022 char *string;
1023
1024 string = match_strdup(args);
1025 if (string == NULL)
1026 return -ENOMEM;
1027 rc = strict_strtoul(string, 10, option);
1028 kfree(string);
1029
1030 return rc;
1031}
1032
982/* 1033/*
983 * Error-check and convert a string of mount options from user space into 1034 * Error-check and convert a string of mount options from user space into
984 * a data structure. The whole mount string is processed; bad options are 1035 * a data structure. The whole mount string is processed; bad options are
@@ -1127,155 +1178,82 @@ static int nfs_parse_mount_options(char *raw,
1127 * options that take numeric values 1178 * options that take numeric values
1128 */ 1179 */
1129 case Opt_port: 1180 case Opt_port:
1130 string = match_strdup(args); 1181 if (nfs_get_option_ul(args, &option) ||
1131 if (string == NULL) 1182 option > USHRT_MAX)
1132 goto out_nomem;
1133 rc = strict_strtoul(string, 10, &option);
1134 kfree(string);
1135 if (rc != 0 || option > USHRT_MAX)
1136 goto out_invalid_value; 1183 goto out_invalid_value;
1137 mnt->nfs_server.port = option; 1184 mnt->nfs_server.port = option;
1138 break; 1185 break;
1139 case Opt_rsize: 1186 case Opt_rsize:
1140 string = match_strdup(args); 1187 if (nfs_get_option_ul(args, &option))
1141 if (string == NULL)
1142 goto out_nomem;
1143 rc = strict_strtoul(string, 10, &option);
1144 kfree(string);
1145 if (rc != 0)
1146 goto out_invalid_value; 1188 goto out_invalid_value;
1147 mnt->rsize = option; 1189 mnt->rsize = option;
1148 break; 1190 break;
1149 case Opt_wsize: 1191 case Opt_wsize:
1150 string = match_strdup(args); 1192 if (nfs_get_option_ul(args, &option))
1151 if (string == NULL)
1152 goto out_nomem;
1153 rc = strict_strtoul(string, 10, &option);
1154 kfree(string);
1155 if (rc != 0)
1156 goto out_invalid_value; 1193 goto out_invalid_value;
1157 mnt->wsize = option; 1194 mnt->wsize = option;
1158 break; 1195 break;
1159 case Opt_bsize: 1196 case Opt_bsize:
1160 string = match_strdup(args); 1197 if (nfs_get_option_ul(args, &option))
1161 if (string == NULL)
1162 goto out_nomem;
1163 rc = strict_strtoul(string, 10, &option);
1164 kfree(string);
1165 if (rc != 0)
1166 goto out_invalid_value; 1198 goto out_invalid_value;
1167 mnt->bsize = option; 1199 mnt->bsize = option;
1168 break; 1200 break;
1169 case Opt_timeo: 1201 case Opt_timeo:
1170 string = match_strdup(args); 1202 if (nfs_get_option_ul(args, &option) || option == 0)
1171 if (string == NULL)
1172 goto out_nomem;
1173 rc = strict_strtoul(string, 10, &option);
1174 kfree(string);
1175 if (rc != 0 || option == 0)
1176 goto out_invalid_value; 1203 goto out_invalid_value;
1177 mnt->timeo = option; 1204 mnt->timeo = option;
1178 break; 1205 break;
1179 case Opt_retrans: 1206 case Opt_retrans:
1180 string = match_strdup(args); 1207 if (nfs_get_option_ul(args, &option) || option == 0)
1181 if (string == NULL)
1182 goto out_nomem;
1183 rc = strict_strtoul(string, 10, &option);
1184 kfree(string);
1185 if (rc != 0 || option == 0)
1186 goto out_invalid_value; 1208 goto out_invalid_value;
1187 mnt->retrans = option; 1209 mnt->retrans = option;
1188 break; 1210 break;
1189 case Opt_acregmin: 1211 case Opt_acregmin:
1190 string = match_strdup(args); 1212 if (nfs_get_option_ul(args, &option))
1191 if (string == NULL)
1192 goto out_nomem;
1193 rc = strict_strtoul(string, 10, &option);
1194 kfree(string);
1195 if (rc != 0)
1196 goto out_invalid_value; 1213 goto out_invalid_value;
1197 mnt->acregmin = option; 1214 mnt->acregmin = option;
1198 break; 1215 break;
1199 case Opt_acregmax: 1216 case Opt_acregmax:
1200 string = match_strdup(args); 1217 if (nfs_get_option_ul(args, &option))
1201 if (string == NULL)
1202 goto out_nomem;
1203 rc = strict_strtoul(string, 10, &option);
1204 kfree(string);
1205 if (rc != 0)
1206 goto out_invalid_value; 1218 goto out_invalid_value;
1207 mnt->acregmax = option; 1219 mnt->acregmax = option;
1208 break; 1220 break;
1209 case Opt_acdirmin: 1221 case Opt_acdirmin:
1210 string = match_strdup(args); 1222 if (nfs_get_option_ul(args, &option))
1211 if (string == NULL)
1212 goto out_nomem;
1213 rc = strict_strtoul(string, 10, &option);
1214 kfree(string);
1215 if (rc != 0)
1216 goto out_invalid_value; 1223 goto out_invalid_value;
1217 mnt->acdirmin = option; 1224 mnt->acdirmin = option;
1218 break; 1225 break;
1219 case Opt_acdirmax: 1226 case Opt_acdirmax:
1220 string = match_strdup(args); 1227 if (nfs_get_option_ul(args, &option))
1221 if (string == NULL)
1222 goto out_nomem;
1223 rc = strict_strtoul(string, 10, &option);
1224 kfree(string);
1225 if (rc != 0)
1226 goto out_invalid_value; 1228 goto out_invalid_value;
1227 mnt->acdirmax = option; 1229 mnt->acdirmax = option;
1228 break; 1230 break;
1229 case Opt_actimeo: 1231 case Opt_actimeo:
1230 string = match_strdup(args); 1232 if (nfs_get_option_ul(args, &option))
1231 if (string == NULL)
1232 goto out_nomem;
1233 rc = strict_strtoul(string, 10, &option);
1234 kfree(string);
1235 if (rc != 0)
1236 goto out_invalid_value; 1233 goto out_invalid_value;
1237 mnt->acregmin = mnt->acregmax = 1234 mnt->acregmin = mnt->acregmax =
1238 mnt->acdirmin = mnt->acdirmax = option; 1235 mnt->acdirmin = mnt->acdirmax = option;
1239 break; 1236 break;
1240 case Opt_namelen: 1237 case Opt_namelen:
1241 string = match_strdup(args); 1238 if (nfs_get_option_ul(args, &option))
1242 if (string == NULL)
1243 goto out_nomem;
1244 rc = strict_strtoul(string, 10, &option);
1245 kfree(string);
1246 if (rc != 0)
1247 goto out_invalid_value; 1239 goto out_invalid_value;
1248 mnt->namlen = option; 1240 mnt->namlen = option;
1249 break; 1241 break;
1250 case Opt_mountport: 1242 case Opt_mountport:
1251 string = match_strdup(args); 1243 if (nfs_get_option_ul(args, &option) ||
1252 if (string == NULL) 1244 option > USHRT_MAX)
1253 goto out_nomem;
1254 rc = strict_strtoul(string, 10, &option);
1255 kfree(string);
1256 if (rc != 0 || option > USHRT_MAX)
1257 goto out_invalid_value; 1245 goto out_invalid_value;
1258 mnt->mount_server.port = option; 1246 mnt->mount_server.port = option;
1259 break; 1247 break;
1260 case Opt_mountvers: 1248 case Opt_mountvers:
1261 string = match_strdup(args); 1249 if (nfs_get_option_ul(args, &option) ||
1262 if (string == NULL)
1263 goto out_nomem;
1264 rc = strict_strtoul(string, 10, &option);
1265 kfree(string);
1266 if (rc != 0 ||
1267 option < NFS_MNT_VERSION || 1250 option < NFS_MNT_VERSION ||
1268 option > NFS_MNT3_VERSION) 1251 option > NFS_MNT3_VERSION)
1269 goto out_invalid_value; 1252 goto out_invalid_value;
1270 mnt->mount_server.version = option; 1253 mnt->mount_server.version = option;
1271 break; 1254 break;
1272 case Opt_nfsvers: 1255 case Opt_nfsvers:
1273 string = match_strdup(args); 1256 if (nfs_get_option_ul(args, &option))
1274 if (string == NULL)
1275 goto out_nomem;
1276 rc = strict_strtoul(string, 10, &option);
1277 kfree(string);
1278 if (rc != 0)
1279 goto out_invalid_value; 1257 goto out_invalid_value;
1280 switch (option) { 1258 switch (option) {
1281 case NFS2_VERSION: 1259 case NFS2_VERSION:
@@ -1295,12 +1273,7 @@ static int nfs_parse_mount_options(char *raw,
1295 } 1273 }
1296 break; 1274 break;
1297 case Opt_minorversion: 1275 case Opt_minorversion:
1298 string = match_strdup(args); 1276 if (nfs_get_option_ul(args, &option))
1299 if (string == NULL)
1300 goto out_nomem;
1301 rc = strict_strtoul(string, 10, &option);
1302 kfree(string);
1303 if (rc != 0)
1304 goto out_invalid_value; 1277 goto out_invalid_value;
1305 if (option > NFS4_MAX_MINOR_VERSION) 1278 if (option > NFS4_MAX_MINOR_VERSION)
1306 goto out_invalid_value; 1279 goto out_invalid_value;
@@ -1336,21 +1309,18 @@ static int nfs_parse_mount_options(char *raw,
1336 case Opt_xprt_udp: 1309 case Opt_xprt_udp:
1337 mnt->flags &= ~NFS_MOUNT_TCP; 1310 mnt->flags &= ~NFS_MOUNT_TCP;
1338 mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP; 1311 mnt->nfs_server.protocol = XPRT_TRANSPORT_UDP;
1339 kfree(string);
1340 break; 1312 break;
1341 case Opt_xprt_tcp6: 1313 case Opt_xprt_tcp6:
1342 protofamily = AF_INET6; 1314 protofamily = AF_INET6;
1343 case Opt_xprt_tcp: 1315 case Opt_xprt_tcp:
1344 mnt->flags |= NFS_MOUNT_TCP; 1316 mnt->flags |= NFS_MOUNT_TCP;
1345 mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP; 1317 mnt->nfs_server.protocol = XPRT_TRANSPORT_TCP;
1346 kfree(string);
1347 break; 1318 break;
1348 case Opt_xprt_rdma: 1319 case Opt_xprt_rdma:
1349 /* vector side protocols to TCP */ 1320 /* vector side protocols to TCP */
1350 mnt->flags |= NFS_MOUNT_TCP; 1321 mnt->flags |= NFS_MOUNT_TCP;
1351 mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA; 1322 mnt->nfs_server.protocol = XPRT_TRANSPORT_RDMA;
1352 xprt_load_transport(string); 1323 xprt_load_transport(string);
1353 kfree(string);
1354 break; 1324 break;
1355 default: 1325 default:
1356 dfprintk(MOUNT, "NFS: unrecognized " 1326 dfprintk(MOUNT, "NFS: unrecognized "
@@ -1358,6 +1328,7 @@ static int nfs_parse_mount_options(char *raw,
1358 kfree(string); 1328 kfree(string);
1359 return 0; 1329 return 0;
1360 } 1330 }
1331 kfree(string);
1361 break; 1332 break;
1362 case Opt_mountproto: 1333 case Opt_mountproto:
1363 string = match_strdup(args); 1334 string = match_strdup(args);
@@ -1400,18 +1371,13 @@ static int nfs_parse_mount_options(char *raw,
1400 goto out_invalid_address; 1371 goto out_invalid_address;
1401 break; 1372 break;
1402 case Opt_clientaddr: 1373 case Opt_clientaddr:
1403 string = match_strdup(args); 1374 if (nfs_get_option_str(args, &mnt->client_address))
1404 if (string == NULL)
1405 goto out_nomem; 1375 goto out_nomem;
1406 kfree(mnt->client_address);
1407 mnt->client_address = string;
1408 break; 1376 break;
1409 case Opt_mounthost: 1377 case Opt_mounthost:
1410 string = match_strdup(args); 1378 if (nfs_get_option_str(args,
1411 if (string == NULL) 1379 &mnt->mount_server.hostname))
1412 goto out_nomem; 1380 goto out_nomem;
1413 kfree(mnt->mount_server.hostname);
1414 mnt->mount_server.hostname = string;
1415 break; 1381 break;
1416 case Opt_mountaddr: 1382 case Opt_mountaddr:
1417 string = match_strdup(args); 1383 string = match_strdup(args);
@@ -1451,11 +1417,8 @@ static int nfs_parse_mount_options(char *raw,
1451 }; 1417 };
1452 break; 1418 break;
1453 case Opt_fscache_uniq: 1419 case Opt_fscache_uniq:
1454 string = match_strdup(args); 1420 if (nfs_get_option_str(args, &mnt->fscache_uniq))
1455 if (string == NULL)
1456 goto out_nomem; 1421 goto out_nomem;
1457 kfree(mnt->fscache_uniq);
1458 mnt->fscache_uniq = string;
1459 mnt->options |= NFS_OPTION_FSCACHE; 1422 mnt->options |= NFS_OPTION_FSCACHE;
1460 break; 1423 break;
1461 case Opt_local_lock: 1424 case Opt_local_lock:
@@ -1665,99 +1628,59 @@ static int nfs_try_mount(struct nfs_parsed_mount_data *args,
1665 return nfs_walk_authlist(args, &request); 1628 return nfs_walk_authlist(args, &request);
1666} 1629}
1667 1630
1668static int nfs_parse_simple_hostname(const char *dev_name, 1631/*
1669 char **hostname, size_t maxnamlen, 1632 * Split "dev_name" into "hostname:export_path".
1670 char **export_path, size_t maxpathlen) 1633 *
1634 * The leftmost colon demarks the split between the server's hostname
1635 * and the export path. If the hostname starts with a left square
1636 * bracket, then it may contain colons.
1637 *
1638 * Note: caller frees hostname and export path, even on error.
1639 */
1640static int nfs_parse_devname(const char *dev_name,
1641 char **hostname, size_t maxnamlen,
1642 char **export_path, size_t maxpathlen)
1671{ 1643{
1672 size_t len; 1644 size_t len;
1673 char *colon, *comma; 1645 char *end;
1674
1675 colon = strchr(dev_name, ':');
1676 if (colon == NULL)
1677 goto out_bad_devname;
1678
1679 len = colon - dev_name;
1680 if (len > maxnamlen)
1681 goto out_hostname;
1682
1683 /* N.B. caller will free nfs_server.hostname in all cases */
1684 *hostname = kstrndup(dev_name, len, GFP_KERNEL);
1685 if (!*hostname)
1686 goto out_nomem;
1687 1646
1688 /* kill possible hostname list: not supported */ 1647 /* Is the host name protected with square brackets? */
1689 comma = strchr(*hostname, ','); 1648 if (*dev_name == '[') {
1690 if (comma != NULL) { 1649 end = strchr(++dev_name, ']');
1691 if (comma == *hostname) 1650 if (end == NULL || end[1] != ':')
1692 goto out_bad_devname; 1651 goto out_bad_devname;
1693 *comma = '\0';
1694 }
1695
1696 colon++;
1697 len = strlen(colon);
1698 if (len > maxpathlen)
1699 goto out_path;
1700 *export_path = kstrndup(colon, len, GFP_KERNEL);
1701 if (!*export_path)
1702 goto out_nomem;
1703
1704 dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *export_path);
1705 return 0;
1706
1707out_bad_devname:
1708 dfprintk(MOUNT, "NFS: device name not in host:path format\n");
1709 return -EINVAL;
1710 1652
1711out_nomem: 1653 len = end - dev_name;
1712 dfprintk(MOUNT, "NFS: not enough memory to parse device name\n"); 1654 end++;
1713 return -ENOMEM; 1655 } else {
1714 1656 char *comma;
1715out_hostname:
1716 dfprintk(MOUNT, "NFS: server hostname too long\n");
1717 return -ENAMETOOLONG;
1718
1719out_path:
1720 dfprintk(MOUNT, "NFS: export pathname too long\n");
1721 return -ENAMETOOLONG;
1722}
1723
1724/*
1725 * Hostname has square brackets around it because it contains one or
1726 * more colons. We look for the first closing square bracket, and a
1727 * colon must follow it.
1728 */
1729static int nfs_parse_protected_hostname(const char *dev_name,
1730 char **hostname, size_t maxnamlen,
1731 char **export_path, size_t maxpathlen)
1732{
1733 size_t len;
1734 char *start, *end;
1735 1657
1736 start = (char *)(dev_name + 1); 1658 end = strchr(dev_name, ':');
1659 if (end == NULL)
1660 goto out_bad_devname;
1661 len = end - dev_name;
1737 1662
1738 end = strchr(start, ']'); 1663 /* kill possible hostname list: not supported */
1739 if (end == NULL) 1664 comma = strchr(dev_name, ',');
1740 goto out_bad_devname; 1665 if (comma != NULL && comma < end)
1741 if (*(end + 1) != ':') 1666 *comma = 0;
1742 goto out_bad_devname; 1667 }
1743 1668
1744 len = end - start;
1745 if (len > maxnamlen) 1669 if (len > maxnamlen)
1746 goto out_hostname; 1670 goto out_hostname;
1747 1671
1748 /* N.B. caller will free nfs_server.hostname in all cases */ 1672 /* N.B. caller will free nfs_server.hostname in all cases */
1749 *hostname = kstrndup(start, len, GFP_KERNEL); 1673 *hostname = kstrndup(dev_name, len, GFP_KERNEL);
1750 if (*hostname == NULL) 1674 if (*hostname == NULL)
1751 goto out_nomem; 1675 goto out_nomem;
1752 1676 len = strlen(++end);
1753 end += 2;
1754 len = strlen(end);
1755 if (len > maxpathlen) 1677 if (len > maxpathlen)
1756 goto out_path; 1678 goto out_path;
1757 *export_path = kstrndup(end, len, GFP_KERNEL); 1679 *export_path = kstrndup(end, len, GFP_KERNEL);
1758 if (!*export_path) 1680 if (!*export_path)
1759 goto out_nomem; 1681 goto out_nomem;
1760 1682
1683 dfprintk(MOUNT, "NFS: MNTPATH: '%s'\n", *export_path);
1761 return 0; 1684 return 0;
1762 1685
1763out_bad_devname: 1686out_bad_devname:
@@ -1778,29 +1701,6 @@ out_path:
1778} 1701}
1779 1702
1780/* 1703/*
1781 * Split "dev_name" into "hostname:export_path".
1782 *
1783 * The leftmost colon demarks the split between the server's hostname
1784 * and the export path. If the hostname starts with a left square
1785 * bracket, then it may contain colons.
1786 *
1787 * Note: caller frees hostname and export path, even on error.
1788 */
1789static int nfs_parse_devname(const char *dev_name,
1790 char **hostname, size_t maxnamlen,
1791 char **export_path, size_t maxpathlen)
1792{
1793 if (*dev_name == '[')
1794 return nfs_parse_protected_hostname(dev_name,
1795 hostname, maxnamlen,
1796 export_path, maxpathlen);
1797
1798 return nfs_parse_simple_hostname(dev_name,
1799 hostname, maxnamlen,
1800 export_path, maxpathlen);
1801}
1802
1803/*
1804 * Validate the NFS2/NFS3 mount data 1704 * Validate the NFS2/NFS3 mount data
1805 * - fills in the mount root filehandle 1705 * - fills in the mount root filehandle
1806 * 1706 *
@@ -2077,6 +1977,15 @@ nfs_remount(struct super_block *sb, int *flags, char *raw_data)
2077 if (error < 0) 1977 if (error < 0)
2078 goto out; 1978 goto out;
2079 1979
1980 /*
1981 * noac is a special case. It implies -o sync, but that's not
1982 * necessarily reflected in the mtab options. do_remount_sb
1983 * will clear MS_SYNCHRONOUS if -o sync wasn't specified in the
1984 * remount options, so we have to explicitly reset it.
1985 */
1986 if (data->flags & NFS_MOUNT_NOAC)
1987 *flags |= MS_SYNCHRONOUS;
1988
2080 /* compare new mount options with old ones */ 1989 /* compare new mount options with old ones */
2081 error = nfs_compare_remount_data(nfss, data); 1990 error = nfs_compare_remount_data(nfss, data);
2082out: 1991out:
@@ -2267,19 +2176,19 @@ static int nfs_bdi_register(struct nfs_server *server)
2267 return bdi_register_dev(&server->backing_dev_info, server->s_dev); 2176 return bdi_register_dev(&server->backing_dev_info, server->s_dev);
2268} 2177}
2269 2178
2270static int nfs_get_sb(struct file_system_type *fs_type, 2179static struct dentry *nfs_fs_mount(struct file_system_type *fs_type,
2271 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 2180 int flags, const char *dev_name, void *raw_data)
2272{ 2181{
2273 struct nfs_server *server = NULL; 2182 struct nfs_server *server = NULL;
2274 struct super_block *s; 2183 struct super_block *s;
2275 struct nfs_parsed_mount_data *data; 2184 struct nfs_parsed_mount_data *data;
2276 struct nfs_fh *mntfh; 2185 struct nfs_fh *mntfh;
2277 struct dentry *mntroot; 2186 struct dentry *mntroot = ERR_PTR(-ENOMEM);
2278 int (*compare_super)(struct super_block *, void *) = nfs_compare_super; 2187 int (*compare_super)(struct super_block *, void *) = nfs_compare_super;
2279 struct nfs_sb_mountdata sb_mntdata = { 2188 struct nfs_sb_mountdata sb_mntdata = {
2280 .mntflags = flags, 2189 .mntflags = flags,
2281 }; 2190 };
2282 int error = -ENOMEM; 2191 int error;
2283 2192
2284 data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION); 2193 data = nfs_alloc_parsed_mount_data(NFS_DEFAULT_VERSION);
2285 mntfh = nfs_alloc_fhandle(); 2194 mntfh = nfs_alloc_fhandle();
@@ -2290,12 +2199,14 @@ static int nfs_get_sb(struct file_system_type *fs_type,
2290 2199
2291 /* Validate the mount data */ 2200 /* Validate the mount data */
2292 error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name); 2201 error = nfs_validate_mount_data(raw_data, data, mntfh, dev_name);
2293 if (error < 0) 2202 if (error < 0) {
2203 mntroot = ERR_PTR(error);
2294 goto out; 2204 goto out;
2205 }
2295 2206
2296#ifdef CONFIG_NFS_V4 2207#ifdef CONFIG_NFS_V4
2297 if (data->version == 4) { 2208 if (data->version == 4) {
2298 error = nfs4_try_mount(flags, dev_name, data, mnt); 2209 mntroot = nfs4_try_mount(flags, dev_name, data);
2299 kfree(data->client_address); 2210 kfree(data->client_address);
2300 kfree(data->nfs_server.export_path); 2211 kfree(data->nfs_server.export_path);
2301 goto out; 2212 goto out;
@@ -2305,7 +2216,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
2305 /* Get a volume representation */ 2216 /* Get a volume representation */
2306 server = nfs_create_server(data, mntfh); 2217 server = nfs_create_server(data, mntfh);
2307 if (IS_ERR(server)) { 2218 if (IS_ERR(server)) {
2308 error = PTR_ERR(server); 2219 mntroot = ERR_CAST(server);
2309 goto out; 2220 goto out;
2310 } 2221 }
2311 sb_mntdata.server = server; 2222 sb_mntdata.server = server;
@@ -2316,7 +2227,7 @@ static int nfs_get_sb(struct file_system_type *fs_type,
2316 /* Get a superblock - note that we may end up sharing one that already exists */ 2227 /* Get a superblock - note that we may end up sharing one that already exists */
2317 s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata); 2228 s = sget(fs_type, compare_super, nfs_set_super, &sb_mntdata);
2318 if (IS_ERR(s)) { 2229 if (IS_ERR(s)) {
2319 error = PTR_ERR(s); 2230 mntroot = ERR_CAST(s);
2320 goto out_err_nosb; 2231 goto out_err_nosb;
2321 } 2232 }
2322 2233
@@ -2325,31 +2236,27 @@ static int nfs_get_sb(struct file_system_type *fs_type,
2325 server = NULL; 2236 server = NULL;
2326 } else { 2237 } else {
2327 error = nfs_bdi_register(server); 2238 error = nfs_bdi_register(server);
2328 if (error) 2239 if (error) {
2240 mntroot = ERR_PTR(error);
2329 goto error_splat_bdi; 2241 goto error_splat_bdi;
2242 }
2330 } 2243 }
2331 2244
2332 if (!s->s_root) { 2245 if (!s->s_root) {
2333 /* initial superblock/root creation */ 2246 /* initial superblock/root creation */
2334 nfs_fill_super(s, data); 2247 nfs_fill_super(s, data);
2335 nfs_fscache_get_super_cookie( 2248 nfs_fscache_get_super_cookie(s, data->fscache_uniq, NULL);
2336 s, data ? data->fscache_uniq : NULL, NULL);
2337 } 2249 }
2338 2250
2339 mntroot = nfs_get_root(s, mntfh); 2251 mntroot = nfs_get_root(s, mntfh, dev_name);
2340 if (IS_ERR(mntroot)) { 2252 if (IS_ERR(mntroot))
2341 error = PTR_ERR(mntroot);
2342 goto error_splat_super; 2253 goto error_splat_super;
2343 }
2344 2254
2345 error = security_sb_set_mnt_opts(s, &data->lsm_opts); 2255 error = security_sb_set_mnt_opts(s, &data->lsm_opts);
2346 if (error) 2256 if (error)
2347 goto error_splat_root; 2257 goto error_splat_root;
2348 2258
2349 s->s_flags |= MS_ACTIVE; 2259 s->s_flags |= MS_ACTIVE;
2350 mnt->mnt_sb = s;
2351 mnt->mnt_root = mntroot;
2352 error = 0;
2353 2260
2354out: 2261out:
2355 kfree(data->nfs_server.hostname); 2262 kfree(data->nfs_server.hostname);
@@ -2359,7 +2266,7 @@ out:
2359out_free_fh: 2266out_free_fh:
2360 nfs_free_fhandle(mntfh); 2267 nfs_free_fhandle(mntfh);
2361 kfree(data); 2268 kfree(data);
2362 return error; 2269 return mntroot;
2363 2270
2364out_err_nosb: 2271out_err_nosb:
2365 nfs_free_server(server); 2272 nfs_free_server(server);
@@ -2367,6 +2274,7 @@ out_err_nosb:
2367 2274
2368error_splat_root: 2275error_splat_root:
2369 dput(mntroot); 2276 dput(mntroot);
2277 mntroot = ERR_PTR(error);
2370error_splat_super: 2278error_splat_super:
2371 if (server && !s->s_root) 2279 if (server && !s->s_root)
2372 bdi_unregister(&server->backing_dev_info); 2280 bdi_unregister(&server->backing_dev_info);
@@ -2450,7 +2358,7 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags,
2450 nfs_fscache_get_super_cookie(s, NULL, data); 2358 nfs_fscache_get_super_cookie(s, NULL, data);
2451 } 2359 }
2452 2360
2453 mntroot = nfs_get_root(s, data->fh); 2361 mntroot = nfs_get_root(s, data->fh, dev_name);
2454 if (IS_ERR(mntroot)) { 2362 if (IS_ERR(mntroot)) {
2455 error = PTR_ERR(mntroot); 2363 error = PTR_ERR(mntroot);
2456 goto error_splat_super; 2364 goto error_splat_super;
@@ -2718,7 +2626,7 @@ nfs4_remote_mount(struct file_system_type *fs_type, int flags,
2718 s, data ? data->fscache_uniq : NULL, NULL); 2626 s, data ? data->fscache_uniq : NULL, NULL);
2719 } 2627 }
2720 2628
2721 mntroot = nfs4_get_root(s, mntfh); 2629 mntroot = nfs4_get_root(s, mntfh, dev_name);
2722 if (IS_ERR(mntroot)) { 2630 if (IS_ERR(mntroot)) {
2723 error = PTR_ERR(mntroot); 2631 error = PTR_ERR(mntroot);
2724 goto error_splat_super; 2632 goto error_splat_super;
@@ -2771,27 +2679,6 @@ static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type,
2771 return root_mnt; 2679 return root_mnt;
2772} 2680}
2773 2681
2774static void nfs_fix_devname(const struct path *path, struct vfsmount *mnt)
2775{
2776 char *page = (char *) __get_free_page(GFP_KERNEL);
2777 char *devname, *tmp;
2778
2779 if (page == NULL)
2780 return;
2781 devname = nfs_path(path->mnt->mnt_devname,
2782 path->mnt->mnt_root, path->dentry,
2783 page, PAGE_SIZE);
2784 if (IS_ERR(devname))
2785 goto out_freepage;
2786 tmp = kstrdup(devname, GFP_KERNEL);
2787 if (tmp == NULL)
2788 goto out_freepage;
2789 kfree(mnt->mnt_devname);
2790 mnt->mnt_devname = tmp;
2791out_freepage:
2792 free_page((unsigned long)page);
2793}
2794
2795struct nfs_referral_count { 2682struct nfs_referral_count {
2796 struct list_head list; 2683 struct list_head list;
2797 const struct task_struct *task; 2684 const struct task_struct *task;
@@ -2858,17 +2745,18 @@ static void nfs_referral_loop_unprotect(void)
2858 kfree(p); 2745 kfree(p);
2859} 2746}
2860 2747
2861static int nfs_follow_remote_path(struct vfsmount *root_mnt, 2748static struct dentry *nfs_follow_remote_path(struct vfsmount *root_mnt,
2862 const char *export_path, struct vfsmount *mnt_target) 2749 const char *export_path)
2863{ 2750{
2864 struct nameidata *nd = NULL; 2751 struct nameidata *nd = NULL;
2865 struct mnt_namespace *ns_private; 2752 struct mnt_namespace *ns_private;
2866 struct super_block *s; 2753 struct super_block *s;
2754 struct dentry *dentry;
2867 int ret; 2755 int ret;
2868 2756
2869 nd = kmalloc(sizeof(*nd), GFP_KERNEL); 2757 nd = kmalloc(sizeof(*nd), GFP_KERNEL);
2870 if (nd == NULL) 2758 if (nd == NULL)
2871 return -ENOMEM; 2759 return ERR_PTR(-ENOMEM);
2872 2760
2873 ns_private = create_mnt_ns(root_mnt); 2761 ns_private = create_mnt_ns(root_mnt);
2874 ret = PTR_ERR(ns_private); 2762 ret = PTR_ERR(ns_private);
@@ -2890,32 +2778,27 @@ static int nfs_follow_remote_path(struct vfsmount *root_mnt,
2890 2778
2891 s = nd->path.mnt->mnt_sb; 2779 s = nd->path.mnt->mnt_sb;
2892 atomic_inc(&s->s_active); 2780 atomic_inc(&s->s_active);
2893 mnt_target->mnt_sb = s; 2781 dentry = dget(nd->path.dentry);
2894 mnt_target->mnt_root = dget(nd->path.dentry);
2895
2896 /* Correct the device pathname */
2897 nfs_fix_devname(&nd->path, mnt_target);
2898 2782
2899 path_put(&nd->path); 2783 path_put(&nd->path);
2900 kfree(nd); 2784 kfree(nd);
2901 down_write(&s->s_umount); 2785 down_write(&s->s_umount);
2902 return 0; 2786 return dentry;
2903out_put_mnt_ns: 2787out_put_mnt_ns:
2904 put_mnt_ns(ns_private); 2788 put_mnt_ns(ns_private);
2905out_mntput: 2789out_mntput:
2906 mntput(root_mnt); 2790 mntput(root_mnt);
2907out_err: 2791out_err:
2908 kfree(nd); 2792 kfree(nd);
2909 return ret; 2793 return ERR_PTR(ret);
2910} 2794}
2911 2795
2912static int nfs4_try_mount(int flags, const char *dev_name, 2796static struct dentry *nfs4_try_mount(int flags, const char *dev_name,
2913 struct nfs_parsed_mount_data *data, 2797 struct nfs_parsed_mount_data *data)
2914 struct vfsmount *mnt)
2915{ 2798{
2916 char *export_path; 2799 char *export_path;
2917 struct vfsmount *root_mnt; 2800 struct vfsmount *root_mnt;
2918 int error; 2801 struct dentry *res;
2919 2802
2920 dfprintk(MOUNT, "--> nfs4_try_mount()\n"); 2803 dfprintk(MOUNT, "--> nfs4_try_mount()\n");
2921 2804
@@ -2925,26 +2808,25 @@ static int nfs4_try_mount(int flags, const char *dev_name,
2925 data->nfs_server.hostname); 2808 data->nfs_server.hostname);
2926 data->nfs_server.export_path = export_path; 2809 data->nfs_server.export_path = export_path;
2927 2810
2928 error = PTR_ERR(root_mnt); 2811 res = ERR_CAST(root_mnt);
2929 if (IS_ERR(root_mnt)) 2812 if (!IS_ERR(root_mnt))
2930 goto out; 2813 res = nfs_follow_remote_path(root_mnt, export_path);
2931
2932 error = nfs_follow_remote_path(root_mnt, export_path, mnt);
2933 2814
2934out: 2815 dfprintk(MOUNT, "<-- nfs4_try_mount() = %ld%s\n",
2935 dfprintk(MOUNT, "<-- nfs4_try_mount() = %d%s\n", error, 2816 IS_ERR(res) ? PTR_ERR(res) : 0,
2936 error != 0 ? " [error]" : ""); 2817 IS_ERR(res) ? " [error]" : "");
2937 return error; 2818 return res;
2938} 2819}
2939 2820
2940/* 2821/*
2941 * Get the superblock for an NFS4 mountpoint 2822 * Get the superblock for an NFS4 mountpoint
2942 */ 2823 */
2943static int nfs4_get_sb(struct file_system_type *fs_type, 2824static struct dentry *nfs4_mount(struct file_system_type *fs_type,
2944 int flags, const char *dev_name, void *raw_data, struct vfsmount *mnt) 2825 int flags, const char *dev_name, void *raw_data)
2945{ 2826{
2946 struct nfs_parsed_mount_data *data; 2827 struct nfs_parsed_mount_data *data;
2947 int error = -ENOMEM; 2828 int error = -ENOMEM;
2829 struct dentry *res = ERR_PTR(-ENOMEM);
2948 2830
2949 data = nfs_alloc_parsed_mount_data(4); 2831 data = nfs_alloc_parsed_mount_data(4);
2950 if (data == NULL) 2832 if (data == NULL)
@@ -2952,10 +2834,14 @@ static int nfs4_get_sb(struct file_system_type *fs_type,
2952 2834
2953 /* Validate the mount data */ 2835 /* Validate the mount data */
2954 error = nfs4_validate_mount_data(raw_data, data, dev_name); 2836 error = nfs4_validate_mount_data(raw_data, data, dev_name);
2955 if (error < 0) 2837 if (error < 0) {
2838 res = ERR_PTR(error);
2956 goto out; 2839 goto out;
2840 }
2957 2841
2958 error = nfs4_try_mount(flags, dev_name, data, mnt); 2842 res = nfs4_try_mount(flags, dev_name, data);
2843 if (IS_ERR(res))
2844 error = PTR_ERR(res);
2959 2845
2960out: 2846out:
2961 kfree(data->client_address); 2847 kfree(data->client_address);
@@ -2964,9 +2850,9 @@ out:
2964 kfree(data->fscache_uniq); 2850 kfree(data->fscache_uniq);
2965out_free_data: 2851out_free_data:
2966 kfree(data); 2852 kfree(data);
2967 dprintk("<-- nfs4_get_sb() = %d%s\n", error, 2853 dprintk("<-- nfs4_mount() = %d%s\n", error,
2968 error != 0 ? " [error]" : ""); 2854 error != 0 ? " [error]" : "");
2969 return error; 2855 return res;
2970} 2856}
2971 2857
2972static void nfs4_kill_super(struct super_block *sb) 2858static void nfs4_kill_super(struct super_block *sb)
@@ -3033,7 +2919,7 @@ nfs4_xdev_mount(struct file_system_type *fs_type, int flags,
3033 nfs_fscache_get_super_cookie(s, NULL, data); 2919 nfs_fscache_get_super_cookie(s, NULL, data);
3034 } 2920 }
3035 2921
3036 mntroot = nfs4_get_root(s, data->fh); 2922 mntroot = nfs4_get_root(s, data->fh, dev_name);
3037 if (IS_ERR(mntroot)) { 2923 if (IS_ERR(mntroot)) {
3038 error = PTR_ERR(mntroot); 2924 error = PTR_ERR(mntroot);
3039 goto error_splat_super; 2925 goto error_splat_super;
@@ -3120,7 +3006,7 @@ nfs4_remote_referral_mount(struct file_system_type *fs_type, int flags,
3120 nfs_fscache_get_super_cookie(s, NULL, data); 3006 nfs_fscache_get_super_cookie(s, NULL, data);
3121 } 3007 }
3122 3008
3123 mntroot = nfs4_get_root(s, mntfh); 3009 mntroot = nfs4_get_root(s, mntfh, dev_name);
3124 if (IS_ERR(mntroot)) { 3010 if (IS_ERR(mntroot)) {
3125 error = PTR_ERR(mntroot); 3011 error = PTR_ERR(mntroot);
3126 goto error_splat_super; 3012 goto error_splat_super;
@@ -3160,16 +3046,15 @@ error_splat_bdi:
3160/* 3046/*
3161 * Create an NFS4 server record on referral traversal 3047 * Create an NFS4 server record on referral traversal
3162 */ 3048 */
3163static int nfs4_referral_get_sb(struct file_system_type *fs_type, 3049static struct dentry *nfs4_referral_mount(struct file_system_type *fs_type,
3164 int flags, const char *dev_name, void *raw_data, 3050 int flags, const char *dev_name, void *raw_data)
3165 struct vfsmount *mnt)
3166{ 3051{
3167 struct nfs_clone_mount *data = raw_data; 3052 struct nfs_clone_mount *data = raw_data;
3168 char *export_path; 3053 char *export_path;
3169 struct vfsmount *root_mnt; 3054 struct vfsmount *root_mnt;
3170 int error; 3055 struct dentry *res;
3171 3056
3172 dprintk("--> nfs4_referral_get_sb()\n"); 3057 dprintk("--> nfs4_referral_mount()\n");
3173 3058
3174 export_path = data->mnt_path; 3059 export_path = data->mnt_path;
3175 data->mnt_path = "/"; 3060 data->mnt_path = "/";
@@ -3178,15 +3063,13 @@ static int nfs4_referral_get_sb(struct file_system_type *fs_type,
3178 flags, data, data->hostname); 3063 flags, data, data->hostname);
3179 data->mnt_path = export_path; 3064 data->mnt_path = export_path;
3180 3065
3181 error = PTR_ERR(root_mnt); 3066 res = ERR_CAST(root_mnt);
3182 if (IS_ERR(root_mnt)) 3067 if (!IS_ERR(root_mnt))
3183 goto out; 3068 res = nfs_follow_remote_path(root_mnt, export_path);
3184 3069 dprintk("<-- nfs4_referral_mount() = %ld%s\n",
3185 error = nfs_follow_remote_path(root_mnt, export_path, mnt); 3070 IS_ERR(res) ? PTR_ERR(res) : 0,
3186out: 3071 IS_ERR(res) ? " [error]" : "");
3187 dprintk("<-- nfs4_referral_get_sb() = %d%s\n", error, 3072 return res;
3188 error != 0 ? " [error]" : "");
3189 return error;
3190} 3073}
3191 3074
3192#endif /* CONFIG_NFS_V4 */ 3075#endif /* CONFIG_NFS_V4 */
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index e313a51acdd1..8d6864c2a5fa 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -148,6 +148,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
148 alias = d_lookup(parent, &data->args.name); 148 alias = d_lookup(parent, &data->args.name);
149 if (alias != NULL) { 149 if (alias != NULL) {
150 int ret = 0; 150 int ret = 0;
151 void *devname_garbage = NULL;
151 152
152 /* 153 /*
153 * Hey, we raced with lookup... See if we need to transfer 154 * Hey, we raced with lookup... See if we need to transfer
@@ -157,6 +158,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
157 spin_lock(&alias->d_lock); 158 spin_lock(&alias->d_lock);
158 if (alias->d_inode != NULL && 159 if (alias->d_inode != NULL &&
159 !(alias->d_flags & DCACHE_NFSFS_RENAMED)) { 160 !(alias->d_flags & DCACHE_NFSFS_RENAMED)) {
161 devname_garbage = alias->d_fsdata;
160 alias->d_fsdata = data; 162 alias->d_fsdata = data;
161 alias->d_flags |= DCACHE_NFSFS_RENAMED; 163 alias->d_flags |= DCACHE_NFSFS_RENAMED;
162 ret = 1; 164 ret = 1;
@@ -164,6 +166,13 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
164 spin_unlock(&alias->d_lock); 166 spin_unlock(&alias->d_lock);
165 nfs_dec_sillycount(dir); 167 nfs_dec_sillycount(dir);
166 dput(alias); 168 dput(alias);
169 /*
170 * If we'd displaced old cached devname, free it. At that
171 * point dentry is definitely not a root, so we won't need
172 * that anymore.
173 */
174 if (devname_garbage)
175 kfree(devname_garbage);
167 return ret; 176 return ret;
168 } 177 }
169 data->dir = igrab(dir); 178 data->dir = igrab(dir);
@@ -180,7 +189,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
180 task_setup_data.rpc_client = NFS_CLIENT(dir); 189 task_setup_data.rpc_client = NFS_CLIENT(dir);
181 task = rpc_run_task(&task_setup_data); 190 task = rpc_run_task(&task_setup_data);
182 if (!IS_ERR(task)) 191 if (!IS_ERR(task))
183 rpc_put_task(task); 192 rpc_put_task_async(task);
184 return 1; 193 return 1;
185} 194}
186 195
@@ -252,6 +261,7 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
252{ 261{
253 struct nfs_unlinkdata *data; 262 struct nfs_unlinkdata *data;
254 int status = -ENOMEM; 263 int status = -ENOMEM;
264 void *devname_garbage = NULL;
255 265
256 data = kzalloc(sizeof(*data), GFP_KERNEL); 266 data = kzalloc(sizeof(*data), GFP_KERNEL);
257 if (data == NULL) 267 if (data == NULL)
@@ -269,8 +279,16 @@ nfs_async_unlink(struct inode *dir, struct dentry *dentry)
269 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) 279 if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
270 goto out_unlock; 280 goto out_unlock;
271 dentry->d_flags |= DCACHE_NFSFS_RENAMED; 281 dentry->d_flags |= DCACHE_NFSFS_RENAMED;
282 devname_garbage = dentry->d_fsdata;
272 dentry->d_fsdata = data; 283 dentry->d_fsdata = data;
273 spin_unlock(&dentry->d_lock); 284 spin_unlock(&dentry->d_lock);
285 /*
286 * If we'd displaced old cached devname, free it. At that
287 * point dentry is definitely not a root, so we won't need
288 * that anymore.
289 */
290 if (devname_garbage)
291 kfree(devname_garbage);
274 return 0; 292 return 0;
275out_unlock: 293out_unlock:
276 spin_unlock(&dentry->d_lock); 294 spin_unlock(&dentry->d_lock);
@@ -299,6 +317,7 @@ nfs_complete_unlink(struct dentry *dentry, struct inode *inode)
299 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) { 317 if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
300 dentry->d_flags &= ~DCACHE_NFSFS_RENAMED; 318 dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
301 data = dentry->d_fsdata; 319 data = dentry->d_fsdata;
320 dentry->d_fsdata = NULL;
302 } 321 }
303 spin_unlock(&dentry->d_lock); 322 spin_unlock(&dentry->d_lock);
304 323
@@ -315,6 +334,7 @@ nfs_cancel_async_unlink(struct dentry *dentry)
315 struct nfs_unlinkdata *data = dentry->d_fsdata; 334 struct nfs_unlinkdata *data = dentry->d_fsdata;
316 335
317 dentry->d_flags &= ~DCACHE_NFSFS_RENAMED; 336 dentry->d_flags &= ~DCACHE_NFSFS_RENAMED;
337 dentry->d_fsdata = NULL;
318 spin_unlock(&dentry->d_lock); 338 spin_unlock(&dentry->d_lock);
319 nfs_free_unlinkdata(data); 339 nfs_free_unlinkdata(data);
320 return; 340 return;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index c8278f4046cb..3bd5d7e80f6c 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -28,6 +28,7 @@
28#include "iostat.h" 28#include "iostat.h"
29#include "nfs4_fs.h" 29#include "nfs4_fs.h"
30#include "fscache.h" 30#include "fscache.h"
31#include "pnfs.h"
31 32
32#define NFSDBG_FACILITY NFSDBG_PAGECACHE 33#define NFSDBG_FACILITY NFSDBG_PAGECACHE
33 34
@@ -58,6 +59,7 @@ struct nfs_write_data *nfs_commitdata_alloc(void)
58 } 59 }
59 return p; 60 return p;
60} 61}
62EXPORT_SYMBOL_GPL(nfs_commitdata_alloc);
61 63
62void nfs_commit_free(struct nfs_write_data *p) 64void nfs_commit_free(struct nfs_write_data *p)
63{ 65{
@@ -65,6 +67,7 @@ void nfs_commit_free(struct nfs_write_data *p)
65 kfree(p->pagevec); 67 kfree(p->pagevec);
66 mempool_free(p, nfs_commit_mempool); 68 mempool_free(p, nfs_commit_mempool);
67} 69}
70EXPORT_SYMBOL_GPL(nfs_commit_free);
68 71
69struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount) 72struct nfs_write_data *nfs_writedata_alloc(unsigned int pagecount)
70{ 73{
@@ -96,6 +99,7 @@ void nfs_writedata_free(struct nfs_write_data *p)
96 99
97static void nfs_writedata_release(struct nfs_write_data *wdata) 100static void nfs_writedata_release(struct nfs_write_data *wdata)
98{ 101{
102 put_lseg(wdata->lseg);
99 put_nfs_open_context(wdata->args.context); 103 put_nfs_open_context(wdata->args.context);
100 nfs_writedata_free(wdata); 104 nfs_writedata_free(wdata);
101} 105}
@@ -177,8 +181,8 @@ static int wb_priority(struct writeback_control *wbc)
177 if (wbc->for_reclaim) 181 if (wbc->for_reclaim)
178 return FLUSH_HIGHPRI | FLUSH_STABLE; 182 return FLUSH_HIGHPRI | FLUSH_STABLE;
179 if (wbc->for_kupdate || wbc->for_background) 183 if (wbc->for_kupdate || wbc->for_background)
180 return FLUSH_LOWPRI; 184 return FLUSH_LOWPRI | FLUSH_COND_STABLE;
181 return 0; 185 return FLUSH_COND_STABLE;
182} 186}
183 187
184/* 188/*
@@ -385,11 +389,8 @@ static int nfs_inode_add_request(struct inode *inode, struct nfs_page *req)
385 spin_lock(&inode->i_lock); 389 spin_lock(&inode->i_lock);
386 error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req); 390 error = radix_tree_insert(&nfsi->nfs_page_tree, req->wb_index, req);
387 BUG_ON(error); 391 BUG_ON(error);
388 if (!nfsi->npages) { 392 if (!nfsi->npages && nfs_have_delegation(inode, FMODE_WRITE))
389 igrab(inode); 393 nfsi->change_attr++;
390 if (nfs_have_delegation(inode, FMODE_WRITE))
391 nfsi->change_attr++;
392 }
393 set_bit(PG_MAPPED, &req->wb_flags); 394 set_bit(PG_MAPPED, &req->wb_flags);
394 SetPagePrivate(req->wb_page); 395 SetPagePrivate(req->wb_page);
395 set_page_private(req->wb_page, (unsigned long)req); 396 set_page_private(req->wb_page, (unsigned long)req);
@@ -419,11 +420,7 @@ static void nfs_inode_remove_request(struct nfs_page *req)
419 clear_bit(PG_MAPPED, &req->wb_flags); 420 clear_bit(PG_MAPPED, &req->wb_flags);
420 radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index); 421 radix_tree_delete(&nfsi->nfs_page_tree, req->wb_index);
421 nfsi->npages--; 422 nfsi->npages--;
422 if (!nfsi->npages) { 423 spin_unlock(&inode->i_lock);
423 spin_unlock(&inode->i_lock);
424 iput(inode);
425 } else
426 spin_unlock(&inode->i_lock);
427 nfs_release_request(req); 424 nfs_release_request(req);
428} 425}
429 426
@@ -439,7 +436,7 @@ nfs_mark_request_dirty(struct nfs_page *req)
439 * Add a request to the inode's commit list. 436 * Add a request to the inode's commit list.
440 */ 437 */
441static void 438static void
442nfs_mark_request_commit(struct nfs_page *req) 439nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
443{ 440{
444 struct inode *inode = req->wb_context->path.dentry->d_inode; 441 struct inode *inode = req->wb_context->path.dentry->d_inode;
445 struct nfs_inode *nfsi = NFS_I(inode); 442 struct nfs_inode *nfsi = NFS_I(inode);
@@ -451,6 +448,7 @@ nfs_mark_request_commit(struct nfs_page *req)
451 NFS_PAGE_TAG_COMMIT); 448 NFS_PAGE_TAG_COMMIT);
452 nfsi->ncommit++; 449 nfsi->ncommit++;
453 spin_unlock(&inode->i_lock); 450 spin_unlock(&inode->i_lock);
451 pnfs_mark_request_commit(req, lseg);
454 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); 452 inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
455 inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE); 453 inc_bdi_stat(req->wb_page->mapping->backing_dev_info, BDI_RECLAIMABLE);
456 __mark_inode_dirty(inode, I_DIRTY_DATASYNC); 454 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
@@ -472,14 +470,18 @@ nfs_clear_request_commit(struct nfs_page *req)
472static inline 470static inline
473int nfs_write_need_commit(struct nfs_write_data *data) 471int nfs_write_need_commit(struct nfs_write_data *data)
474{ 472{
475 return data->verf.committed != NFS_FILE_SYNC; 473 if (data->verf.committed == NFS_DATA_SYNC)
474 return data->lseg == NULL;
475 else
476 return data->verf.committed != NFS_FILE_SYNC;
476} 477}
477 478
478static inline 479static inline
479int nfs_reschedule_unstable_write(struct nfs_page *req) 480int nfs_reschedule_unstable_write(struct nfs_page *req,
481 struct nfs_write_data *data)
480{ 482{
481 if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) { 483 if (test_and_clear_bit(PG_NEED_COMMIT, &req->wb_flags)) {
482 nfs_mark_request_commit(req); 484 nfs_mark_request_commit(req, data->lseg);
483 return 1; 485 return 1;
484 } 486 }
485 if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) { 487 if (test_and_clear_bit(PG_NEED_RESCHED, &req->wb_flags)) {
@@ -490,7 +492,7 @@ int nfs_reschedule_unstable_write(struct nfs_page *req)
490} 492}
491#else 493#else
492static inline void 494static inline void
493nfs_mark_request_commit(struct nfs_page *req) 495nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg)
494{ 496{
495} 497}
496 498
@@ -507,7 +509,8 @@ int nfs_write_need_commit(struct nfs_write_data *data)
507} 509}
508 510
509static inline 511static inline
510int nfs_reschedule_unstable_write(struct nfs_page *req) 512int nfs_reschedule_unstable_write(struct nfs_page *req,
513 struct nfs_write_data *data)
511{ 514{
512 return 0; 515 return 0;
513} 516}
@@ -539,11 +542,15 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, pgoff_t idx_start, u
539 if (!nfs_need_commit(nfsi)) 542 if (!nfs_need_commit(nfsi))
540 return 0; 543 return 0;
541 544
545 spin_lock(&inode->i_lock);
542 ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT); 546 ret = nfs_scan_list(nfsi, dst, idx_start, npages, NFS_PAGE_TAG_COMMIT);
543 if (ret > 0) 547 if (ret > 0)
544 nfsi->ncommit -= ret; 548 nfsi->ncommit -= ret;
549 spin_unlock(&inode->i_lock);
550
545 if (nfs_need_commit(NFS_I(inode))) 551 if (nfs_need_commit(NFS_I(inode)))
546 __mark_inode_dirty(inode, I_DIRTY_DATASYNC); 552 __mark_inode_dirty(inode, I_DIRTY_DATASYNC);
553
547 return ret; 554 return ret;
548} 555}
549#else 556#else
@@ -610,9 +617,11 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode,
610 } 617 }
611 618
612 if (nfs_clear_request_commit(req) && 619 if (nfs_clear_request_commit(req) &&
613 radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree, 620 radix_tree_tag_clear(&NFS_I(inode)->nfs_page_tree,
614 req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) 621 req->wb_index, NFS_PAGE_TAG_COMMIT) != NULL) {
615 NFS_I(inode)->ncommit--; 622 NFS_I(inode)->ncommit--;
623 pnfs_clear_request_commit(req);
624 }
616 625
617 /* Okay, the request matches. Update the region */ 626 /* Okay, the request matches. Update the region */
618 if (offset < req->wb_offset) { 627 if (offset < req->wb_offset) {
@@ -671,7 +680,6 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
671 req = nfs_setup_write_request(ctx, page, offset, count); 680 req = nfs_setup_write_request(ctx, page, offset, count);
672 if (IS_ERR(req)) 681 if (IS_ERR(req))
673 return PTR_ERR(req); 682 return PTR_ERR(req);
674 nfs_mark_request_dirty(req);
675 /* Update file length */ 683 /* Update file length */
676 nfs_grow_file(page, offset, count); 684 nfs_grow_file(page, offset, count);
677 nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes); 685 nfs_mark_uptodate(page, req->wb_pgbase, req->wb_bytes);
@@ -760,11 +768,12 @@ int nfs_updatepage(struct file *file, struct page *page,
760 return status; 768 return status;
761} 769}
762 770
763static void nfs_writepage_release(struct nfs_page *req) 771static void nfs_writepage_release(struct nfs_page *req,
772 struct nfs_write_data *data)
764{ 773{
765 struct page *page = req->wb_page; 774 struct page *page = req->wb_page;
766 775
767 if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req)) 776 if (PageError(req->wb_page) || !nfs_reschedule_unstable_write(req, data))
768 nfs_inode_remove_request(req); 777 nfs_inode_remove_request(req);
769 nfs_clear_page_tag_locked(req); 778 nfs_clear_page_tag_locked(req);
770 nfs_end_page_writeback(page); 779 nfs_end_page_writeback(page);
@@ -781,25 +790,21 @@ static int flush_task_priority(int how)
781 return RPC_PRIORITY_NORMAL; 790 return RPC_PRIORITY_NORMAL;
782} 791}
783 792
784/* 793int nfs_initiate_write(struct nfs_write_data *data,
785 * Set up the argument/result storage required for the RPC call. 794 struct rpc_clnt *clnt,
786 */ 795 const struct rpc_call_ops *call_ops,
787static int nfs_write_rpcsetup(struct nfs_page *req, 796 int how)
788 struct nfs_write_data *data,
789 const struct rpc_call_ops *call_ops,
790 unsigned int count, unsigned int offset,
791 int how)
792{ 797{
793 struct inode *inode = req->wb_context->path.dentry->d_inode; 798 struct inode *inode = data->inode;
794 int priority = flush_task_priority(how); 799 int priority = flush_task_priority(how);
795 struct rpc_task *task; 800 struct rpc_task *task;
796 struct rpc_message msg = { 801 struct rpc_message msg = {
797 .rpc_argp = &data->args, 802 .rpc_argp = &data->args,
798 .rpc_resp = &data->res, 803 .rpc_resp = &data->res,
799 .rpc_cred = req->wb_context->cred, 804 .rpc_cred = data->cred,
800 }; 805 };
801 struct rpc_task_setup task_setup_data = { 806 struct rpc_task_setup task_setup_data = {
802 .rpc_client = NFS_CLIENT(inode), 807 .rpc_client = clnt,
803 .task = &data->task, 808 .task = &data->task,
804 .rpc_message = &msg, 809 .rpc_message = &msg,
805 .callback_ops = call_ops, 810 .callback_ops = call_ops,
@@ -810,12 +815,52 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
810 }; 815 };
811 int ret = 0; 816 int ret = 0;
812 817
818 /* Set up the initial task struct. */
819 NFS_PROTO(inode)->write_setup(data, &msg);
820
821 dprintk("NFS: %5u initiated write call "
822 "(req %s/%lld, %u bytes @ offset %llu)\n",
823 data->task.tk_pid,
824 inode->i_sb->s_id,
825 (long long)NFS_FILEID(inode),
826 data->args.count,
827 (unsigned long long)data->args.offset);
828
829 task = rpc_run_task(&task_setup_data);
830 if (IS_ERR(task)) {
831 ret = PTR_ERR(task);
832 goto out;
833 }
834 if (how & FLUSH_SYNC) {
835 ret = rpc_wait_for_completion_task(task);
836 if (ret == 0)
837 ret = task->tk_status;
838 }
839 rpc_put_task(task);
840out:
841 return ret;
842}
843EXPORT_SYMBOL_GPL(nfs_initiate_write);
844
845/*
846 * Set up the argument/result storage required for the RPC call.
847 */
848static int nfs_write_rpcsetup(struct nfs_page *req,
849 struct nfs_write_data *data,
850 const struct rpc_call_ops *call_ops,
851 unsigned int count, unsigned int offset,
852 struct pnfs_layout_segment *lseg,
853 int how)
854{
855 struct inode *inode = req->wb_context->path.dentry->d_inode;
856
813 /* Set up the RPC argument and reply structs 857 /* Set up the RPC argument and reply structs
814 * NB: take care not to mess about with data->commit et al. */ 858 * NB: take care not to mess about with data->commit et al. */
815 859
816 data->req = req; 860 data->req = req;
817 data->inode = inode = req->wb_context->path.dentry->d_inode; 861 data->inode = inode = req->wb_context->path.dentry->d_inode;
818 data->cred = msg.rpc_cred; 862 data->cred = req->wb_context->cred;
863 data->lseg = get_lseg(lseg);
819 864
820 data->args.fh = NFS_FH(inode); 865 data->args.fh = NFS_FH(inode);
821 data->args.offset = req_offset(req) + offset; 866 data->args.offset = req_offset(req) + offset;
@@ -825,7 +870,7 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
825 data->args.context = get_nfs_open_context(req->wb_context); 870 data->args.context = get_nfs_open_context(req->wb_context);
826 data->args.lock_context = req->wb_lock_context; 871 data->args.lock_context = req->wb_lock_context;
827 data->args.stable = NFS_UNSTABLE; 872 data->args.stable = NFS_UNSTABLE;
828 if (how & FLUSH_STABLE) { 873 if (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
829 data->args.stable = NFS_DATA_SYNC; 874 data->args.stable = NFS_DATA_SYNC;
830 if (!nfs_need_commit(NFS_I(inode))) 875 if (!nfs_need_commit(NFS_I(inode)))
831 data->args.stable = NFS_FILE_SYNC; 876 data->args.stable = NFS_FILE_SYNC;
@@ -836,30 +881,11 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
836 data->res.verf = &data->verf; 881 data->res.verf = &data->verf;
837 nfs_fattr_init(&data->fattr); 882 nfs_fattr_init(&data->fattr);
838 883
839 /* Set up the initial task struct. */ 884 if (data->lseg &&
840 NFS_PROTO(inode)->write_setup(data, &msg); 885 (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED))
841 886 return 0;
842 dprintk("NFS: %5u initiated write call "
843 "(req %s/%lld, %u bytes @ offset %llu)\n",
844 data->task.tk_pid,
845 inode->i_sb->s_id,
846 (long long)NFS_FILEID(inode),
847 count,
848 (unsigned long long)data->args.offset);
849 887
850 task = rpc_run_task(&task_setup_data); 888 return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
851 if (IS_ERR(task)) {
852 ret = PTR_ERR(task);
853 goto out;
854 }
855 if (how & FLUSH_SYNC) {
856 ret = rpc_wait_for_completion_task(task);
857 if (ret == 0)
858 ret = task->tk_status;
859 }
860 rpc_put_task(task);
861out:
862 return ret;
863} 889}
864 890
865/* If a nfs_flush_* function fails, it should remove reqs from @head and 891/* If a nfs_flush_* function fails, it should remove reqs from @head and
@@ -879,20 +905,27 @@ static void nfs_redirty_request(struct nfs_page *req)
879 * Generate multiple small requests to write out a single 905 * Generate multiple small requests to write out a single
880 * contiguous dirty area on one page. 906 * contiguous dirty area on one page.
881 */ 907 */
882static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how) 908static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
883{ 909{
884 struct nfs_page *req = nfs_list_entry(head->next); 910 struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
885 struct page *page = req->wb_page; 911 struct page *page = req->wb_page;
886 struct nfs_write_data *data; 912 struct nfs_write_data *data;
887 size_t wsize = NFS_SERVER(inode)->wsize, nbytes; 913 size_t wsize = NFS_SERVER(desc->pg_inode)->wsize, nbytes;
888 unsigned int offset; 914 unsigned int offset;
889 int requests = 0; 915 int requests = 0;
890 int ret = 0; 916 int ret = 0;
917 struct pnfs_layout_segment *lseg;
891 LIST_HEAD(list); 918 LIST_HEAD(list);
892 919
893 nfs_list_remove_request(req); 920 nfs_list_remove_request(req);
894 921
895 nbytes = count; 922 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
923 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit ||
924 desc->pg_count > wsize))
925 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
926
927
928 nbytes = desc->pg_count;
896 do { 929 do {
897 size_t len = min(nbytes, wsize); 930 size_t len = min(nbytes, wsize);
898 931
@@ -905,9 +938,11 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
905 } while (nbytes != 0); 938 } while (nbytes != 0);
906 atomic_set(&req->wb_complete, requests); 939 atomic_set(&req->wb_complete, requests);
907 940
941 BUG_ON(desc->pg_lseg);
942 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW);
908 ClearPageError(page); 943 ClearPageError(page);
909 offset = 0; 944 offset = 0;
910 nbytes = count; 945 nbytes = desc->pg_count;
911 do { 946 do {
912 int ret2; 947 int ret2;
913 948
@@ -919,13 +954,15 @@ static int nfs_flush_multi(struct inode *inode, struct list_head *head, unsigned
919 if (nbytes < wsize) 954 if (nbytes < wsize)
920 wsize = nbytes; 955 wsize = nbytes;
921 ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops, 956 ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
922 wsize, offset, how); 957 wsize, offset, lseg, desc->pg_ioflags);
923 if (ret == 0) 958 if (ret == 0)
924 ret = ret2; 959 ret = ret2;
925 offset += wsize; 960 offset += wsize;
926 nbytes -= wsize; 961 nbytes -= wsize;
927 } while (nbytes != 0); 962 } while (nbytes != 0);
928 963
964 put_lseg(lseg);
965 desc->pg_lseg = NULL;
929 return ret; 966 return ret;
930 967
931out_bad: 968out_bad:
@@ -946,16 +983,26 @@ out_bad:
946 * This is the case if nfs_updatepage detects a conflicting request 983 * This is the case if nfs_updatepage detects a conflicting request
947 * that has been written but not committed. 984 * that has been written but not committed.
948 */ 985 */
949static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int how) 986static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
950{ 987{
951 struct nfs_page *req; 988 struct nfs_page *req;
952 struct page **pages; 989 struct page **pages;
953 struct nfs_write_data *data; 990 struct nfs_write_data *data;
991 struct list_head *head = &desc->pg_list;
992 struct pnfs_layout_segment *lseg = desc->pg_lseg;
993 int ret;
954 994
955 data = nfs_writedata_alloc(npages); 995 data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base,
956 if (!data) 996 desc->pg_count));
957 goto out_bad; 997 if (!data) {
958 998 while (!list_empty(head)) {
999 req = nfs_list_entry(head->next);
1000 nfs_list_remove_request(req);
1001 nfs_redirty_request(req);
1002 }
1003 ret = -ENOMEM;
1004 goto out;
1005 }
959 pages = data->pagevec; 1006 pages = data->pagevec;
960 while (!list_empty(head)) { 1007 while (!list_empty(head)) {
961 req = nfs_list_entry(head->next); 1008 req = nfs_list_entry(head->next);
@@ -965,16 +1012,19 @@ static int nfs_flush_one(struct inode *inode, struct list_head *head, unsigned i
965 *pages++ = req->wb_page; 1012 *pages++ = req->wb_page;
966 } 1013 }
967 req = nfs_list_entry(data->pages.next); 1014 req = nfs_list_entry(data->pages.next);
1015 if ((!lseg) && list_is_singular(&data->pages))
1016 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context, IOMODE_RW);
1017
1018 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
1019 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
1020 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
968 1021
969 /* Set up the argument struct */ 1022 /* Set up the argument struct */
970 return nfs_write_rpcsetup(req, data, &nfs_write_full_ops, count, 0, how); 1023 ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags);
971 out_bad: 1024out:
972 while (!list_empty(head)) { 1025 put_lseg(lseg); /* Cleans any gotten in ->pg_test */
973 req = nfs_list_entry(head->next); 1026 desc->pg_lseg = NULL;
974 nfs_list_remove_request(req); 1027 return ret;
975 nfs_redirty_request(req);
976 }
977 return -ENOMEM;
978} 1028}
979 1029
980static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, 1030static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
@@ -982,6 +1032,8 @@ static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
982{ 1032{
983 size_t wsize = NFS_SERVER(inode)->wsize; 1033 size_t wsize = NFS_SERVER(inode)->wsize;
984 1034
1035 pnfs_pageio_init_write(pgio, inode);
1036
985 if (wsize < PAGE_CACHE_SIZE) 1037 if (wsize < PAGE_CACHE_SIZE)
986 nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags); 1038 nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags);
987 else 1039 else
@@ -1039,7 +1091,7 @@ static void nfs_writeback_release_partial(void *calldata)
1039 1091
1040out: 1092out:
1041 if (atomic_dec_and_test(&req->wb_complete)) 1093 if (atomic_dec_and_test(&req->wb_complete))
1042 nfs_writepage_release(req); 1094 nfs_writepage_release(req, data);
1043 nfs_writedata_release(calldata); 1095 nfs_writedata_release(calldata);
1044} 1096}
1045 1097
@@ -1106,7 +1158,7 @@ static void nfs_writeback_release_full(void *calldata)
1106 1158
1107 if (nfs_write_need_commit(data)) { 1159 if (nfs_write_need_commit(data)) {
1108 memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf)); 1160 memcpy(&req->wb_verf, &data->verf, sizeof(req->wb_verf));
1109 nfs_mark_request_commit(req); 1161 nfs_mark_request_commit(req, data->lseg);
1110 dprintk(" marked for commit\n"); 1162 dprintk(" marked for commit\n");
1111 goto next; 1163 goto next;
1112 } 1164 }
@@ -1132,7 +1184,7 @@ static const struct rpc_call_ops nfs_write_full_ops = {
1132/* 1184/*
1133 * This function is called when the WRITE call is complete. 1185 * This function is called when the WRITE call is complete.
1134 */ 1186 */
1135int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data) 1187void nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1136{ 1188{
1137 struct nfs_writeargs *argp = &data->args; 1189 struct nfs_writeargs *argp = &data->args;
1138 struct nfs_writeres *resp = &data->res; 1190 struct nfs_writeres *resp = &data->res;
@@ -1151,7 +1203,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1151 */ 1203 */
1152 status = NFS_PROTO(data->inode)->write_done(task, data); 1204 status = NFS_PROTO(data->inode)->write_done(task, data);
1153 if (status != 0) 1205 if (status != 0)
1154 return status; 1206 return;
1155 nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count); 1207 nfs_add_stats(data->inode, NFSIOS_SERVERWRITTENBYTES, resp->count);
1156 1208
1157#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 1209#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
@@ -1166,6 +1218,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1166 */ 1218 */
1167 static unsigned long complain; 1219 static unsigned long complain;
1168 1220
1221 /* Note this will print the MDS for a DS write */
1169 if (time_before(complain, jiffies)) { 1222 if (time_before(complain, jiffies)) {
1170 dprintk("NFS: faulty NFS server %s:" 1223 dprintk("NFS: faulty NFS server %s:"
1171 " (committed = %d) != (stable = %d)\n", 1224 " (committed = %d) != (stable = %d)\n",
@@ -1186,6 +1239,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1186 /* Was this an NFSv2 write or an NFSv3 stable write? */ 1239 /* Was this an NFSv2 write or an NFSv3 stable write? */
1187 if (resp->verf->committed != NFS_UNSTABLE) { 1240 if (resp->verf->committed != NFS_UNSTABLE) {
1188 /* Resend from where the server left off */ 1241 /* Resend from where the server left off */
1242 data->mds_offset += resp->count;
1189 argp->offset += resp->count; 1243 argp->offset += resp->count;
1190 argp->pgbase += resp->count; 1244 argp->pgbase += resp->count;
1191 argp->count -= resp->count; 1245 argp->count -= resp->count;
@@ -1196,7 +1250,7 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1196 argp->stable = NFS_FILE_SYNC; 1250 argp->stable = NFS_FILE_SYNC;
1197 } 1251 }
1198 nfs_restart_rpc(task, server->nfs_client); 1252 nfs_restart_rpc(task, server->nfs_client);
1199 return -EAGAIN; 1253 return;
1200 } 1254 }
1201 if (time_before(complain, jiffies)) { 1255 if (time_before(complain, jiffies)) {
1202 printk(KERN_WARNING 1256 printk(KERN_WARNING
@@ -1207,64 +1261,89 @@ int nfs_writeback_done(struct rpc_task *task, struct nfs_write_data *data)
1207 /* Can't do anything about it except throw an error. */ 1261 /* Can't do anything about it except throw an error. */
1208 task->tk_status = -EIO; 1262 task->tk_status = -EIO;
1209 } 1263 }
1210 return 0; 1264 return;
1211} 1265}
1212 1266
1213 1267
1214#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4) 1268#if defined(CONFIG_NFS_V3) || defined(CONFIG_NFS_V4)
1215static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) 1269static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait)
1216{ 1270{
1271 int ret;
1272
1217 if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags)) 1273 if (!test_and_set_bit(NFS_INO_COMMIT, &nfsi->flags))
1218 return 1; 1274 return 1;
1219 if (may_wait && !out_of_line_wait_on_bit_lock(&nfsi->flags, 1275 if (!may_wait)
1220 NFS_INO_COMMIT, nfs_wait_bit_killable, 1276 return 0;
1221 TASK_KILLABLE)) 1277 ret = out_of_line_wait_on_bit_lock(&nfsi->flags,
1222 return 1; 1278 NFS_INO_COMMIT,
1223 return 0; 1279 nfs_wait_bit_killable,
1280 TASK_KILLABLE);
1281 return (ret < 0) ? ret : 1;
1224} 1282}
1225 1283
1226static void nfs_commit_clear_lock(struct nfs_inode *nfsi) 1284void nfs_commit_clear_lock(struct nfs_inode *nfsi)
1227{ 1285{
1228 clear_bit(NFS_INO_COMMIT, &nfsi->flags); 1286 clear_bit(NFS_INO_COMMIT, &nfsi->flags);
1229 smp_mb__after_clear_bit(); 1287 smp_mb__after_clear_bit();
1230 wake_up_bit(&nfsi->flags, NFS_INO_COMMIT); 1288 wake_up_bit(&nfsi->flags, NFS_INO_COMMIT);
1231} 1289}
1290EXPORT_SYMBOL_GPL(nfs_commit_clear_lock);
1232 1291
1233 1292void nfs_commitdata_release(void *data)
1234static void nfs_commitdata_release(void *data)
1235{ 1293{
1236 struct nfs_write_data *wdata = data; 1294 struct nfs_write_data *wdata = data;
1237 1295
1296 put_lseg(wdata->lseg);
1238 put_nfs_open_context(wdata->args.context); 1297 put_nfs_open_context(wdata->args.context);
1239 nfs_commit_free(wdata); 1298 nfs_commit_free(wdata);
1240} 1299}
1300EXPORT_SYMBOL_GPL(nfs_commitdata_release);
1241 1301
1242/* 1302int nfs_initiate_commit(struct nfs_write_data *data, struct rpc_clnt *clnt,
1243 * Set up the argument/result storage required for the RPC call. 1303 const struct rpc_call_ops *call_ops,
1244 */ 1304 int how)
1245static int nfs_commit_rpcsetup(struct list_head *head,
1246 struct nfs_write_data *data,
1247 int how)
1248{ 1305{
1249 struct nfs_page *first = nfs_list_entry(head->next);
1250 struct inode *inode = first->wb_context->path.dentry->d_inode;
1251 int priority = flush_task_priority(how);
1252 struct rpc_task *task; 1306 struct rpc_task *task;
1307 int priority = flush_task_priority(how);
1253 struct rpc_message msg = { 1308 struct rpc_message msg = {
1254 .rpc_argp = &data->args, 1309 .rpc_argp = &data->args,
1255 .rpc_resp = &data->res, 1310 .rpc_resp = &data->res,
1256 .rpc_cred = first->wb_context->cred, 1311 .rpc_cred = data->cred,
1257 }; 1312 };
1258 struct rpc_task_setup task_setup_data = { 1313 struct rpc_task_setup task_setup_data = {
1259 .task = &data->task, 1314 .task = &data->task,
1260 .rpc_client = NFS_CLIENT(inode), 1315 .rpc_client = clnt,
1261 .rpc_message = &msg, 1316 .rpc_message = &msg,
1262 .callback_ops = &nfs_commit_ops, 1317 .callback_ops = call_ops,
1263 .callback_data = data, 1318 .callback_data = data,
1264 .workqueue = nfsiod_workqueue, 1319 .workqueue = nfsiod_workqueue,
1265 .flags = RPC_TASK_ASYNC, 1320 .flags = RPC_TASK_ASYNC,
1266 .priority = priority, 1321 .priority = priority,
1267 }; 1322 };
1323 /* Set up the initial task struct. */
1324 NFS_PROTO(data->inode)->commit_setup(data, &msg);
1325
1326 dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid);
1327
1328 task = rpc_run_task(&task_setup_data);
1329 if (IS_ERR(task))
1330 return PTR_ERR(task);
1331 if (how & FLUSH_SYNC)
1332 rpc_wait_for_completion_task(task);
1333 rpc_put_task(task);
1334 return 0;
1335}
1336EXPORT_SYMBOL_GPL(nfs_initiate_commit);
1337
1338/*
1339 * Set up the argument/result storage required for the RPC call.
1340 */
1341void nfs_init_commit(struct nfs_write_data *data,
1342 struct list_head *head,
1343 struct pnfs_layout_segment *lseg)
1344{
1345 struct nfs_page *first = nfs_list_entry(head->next);
1346 struct inode *inode = first->wb_context->path.dentry->d_inode;
1268 1347
1269 /* Set up the RPC argument and reply structs 1348 /* Set up the RPC argument and reply structs
1270 * NB: take care not to mess about with data->commit et al. */ 1349 * NB: take care not to mess about with data->commit et al. */
@@ -1272,7 +1351,9 @@ static int nfs_commit_rpcsetup(struct list_head *head,
1272 list_splice_init(head, &data->pages); 1351 list_splice_init(head, &data->pages);
1273 1352
1274 data->inode = inode; 1353 data->inode = inode;
1275 data->cred = msg.rpc_cred; 1354 data->cred = first->wb_context->cred;
1355 data->lseg = lseg; /* reference transferred */
1356 data->mds_ops = &nfs_commit_ops;
1276 1357
1277 data->args.fh = NFS_FH(data->inode); 1358 data->args.fh = NFS_FH(data->inode);
1278 /* Note: we always request a commit of the entire inode */ 1359 /* Note: we always request a commit of the entire inode */
@@ -1283,18 +1364,25 @@ static int nfs_commit_rpcsetup(struct list_head *head,
1283 data->res.fattr = &data->fattr; 1364 data->res.fattr = &data->fattr;
1284 data->res.verf = &data->verf; 1365 data->res.verf = &data->verf;
1285 nfs_fattr_init(&data->fattr); 1366 nfs_fattr_init(&data->fattr);
1367}
1368EXPORT_SYMBOL_GPL(nfs_init_commit);
1286 1369
1287 /* Set up the initial task struct. */ 1370void nfs_retry_commit(struct list_head *page_list,
1288 NFS_PROTO(inode)->commit_setup(data, &msg); 1371 struct pnfs_layout_segment *lseg)
1289 1372{
1290 dprintk("NFS: %5u initiated commit call\n", data->task.tk_pid); 1373 struct nfs_page *req;
1291 1374
1292 task = rpc_run_task(&task_setup_data); 1375 while (!list_empty(page_list)) {
1293 if (IS_ERR(task)) 1376 req = nfs_list_entry(page_list->next);
1294 return PTR_ERR(task); 1377 nfs_list_remove_request(req);
1295 rpc_put_task(task); 1378 nfs_mark_request_commit(req, lseg);
1296 return 0; 1379 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1380 dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
1381 BDI_RECLAIMABLE);
1382 nfs_clear_page_tag_locked(req);
1383 }
1297} 1384}
1385EXPORT_SYMBOL_GPL(nfs_retry_commit);
1298 1386
1299/* 1387/*
1300 * Commit dirty pages 1388 * Commit dirty pages
@@ -1303,7 +1391,6 @@ static int
1303nfs_commit_list(struct inode *inode, struct list_head *head, int how) 1391nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1304{ 1392{
1305 struct nfs_write_data *data; 1393 struct nfs_write_data *data;
1306 struct nfs_page *req;
1307 1394
1308 data = nfs_commitdata_alloc(); 1395 data = nfs_commitdata_alloc();
1309 1396
@@ -1311,17 +1398,10 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how)
1311 goto out_bad; 1398 goto out_bad;
1312 1399
1313 /* Set up the argument struct */ 1400 /* Set up the argument struct */
1314 return nfs_commit_rpcsetup(head, data, how); 1401 nfs_init_commit(data, head, NULL);
1402 return nfs_initiate_commit(data, NFS_CLIENT(inode), data->mds_ops, how);
1315 out_bad: 1403 out_bad:
1316 while (!list_empty(head)) { 1404 nfs_retry_commit(head, NULL);
1317 req = nfs_list_entry(head->next);
1318 nfs_list_remove_request(req);
1319 nfs_mark_request_commit(req);
1320 dec_zone_page_state(req->wb_page, NR_UNSTABLE_NFS);
1321 dec_bdi_stat(req->wb_page->mapping->backing_dev_info,
1322 BDI_RECLAIMABLE);
1323 nfs_clear_page_tag_locked(req);
1324 }
1325 nfs_commit_clear_lock(NFS_I(inode)); 1405 nfs_commit_clear_lock(NFS_I(inode));
1326 return -ENOMEM; 1406 return -ENOMEM;
1327} 1407}
@@ -1337,14 +1417,12 @@ static void nfs_commit_done(struct rpc_task *task, void *calldata)
1337 task->tk_pid, task->tk_status); 1417 task->tk_pid, task->tk_status);
1338 1418
1339 /* Call the NFS version-specific code */ 1419 /* Call the NFS version-specific code */
1340 if (NFS_PROTO(data->inode)->commit_done(task, data) != 0) 1420 NFS_PROTO(data->inode)->commit_done(task, data);
1341 return;
1342} 1421}
1343 1422
1344static void nfs_commit_release(void *calldata) 1423void nfs_commit_release_pages(struct nfs_write_data *data)
1345{ 1424{
1346 struct nfs_write_data *data = calldata; 1425 struct nfs_page *req;
1347 struct nfs_page *req;
1348 int status = data->task.tk_status; 1426 int status = data->task.tk_status;
1349 1427
1350 while (!list_empty(&data->pages)) { 1428 while (!list_empty(&data->pages)) {
@@ -1378,6 +1456,14 @@ static void nfs_commit_release(void *calldata)
1378 next: 1456 next:
1379 nfs_clear_page_tag_locked(req); 1457 nfs_clear_page_tag_locked(req);
1380 } 1458 }
1459}
1460EXPORT_SYMBOL_GPL(nfs_commit_release_pages);
1461
1462static void nfs_commit_release(void *calldata)
1463{
1464 struct nfs_write_data *data = calldata;
1465
1466 nfs_commit_release_pages(data);
1381 nfs_commit_clear_lock(NFS_I(data->inode)); 1467 nfs_commit_clear_lock(NFS_I(data->inode));
1382 nfs_commitdata_release(calldata); 1468 nfs_commitdata_release(calldata);
1383} 1469}
@@ -1394,23 +1480,28 @@ int nfs_commit_inode(struct inode *inode, int how)
1394{ 1480{
1395 LIST_HEAD(head); 1481 LIST_HEAD(head);
1396 int may_wait = how & FLUSH_SYNC; 1482 int may_wait = how & FLUSH_SYNC;
1397 int res = 0; 1483 int res;
1398 1484
1399 if (!nfs_commit_set_lock(NFS_I(inode), may_wait)) 1485 res = nfs_commit_set_lock(NFS_I(inode), may_wait);
1486 if (res <= 0)
1400 goto out_mark_dirty; 1487 goto out_mark_dirty;
1401 spin_lock(&inode->i_lock);
1402 res = nfs_scan_commit(inode, &head, 0, 0); 1488 res = nfs_scan_commit(inode, &head, 0, 0);
1403 spin_unlock(&inode->i_lock);
1404 if (res) { 1489 if (res) {
1405 int error = nfs_commit_list(inode, &head, how); 1490 int error;
1491
1492 error = pnfs_commit_list(inode, &head, how);
1493 if (error == PNFS_NOT_ATTEMPTED)
1494 error = nfs_commit_list(inode, &head, how);
1406 if (error < 0) 1495 if (error < 0)
1407 return error; 1496 return error;
1408 if (may_wait) 1497 if (!may_wait)
1409 wait_on_bit(&NFS_I(inode)->flags, NFS_INO_COMMIT,
1410 nfs_wait_bit_killable,
1411 TASK_KILLABLE);
1412 else
1413 goto out_mark_dirty; 1498 goto out_mark_dirty;
1499 error = wait_on_bit(&NFS_I(inode)->flags,
1500 NFS_INO_COMMIT,
1501 nfs_wait_bit_killable,
1502 TASK_KILLABLE);
1503 if (error < 0)
1504 return error;
1414 } else 1505 } else
1415 nfs_commit_clear_lock(NFS_I(inode)); 1506 nfs_commit_clear_lock(NFS_I(inode));
1416 return res; 1507 return res;
@@ -1464,7 +1555,22 @@ static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_contr
1464 1555
1465int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) 1556int nfs_write_inode(struct inode *inode, struct writeback_control *wbc)
1466{ 1557{
1467 return nfs_commit_unstable_pages(inode, wbc); 1558 int ret;
1559
1560 ret = nfs_commit_unstable_pages(inode, wbc);
1561 if (ret >= 0 && test_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags)) {
1562 int status;
1563 bool sync = true;
1564
1565 if (wbc->sync_mode == WB_SYNC_NONE || wbc->nonblocking ||
1566 wbc->for_background)
1567 sync = false;
1568
1569 status = pnfs_layoutcommit_inode(inode, sync);
1570 if (status < 0)
1571 return status;
1572 }
1573 return ret;
1468} 1574}
1469 1575
1470/* 1576/*