about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2011-07-27 16:23:02 -0400
committer Linus Torvalds <torvalds@linux-foundation.org> 2011-07-27 16:23:02 -0400
commit 28890d3598c352ae065b560e0fded3e79c800ba1 (patch)
tree 93267c5b29b9e81185e66a6c2e70e67dc626b63f
parent 91d41fdf31f74e6e2e5f3cb018eca4200e36e202 (diff)
parent ed1e6211a0a134ff23592c6f057af982ad5dab52 (diff)
Merge branch 'nfs-for-3.1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
* 'nfs-for-3.1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (44 commits)
  NFSv4: Don't use the delegation->inode in nfs_mark_return_delegation()
  nfs: don't use d_move in nfs_async_rename_done
  RDMA: Increasing RPCRDMA_MAX_DATA_SEGS
  SUNRPC: Replace xprt->resend and xprt->sending with a priority queue
  SUNRPC: Allow caller of rpc_sleep_on() to select priority levels
  SUNRPC: Support dynamic slot allocation for TCP connections
  SUNRPC: Clean up the slot table allocation
  SUNRPC: Initalise the struct xprt upon allocation
  SUNRPC: Ensure that we grab the XPRT_LOCK before calling xprt_alloc_slot
  pnfs: simplify pnfs files module autoloading
  nfs: document nfsv4 sillyrename issues
  NFS: Convert nfs4_set_ds_client to EXPORT_SYMBOL_GPL
  SUNRPC: Convert the backchannel exports to EXPORT_SYMBOL_GPL
  SUNRPC: sunrpc should not explicitly depend on NFS config options
  NFS: Clean up - simplify the switch to read/write-through-MDS
  NFS: Move the pnfs write code into pnfs.c
  NFS: Move the pnfs read code into pnfs.c
  NFS: Allow the nfs_pageio_descriptor to signal that a re-coalesce is needed
  NFS: Use the nfs_pageio_descriptor->pg_bsize in the read/write request
  NFS: Cache rpc_ops in struct nfs_pageio_descriptor
  ...
-rw-r--r-- fs/lockd/clntproc.c | 9
-rw-r--r-- fs/nfs/Kconfig | 1
-rw-r--r-- fs/nfs/callback_proc.c | 57
-rw-r--r-- fs/nfs/client.c | 7
-rw-r--r-- fs/nfs/delegation.c | 16
-rw-r--r-- fs/nfs/internal.h | 13
-rw-r--r-- fs/nfs/namespace.c | 2
-rw-r--r-- fs/nfs/nfs4_fs.h | 5
-rw-r--r-- fs/nfs/nfs4filelayout.c | 80
-rw-r--r-- fs/nfs/nfs4filelayout.h | 17
-rw-r--r-- fs/nfs/nfs4filelayoutdev.c | 452
-rw-r--r-- fs/nfs/nfs4proc.c | 215
-rw-r--r-- fs/nfs/nfs4state.c | 9
-rw-r--r-- fs/nfs/nfs4xdr.c | 247
-rw-r--r-- fs/nfs/objlayout/objio_osd.c | 20
-rw-r--r-- fs/nfs/pagelist.c | 69
-rw-r--r-- fs/nfs/pnfs.c | 221
-rw-r--r-- fs/nfs/pnfs.h | 74
-rw-r--r-- fs/nfs/pnfs_dev.c | 64
-rw-r--r-- fs/nfs/read.c | 166
-rw-r--r-- fs/nfs/unlink.c | 37
-rw-r--r-- fs/nfs/write.c | 156
-rw-r--r-- include/linux/nfs4.h | 3
-rw-r--r-- include/linux/nfs_fs_sb.h | 5
-rw-r--r-- include/linux/nfs_page.h | 17
-rw-r--r-- include/linux/nfs_xdr.h | 34
-rw-r--r-- include/linux/pnfs_osd_xdr.h | 31
-rw-r--r-- include/linux/sunrpc/bc_xprt.h | 6
-rw-r--r-- include/linux/sunrpc/sched.h | 4
-rw-r--r-- include/linux/sunrpc/svc.h | 4
-rw-r--r-- include/linux/sunrpc/xprt.h | 34
-rw-r--r-- net/sunrpc/Kconfig | 4
-rw-r--r-- net/sunrpc/Makefile | 2
-rw-r--r-- net/sunrpc/backchannel_rqst.c | 7
-rw-r--r-- net/sunrpc/bc_svc.c | 3
-rw-r--r-- net/sunrpc/clnt.c | 15
-rw-r--r-- net/sunrpc/sched.c | 38
-rw-r--r-- net/sunrpc/svc.c | 6
-rw-r--r-- net/sunrpc/svcsock.c | 14
-rw-r--r-- net/sunrpc/xdr.c | 2
-rw-r--r-- net/sunrpc/xprt.c | 257
-rw-r--r-- net/sunrpc/xprtrdma/transport.c | 6
-rw-r--r-- net/sunrpc/xprtrdma/xprt_rdma.h | 2
-rw-r--r-- net/sunrpc/xprtsock.c | 57
44 files changed, 1856 insertions, 632 deletions
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index e374050a911c..8392cb85bd54 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -302,7 +302,8 @@ nlmclnt_call(struct rpc_cred *cred, struct nlm_rqst *req, u32 proc)
302 /* We appear to be out of the grace period */ 302 /* We appear to be out of the grace period */
303 wake_up_all(&host->h_gracewait); 303 wake_up_all(&host->h_gracewait);
304 } 304 }
305 dprintk("lockd: server returns status %d\n", resp->status); 305 dprintk("lockd: server returns status %d\n",
306 ntohl(resp->status));
306 return 0; /* Okay, call complete */ 307 return 0; /* Okay, call complete */
307 } 308 }
308 309
@@ -690,7 +691,8 @@ nlmclnt_unlock(struct nlm_rqst *req, struct file_lock *fl)
690 goto out; 691 goto out;
691 692
692 if (resp->status != nlm_lck_denied_nolocks) 693 if (resp->status != nlm_lck_denied_nolocks)
693 printk("lockd: unexpected unlock status: %d\n", resp->status); 694 printk("lockd: unexpected unlock status: %d\n",
695 ntohl(resp->status));
694 /* What to do now? I'm out of my depth... */ 696 /* What to do now? I'm out of my depth... */
695 status = -ENOLCK; 697 status = -ENOLCK;
696out: 698out:
@@ -843,6 +845,7 @@ nlm_stat_to_errno(__be32 status)
843 return -ENOLCK; 845 return -ENOLCK;
844#endif 846#endif
845 } 847 }
846 printk(KERN_NOTICE "lockd: unexpected server status %d\n", status); 848 printk(KERN_NOTICE "lockd: unexpected server status %d\n",
849 ntohl(status));
847 return -ENOLCK; 850 return -ENOLCK;
848} 851}
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index 81515545ba75..2cde5d954750 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -77,6 +77,7 @@ config NFS_V4
77config NFS_V4_1 77config NFS_V4_1
78 bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" 78 bool "NFS client support for NFSv4.1 (EXPERIMENTAL)"
79 depends on NFS_FS && NFS_V4 && EXPERIMENTAL 79 depends on NFS_FS && NFS_V4 && EXPERIMENTAL
80 select SUNRPC_BACKCHANNEL
80 select PNFS_FILE_LAYOUT 81 select PNFS_FILE_LAYOUT
81 help 82 help
82 This option enables support for minor version 1 of the NFSv4 protocol 83 This option enables support for minor version 1 of the NFSv4 protocol
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index d4d1954e9bb9..74780f9f852c 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -111,6 +111,7 @@ int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, const nf
111static u32 initiate_file_draining(struct nfs_client *clp, 111static u32 initiate_file_draining(struct nfs_client *clp,
112 struct cb_layoutrecallargs *args) 112 struct cb_layoutrecallargs *args)
113{ 113{
114 struct nfs_server *server;
114 struct pnfs_layout_hdr *lo; 115 struct pnfs_layout_hdr *lo;
115 struct inode *ino; 116 struct inode *ino;
116 bool found = false; 117 bool found = false;
@@ -118,21 +119,28 @@ static u32 initiate_file_draining(struct nfs_client *clp,
118 LIST_HEAD(free_me_list); 119 LIST_HEAD(free_me_list);
119 120
120 spin_lock(&clp->cl_lock); 121 spin_lock(&clp->cl_lock);
121 list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) { 122 rcu_read_lock();
122 if (nfs_compare_fh(&args->cbl_fh, 123 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
123 &NFS_I(lo->plh_inode)->fh)) 124 list_for_each_entry(lo, &server->layouts, plh_layouts) {
124 continue; 125 if (nfs_compare_fh(&args->cbl_fh,
125 ino = igrab(lo->plh_inode); 126 &NFS_I(lo->plh_inode)->fh))
126 if (!ino) 127 continue;
127 continue; 128 ino = igrab(lo->plh_inode);
128 found = true; 129 if (!ino)
129 /* Without this, layout can be freed as soon 130 continue;
130 * as we release cl_lock. 131 found = true;
131 */ 132 /* Without this, layout can be freed as soon
132 get_layout_hdr(lo); 133 * as we release cl_lock.
133 break; 134 */
135 get_layout_hdr(lo);
136 break;
137 }
138 if (found)
139 break;
134 } 140 }
141 rcu_read_unlock();
135 spin_unlock(&clp->cl_lock); 142 spin_unlock(&clp->cl_lock);
143
136 if (!found) 144 if (!found)
137 return NFS4ERR_NOMATCHING_LAYOUT; 145 return NFS4ERR_NOMATCHING_LAYOUT;
138 146
@@ -154,6 +162,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,
154static u32 initiate_bulk_draining(struct nfs_client *clp, 162static u32 initiate_bulk_draining(struct nfs_client *clp,
155 struct cb_layoutrecallargs *args) 163 struct cb_layoutrecallargs *args)
156{ 164{
165 struct nfs_server *server;
157 struct pnfs_layout_hdr *lo; 166 struct pnfs_layout_hdr *lo;
158 struct inode *ino; 167 struct inode *ino;
159 u32 rv = NFS4ERR_NOMATCHING_LAYOUT; 168 u32 rv = NFS4ERR_NOMATCHING_LAYOUT;
@@ -167,18 +176,24 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
167 }; 176 };
168 177
169 spin_lock(&clp->cl_lock); 178 spin_lock(&clp->cl_lock);
170 list_for_each_entry(lo, &clp->cl_layouts, plh_layouts) { 179 rcu_read_lock();
180 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
171 if ((args->cbl_recall_type == RETURN_FSID) && 181 if ((args->cbl_recall_type == RETURN_FSID) &&
172 memcmp(&NFS_SERVER(lo->plh_inode)->fsid, 182 memcmp(&server->fsid, &args->cbl_fsid,
173 &args->cbl_fsid, sizeof(struct nfs_fsid))) 183 sizeof(struct nfs_fsid)))
174 continue;
175 if (!igrab(lo->plh_inode))
176 continue; 184 continue;
177 get_layout_hdr(lo); 185
178 BUG_ON(!list_empty(&lo->plh_bulk_recall)); 186 list_for_each_entry(lo, &server->layouts, plh_layouts) {
179 list_add(&lo->plh_bulk_recall, &recall_list); 187 if (!igrab(lo->plh_inode))
188 continue;
189 get_layout_hdr(lo);
190 BUG_ON(!list_empty(&lo->plh_bulk_recall));
191 list_add(&lo->plh_bulk_recall, &recall_list);
192 }
180 } 193 }
194 rcu_read_unlock();
181 spin_unlock(&clp->cl_lock); 195 spin_unlock(&clp->cl_lock);
196
182 list_for_each_entry_safe(lo, tmp, 197 list_for_each_entry_safe(lo, tmp,
183 &recall_list, plh_bulk_recall) { 198 &recall_list, plh_bulk_recall) {
184 ino = lo->plh_inode; 199 ino = lo->plh_inode;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index b3dc2b88b65b..19ea7d9c75e6 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -188,9 +188,6 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
188 cred = rpc_lookup_machine_cred(); 188 cred = rpc_lookup_machine_cred();
189 if (!IS_ERR(cred)) 189 if (!IS_ERR(cred))
190 clp->cl_machine_cred = cred; 190 clp->cl_machine_cred = cred;
191#if defined(CONFIG_NFS_V4_1)
192 INIT_LIST_HEAD(&clp->cl_layouts);
193#endif
194 nfs_fscache_get_client_cookie(clp); 191 nfs_fscache_get_client_cookie(clp);
195 192
196 return clp; 193 return clp;
@@ -293,6 +290,7 @@ static void nfs_free_client(struct nfs_client *clp)
293 nfs4_deviceid_purge_client(clp); 290 nfs4_deviceid_purge_client(clp);
294 291
295 kfree(clp->cl_hostname); 292 kfree(clp->cl_hostname);
293 kfree(clp->server_scope);
296 kfree(clp); 294 kfree(clp);
297 295
298 dprintk("<-- nfs_free_client()\n"); 296 dprintk("<-- nfs_free_client()\n");
@@ -1062,6 +1060,7 @@ static struct nfs_server *nfs_alloc_server(void)
1062 INIT_LIST_HEAD(&server->client_link); 1060 INIT_LIST_HEAD(&server->client_link);
1063 INIT_LIST_HEAD(&server->master_link); 1061 INIT_LIST_HEAD(&server->master_link);
1064 INIT_LIST_HEAD(&server->delegations); 1062 INIT_LIST_HEAD(&server->delegations);
1063 INIT_LIST_HEAD(&server->layouts);
1065 1064
1066 atomic_set(&server->active, 0); 1065 atomic_set(&server->active, 0);
1067 1066
@@ -1464,7 +1463,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp,
1464 dprintk("<-- %s %p\n", __func__, clp); 1463 dprintk("<-- %s %p\n", __func__, clp);
1465 return clp; 1464 return clp;
1466} 1465}
1467EXPORT_SYMBOL(nfs4_set_ds_client); 1466EXPORT_SYMBOL_GPL(nfs4_set_ds_client);
1468 1467
1469/* 1468/*
1470 * Session has been established, and the client marked ready. 1469 * Session has been established, and the client marked ready.
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index dd25c2aec375..321a66bc3846 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -398,12 +398,11 @@ int nfs_inode_return_delegation(struct inode *inode)
398 return err; 398 return err;
399} 399}
400 400
401static void nfs_mark_return_delegation(struct nfs_delegation *delegation) 401static void nfs_mark_return_delegation(struct nfs_server *server,
402 struct nfs_delegation *delegation)
402{ 403{
403 struct nfs_client *clp = NFS_SERVER(delegation->inode)->nfs_client;
404
405 set_bit(NFS_DELEGATION_RETURN, &delegation->flags); 404 set_bit(NFS_DELEGATION_RETURN, &delegation->flags);
406 set_bit(NFS4CLNT_DELEGRETURN, &clp->cl_state); 405 set_bit(NFS4CLNT_DELEGRETURN, &server->nfs_client->cl_state);
407} 406}
408 407
409/** 408/**
@@ -441,7 +440,7 @@ static void nfs_mark_return_all_delegation_types(struct nfs_server *server,
441 if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE)) 440 if ((delegation->type == (FMODE_READ|FMODE_WRITE)) && !(flags & FMODE_WRITE))
442 continue; 441 continue;
443 if (delegation->type & flags) 442 if (delegation->type & flags)
444 nfs_mark_return_delegation(delegation); 443 nfs_mark_return_delegation(server, delegation);
445 } 444 }
446} 445}
447 446
@@ -508,7 +507,7 @@ static void nfs_mark_return_unreferenced_delegations(struct nfs_server *server)
508 list_for_each_entry_rcu(delegation, &server->delegations, super_list) { 507 list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
509 if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags)) 508 if (test_and_clear_bit(NFS_DELEGATION_REFERENCED, &delegation->flags))
510 continue; 509 continue;
511 nfs_mark_return_delegation(delegation); 510 nfs_mark_return_delegation(server, delegation);
512 } 511 }
513} 512}
514 513
@@ -539,7 +538,8 @@ void nfs_expire_unreferenced_delegations(struct nfs_client *clp)
539int nfs_async_inode_return_delegation(struct inode *inode, 538int nfs_async_inode_return_delegation(struct inode *inode,
540 const nfs4_stateid *stateid) 539 const nfs4_stateid *stateid)
541{ 540{
542 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; 541 struct nfs_server *server = NFS_SERVER(inode);
542 struct nfs_client *clp = server->nfs_client;
543 struct nfs_delegation *delegation; 543 struct nfs_delegation *delegation;
544 544
545 rcu_read_lock(); 545 rcu_read_lock();
@@ -549,7 +549,7 @@ int nfs_async_inode_return_delegation(struct inode *inode,
549 rcu_read_unlock(); 549 rcu_read_unlock();
550 return -ENOENT; 550 return -ENOENT;
551 } 551 }
552 nfs_mark_return_delegation(delegation); 552 nfs_mark_return_delegation(server, delegation);
553 rcu_read_unlock(); 553 rcu_read_unlock();
554 554
555 nfs_delegation_run_state_manager(clp); 555 nfs_delegation_run_state_manager(clp);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 2a55347a2daa..ab12913dd473 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -277,6 +277,9 @@ extern void nfs_sb_deactive(struct super_block *sb);
277extern char *nfs_path(char **p, struct dentry *dentry, 277extern char *nfs_path(char **p, struct dentry *dentry,
278 char *buffer, ssize_t buflen); 278 char *buffer, ssize_t buflen);
279extern struct vfsmount *nfs_d_automount(struct path *path); 279extern struct vfsmount *nfs_d_automount(struct path *path);
280#ifdef CONFIG_NFS_V4
281rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *);
282#endif
280 283
281/* getroot.c */ 284/* getroot.c */
282extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *, 285extern struct dentry *nfs_get_root(struct super_block *, struct nfs_fh *,
@@ -288,12 +291,22 @@ extern struct dentry *nfs4_get_root(struct super_block *, struct nfs_fh *,
288extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh); 291extern int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh);
289#endif 292#endif
290 293
294struct nfs_pageio_descriptor;
291/* read.c */ 295/* read.c */
292extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, 296extern int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
293 const struct rpc_call_ops *call_ops); 297 const struct rpc_call_ops *call_ops);
294extern void nfs_read_prepare(struct rpc_task *task, void *calldata); 298extern void nfs_read_prepare(struct rpc_task *task, void *calldata);
299extern int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
300 struct list_head *head);
301
302extern void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio);
303extern void nfs_readdata_release(struct nfs_read_data *rdata);
295 304
296/* write.c */ 305/* write.c */
306extern int nfs_generic_flush(struct nfs_pageio_descriptor *desc,
307 struct list_head *head);
308extern void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio);
309extern void nfs_writedata_release(struct nfs_write_data *wdata);
297extern void nfs_commit_free(struct nfs_write_data *p); 310extern void nfs_commit_free(struct nfs_write_data *p);
298extern int nfs_initiate_write(struct nfs_write_data *data, 311extern int nfs_initiate_write(struct nfs_write_data *data,
299 struct rpc_clnt *clnt, 312 struct rpc_clnt *clnt,
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 1f063bacd285..8102391bb374 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -119,7 +119,7 @@ Elong:
119} 119}
120 120
121#ifdef CONFIG_NFS_V4 121#ifdef CONFIG_NFS_V4
122static rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors) 122rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors)
123{ 123{
124 struct gss_api_mech *mech; 124 struct gss_api_mech *mech;
125 struct xdr_netobj oid; 125 struct xdr_netobj oid;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index b788f2eb1ba0..1909ee8be350 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -48,6 +48,7 @@ enum nfs4_client_state {
48 NFS4CLNT_SESSION_RESET, 48 NFS4CLNT_SESSION_RESET,
49 NFS4CLNT_RECALL_SLOT, 49 NFS4CLNT_RECALL_SLOT,
50 NFS4CLNT_LEASE_CONFIRM, 50 NFS4CLNT_LEASE_CONFIRM,
51 NFS4CLNT_SERVER_SCOPE_MISMATCH,
51}; 52};
52 53
53enum nfs4_session_state { 54enum nfs4_session_state {
@@ -66,6 +67,8 @@ struct nfs4_minor_version_ops {
66 int cache_reply); 67 int cache_reply);
67 int (*validate_stateid)(struct nfs_delegation *, 68 int (*validate_stateid)(struct nfs_delegation *,
68 const nfs4_stateid *); 69 const nfs4_stateid *);
70 int (*find_root_sec)(struct nfs_server *, struct nfs_fh *,
71 struct nfs_fsinfo *);
69 const struct nfs4_state_recovery_ops *reboot_recovery_ops; 72 const struct nfs4_state_recovery_ops *reboot_recovery_ops;
70 const struct nfs4_state_recovery_ops *nograce_recovery_ops; 73 const struct nfs4_state_recovery_ops *nograce_recovery_ops;
71 const struct nfs4_state_maintenance_ops *state_renewal_ops; 74 const struct nfs4_state_maintenance_ops *state_renewal_ops;
@@ -349,6 +352,8 @@ extern void nfs4_schedule_state_manager(struct nfs_client *);
349extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *); 352extern void nfs4_schedule_stateid_recovery(const struct nfs_server *, struct nfs4_state *);
350extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags); 353extern void nfs41_handle_sequence_flag_errors(struct nfs_client *clp, u32 flags);
351extern void nfs41_handle_recall_slot(struct nfs_client *clp); 354extern void nfs41_handle_recall_slot(struct nfs_client *clp);
355extern void nfs41_handle_server_scope(struct nfs_client *,
356 struct server_scope **);
352extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); 357extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
353extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); 358extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
354extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t); 359extern void nfs4_copy_stateid(nfs4_stateid *, struct nfs4_state *, fl_owner_t, pid_t);
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index f9d03abcd04c..be93a622872c 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -334,6 +334,9 @@ filelayout_read_pagelist(struct nfs_read_data *data)
334 __func__, data->inode->i_ino, 334 __func__, data->inode->i_ino,
335 data->args.pgbase, (size_t)data->args.count, offset); 335 data->args.pgbase, (size_t)data->args.count, offset);
336 336
337 if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
338 return PNFS_NOT_ATTEMPTED;
339
337 /* Retrieve the correct rpc_client for the byte range */ 340 /* Retrieve the correct rpc_client for the byte range */
338 j = nfs4_fl_calc_j_index(lseg, offset); 341 j = nfs4_fl_calc_j_index(lseg, offset);
339 idx = nfs4_fl_calc_ds_index(lseg, j); 342 idx = nfs4_fl_calc_ds_index(lseg, j);
@@ -344,8 +347,7 @@ filelayout_read_pagelist(struct nfs_read_data *data)
344 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); 347 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
345 return PNFS_NOT_ATTEMPTED; 348 return PNFS_NOT_ATTEMPTED;
346 } 349 }
347 dprintk("%s USE DS:ip %x %hu\n", __func__, 350 dprintk("%s USE DS: %s\n", __func__, ds->ds_remotestr);
348 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port));
349 351
350 /* No multipath support. Use first DS */ 352 /* No multipath support. Use first DS */
351 data->ds_clp = ds->ds_clp; 353 data->ds_clp = ds->ds_clp;
@@ -374,6 +376,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
374 struct nfs_fh *fh; 376 struct nfs_fh *fh;
375 int status; 377 int status;
376 378
379 if (test_bit(NFS_DEVICEID_INVALID, &FILELAYOUT_DEVID_NODE(lseg)->flags))
380 return PNFS_NOT_ATTEMPTED;
381
377 /* Retrieve the correct rpc_client for the byte range */ 382 /* Retrieve the correct rpc_client for the byte range */
378 j = nfs4_fl_calc_j_index(lseg, offset); 383 j = nfs4_fl_calc_j_index(lseg, offset);
379 idx = nfs4_fl_calc_ds_index(lseg, j); 384 idx = nfs4_fl_calc_ds_index(lseg, j);
@@ -384,9 +389,9 @@ filelayout_write_pagelist(struct nfs_write_data *data, int sync)
384 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags); 389 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
385 return PNFS_NOT_ATTEMPTED; 390 return PNFS_NOT_ATTEMPTED;
386 } 391 }
387 dprintk("%s ino %lu sync %d req %Zu@%llu DS:%x:%hu\n", __func__, 392 dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s\n", __func__,
388 data->inode->i_ino, sync, (size_t) data->args.count, offset, 393 data->inode->i_ino, sync, (size_t) data->args.count, offset,
389 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); 394 ds->ds_remotestr);
390 395
391 data->write_done_cb = filelayout_write_done_cb; 396 data->write_done_cb = filelayout_write_done_cb;
392 data->ds_clp = ds->ds_clp; 397 data->ds_clp = ds->ds_clp;
@@ -428,6 +433,14 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
428 433
429 dprintk("--> %s\n", __func__); 434 dprintk("--> %s\n", __func__);
430 435
436 /* FIXME: remove this check when layout segment support is added */
437 if (lgr->range.offset != 0 ||
438 lgr->range.length != NFS4_MAX_UINT64) {
439 dprintk("%s Only whole file layouts supported. Use MDS i/o\n",
440 __func__);
441 goto out;
442 }
443
431 if (fl->pattern_offset > lgr->range.offset) { 444 if (fl->pattern_offset > lgr->range.offset) {
432 dprintk("%s pattern_offset %lld too large\n", 445 dprintk("%s pattern_offset %lld too large\n",
433 __func__, fl->pattern_offset); 446 __func__, fl->pattern_offset);
@@ -449,6 +462,10 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
449 goto out; 462 goto out;
450 } else 463 } else
451 dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); 464 dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
465 /* Found deviceid is being reaped */
466 if (test_bit(NFS_DEVICEID_INVALID, &dsaddr->id_node.flags))
467 goto out_put;
468
452 fl->dsaddr = dsaddr; 469 fl->dsaddr = dsaddr;
453 470
454 if (fl->first_stripe_index < 0 || 471 if (fl->first_stripe_index < 0 ||
@@ -659,7 +676,7 @@ filelayout_alloc_lseg(struct pnfs_layout_hdr *layoutid,
659 * return true : coalesce page 676 * return true : coalesce page
660 * return false : don't coalesce page 677 * return false : don't coalesce page
661 */ 678 */
662bool 679static bool
663filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, 680filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
664 struct nfs_page *req) 681 struct nfs_page *req)
665{ 682{
@@ -670,8 +687,6 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
670 !nfs_generic_pg_test(pgio, prev, req)) 687 !nfs_generic_pg_test(pgio, prev, req))
671 return false; 688 return false;
672 689
673 if (!pgio->pg_lseg)
674 return 1;
675 p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT; 690 p_stripe = (u64)prev->wb_index << PAGE_CACHE_SHIFT;
676 r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT; 691 r_stripe = (u64)req->wb_index << PAGE_CACHE_SHIFT;
677 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit; 692 stripe_unit = FILELAYOUT_LSEG(pgio->pg_lseg)->stripe_unit;
@@ -682,6 +697,52 @@ filelayout_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
682 return (p_stripe == r_stripe); 697 return (p_stripe == r_stripe);
683} 698}
684 699
700void
701filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
702 struct nfs_page *req)
703{
704 BUG_ON(pgio->pg_lseg != NULL);
705
706 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
707 req->wb_context,
708 0,
709 NFS4_MAX_UINT64,
710 IOMODE_READ,
711 GFP_KERNEL);
712 /* If no lseg, fall back to read through mds */
713 if (pgio->pg_lseg == NULL)
714 nfs_pageio_reset_read_mds(pgio);
715}
716
717void
718filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
719 struct nfs_page *req)
720{
721 BUG_ON(pgio->pg_lseg != NULL);
722
723 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
724 req->wb_context,
725 0,
726 NFS4_MAX_UINT64,
727 IOMODE_RW,
728 GFP_NOFS);
729 /* If no lseg, fall back to write through mds */
730 if (pgio->pg_lseg == NULL)
731 nfs_pageio_reset_write_mds(pgio);
732}
733
734static const struct nfs_pageio_ops filelayout_pg_read_ops = {
735 .pg_init = filelayout_pg_init_read,
736 .pg_test = filelayout_pg_test,
737 .pg_doio = pnfs_generic_pg_readpages,
738};
739
740static const struct nfs_pageio_ops filelayout_pg_write_ops = {
741 .pg_init = filelayout_pg_init_write,
742 .pg_test = filelayout_pg_test,
743 .pg_doio = pnfs_generic_pg_writepages,
744};
745
685static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg) 746static bool filelayout_mark_pnfs_commit(struct pnfs_layout_segment *lseg)
686{ 747{
687 return !FILELAYOUT_LSEG(lseg)->commit_through_mds; 748 return !FILELAYOUT_LSEG(lseg)->commit_through_mds;
@@ -879,7 +940,8 @@ static struct pnfs_layoutdriver_type filelayout_type = {
879 .owner = THIS_MODULE, 940 .owner = THIS_MODULE,
880 .alloc_lseg = filelayout_alloc_lseg, 941 .alloc_lseg = filelayout_alloc_lseg,
881 .free_lseg = filelayout_free_lseg, 942 .free_lseg = filelayout_free_lseg,
882 .pg_test = filelayout_pg_test, 943 .pg_read_ops = &filelayout_pg_read_ops,
944 .pg_write_ops = &filelayout_pg_write_ops,
883 .mark_pnfs_commit = filelayout_mark_pnfs_commit, 945 .mark_pnfs_commit = filelayout_mark_pnfs_commit,
884 .choose_commit_list = filelayout_choose_commit_list, 946 .choose_commit_list = filelayout_choose_commit_list,
885 .commit_pagelist = filelayout_commit_pagelist, 947 .commit_pagelist = filelayout_commit_pagelist,
@@ -902,5 +964,7 @@ static void __exit nfs4filelayout_exit(void)
902 pnfs_unregister_layoutdriver(&filelayout_type); 964 pnfs_unregister_layoutdriver(&filelayout_type);
903} 965}
904 966
967MODULE_ALIAS("nfs-layouttype4-1");
968
905module_init(nfs4filelayout_init); 969module_init(nfs4filelayout_init);
906module_exit(nfs4filelayout_exit); 970module_exit(nfs4filelayout_exit);
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index cebe01e3795e..2e42284253fa 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -47,10 +47,17 @@ enum stripetype4 {
47}; 47};
48 48
49/* Individual ip address */ 49/* Individual ip address */
50struct nfs4_pnfs_ds_addr {
51 struct sockaddr_storage da_addr;
52 size_t da_addrlen;
53 struct list_head da_node; /* nfs4_pnfs_dev_hlist dev_dslist */
54 char *da_remotestr; /* human readable addr+port */
55};
56
50struct nfs4_pnfs_ds { 57struct nfs4_pnfs_ds {
51 struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */ 58 struct list_head ds_node; /* nfs4_pnfs_dev_hlist dev_dslist */
52 u32 ds_ip_addr; 59 char *ds_remotestr; /* comma sep list of addrs */
53 u32 ds_port; 60 struct list_head ds_addrs;
54 struct nfs_client *ds_clp; 61 struct nfs_client *ds_clp;
55 atomic_t ds_count; 62 atomic_t ds_count;
56}; 63};
@@ -89,6 +96,12 @@ FILELAYOUT_LSEG(struct pnfs_layout_segment *lseg)
89 generic_hdr); 96 generic_hdr);
90} 97}
91 98
99static inline struct nfs4_deviceid_node *
100FILELAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg)
101{
102 return &FILELAYOUT_LSEG(lseg)->dsaddr->id_node;
103}
104
92extern struct nfs_fh * 105extern struct nfs_fh *
93nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); 106nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j);
94 107
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index 3b7bf1377264..ed388aae9689 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -56,54 +56,139 @@ print_ds(struct nfs4_pnfs_ds *ds)
56 printk("%s NULL device\n", __func__); 56 printk("%s NULL device\n", __func__);
57 return; 57 return;
58 } 58 }
59 printk(" ip_addr %x port %hu\n" 59 printk(" ds %s\n"
60 " ref count %d\n" 60 " ref count %d\n"
61 " client %p\n" 61 " client %p\n"
62 " cl_exchange_flags %x\n", 62 " cl_exchange_flags %x\n",
63 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port), 63 ds->ds_remotestr,
64 atomic_read(&ds->ds_count), ds->ds_clp, 64 atomic_read(&ds->ds_count), ds->ds_clp,
65 ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0); 65 ds->ds_clp ? ds->ds_clp->cl_exchange_flags : 0);
66} 66}
67 67
68/* nfs4_ds_cache_lock is held */ 68static bool
69static struct nfs4_pnfs_ds * 69same_sockaddr(struct sockaddr *addr1, struct sockaddr *addr2)
70_data_server_lookup_locked(u32 ip_addr, u32 port)
71{ 70{
72 struct nfs4_pnfs_ds *ds; 71 struct sockaddr_in *a, *b;
72 struct sockaddr_in6 *a6, *b6;
73
74 if (addr1->sa_family != addr2->sa_family)
75 return false;
76
77 switch (addr1->sa_family) {
78 case AF_INET:
79 a = (struct sockaddr_in *)addr1;
80 b = (struct sockaddr_in *)addr2;
81
82 if (a->sin_addr.s_addr == b->sin_addr.s_addr &&
83 a->sin_port == b->sin_port)
84 return true;
85 break;
86
87 case AF_INET6:
88 a6 = (struct sockaddr_in6 *)addr1;
89 b6 = (struct sockaddr_in6 *)addr2;
90
91 /* LINKLOCAL addresses must have matching scope_id */
92 if (ipv6_addr_scope(&a6->sin6_addr) ==
93 IPV6_ADDR_SCOPE_LINKLOCAL &&
94 a6->sin6_scope_id != b6->sin6_scope_id)
95 return false;
96
97 if (ipv6_addr_equal(&a6->sin6_addr, &b6->sin6_addr) &&
98 a6->sin6_port == b6->sin6_port)
99 return true;
100 break;
101
102 default:
103 dprintk("%s: unhandled address family: %u\n",
104 __func__, addr1->sa_family);
105 return false;
106 }
73 107
74 dprintk("_data_server_lookup: ip_addr=%x port=%hu\n", 108 return false;
75 ntohl(ip_addr), ntohs(port)); 109}
76 110
77 list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) { 111/*
78 if (ds->ds_ip_addr == ip_addr && 112 * Lookup DS by addresses. The first matching address returns true.
79 ds->ds_port == port) { 113 * nfs4_ds_cache_lock is held
80 return ds; 114 */
115static struct nfs4_pnfs_ds *
116_data_server_lookup_locked(struct list_head *dsaddrs)
117{
118 struct nfs4_pnfs_ds *ds;
119 struct nfs4_pnfs_ds_addr *da1, *da2;
120
121 list_for_each_entry(da1, dsaddrs, da_node) {
122 list_for_each_entry(ds, &nfs4_data_server_cache, ds_node) {
123 list_for_each_entry(da2, &ds->ds_addrs, da_node) {
124 if (same_sockaddr(
125 (struct sockaddr *)&da1->da_addr,
126 (struct sockaddr *)&da2->da_addr))
127 return ds;
128 }
81 } 129 }
82 } 130 }
83 return NULL; 131 return NULL;
84} 132}
85 133
86/* 134/*
135 * Compare two lists of addresses.
136 */
137static bool
138_data_server_match_all_addrs_locked(struct list_head *dsaddrs1,
139 struct list_head *dsaddrs2)
140{
141 struct nfs4_pnfs_ds_addr *da1, *da2;
142 size_t count1 = 0,
143 count2 = 0;
144
145 list_for_each_entry(da1, dsaddrs1, da_node)
146 count1++;
147
148 list_for_each_entry(da2, dsaddrs2, da_node) {
149 bool found = false;
150 count2++;
151 list_for_each_entry(da1, dsaddrs1, da_node) {
152 if (same_sockaddr((struct sockaddr *)&da1->da_addr,
153 (struct sockaddr *)&da2->da_addr)) {
154 found = true;
155 break;
156 }
157 }
158 if (!found)
159 return false;
160 }
161
162 return (count1 == count2);
163}
164
165/*
87 * Create an rpc connection to the nfs4_pnfs_ds data server 166 * Create an rpc connection to the nfs4_pnfs_ds data server
88 * Currently only support IPv4 167 * Currently only supports IPv4 and IPv6 addresses
89 */ 168 */
90static int 169static int
91nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds) 170nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
92{ 171{
93 struct nfs_client *clp; 172 struct nfs_client *clp = ERR_PTR(-EIO);
94 struct sockaddr_in sin; 173 struct nfs4_pnfs_ds_addr *da;
95 int status = 0; 174 int status = 0;
96 175
97 dprintk("--> %s ip:port %x:%hu au_flavor %d\n", __func__, 176 dprintk("--> %s DS %s au_flavor %d\n", __func__, ds->ds_remotestr,
98 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port),
99 mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor); 177 mds_srv->nfs_client->cl_rpcclient->cl_auth->au_flavor);
100 178
101 sin.sin_family = AF_INET; 179 BUG_ON(list_empty(&ds->ds_addrs));
102 sin.sin_addr.s_addr = ds->ds_ip_addr; 180
103 sin.sin_port = ds->ds_port; 181 list_for_each_entry(da, &ds->ds_addrs, da_node) {
182 dprintk("%s: DS %s: trying address %s\n",
183 __func__, ds->ds_remotestr, da->da_remotestr);
184
185 clp = nfs4_set_ds_client(mds_srv->nfs_client,
186 (struct sockaddr *)&da->da_addr,
187 da->da_addrlen, IPPROTO_TCP);
188 if (!IS_ERR(clp))
189 break;
190 }
104 191
105 clp = nfs4_set_ds_client(mds_srv->nfs_client, (struct sockaddr *)&sin,
106 sizeof(sin), IPPROTO_TCP);
107 if (IS_ERR(clp)) { 192 if (IS_ERR(clp)) {
108 status = PTR_ERR(clp); 193 status = PTR_ERR(clp);
109 goto out; 194 goto out;
@@ -115,8 +200,8 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
115 goto out_put; 200 goto out_put;
116 } 201 }
117 ds->ds_clp = clp; 202 ds->ds_clp = clp;
118 dprintk("%s [existing] ip=%x, port=%hu\n", __func__, 203 dprintk("%s [existing] server=%s\n", __func__,
119 ntohl(ds->ds_ip_addr), ntohs(ds->ds_port)); 204 ds->ds_remotestr);
120 goto out; 205 goto out;
121 } 206 }
122 207
@@ -135,8 +220,7 @@ nfs4_ds_connect(struct nfs_server *mds_srv, struct nfs4_pnfs_ds *ds)
135 goto out_put; 220 goto out_put;
136 221
137 ds->ds_clp = clp; 222 ds->ds_clp = clp;
138 dprintk("%s [new] ip=%x, port=%hu\n", __func__, ntohl(ds->ds_ip_addr), 223 dprintk("%s [new] addr: %s\n", __func__, ds->ds_remotestr);
139 ntohs(ds->ds_port));
140out: 224out:
141 return status; 225 return status;
142out_put: 226out_put:
@@ -147,12 +231,25 @@ out_put:
147static void 231static void
148destroy_ds(struct nfs4_pnfs_ds *ds) 232destroy_ds(struct nfs4_pnfs_ds *ds)
149{ 233{
234 struct nfs4_pnfs_ds_addr *da;
235
150 dprintk("--> %s\n", __func__); 236 dprintk("--> %s\n", __func__);
151 ifdebug(FACILITY) 237 ifdebug(FACILITY)
152 print_ds(ds); 238 print_ds(ds);
153 239
154 if (ds->ds_clp) 240 if (ds->ds_clp)
155 nfs_put_client(ds->ds_clp); 241 nfs_put_client(ds->ds_clp);
242
243 while (!list_empty(&ds->ds_addrs)) {
244 da = list_first_entry(&ds->ds_addrs,
245 struct nfs4_pnfs_ds_addr,
246 da_node);
247 list_del_init(&da->da_node);
248 kfree(da->da_remotestr);
249 kfree(da);
250 }
251
252 kfree(ds->ds_remotestr);
156 kfree(ds); 253 kfree(ds);
157} 254}
158 255
@@ -179,31 +276,96 @@ nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr)
179 kfree(dsaddr); 276 kfree(dsaddr);
180} 277}
181 278
279/*
280 * Create a string with a human readable address and port to avoid
281 * complicated setup around many dprinks.
282 */
283static char *
284nfs4_pnfs_remotestr(struct list_head *dsaddrs, gfp_t gfp_flags)
285{
286 struct nfs4_pnfs_ds_addr *da;
287 char *remotestr;
288 size_t len;
289 char *p;
290
291 len = 3; /* '{', '}' and eol */
292 list_for_each_entry(da, dsaddrs, da_node) {
293 len += strlen(da->da_remotestr) + 1; /* string plus comma */
294 }
295
296 remotestr = kzalloc(len, gfp_flags);
297 if (!remotestr)
298 return NULL;
299
300 p = remotestr;
301 *(p++) = '{';
302 len--;
303 list_for_each_entry(da, dsaddrs, da_node) {
304 size_t ll = strlen(da->da_remotestr);
305
306 if (ll > len)
307 goto out_err;
308
309 memcpy(p, da->da_remotestr, ll);
310 p += ll;
311 len -= ll;
312
313 if (len < 1)
314 goto out_err;
315 (*p++) = ',';
316 len--;
317 }
318 if (len < 2)
319 goto out_err;
320 *(p++) = '}';
321 *p = '\0';
322 return remotestr;
323out_err:
324 kfree(remotestr);
325 return NULL;
326}
327
182static struct nfs4_pnfs_ds * 328static struct nfs4_pnfs_ds *
183nfs4_pnfs_ds_add(struct inode *inode, u32 ip_addr, u32 port, gfp_t gfp_flags) 329nfs4_pnfs_ds_add(struct list_head *dsaddrs, gfp_t gfp_flags)
184{ 330{
185 struct nfs4_pnfs_ds *tmp_ds, *ds; 331 struct nfs4_pnfs_ds *tmp_ds, *ds = NULL;
332 char *remotestr;
186 333
187 ds = kzalloc(sizeof(*tmp_ds), gfp_flags); 334 if (list_empty(dsaddrs)) {
335 dprintk("%s: no addresses defined\n", __func__);
336 goto out;
337 }
338
339 ds = kzalloc(sizeof(*ds), gfp_flags);
188 if (!ds) 340 if (!ds)
189 goto out; 341 goto out;
190 342
343 /* this is only used for debugging, so it's ok if its NULL */
344 remotestr = nfs4_pnfs_remotestr(dsaddrs, gfp_flags);
345
191 spin_lock(&nfs4_ds_cache_lock); 346 spin_lock(&nfs4_ds_cache_lock);
192 tmp_ds = _data_server_lookup_locked(ip_addr, port); 347 tmp_ds = _data_server_lookup_locked(dsaddrs);
193 if (tmp_ds == NULL) { 348 if (tmp_ds == NULL) {
194 ds->ds_ip_addr = ip_addr; 349 INIT_LIST_HEAD(&ds->ds_addrs);
195 ds->ds_port = port; 350 list_splice_init(dsaddrs, &ds->ds_addrs);
351 ds->ds_remotestr = remotestr;
196 atomic_set(&ds->ds_count, 1); 352 atomic_set(&ds->ds_count, 1);
197 INIT_LIST_HEAD(&ds->ds_node); 353 INIT_LIST_HEAD(&ds->ds_node);
198 ds->ds_clp = NULL; 354 ds->ds_clp = NULL;
199 list_add(&ds->ds_node, &nfs4_data_server_cache); 355 list_add(&ds->ds_node, &nfs4_data_server_cache);
200 dprintk("%s add new data server ip 0x%x\n", __func__, 356 dprintk("%s add new data server %s\n", __func__,
201 ds->ds_ip_addr); 357 ds->ds_remotestr);
202 } else { 358 } else {
359 if (!_data_server_match_all_addrs_locked(&tmp_ds->ds_addrs,
360 dsaddrs)) {
361 dprintk("%s: multipath address mismatch: %s != %s",
362 __func__, tmp_ds->ds_remotestr, remotestr);
363 }
364 kfree(remotestr);
203 kfree(ds); 365 kfree(ds);
204 atomic_inc(&tmp_ds->ds_count); 366 atomic_inc(&tmp_ds->ds_count);
205 dprintk("%s data server found ip 0x%x, inc'ed ds_count to %d\n", 367 dprintk("%s data server %s found, inc'ed ds_count to %d\n",
206 __func__, tmp_ds->ds_ip_addr, 368 __func__, tmp_ds->ds_remotestr,
207 atomic_read(&tmp_ds->ds_count)); 369 atomic_read(&tmp_ds->ds_count));
208 ds = tmp_ds; 370 ds = tmp_ds;
209 } 371 }
@@ -213,18 +375,22 @@ out:
213} 375}
214 376
215/* 377/*
216 * Currently only support ipv4, and one multi-path address. 378 * Currently only supports ipv4, ipv6 and one multi-path address.
217 */ 379 */
218static struct nfs4_pnfs_ds * 380static struct nfs4_pnfs_ds_addr *
219decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_flags) 381decode_ds_addr(struct xdr_stream *streamp, gfp_t gfp_flags)
220{ 382{
221 struct nfs4_pnfs_ds *ds = NULL; 383 struct nfs4_pnfs_ds_addr *da = NULL;
222 char *buf; 384 char *buf, *portstr;
223 const char *ipend, *pstr; 385 u32 port;
224 u32 ip_addr, port; 386 int nlen, rlen;
225 int nlen, rlen, i;
226 int tmp[2]; 387 int tmp[2];
227 __be32 *p; 388 __be32 *p;
389 char *netid, *match_netid;
390 size_t len, match_netid_len;
391 char *startsep = "";
392 char *endsep = "";
393
228 394
229 /* r_netid */ 395 /* r_netid */
230 p = xdr_inline_decode(streamp, 4); 396 p = xdr_inline_decode(streamp, 4);
@@ -236,64 +402,123 @@ decode_and_add_ds(struct xdr_stream *streamp, struct inode *inode, gfp_t gfp_fla
236 if (unlikely(!p)) 402 if (unlikely(!p))
237 goto out_err; 403 goto out_err;
238 404
239 /* Check that netid is "tcp" */ 405 netid = kmalloc(nlen+1, gfp_flags);
240 if (nlen != 3 || memcmp((char *)p, "tcp", 3)) { 406 if (unlikely(!netid))
241 dprintk("%s: ERROR: non ipv4 TCP r_netid\n", __func__);
242 goto out_err; 407 goto out_err;
243 }
244 408
245 /* r_addr */ 409 netid[nlen] = '\0';
410 memcpy(netid, p, nlen);
411
412 /* r_addr: ip/ip6addr with port in dec octets - see RFC 5665 */
246 p = xdr_inline_decode(streamp, 4); 413 p = xdr_inline_decode(streamp, 4);
247 if (unlikely(!p)) 414 if (unlikely(!p))
248 goto out_err; 415 goto out_free_netid;
249 rlen = be32_to_cpup(p); 416 rlen = be32_to_cpup(p);
250 417
251 p = xdr_inline_decode(streamp, rlen); 418 p = xdr_inline_decode(streamp, rlen);
252 if (unlikely(!p)) 419 if (unlikely(!p))
253 goto out_err; 420 goto out_free_netid;
254 421
255 /* ipv6 length plus port is legal */ 422 /* port is ".ABC.DEF", 8 chars max */
256 if (rlen > INET6_ADDRSTRLEN + 8) { 423 if (rlen > INET6_ADDRSTRLEN + IPV6_SCOPE_ID_LEN + 8) {
257 dprintk("%s: Invalid address, length %d\n", __func__, 424 dprintk("%s: Invalid address, length %d\n", __func__,
258 rlen); 425 rlen);
259 goto out_err; 426 goto out_free_netid;
260 } 427 }
261 buf = kmalloc(rlen + 1, gfp_flags); 428 buf = kmalloc(rlen + 1, gfp_flags);
262 if (!buf) { 429 if (!buf) {
263 dprintk("%s: Not enough memory\n", __func__); 430 dprintk("%s: Not enough memory\n", __func__);
264 goto out_err; 431 goto out_free_netid;
265 } 432 }
266 buf[rlen] = '\0'; 433 buf[rlen] = '\0';
267 memcpy(buf, p, rlen); 434 memcpy(buf, p, rlen);
268 435
269 /* replace the port dots with dashes for the in4_pton() delimiter*/ 436 /* replace port '.' with '-' */
270 for (i = 0; i < 2; i++) { 437 portstr = strrchr(buf, '.');
271 char *res = strrchr(buf, '.'); 438 if (!portstr) {
272 if (!res) { 439 dprintk("%s: Failed finding expected dot in port\n",
273 dprintk("%s: Failed finding expected dots in port\n", 440 __func__);
274 __func__); 441 goto out_free_buf;
275 goto out_free; 442 }
276 } 443 *portstr = '-';
277 *res = '-'; 444
445 /* find '.' between address and port */
446 portstr = strrchr(buf, '.');
447 if (!portstr) {
448 dprintk("%s: Failed finding expected dot between address and "
449 "port\n", __func__);
450 goto out_free_buf;
278 } 451 }
452 *portstr = '\0';
279 453
280 /* Currently only support ipv4 address */ 454 da = kzalloc(sizeof(*da), gfp_flags);
281 if (in4_pton(buf, rlen, (u8 *)&ip_addr, '-', &ipend) == 0) { 455 if (unlikely(!da))
282 dprintk("%s: Only ipv4 addresses supported\n", __func__); 456 goto out_free_buf;
283 goto out_free; 457
458 INIT_LIST_HEAD(&da->da_node);
459
460 if (!rpc_pton(buf, portstr-buf, (struct sockaddr *)&da->da_addr,
461 sizeof(da->da_addr))) {
462 dprintk("%s: error parsing address %s\n", __func__, buf);
463 goto out_free_da;
284 } 464 }
285 465
286 /* port */ 466 portstr++;
287 pstr = ipend; 467 sscanf(portstr, "%d-%d", &tmp[0], &tmp[1]);
288 sscanf(pstr, "-%d-%d", &tmp[0], &tmp[1]);
289 port = htons((tmp[0] << 8) | (tmp[1])); 468 port = htons((tmp[0] << 8) | (tmp[1]));
290 469
291 ds = nfs4_pnfs_ds_add(inode, ip_addr, port, gfp_flags); 470 switch (da->da_addr.ss_family) {
292 dprintk("%s: Decoded address and port %s\n", __func__, buf); 471 case AF_INET:
293out_free: 472 ((struct sockaddr_in *)&da->da_addr)->sin_port = port;
473 da->da_addrlen = sizeof(struct sockaddr_in);
474 match_netid = "tcp";
475 match_netid_len = 3;
476 break;
477
478 case AF_INET6:
479 ((struct sockaddr_in6 *)&da->da_addr)->sin6_port = port;
480 da->da_addrlen = sizeof(struct sockaddr_in6);
481 match_netid = "tcp6";
482 match_netid_len = 4;
483 startsep = "[";
484 endsep = "]";
485 break;
486
487 default:
488 dprintk("%s: unsupported address family: %u\n",
489 __func__, da->da_addr.ss_family);
490 goto out_free_da;
491 }
492
493 if (nlen != match_netid_len || strncmp(netid, match_netid, nlen)) {
494 dprintk("%s: ERROR: r_netid \"%s\" != \"%s\"\n",
495 __func__, netid, match_netid);
496 goto out_free_da;
497 }
498
499 /* save human readable address */
500 len = strlen(startsep) + strlen(buf) + strlen(endsep) + 7;
501 da->da_remotestr = kzalloc(len, gfp_flags);
502
503 /* NULL is ok, only used for dprintk */
504 if (da->da_remotestr)
505 snprintf(da->da_remotestr, len, "%s%s%s:%u", startsep,
506 buf, endsep, ntohs(port));
507
508 dprintk("%s: Parsed DS addr %s\n", __func__, da->da_remotestr);
294 kfree(buf); 509 kfree(buf);
510 kfree(netid);
511 return da;
512
513out_free_da:
514 kfree(da);
515out_free_buf:
516 dprintk("%s: Error parsing DS addr: %s\n", __func__, buf);
517 kfree(buf);
518out_free_netid:
519 kfree(netid);
295out_err: 520out_err:
296 return ds; 521 return NULL;
297} 522}
298 523
299/* Decode opaque device data and return the result */ 524/* Decode opaque device data and return the result */
@@ -310,6 +535,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
310 struct xdr_stream stream; 535 struct xdr_stream stream;
311 struct xdr_buf buf; 536 struct xdr_buf buf;
312 struct page *scratch; 537 struct page *scratch;
538 struct list_head dsaddrs;
539 struct nfs4_pnfs_ds_addr *da;
313 540
314 /* set up xdr stream */ 541 /* set up xdr stream */
315 scratch = alloc_page(gfp_flags); 542 scratch = alloc_page(gfp_flags);
@@ -386,6 +613,8 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
386 NFS_SERVER(ino)->nfs_client, 613 NFS_SERVER(ino)->nfs_client,
387 &pdev->dev_id); 614 &pdev->dev_id);
388 615
616 INIT_LIST_HEAD(&dsaddrs);
617
389 for (i = 0; i < dsaddr->ds_num; i++) { 618 for (i = 0; i < dsaddr->ds_num; i++) {
390 int j; 619 int j;
391 u32 mp_count; 620 u32 mp_count;
@@ -395,48 +624,43 @@ decode_device(struct inode *ino, struct pnfs_device *pdev, gfp_t gfp_flags)
395 goto out_err_free_deviceid; 624 goto out_err_free_deviceid;
396 625
397 mp_count = be32_to_cpup(p); /* multipath count */ 626 mp_count = be32_to_cpup(p); /* multipath count */
398 if (mp_count > 1) {
399 printk(KERN_WARNING
400 "%s: Multipath count %d not supported, "
401 "skipping all greater than 1\n", __func__,
402 mp_count);
403 }
404 for (j = 0; j < mp_count; j++) { 627 for (j = 0; j < mp_count; j++) {
405 if (j == 0) { 628 da = decode_ds_addr(&stream, gfp_flags);
406 dsaddr->ds_list[i] = decode_and_add_ds(&stream, 629 if (da)
407 ino, gfp_flags); 630 list_add_tail(&da->da_node, &dsaddrs);
408 if (dsaddr->ds_list[i] == NULL) 631 }
409 goto out_err_free_deviceid; 632 if (list_empty(&dsaddrs)) {
410 } else { 633 dprintk("%s: no suitable DS addresses found\n",
411 u32 len; 634 __func__);
412 /* skip extra multipath */ 635 goto out_err_free_deviceid;
413 636 }
414 /* read len, skip */ 637
415 p = xdr_inline_decode(&stream, 4); 638 dsaddr->ds_list[i] = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags);
416 if (unlikely(!p)) 639 if (!dsaddr->ds_list[i])
417 goto out_err_free_deviceid; 640 goto out_err_drain_dsaddrs;
418 len = be32_to_cpup(p); 641
419 642 /* If DS was already in cache, free ds addrs */
420 p = xdr_inline_decode(&stream, len); 643 while (!list_empty(&dsaddrs)) {
421 if (unlikely(!p)) 644 da = list_first_entry(&dsaddrs,
422 goto out_err_free_deviceid; 645 struct nfs4_pnfs_ds_addr,
423 646 da_node);
424 /* read len, skip */ 647 list_del_init(&da->da_node);
425 p = xdr_inline_decode(&stream, 4); 648 kfree(da->da_remotestr);
426 if (unlikely(!p)) 649 kfree(da);
427 goto out_err_free_deviceid;
428 len = be32_to_cpup(p);
429
430 p = xdr_inline_decode(&stream, len);
431 if (unlikely(!p))
432 goto out_err_free_deviceid;
433 }
434 } 650 }
435 } 651 }
436 652
437 __free_page(scratch); 653 __free_page(scratch);
438 return dsaddr; 654 return dsaddr;
439 655
656out_err_drain_dsaddrs:
657 while (!list_empty(&dsaddrs)) {
658 da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr,
659 da_node);
660 list_del_init(&da->da_node);
661 kfree(da->da_remotestr);
662 kfree(da);
663 }
440out_err_free_deviceid: 664out_err_free_deviceid:
441 nfs4_fl_free_deviceid(dsaddr); 665 nfs4_fl_free_deviceid(dsaddr);
442 /* stripe_indicies was part of dsaddr */ 666 /* stripe_indicies was part of dsaddr */
@@ -591,13 +815,13 @@ nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j)
591 815
592static void 816static void
593filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr, 817filelayout_mark_devid_negative(struct nfs4_file_layout_dsaddr *dsaddr,
594 int err, u32 ds_addr) 818 int err, const char *ds_remotestr)
595{ 819{
596 u32 *p = (u32 *)&dsaddr->id_node.deviceid; 820 u32 *p = (u32 *)&dsaddr->id_node.deviceid;
597 821
598 printk(KERN_ERR "NFS: data server %x connection error %d." 822 printk(KERN_ERR "NFS: data server %s connection error %d."
599 " Deviceid [%x%x%x%x] marked out of use.\n", 823 " Deviceid [%x%x%x%x] marked out of use.\n",
600 ds_addr, err, p[0], p[1], p[2], p[3]); 824 ds_remotestr, err, p[0], p[1], p[2], p[3]);
601 825
602 spin_lock(&nfs4_ds_cache_lock); 826 spin_lock(&nfs4_ds_cache_lock);
603 dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY; 827 dsaddr->flags |= NFS4_DEVICE_ID_NEG_ENTRY;
@@ -628,7 +852,7 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
628 err = nfs4_ds_connect(s, ds); 852 err = nfs4_ds_connect(s, ds);
629 if (err) { 853 if (err) {
630 filelayout_mark_devid_negative(dsaddr, err, 854 filelayout_mark_devid_negative(dsaddr, err,
631 ntohl(ds->ds_ip_addr)); 855 ds->ds_remotestr);
632 return NULL; 856 return NULL;
633 } 857 }
634 } 858 }
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 26bece8f3083..079614deca3f 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -80,7 +80,10 @@ static int _nfs4_proc_getattr(struct nfs_server *server, struct nfs_fh *fhandle,
80static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, 80static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
81 struct nfs_fattr *fattr, struct iattr *sattr, 81 struct nfs_fattr *fattr, struct iattr *sattr,
82 struct nfs4_state *state); 82 struct nfs4_state *state);
83 83#ifdef CONFIG_NFS_V4_1
84static int nfs41_test_stateid(struct nfs_server *, struct nfs4_state *);
85static int nfs41_free_stateid(struct nfs_server *, struct nfs4_state *);
86#endif
84/* Prevent leaks of NFSv4 errors into userland */ 87/* Prevent leaks of NFSv4 errors into userland */
85static int nfs4_map_errors(int err) 88static int nfs4_map_errors(int err)
86{ 89{
@@ -1689,6 +1692,20 @@ static int nfs4_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *sta
1689 return ret; 1692 return ret;
1690} 1693}
1691 1694
1695#if defined(CONFIG_NFS_V4_1)
1696static int nfs41_open_expired(struct nfs4_state_owner *sp, struct nfs4_state *state)
1697{
1698 int status;
1699 struct nfs_server *server = NFS_SERVER(state->inode);
1700
1701 status = nfs41_test_stateid(server, state);
1702 if (status == NFS_OK)
1703 return 0;
1704 nfs41_free_stateid(server, state);
1705 return nfs4_open_expired(sp, state);
1706}
1707#endif
1708
1692/* 1709/*
1693 * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-* 1710 * on an EXCLUSIVE create, the server should send back a bitmask with FATTR4-*
1694 * fields corresponding to attributes that were used to store the verifier. 1711 * fields corresponding to attributes that were used to store the verifier.
@@ -2252,13 +2269,14 @@ static int nfs4_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
2252static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, 2269static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle,
2253 struct nfs_fsinfo *info) 2270 struct nfs_fsinfo *info)
2254{ 2271{
2272 int minor_version = server->nfs_client->cl_minorversion;
2255 int status = nfs4_lookup_root(server, fhandle, info); 2273 int status = nfs4_lookup_root(server, fhandle, info);
2256 if ((status == -NFS4ERR_WRONGSEC) && !(server->flags & NFS_MOUNT_SECFLAVOUR)) 2274 if ((status == -NFS4ERR_WRONGSEC) && !(server->flags & NFS_MOUNT_SECFLAVOUR))
2257 /* 2275 /*
2258 * A status of -NFS4ERR_WRONGSEC will be mapped to -EPERM 2276 * A status of -NFS4ERR_WRONGSEC will be mapped to -EPERM
2259 * by nfs4_map_errors() as this function exits. 2277 * by nfs4_map_errors() as this function exits.
2260 */ 2278 */
2261 status = nfs4_find_root_sec(server, fhandle, info); 2279 status = nfs_v4_minor_ops[minor_version]->find_root_sec(server, fhandle, info);
2262 if (status == 0) 2280 if (status == 0)
2263 status = nfs4_server_capabilities(server, fhandle); 2281 status = nfs4_server_capabilities(server, fhandle);
2264 if (status == 0) 2282 if (status == 0)
@@ -4441,6 +4459,20 @@ out:
4441 return err; 4459 return err;
4442} 4460}
4443 4461
4462#if defined(CONFIG_NFS_V4_1)
4463static int nfs41_lock_expired(struct nfs4_state *state, struct file_lock *request)
4464{
4465 int status;
4466 struct nfs_server *server = NFS_SERVER(state->inode);
4467
4468 status = nfs41_test_stateid(server, state);
4469 if (status == NFS_OK)
4470 return 0;
4471 nfs41_free_stateid(server, state);
4472 return nfs4_lock_expired(state, request);
4473}
4474#endif
4475
4444static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request) 4476static int _nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock *request)
4445{ 4477{
4446 struct nfs_inode *nfsi = NFS_I(state->inode); 4478 struct nfs_inode *nfsi = NFS_I(state->inode);
@@ -4779,6 +4811,16 @@ out_inval:
4779 return -NFS4ERR_INVAL; 4811 return -NFS4ERR_INVAL;
4780} 4812}
4781 4813
4814static bool
4815nfs41_same_server_scope(struct server_scope *a, struct server_scope *b)
4816{
4817 if (a->server_scope_sz == b->server_scope_sz &&
4818 memcmp(a->server_scope, b->server_scope, a->server_scope_sz) == 0)
4819 return true;
4820
4821 return false;
4822}
4823
4782/* 4824/*
4783 * nfs4_proc_exchange_id() 4825 * nfs4_proc_exchange_id()
4784 * 4826 *
@@ -4821,9 +4863,31 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
4821 init_utsname()->domainname, 4863 init_utsname()->domainname,
4822 clp->cl_rpcclient->cl_auth->au_flavor); 4864 clp->cl_rpcclient->cl_auth->au_flavor);
4823 4865
4866 res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL);
4867 if (unlikely(!res.server_scope))
4868 return -ENOMEM;
4869
4824 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT); 4870 status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
4825 if (!status) 4871 if (!status)
4826 status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags); 4872 status = nfs4_check_cl_exchange_flags(clp->cl_exchange_flags);
4873
4874 if (!status) {
4875 if (clp->server_scope &&
4876 !nfs41_same_server_scope(clp->server_scope,
4877 res.server_scope)) {
4878 dprintk("%s: server_scope mismatch detected\n",
4879 __func__);
4880 set_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH, &clp->cl_state);
4881 kfree(clp->server_scope);
4882 clp->server_scope = NULL;
4883 }
4884
4885 if (!clp->server_scope)
4886 clp->server_scope = res.server_scope;
4887 else
4888 kfree(res.server_scope);
4889 }
4890
4827 dprintk("<-- %s status= %d\n", __func__, status); 4891 dprintk("<-- %s status= %d\n", __func__, status);
4828 return status; 4892 return status;
4829} 4893}
@@ -5704,7 +5768,7 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
5704{ 5768{
5705 struct nfs4_layoutreturn *lrp = calldata; 5769 struct nfs4_layoutreturn *lrp = calldata;
5706 struct nfs_server *server; 5770 struct nfs_server *server;
5707 struct pnfs_layout_hdr *lo = NFS_I(lrp->args.inode)->layout; 5771 struct pnfs_layout_hdr *lo = lrp->args.layout;
5708 5772
5709 dprintk("--> %s\n", __func__); 5773 dprintk("--> %s\n", __func__);
5710 5774
@@ -5733,7 +5797,7 @@ static void nfs4_layoutreturn_release(void *calldata)
5733 struct nfs4_layoutreturn *lrp = calldata; 5797 struct nfs4_layoutreturn *lrp = calldata;
5734 5798
5735 dprintk("--> %s\n", __func__); 5799 dprintk("--> %s\n", __func__);
5736 put_layout_hdr(NFS_I(lrp->args.inode)->layout); 5800 put_layout_hdr(lrp->args.layout);
5737 kfree(calldata); 5801 kfree(calldata);
5738 dprintk("<-- %s\n", __func__); 5802 dprintk("<-- %s\n", __func__);
5739} 5803}
@@ -5901,6 +5965,143 @@ out:
5901 rpc_put_task(task); 5965 rpc_put_task(task);
5902 return status; 5966 return status;
5903} 5967}
5968
5969static int
5970_nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
5971 struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors)
5972{
5973 struct nfs41_secinfo_no_name_args args = {
5974 .style = SECINFO_STYLE_CURRENT_FH,
5975 };
5976 struct nfs4_secinfo_res res = {
5977 .flavors = flavors,
5978 };
5979 struct rpc_message msg = {
5980 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_SECINFO_NO_NAME],
5981 .rpc_argp = &args,
5982 .rpc_resp = &res,
5983 };
5984 return nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
5985}
5986
5987static int
5988nfs41_proc_secinfo_no_name(struct nfs_server *server, struct nfs_fh *fhandle,
5989 struct nfs_fsinfo *info, struct nfs4_secinfo_flavors *flavors)
5990{
5991 struct nfs4_exception exception = { };
5992 int err;
5993 do {
5994 err = _nfs41_proc_secinfo_no_name(server, fhandle, info, flavors);
5995 switch (err) {
5996 case 0:
5997 case -NFS4ERR_WRONGSEC:
5998 case -NFS4ERR_NOTSUPP:
5999 break;
6000 default:
6001 err = nfs4_handle_exception(server, err, &exception);
6002 }
6003 } while (exception.retry);
6004 return err;
6005}
6006
6007static int
6008nfs41_find_root_sec(struct nfs_server *server, struct nfs_fh *fhandle,
6009 struct nfs_fsinfo *info)
6010{
6011 int err;
6012 struct page *page;
6013 rpc_authflavor_t flavor;
6014 struct nfs4_secinfo_flavors *flavors;
6015
6016 page = alloc_page(GFP_KERNEL);
6017 if (!page) {
6018 err = -ENOMEM;
6019 goto out;
6020 }
6021
6022 flavors = page_address(page);
6023 err = nfs41_proc_secinfo_no_name(server, fhandle, info, flavors);
6024
6025 /*
6026 * Fall back on "guess and check" method if
6027 * the server doesn't support SECINFO_NO_NAME
6028 */
6029 if (err == -NFS4ERR_WRONGSEC || err == -NFS4ERR_NOTSUPP) {
6030 err = nfs4_find_root_sec(server, fhandle, info);
6031 goto out_freepage;
6032 }
6033 if (err)
6034 goto out_freepage;
6035
6036 flavor = nfs_find_best_sec(flavors);
6037 if (err == 0)
6038 err = nfs4_lookup_root_sec(server, fhandle, info, flavor);
6039
6040out_freepage:
6041 put_page(page);
6042 if (err == -EACCES)
6043 return -EPERM;
6044out:
6045 return err;
6046}
6047static int _nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state)
6048{
6049 int status;
6050 struct nfs41_test_stateid_args args = {
6051 .stateid = &state->stateid,
6052 };
6053 struct nfs41_test_stateid_res res;
6054 struct rpc_message msg = {
6055 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_TEST_STATEID],
6056 .rpc_argp = &args,
6057 .rpc_resp = &res,
6058 };
6059 args.seq_args.sa_session = res.seq_res.sr_session = NULL;
6060 status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1);
6061 return status;
6062}
6063
6064static int nfs41_test_stateid(struct nfs_server *server, struct nfs4_state *state)
6065{
6066 struct nfs4_exception exception = { };
6067 int err;
6068 do {
6069 err = nfs4_handle_exception(server,
6070 _nfs41_test_stateid(server, state),
6071 &exception);
6072 } while (exception.retry);
6073 return err;
6074}
6075
6076static int _nfs4_free_stateid(struct nfs_server *server, struct nfs4_state *state)
6077{
6078 int status;
6079 struct nfs41_free_stateid_args args = {
6080 .stateid = &state->stateid,
6081 };
6082 struct nfs41_free_stateid_res res;
6083 struct rpc_message msg = {
6084 .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_FREE_STATEID],
6085 .rpc_argp = &args,
6086 .rpc_resp = &res,
6087 };
6088
6089 args.seq_args.sa_session = res.seq_res.sr_session = NULL;
6090 status = nfs4_call_sync_sequence(server->client, server, &msg, &args.seq_args, &res.seq_res, 0, 1);
6091 return status;
6092}
6093
6094static int nfs41_free_stateid(struct nfs_server *server, struct nfs4_state *state)
6095{
6096 struct nfs4_exception exception = { };
6097 int err;
6098 do {
6099 err = nfs4_handle_exception(server,
6100 _nfs4_free_stateid(server, state),
6101 &exception);
6102 } while (exception.retry);
6103 return err;
6104}
5904#endif /* CONFIG_NFS_V4_1 */ 6105#endif /* CONFIG_NFS_V4_1 */
5905 6106
5906struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = { 6107struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
@@ -5937,8 +6138,8 @@ struct nfs4_state_recovery_ops nfs40_nograce_recovery_ops = {
5937struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = { 6138struct nfs4_state_recovery_ops nfs41_nograce_recovery_ops = {
5938 .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE, 6139 .owner_flag_bit = NFS_OWNER_RECLAIM_NOGRACE,
5939 .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE, 6140 .state_flag_bit = NFS_STATE_RECLAIM_NOGRACE,
5940 .recover_open = nfs4_open_expired, 6141 .recover_open = nfs41_open_expired,
5941 .recover_lock = nfs4_lock_expired, 6142 .recover_lock = nfs41_lock_expired,
5942 .establish_clid = nfs41_init_clientid, 6143 .establish_clid = nfs41_init_clientid,
5943 .get_clid_cred = nfs4_get_exchange_id_cred, 6144 .get_clid_cred = nfs4_get_exchange_id_cred,
5944}; 6145};
@@ -5962,6 +6163,7 @@ static const struct nfs4_minor_version_ops nfs_v4_0_minor_ops = {
5962 .minor_version = 0, 6163 .minor_version = 0,
5963 .call_sync = _nfs4_call_sync, 6164 .call_sync = _nfs4_call_sync,
5964 .validate_stateid = nfs4_validate_delegation_stateid, 6165 .validate_stateid = nfs4_validate_delegation_stateid,
6166 .find_root_sec = nfs4_find_root_sec,
5965 .reboot_recovery_ops = &nfs40_reboot_recovery_ops, 6167 .reboot_recovery_ops = &nfs40_reboot_recovery_ops,
5966 .nograce_recovery_ops = &nfs40_nograce_recovery_ops, 6168 .nograce_recovery_ops = &nfs40_nograce_recovery_ops,
5967 .state_renewal_ops = &nfs40_state_renewal_ops, 6169 .state_renewal_ops = &nfs40_state_renewal_ops,
@@ -5972,6 +6174,7 @@ static const struct nfs4_minor_version_ops nfs_v4_1_minor_ops = {
5972 .minor_version = 1, 6174 .minor_version = 1,
5973 .call_sync = _nfs4_call_sync_session, 6175 .call_sync = _nfs4_call_sync_session,
5974 .validate_stateid = nfs41_validate_delegation_stateid, 6176 .validate_stateid = nfs41_validate_delegation_stateid,
6177 .find_root_sec = nfs41_find_root_sec,
5975 .reboot_recovery_ops = &nfs41_reboot_recovery_ops, 6178 .reboot_recovery_ops = &nfs41_reboot_recovery_ops,
5976 .nograce_recovery_ops = &nfs41_nograce_recovery_ops, 6179 .nograce_recovery_ops = &nfs41_nograce_recovery_ops,
5977 .state_renewal_ops = &nfs41_state_renewal_ops, 6180 .state_renewal_ops = &nfs41_state_renewal_ops,
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 7acfe8843626..72ab97ef3d61 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1643,7 +1643,14 @@ static void nfs4_state_manager(struct nfs_client *clp)
1643 goto out_error; 1643 goto out_error;
1644 } 1644 }
1645 clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state); 1645 clear_bit(NFS4CLNT_CHECK_LEASE, &clp->cl_state);
1646 set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state); 1646
1647 if (test_and_clear_bit(NFS4CLNT_SERVER_SCOPE_MISMATCH,
1648 &clp->cl_state))
1649 nfs4_state_start_reclaim_nograce(clp);
1650 else
1651 set_bit(NFS4CLNT_RECLAIM_REBOOT,
1652 &clp->cl_state);
1653
1647 pnfs_destroy_all_layouts(clp); 1654 pnfs_destroy_all_layouts(clp);
1648 } 1655 }
1649 1656
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index e6e8f3b9a1de..c191a9baa422 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -343,6 +343,14 @@ static int nfs4_stat_to_errno(int);
343 1 /* FIXME: opaque lrf_body always empty at the moment */) 343 1 /* FIXME: opaque lrf_body always empty at the moment */)
344#define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \ 344#define decode_layoutreturn_maxsz (op_decode_hdr_maxsz + \
345 1 + decode_stateid_maxsz) 345 1 + decode_stateid_maxsz)
346#define encode_secinfo_no_name_maxsz (op_encode_hdr_maxsz + 1)
347#define decode_secinfo_no_name_maxsz decode_secinfo_maxsz
348#define encode_test_stateid_maxsz (op_encode_hdr_maxsz + 2 + \
349 XDR_QUADLEN(NFS4_STATEID_SIZE))
350#define decode_test_stateid_maxsz (op_decode_hdr_maxsz + 2 + 1)
351#define encode_free_stateid_maxsz (op_encode_hdr_maxsz + 1 + \
352 XDR_QUADLEN(NFS4_STATEID_SIZE))
353#define decode_free_stateid_maxsz (op_decode_hdr_maxsz + 1)
346#else /* CONFIG_NFS_V4_1 */ 354#else /* CONFIG_NFS_V4_1 */
347#define encode_sequence_maxsz 0 355#define encode_sequence_maxsz 0
348#define decode_sequence_maxsz 0 356#define decode_sequence_maxsz 0
@@ -772,6 +780,26 @@ static int nfs4_stat_to_errno(int);
772 decode_sequence_maxsz + \ 780 decode_sequence_maxsz + \
773 decode_putfh_maxsz + \ 781 decode_putfh_maxsz + \
774 decode_layoutreturn_maxsz) 782 decode_layoutreturn_maxsz)
783#define NFS4_enc_secinfo_no_name_sz (compound_encode_hdr_maxsz + \
784 encode_sequence_maxsz + \
785 encode_putrootfh_maxsz +\
786 encode_secinfo_no_name_maxsz)
787#define NFS4_dec_secinfo_no_name_sz (compound_decode_hdr_maxsz + \
788 decode_sequence_maxsz + \
789 decode_putrootfh_maxsz + \
790 decode_secinfo_no_name_maxsz)
791#define NFS4_enc_test_stateid_sz (compound_encode_hdr_maxsz + \
792 encode_sequence_maxsz + \
793 encode_test_stateid_maxsz)
794#define NFS4_dec_test_stateid_sz (compound_decode_hdr_maxsz + \
795 decode_sequence_maxsz + \
796 decode_test_stateid_maxsz)
797#define NFS4_enc_free_stateid_sz (compound_encode_hdr_maxsz + \
798 encode_sequence_maxsz + \
799 encode_free_stateid_maxsz)
800#define NFS4_dec_free_stateid_sz (compound_decode_hdr_maxsz + \
801 decode_sequence_maxsz + \
802 decode_free_stateid_maxsz)
775 803
776const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH + 804const u32 nfs41_maxwrite_overhead = ((RPC_MAX_HEADER_WITH_AUTH +
777 compound_encode_hdr_maxsz + 805 compound_encode_hdr_maxsz +
@@ -1938,6 +1966,46 @@ encode_layoutreturn(struct xdr_stream *xdr,
1938 hdr->nops++; 1966 hdr->nops++;
1939 hdr->replen += decode_layoutreturn_maxsz; 1967 hdr->replen += decode_layoutreturn_maxsz;
1940} 1968}
1969
1970static int
1971encode_secinfo_no_name(struct xdr_stream *xdr,
1972 const struct nfs41_secinfo_no_name_args *args,
1973 struct compound_hdr *hdr)
1974{
1975 __be32 *p;
1976 p = reserve_space(xdr, 8);
1977 *p++ = cpu_to_be32(OP_SECINFO_NO_NAME);
1978 *p++ = cpu_to_be32(args->style);
1979 hdr->nops++;
1980 hdr->replen += decode_secinfo_no_name_maxsz;
1981 return 0;
1982}
1983
1984static void encode_test_stateid(struct xdr_stream *xdr,
1985 struct nfs41_test_stateid_args *args,
1986 struct compound_hdr *hdr)
1987{
1988 __be32 *p;
1989
1990 p = reserve_space(xdr, 8 + NFS4_STATEID_SIZE);
1991 *p++ = cpu_to_be32(OP_TEST_STATEID);
1992 *p++ = cpu_to_be32(1);
1993 xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
1994 hdr->nops++;
1995 hdr->replen += decode_test_stateid_maxsz;
1996}
1997
1998static void encode_free_stateid(struct xdr_stream *xdr,
1999 struct nfs41_free_stateid_args *args,
2000 struct compound_hdr *hdr)
2001{
2002 __be32 *p;
2003 p = reserve_space(xdr, 4 + NFS4_STATEID_SIZE);
2004 *p++ = cpu_to_be32(OP_FREE_STATEID);
2005 xdr_encode_opaque_fixed(p, args->stateid->data, NFS4_STATEID_SIZE);
2006 hdr->nops++;
2007 hdr->replen += decode_free_stateid_maxsz;
2008}
1941#endif /* CONFIG_NFS_V4_1 */ 2009#endif /* CONFIG_NFS_V4_1 */
1942 2010
1943/* 2011/*
@@ -2790,6 +2858,59 @@ static void nfs4_xdr_enc_layoutreturn(struct rpc_rqst *req,
2790 encode_layoutreturn(xdr, args, &hdr); 2858 encode_layoutreturn(xdr, args, &hdr);
2791 encode_nops(&hdr); 2859 encode_nops(&hdr);
2792} 2860}
2861
2862/*
2863 * Encode SECINFO_NO_NAME request
2864 */
2865static int nfs4_xdr_enc_secinfo_no_name(struct rpc_rqst *req,
2866 struct xdr_stream *xdr,
2867 struct nfs41_secinfo_no_name_args *args)
2868{
2869 struct compound_hdr hdr = {
2870 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2871 };
2872
2873 encode_compound_hdr(xdr, req, &hdr);
2874 encode_sequence(xdr, &args->seq_args, &hdr);
2875 encode_putrootfh(xdr, &hdr);
2876 encode_secinfo_no_name(xdr, args, &hdr);
2877 encode_nops(&hdr);
2878 return 0;
2879}
2880
2881/*
2882 * Encode TEST_STATEID request
2883 */
2884static void nfs4_xdr_enc_test_stateid(struct rpc_rqst *req,
2885 struct xdr_stream *xdr,
2886 struct nfs41_test_stateid_args *args)
2887{
2888 struct compound_hdr hdr = {
2889 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2890 };
2891
2892 encode_compound_hdr(xdr, req, &hdr);
2893 encode_sequence(xdr, &args->seq_args, &hdr);
2894 encode_test_stateid(xdr, args, &hdr);
2895 encode_nops(&hdr);
2896}
2897
2898/*
2899 * Encode FREE_STATEID request
2900 */
2901static void nfs4_xdr_enc_free_stateid(struct rpc_rqst *req,
2902 struct xdr_stream *xdr,
2903 struct nfs41_free_stateid_args *args)
2904{
2905 struct compound_hdr hdr = {
2906 .minorversion = nfs4_xdr_minorversion(&args->seq_args),
2907 };
2908
2909 encode_compound_hdr(xdr, req, &hdr);
2910 encode_sequence(xdr, &args->seq_args, &hdr);
2911 encode_free_stateid(xdr, args, &hdr);
2912 encode_nops(&hdr);
2913}
2793#endif /* CONFIG_NFS_V4_1 */ 2914#endif /* CONFIG_NFS_V4_1 */
2794 2915
2795static void print_overflow_msg(const char *func, const struct xdr_stream *xdr) 2916static void print_overflow_msg(const char *func, const struct xdr_stream *xdr)
@@ -4977,11 +5098,17 @@ static int decode_exchange_id(struct xdr_stream *xdr,
4977 if (unlikely(status)) 5098 if (unlikely(status))
4978 return status; 5099 return status;
4979 5100
4980 /* Throw away server_scope */ 5101 /* Save server_scope */
4981 status = decode_opaque_inline(xdr, &dummy, &dummy_str); 5102 status = decode_opaque_inline(xdr, &dummy, &dummy_str);
4982 if (unlikely(status)) 5103 if (unlikely(status))
4983 return status; 5104 return status;
4984 5105
5106 if (unlikely(dummy > NFS4_OPAQUE_LIMIT))
5107 return -EIO;
5108
5109 memcpy(res->server_scope->server_scope, dummy_str, dummy);
5110 res->server_scope->server_scope_sz = dummy;
5111
4985 /* Throw away Implementation id array */ 5112 /* Throw away Implementation id array */
4986 status = decode_opaque_inline(xdr, &dummy, &dummy_str); 5113 status = decode_opaque_inline(xdr, &dummy, &dummy_str);
4987 if (unlikely(status)) 5114 if (unlikely(status))
@@ -5322,6 +5449,55 @@ out_overflow:
5322 print_overflow_msg(__func__, xdr); 5449 print_overflow_msg(__func__, xdr);
5323 return -EIO; 5450 return -EIO;
5324} 5451}
5452
5453static int decode_test_stateid(struct xdr_stream *xdr,
5454 struct nfs41_test_stateid_res *res)
5455{
5456 __be32 *p;
5457 int status;
5458 int num_res;
5459
5460 status = decode_op_hdr(xdr, OP_TEST_STATEID);
5461 if (status)
5462 return status;
5463
5464 p = xdr_inline_decode(xdr, 4);
5465 if (unlikely(!p))
5466 goto out_overflow;
5467 num_res = be32_to_cpup(p++);
5468 if (num_res != 1)
5469 goto out;
5470
5471 p = xdr_inline_decode(xdr, 4);
5472 if (unlikely(!p))
5473 goto out_overflow;
5474 res->status = be32_to_cpup(p++);
5475 return res->status;
5476out_overflow:
5477 print_overflow_msg(__func__, xdr);
5478out:
5479 return -EIO;
5480}
5481
5482static int decode_free_stateid(struct xdr_stream *xdr,
5483 struct nfs41_free_stateid_res *res)
5484{
5485 __be32 *p;
5486 int status;
5487
5488 status = decode_op_hdr(xdr, OP_FREE_STATEID);
5489 if (status)
5490 return status;
5491
5492 p = xdr_inline_decode(xdr, 4);
5493 if (unlikely(!p))
5494 goto out_overflow;
5495 res->status = be32_to_cpup(p++);
5496 return res->status;
5497out_overflow:
5498 print_overflow_msg(__func__, xdr);
5499 return -EIO;
5500}
5325#endif /* CONFIG_NFS_V4_1 */ 5501#endif /* CONFIG_NFS_V4_1 */
5326 5502
5327/* 5503/*
@@ -6461,6 +6637,72 @@ static int nfs4_xdr_dec_layoutcommit(struct rpc_rqst *rqstp,
6461out: 6637out:
6462 return status; 6638 return status;
6463} 6639}
6640
6641/*
6642 * Decode SECINFO_NO_NAME response
6643 */
6644static int nfs4_xdr_dec_secinfo_no_name(struct rpc_rqst *rqstp,
6645 struct xdr_stream *xdr,
6646 struct nfs4_secinfo_res *res)
6647{
6648 struct compound_hdr hdr;
6649 int status;
6650
6651 status = decode_compound_hdr(xdr, &hdr);
6652 if (status)
6653 goto out;
6654 status = decode_sequence(xdr, &res->seq_res, rqstp);
6655 if (status)
6656 goto out;
6657 status = decode_putrootfh(xdr);
6658 if (status)
6659 goto out;
6660 status = decode_secinfo(xdr, res);
6661out:
6662 return status;
6663}
6664
6665/*
6666 * Decode TEST_STATEID response
6667 */
6668static int nfs4_xdr_dec_test_stateid(struct rpc_rqst *rqstp,
6669 struct xdr_stream *xdr,
6670 struct nfs41_test_stateid_res *res)
6671{
6672 struct compound_hdr hdr;
6673 int status;
6674
6675 status = decode_compound_hdr(xdr, &hdr);
6676 if (status)
6677 goto out;
6678 status = decode_sequence(xdr, &res->seq_res, rqstp);
6679 if (status)
6680 goto out;
6681 status = decode_test_stateid(xdr, res);
6682out:
6683 return status;
6684}
6685
6686/*
6687 * Decode FREE_STATEID response
6688 */
6689static int nfs4_xdr_dec_free_stateid(struct rpc_rqst *rqstp,
6690 struct xdr_stream *xdr,
6691 struct nfs41_free_stateid_res *res)
6692{
6693 struct compound_hdr hdr;
6694 int status;
6695
6696 status = decode_compound_hdr(xdr, &hdr);
6697 if (status)
6698 goto out;
6699 status = decode_sequence(xdr, &res->seq_res, rqstp);
6700 if (status)
6701 goto out;
6702 status = decode_free_stateid(xdr, res);
6703out:
6704 return status;
6705}
6464#endif /* CONFIG_NFS_V4_1 */ 6706#endif /* CONFIG_NFS_V4_1 */
6465 6707
6466/** 6708/**
@@ -6663,6 +6905,9 @@ struct rpc_procinfo nfs4_procedures[] = {
6663 PROC(LAYOUTGET, enc_layoutget, dec_layoutget), 6905 PROC(LAYOUTGET, enc_layoutget, dec_layoutget),
6664 PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit), 6906 PROC(LAYOUTCOMMIT, enc_layoutcommit, dec_layoutcommit),
6665 PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn), 6907 PROC(LAYOUTRETURN, enc_layoutreturn, dec_layoutreturn),
6908 PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name),
6909 PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid),
6910 PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid),
6666#endif /* CONFIG_NFS_V4_1 */ 6911#endif /* CONFIG_NFS_V4_1 */
6667}; 6912};
6668 6913
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 8ff2ea3f10ef..9383ca7245bc 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -1000,13 +1000,22 @@ static bool objio_pg_test(struct nfs_pageio_descriptor *pgio,
1000 if (!pnfs_generic_pg_test(pgio, prev, req)) 1000 if (!pnfs_generic_pg_test(pgio, prev, req))
1001 return false; 1001 return false;
1002 1002
1003 if (pgio->pg_lseg == NULL)
1004 return true;
1005
1006 return pgio->pg_count + req->wb_bytes <= 1003 return pgio->pg_count + req->wb_bytes <=
1007 OBJIO_LSEG(pgio->pg_lseg)->max_io_size; 1004 OBJIO_LSEG(pgio->pg_lseg)->max_io_size;
1008} 1005}
1009 1006
1007static const struct nfs_pageio_ops objio_pg_read_ops = {
1008 .pg_init = pnfs_generic_pg_init_read,
1009 .pg_test = objio_pg_test,
1010 .pg_doio = pnfs_generic_pg_readpages,
1011};
1012
1013static const struct nfs_pageio_ops objio_pg_write_ops = {
1014 .pg_init = pnfs_generic_pg_init_write,
1015 .pg_test = objio_pg_test,
1016 .pg_doio = pnfs_generic_pg_writepages,
1017};
1018
1010static struct pnfs_layoutdriver_type objlayout_type = { 1019static struct pnfs_layoutdriver_type objlayout_type = {
1011 .id = LAYOUT_OSD2_OBJECTS, 1020 .id = LAYOUT_OSD2_OBJECTS,
1012 .name = "LAYOUT_OSD2_OBJECTS", 1021 .name = "LAYOUT_OSD2_OBJECTS",
@@ -1020,7 +1029,8 @@ static struct pnfs_layoutdriver_type objlayout_type = {
1020 1029
1021 .read_pagelist = objlayout_read_pagelist, 1030 .read_pagelist = objlayout_read_pagelist,
1022 .write_pagelist = objlayout_write_pagelist, 1031 .write_pagelist = objlayout_write_pagelist,
1023 .pg_test = objio_pg_test, 1032 .pg_read_ops = &objio_pg_read_ops,
1033 .pg_write_ops = &objio_pg_write_ops,
1024 1034
1025 .free_deviceid_node = objio_free_deviceid_node, 1035 .free_deviceid_node = objio_free_deviceid_node,
1026 1036
@@ -1055,5 +1065,7 @@ objlayout_exit(void)
1055 __func__); 1065 __func__);
1056} 1066}
1057 1067
1068MODULE_ALIAS("nfs-layouttype4-2");
1069
1058module_init(objlayout_init); 1070module_init(objlayout_init);
1059module_exit(objlayout_exit); 1071module_exit(objlayout_exit);
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 18449f43c568..b60970cc7f1f 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -230,7 +230,7 @@ EXPORT_SYMBOL_GPL(nfs_generic_pg_test);
230 */ 230 */
231void nfs_pageio_init(struct nfs_pageio_descriptor *desc, 231void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
232 struct inode *inode, 232 struct inode *inode,
233 int (*doio)(struct nfs_pageio_descriptor *), 233 const struct nfs_pageio_ops *pg_ops,
234 size_t bsize, 234 size_t bsize,
235 int io_flags) 235 int io_flags)
236{ 236{
@@ -240,13 +240,12 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
240 desc->pg_bsize = bsize; 240 desc->pg_bsize = bsize;
241 desc->pg_base = 0; 241 desc->pg_base = 0;
242 desc->pg_moreio = 0; 242 desc->pg_moreio = 0;
243 desc->pg_recoalesce = 0;
243 desc->pg_inode = inode; 244 desc->pg_inode = inode;
244 desc->pg_doio = doio; 245 desc->pg_ops = pg_ops;
245 desc->pg_ioflags = io_flags; 246 desc->pg_ioflags = io_flags;
246 desc->pg_error = 0; 247 desc->pg_error = 0;
247 desc->pg_lseg = NULL; 248 desc->pg_lseg = NULL;
248 desc->pg_test = nfs_generic_pg_test;
249 pnfs_pageio_init(desc, inode);
250} 249}
251 250
252/** 251/**
@@ -276,7 +275,7 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
276 return false; 275 return false;
277 if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE) 276 if (prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
278 return false; 277 return false;
279 return pgio->pg_test(pgio, prev, req); 278 return pgio->pg_ops->pg_test(pgio, prev, req);
280} 279}
281 280
282/** 281/**
@@ -297,6 +296,8 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
297 if (!nfs_can_coalesce_requests(prev, req, desc)) 296 if (!nfs_can_coalesce_requests(prev, req, desc))
298 return 0; 297 return 0;
299 } else { 298 } else {
299 if (desc->pg_ops->pg_init)
300 desc->pg_ops->pg_init(desc, req);
300 desc->pg_base = req->wb_pgbase; 301 desc->pg_base = req->wb_pgbase;
301 } 302 }
302 nfs_list_remove_request(req); 303 nfs_list_remove_request(req);
@@ -311,7 +312,7 @@ static int nfs_pageio_do_add_request(struct nfs_pageio_descriptor *desc,
311static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc) 312static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
312{ 313{
313 if (!list_empty(&desc->pg_list)) { 314 if (!list_empty(&desc->pg_list)) {
314 int error = desc->pg_doio(desc); 315 int error = desc->pg_ops->pg_doio(desc);
315 if (error < 0) 316 if (error < 0)
316 desc->pg_error = error; 317 desc->pg_error = error;
317 else 318 else
@@ -331,7 +332,7 @@ static void nfs_pageio_doio(struct nfs_pageio_descriptor *desc)
331 * Returns true if the request 'req' was successfully coalesced into the 332 * Returns true if the request 'req' was successfully coalesced into the
332 * existing list of pages 'desc'. 333 * existing list of pages 'desc'.
333 */ 334 */
334int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc, 335static int __nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
335 struct nfs_page *req) 336 struct nfs_page *req)
336{ 337{
337 while (!nfs_pageio_do_add_request(desc, req)) { 338 while (!nfs_pageio_do_add_request(desc, req)) {
@@ -340,17 +341,67 @@ int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
340 if (desc->pg_error < 0) 341 if (desc->pg_error < 0)
341 return 0; 342 return 0;
342 desc->pg_moreio = 0; 343 desc->pg_moreio = 0;
344 if (desc->pg_recoalesce)
345 return 0;
343 } 346 }
344 return 1; 347 return 1;
345} 348}
346 349
350static int nfs_do_recoalesce(struct nfs_pageio_descriptor *desc)
351{
352 LIST_HEAD(head);
353
354 do {
355 list_splice_init(&desc->pg_list, &head);
356 desc->pg_bytes_written -= desc->pg_count;
357 desc->pg_count = 0;
358 desc->pg_base = 0;
359 desc->pg_recoalesce = 0;
360
361 while (!list_empty(&head)) {
362 struct nfs_page *req;
363
364 req = list_first_entry(&head, struct nfs_page, wb_list);
365 nfs_list_remove_request(req);
366 if (__nfs_pageio_add_request(desc, req))
367 continue;
368 if (desc->pg_error < 0)
369 return 0;
370 break;
371 }
372 } while (desc->pg_recoalesce);
373 return 1;
374}
375
376int nfs_pageio_add_request(struct nfs_pageio_descriptor *desc,
377 struct nfs_page *req)
378{
379 int ret;
380
381 do {
382 ret = __nfs_pageio_add_request(desc, req);
383 if (ret)
384 break;
385 if (desc->pg_error < 0)
386 break;
387 ret = nfs_do_recoalesce(desc);
388 } while (ret);
389 return ret;
390}
391
347/** 392/**
348 * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor 393 * nfs_pageio_complete - Complete I/O on an nfs_pageio_descriptor
349 * @desc: pointer to io descriptor 394 * @desc: pointer to io descriptor
350 */ 395 */
351void nfs_pageio_complete(struct nfs_pageio_descriptor *desc) 396void nfs_pageio_complete(struct nfs_pageio_descriptor *desc)
352{ 397{
353 nfs_pageio_doio(desc); 398 for (;;) {
399 nfs_pageio_doio(desc);
400 if (!desc->pg_recoalesce)
401 break;
402 if (!nfs_do_recoalesce(desc))
403 break;
404 }
354} 405}
355 406
356/** 407/**
@@ -369,7 +420,7 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
369 if (!list_empty(&desc->pg_list)) { 420 if (!list_empty(&desc->pg_list)) {
370 struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev); 421 struct nfs_page *prev = nfs_list_entry(desc->pg_list.prev);
371 if (index != prev->wb_index + 1) 422 if (index != prev->wb_index + 1)
372 nfs_pageio_doio(desc); 423 nfs_pageio_complete(desc);
373 } 424 }
374} 425}
375 426
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 29c0ca7fc347..38e5508555c6 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -28,6 +28,7 @@
28 */ 28 */
29 29
30#include <linux/nfs_fs.h> 30#include <linux/nfs_fs.h>
31#include <linux/nfs_page.h>
31#include "internal.h" 32#include "internal.h"
32#include "pnfs.h" 33#include "pnfs.h"
33#include "iostat.h" 34#include "iostat.h"
@@ -448,11 +449,20 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
448void 449void
449pnfs_destroy_all_layouts(struct nfs_client *clp) 450pnfs_destroy_all_layouts(struct nfs_client *clp)
450{ 451{
452 struct nfs_server *server;
451 struct pnfs_layout_hdr *lo; 453 struct pnfs_layout_hdr *lo;
452 LIST_HEAD(tmp_list); 454 LIST_HEAD(tmp_list);
453 455
456 nfs4_deviceid_mark_client_invalid(clp);
457 nfs4_deviceid_purge_client(clp);
458
454 spin_lock(&clp->cl_lock); 459 spin_lock(&clp->cl_lock);
455 list_splice_init(&clp->cl_layouts, &tmp_list); 460 rcu_read_lock();
461 list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
462 if (!list_empty(&server->layouts))
463 list_splice_init(&server->layouts, &tmp_list);
464 }
465 rcu_read_unlock();
456 spin_unlock(&clp->cl_lock); 466 spin_unlock(&clp->cl_lock);
457 467
458 while (!list_empty(&tmp_list)) { 468 while (!list_empty(&tmp_list)) {
@@ -661,6 +671,7 @@ _pnfs_return_layout(struct inode *ino)
661 lrp->args.stateid = stateid; 671 lrp->args.stateid = stateid;
662 lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id; 672 lrp->args.layout_type = NFS_SERVER(ino)->pnfs_curr_ld->id;
663 lrp->args.inode = ino; 673 lrp->args.inode = ino;
674 lrp->args.layout = lo;
664 lrp->clp = NFS_SERVER(ino)->nfs_client; 675 lrp->clp = NFS_SERVER(ino)->nfs_client;
665 676
666 status = nfs4_proc_layoutreturn(lrp); 677 status = nfs4_proc_layoutreturn(lrp);
@@ -920,7 +931,8 @@ pnfs_update_layout(struct inode *ino,
920 }; 931 };
921 unsigned pg_offset; 932 unsigned pg_offset;
922 struct nfs_inode *nfsi = NFS_I(ino); 933 struct nfs_inode *nfsi = NFS_I(ino);
923 struct nfs_client *clp = NFS_SERVER(ino)->nfs_client; 934 struct nfs_server *server = NFS_SERVER(ino);
935 struct nfs_client *clp = server->nfs_client;
924 struct pnfs_layout_hdr *lo; 936 struct pnfs_layout_hdr *lo;
925 struct pnfs_layout_segment *lseg = NULL; 937 struct pnfs_layout_segment *lseg = NULL;
926 bool first = false; 938 bool first = false;
@@ -964,7 +976,7 @@ pnfs_update_layout(struct inode *ino,
964 */ 976 */
965 spin_lock(&clp->cl_lock); 977 spin_lock(&clp->cl_lock);
966 BUG_ON(!list_empty(&lo->plh_layouts)); 978 BUG_ON(!list_empty(&lo->plh_layouts));
967 list_add_tail(&lo->plh_layouts, &clp->cl_layouts); 979 list_add_tail(&lo->plh_layouts, &server->layouts);
968 spin_unlock(&clp->cl_lock); 980 spin_unlock(&clp->cl_lock);
969 } 981 }
970 982
@@ -973,7 +985,8 @@ pnfs_update_layout(struct inode *ino,
973 arg.offset -= pg_offset; 985 arg.offset -= pg_offset;
974 arg.length += pg_offset; 986 arg.length += pg_offset;
975 } 987 }
976 arg.length = PAGE_CACHE_ALIGN(arg.length); 988 if (arg.length != NFS4_MAX_UINT64)
989 arg.length = PAGE_CACHE_ALIGN(arg.length);
977 990
978 lseg = send_layoutget(lo, ctx, &arg, gfp_flags); 991 lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
979 if (!lseg && first) { 992 if (!lseg && first) {
@@ -991,6 +1004,7 @@ out_unlock:
991 spin_unlock(&ino->i_lock); 1004 spin_unlock(&ino->i_lock);
992 goto out; 1005 goto out;
993} 1006}
1007EXPORT_SYMBOL_GPL(pnfs_update_layout);
994 1008
995int 1009int
996pnfs_layout_process(struct nfs4_layoutget *lgp) 1010pnfs_layout_process(struct nfs4_layoutget *lgp)
@@ -1048,35 +1062,71 @@ out_forget_reply:
1048 goto out; 1062 goto out;
1049} 1063}
1050 1064
1065void
1066pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
1067{
1068 BUG_ON(pgio->pg_lseg != NULL);
1069
1070 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
1071 req->wb_context,
1072 req_offset(req),
1073 req->wb_bytes,
1074 IOMODE_READ,
1075 GFP_KERNEL);
1076 /* If no lseg, fall back to read through mds */
1077 if (pgio->pg_lseg == NULL)
1078 nfs_pageio_reset_read_mds(pgio);
1079
1080}
1081EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);
1082
1083void
1084pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
1085{
1086 BUG_ON(pgio->pg_lseg != NULL);
1087
1088 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
1089 req->wb_context,
1090 req_offset(req),
1091 req->wb_bytes,
1092 IOMODE_RW,
1093 GFP_NOFS);
1094 /* If no lseg, fall back to write through mds */
1095 if (pgio->pg_lseg == NULL)
1096 nfs_pageio_reset_write_mds(pgio);
1097}
1098EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_write);
1099
1051bool 1100bool
1052pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, 1101pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
1053 struct nfs_page *req)
1054{ 1102{
1055 enum pnfs_iomode access_type; 1103 struct nfs_server *server = NFS_SERVER(inode);
1056 gfp_t gfp_flags; 1104 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
1057 1105
1058 /* We assume that pg_ioflags == 0 iff we're reading a page */ 1106 if (ld == NULL)
1059 if (pgio->pg_ioflags == 0) { 1107 return false;
1060 access_type = IOMODE_READ; 1108 nfs_pageio_init(pgio, inode, ld->pg_read_ops, server->rsize, 0);
1061 gfp_flags = GFP_KERNEL; 1109 return true;
1062 } else { 1110}
1063 access_type = IOMODE_RW;
1064 gfp_flags = GFP_NOFS;
1065 }
1066 1111
1067 if (pgio->pg_lseg == NULL) { 1112bool
1068 if (pgio->pg_count != prev->wb_bytes) 1113pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags)
1069 return true; 1114{
1070 /* This is first coelesce call for a series of nfs_pages */ 1115 struct nfs_server *server = NFS_SERVER(inode);
1071 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1116 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
1072 prev->wb_context, 1117
1073 req_offset(prev), 1118 if (ld == NULL)
1074 pgio->pg_count, 1119 return false;
1075 access_type, 1120 nfs_pageio_init(pgio, inode, ld->pg_write_ops, server->wsize, ioflags);
1076 gfp_flags); 1121 return true;
1077 if (pgio->pg_lseg == NULL) 1122}
1078 return true; 1123
1079 } 1124bool
1125pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
1126 struct nfs_page *req)
1127{
1128 if (pgio->pg_lseg == NULL)
1129 return nfs_generic_pg_test(pgio, prev, req);
1080 1130
1081 /* 1131 /*
1082 * Test if a nfs_page is fully contained in the pnfs_layout_range. 1132 * Test if a nfs_page is fully contained in the pnfs_layout_range.
@@ -1120,15 +1170,30 @@ pnfs_ld_write_done(struct nfs_write_data *data)
1120} 1170}
1121EXPORT_SYMBOL_GPL(pnfs_ld_write_done); 1171EXPORT_SYMBOL_GPL(pnfs_ld_write_done);
1122 1172
1123enum pnfs_try_status 1173static void
1174pnfs_write_through_mds(struct nfs_pageio_descriptor *desc,
1175 struct nfs_write_data *data)
1176{
1177 list_splice_tail_init(&data->pages, &desc->pg_list);
1178 if (data->req && list_empty(&data->req->wb_list))
1179 nfs_list_add_request(data->req, &desc->pg_list);
1180 nfs_pageio_reset_write_mds(desc);
1181 desc->pg_recoalesce = 1;
1182 nfs_writedata_release(data);
1183}
1184
1185static enum pnfs_try_status
1124pnfs_try_to_write_data(struct nfs_write_data *wdata, 1186pnfs_try_to_write_data(struct nfs_write_data *wdata,
1125 const struct rpc_call_ops *call_ops, int how) 1187 const struct rpc_call_ops *call_ops,
1188 struct pnfs_layout_segment *lseg,
1189 int how)
1126{ 1190{
1127 struct inode *inode = wdata->inode; 1191 struct inode *inode = wdata->inode;
1128 enum pnfs_try_status trypnfs; 1192 enum pnfs_try_status trypnfs;
1129 struct nfs_server *nfss = NFS_SERVER(inode); 1193 struct nfs_server *nfss = NFS_SERVER(inode);
1130 1194
1131 wdata->mds_ops = call_ops; 1195 wdata->mds_ops = call_ops;
1196 wdata->lseg = get_lseg(lseg);
1132 1197
1133 dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__, 1198 dprintk("%s: Writing ino:%lu %u@%llu (how %d)\n", __func__,
1134 inode->i_ino, wdata->args.count, wdata->args.offset, how); 1199 inode->i_ino, wdata->args.count, wdata->args.offset, how);
@@ -1144,6 +1209,44 @@ pnfs_try_to_write_data(struct nfs_write_data *wdata,
1144 return trypnfs; 1209 return trypnfs;
1145} 1210}
1146 1211
1212static void
1213pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *head, int how)
1214{
1215 struct nfs_write_data *data;
1216 const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
1217 struct pnfs_layout_segment *lseg = desc->pg_lseg;
1218
1219 desc->pg_lseg = NULL;
1220 while (!list_empty(head)) {
1221 enum pnfs_try_status trypnfs;
1222
1223 data = list_entry(head->next, struct nfs_write_data, list);
1224 list_del_init(&data->list);
1225
1226 trypnfs = pnfs_try_to_write_data(data, call_ops, lseg, how);
1227 if (trypnfs == PNFS_NOT_ATTEMPTED)
1228 pnfs_write_through_mds(desc, data);
1229 }
1230 put_lseg(lseg);
1231}
1232
1233int
1234pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
1235{
1236 LIST_HEAD(head);
1237 int ret;
1238
1239 ret = nfs_generic_flush(desc, &head);
1240 if (ret != 0) {
1241 put_lseg(desc->pg_lseg);
1242 desc->pg_lseg = NULL;
1243 return ret;
1244 }
1245 pnfs_do_multiple_writes(desc, &head, desc->pg_ioflags);
1246 return 0;
1247}
1248EXPORT_SYMBOL_GPL(pnfs_generic_pg_writepages);
1249
1147/* 1250/*
1148 * Called by non rpc-based layout drivers 1251 * Called by non rpc-based layout drivers
1149 */ 1252 */
@@ -1167,18 +1270,32 @@ pnfs_ld_read_done(struct nfs_read_data *data)
1167} 1270}
1168EXPORT_SYMBOL_GPL(pnfs_ld_read_done); 1271EXPORT_SYMBOL_GPL(pnfs_ld_read_done);
1169 1272
1273static void
1274pnfs_read_through_mds(struct nfs_pageio_descriptor *desc,
1275 struct nfs_read_data *data)
1276{
1277 list_splice_tail_init(&data->pages, &desc->pg_list);
1278 if (data->req && list_empty(&data->req->wb_list))
1279 nfs_list_add_request(data->req, &desc->pg_list);
1280 nfs_pageio_reset_read_mds(desc);
1281 desc->pg_recoalesce = 1;
1282 nfs_readdata_release(data);
1283}
1284
1170/* 1285/*
1171 * Call the appropriate parallel I/O subsystem read function. 1286 * Call the appropriate parallel I/O subsystem read function.
1172 */ 1287 */
1173enum pnfs_try_status 1288static enum pnfs_try_status
1174pnfs_try_to_read_data(struct nfs_read_data *rdata, 1289pnfs_try_to_read_data(struct nfs_read_data *rdata,
1175 const struct rpc_call_ops *call_ops) 1290 const struct rpc_call_ops *call_ops,
1291 struct pnfs_layout_segment *lseg)
1176{ 1292{
1177 struct inode *inode = rdata->inode; 1293 struct inode *inode = rdata->inode;
1178 struct nfs_server *nfss = NFS_SERVER(inode); 1294 struct nfs_server *nfss = NFS_SERVER(inode);
1179 enum pnfs_try_status trypnfs; 1295 enum pnfs_try_status trypnfs;
1180 1296
1181 rdata->mds_ops = call_ops; 1297 rdata->mds_ops = call_ops;
1298 rdata->lseg = get_lseg(lseg);
1182 1299
1183 dprintk("%s: Reading ino:%lu %u@%llu\n", 1300 dprintk("%s: Reading ino:%lu %u@%llu\n",
1184 __func__, inode->i_ino, rdata->args.count, rdata->args.offset); 1301 __func__, inode->i_ino, rdata->args.count, rdata->args.offset);
@@ -1194,6 +1311,44 @@ pnfs_try_to_read_data(struct nfs_read_data *rdata,
1194 return trypnfs; 1311 return trypnfs;
1195} 1312}
1196 1313
1314static void
1315pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *head)
1316{
1317 struct nfs_read_data *data;
1318 const struct rpc_call_ops *call_ops = desc->pg_rpc_callops;
1319 struct pnfs_layout_segment *lseg = desc->pg_lseg;
1320
1321 desc->pg_lseg = NULL;
1322 while (!list_empty(head)) {
1323 enum pnfs_try_status trypnfs;
1324
1325 data = list_entry(head->next, struct nfs_read_data, list);
1326 list_del_init(&data->list);
1327
1328 trypnfs = pnfs_try_to_read_data(data, call_ops, lseg);
1329 if (trypnfs == PNFS_NOT_ATTEMPTED)
1330 pnfs_read_through_mds(desc, data);
1331 }
1332 put_lseg(lseg);
1333}
1334
1335int
1336pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
1337{
1338 LIST_HEAD(head);
1339 int ret;
1340
1341 ret = nfs_generic_pagein(desc, &head);
1342 if (ret != 0) {
1343 put_lseg(desc->pg_lseg);
1344 desc->pg_lseg = NULL;
1345 return ret;
1346 }
1347 pnfs_do_multiple_reads(desc, &head);
1348 return 0;
1349}
1350EXPORT_SYMBOL_GPL(pnfs_generic_pg_readpages);
1351
1197/* 1352/*
1198 * Currently there is only one (whole file) write lseg. 1353 * Currently there is only one (whole file) write lseg.
1199 */ 1354 */
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 96bf4e6f45be..078670dfbe04 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -87,7 +87,8 @@ struct pnfs_layoutdriver_type {
87 void (*free_lseg) (struct pnfs_layout_segment *lseg); 87 void (*free_lseg) (struct pnfs_layout_segment *lseg);
88 88
89 /* test for nfs page cache coalescing */ 89 /* test for nfs page cache coalescing */
90 bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *); 90 const struct nfs_pageio_ops *pg_read_ops;
91 const struct nfs_pageio_ops *pg_write_ops;
91 92
92 /* Returns true if layoutdriver wants to divert this request to 93 /* Returns true if layoutdriver wants to divert this request to
93 * driver's commit routine. 94 * driver's commit routine.
@@ -148,16 +149,16 @@ extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
148/* pnfs.c */ 149/* pnfs.c */
149void get_layout_hdr(struct pnfs_layout_hdr *lo); 150void get_layout_hdr(struct pnfs_layout_hdr *lo);
150void put_lseg(struct pnfs_layout_segment *lseg); 151void put_lseg(struct pnfs_layout_segment *lseg);
151struct pnfs_layout_segment * 152
152pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx, 153bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *);
153 loff_t pos, u64 count, enum pnfs_iomode access_type, 154bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *, struct inode *, int);
154 gfp_t gfp_flags); 155
155void set_pnfs_layoutdriver(struct nfs_server *, u32 id); 156void set_pnfs_layoutdriver(struct nfs_server *, u32 id);
156void unset_pnfs_layoutdriver(struct nfs_server *); 157void unset_pnfs_layoutdriver(struct nfs_server *);
157enum pnfs_try_status pnfs_try_to_write_data(struct nfs_write_data *, 158void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
158 const struct rpc_call_ops *, int); 159int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
159enum pnfs_try_status pnfs_try_to_read_data(struct nfs_read_data *, 160void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *);
160 const struct rpc_call_ops *); 161int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
161bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); 162bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req);
162int pnfs_layout_process(struct nfs4_layoutget *lgp); 163int pnfs_layout_process(struct nfs4_layoutget *lgp);
163void pnfs_free_lseg_list(struct list_head *tmp_list); 164void pnfs_free_lseg_list(struct list_head *tmp_list);
@@ -182,6 +183,19 @@ int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
182int _pnfs_return_layout(struct inode *); 183int _pnfs_return_layout(struct inode *);
183int pnfs_ld_write_done(struct nfs_write_data *); 184int pnfs_ld_write_done(struct nfs_write_data *);
184int pnfs_ld_read_done(struct nfs_read_data *); 185int pnfs_ld_read_done(struct nfs_read_data *);
186struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino,
187 struct nfs_open_context *ctx,
188 loff_t pos,
189 u64 count,
190 enum pnfs_iomode iomode,
191 gfp_t gfp_flags);
192
193void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp);
194
195/* nfs4_deviceid_flags */
196enum {
197 NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */
198};
185 199
186/* pnfs_dev.c */ 200/* pnfs_dev.c */
187struct nfs4_deviceid_node { 201struct nfs4_deviceid_node {
@@ -189,13 +203,13 @@ struct nfs4_deviceid_node {
189 struct hlist_node tmpnode; 203 struct hlist_node tmpnode;
190 const struct pnfs_layoutdriver_type *ld; 204 const struct pnfs_layoutdriver_type *ld;
191 const struct nfs_client *nfs_client; 205 const struct nfs_client *nfs_client;
206 unsigned long flags;
192 struct nfs4_deviceid deviceid; 207 struct nfs4_deviceid deviceid;
193 atomic_t ref; 208 atomic_t ref;
194}; 209};
195 210
196void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id); 211void nfs4_print_deviceid(const struct nfs4_deviceid *dev_id);
197struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); 212struct nfs4_deviceid_node *nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
198struct nfs4_deviceid_node *nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
199void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *); 213void nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *, const struct nfs_client *, const struct nfs4_deviceid *);
200void nfs4_init_deviceid_node(struct nfs4_deviceid_node *, 214void nfs4_init_deviceid_node(struct nfs4_deviceid_node *,
201 const struct pnfs_layoutdriver_type *, 215 const struct pnfs_layoutdriver_type *,
@@ -293,15 +307,6 @@ static inline int pnfs_return_layout(struct inode *ino)
293 return 0; 307 return 0;
294} 308}
295 309
296static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio,
297 struct inode *inode)
298{
299 struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld;
300
301 if (ld)
302 pgio->pg_test = ld->pg_test;
303}
304
305#else /* CONFIG_NFS_V4_1 */ 310#else /* CONFIG_NFS_V4_1 */
306 311
307static inline void pnfs_destroy_all_layouts(struct nfs_client *clp) 312static inline void pnfs_destroy_all_layouts(struct nfs_client *clp)
@@ -322,28 +327,6 @@ static inline void put_lseg(struct pnfs_layout_segment *lseg)
322{ 327{
323} 328}
324 329
325static inline struct pnfs_layout_segment *
326pnfs_update_layout(struct inode *ino, struct nfs_open_context *ctx,
327 loff_t pos, u64 count, enum pnfs_iomode access_type,
328 gfp_t gfp_flags)
329{
330 return NULL;
331}
332
333static inline enum pnfs_try_status
334pnfs_try_to_read_data(struct nfs_read_data *data,
335 const struct rpc_call_ops *call_ops)
336{
337 return PNFS_NOT_ATTEMPTED;
338}
339
340static inline enum pnfs_try_status
341pnfs_try_to_write_data(struct nfs_write_data *data,
342 const struct rpc_call_ops *call_ops, int how)
343{
344 return PNFS_NOT_ATTEMPTED;
345}
346
347static inline int pnfs_return_layout(struct inode *ino) 330static inline int pnfs_return_layout(struct inode *ino)
348{ 331{
349 return 0; 332 return 0;
@@ -385,9 +368,14 @@ static inline void unset_pnfs_layoutdriver(struct nfs_server *s)
385{ 368{
386} 369}
387 370
388static inline void pnfs_pageio_init(struct nfs_pageio_descriptor *pgio, 371static inline bool pnfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode)
389 struct inode *inode)
390{ 372{
373 return false;
374}
375
376static inline bool pnfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, struct inode *inode, int ioflags)
377{
378 return false;
391} 379}
392 380
393static inline void 381static inline void
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index f0f8e1e22f6c..6fda5228ef56 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -100,8 +100,8 @@ _find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
100 100
101 rcu_read_lock(); 101 rcu_read_lock();
102 d = _lookup_deviceid(ld, clp, id, hash); 102 d = _lookup_deviceid(ld, clp, id, hash);
103 if (d && !atomic_inc_not_zero(&d->ref)) 103 if (d != NULL)
104 d = NULL; 104 atomic_inc(&d->ref);
105 rcu_read_unlock(); 105 rcu_read_unlock();
106 return d; 106 return d;
107} 107}
@@ -115,15 +115,15 @@ nfs4_find_get_deviceid(const struct pnfs_layoutdriver_type *ld,
115EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid); 115EXPORT_SYMBOL_GPL(nfs4_find_get_deviceid);
116 116
117/* 117/*
118 * Unhash and put deviceid 118 * Remove a deviceid from cache
119 * 119 *
120 * @clp nfs_client associated with deviceid 120 * @clp nfs_client associated with deviceid
121 * @id the deviceid to unhash 121 * @id the deviceid to unhash
122 * 122 *
123 * @ret the unhashed node, if found and dereferenced to zero, NULL otherwise. 123 * @ret the unhashed node, if found and dereferenced to zero, NULL otherwise.
124 */ 124 */
125struct nfs4_deviceid_node * 125void
126nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld, 126nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld,
127 const struct nfs_client *clp, const struct nfs4_deviceid *id) 127 const struct nfs_client *clp, const struct nfs4_deviceid *id)
128{ 128{
129 struct nfs4_deviceid_node *d; 129 struct nfs4_deviceid_node *d;
@@ -134,7 +134,7 @@ nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld,
134 rcu_read_unlock(); 134 rcu_read_unlock();
135 if (!d) { 135 if (!d) {
136 spin_unlock(&nfs4_deviceid_lock); 136 spin_unlock(&nfs4_deviceid_lock);
137 return NULL; 137 return;
138 } 138 }
139 hlist_del_init_rcu(&d->node); 139 hlist_del_init_rcu(&d->node);
140 spin_unlock(&nfs4_deviceid_lock); 140 spin_unlock(&nfs4_deviceid_lock);
@@ -142,28 +142,7 @@ nfs4_unhash_put_deviceid(const struct pnfs_layoutdriver_type *ld,
142 142
143 /* balance the initial ref set in pnfs_insert_deviceid */ 143 /* balance the initial ref set in pnfs_insert_deviceid */
144 if (atomic_dec_and_test(&d->ref)) 144 if (atomic_dec_and_test(&d->ref))
145 return d; 145 d->ld->free_deviceid_node(d);
146
147 return NULL;
148}
149EXPORT_SYMBOL_GPL(nfs4_unhash_put_deviceid);
150
151/*
152 * Delete a deviceid from cache
153 *
154 * @clp struct nfs_client qualifying the deviceid
155 * @id deviceid to delete
156 */
157void
158nfs4_delete_deviceid(const struct pnfs_layoutdriver_type *ld,
159 const struct nfs_client *clp, const struct nfs4_deviceid *id)
160{
161 struct nfs4_deviceid_node *d;
162
163 d = nfs4_unhash_put_deviceid(ld, clp, id);
164 if (!d)
165 return;
166 d->ld->free_deviceid_node(d);
167} 146}
168EXPORT_SYMBOL_GPL(nfs4_delete_deviceid); 147EXPORT_SYMBOL_GPL(nfs4_delete_deviceid);
169 148
@@ -177,6 +156,7 @@ nfs4_init_deviceid_node(struct nfs4_deviceid_node *d,
177 INIT_HLIST_NODE(&d->tmpnode); 156 INIT_HLIST_NODE(&d->tmpnode);
178 d->ld = ld; 157 d->ld = ld;
179 d->nfs_client = nfs_client; 158 d->nfs_client = nfs_client;
159 d->flags = 0;
180 d->deviceid = *id; 160 d->deviceid = *id;
181 atomic_set(&d->ref, 1); 161 atomic_set(&d->ref, 1);
182} 162}
@@ -221,16 +201,15 @@ EXPORT_SYMBOL_GPL(nfs4_insert_deviceid_node);
221 * 201 *
222 * @d deviceid node to put 202 * @d deviceid node to put
223 * 203 *
224 * @ret true iff the node was deleted 204 * return true iff the node was deleted
205 * Note that the test for d->ref == 0 is sufficient to establish
206 * that the node is no longer hashed in the global device id cache.
225 */ 207 */
226bool 208bool
227nfs4_put_deviceid_node(struct nfs4_deviceid_node *d) 209nfs4_put_deviceid_node(struct nfs4_deviceid_node *d)
228{ 210{
229 if (!atomic_dec_and_lock(&d->ref, &nfs4_deviceid_lock)) 211 if (!atomic_dec_and_test(&d->ref))
230 return false; 212 return false;
231 hlist_del_init_rcu(&d->node);
232 spin_unlock(&nfs4_deviceid_lock);
233 synchronize_rcu();
234 d->ld->free_deviceid_node(d); 213 d->ld->free_deviceid_node(d);
235 return true; 214 return true;
236} 215}
@@ -275,3 +254,22 @@ nfs4_deviceid_purge_client(const struct nfs_client *clp)
275 for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++) 254 for (h = 0; h < NFS4_DEVICE_ID_HASH_SIZE; h++)
276 _deviceid_purge_client(clp, h); 255 _deviceid_purge_client(clp, h);
277} 256}
257
258/*
259 * Stop use of all deviceids associated with an nfs_client
260 */
261void
262nfs4_deviceid_mark_client_invalid(struct nfs_client *clp)
263{
264 struct nfs4_deviceid_node *d;
265 struct hlist_node *n;
266 int i;
267
268 rcu_read_lock();
269 for (i = 0; i < NFS4_DEVICE_ID_HASH_SIZE; i ++){
270 hlist_for_each_entry_rcu(d, n, &nfs4_deviceid_cache[i], node)
271 if (d->nfs_client == clp)
272 set_bit(NFS_DEVICEID_INVALID, &d->flags);
273 }
274 rcu_read_unlock();
275}
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index a68679f538fc..2171c043ab08 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -30,8 +30,7 @@
30 30
31#define NFSDBG_FACILITY NFSDBG_PAGECACHE 31#define NFSDBG_FACILITY NFSDBG_PAGECACHE
32 32
33static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc); 33static const struct nfs_pageio_ops nfs_pageio_read_ops;
34static int nfs_pagein_one(struct nfs_pageio_descriptor *desc);
35static const struct rpc_call_ops nfs_read_partial_ops; 34static const struct rpc_call_ops nfs_read_partial_ops;
36static const struct rpc_call_ops nfs_read_full_ops; 35static const struct rpc_call_ops nfs_read_full_ops;
37 36
@@ -68,7 +67,7 @@ void nfs_readdata_free(struct nfs_read_data *p)
68 mempool_free(p, nfs_rdata_mempool); 67 mempool_free(p, nfs_rdata_mempool);
69} 68}
70 69
71static void nfs_readdata_release(struct nfs_read_data *rdata) 70void nfs_readdata_release(struct nfs_read_data *rdata)
72{ 71{
73 put_lseg(rdata->lseg); 72 put_lseg(rdata->lseg);
74 put_nfs_open_context(rdata->args.context); 73 put_nfs_open_context(rdata->args.context);
@@ -113,6 +112,27 @@ static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
113 } 112 }
114} 113}
115 114
115static void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
116 struct inode *inode)
117{
118 nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops,
119 NFS_SERVER(inode)->rsize, 0);
120}
121
122void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
123{
124 pgio->pg_ops = &nfs_pageio_read_ops;
125 pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize;
126}
127EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
128
129static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
130 struct inode *inode)
131{
132 if (!pnfs_pageio_init_read(pgio, inode))
133 nfs_pageio_init_read_mds(pgio, inode);
134}
135
116int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, 136int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
117 struct page *page) 137 struct page *page)
118{ 138{
@@ -131,14 +151,9 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
131 if (len < PAGE_CACHE_SIZE) 151 if (len < PAGE_CACHE_SIZE)
132 zero_user_segment(page, len, PAGE_CACHE_SIZE); 152 zero_user_segment(page, len, PAGE_CACHE_SIZE);
133 153
134 nfs_pageio_init(&pgio, inode, NULL, 0, 0); 154 nfs_pageio_init_read(&pgio, inode);
135 nfs_list_add_request(new, &pgio.pg_list); 155 nfs_pageio_add_request(&pgio, new);
136 pgio.pg_count = len; 156 nfs_pageio_complete(&pgio);
137
138 if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
139 nfs_pagein_multi(&pgio);
140 else
141 nfs_pagein_one(&pgio);
142 return 0; 157 return 0;
143} 158}
144 159
@@ -202,17 +217,14 @@ EXPORT_SYMBOL_GPL(nfs_initiate_read);
202/* 217/*
203 * Set up the NFS read request struct 218 * Set up the NFS read request struct
204 */ 219 */
205static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, 220static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
206 const struct rpc_call_ops *call_ops, 221 unsigned int count, unsigned int offset)
207 unsigned int count, unsigned int offset,
208 struct pnfs_layout_segment *lseg)
209{ 222{
210 struct inode *inode = req->wb_context->dentry->d_inode; 223 struct inode *inode = req->wb_context->dentry->d_inode;
211 224
212 data->req = req; 225 data->req = req;
213 data->inode = inode; 226 data->inode = inode;
214 data->cred = req->wb_context->cred; 227 data->cred = req->wb_context->cred;
215 data->lseg = get_lseg(lseg);
216 228
217 data->args.fh = NFS_FH(inode); 229 data->args.fh = NFS_FH(inode);
218 data->args.offset = req_offset(req) + offset; 230 data->args.offset = req_offset(req) + offset;
@@ -226,14 +238,36 @@ static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
226 data->res.count = count; 238 data->res.count = count;
227 data->res.eof = 0; 239 data->res.eof = 0;
228 nfs_fattr_init(&data->fattr); 240 nfs_fattr_init(&data->fattr);
241}
229 242
230 if (data->lseg && 243static int nfs_do_read(struct nfs_read_data *data,
231 (pnfs_try_to_read_data(data, call_ops) == PNFS_ATTEMPTED)) 244 const struct rpc_call_ops *call_ops)
232 return 0; 245{
246 struct inode *inode = data->args.context->dentry->d_inode;
233 247
234 return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops); 248 return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
235} 249}
236 250
251static int
252nfs_do_multiple_reads(struct list_head *head,
253 const struct rpc_call_ops *call_ops)
254{
255 struct nfs_read_data *data;
256 int ret = 0;
257
258 while (!list_empty(head)) {
259 int ret2;
260
261 data = list_entry(head->next, struct nfs_read_data, list);
262 list_del_init(&data->list);
263
264 ret2 = nfs_do_read(data, call_ops);
265 if (ret == 0)
266 ret = ret2;
267 }
268 return ret;
269}
270
237static void 271static void
238nfs_async_read_error(struct list_head *head) 272nfs_async_read_error(struct list_head *head)
239{ 273{
@@ -260,20 +294,19 @@ nfs_async_read_error(struct list_head *head)
260 * won't see the new data until our attribute cache is updated. This is more 294 * won't see the new data until our attribute cache is updated. This is more
261 * or less conventional NFS client behavior. 295 * or less conventional NFS client behavior.
262 */ 296 */
263static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc) 297static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
264{ 298{
265 struct nfs_page *req = nfs_list_entry(desc->pg_list.next); 299 struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
266 struct page *page = req->wb_page; 300 struct page *page = req->wb_page;
267 struct nfs_read_data *data; 301 struct nfs_read_data *data;
268 size_t rsize = NFS_SERVER(desc->pg_inode)->rsize, nbytes; 302 size_t rsize = desc->pg_bsize, nbytes;
269 unsigned int offset; 303 unsigned int offset;
270 int requests = 0; 304 int requests = 0;
271 int ret = 0; 305 int ret = 0;
272 struct pnfs_layout_segment *lseg;
273 LIST_HEAD(list);
274 306
275 nfs_list_remove_request(req); 307 nfs_list_remove_request(req);
276 308
309 offset = 0;
277 nbytes = desc->pg_count; 310 nbytes = desc->pg_count;
278 do { 311 do {
279 size_t len = min(nbytes,rsize); 312 size_t len = min(nbytes,rsize);
@@ -281,45 +314,21 @@ static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc)
281 data = nfs_readdata_alloc(1); 314 data = nfs_readdata_alloc(1);
282 if (!data) 315 if (!data)
283 goto out_bad; 316 goto out_bad;
284 list_add(&data->pages, &list); 317 data->pagevec[0] = page;
318 nfs_read_rpcsetup(req, data, len, offset);
319 list_add(&data->list, res);
285 requests++; 320 requests++;
286 nbytes -= len; 321 nbytes -= len;
322 offset += len;
287 } while(nbytes != 0); 323 } while(nbytes != 0);
288 atomic_set(&req->wb_complete, requests); 324 atomic_set(&req->wb_complete, requests);
289
290 BUG_ON(desc->pg_lseg != NULL);
291 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
292 req_offset(req), desc->pg_count,
293 IOMODE_READ, GFP_KERNEL);
294 ClearPageError(page); 325 ClearPageError(page);
295 offset = 0; 326 desc->pg_rpc_callops = &nfs_read_partial_ops;
296 nbytes = desc->pg_count;
297 do {
298 int ret2;
299
300 data = list_entry(list.next, struct nfs_read_data, pages);
301 list_del_init(&data->pages);
302
303 data->pagevec[0] = page;
304
305 if (nbytes < rsize)
306 rsize = nbytes;
307 ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
308 rsize, offset, lseg);
309 if (ret == 0)
310 ret = ret2;
311 offset += rsize;
312 nbytes -= rsize;
313 } while (nbytes != 0);
314 put_lseg(lseg);
315 desc->pg_lseg = NULL;
316
317 return ret; 327 return ret;
318
319out_bad: 328out_bad:
320 while (!list_empty(&list)) { 329 while (!list_empty(res)) {
321 data = list_entry(list.next, struct nfs_read_data, pages); 330 data = list_entry(res->next, struct nfs_read_data, list);
322 list_del(&data->pages); 331 list_del(&data->list);
323 nfs_readdata_free(data); 332 nfs_readdata_free(data);
324 } 333 }
325 SetPageError(page); 334 SetPageError(page);
@@ -327,19 +336,19 @@ out_bad:
327 return -ENOMEM; 336 return -ENOMEM;
328} 337}
329 338
330static int nfs_pagein_one(struct nfs_pageio_descriptor *desc) 339static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
331{ 340{
332 struct nfs_page *req; 341 struct nfs_page *req;
333 struct page **pages; 342 struct page **pages;
334 struct nfs_read_data *data; 343 struct nfs_read_data *data;
335 struct list_head *head = &desc->pg_list; 344 struct list_head *head = &desc->pg_list;
336 struct pnfs_layout_segment *lseg = desc->pg_lseg; 345 int ret = 0;
337 int ret = -ENOMEM;
338 346
339 data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base, 347 data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base,
340 desc->pg_count)); 348 desc->pg_count));
341 if (!data) { 349 if (!data) {
342 nfs_async_read_error(head); 350 nfs_async_read_error(head);
351 ret = -ENOMEM;
343 goto out; 352 goto out;
344 } 353 }
345 354
@@ -352,19 +361,37 @@ static int nfs_pagein_one(struct nfs_pageio_descriptor *desc)
352 *pages++ = req->wb_page; 361 *pages++ = req->wb_page;
353 } 362 }
354 req = nfs_list_entry(data->pages.next); 363 req = nfs_list_entry(data->pages.next);
355 if ((!lseg) && list_is_singular(&data->pages))
356 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
357 req_offset(req), desc->pg_count,
358 IOMODE_READ, GFP_KERNEL);
359 364
360 ret = nfs_read_rpcsetup(req, data, &nfs_read_full_ops, desc->pg_count, 365 nfs_read_rpcsetup(req, data, desc->pg_count, 0);
361 0, lseg); 366 list_add(&data->list, res);
367 desc->pg_rpc_callops = &nfs_read_full_ops;
362out: 368out:
363 put_lseg(lseg);
364 desc->pg_lseg = NULL;
365 return ret; 369 return ret;
366} 370}
367 371
372int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head)
373{
374 if (desc->pg_bsize < PAGE_CACHE_SIZE)
375 return nfs_pagein_multi(desc, head);
376 return nfs_pagein_one(desc, head);
377}
378
379static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
380{
381 LIST_HEAD(head);
382 int ret;
383
384 ret = nfs_generic_pagein(desc, &head);
385 if (ret == 0)
386 ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops);
387 return ret;
388}
389
390static const struct nfs_pageio_ops nfs_pageio_read_ops = {
391 .pg_test = nfs_generic_pg_test,
392 .pg_doio = nfs_generic_pg_readpages,
393};
394
368/* 395/*
369 * This is the callback from RPC telling us whether a reply was 396 * This is the callback from RPC telling us whether a reply was
370 * received or some error occurred (timeout or socket shutdown). 397 * received or some error occurred (timeout or socket shutdown).
@@ -635,8 +662,6 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
635 .pgio = &pgio, 662 .pgio = &pgio,
636 }; 663 };
637 struct inode *inode = mapping->host; 664 struct inode *inode = mapping->host;
638 struct nfs_server *server = NFS_SERVER(inode);
639 size_t rsize = server->rsize;
640 unsigned long npages; 665 unsigned long npages;
641 int ret = -ESTALE; 666 int ret = -ESTALE;
642 667
@@ -664,10 +689,7 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
664 if (ret == 0) 689 if (ret == 0)
665 goto read_complete; /* all pages were read */ 690 goto read_complete; /* all pages were read */
666 691
667 if (rsize < PAGE_CACHE_SIZE) 692 nfs_pageio_init_read(&pgio, inode);
668 nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0);
669 else
670 nfs_pageio_init(&pgio, inode, nfs_pagein_one, rsize, 0);
671 693
672 ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); 694 ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
673 695
diff --git a/fs/nfs/unlink.c b/fs/nfs/unlink.c
index 8d6864c2a5fa..b2fbbde58e44 100644
--- a/fs/nfs/unlink.c
+++ b/fs/nfs/unlink.c
@@ -147,7 +147,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
147 147
148 alias = d_lookup(parent, &data->args.name); 148 alias = d_lookup(parent, &data->args.name);
149 if (alias != NULL) { 149 if (alias != NULL) {
150 int ret = 0; 150 int ret;
151 void *devname_garbage = NULL; 151 void *devname_garbage = NULL;
152 152
153 /* 153 /*
@@ -155,14 +155,16 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
155 * the sillyrename information to the aliased dentry. 155 * the sillyrename information to the aliased dentry.
156 */ 156 */
157 nfs_free_dname(data); 157 nfs_free_dname(data);
158 ret = nfs_copy_dname(alias, data);
158 spin_lock(&alias->d_lock); 159 spin_lock(&alias->d_lock);
159 if (alias->d_inode != NULL && 160 if (ret == 0 && alias->d_inode != NULL &&
160 !(alias->d_flags & DCACHE_NFSFS_RENAMED)) { 161 !(alias->d_flags & DCACHE_NFSFS_RENAMED)) {
161 devname_garbage = alias->d_fsdata; 162 devname_garbage = alias->d_fsdata;
162 alias->d_fsdata = data; 163 alias->d_fsdata = data;
163 alias->d_flags |= DCACHE_NFSFS_RENAMED; 164 alias->d_flags |= DCACHE_NFSFS_RENAMED;
164 ret = 1; 165 ret = 1;
165 } 166 } else
167 ret = 0;
166 spin_unlock(&alias->d_lock); 168 spin_unlock(&alias->d_lock);
167 nfs_dec_sillycount(dir); 169 nfs_dec_sillycount(dir);
168 dput(alias); 170 dput(alias);
@@ -171,8 +173,7 @@ static int nfs_do_call_unlink(struct dentry *parent, struct inode *dir, struct n
171 * point dentry is definitely not a root, so we won't need 173 * point dentry is definitely not a root, so we won't need
172 * that anymore. 174 * that anymore.
173 */ 175 */
174 if (devname_garbage) 176 kfree(devname_garbage);
175 kfree(devname_garbage);
176 return ret; 177 return ret;
177 } 178 }
178 data->dir = igrab(dir); 179 data->dir = igrab(dir);
@@ -204,8 +205,6 @@ static int nfs_call_unlink(struct dentry *dentry, struct nfs_unlinkdata *data)
204 if (parent == NULL) 205 if (parent == NULL)
205 goto out_free; 206 goto out_free;
206 dir = parent->d_inode; 207 dir = parent->d_inode;
207 if (nfs_copy_dname(dentry, data) != 0)
208 goto out_dput;
209 /* Non-exclusive lock protects against concurrent lookup() calls */ 208 /* Non-exclusive lock protects against concurrent lookup() calls */
210 spin_lock(&dir->i_lock); 209 spin_lock(&dir->i_lock);
211 if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) { 210 if (atomic_inc_not_zero(&NFS_I(dir)->silly_count) == 0) {
@@ -366,6 +365,8 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata)
366 struct nfs_renamedata *data = calldata; 365 struct nfs_renamedata *data = calldata;
367 struct inode *old_dir = data->old_dir; 366 struct inode *old_dir = data->old_dir;
368 struct inode *new_dir = data->new_dir; 367 struct inode *new_dir = data->new_dir;
368 struct dentry *old_dentry = data->old_dentry;
369 struct dentry *new_dentry = data->new_dentry;
369 370
370 if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) { 371 if (!NFS_PROTO(old_dir)->rename_done(task, old_dir, new_dir)) {
371 nfs_restart_rpc(task, NFS_SERVER(old_dir)->nfs_client); 372 nfs_restart_rpc(task, NFS_SERVER(old_dir)->nfs_client);
@@ -373,12 +374,12 @@ static void nfs_async_rename_done(struct rpc_task *task, void *calldata)
373 } 374 }
374 375
375 if (task->tk_status != 0) { 376 if (task->tk_status != 0) {
376 nfs_cancel_async_unlink(data->old_dentry); 377 nfs_cancel_async_unlink(old_dentry);
377 return; 378 return;
378 } 379 }
379 380
380 nfs_set_verifier(data->old_dentry, nfs_save_change_attribute(old_dir)); 381 d_drop(old_dentry);
381 d_move(data->old_dentry, data->new_dentry); 382 d_drop(new_dentry);
382} 383}
383 384
384/** 385/**
@@ -501,6 +502,14 @@ nfs_async_rename(struct inode *old_dir, struct inode *new_dir,
501 * and only performs the unlink once the last reference to it is put. 502 * and only performs the unlink once the last reference to it is put.
502 * 503 *
503 * The final cleanup is done during dentry_iput. 504 * The final cleanup is done during dentry_iput.
505 *
506 * (Note: NFSv4 is stateful, and has opens, so in theory an NFSv4 server
507 * could take responsibility for keeping open files referenced. The server
508 * would also need to ensure that opened-but-deleted files were kept over
509 * reboots. However, we may not assume a server does so. (RFC 5661
510 * does provide an OPEN4_RESULT_PRESERVE_UNLINKED flag that a server can
511 * use to advertise that it does this; some day we may take advantage of
512 * it.))
504 */ 513 */
505int 514int
506nfs_sillyrename(struct inode *dir, struct dentry *dentry) 515nfs_sillyrename(struct inode *dir, struct dentry *dentry)
@@ -560,6 +569,14 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
560 if (error) 569 if (error)
561 goto out_dput; 570 goto out_dput;
562 571
572 /* populate unlinkdata with the right dname */
573 error = nfs_copy_dname(sdentry,
574 (struct nfs_unlinkdata *)dentry->d_fsdata);
575 if (error) {
576 nfs_cancel_async_unlink(dentry);
577 goto out_dput;
578 }
579
563 /* run the rename task, undo unlink if it fails */ 580 /* run the rename task, undo unlink if it fails */
564 task = nfs_async_rename(dir, dir, dentry, sdentry); 581 task = nfs_async_rename(dir, dir, dentry, sdentry);
565 if (IS_ERR(task)) { 582 if (IS_ERR(task)) {
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 00e37501fa3b..b39b37f80913 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -97,7 +97,7 @@ void nfs_writedata_free(struct nfs_write_data *p)
97 mempool_free(p, nfs_wdata_mempool); 97 mempool_free(p, nfs_wdata_mempool);
98} 98}
99 99
100static void nfs_writedata_release(struct nfs_write_data *wdata) 100void nfs_writedata_release(struct nfs_write_data *wdata)
101{ 101{
102 put_lseg(wdata->lseg); 102 put_lseg(wdata->lseg);
103 put_nfs_open_context(wdata->args.context); 103 put_nfs_open_context(wdata->args.context);
@@ -845,11 +845,9 @@ EXPORT_SYMBOL_GPL(nfs_initiate_write);
845/* 845/*
846 * Set up the argument/result storage required for the RPC call. 846 * Set up the argument/result storage required for the RPC call.
847 */ 847 */
848static int nfs_write_rpcsetup(struct nfs_page *req, 848static void nfs_write_rpcsetup(struct nfs_page *req,
849 struct nfs_write_data *data, 849 struct nfs_write_data *data,
850 const struct rpc_call_ops *call_ops,
851 unsigned int count, unsigned int offset, 850 unsigned int count, unsigned int offset,
852 struct pnfs_layout_segment *lseg,
853 int how) 851 int how)
854{ 852{
855 struct inode *inode = req->wb_context->dentry->d_inode; 853 struct inode *inode = req->wb_context->dentry->d_inode;
@@ -860,7 +858,6 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
860 data->req = req; 858 data->req = req;
861 data->inode = inode = req->wb_context->dentry->d_inode; 859 data->inode = inode = req->wb_context->dentry->d_inode;
862 data->cred = req->wb_context->cred; 860 data->cred = req->wb_context->cred;
863 data->lseg = get_lseg(lseg);
864 861
865 data->args.fh = NFS_FH(inode); 862 data->args.fh = NFS_FH(inode);
866 data->args.offset = req_offset(req) + offset; 863 data->args.offset = req_offset(req) + offset;
@@ -872,24 +869,51 @@ static int nfs_write_rpcsetup(struct nfs_page *req,
872 data->args.context = get_nfs_open_context(req->wb_context); 869 data->args.context = get_nfs_open_context(req->wb_context);
873 data->args.lock_context = req->wb_lock_context; 870 data->args.lock_context = req->wb_lock_context;
874 data->args.stable = NFS_UNSTABLE; 871 data->args.stable = NFS_UNSTABLE;
875 if (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) { 872 switch (how & (FLUSH_STABLE | FLUSH_COND_STABLE)) {
876 data->args.stable = NFS_DATA_SYNC; 873 case 0:
877 if (!nfs_need_commit(NFS_I(inode))) 874 break;
878 data->args.stable = NFS_FILE_SYNC; 875 case FLUSH_COND_STABLE:
876 if (nfs_need_commit(NFS_I(inode)))
877 break;
878 default:
879 data->args.stable = NFS_FILE_SYNC;
879 } 880 }
880 881
881 data->res.fattr = &data->fattr; 882 data->res.fattr = &data->fattr;
882 data->res.count = count; 883 data->res.count = count;
883 data->res.verf = &data->verf; 884 data->res.verf = &data->verf;
884 nfs_fattr_init(&data->fattr); 885 nfs_fattr_init(&data->fattr);
886}
885 887
886 if (data->lseg && 888static int nfs_do_write(struct nfs_write_data *data,
887 (pnfs_try_to_write_data(data, call_ops, how) == PNFS_ATTEMPTED)) 889 const struct rpc_call_ops *call_ops,
888 return 0; 890 int how)
891{
892 struct inode *inode = data->args.context->dentry->d_inode;
889 893
890 return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how); 894 return nfs_initiate_write(data, NFS_CLIENT(inode), call_ops, how);
891} 895}
892 896
897static int nfs_do_multiple_writes(struct list_head *head,
898 const struct rpc_call_ops *call_ops,
899 int how)
900{
901 struct nfs_write_data *data;
902 int ret = 0;
903
904 while (!list_empty(head)) {
905 int ret2;
906
907 data = list_entry(head->next, struct nfs_write_data, list);
908 list_del_init(&data->list);
909
910 ret2 = nfs_do_write(data, call_ops, how);
911 if (ret == 0)
912 ret = ret2;
913 }
914 return ret;
915}
916
893/* If a nfs_flush_* function fails, it should remove reqs from @head and 917/* If a nfs_flush_* function fails, it should remove reqs from @head and
894 * call this on each, which will prepare them to be retried on next 918 * call this on each, which will prepare them to be retried on next
895 * writeback using standard nfs. 919 * writeback using standard nfs.
@@ -907,17 +931,15 @@ static void nfs_redirty_request(struct nfs_page *req)
907 * Generate multiple small requests to write out a single 931 * Generate multiple small requests to write out a single
908 * contiguous dirty area on one page. 932 * contiguous dirty area on one page.
909 */ 933 */
910static int nfs_flush_multi(struct nfs_pageio_descriptor *desc) 934static int nfs_flush_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
911{ 935{
912 struct nfs_page *req = nfs_list_entry(desc->pg_list.next); 936 struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
913 struct page *page = req->wb_page; 937 struct page *page = req->wb_page;
914 struct nfs_write_data *data; 938 struct nfs_write_data *data;
915 size_t wsize = NFS_SERVER(desc->pg_inode)->wsize, nbytes; 939 size_t wsize = desc->pg_bsize, nbytes;
916 unsigned int offset; 940 unsigned int offset;
917 int requests = 0; 941 int requests = 0;
918 int ret = 0; 942 int ret = 0;
919 struct pnfs_layout_segment *lseg;
920 LIST_HEAD(list);
921 943
922 nfs_list_remove_request(req); 944 nfs_list_remove_request(req);
923 945
@@ -927,6 +949,7 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
927 desc->pg_ioflags &= ~FLUSH_COND_STABLE; 949 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
928 950
929 951
952 offset = 0;
930 nbytes = desc->pg_count; 953 nbytes = desc->pg_count;
931 do { 954 do {
932 size_t len = min(nbytes, wsize); 955 size_t len = min(nbytes, wsize);
@@ -934,45 +957,21 @@ static int nfs_flush_multi(struct nfs_pageio_descriptor *desc)
934 data = nfs_writedata_alloc(1); 957 data = nfs_writedata_alloc(1);
935 if (!data) 958 if (!data)
936 goto out_bad; 959 goto out_bad;
937 list_add(&data->pages, &list); 960 data->pagevec[0] = page;
961 nfs_write_rpcsetup(req, data, wsize, offset, desc->pg_ioflags);
962 list_add(&data->list, res);
938 requests++; 963 requests++;
939 nbytes -= len; 964 nbytes -= len;
965 offset += len;
940 } while (nbytes != 0); 966 } while (nbytes != 0);
941 atomic_set(&req->wb_complete, requests); 967 atomic_set(&req->wb_complete, requests);
942 968 desc->pg_rpc_callops = &nfs_write_partial_ops;
943 BUG_ON(desc->pg_lseg);
944 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
945 req_offset(req), desc->pg_count,
946 IOMODE_RW, GFP_NOFS);
947 ClearPageError(page);
948 offset = 0;
949 nbytes = desc->pg_count;
950 do {
951 int ret2;
952
953 data = list_entry(list.next, struct nfs_write_data, pages);
954 list_del_init(&data->pages);
955
956 data->pagevec[0] = page;
957
958 if (nbytes < wsize)
959 wsize = nbytes;
960 ret2 = nfs_write_rpcsetup(req, data, &nfs_write_partial_ops,
961 wsize, offset, lseg, desc->pg_ioflags);
962 if (ret == 0)
963 ret = ret2;
964 offset += wsize;
965 nbytes -= wsize;
966 } while (nbytes != 0);
967
968 put_lseg(lseg);
969 desc->pg_lseg = NULL;
970 return ret; 969 return ret;
971 970
972out_bad: 971out_bad:
973 while (!list_empty(&list)) { 972 while (!list_empty(res)) {
974 data = list_entry(list.next, struct nfs_write_data, pages); 973 data = list_entry(res->next, struct nfs_write_data, list);
975 list_del(&data->pages); 974 list_del(&data->list);
976 nfs_writedata_free(data); 975 nfs_writedata_free(data);
977 } 976 }
978 nfs_redirty_request(req); 977 nfs_redirty_request(req);
@@ -987,14 +986,13 @@ out_bad:
987 * This is the case if nfs_updatepage detects a conflicting request 986 * This is the case if nfs_updatepage detects a conflicting request
988 * that has been written but not committed. 987 * that has been written but not committed.
989 */ 988 */
990static int nfs_flush_one(struct nfs_pageio_descriptor *desc) 989static int nfs_flush_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
991{ 990{
992 struct nfs_page *req; 991 struct nfs_page *req;
993 struct page **pages; 992 struct page **pages;
994 struct nfs_write_data *data; 993 struct nfs_write_data *data;
995 struct list_head *head = &desc->pg_list; 994 struct list_head *head = &desc->pg_list;
996 struct pnfs_layout_segment *lseg = desc->pg_lseg; 995 int ret = 0;
997 int ret;
998 996
999 data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base, 997 data = nfs_writedata_alloc(nfs_page_array_len(desc->pg_base,
1000 desc->pg_count)); 998 desc->pg_count));
@@ -1016,32 +1014,62 @@ static int nfs_flush_one(struct nfs_pageio_descriptor *desc)
1016 *pages++ = req->wb_page; 1014 *pages++ = req->wb_page;
1017 } 1015 }
1018 req = nfs_list_entry(data->pages.next); 1016 req = nfs_list_entry(data->pages.next);
1019 if ((!lseg) && list_is_singular(&data->pages))
1020 lseg = pnfs_update_layout(desc->pg_inode, req->wb_context,
1021 req_offset(req), desc->pg_count,
1022 IOMODE_RW, GFP_NOFS);
1023 1017
1024 if ((desc->pg_ioflags & FLUSH_COND_STABLE) && 1018 if ((desc->pg_ioflags & FLUSH_COND_STABLE) &&
1025 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit)) 1019 (desc->pg_moreio || NFS_I(desc->pg_inode)->ncommit))
1026 desc->pg_ioflags &= ~FLUSH_COND_STABLE; 1020 desc->pg_ioflags &= ~FLUSH_COND_STABLE;
1027 1021
1028 /* Set up the argument struct */ 1022 /* Set up the argument struct */
1029 ret = nfs_write_rpcsetup(req, data, &nfs_write_full_ops, desc->pg_count, 0, lseg, desc->pg_ioflags); 1023 nfs_write_rpcsetup(req, data, desc->pg_count, 0, desc->pg_ioflags);
1024 list_add(&data->list, res);
1025 desc->pg_rpc_callops = &nfs_write_full_ops;
1030out: 1026out:
1031 put_lseg(lseg); /* Cleans any gotten in ->pg_test */
1032 desc->pg_lseg = NULL;
1033 return ret; 1027 return ret;
1034} 1028}
1035 1029
1036static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio, 1030int nfs_generic_flush(struct nfs_pageio_descriptor *desc, struct list_head *head)
1031{
1032 if (desc->pg_bsize < PAGE_CACHE_SIZE)
1033 return nfs_flush_multi(desc, head);
1034 return nfs_flush_one(desc, head);
1035}
1036
1037static int nfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
1038{
1039 LIST_HEAD(head);
1040 int ret;
1041
1042 ret = nfs_generic_flush(desc, &head);
1043 if (ret == 0)
1044 ret = nfs_do_multiple_writes(&head, desc->pg_rpc_callops,
1045 desc->pg_ioflags);
1046 return ret;
1047}
1048
1049static const struct nfs_pageio_ops nfs_pageio_write_ops = {
1050 .pg_test = nfs_generic_pg_test,
1051 .pg_doio = nfs_generic_pg_writepages,
1052};
1053
1054static void nfs_pageio_init_write_mds(struct nfs_pageio_descriptor *pgio,
1037 struct inode *inode, int ioflags) 1055 struct inode *inode, int ioflags)
1038{ 1056{
1039 size_t wsize = NFS_SERVER(inode)->wsize; 1057 nfs_pageio_init(pgio, inode, &nfs_pageio_write_ops,
1058 NFS_SERVER(inode)->wsize, ioflags);
1059}
1060
1061void nfs_pageio_reset_write_mds(struct nfs_pageio_descriptor *pgio)
1062{
1063 pgio->pg_ops = &nfs_pageio_write_ops;
1064 pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->wsize;
1065}
1066EXPORT_SYMBOL_GPL(nfs_pageio_reset_write_mds);
1040 1067
1041 if (wsize < PAGE_CACHE_SIZE) 1068static void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
1042 nfs_pageio_init(pgio, inode, nfs_flush_multi, wsize, ioflags); 1069 struct inode *inode, int ioflags)
1043 else 1070{
1044 nfs_pageio_init(pgio, inode, nfs_flush_one, wsize, ioflags); 1071 if (!pnfs_pageio_init_write(pgio, inode, ioflags))
1072 nfs_pageio_init_write_mds(pgio, inode, ioflags);
1045} 1073}
1046 1074
1047/* 1075/*
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 504b289ba680..a3c4bc800dce 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -563,6 +563,9 @@ enum {
563 NFSPROC4_CLNT_GETDEVICEINFO, 563 NFSPROC4_CLNT_GETDEVICEINFO,
564 NFSPROC4_CLNT_LAYOUTCOMMIT, 564 NFSPROC4_CLNT_LAYOUTCOMMIT,
565 NFSPROC4_CLNT_LAYOUTRETURN, 565 NFSPROC4_CLNT_LAYOUTRETURN,
566 NFSPROC4_CLNT_SECINFO_NO_NAME,
567 NFSPROC4_CLNT_TEST_STATEID,
568 NFSPROC4_CLNT_FREE_STATEID,
566}; 569};
567 570
568/* nfs41 types */ 571/* nfs41 types */
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 08c444aa0411..50a661f8b45a 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -16,6 +16,7 @@ struct nfs4_sequence_args;
16struct nfs4_sequence_res; 16struct nfs4_sequence_res;
17struct nfs_server; 17struct nfs_server;
18struct nfs4_minor_version_ops; 18struct nfs4_minor_version_ops;
19struct server_scope;
19 20
20/* 21/*
21 * The nfs_client identifies our client state to the server. 22 * The nfs_client identifies our client state to the server.
@@ -77,12 +78,13 @@ struct nfs_client {
77 /* The flags used for obtaining the clientid during EXCHANGE_ID */ 78 /* The flags used for obtaining the clientid during EXCHANGE_ID */
78 u32 cl_exchange_flags; 79 u32 cl_exchange_flags;
79 struct nfs4_session *cl_session; /* shared session */ 80 struct nfs4_session *cl_session; /* shared session */
80 struct list_head cl_layouts;
81#endif /* CONFIG_NFS_V4 */ 81#endif /* CONFIG_NFS_V4 */
82 82
83#ifdef CONFIG_NFS_FSCACHE 83#ifdef CONFIG_NFS_FSCACHE
84 struct fscache_cookie *fscache; /* client index cache cookie */ 84 struct fscache_cookie *fscache; /* client index cache cookie */
85#endif 85#endif
86
87 struct server_scope *server_scope; /* from exchange_id */
86}; 88};
87 89
88/* 90/*
@@ -149,6 +151,7 @@ struct nfs_server {
149 struct rb_root openowner_id; 151 struct rb_root openowner_id;
150 struct rb_root lockowner_id; 152 struct rb_root lockowner_id;
151#endif 153#endif
154 struct list_head layouts;
152 struct list_head delegations; 155 struct list_head delegations;
153 void (*destroy)(struct nfs_server *); 156 void (*destroy)(struct nfs_server *);
154 157
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 25311b3bedf8..e2791a27a901 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -55,20 +55,28 @@ struct nfs_page {
55 struct nfs_writeverf wb_verf; /* Commit cookie */ 55 struct nfs_writeverf wb_verf; /* Commit cookie */
56}; 56};
57 57
58struct nfs_pageio_descriptor;
59struct nfs_pageio_ops {
60 void (*pg_init)(struct nfs_pageio_descriptor *, struct nfs_page *);
61 bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
62 int (*pg_doio)(struct nfs_pageio_descriptor *);
63};
64
58struct nfs_pageio_descriptor { 65struct nfs_pageio_descriptor {
59 struct list_head pg_list; 66 struct list_head pg_list;
60 unsigned long pg_bytes_written; 67 unsigned long pg_bytes_written;
61 size_t pg_count; 68 size_t pg_count;
62 size_t pg_bsize; 69 size_t pg_bsize;
63 unsigned int pg_base; 70 unsigned int pg_base;
64 char pg_moreio; 71 unsigned char pg_moreio : 1,
72 pg_recoalesce : 1;
65 73
66 struct inode *pg_inode; 74 struct inode *pg_inode;
67 int (*pg_doio)(struct nfs_pageio_descriptor *); 75 const struct nfs_pageio_ops *pg_ops;
68 int pg_ioflags; 76 int pg_ioflags;
69 int pg_error; 77 int pg_error;
78 const struct rpc_call_ops *pg_rpc_callops;
70 struct pnfs_layout_segment *pg_lseg; 79 struct pnfs_layout_segment *pg_lseg;
71 bool (*pg_test)(struct nfs_pageio_descriptor *, struct nfs_page *, struct nfs_page *);
72}; 80};
73 81
74#define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags)) 82#define NFS_WBACK_BUSY(req) (test_bit(PG_BUSY,&(req)->wb_flags))
@@ -85,7 +93,7 @@ extern int nfs_scan_list(struct nfs_inode *nfsi, struct list_head *dst,
85 pgoff_t idx_start, unsigned int npages, int tag); 93 pgoff_t idx_start, unsigned int npages, int tag);
86extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, 94extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
87 struct inode *inode, 95 struct inode *inode,
88 int (*doio)(struct nfs_pageio_descriptor *desc), 96 const struct nfs_pageio_ops *pg_ops,
89 size_t bsize, 97 size_t bsize,
90 int how); 98 int how);
91extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, 99extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
@@ -100,7 +108,6 @@ extern void nfs_unlock_request(struct nfs_page *req);
100extern int nfs_set_page_tag_locked(struct nfs_page *req); 108extern int nfs_set_page_tag_locked(struct nfs_page *req);
101extern void nfs_clear_page_tag_locked(struct nfs_page *req); 109extern void nfs_clear_page_tag_locked(struct nfs_page *req);
102 110
103
104/* 111/*
105 * Lock the page of an asynchronous request without getting a new reference 112 * Lock the page of an asynchronous request without getting a new reference
106 */ 113 */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 00848d86ffb2..5b115956abac 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -269,9 +269,10 @@ struct nfs4_layoutcommit_data {
269}; 269};
270 270
271struct nfs4_layoutreturn_args { 271struct nfs4_layoutreturn_args {
272 __u32 layout_type; 272 struct pnfs_layout_hdr *layout;
273 struct inode *inode; 273 struct inode *inode;
274 nfs4_stateid stateid; 274 nfs4_stateid stateid;
275 __u32 layout_type;
275 struct nfs4_sequence_args seq_args; 276 struct nfs4_sequence_args seq_args;
276}; 277};
277 278
@@ -1060,6 +1061,7 @@ struct server_scope {
1060struct nfs41_exchange_id_res { 1061struct nfs41_exchange_id_res {
1061 struct nfs_client *client; 1062 struct nfs_client *client;
1062 u32 flags; 1063 u32 flags;
1064 struct server_scope *server_scope;
1063}; 1065};
1064 1066
1065struct nfs41_create_session_args { 1067struct nfs41_create_session_args {
@@ -1083,6 +1085,34 @@ struct nfs41_reclaim_complete_args {
1083struct nfs41_reclaim_complete_res { 1085struct nfs41_reclaim_complete_res {
1084 struct nfs4_sequence_res seq_res; 1086 struct nfs4_sequence_res seq_res;
1085}; 1087};
1088
1089#define SECINFO_STYLE_CURRENT_FH 0
1090#define SECINFO_STYLE_PARENT 1
1091struct nfs41_secinfo_no_name_args {
1092 int style;
1093 struct nfs4_sequence_args seq_args;
1094};
1095
1096struct nfs41_test_stateid_args {
1097 nfs4_stateid *stateid;
1098 struct nfs4_sequence_args seq_args;
1099};
1100
1101struct nfs41_test_stateid_res {
1102 unsigned int status;
1103 struct nfs4_sequence_res seq_res;
1104};
1105
1106struct nfs41_free_stateid_args {
1107 nfs4_stateid *stateid;
1108 struct nfs4_sequence_args seq_args;
1109};
1110
1111struct nfs41_free_stateid_res {
1112 unsigned int status;
1113 struct nfs4_sequence_res seq_res;
1114};
1115
1086#endif /* CONFIG_NFS_V4_1 */ 1116#endif /* CONFIG_NFS_V4_1 */
1087 1117
1088struct nfs_page; 1118struct nfs_page;
@@ -1096,6 +1126,7 @@ struct nfs_read_data {
1096 struct rpc_cred *cred; 1126 struct rpc_cred *cred;
1097 struct nfs_fattr fattr; /* fattr storage */ 1127 struct nfs_fattr fattr; /* fattr storage */
1098 struct list_head pages; /* Coalesced read requests */ 1128 struct list_head pages; /* Coalesced read requests */
1129 struct list_head list; /* lists of struct nfs_read_data */
1099 struct nfs_page *req; /* multi ops per nfs_page */ 1130 struct nfs_page *req; /* multi ops per nfs_page */
1100 struct page **pagevec; 1131 struct page **pagevec;
1101 unsigned int npages; /* Max length of pagevec */ 1132 unsigned int npages; /* Max length of pagevec */
@@ -1119,6 +1150,7 @@ struct nfs_write_data {
1119 struct nfs_fattr fattr; 1150 struct nfs_fattr fattr;
1120 struct nfs_writeverf verf; 1151 struct nfs_writeverf verf;
1121 struct list_head pages; /* Coalesced requests we wish to flush */ 1152 struct list_head pages; /* Coalesced requests we wish to flush */
1153 struct list_head list; /* lists of struct nfs_write_data */
1122 struct nfs_page *req; /* multi ops per nfs_page */ 1154 struct nfs_page *req; /* multi ops per nfs_page */
1123 struct page **pagevec; 1155 struct page **pagevec;
1124 unsigned int npages; /* Max length of pagevec */ 1156 unsigned int npages; /* Max length of pagevec */
diff --git a/include/linux/pnfs_osd_xdr.h b/include/linux/pnfs_osd_xdr.h
index 76efbdd01622..435dd5fa7453 100644
--- a/include/linux/pnfs_osd_xdr.h
+++ b/include/linux/pnfs_osd_xdr.h
@@ -41,9 +41,6 @@
41 41
42#include <linux/nfs_fs.h> 42#include <linux/nfs_fs.h>
43#include <linux/nfs_page.h> 43#include <linux/nfs_page.h>
44#include <scsi/osd_protocol.h>
45
46#define PNFS_OSD_OSDNAME_MAXSIZE 256
47 44
48/* 45/*
49 * draft-ietf-nfsv4-minorversion-22 46 * draft-ietf-nfsv4-minorversion-22
@@ -99,12 +96,6 @@ struct pnfs_osd_objid {
99#define _DEVID_HI(oid_device_id) \ 96#define _DEVID_HI(oid_device_id) \
100 (unsigned long long)be64_to_cpup(((__be64 *)(oid_device_id)->data) + 1) 97 (unsigned long long)be64_to_cpup(((__be64 *)(oid_device_id)->data) + 1)
101 98
102static inline int
103pnfs_osd_objid_xdr_sz(void)
104{
105 return (NFS4_DEVICEID4_SIZE / 4) + 2 + 2;
106}
107
108enum pnfs_osd_version { 99enum pnfs_osd_version {
109 PNFS_OSD_MISSING = 0, 100 PNFS_OSD_MISSING = 0,
110 PNFS_OSD_VERSION_1 = 1, 101 PNFS_OSD_VERSION_1 = 1,
@@ -189,8 +180,6 @@ struct pnfs_osd_targetid {
189 struct nfs4_string oti_scsi_device_id; 180 struct nfs4_string oti_scsi_device_id;
190}; 181};
191 182
192enum { PNFS_OSD_TARGETID_MAX = 1 + PNFS_OSD_OSDNAME_MAXSIZE / 4 };
193
194/* struct netaddr4 { 183/* struct netaddr4 {
195 * // see struct rpcb in RFC1833 184 * // see struct rpcb in RFC1833
196 * string r_netid<>; // network id 185 * string r_netid<>; // network id
@@ -207,12 +196,6 @@ struct pnfs_osd_targetaddr {
207 struct pnfs_osd_net_addr ota_netaddr; 196 struct pnfs_osd_net_addr ota_netaddr;
208}; 197};
209 198
210enum {
211 NETWORK_ID_MAX = 16 / 4,
212 UNIVERSAL_ADDRESS_MAX = 64 / 4,
213 PNFS_OSD_TARGETADDR_MAX = 3 + NETWORK_ID_MAX + UNIVERSAL_ADDRESS_MAX,
214};
215
216struct pnfs_osd_deviceaddr { 199struct pnfs_osd_deviceaddr {
217 struct pnfs_osd_targetid oda_targetid; 200 struct pnfs_osd_targetid oda_targetid;
218 struct pnfs_osd_targetaddr oda_targetaddr; 201 struct pnfs_osd_targetaddr oda_targetaddr;
@@ -222,15 +205,6 @@ struct pnfs_osd_deviceaddr {
222 struct nfs4_string oda_osdname; 205 struct nfs4_string oda_osdname;
223}; 206};
224 207
225enum {
226 ODA_OSDNAME_MAX = PNFS_OSD_OSDNAME_MAXSIZE / 4,
227 PNFS_OSD_DEVICEADDR_MAX =
228 PNFS_OSD_TARGETID_MAX + PNFS_OSD_TARGETADDR_MAX +
229 2 /*oda_lun*/ +
230 1 + OSD_SYSTEMID_LEN +
231 1 + ODA_OSDNAME_MAX,
232};
233
234/* LAYOUTCOMMIT: layoutupdate */ 208/* LAYOUTCOMMIT: layoutupdate */
235 209
236/* union pnfs_osd_deltaspaceused4 switch (bool dsu_valid) { 210/* union pnfs_osd_deltaspaceused4 switch (bool dsu_valid) {
@@ -279,7 +253,7 @@ struct pnfs_osd_ioerr {
279 u32 oer_errno; 253 u32 oer_errno;
280}; 254};
281 255
282/* OSD XDR API */ 256/* OSD XDR Client API */
283/* Layout helpers */ 257/* Layout helpers */
284/* Layout decoding is done in two parts: 258/* Layout decoding is done in two parts:
285 * 1. First Call pnfs_osd_xdr_decode_layout_map to read in only the header part 259 * 1. First Call pnfs_osd_xdr_decode_layout_map to read in only the header part
@@ -337,8 +311,7 @@ extern int
337pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr, 311pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr,
338 struct pnfs_osd_layoutupdate *lou); 312 struct pnfs_osd_layoutupdate *lou);
339 313
340/* osd_ioerror encoding/decoding (layout_return) */ 314/* osd_ioerror encoding (layout_return) */
341/* Client */
342extern __be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr); 315extern __be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr);
343extern void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr); 316extern void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr);
344 317
diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h
index 082884295f80..f7f3ce340c08 100644
--- a/include/linux/sunrpc/bc_xprt.h
+++ b/include/linux/sunrpc/bc_xprt.h
@@ -31,7 +31,7 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31#include <linux/sunrpc/xprt.h> 31#include <linux/sunrpc/xprt.h>
32#include <linux/sunrpc/sched.h> 32#include <linux/sunrpc/sched.h>
33 33
34#ifdef CONFIG_NFS_V4_1 34#ifdef CONFIG_SUNRPC_BACKCHANNEL
35struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt); 35struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt);
36void xprt_free_bc_request(struct rpc_rqst *req); 36void xprt_free_bc_request(struct rpc_rqst *req);
37int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); 37int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs);
@@ -47,7 +47,7 @@ static inline int svc_is_backchannel(const struct svc_rqst *rqstp)
47 return 1; 47 return 1;
48 return 0; 48 return 0;
49} 49}
50#else /* CONFIG_NFS_V4_1 */ 50#else /* CONFIG_SUNRPC_BACKCHANNEL */
51static inline int xprt_setup_backchannel(struct rpc_xprt *xprt, 51static inline int xprt_setup_backchannel(struct rpc_xprt *xprt,
52 unsigned int min_reqs) 52 unsigned int min_reqs)
53{ 53{
@@ -62,6 +62,6 @@ static inline int svc_is_backchannel(const struct svc_rqst *rqstp)
62static inline void xprt_free_bc_request(struct rpc_rqst *req) 62static inline void xprt_free_bc_request(struct rpc_rqst *req)
63{ 63{
64} 64}
65#endif /* CONFIG_NFS_V4_1 */ 65#endif /* CONFIG_SUNRPC_BACKCHANNEL */
66#endif /* _LINUX_SUNRPC_BC_XPRT_H */ 66#endif /* _LINUX_SUNRPC_BC_XPRT_H */
67 67
diff --git a/include/linux/sunrpc/sched.h b/include/linux/sunrpc/sched.h
index fe2d8e6b923b..e7756896f3ca 100644
--- a/include/linux/sunrpc/sched.h
+++ b/include/linux/sunrpc/sched.h
@@ -227,6 +227,10 @@ void rpc_init_wait_queue(struct rpc_wait_queue *, const char *);
227void rpc_destroy_wait_queue(struct rpc_wait_queue *); 227void rpc_destroy_wait_queue(struct rpc_wait_queue *);
228void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *, 228void rpc_sleep_on(struct rpc_wait_queue *, struct rpc_task *,
229 rpc_action action); 229 rpc_action action);
230void rpc_sleep_on_priority(struct rpc_wait_queue *,
231 struct rpc_task *,
232 rpc_action action,
233 int priority);
230void rpc_wake_up_queued_task(struct rpc_wait_queue *, 234void rpc_wake_up_queued_task(struct rpc_wait_queue *,
231 struct rpc_task *); 235 struct rpc_task *);
232void rpc_wake_up(struct rpc_wait_queue *); 236void rpc_wake_up(struct rpc_wait_queue *);
diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h
index 2f1e5186e049..223588a976a0 100644
--- a/include/linux/sunrpc/svc.h
+++ b/include/linux/sunrpc/svc.h
@@ -92,7 +92,7 @@ struct svc_serv {
92 struct module * sv_module; /* optional module to count when 92 struct module * sv_module; /* optional module to count when
93 * adding threads */ 93 * adding threads */
94 svc_thread_fn sv_function; /* main function for threads */ 94 svc_thread_fn sv_function; /* main function for threads */
95#if defined(CONFIG_NFS_V4_1) 95#if defined(CONFIG_SUNRPC_BACKCHANNEL)
96 struct list_head sv_cb_list; /* queue for callback requests 96 struct list_head sv_cb_list; /* queue for callback requests
97 * that arrive over the same 97 * that arrive over the same
98 * connection */ 98 * connection */
@@ -100,7 +100,7 @@ struct svc_serv {
100 wait_queue_head_t sv_cb_waitq; /* sleep here if there are no 100 wait_queue_head_t sv_cb_waitq; /* sleep here if there are no
101 * entries in the svc_cb_list */ 101 * entries in the svc_cb_list */
102 struct svc_xprt *sv_bc_xprt; /* callback on fore channel */ 102 struct svc_xprt *sv_bc_xprt; /* callback on fore channel */
103#endif /* CONFIG_NFS_V4_1 */ 103#endif /* CONFIG_SUNRPC_BACKCHANNEL */
104}; 104};
105 105
106/* 106/*
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index 81cce3b3ee66..15518a152ac3 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -22,6 +22,7 @@
22#define RPC_MIN_SLOT_TABLE (2U) 22#define RPC_MIN_SLOT_TABLE (2U)
23#define RPC_DEF_SLOT_TABLE (16U) 23#define RPC_DEF_SLOT_TABLE (16U)
24#define RPC_MAX_SLOT_TABLE (128U) 24#define RPC_MAX_SLOT_TABLE (128U)
25#define RPC_MAX_SLOT_TABLE_LIMIT (65536U)
25 26
26/* 27/*
27 * This describes a timeout strategy 28 * This describes a timeout strategy
@@ -100,18 +101,18 @@ struct rpc_rqst {
100 ktime_t rq_xtime; /* transmit time stamp */ 101 ktime_t rq_xtime; /* transmit time stamp */
101 int rq_ntrans; 102 int rq_ntrans;
102 103
103#if defined(CONFIG_NFS_V4_1) 104#if defined(CONFIG_SUNRPC_BACKCHANNEL)
104 struct list_head rq_bc_list; /* Callback service list */ 105 struct list_head rq_bc_list; /* Callback service list */
105 unsigned long rq_bc_pa_state; /* Backchannel prealloc state */ 106 unsigned long rq_bc_pa_state; /* Backchannel prealloc state */
106 struct list_head rq_bc_pa_list; /* Backchannel prealloc list */ 107 struct list_head rq_bc_pa_list; /* Backchannel prealloc list */
107#endif /* CONFIG_NFS_V4_1 */ 108#endif /* CONFIG_SUNRPC_BACKCHANNEL */
108}; 109};
109#define rq_svec rq_snd_buf.head 110#define rq_svec rq_snd_buf.head
110#define rq_slen rq_snd_buf.len 111#define rq_slen rq_snd_buf.len
111 112
112struct rpc_xprt_ops { 113struct rpc_xprt_ops {
113 void (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize); 114 void (*set_buffer_size)(struct rpc_xprt *xprt, size_t sndsize, size_t rcvsize);
114 int (*reserve_xprt)(struct rpc_task *task); 115 int (*reserve_xprt)(struct rpc_xprt *xprt, struct rpc_task *task);
115 void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task); 116 void (*release_xprt)(struct rpc_xprt *xprt, struct rpc_task *task);
116 void (*rpcbind)(struct rpc_task *task); 117 void (*rpcbind)(struct rpc_task *task);
117 void (*set_port)(struct rpc_xprt *xprt, unsigned short port); 118 void (*set_port)(struct rpc_xprt *xprt, unsigned short port);
@@ -164,12 +165,12 @@ struct rpc_xprt {
164 165
165 struct rpc_wait_queue binding; /* requests waiting on rpcbind */ 166 struct rpc_wait_queue binding; /* requests waiting on rpcbind */
166 struct rpc_wait_queue sending; /* requests waiting to send */ 167 struct rpc_wait_queue sending; /* requests waiting to send */
167 struct rpc_wait_queue resend; /* requests waiting to resend */
168 struct rpc_wait_queue pending; /* requests in flight */ 168 struct rpc_wait_queue pending; /* requests in flight */
169 struct rpc_wait_queue backlog; /* waiting for slot */ 169 struct rpc_wait_queue backlog; /* waiting for slot */
170 struct list_head free; /* free slots */ 170 struct list_head free; /* free slots */
171 struct rpc_rqst * slot; /* slot table storage */ 171 unsigned int max_reqs; /* max number of slots */
172 unsigned int max_reqs; /* total slots */ 172 unsigned int min_reqs; /* min number of slots */
173 atomic_t num_reqs; /* total slots */
173 unsigned long state; /* transport state */ 174 unsigned long state; /* transport state */
174 unsigned char shutdown : 1, /* being shut down */ 175 unsigned char shutdown : 1, /* being shut down */
175 resvport : 1; /* use a reserved port */ 176 resvport : 1; /* use a reserved port */
@@ -200,7 +201,7 @@ struct rpc_xprt {
200 u32 xid; /* Next XID value to use */ 201 u32 xid; /* Next XID value to use */
201 struct rpc_task * snd_task; /* Task blocked in send */ 202 struct rpc_task * snd_task; /* Task blocked in send */
202 struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */ 203 struct svc_xprt *bc_xprt; /* NFSv4.1 backchannel */
203#if defined(CONFIG_NFS_V4_1) 204#if defined(CONFIG_SUNRPC_BACKCHANNEL)
204 struct svc_serv *bc_serv; /* The RPC service which will */ 205 struct svc_serv *bc_serv; /* The RPC service which will */
205 /* process the callback */ 206 /* process the callback */
206 unsigned int bc_alloc_count; /* Total number of preallocs */ 207 unsigned int bc_alloc_count; /* Total number of preallocs */
@@ -208,7 +209,7 @@ struct rpc_xprt {
208 * items */ 209 * items */
209 struct list_head bc_pa_list; /* List of preallocated 210 struct list_head bc_pa_list; /* List of preallocated
210 * backchannel rpc_rqst's */ 211 * backchannel rpc_rqst's */
211#endif /* CONFIG_NFS_V4_1 */ 212#endif /* CONFIG_SUNRPC_BACKCHANNEL */
212 struct list_head recv; 213 struct list_head recv;
213 214
214 struct { 215 struct {
@@ -228,15 +229,15 @@ struct rpc_xprt {
228 const char *address_strings[RPC_DISPLAY_MAX]; 229 const char *address_strings[RPC_DISPLAY_MAX];
229}; 230};
230 231
231#if defined(CONFIG_NFS_V4_1) 232#if defined(CONFIG_SUNRPC_BACKCHANNEL)
232/* 233/*
233 * Backchannel flags 234 * Backchannel flags
234 */ 235 */
235#define RPC_BC_PA_IN_USE 0x0001 /* Preallocated backchannel */ 236#define RPC_BC_PA_IN_USE 0x0001 /* Preallocated backchannel */
236 /* buffer in use */ 237 /* buffer in use */
237#endif /* CONFIG_NFS_V4_1 */ 238#endif /* CONFIG_SUNRPC_BACKCHANNEL */
238 239
239#if defined(CONFIG_NFS_V4_1) 240#if defined(CONFIG_SUNRPC_BACKCHANNEL)
240static inline int bc_prealloc(struct rpc_rqst *req) 241static inline int bc_prealloc(struct rpc_rqst *req)
241{ 242{
242 return test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state); 243 return test_bit(RPC_BC_PA_IN_USE, &req->rq_bc_pa_state);
@@ -246,7 +247,7 @@ static inline int bc_prealloc(struct rpc_rqst *req)
246{ 247{
247 return 0; 248 return 0;
248} 249}
249#endif /* CONFIG_NFS_V4_1 */ 250#endif /* CONFIG_SUNRPC_BACKCHANNEL */
250 251
251struct xprt_create { 252struct xprt_create {
252 int ident; /* XPRT_TRANSPORT identifier */ 253 int ident; /* XPRT_TRANSPORT identifier */
@@ -271,8 +272,8 @@ struct xprt_class {
271struct rpc_xprt *xprt_create_transport(struct xprt_create *args); 272struct rpc_xprt *xprt_create_transport(struct xprt_create *args);
272void xprt_connect(struct rpc_task *task); 273void xprt_connect(struct rpc_task *task);
273void xprt_reserve(struct rpc_task *task); 274void xprt_reserve(struct rpc_task *task);
274int xprt_reserve_xprt(struct rpc_task *task); 275int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task);
275int xprt_reserve_xprt_cong(struct rpc_task *task); 276int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
276int xprt_prepare_transmit(struct rpc_task *task); 277int xprt_prepare_transmit(struct rpc_task *task);
277void xprt_transmit(struct rpc_task *task); 278void xprt_transmit(struct rpc_task *task);
278void xprt_end_transmit(struct rpc_task *task); 279void xprt_end_transmit(struct rpc_task *task);
@@ -282,7 +283,9 @@ void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task);
282void xprt_release(struct rpc_task *task); 283void xprt_release(struct rpc_task *task);
283struct rpc_xprt * xprt_get(struct rpc_xprt *xprt); 284struct rpc_xprt * xprt_get(struct rpc_xprt *xprt);
284void xprt_put(struct rpc_xprt *xprt); 285void xprt_put(struct rpc_xprt *xprt);
285struct rpc_xprt * xprt_alloc(struct net *net, int size, int max_req); 286struct rpc_xprt * xprt_alloc(struct net *net, size_t size,
287 unsigned int num_prealloc,
288 unsigned int max_req);
286void xprt_free(struct rpc_xprt *); 289void xprt_free(struct rpc_xprt *);
287 290
288static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p) 291static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p)
@@ -321,7 +324,6 @@ void xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie);
321#define XPRT_CLOSING (6) 324#define XPRT_CLOSING (6)
322#define XPRT_CONNECTION_ABORT (7) 325#define XPRT_CONNECTION_ABORT (7)
323#define XPRT_CONNECTION_CLOSE (8) 326#define XPRT_CONNECTION_CLOSE (8)
324#define XPRT_INITIALIZED (9)
325 327
326static inline void xprt_set_connected(struct rpc_xprt *xprt) 328static inline void xprt_set_connected(struct rpc_xprt *xprt)
327{ 329{
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index b2198e65d8bb..ffd243d09188 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -4,6 +4,10 @@ config SUNRPC
4config SUNRPC_GSS 4config SUNRPC_GSS
5 tristate 5 tristate
6 6
7config SUNRPC_BACKCHANNEL
8 bool
9 depends on SUNRPC
10
7config SUNRPC_XPRT_RDMA 11config SUNRPC_XPRT_RDMA
8 tristate 12 tristate
9 depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS && EXPERIMENTAL 13 depends on SUNRPC && INFINIBAND && INFINIBAND_ADDR_TRANS && EXPERIMENTAL
diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile
index 9d2fca5ad14a..8209a0411bca 100644
--- a/net/sunrpc/Makefile
+++ b/net/sunrpc/Makefile
@@ -13,6 +13,6 @@ sunrpc-y := clnt.o xprt.o socklib.o xprtsock.o sched.o \
13 addr.o rpcb_clnt.o timer.o xdr.o \ 13 addr.o rpcb_clnt.o timer.o xdr.o \
14 sunrpc_syms.o cache.o rpc_pipe.o \ 14 sunrpc_syms.o cache.o rpc_pipe.o \
15 svc_xprt.o 15 svc_xprt.o
16sunrpc-$(CONFIG_NFS_V4_1) += backchannel_rqst.o bc_svc.o 16sunrpc-$(CONFIG_SUNRPC_BACKCHANNEL) += backchannel_rqst.o bc_svc.o
17sunrpc-$(CONFIG_PROC_FS) += stats.o 17sunrpc-$(CONFIG_PROC_FS) += stats.o
18sunrpc-$(CONFIG_SYSCTL) += sysctl.o 18sunrpc-$(CONFIG_SYSCTL) += sysctl.o
diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c
index cf06af3b63c6..91eaa26e4c42 100644
--- a/net/sunrpc/backchannel_rqst.c
+++ b/net/sunrpc/backchannel_rqst.c
@@ -29,8 +29,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29#define RPCDBG_FACILITY RPCDBG_TRANS 29#define RPCDBG_FACILITY RPCDBG_TRANS
30#endif 30#endif
31 31
32#if defined(CONFIG_NFS_V4_1)
33
34/* 32/*
35 * Helper routines that track the number of preallocation elements 33 * Helper routines that track the number of preallocation elements
36 * on the transport. 34 * on the transport.
@@ -174,7 +172,7 @@ out_free:
174 dprintk("RPC: setup backchannel transport failed\n"); 172 dprintk("RPC: setup backchannel transport failed\n");
175 return -1; 173 return -1;
176} 174}
177EXPORT_SYMBOL(xprt_setup_backchannel); 175EXPORT_SYMBOL_GPL(xprt_setup_backchannel);
178 176
179/* 177/*
180 * Destroys the backchannel preallocated structures. 178 * Destroys the backchannel preallocated structures.
@@ -204,7 +202,7 @@ void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs)
204 dprintk("RPC: backchannel list empty= %s\n", 202 dprintk("RPC: backchannel list empty= %s\n",
205 list_empty(&xprt->bc_pa_list) ? "true" : "false"); 203 list_empty(&xprt->bc_pa_list) ? "true" : "false");
206} 204}
207EXPORT_SYMBOL(xprt_destroy_backchannel); 205EXPORT_SYMBOL_GPL(xprt_destroy_backchannel);
208 206
209/* 207/*
210 * One or more rpc_rqst structure have been preallocated during the 208 * One or more rpc_rqst structure have been preallocated during the
@@ -279,4 +277,3 @@ void xprt_free_bc_request(struct rpc_rqst *req)
279 spin_unlock_bh(&xprt->bc_pa_lock); 277 spin_unlock_bh(&xprt->bc_pa_lock);
280} 278}
281 279
282#endif /* CONFIG_NFS_V4_1 */
diff --git a/net/sunrpc/bc_svc.c b/net/sunrpc/bc_svc.c
index 1dd1a6890007..0b2eb388cbda 100644
--- a/net/sunrpc/bc_svc.c
+++ b/net/sunrpc/bc_svc.c
@@ -27,8 +27,6 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 * reply over an existing open connection previously established by the client. 27 * reply over an existing open connection previously established by the client.
28 */ 28 */
29 29
30#if defined(CONFIG_NFS_V4_1)
31
32#include <linux/module.h> 30#include <linux/module.h>
33 31
34#include <linux/sunrpc/xprt.h> 32#include <linux/sunrpc/xprt.h>
@@ -63,4 +61,3 @@ int bc_send(struct rpc_rqst *req)
63 return ret; 61 return ret;
64} 62}
65 63
66#endif /* CONFIG_NFS_V4_1 */
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index c50818f0473b..c5347d29cfb7 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -64,9 +64,9 @@ static void call_decode(struct rpc_task *task);
64static void call_bind(struct rpc_task *task); 64static void call_bind(struct rpc_task *task);
65static void call_bind_status(struct rpc_task *task); 65static void call_bind_status(struct rpc_task *task);
66static void call_transmit(struct rpc_task *task); 66static void call_transmit(struct rpc_task *task);
67#if defined(CONFIG_NFS_V4_1) 67#if defined(CONFIG_SUNRPC_BACKCHANNEL)
68static void call_bc_transmit(struct rpc_task *task); 68static void call_bc_transmit(struct rpc_task *task);
69#endif /* CONFIG_NFS_V4_1 */ 69#endif /* CONFIG_SUNRPC_BACKCHANNEL */
70static void call_status(struct rpc_task *task); 70static void call_status(struct rpc_task *task);
71static void call_transmit_status(struct rpc_task *task); 71static void call_transmit_status(struct rpc_task *task);
72static void call_refresh(struct rpc_task *task); 72static void call_refresh(struct rpc_task *task);
@@ -715,7 +715,7 @@ rpc_call_async(struct rpc_clnt *clnt, const struct rpc_message *msg, int flags,
715} 715}
716EXPORT_SYMBOL_GPL(rpc_call_async); 716EXPORT_SYMBOL_GPL(rpc_call_async);
717 717
718#if defined(CONFIG_NFS_V4_1) 718#if defined(CONFIG_SUNRPC_BACKCHANNEL)
719/** 719/**
720 * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run 720 * rpc_run_bc_task - Allocate a new RPC task for backchannel use, then run
721 * rpc_execute against it 721 * rpc_execute against it
@@ -758,7 +758,7 @@ out:
758 dprintk("RPC: rpc_run_bc_task: task= %p\n", task); 758 dprintk("RPC: rpc_run_bc_task: task= %p\n", task);
759 return task; 759 return task;
760} 760}
761#endif /* CONFIG_NFS_V4_1 */ 761#endif /* CONFIG_SUNRPC_BACKCHANNEL */
762 762
763void 763void
764rpc_call_start(struct rpc_task *task) 764rpc_call_start(struct rpc_task *task)
@@ -1361,7 +1361,7 @@ call_transmit_status(struct rpc_task *task)
1361 } 1361 }
1362} 1362}
1363 1363
1364#if defined(CONFIG_NFS_V4_1) 1364#if defined(CONFIG_SUNRPC_BACKCHANNEL)
1365/* 1365/*
1366 * 5b. Send the backchannel RPC reply. On error, drop the reply. In 1366 * 5b. Send the backchannel RPC reply. On error, drop the reply. In
1367 * addition, disconnect on connectivity errors. 1367 * addition, disconnect on connectivity errors.
@@ -1425,7 +1425,7 @@ call_bc_transmit(struct rpc_task *task)
1425 } 1425 }
1426 rpc_wake_up_queued_task(&req->rq_xprt->pending, task); 1426 rpc_wake_up_queued_task(&req->rq_xprt->pending, task);
1427} 1427}
1428#endif /* CONFIG_NFS_V4_1 */ 1428#endif /* CONFIG_SUNRPC_BACKCHANNEL */
1429 1429
1430/* 1430/*
1431 * 6. Sort out the RPC call status 1431 * 6. Sort out the RPC call status
@@ -1550,8 +1550,7 @@ call_decode(struct rpc_task *task)
1550 kxdrdproc_t decode = task->tk_msg.rpc_proc->p_decode; 1550 kxdrdproc_t decode = task->tk_msg.rpc_proc->p_decode;
1551 __be32 *p; 1551 __be32 *p;
1552 1552
1553 dprintk("RPC: %5u call_decode (status %d)\n", 1553 dprint_status(task);
1554 task->tk_pid, task->tk_status);
1555 1554
1556 if (task->tk_flags & RPC_CALL_MAJORSEEN) { 1555 if (task->tk_flags & RPC_CALL_MAJORSEEN) {
1557 if (clnt->cl_chatty) 1556 if (clnt->cl_chatty)
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 4814e246a874..d12ffa545811 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -97,14 +97,16 @@ __rpc_add_timer(struct rpc_wait_queue *queue, struct rpc_task *task)
97/* 97/*
98 * Add new request to a priority queue. 98 * Add new request to a priority queue.
99 */ 99 */
100static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct rpc_task *task) 100static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue,
101 struct rpc_task *task,
102 unsigned char queue_priority)
101{ 103{
102 struct list_head *q; 104 struct list_head *q;
103 struct rpc_task *t; 105 struct rpc_task *t;
104 106
105 INIT_LIST_HEAD(&task->u.tk_wait.links); 107 INIT_LIST_HEAD(&task->u.tk_wait.links);
106 q = &queue->tasks[task->tk_priority]; 108 q = &queue->tasks[queue_priority];
107 if (unlikely(task->tk_priority > queue->maxpriority)) 109 if (unlikely(queue_priority > queue->maxpriority))
108 q = &queue->tasks[queue->maxpriority]; 110 q = &queue->tasks[queue->maxpriority];
109 list_for_each_entry(t, q, u.tk_wait.list) { 111 list_for_each_entry(t, q, u.tk_wait.list) {
110 if (t->tk_owner == task->tk_owner) { 112 if (t->tk_owner == task->tk_owner) {
@@ -123,12 +125,14 @@ static void __rpc_add_wait_queue_priority(struct rpc_wait_queue *queue, struct r
123 * improve overall performance. 125 * improve overall performance.
124 * Everyone else gets appended to the queue to ensure proper FIFO behavior. 126 * Everyone else gets appended to the queue to ensure proper FIFO behavior.
125 */ 127 */
126static void __rpc_add_wait_queue(struct rpc_wait_queue *queue, struct rpc_task *task) 128static void __rpc_add_wait_queue(struct rpc_wait_queue *queue,
129 struct rpc_task *task,
130 unsigned char queue_priority)
127{ 131{
128 BUG_ON (RPC_IS_QUEUED(task)); 132 BUG_ON (RPC_IS_QUEUED(task));
129 133
130 if (RPC_IS_PRIORITY(queue)) 134 if (RPC_IS_PRIORITY(queue))
131 __rpc_add_wait_queue_priority(queue, task); 135 __rpc_add_wait_queue_priority(queue, task, queue_priority);
132 else if (RPC_IS_SWAPPER(task)) 136 else if (RPC_IS_SWAPPER(task))
133 list_add(&task->u.tk_wait.list, &queue->tasks[0]); 137 list_add(&task->u.tk_wait.list, &queue->tasks[0]);
134 else 138 else
@@ -311,13 +315,15 @@ static void rpc_make_runnable(struct rpc_task *task)
311 * NB: An RPC task will only receive interrupt-driven events as long 315 * NB: An RPC task will only receive interrupt-driven events as long
312 * as it's on a wait queue. 316 * as it's on a wait queue.
313 */ 317 */
314static void __rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task, 318static void __rpc_sleep_on_priority(struct rpc_wait_queue *q,
315 rpc_action action) 319 struct rpc_task *task,
320 rpc_action action,
321 unsigned char queue_priority)
316{ 322{
317 dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n", 323 dprintk("RPC: %5u sleep_on(queue \"%s\" time %lu)\n",
318 task->tk_pid, rpc_qname(q), jiffies); 324 task->tk_pid, rpc_qname(q), jiffies);
319 325
320 __rpc_add_wait_queue(q, task); 326 __rpc_add_wait_queue(q, task, queue_priority);
321 327
322 BUG_ON(task->tk_callback != NULL); 328 BUG_ON(task->tk_callback != NULL);
323 task->tk_callback = action; 329 task->tk_callback = action;
@@ -334,11 +340,25 @@ void rpc_sleep_on(struct rpc_wait_queue *q, struct rpc_task *task,
334 * Protect the queue operations. 340 * Protect the queue operations.
335 */ 341 */
336 spin_lock_bh(&q->lock); 342 spin_lock_bh(&q->lock);
337 __rpc_sleep_on(q, task, action); 343 __rpc_sleep_on_priority(q, task, action, task->tk_priority);
338 spin_unlock_bh(&q->lock); 344 spin_unlock_bh(&q->lock);
339} 345}
340EXPORT_SYMBOL_GPL(rpc_sleep_on); 346EXPORT_SYMBOL_GPL(rpc_sleep_on);
341 347
348void rpc_sleep_on_priority(struct rpc_wait_queue *q, struct rpc_task *task,
349 rpc_action action, int priority)
350{
351 /* We shouldn't ever put an inactive task to sleep */
352 BUG_ON(!RPC_IS_ACTIVATED(task));
353
354 /*
355 * Protect the queue operations.
356 */
357 spin_lock_bh(&q->lock);
358 __rpc_sleep_on_priority(q, task, action, priority - RPC_PRIORITY_LOW);
359 spin_unlock_bh(&q->lock);
360}
361
342/** 362/**
343 * __rpc_do_wake_up_task - wake up a single rpc_task 363 * __rpc_do_wake_up_task - wake up a single rpc_task
344 * @queue: wait queue 364 * @queue: wait queue
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 2b90292e9505..6a69a1131fb7 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1252,7 +1252,7 @@ svc_process(struct svc_rqst *rqstp)
1252 } 1252 }
1253} 1253}
1254 1254
1255#if defined(CONFIG_NFS_V4_1) 1255#if defined(CONFIG_SUNRPC_BACKCHANNEL)
1256/* 1256/*
1257 * Process a backchannel RPC request that arrived over an existing 1257 * Process a backchannel RPC request that arrived over an existing
1258 * outbound connection 1258 * outbound connection
@@ -1300,8 +1300,8 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
1300 return 0; 1300 return 0;
1301 } 1301 }
1302} 1302}
1303EXPORT_SYMBOL(bc_svc_process); 1303EXPORT_SYMBOL_GPL(bc_svc_process);
1304#endif /* CONFIG_NFS_V4_1 */ 1304#endif /* CONFIG_SUNRPC_BACKCHANNEL */
1305 1305
1306/* 1306/*
1307 * Return (transport-specific) limit on the rpc payload. 1307 * Return (transport-specific) limit on the rpc payload.
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index f2cb5b881dea..767d494de7a2 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -68,12 +68,12 @@ static void svc_sock_free(struct svc_xprt *);
68static struct svc_xprt *svc_create_socket(struct svc_serv *, int, 68static struct svc_xprt *svc_create_socket(struct svc_serv *, int,
69 struct net *, struct sockaddr *, 69 struct net *, struct sockaddr *,
70 int, int); 70 int, int);
71#if defined(CONFIG_NFS_V4_1) 71#if defined(CONFIG_SUNRPC_BACKCHANNEL)
72static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int, 72static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int,
73 struct net *, struct sockaddr *, 73 struct net *, struct sockaddr *,
74 int, int); 74 int, int);
75static void svc_bc_sock_free(struct svc_xprt *xprt); 75static void svc_bc_sock_free(struct svc_xprt *xprt);
76#endif /* CONFIG_NFS_V4_1 */ 76#endif /* CONFIG_SUNRPC_BACKCHANNEL */
77 77
78#ifdef CONFIG_DEBUG_LOCK_ALLOC 78#ifdef CONFIG_DEBUG_LOCK_ALLOC
79static struct lock_class_key svc_key[2]; 79static struct lock_class_key svc_key[2];
@@ -1243,7 +1243,7 @@ static struct svc_xprt *svc_tcp_create(struct svc_serv *serv,
1243 return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags); 1243 return svc_create_socket(serv, IPPROTO_TCP, net, sa, salen, flags);
1244} 1244}
1245 1245
1246#if defined(CONFIG_NFS_V4_1) 1246#if defined(CONFIG_SUNRPC_BACKCHANNEL)
1247static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int, 1247static struct svc_xprt *svc_bc_create_socket(struct svc_serv *, int,
1248 struct net *, struct sockaddr *, 1248 struct net *, struct sockaddr *,
1249 int, int); 1249 int, int);
@@ -1284,7 +1284,7 @@ static void svc_cleanup_bc_xprt_sock(void)
1284{ 1284{
1285 svc_unreg_xprt_class(&svc_tcp_bc_class); 1285 svc_unreg_xprt_class(&svc_tcp_bc_class);
1286} 1286}
1287#else /* CONFIG_NFS_V4_1 */ 1287#else /* CONFIG_SUNRPC_BACKCHANNEL */
1288static void svc_init_bc_xprt_sock(void) 1288static void svc_init_bc_xprt_sock(void)
1289{ 1289{
1290} 1290}
@@ -1292,7 +1292,7 @@ static void svc_init_bc_xprt_sock(void)
1292static void svc_cleanup_bc_xprt_sock(void) 1292static void svc_cleanup_bc_xprt_sock(void)
1293{ 1293{
1294} 1294}
1295#endif /* CONFIG_NFS_V4_1 */ 1295#endif /* CONFIG_SUNRPC_BACKCHANNEL */
1296 1296
1297static struct svc_xprt_ops svc_tcp_ops = { 1297static struct svc_xprt_ops svc_tcp_ops = {
1298 .xpo_create = svc_tcp_create, 1298 .xpo_create = svc_tcp_create,
@@ -1623,7 +1623,7 @@ static void svc_sock_free(struct svc_xprt *xprt)
1623 kfree(svsk); 1623 kfree(svsk);
1624} 1624}
1625 1625
1626#if defined(CONFIG_NFS_V4_1) 1626#if defined(CONFIG_SUNRPC_BACKCHANNEL)
1627/* 1627/*
1628 * Create a back channel svc_xprt which shares the fore channel socket. 1628 * Create a back channel svc_xprt which shares the fore channel socket.
1629 */ 1629 */
@@ -1662,4 +1662,4 @@ static void svc_bc_sock_free(struct svc_xprt *xprt)
1662 if (xprt) 1662 if (xprt)
1663 kfree(container_of(xprt, struct svc_sock, sk_xprt)); 1663 kfree(container_of(xprt, struct svc_sock, sk_xprt));
1664} 1664}
1665#endif /* CONFIG_NFS_V4_1 */ 1665#endif /* CONFIG_SUNRPC_BACKCHANNEL */
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index f008c14ad34c..277ebd4bf095 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -126,7 +126,7 @@ xdr_terminate_string(struct xdr_buf *buf, const u32 len)
126 kaddr[buf->page_base + len] = '\0'; 126 kaddr[buf->page_base + len] = '\0';
127 kunmap_atomic(kaddr, KM_USER0); 127 kunmap_atomic(kaddr, KM_USER0);
128} 128}
129EXPORT_SYMBOL(xdr_terminate_string); 129EXPORT_SYMBOL_GPL(xdr_terminate_string);
130 130
131void 131void
132xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base, 132xdr_encode_pages(struct xdr_buf *xdr, struct page **pages, unsigned int base,
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index ce5eb68a9664..9b6a4d1ea8f8 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -62,6 +62,7 @@
62/* 62/*
63 * Local functions 63 * Local functions
64 */ 64 */
65static void xprt_init(struct rpc_xprt *xprt, struct net *net);
65static void xprt_request_init(struct rpc_task *, struct rpc_xprt *); 66static void xprt_request_init(struct rpc_task *, struct rpc_xprt *);
66static void xprt_connect_status(struct rpc_task *task); 67static void xprt_connect_status(struct rpc_task *task);
67static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *); 68static int __xprt_get_cong(struct rpc_xprt *, struct rpc_task *);
@@ -191,10 +192,10 @@ EXPORT_SYMBOL_GPL(xprt_load_transport);
191 * transport connects from colliding with writes. No congestion control 192 * transport connects from colliding with writes. No congestion control
192 * is provided. 193 * is provided.
193 */ 194 */
194int xprt_reserve_xprt(struct rpc_task *task) 195int xprt_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
195{ 196{
196 struct rpc_rqst *req = task->tk_rqstp; 197 struct rpc_rqst *req = task->tk_rqstp;
197 struct rpc_xprt *xprt = req->rq_xprt; 198 int priority;
198 199
199 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { 200 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
200 if (task == xprt->snd_task) 201 if (task == xprt->snd_task)
@@ -202,8 +203,10 @@ int xprt_reserve_xprt(struct rpc_task *task)
202 goto out_sleep; 203 goto out_sleep;
203 } 204 }
204 xprt->snd_task = task; 205 xprt->snd_task = task;
205 req->rq_bytes_sent = 0; 206 if (req != NULL) {
206 req->rq_ntrans++; 207 req->rq_bytes_sent = 0;
208 req->rq_ntrans++;
209 }
207 210
208 return 1; 211 return 1;
209 212
@@ -212,10 +215,13 @@ out_sleep:
212 task->tk_pid, xprt); 215 task->tk_pid, xprt);
213 task->tk_timeout = 0; 216 task->tk_timeout = 0;
214 task->tk_status = -EAGAIN; 217 task->tk_status = -EAGAIN;
215 if (req->rq_ntrans) 218 if (req == NULL)
216 rpc_sleep_on(&xprt->resend, task, NULL); 219 priority = RPC_PRIORITY_LOW;
220 else if (!req->rq_ntrans)
221 priority = RPC_PRIORITY_NORMAL;
217 else 222 else
218 rpc_sleep_on(&xprt->sending, task, NULL); 223 priority = RPC_PRIORITY_HIGH;
224 rpc_sleep_on_priority(&xprt->sending, task, NULL, priority);
219 return 0; 225 return 0;
220} 226}
221EXPORT_SYMBOL_GPL(xprt_reserve_xprt); 227EXPORT_SYMBOL_GPL(xprt_reserve_xprt);
@@ -239,22 +245,24 @@ static void xprt_clear_locked(struct rpc_xprt *xprt)
239 * integrated into the decision of whether a request is allowed to be 245 * integrated into the decision of whether a request is allowed to be
240 * woken up and given access to the transport. 246 * woken up and given access to the transport.
241 */ 247 */
242int xprt_reserve_xprt_cong(struct rpc_task *task) 248int xprt_reserve_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task)
243{ 249{
244 struct rpc_xprt *xprt = task->tk_xprt;
245 struct rpc_rqst *req = task->tk_rqstp; 250 struct rpc_rqst *req = task->tk_rqstp;
251 int priority;
246 252
247 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) { 253 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) {
248 if (task == xprt->snd_task) 254 if (task == xprt->snd_task)
249 return 1; 255 return 1;
250 goto out_sleep; 256 goto out_sleep;
251 } 257 }
258 if (req == NULL) {
259 xprt->snd_task = task;
260 return 1;
261 }
252 if (__xprt_get_cong(xprt, task)) { 262 if (__xprt_get_cong(xprt, task)) {
253 xprt->snd_task = task; 263 xprt->snd_task = task;
254 if (req) { 264 req->rq_bytes_sent = 0;
255 req->rq_bytes_sent = 0; 265 req->rq_ntrans++;
256 req->rq_ntrans++;
257 }
258 return 1; 266 return 1;
259 } 267 }
260 xprt_clear_locked(xprt); 268 xprt_clear_locked(xprt);
@@ -262,10 +270,13 @@ out_sleep:
262 dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt); 270 dprintk("RPC: %5u failed to lock transport %p\n", task->tk_pid, xprt);
263 task->tk_timeout = 0; 271 task->tk_timeout = 0;
264 task->tk_status = -EAGAIN; 272 task->tk_status = -EAGAIN;
265 if (req && req->rq_ntrans) 273 if (req == NULL)
266 rpc_sleep_on(&xprt->resend, task, NULL); 274 priority = RPC_PRIORITY_LOW;
275 else if (!req->rq_ntrans)
276 priority = RPC_PRIORITY_NORMAL;
267 else 277 else
268 rpc_sleep_on(&xprt->sending, task, NULL); 278 priority = RPC_PRIORITY_HIGH;
279 rpc_sleep_on_priority(&xprt->sending, task, NULL, priority);
269 return 0; 280 return 0;
270} 281}
271EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong); 282EXPORT_SYMBOL_GPL(xprt_reserve_xprt_cong);
@@ -275,7 +286,7 @@ static inline int xprt_lock_write(struct rpc_xprt *xprt, struct rpc_task *task)
275 int retval; 286 int retval;
276 287
277 spin_lock_bh(&xprt->transport_lock); 288 spin_lock_bh(&xprt->transport_lock);
278 retval = xprt->ops->reserve_xprt(task); 289 retval = xprt->ops->reserve_xprt(xprt, task);
279 spin_unlock_bh(&xprt->transport_lock); 290 spin_unlock_bh(&xprt->transport_lock);
280 return retval; 291 return retval;
281} 292}
@@ -288,12 +299,9 @@ static void __xprt_lock_write_next(struct rpc_xprt *xprt)
288 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) 299 if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
289 return; 300 return;
290 301
291 task = rpc_wake_up_next(&xprt->resend); 302 task = rpc_wake_up_next(&xprt->sending);
292 if (!task) { 303 if (task == NULL)
293 task = rpc_wake_up_next(&xprt->sending); 304 goto out_unlock;
294 if (!task)
295 goto out_unlock;
296 }
297 305
298 req = task->tk_rqstp; 306 req = task->tk_rqstp;
299 xprt->snd_task = task; 307 xprt->snd_task = task;
@@ -310,24 +318,25 @@ out_unlock:
310static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt) 318static void __xprt_lock_write_next_cong(struct rpc_xprt *xprt)
311{ 319{
312 struct rpc_task *task; 320 struct rpc_task *task;
321 struct rpc_rqst *req;
313 322
314 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) 323 if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
315 return; 324 return;
316 if (RPCXPRT_CONGESTED(xprt)) 325 if (RPCXPRT_CONGESTED(xprt))
317 goto out_unlock; 326 goto out_unlock;
318 task = rpc_wake_up_next(&xprt->resend); 327 task = rpc_wake_up_next(&xprt->sending);
319 if (!task) { 328 if (task == NULL)
320 task = rpc_wake_up_next(&xprt->sending); 329 goto out_unlock;
321 if (!task) 330
322 goto out_unlock; 331 req = task->tk_rqstp;
332 if (req == NULL) {
333 xprt->snd_task = task;
334 return;
323 } 335 }
324 if (__xprt_get_cong(xprt, task)) { 336 if (__xprt_get_cong(xprt, task)) {
325 struct rpc_rqst *req = task->tk_rqstp;
326 xprt->snd_task = task; 337 xprt->snd_task = task;
327 if (req) { 338 req->rq_bytes_sent = 0;
328 req->rq_bytes_sent = 0; 339 req->rq_ntrans++;
329 req->rq_ntrans++;
330 }
331 return; 340 return;
332 } 341 }
333out_unlock: 342out_unlock:
@@ -852,7 +861,7 @@ int xprt_prepare_transmit(struct rpc_task *task)
852 err = req->rq_reply_bytes_recvd; 861 err = req->rq_reply_bytes_recvd;
853 goto out_unlock; 862 goto out_unlock;
854 } 863 }
855 if (!xprt->ops->reserve_xprt(task)) 864 if (!xprt->ops->reserve_xprt(xprt, task))
856 err = -EAGAIN; 865 err = -EAGAIN;
857out_unlock: 866out_unlock:
858 spin_unlock_bh(&xprt->transport_lock); 867 spin_unlock_bh(&xprt->transport_lock);
@@ -928,28 +937,66 @@ void xprt_transmit(struct rpc_task *task)
928 spin_unlock_bh(&xprt->transport_lock); 937 spin_unlock_bh(&xprt->transport_lock);
929} 938}
930 939
940static struct rpc_rqst *xprt_dynamic_alloc_slot(struct rpc_xprt *xprt, gfp_t gfp_flags)
941{
942 struct rpc_rqst *req = ERR_PTR(-EAGAIN);
943
944 if (!atomic_add_unless(&xprt->num_reqs, 1, xprt->max_reqs))
945 goto out;
946 req = kzalloc(sizeof(struct rpc_rqst), gfp_flags);
947 if (req != NULL)
948 goto out;
949 atomic_dec(&xprt->num_reqs);
950 req = ERR_PTR(-ENOMEM);
951out:
952 return req;
953}
954
955static bool xprt_dynamic_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
956{
957 if (atomic_add_unless(&xprt->num_reqs, -1, xprt->min_reqs)) {
958 kfree(req);
959 return true;
960 }
961 return false;
962}
963
931static void xprt_alloc_slot(struct rpc_task *task) 964static void xprt_alloc_slot(struct rpc_task *task)
932{ 965{
933 struct rpc_xprt *xprt = task->tk_xprt; 966 struct rpc_xprt *xprt = task->tk_xprt;
967 struct rpc_rqst *req;
934 968
935 task->tk_status = 0;
936 if (task->tk_rqstp)
937 return;
938 if (!list_empty(&xprt->free)) { 969 if (!list_empty(&xprt->free)) {
939 struct rpc_rqst *req = list_entry(xprt->free.next, struct rpc_rqst, rq_list); 970 req = list_entry(xprt->free.next, struct rpc_rqst, rq_list);
940 list_del_init(&req->rq_list); 971 list_del(&req->rq_list);
941 task->tk_rqstp = req; 972 goto out_init_req;
942 xprt_request_init(task, xprt); 973 }
943 return; 974 req = xprt_dynamic_alloc_slot(xprt, GFP_NOWAIT);
975 if (!IS_ERR(req))
976 goto out_init_req;
977 switch (PTR_ERR(req)) {
978 case -ENOMEM:
979 rpc_delay(task, HZ >> 2);
980 dprintk("RPC: dynamic allocation of request slot "
981 "failed! Retrying\n");
982 break;
983 case -EAGAIN:
984 rpc_sleep_on(&xprt->backlog, task, NULL);
985 dprintk("RPC: waiting for request slot\n");
944 } 986 }
945 dprintk("RPC: waiting for request slot\n");
946 task->tk_status = -EAGAIN; 987 task->tk_status = -EAGAIN;
947 task->tk_timeout = 0; 988 return;
948 rpc_sleep_on(&xprt->backlog, task, NULL); 989out_init_req:
990 task->tk_status = 0;
991 task->tk_rqstp = req;
992 xprt_request_init(task, xprt);
949} 993}
950 994
951static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req) 995static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
952{ 996{
997 if (xprt_dynamic_free_slot(xprt, req))
998 return;
999
953 memset(req, 0, sizeof(*req)); /* mark unused */ 1000 memset(req, 0, sizeof(*req)); /* mark unused */
954 1001
955 spin_lock(&xprt->reserve_lock); 1002 spin_lock(&xprt->reserve_lock);
@@ -958,25 +1005,49 @@ static void xprt_free_slot(struct rpc_xprt *xprt, struct rpc_rqst *req)
958 spin_unlock(&xprt->reserve_lock); 1005 spin_unlock(&xprt->reserve_lock);
959} 1006}
960 1007
961struct rpc_xprt *xprt_alloc(struct net *net, int size, int max_req) 1008static void xprt_free_all_slots(struct rpc_xprt *xprt)
1009{
1010 struct rpc_rqst *req;
1011 while (!list_empty(&xprt->free)) {
1012 req = list_first_entry(&xprt->free, struct rpc_rqst, rq_list);
1013 list_del(&req->rq_list);
1014 kfree(req);
1015 }
1016}
1017
1018struct rpc_xprt *xprt_alloc(struct net *net, size_t size,
1019 unsigned int num_prealloc,
1020 unsigned int max_alloc)
962{ 1021{
963 struct rpc_xprt *xprt; 1022 struct rpc_xprt *xprt;
1023 struct rpc_rqst *req;
1024 int i;
964 1025
965 xprt = kzalloc(size, GFP_KERNEL); 1026 xprt = kzalloc(size, GFP_KERNEL);
966 if (xprt == NULL) 1027 if (xprt == NULL)
967 goto out; 1028 goto out;
968 atomic_set(&xprt->count, 1);
969 1029
970 xprt->max_reqs = max_req; 1030 xprt_init(xprt, net);
971 xprt->slot = kcalloc(max_req, sizeof(struct rpc_rqst), GFP_KERNEL); 1031
972 if (xprt->slot == NULL) 1032 for (i = 0; i < num_prealloc; i++) {
1033 req = kzalloc(sizeof(struct rpc_rqst), GFP_KERNEL);
1034 if (!req)
1035 break;
1036 list_add(&req->rq_list, &xprt->free);
1037 }
1038 if (i < num_prealloc)
973 goto out_free; 1039 goto out_free;
1040 if (max_alloc > num_prealloc)
1041 xprt->max_reqs = max_alloc;
1042 else
1043 xprt->max_reqs = num_prealloc;
1044 xprt->min_reqs = num_prealloc;
1045 atomic_set(&xprt->num_reqs, num_prealloc);
974 1046
975 xprt->xprt_net = get_net(net);
976 return xprt; 1047 return xprt;
977 1048
978out_free: 1049out_free:
979 kfree(xprt); 1050 xprt_free(xprt);
980out: 1051out:
981 return NULL; 1052 return NULL;
982} 1053}
@@ -985,7 +1056,7 @@ EXPORT_SYMBOL_GPL(xprt_alloc);
985void xprt_free(struct rpc_xprt *xprt) 1056void xprt_free(struct rpc_xprt *xprt)
986{ 1057{
987 put_net(xprt->xprt_net); 1058 put_net(xprt->xprt_net);
988 kfree(xprt->slot); 1059 xprt_free_all_slots(xprt);
989 kfree(xprt); 1060 kfree(xprt);
990} 1061}
991EXPORT_SYMBOL_GPL(xprt_free); 1062EXPORT_SYMBOL_GPL(xprt_free);
@@ -1001,10 +1072,24 @@ void xprt_reserve(struct rpc_task *task)
1001{ 1072{
1002 struct rpc_xprt *xprt = task->tk_xprt; 1073 struct rpc_xprt *xprt = task->tk_xprt;
1003 1074
1004 task->tk_status = -EIO; 1075 task->tk_status = 0;
1076 if (task->tk_rqstp != NULL)
1077 return;
1078
1079 /* Note: grabbing the xprt_lock_write() here is not strictly needed,
1080 * but ensures that we throttle new slot allocation if the transport
1081 * is congested (e.g. if reconnecting or if we're out of socket
1082 * write buffer space).
1083 */
1084 task->tk_timeout = 0;
1085 task->tk_status = -EAGAIN;
1086 if (!xprt_lock_write(xprt, task))
1087 return;
1088
1005 spin_lock(&xprt->reserve_lock); 1089 spin_lock(&xprt->reserve_lock);
1006 xprt_alloc_slot(task); 1090 xprt_alloc_slot(task);
1007 spin_unlock(&xprt->reserve_lock); 1091 spin_unlock(&xprt->reserve_lock);
1092 xprt_release_write(xprt, task);
1008} 1093}
1009 1094
1010static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt) 1095static inline __be32 xprt_alloc_xid(struct rpc_xprt *xprt)
@@ -1021,6 +1106,7 @@ static void xprt_request_init(struct rpc_task *task, struct rpc_xprt *xprt)
1021{ 1106{
1022 struct rpc_rqst *req = task->tk_rqstp; 1107 struct rpc_rqst *req = task->tk_rqstp;
1023 1108
1109 INIT_LIST_HEAD(&req->rq_list);
1024 req->rq_timeout = task->tk_client->cl_timeout->to_initval; 1110 req->rq_timeout = task->tk_client->cl_timeout->to_initval;
1025 req->rq_task = task; 1111 req->rq_task = task;
1026 req->rq_xprt = xprt; 1112 req->rq_xprt = xprt;
@@ -1073,6 +1159,34 @@ void xprt_release(struct rpc_task *task)
1073 xprt_free_bc_request(req); 1159 xprt_free_bc_request(req);
1074} 1160}
1075 1161
1162static void xprt_init(struct rpc_xprt *xprt, struct net *net)
1163{
1164 atomic_set(&xprt->count, 1);
1165
1166 spin_lock_init(&xprt->transport_lock);
1167 spin_lock_init(&xprt->reserve_lock);
1168
1169 INIT_LIST_HEAD(&xprt->free);
1170 INIT_LIST_HEAD(&xprt->recv);
1171#if defined(CONFIG_SUNRPC_BACKCHANNEL)
1172 spin_lock_init(&xprt->bc_pa_lock);
1173 INIT_LIST_HEAD(&xprt->bc_pa_list);
1174#endif /* CONFIG_SUNRPC_BACKCHANNEL */
1175
1176 xprt->last_used = jiffies;
1177 xprt->cwnd = RPC_INITCWND;
1178 xprt->bind_index = 0;
1179
1180 rpc_init_wait_queue(&xprt->binding, "xprt_binding");
1181 rpc_init_wait_queue(&xprt->pending, "xprt_pending");
1182 rpc_init_priority_wait_queue(&xprt->sending, "xprt_sending");
1183 rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog");
1184
1185 xprt_init_xid(xprt);
1186
1187 xprt->xprt_net = get_net(net);
1188}
1189
1076/** 1190/**
1077 * xprt_create_transport - create an RPC transport 1191 * xprt_create_transport - create an RPC transport
1078 * @args: rpc transport creation arguments 1192 * @args: rpc transport creation arguments
@@ -1081,7 +1195,6 @@ void xprt_release(struct rpc_task *task)
1081struct rpc_xprt *xprt_create_transport(struct xprt_create *args) 1195struct rpc_xprt *xprt_create_transport(struct xprt_create *args)
1082{ 1196{
1083 struct rpc_xprt *xprt; 1197 struct rpc_xprt *xprt;
1084 struct rpc_rqst *req;
1085 struct xprt_class *t; 1198 struct xprt_class *t;
1086 1199
1087 spin_lock(&xprt_list_lock); 1200 spin_lock(&xprt_list_lock);
@@ -1100,46 +1213,17 @@ found:
1100 if (IS_ERR(xprt)) { 1213 if (IS_ERR(xprt)) {
1101 dprintk("RPC: xprt_create_transport: failed, %ld\n", 1214 dprintk("RPC: xprt_create_transport: failed, %ld\n",
1102 -PTR_ERR(xprt)); 1215 -PTR_ERR(xprt));
1103 return xprt; 1216 goto out;
1104 } 1217 }
1105 if (test_and_set_bit(XPRT_INITIALIZED, &xprt->state))
1106 /* ->setup returned a pre-initialized xprt: */
1107 return xprt;
1108
1109 spin_lock_init(&xprt->transport_lock);
1110 spin_lock_init(&xprt->reserve_lock);
1111
1112 INIT_LIST_HEAD(&xprt->free);
1113 INIT_LIST_HEAD(&xprt->recv);
1114#if defined(CONFIG_NFS_V4_1)
1115 spin_lock_init(&xprt->bc_pa_lock);
1116 INIT_LIST_HEAD(&xprt->bc_pa_list);
1117#endif /* CONFIG_NFS_V4_1 */
1118
1119 INIT_WORK(&xprt->task_cleanup, xprt_autoclose); 1218 INIT_WORK(&xprt->task_cleanup, xprt_autoclose);
1120 if (xprt_has_timer(xprt)) 1219 if (xprt_has_timer(xprt))
1121 setup_timer(&xprt->timer, xprt_init_autodisconnect, 1220 setup_timer(&xprt->timer, xprt_init_autodisconnect,
1122 (unsigned long)xprt); 1221 (unsigned long)xprt);
1123 else 1222 else
1124 init_timer(&xprt->timer); 1223 init_timer(&xprt->timer);
1125 xprt->last_used = jiffies;
1126 xprt->cwnd = RPC_INITCWND;
1127 xprt->bind_index = 0;
1128
1129 rpc_init_wait_queue(&xprt->binding, "xprt_binding");
1130 rpc_init_wait_queue(&xprt->pending, "xprt_pending");
1131 rpc_init_wait_queue(&xprt->sending, "xprt_sending");
1132 rpc_init_wait_queue(&xprt->resend, "xprt_resend");
1133 rpc_init_priority_wait_queue(&xprt->backlog, "xprt_backlog");
1134
1135 /* initialize free list */
1136 for (req = &xprt->slot[xprt->max_reqs-1]; req >= &xprt->slot[0]; req--)
1137 list_add(&req->rq_list, &xprt->free);
1138
1139 xprt_init_xid(xprt);
1140
1141 dprintk("RPC: created transport %p with %u slots\n", xprt, 1224 dprintk("RPC: created transport %p with %u slots\n", xprt,
1142 xprt->max_reqs); 1225 xprt->max_reqs);
1226out:
1143 return xprt; 1227 return xprt;
1144} 1228}
1145 1229
@@ -1157,7 +1241,6 @@ static void xprt_destroy(struct rpc_xprt *xprt)
1157 rpc_destroy_wait_queue(&xprt->binding); 1241 rpc_destroy_wait_queue(&xprt->binding);
1158 rpc_destroy_wait_queue(&xprt->pending); 1242 rpc_destroy_wait_queue(&xprt->pending);
1159 rpc_destroy_wait_queue(&xprt->sending); 1243 rpc_destroy_wait_queue(&xprt->sending);
1160 rpc_destroy_wait_queue(&xprt->resend);
1161 rpc_destroy_wait_queue(&xprt->backlog); 1244 rpc_destroy_wait_queue(&xprt->backlog);
1162 cancel_work_sync(&xprt->task_cleanup); 1245 cancel_work_sync(&xprt->task_cleanup);
1163 /* 1246 /*
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 0867070bb5ca..b446e100286f 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -283,6 +283,7 @@ xprt_setup_rdma(struct xprt_create *args)
283 } 283 }
284 284
285 xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt), 285 xprt = xprt_alloc(args->net, sizeof(struct rpcrdma_xprt),
286 xprt_rdma_slot_table_entries,
286 xprt_rdma_slot_table_entries); 287 xprt_rdma_slot_table_entries);
287 if (xprt == NULL) { 288 if (xprt == NULL) {
288 dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n", 289 dprintk("RPC: %s: couldn't allocate rpcrdma_xprt\n",
@@ -452,9 +453,8 @@ xprt_rdma_connect(struct rpc_task *task)
452} 453}
453 454
454static int 455static int
455xprt_rdma_reserve_xprt(struct rpc_task *task) 456xprt_rdma_reserve_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
456{ 457{
457 struct rpc_xprt *xprt = task->tk_xprt;
458 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 458 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
459 int credits = atomic_read(&r_xprt->rx_buf.rb_credits); 459 int credits = atomic_read(&r_xprt->rx_buf.rb_credits);
460 460
@@ -466,7 +466,7 @@ xprt_rdma_reserve_xprt(struct rpc_task *task)
466 BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0); 466 BUG_ON(r_xprt->rx_buf.rb_cwndscale <= 0);
467 } 467 }
468 xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale; 468 xprt->cwnd = credits * r_xprt->rx_buf.rb_cwndscale;
469 return xprt_reserve_xprt_cong(task); 469 return xprt_reserve_xprt_cong(xprt, task);
470} 470}
471 471
472/* 472/*
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index ddf05288d9f1..08c5d5a128fc 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -109,7 +109,7 @@ struct rpcrdma_ep {
109 */ 109 */
110 110
111/* temporary static scatter/gather max */ 111/* temporary static scatter/gather max */
112#define RPCRDMA_MAX_DATA_SEGS (8) /* max scatter/gather */ 112#define RPCRDMA_MAX_DATA_SEGS (64) /* max scatter/gather */
113#define RPCRDMA_MAX_SEGS (RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */ 113#define RPCRDMA_MAX_SEGS (RPCRDMA_MAX_DATA_SEGS + 2) /* head+tail = 2 */
114#define MAX_RPCRDMAHDR (\ 114#define MAX_RPCRDMAHDR (\
115 /* max supported RPC/RDMA header */ \ 115 /* max supported RPC/RDMA header */ \
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 72abb7358933..d7f97ef26590 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -37,7 +37,7 @@
37#include <linux/sunrpc/svcsock.h> 37#include <linux/sunrpc/svcsock.h>
38#include <linux/sunrpc/xprtsock.h> 38#include <linux/sunrpc/xprtsock.h>
39#include <linux/file.h> 39#include <linux/file.h>
40#ifdef CONFIG_NFS_V4_1 40#ifdef CONFIG_SUNRPC_BACKCHANNEL
41#include <linux/sunrpc/bc_xprt.h> 41#include <linux/sunrpc/bc_xprt.h>
42#endif 42#endif
43 43
@@ -54,7 +54,8 @@ static void xs_close(struct rpc_xprt *xprt);
54 * xprtsock tunables 54 * xprtsock tunables
55 */ 55 */
56unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE; 56unsigned int xprt_udp_slot_table_entries = RPC_DEF_SLOT_TABLE;
57unsigned int xprt_tcp_slot_table_entries = RPC_DEF_SLOT_TABLE; 57unsigned int xprt_tcp_slot_table_entries = RPC_MIN_SLOT_TABLE;
58unsigned int xprt_max_tcp_slot_table_entries = RPC_MAX_SLOT_TABLE;
58 59
59unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT; 60unsigned int xprt_min_resvport = RPC_DEF_MIN_RESVPORT;
60unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT; 61unsigned int xprt_max_resvport = RPC_DEF_MAX_RESVPORT;
@@ -75,6 +76,7 @@ static unsigned int xs_tcp_fin_timeout __read_mostly = XS_TCP_LINGER_TO;
75 76
76static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE; 77static unsigned int min_slot_table_size = RPC_MIN_SLOT_TABLE;
77static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE; 78static unsigned int max_slot_table_size = RPC_MAX_SLOT_TABLE;
79static unsigned int max_tcp_slot_table_limit = RPC_MAX_SLOT_TABLE_LIMIT;
78static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT; 80static unsigned int xprt_min_resvport_limit = RPC_MIN_RESVPORT;
79static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT; 81static unsigned int xprt_max_resvport_limit = RPC_MAX_RESVPORT;
80 82
@@ -104,6 +106,15 @@ static ctl_table xs_tunables_table[] = {
104 .extra2 = &max_slot_table_size 106 .extra2 = &max_slot_table_size
105 }, 107 },
106 { 108 {
109 .procname = "tcp_max_slot_table_entries",
110 .data = &xprt_max_tcp_slot_table_entries,
111 .maxlen = sizeof(unsigned int),
112 .mode = 0644,
113 .proc_handler = proc_dointvec_minmax,
114 .extra1 = &min_slot_table_size,
115 .extra2 = &max_tcp_slot_table_limit
116 },
117 {
107 .procname = "min_resvport", 118 .procname = "min_resvport",
108 .data = &xprt_min_resvport, 119 .data = &xprt_min_resvport,
109 .maxlen = sizeof(unsigned int), 120 .maxlen = sizeof(unsigned int),
@@ -755,6 +766,8 @@ static void xs_tcp_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task)
755 if (task == NULL) 766 if (task == NULL)
756 goto out_release; 767 goto out_release;
757 req = task->tk_rqstp; 768 req = task->tk_rqstp;
769 if (req == NULL)
770 goto out_release;
758 if (req->rq_bytes_sent == 0) 771 if (req->rq_bytes_sent == 0)
759 goto out_release; 772 goto out_release;
760 if (req->rq_bytes_sent == req->rq_snd_buf.len) 773 if (req->rq_bytes_sent == req->rq_snd_buf.len)
@@ -1236,7 +1249,7 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt,
1236 return 0; 1249 return 0;
1237} 1250}
1238 1251
1239#if defined(CONFIG_NFS_V4_1) 1252#if defined(CONFIG_SUNRPC_BACKCHANNEL)
1240/* 1253/*
1241 * Obtains an rpc_rqst previously allocated and invokes the common 1254 * Obtains an rpc_rqst previously allocated and invokes the common
1242 * tcp read code to read the data. The result is placed in the callback 1255 * tcp read code to read the data. The result is placed in the callback
@@ -1299,7 +1312,7 @@ static inline int _xs_tcp_read_data(struct rpc_xprt *xprt,
1299{ 1312{
1300 return xs_tcp_read_reply(xprt, desc); 1313 return xs_tcp_read_reply(xprt, desc);
1301} 1314}
1302#endif /* CONFIG_NFS_V4_1 */ 1315#endif /* CONFIG_SUNRPC_BACKCHANNEL */
1303 1316
1304/* 1317/*
1305 * Read data off the transport. This can be either an RPC_CALL or an 1318 * Read data off the transport. This can be either an RPC_CALL or an
@@ -2489,7 +2502,8 @@ static int xs_init_anyaddr(const int family, struct sockaddr *sap)
2489} 2502}
2490 2503
2491static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, 2504static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
2492 unsigned int slot_table_size) 2505 unsigned int slot_table_size,
2506 unsigned int max_slot_table_size)
2493{ 2507{
2494 struct rpc_xprt *xprt; 2508 struct rpc_xprt *xprt;
2495 struct sock_xprt *new; 2509 struct sock_xprt *new;
@@ -2499,7 +2513,8 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args,
2499 return ERR_PTR(-EBADF); 2513 return ERR_PTR(-EBADF);
2500 } 2514 }
2501 2515
2502 xprt = xprt_alloc(args->net, sizeof(*new), slot_table_size); 2516 xprt = xprt_alloc(args->net, sizeof(*new), slot_table_size,
2517 max_slot_table_size);
2503 if (xprt == NULL) { 2518 if (xprt == NULL) {
2504 dprintk("RPC: xs_setup_xprt: couldn't allocate " 2519 dprintk("RPC: xs_setup_xprt: couldn't allocate "
2505 "rpc_xprt\n"); 2520 "rpc_xprt\n");
@@ -2541,7 +2556,8 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
2541 struct rpc_xprt *xprt; 2556 struct rpc_xprt *xprt;
2542 struct rpc_xprt *ret; 2557 struct rpc_xprt *ret;
2543 2558
2544 xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); 2559 xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
2560 xprt_max_tcp_slot_table_entries);
2545 if (IS_ERR(xprt)) 2561 if (IS_ERR(xprt))
2546 return xprt; 2562 return xprt;
2547 transport = container_of(xprt, struct sock_xprt, xprt); 2563 transport = container_of(xprt, struct sock_xprt, xprt);
@@ -2605,7 +2621,8 @@ static struct rpc_xprt *xs_setup_udp(struct xprt_create *args)
2605 struct sock_xprt *transport; 2621 struct sock_xprt *transport;
2606 struct rpc_xprt *ret; 2622 struct rpc_xprt *ret;
2607 2623
2608 xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries); 2624 xprt = xs_setup_xprt(args, xprt_udp_slot_table_entries,
2625 xprt_udp_slot_table_entries);
2609 if (IS_ERR(xprt)) 2626 if (IS_ERR(xprt))
2610 return xprt; 2627 return xprt;
2611 transport = container_of(xprt, struct sock_xprt, xprt); 2628 transport = container_of(xprt, struct sock_xprt, xprt);
@@ -2681,7 +2698,8 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args)
2681 struct sock_xprt *transport; 2698 struct sock_xprt *transport;
2682 struct rpc_xprt *ret; 2699 struct rpc_xprt *ret;
2683 2700
2684 xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); 2701 xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
2702 xprt_max_tcp_slot_table_entries);
2685 if (IS_ERR(xprt)) 2703 if (IS_ERR(xprt))
2686 return xprt; 2704 return xprt;
2687 transport = container_of(xprt, struct sock_xprt, xprt); 2705 transport = container_of(xprt, struct sock_xprt, xprt);
@@ -2760,7 +2778,8 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
2760 */ 2778 */
2761 return args->bc_xprt->xpt_bc_xprt; 2779 return args->bc_xprt->xpt_bc_xprt;
2762 } 2780 }
2763 xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries); 2781 xprt = xs_setup_xprt(args, xprt_tcp_slot_table_entries,
2782 xprt_tcp_slot_table_entries);
2764 if (IS_ERR(xprt)) 2783 if (IS_ERR(xprt))
2765 return xprt; 2784 return xprt;
2766 transport = container_of(xprt, struct sock_xprt, xprt); 2785 transport = container_of(xprt, struct sock_xprt, xprt);
@@ -2947,8 +2966,26 @@ static struct kernel_param_ops param_ops_slot_table_size = {
2947#define param_check_slot_table_size(name, p) \ 2966#define param_check_slot_table_size(name, p) \
2948 __param_check(name, p, unsigned int); 2967 __param_check(name, p, unsigned int);
2949 2968
2969static int param_set_max_slot_table_size(const char *val,
2970 const struct kernel_param *kp)
2971{
2972 return param_set_uint_minmax(val, kp,
2973 RPC_MIN_SLOT_TABLE,
2974 RPC_MAX_SLOT_TABLE_LIMIT);
2975}
2976
2977static struct kernel_param_ops param_ops_max_slot_table_size = {
2978 .set = param_set_max_slot_table_size,
2979 .get = param_get_uint,
2980};
2981
2982#define param_check_max_slot_table_size(name, p) \
2983 __param_check(name, p, unsigned int);
2984
2950module_param_named(tcp_slot_table_entries, xprt_tcp_slot_table_entries, 2985module_param_named(tcp_slot_table_entries, xprt_tcp_slot_table_entries,
2951 slot_table_size, 0644); 2986 slot_table_size, 0644);
2987module_param_named(tcp_max_slot_table_entries, xprt_max_tcp_slot_table_entries,
2988 max_slot_table_size, 0644);
2952module_param_named(udp_slot_table_entries, xprt_udp_slot_table_entries, 2989module_param_named(udp_slot_table_entries, xprt_udp_slot_table_entries,
2953 slot_table_size, 0644); 2990 slot_table_size, 0644);
2954 2991