aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2017-05-10 16:03:38 -0400
committerLinus Torvalds <torvalds@linux-foundation.org>2017-05-10 16:03:38 -0400
commit73ccb023a2f25b72c4b95499ca24760588014614 (patch)
treeb0fd9968af3e929ac496f159420a25dc3e1dcafb
parentf94c128eefcce2e3448d543f13cd7d7b8aa660a5 (diff)
parent76b2a303384e1d6299c3a0249f0f0ce2f8f96017 (diff)
Merge tag 'nfs-for-4.12-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust: "Highlights include: Stable bugfixes: - Fix use after free in write error path - Use GFP_NOIO for two allocations in writeback - Fix a hang in OPEN related to server reboot - Check the result of nfs4_pnfs_ds_connect - Fix an rcu lock leak Features: - Removal of the unmaintained and unused OSD pNFS layout - Cleanup and removal of lots of unnecessary dprintk()s - Cleanup and removal of some memory failure paths now that GFP_NOFS is guaranteed to never fail. - Remove the v3-only data server limitation on pNFS/flexfiles Bugfixes: - RPC/RDMA connection handling bugfixes - Copy offload: fixes to ensure the copied data is COMMITed to disk. - Readdir: switch back to using the ->iterate VFS interface - File locking fixes from Ben Coddington - Various use-after-free and deadlock issues in pNFS - Write path bugfixes" * tag 'nfs-for-4.12-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (89 commits) pNFS/flexfiles: Always attempt to call layoutstats when flexfiles is enabled NFSv4.1: Work around a Linux server bug... 
NFS append COMMIT after synchronous COPY NFSv4: Fix exclusive create attributes encoding NFSv4: Fix an rcu lock leak nfs: use kmap/kunmap directly NFS: always treat the invocation of nfs_getattr as cache hit when noac is on Fix nfs_client refcounting if kmalloc fails in nfs4_proc_exchange_id and nfs4_proc_async_renew NFSv4.1: RECLAIM_COMPLETE must handle NFS4ERR_CONN_NOT_BOUND_TO_SESSION pNFS: Fix NULL dereference in pnfs_generic_alloc_ds_commits pNFS: Fix a typo in pnfs_generic_alloc_ds_commits pNFS: Fix a deadlock when coalescing writes and returning the layout pNFS: Don't clear the layout return info if there are segments to return pNFS: Ensure we commit the layout if it has been invalidated pNFS: Don't send COMMITs to the DSes if the server invalidated our layout pNFS/flexfiles: Fix up the ff_layout_write_pagelist failure path pNFS: Ensure we check layout validity before marking it for return NFS4.1 handle interrupted slot reuse from ERR_DELAY NFSv4: check return value of xdr_inline_decode nfs/filelayout: fix NULL pointer dereference in fl_pnfs_update_layout() ...
-rw-r--r--Documentation/admin-guide/kernel-parameters.txt6
-rw-r--r--Documentation/filesystems/nfs/pnfs.txt37
-rw-r--r--fs/fuse/file.c2
-rw-r--r--fs/lockd/clntlock.c1
-rw-r--r--fs/lockd/clntproc.c26
-rw-r--r--fs/locks.c2
-rw-r--r--fs/nfs/Kconfig5
-rw-r--r--fs/nfs/Makefile1
-rw-r--r--fs/nfs/callback_proc.c47
-rw-r--r--fs/nfs/callback_xdr.c109
-rw-r--r--fs/nfs/client.c67
-rw-r--r--fs/nfs/dir.c104
-rw-r--r--fs/nfs/direct.c21
-rw-r--r--fs/nfs/file.c30
-rw-r--r--fs/nfs/filelayout/filelayout.c8
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c24
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c10
-rw-r--r--fs/nfs/inode.c5
-rw-r--r--fs/nfs/internal.h5
-rw-r--r--fs/nfs/namespace.c34
-rw-r--r--fs/nfs/nfs3proc.c54
-rw-r--r--fs/nfs/nfs42proc.c24
-rw-r--r--fs/nfs/nfs42xdr.c22
-rw-r--r--fs/nfs/nfs4client.c283
-rw-r--r--fs/nfs/nfs4getroot.c3
-rw-r--r--fs/nfs/nfs4namespace.c7
-rw-r--r--fs/nfs/nfs4proc.c99
-rw-r--r--fs/nfs/nfs4state.c10
-rw-r--r--fs/nfs/nfs4xdr.c94
-rw-r--r--fs/nfs/objlayout/Kbuild5
-rw-r--r--fs/nfs/objlayout/objio_osd.c675
-rw-r--r--fs/nfs/objlayout/objlayout.c706
-rw-r--r--fs/nfs/objlayout/objlayout.h183
-rw-r--r--fs/nfs/objlayout/pnfs_osd_xdr_cli.c415
-rw-r--r--fs/nfs/pagelist.c77
-rw-r--r--fs/nfs/pnfs.c62
-rw-r--r--fs/nfs/pnfs.h6
-rw-r--r--fs/nfs/pnfs_nfs.c24
-rw-r--r--fs/nfs/proc.c2
-rw-r--r--fs/nfs/read.c9
-rw-r--r--fs/nfs/write.c121
-rw-r--r--include/linux/fs.h2
-rw-r--r--include/linux/lockd/bind.h24
-rw-r--r--include/linux/lockd/lockd.h2
-rw-r--r--include/linux/nfs_fs.h17
-rw-r--r--include/linux/nfs_fs_sb.h1
-rw-r--r--include/linux/nfs_page.h5
-rw-r--r--include/linux/nfs_xdr.h3
-rw-r--r--net/sunrpc/clnt.c8
-rw-r--r--net/sunrpc/sched.c5
-rw-r--r--net/sunrpc/xdr.c2
-rw-r--r--net/sunrpc/xprt.c1
-rw-r--r--net/sunrpc/xprtrdma/rpc_rdma.c12
-rw-r--r--net/sunrpc/xprtrdma/transport.c57
-rw-r--r--net/sunrpc/xprtrdma/verbs.c323
-rw-r--r--net/sunrpc/xprtrdma/xprt_rdma.h22
56 files changed, 949 insertions, 2960 deletions
diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 4e0654b56aef..238bd211f365 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -2434,12 +2434,6 @@
2434 and gids from such clients. This is intended to ease 2434 and gids from such clients. This is intended to ease
2435 migration from NFSv2/v3. 2435 migration from NFSv2/v3.
2436 2436
2437 objlayoutdriver.osd_login_prog=
2438 [NFS] [OBJLAYOUT] sets the pathname to the program which
2439 is used to automatically discover and login into new
2440 osd-targets. Please see:
2441 Documentation/filesystems/pnfs.txt for more explanations
2442
2443 nmi_debug= [KNL,SH] Specify one or more actions to take 2437 nmi_debug= [KNL,SH] Specify one or more actions to take
2444 when a NMI is triggered. 2438 when a NMI is triggered.
2445 Format: [state][,regs][,debounce][,die] 2439 Format: [state][,regs][,debounce][,die]
diff --git a/Documentation/filesystems/nfs/pnfs.txt b/Documentation/filesystems/nfs/pnfs.txt
index 8de578a98222..80dc0bdc302a 100644
--- a/Documentation/filesystems/nfs/pnfs.txt
+++ b/Documentation/filesystems/nfs/pnfs.txt
@@ -64,46 +64,9 @@ table which are called by the nfs-client pnfs-core to implement the
64different layout types. 64different layout types.
65 65
66Files-layout-driver code is in: fs/nfs/filelayout/.. directory 66Files-layout-driver code is in: fs/nfs/filelayout/.. directory
67Objects-layout-driver code is in: fs/nfs/objlayout/.. directory
68Blocks-layout-driver code is in: fs/nfs/blocklayout/.. directory 67Blocks-layout-driver code is in: fs/nfs/blocklayout/.. directory
69Flexfiles-layout-driver code is in: fs/nfs/flexfilelayout/.. directory 68Flexfiles-layout-driver code is in: fs/nfs/flexfilelayout/.. directory
70 69
71objects-layout setup
72--------------------
73
74As part of the full STD implementation the objlayoutdriver.ko needs, at times,
75to automatically login to yet undiscovered iscsi/osd devices. For this the
76driver makes up-calles to a user-mode script called *osd_login*
77
78The path_name of the script to use is by default:
79 /sbin/osd_login.
80This name can be overridden by the Kernel module parameter:
81 objlayoutdriver.osd_login_prog
82
83If Kernel does not find the osd_login_prog path it will zero it out
84and will not attempt farther logins. An admin can then write new value
85to the objlayoutdriver.osd_login_prog Kernel parameter to re-enable it.
86
87The /sbin/osd_login is part of the nfs-utils package, and should usually
88be installed on distributions that support this Kernel version.
89
90The API to the login script is as follows:
91 Usage: $0 -u <URI> -o <OSDNAME> -s <SYSTEMID>
92 Options:
93 -u target uri e.g. iscsi://<ip>:<port>
94 (always exists)
95 (More protocols can be defined in the future.
96 The client does not interpret this string it is
97 passed unchanged as received from the Server)
98 -o osdname of the requested target OSD
99 (Might be empty)
100 (A string which denotes the OSD name, there is a
101 limit of 64 chars on this string)
102 -s systemid of the requested target OSD
103 (Might be empty)
104 (This string, if not empty is always an hex
105 representation of the 20 bytes osd_system_id)
106
107blocks-layout setup 70blocks-layout setup
108------------------- 71-------------------
109 72
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index aa93f09ae6e6..3ee4fdc3da9e 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -2177,7 +2177,7 @@ static int fuse_setlk(struct file *file, struct file_lock *fl, int flock)
2177 } 2177 }
2178 2178
2179 /* Unlock on close is handled by the flush method */ 2179 /* Unlock on close is handled by the flush method */
2180 if (fl->fl_flags & FL_CLOSE) 2180 if ((fl->fl_flags & FL_CLOSE_POSIX) == FL_CLOSE_POSIX)
2181 return 0; 2181 return 0;
2182 2182
2183 if (pid && pid_nr == 0) 2183 if (pid && pid_nr == 0)
diff --git a/fs/lockd/clntlock.c b/fs/lockd/clntlock.c
index 41e491b8e5d7..27d577dbe51a 100644
--- a/fs/lockd/clntlock.c
+++ b/fs/lockd/clntlock.c
@@ -69,6 +69,7 @@ struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init)
69 if (host->h_rpcclnt == NULL && nlm_bind_host(host) == NULL) 69 if (host->h_rpcclnt == NULL && nlm_bind_host(host) == NULL)
70 goto out_nobind; 70 goto out_nobind;
71 71
72 host->h_nlmclnt_ops = nlm_init->nlmclnt_ops;
72 return host; 73 return host;
73out_nobind: 74out_nobind:
74 nlmclnt_release_host(host); 75 nlmclnt_release_host(host);
diff --git a/fs/lockd/clntproc.c b/fs/lockd/clntproc.c
index 112952037933..066ac313ae5c 100644
--- a/fs/lockd/clntproc.c
+++ b/fs/lockd/clntproc.c
@@ -150,17 +150,22 @@ static void nlmclnt_release_lockargs(struct nlm_rqst *req)
150 * @host: address of a valid nlm_host context representing the NLM server 150 * @host: address of a valid nlm_host context representing the NLM server
151 * @cmd: fcntl-style file lock operation to perform 151 * @cmd: fcntl-style file lock operation to perform
152 * @fl: address of arguments for the lock operation 152 * @fl: address of arguments for the lock operation
153 * @data: address of data to be sent to callback operations
153 * 154 *
154 */ 155 */
155int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl) 156int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl, void *data)
156{ 157{
157 struct nlm_rqst *call; 158 struct nlm_rqst *call;
158 int status; 159 int status;
160 const struct nlmclnt_operations *nlmclnt_ops = host->h_nlmclnt_ops;
159 161
160 call = nlm_alloc_call(host); 162 call = nlm_alloc_call(host);
161 if (call == NULL) 163 if (call == NULL)
162 return -ENOMEM; 164 return -ENOMEM;
163 165
166 if (nlmclnt_ops && nlmclnt_ops->nlmclnt_alloc_call)
167 nlmclnt_ops->nlmclnt_alloc_call(data);
168
164 nlmclnt_locks_init_private(fl, host); 169 nlmclnt_locks_init_private(fl, host);
165 if (!fl->fl_u.nfs_fl.owner) { 170 if (!fl->fl_u.nfs_fl.owner) {
166 /* lockowner allocation has failed */ 171 /* lockowner allocation has failed */
@@ -169,6 +174,7 @@ int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl)
169 } 174 }
170 /* Set up the argument struct */ 175 /* Set up the argument struct */
171 nlmclnt_setlockargs(call, fl); 176 nlmclnt_setlockargs(call, fl);
177 call->a_callback_data = data;
172 178
173 if (IS_SETLK(cmd) || IS_SETLKW(cmd)) { 179 if (IS_SETLK(cmd) || IS_SETLKW(cmd)) {
174 if (fl->fl_type != F_UNLCK) { 180 if (fl->fl_type != F_UNLCK) {
@@ -214,8 +220,12 @@ struct nlm_rqst *nlm_alloc_call(struct nlm_host *host)
214 220
215void nlmclnt_release_call(struct nlm_rqst *call) 221void nlmclnt_release_call(struct nlm_rqst *call)
216{ 222{
223 const struct nlmclnt_operations *nlmclnt_ops = call->a_host->h_nlmclnt_ops;
224
217 if (!atomic_dec_and_test(&call->a_count)) 225 if (!atomic_dec_and_test(&call->a_count))
218 return; 226 return;
227 if (nlmclnt_ops && nlmclnt_ops->nlmclnt_release_call)
228 nlmclnt_ops->nlmclnt_release_call(call->a_callback_data);
219 nlmclnt_release_host(call->a_host); 229 nlmclnt_release_host(call->a_host);
220 nlmclnt_release_lockargs(call); 230 nlmclnt_release_lockargs(call);
221 kfree(call); 231 kfree(call);
@@ -687,6 +697,19 @@ out:
687 return status; 697 return status;
688} 698}
689 699
700static void nlmclnt_unlock_prepare(struct rpc_task *task, void *data)
701{
702 struct nlm_rqst *req = data;
703 const struct nlmclnt_operations *nlmclnt_ops = req->a_host->h_nlmclnt_ops;
704 bool defer_call = false;
705
706 if (nlmclnt_ops && nlmclnt_ops->nlmclnt_unlock_prepare)
707 defer_call = nlmclnt_ops->nlmclnt_unlock_prepare(task, req->a_callback_data);
708
709 if (!defer_call)
710 rpc_call_start(task);
711}
712
690static void nlmclnt_unlock_callback(struct rpc_task *task, void *data) 713static void nlmclnt_unlock_callback(struct rpc_task *task, void *data)
691{ 714{
692 struct nlm_rqst *req = data; 715 struct nlm_rqst *req = data;
@@ -720,6 +743,7 @@ die:
720} 743}
721 744
722static const struct rpc_call_ops nlmclnt_unlock_ops = { 745static const struct rpc_call_ops nlmclnt_unlock_ops = {
746 .rpc_call_prepare = nlmclnt_unlock_prepare,
723 .rpc_call_done = nlmclnt_unlock_callback, 747 .rpc_call_done = nlmclnt_unlock_callback,
724 .rpc_release = nlmclnt_rpc_release, 748 .rpc_release = nlmclnt_rpc_release,
725}; 749};
diff --git a/fs/locks.c b/fs/locks.c
index 26811321d39b..af2031a1fcff 100644
--- a/fs/locks.c
+++ b/fs/locks.c
@@ -2504,7 +2504,7 @@ locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
2504 .fl_owner = filp, 2504 .fl_owner = filp,
2505 .fl_pid = current->tgid, 2505 .fl_pid = current->tgid,
2506 .fl_file = filp, 2506 .fl_file = filp,
2507 .fl_flags = FL_FLOCK, 2507 .fl_flags = FL_FLOCK | FL_CLOSE,
2508 .fl_type = F_UNLCK, 2508 .fl_type = F_UNLCK,
2509 .fl_end = OFFSET_MAX, 2509 .fl_end = OFFSET_MAX,
2510 }; 2510 };
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index f31fd0dd92c6..69d02cf8cf37 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -123,11 +123,6 @@ config PNFS_BLOCK
123 depends on NFS_V4_1 && BLK_DEV_DM 123 depends on NFS_V4_1 && BLK_DEV_DM
124 default NFS_V4 124 default NFS_V4
125 125
126config PNFS_OBJLAYOUT
127 tristate
128 depends on NFS_V4_1 && SCSI_OSD_ULD
129 default NFS_V4
130
131config PNFS_FLEXFILE_LAYOUT 126config PNFS_FLEXFILE_LAYOUT
132 tristate 127 tristate
133 depends on NFS_V4_1 && NFS_V3 128 depends on NFS_V4_1 && NFS_V3
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile
index 6abdda209642..98f4e5728a67 100644
--- a/fs/nfs/Makefile
+++ b/fs/nfs/Makefile
@@ -31,6 +31,5 @@ nfsv4-$(CONFIG_NFS_V4_1) += pnfs.o pnfs_dev.o pnfs_nfs.o
31nfsv4-$(CONFIG_NFS_V4_2) += nfs42proc.o 31nfsv4-$(CONFIG_NFS_V4_2) += nfs42proc.o
32 32
33obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/ 33obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/
34obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/
35obj-$(CONFIG_PNFS_BLOCK) += blocklayout/ 34obj-$(CONFIG_PNFS_BLOCK) += blocklayout/
36obj-$(CONFIG_PNFS_FLEXFILE_LAYOUT) += flexfilelayout/ 35obj-$(CONFIG_PNFS_FLEXFILE_LAYOUT) += flexfilelayout/
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index f073a6d2c6a5..52479f180ea1 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -131,10 +131,11 @@ restart:
131 if (!inode) 131 if (!inode)
132 continue; 132 continue;
133 if (!nfs_sb_active(inode->i_sb)) { 133 if (!nfs_sb_active(inode->i_sb)) {
134 rcu_read_lock(); 134 rcu_read_unlock();
135 spin_unlock(&clp->cl_lock); 135 spin_unlock(&clp->cl_lock);
136 iput(inode); 136 iput(inode);
137 spin_lock(&clp->cl_lock); 137 spin_lock(&clp->cl_lock);
138 rcu_read_lock();
138 goto restart; 139 goto restart;
139 } 140 }
140 return inode; 141 return inode;
@@ -170,10 +171,11 @@ restart:
170 if (!inode) 171 if (!inode)
171 continue; 172 continue;
172 if (!nfs_sb_active(inode->i_sb)) { 173 if (!nfs_sb_active(inode->i_sb)) {
173 rcu_read_lock(); 174 rcu_read_unlock();
174 spin_unlock(&clp->cl_lock); 175 spin_unlock(&clp->cl_lock);
175 iput(inode); 176 iput(inode);
176 spin_lock(&clp->cl_lock); 177 spin_lock(&clp->cl_lock);
178 rcu_read_lock();
177 goto restart; 179 goto restart;
178 } 180 }
179 return inode; 181 return inode;
@@ -317,31 +319,18 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
317static u32 do_callback_layoutrecall(struct nfs_client *clp, 319static u32 do_callback_layoutrecall(struct nfs_client *clp,
318 struct cb_layoutrecallargs *args) 320 struct cb_layoutrecallargs *args)
319{ 321{
320 u32 res;
321
322 dprintk("%s enter, type=%i\n", __func__, args->cbl_recall_type);
323 if (args->cbl_recall_type == RETURN_FILE) 322 if (args->cbl_recall_type == RETURN_FILE)
324 res = initiate_file_draining(clp, args); 323 return initiate_file_draining(clp, args);
325 else 324 return initiate_bulk_draining(clp, args);
326 res = initiate_bulk_draining(clp, args);
327 dprintk("%s returning %i\n", __func__, res);
328 return res;
329
330} 325}
331 326
332__be32 nfs4_callback_layoutrecall(struct cb_layoutrecallargs *args, 327__be32 nfs4_callback_layoutrecall(struct cb_layoutrecallargs *args,
333 void *dummy, struct cb_process_state *cps) 328 void *dummy, struct cb_process_state *cps)
334{ 329{
335 u32 res; 330 u32 res = NFS4ERR_OP_NOT_IN_SESSION;
336
337 dprintk("%s: -->\n", __func__);
338 331
339 if (cps->clp) 332 if (cps->clp)
340 res = do_callback_layoutrecall(cps->clp, args); 333 res = do_callback_layoutrecall(cps->clp, args);
341 else
342 res = NFS4ERR_OP_NOT_IN_SESSION;
343
344 dprintk("%s: exit with status = %d\n", __func__, res);
345 return cpu_to_be32(res); 334 return cpu_to_be32(res);
346} 335}
347 336
@@ -364,8 +353,6 @@ __be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args,
364 struct nfs_client *clp = cps->clp; 353 struct nfs_client *clp = cps->clp;
365 struct nfs_server *server = NULL; 354 struct nfs_server *server = NULL;
366 355
367 dprintk("%s: -->\n", __func__);
368
369 if (!clp) { 356 if (!clp) {
370 res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION); 357 res = cpu_to_be32(NFS4ERR_OP_NOT_IN_SESSION);
371 goto out; 358 goto out;
@@ -384,8 +371,6 @@ __be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args,
384 goto found; 371 goto found;
385 } 372 }
386 rcu_read_unlock(); 373 rcu_read_unlock();
387 dprintk("%s: layout type %u not found\n",
388 __func__, dev->cbd_layout_type);
389 continue; 374 continue;
390 } 375 }
391 376
@@ -395,8 +380,6 @@ __be32 nfs4_callback_devicenotify(struct cb_devicenotifyargs *args,
395 380
396out: 381out:
397 kfree(args->devs); 382 kfree(args->devs);
398 dprintk("%s: exit with status = %u\n",
399 __func__, be32_to_cpu(res));
400 return res; 383 return res;
401} 384}
402 385
@@ -417,16 +400,11 @@ static __be32
417validate_seqid(const struct nfs4_slot_table *tbl, const struct nfs4_slot *slot, 400validate_seqid(const struct nfs4_slot_table *tbl, const struct nfs4_slot *slot,
418 const struct cb_sequenceargs * args) 401 const struct cb_sequenceargs * args)
419{ 402{
420 dprintk("%s enter. slotid %u seqid %u, slot table seqid: %u\n",
421 __func__, args->csa_slotid, args->csa_sequenceid, slot->seq_nr);
422
423 if (args->csa_slotid > tbl->server_highest_slotid) 403 if (args->csa_slotid > tbl->server_highest_slotid)
424 return htonl(NFS4ERR_BADSLOT); 404 return htonl(NFS4ERR_BADSLOT);
425 405
426 /* Replay */ 406 /* Replay */
427 if (args->csa_sequenceid == slot->seq_nr) { 407 if (args->csa_sequenceid == slot->seq_nr) {
428 dprintk("%s seqid %u is a replay\n",
429 __func__, args->csa_sequenceid);
430 if (nfs4_test_locked_slot(tbl, slot->slot_nr)) 408 if (nfs4_test_locked_slot(tbl, slot->slot_nr))
431 return htonl(NFS4ERR_DELAY); 409 return htonl(NFS4ERR_DELAY);
432 /* Signal process_op to set this error on next op */ 410 /* Signal process_op to set this error on next op */
@@ -480,15 +458,6 @@ static bool referring_call_exists(struct nfs_client *clp,
480 458
481 for (j = 0; j < rclist->rcl_nrefcalls; j++) { 459 for (j = 0; j < rclist->rcl_nrefcalls; j++) {
482 ref = &rclist->rcl_refcalls[j]; 460 ref = &rclist->rcl_refcalls[j];
483
484 dprintk("%s: sessionid %x:%x:%x:%x sequenceid %u "
485 "slotid %u\n", __func__,
486 ((u32 *)&rclist->rcl_sessionid.data)[0],
487 ((u32 *)&rclist->rcl_sessionid.data)[1],
488 ((u32 *)&rclist->rcl_sessionid.data)[2],
489 ((u32 *)&rclist->rcl_sessionid.data)[3],
490 ref->rc_sequenceid, ref->rc_slotid);
491
492 status = nfs4_slot_wait_on_seqid(tbl, ref->rc_slotid, 461 status = nfs4_slot_wait_on_seqid(tbl, ref->rc_slotid,
493 ref->rc_sequenceid, HZ >> 1) < 0; 462 ref->rc_sequenceid, HZ >> 1) < 0;
494 if (status) 463 if (status)
@@ -593,8 +562,6 @@ out:
593 res->csr_status = status; 562 res->csr_status = status;
594 563
595 trace_nfs4_cb_sequence(args, res, status); 564 trace_nfs4_cb_sequence(args, res, status);
596 dprintk("%s: exit with status = %d res->csr_status %d\n", __func__,
597 ntohl(status), ntohl(res->csr_status));
598 return status; 565 return status;
599} 566}
600 567
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index d051fc3583a9..c14758e08d73 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -171,8 +171,6 @@ static __be32 decode_compound_hdr_arg(struct xdr_stream *xdr, struct cb_compound
171 return htonl(NFS4ERR_MINOR_VERS_MISMATCH); 171 return htonl(NFS4ERR_MINOR_VERS_MISMATCH);
172 } 172 }
173 hdr->nops = ntohl(*p); 173 hdr->nops = ntohl(*p);
174 dprintk("%s: minorversion %d nops %d\n", __func__,
175 hdr->minorversion, hdr->nops);
176 return 0; 174 return 0;
177} 175}
178 176
@@ -192,11 +190,8 @@ static __be32 decode_getattr_args(struct svc_rqst *rqstp, struct xdr_stream *xdr
192 190
193 status = decode_fh(xdr, &args->fh); 191 status = decode_fh(xdr, &args->fh);
194 if (unlikely(status != 0)) 192 if (unlikely(status != 0))
195 goto out; 193 return status;
196 status = decode_bitmap(xdr, args->bitmap); 194 return decode_bitmap(xdr, args->bitmap);
197out:
198 dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
199 return status;
200} 195}
201 196
202static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_recallargs *args) 197static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr, struct cb_recallargs *args)
@@ -206,17 +201,12 @@ static __be32 decode_recall_args(struct svc_rqst *rqstp, struct xdr_stream *xdr,
206 201
207 status = decode_delegation_stateid(xdr, &args->stateid); 202 status = decode_delegation_stateid(xdr, &args->stateid);
208 if (unlikely(status != 0)) 203 if (unlikely(status != 0))
209 goto out; 204 return status;
210 p = read_buf(xdr, 4); 205 p = read_buf(xdr, 4);
211 if (unlikely(p == NULL)) { 206 if (unlikely(p == NULL))
212 status = htonl(NFS4ERR_RESOURCE); 207 return htonl(NFS4ERR_RESOURCE);
213 goto out;
214 }
215 args->truncate = ntohl(*p); 208 args->truncate = ntohl(*p);
216 status = decode_fh(xdr, &args->fh); 209 return decode_fh(xdr, &args->fh);
217out:
218 dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
219 return status;
220} 210}
221 211
222#if defined(CONFIG_NFS_V4_1) 212#if defined(CONFIG_NFS_V4_1)
@@ -235,10 +225,8 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
235 uint32_t iomode; 225 uint32_t iomode;
236 226
237 p = read_buf(xdr, 4 * sizeof(uint32_t)); 227 p = read_buf(xdr, 4 * sizeof(uint32_t));
238 if (unlikely(p == NULL)) { 228 if (unlikely(p == NULL))
239 status = htonl(NFS4ERR_BADXDR); 229 return htonl(NFS4ERR_BADXDR);
240 goto out;
241 }
242 230
243 args->cbl_layout_type = ntohl(*p++); 231 args->cbl_layout_type = ntohl(*p++);
244 /* Depite the spec's xdr, iomode really belongs in the FILE switch, 232 /* Depite the spec's xdr, iomode really belongs in the FILE switch,
@@ -252,37 +240,23 @@ static __be32 decode_layoutrecall_args(struct svc_rqst *rqstp,
252 args->cbl_range.iomode = iomode; 240 args->cbl_range.iomode = iomode;
253 status = decode_fh(xdr, &args->cbl_fh); 241 status = decode_fh(xdr, &args->cbl_fh);
254 if (unlikely(status != 0)) 242 if (unlikely(status != 0))
255 goto out; 243 return status;
256 244
257 p = read_buf(xdr, 2 * sizeof(uint64_t)); 245 p = read_buf(xdr, 2 * sizeof(uint64_t));
258 if (unlikely(p == NULL)) { 246 if (unlikely(p == NULL))
259 status = htonl(NFS4ERR_BADXDR); 247 return htonl(NFS4ERR_BADXDR);
260 goto out;
261 }
262 p = xdr_decode_hyper(p, &args->cbl_range.offset); 248 p = xdr_decode_hyper(p, &args->cbl_range.offset);
263 p = xdr_decode_hyper(p, &args->cbl_range.length); 249 p = xdr_decode_hyper(p, &args->cbl_range.length);
264 status = decode_layout_stateid(xdr, &args->cbl_stateid); 250 return decode_layout_stateid(xdr, &args->cbl_stateid);
265 if (unlikely(status != 0))
266 goto out;
267 } else if (args->cbl_recall_type == RETURN_FSID) { 251 } else if (args->cbl_recall_type == RETURN_FSID) {
268 p = read_buf(xdr, 2 * sizeof(uint64_t)); 252 p = read_buf(xdr, 2 * sizeof(uint64_t));
269 if (unlikely(p == NULL)) { 253 if (unlikely(p == NULL))
270 status = htonl(NFS4ERR_BADXDR); 254 return htonl(NFS4ERR_BADXDR);
271 goto out;
272 }
273 p = xdr_decode_hyper(p, &args->cbl_fsid.major); 255 p = xdr_decode_hyper(p, &args->cbl_fsid.major);
274 p = xdr_decode_hyper(p, &args->cbl_fsid.minor); 256 p = xdr_decode_hyper(p, &args->cbl_fsid.minor);
275 } else if (args->cbl_recall_type != RETURN_ALL) { 257 } else if (args->cbl_recall_type != RETURN_ALL)
276 status = htonl(NFS4ERR_BADXDR); 258 return htonl(NFS4ERR_BADXDR);
277 goto out; 259 return 0;
278 }
279 dprintk("%s: ltype 0x%x iomode %d changed %d recall_type %d\n",
280 __func__,
281 args->cbl_layout_type, iomode,
282 args->cbl_layoutchanged, args->cbl_recall_type);
283out:
284 dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
285 return status;
286} 260}
287 261
288static 262static
@@ -437,12 +411,11 @@ static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp,
437 411
438 status = decode_sessionid(xdr, &args->csa_sessionid); 412 status = decode_sessionid(xdr, &args->csa_sessionid);
439 if (status) 413 if (status)
440 goto out; 414 return status;
441 415
442 status = htonl(NFS4ERR_RESOURCE);
443 p = read_buf(xdr, 5 * sizeof(uint32_t)); 416 p = read_buf(xdr, 5 * sizeof(uint32_t));
444 if (unlikely(p == NULL)) 417 if (unlikely(p == NULL))
445 goto out; 418 return htonl(NFS4ERR_RESOURCE);
446 419
447 args->csa_addr = svc_addr(rqstp); 420 args->csa_addr = svc_addr(rqstp);
448 args->csa_sequenceid = ntohl(*p++); 421 args->csa_sequenceid = ntohl(*p++);
@@ -456,7 +429,7 @@ static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp,
456 sizeof(*args->csa_rclists), 429 sizeof(*args->csa_rclists),
457 GFP_KERNEL); 430 GFP_KERNEL);
458 if (unlikely(args->csa_rclists == NULL)) 431 if (unlikely(args->csa_rclists == NULL))
459 goto out; 432 return htonl(NFS4ERR_RESOURCE);
460 433
461 for (i = 0; i < args->csa_nrclists; i++) { 434 for (i = 0; i < args->csa_nrclists; i++) {
462 status = decode_rc_list(xdr, &args->csa_rclists[i]); 435 status = decode_rc_list(xdr, &args->csa_rclists[i]);
@@ -466,27 +439,13 @@ static __be32 decode_cb_sequence_args(struct svc_rqst *rqstp,
466 } 439 }
467 } 440 }
468 } 441 }
469 status = 0; 442 return 0;
470
471 dprintk("%s: sessionid %x:%x:%x:%x sequenceid %u slotid %u "
472 "highestslotid %u cachethis %d nrclists %u\n",
473 __func__,
474 ((u32 *)&args->csa_sessionid)[0],
475 ((u32 *)&args->csa_sessionid)[1],
476 ((u32 *)&args->csa_sessionid)[2],
477 ((u32 *)&args->csa_sessionid)[3],
478 args->csa_sequenceid, args->csa_slotid,
479 args->csa_highestslotid, args->csa_cachethis,
480 args->csa_nrclists);
481out:
482 dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
483 return status;
484 443
485out_free: 444out_free:
486 for (i = 0; i < args->csa_nrclists; i++) 445 for (i = 0; i < args->csa_nrclists; i++)
487 kfree(args->csa_rclists[i].rcl_refcalls); 446 kfree(args->csa_rclists[i].rcl_refcalls);
488 kfree(args->csa_rclists); 447 kfree(args->csa_rclists);
489 goto out; 448 return status;
490} 449}
491 450
492static __be32 decode_recallany_args(struct svc_rqst *rqstp, 451static __be32 decode_recallany_args(struct svc_rqst *rqstp,
@@ -557,11 +516,8 @@ static __be32 decode_notify_lock_args(struct svc_rqst *rqstp, struct xdr_stream
557 516
558 status = decode_fh(xdr, &args->cbnl_fh); 517 status = decode_fh(xdr, &args->cbnl_fh);
559 if (unlikely(status != 0)) 518 if (unlikely(status != 0))
560 goto out; 519 return status;
561 status = decode_lockowner(xdr, args); 520 return decode_lockowner(xdr, args);
562out:
563 dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
564 return status;
565} 521}
566 522
567#endif /* CONFIG_NFS_V4_1 */ 523#endif /* CONFIG_NFS_V4_1 */
@@ -707,7 +663,6 @@ static __be32 encode_getattr_res(struct svc_rqst *rqstp, struct xdr_stream *xdr,
707 status = encode_attr_mtime(xdr, res->bitmap, &res->mtime); 663 status = encode_attr_mtime(xdr, res->bitmap, &res->mtime);
708 *savep = htonl((unsigned int)((char *)xdr->p - (char *)(savep+1))); 664 *savep = htonl((unsigned int)((char *)xdr->p - (char *)(savep+1)));
709out: 665out:
710 dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
711 return status; 666 return status;
712} 667}
713 668
@@ -734,11 +689,11 @@ static __be32 encode_cb_sequence_res(struct svc_rqst *rqstp,
734 __be32 status = res->csr_status; 689 __be32 status = res->csr_status;
735 690
736 if (unlikely(status != 0)) 691 if (unlikely(status != 0))
737 goto out; 692 return status;
738 693
739 status = encode_sessionid(xdr, &res->csr_sessionid); 694 status = encode_sessionid(xdr, &res->csr_sessionid);
740 if (status) 695 if (status)
741 goto out; 696 return status;
742 697
743 p = xdr_reserve_space(xdr, 4 * sizeof(uint32_t)); 698 p = xdr_reserve_space(xdr, 4 * sizeof(uint32_t));
744 if (unlikely(p == NULL)) 699 if (unlikely(p == NULL))
@@ -748,9 +703,7 @@ static __be32 encode_cb_sequence_res(struct svc_rqst *rqstp,
748 *p++ = htonl(res->csr_slotid); 703 *p++ = htonl(res->csr_slotid);
749 *p++ = htonl(res->csr_highestslotid); 704 *p++ = htonl(res->csr_highestslotid);
750 *p++ = htonl(res->csr_target_highestslotid); 705 *p++ = htonl(res->csr_target_highestslotid);
751out: 706 return 0;
752 dprintk("%s: exit with status = %d\n", __func__, ntohl(status));
753 return status;
754} 707}
755 708
756static __be32 709static __be32
@@ -871,14 +824,10 @@ static __be32 process_op(int nop, struct svc_rqst *rqstp,
871 long maxlen; 824 long maxlen;
872 __be32 res; 825 __be32 res;
873 826
874 dprintk("%s: start\n", __func__);
875 status = decode_op_hdr(xdr_in, &op_nr); 827 status = decode_op_hdr(xdr_in, &op_nr);
876 if (unlikely(status)) 828 if (unlikely(status))
877 return status; 829 return status;
878 830
879 dprintk("%s: minorversion=%d nop=%d op_nr=%u\n",
880 __func__, cps->minorversion, nop, op_nr);
881
882 switch (cps->minorversion) { 831 switch (cps->minorversion) {
883 case 0: 832 case 0:
884 status = preprocess_nfs4_op(op_nr, &op); 833 status = preprocess_nfs4_op(op_nr, &op);
@@ -917,7 +866,6 @@ encode_hdr:
917 return res; 866 return res;
918 if (op->encode_res != NULL && status == 0) 867 if (op->encode_res != NULL && status == 0)
919 status = op->encode_res(rqstp, xdr_out, resp); 868 status = op->encode_res(rqstp, xdr_out, resp);
920 dprintk("%s: done, status = %d\n", __func__, ntohl(status));
921 return status; 869 return status;
922} 870}
923 871
@@ -937,8 +885,6 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
937 }; 885 };
938 unsigned int nops = 0; 886 unsigned int nops = 0;
939 887
940 dprintk("%s: start\n", __func__);
941
942 xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base); 888 xdr_init_decode(&xdr_in, &rqstp->rq_arg, rqstp->rq_arg.head[0].iov_base);
943 889
944 p = (__be32*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len); 890 p = (__be32*)((char *)rqstp->rq_res.head[0].iov_base + rqstp->rq_res.head[0].iov_len);
@@ -977,7 +923,6 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
977 *hdr_res.nops = htonl(nops); 923 *hdr_res.nops = htonl(nops);
978 nfs4_cb_free_slot(&cps); 924 nfs4_cb_free_slot(&cps);
979 nfs_put_client(cps.clp); 925 nfs_put_client(cps.clp);
980 dprintk("%s: done, status = %u\n", __func__, ntohl(status));
981 return rpc_success; 926 return rpc_success;
982 927
983out_invalidcred: 928out_invalidcred:
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 04d15a0045e3..ee5ddbd36088 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -218,6 +218,7 @@ static void nfs_cb_idr_remove_locked(struct nfs_client *clp)
218static void pnfs_init_server(struct nfs_server *server) 218static void pnfs_init_server(struct nfs_server *server)
219{ 219{
220 rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC"); 220 rpc_init_wait_queue(&server->roc_rpcwaitq, "pNFS ROC");
221 rpc_init_wait_queue(&server->uoc_rpcwaitq, "NFS UOC");
221} 222}
222 223
223#else 224#else
@@ -240,8 +241,6 @@ static void pnfs_init_server(struct nfs_server *server)
240 */ 241 */
241void nfs_free_client(struct nfs_client *clp) 242void nfs_free_client(struct nfs_client *clp)
242{ 243{
243 dprintk("--> nfs_free_client(%u)\n", clp->rpc_ops->version);
244
245 nfs_fscache_release_client_cookie(clp); 244 nfs_fscache_release_client_cookie(clp);
246 245
247 /* -EIO all pending I/O */ 246 /* -EIO all pending I/O */
@@ -256,8 +255,6 @@ void nfs_free_client(struct nfs_client *clp)
256 kfree(clp->cl_hostname); 255 kfree(clp->cl_hostname);
257 kfree(clp->cl_acceptor); 256 kfree(clp->cl_acceptor);
258 kfree(clp); 257 kfree(clp);
259
260 dprintk("<-- nfs_free_client()\n");
261} 258}
262EXPORT_SYMBOL_GPL(nfs_free_client); 259EXPORT_SYMBOL_GPL(nfs_free_client);
263 260
@@ -271,7 +268,6 @@ void nfs_put_client(struct nfs_client *clp)
271 if (!clp) 268 if (!clp)
272 return; 269 return;
273 270
274 dprintk("--> nfs_put_client({%d})\n", atomic_read(&clp->cl_count));
275 nn = net_generic(clp->cl_net, nfs_net_id); 271 nn = net_generic(clp->cl_net, nfs_net_id);
276 272
277 if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) { 273 if (atomic_dec_and_lock(&clp->cl_count, &nn->nfs_client_lock)) {
@@ -382,9 +378,6 @@ nfs_found_client(const struct nfs_client_initdata *cl_init,
382 } 378 }
383 379
384 smp_rmb(); 380 smp_rmb();
385
386 dprintk("<-- %s found nfs_client %p for %s\n",
387 __func__, clp, cl_init->hostname ?: "");
388 return clp; 381 return clp;
389} 382}
390 383
@@ -403,9 +396,6 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
403 return NULL; 396 return NULL;
404 } 397 }
405 398
406 dprintk("--> nfs_get_client(%s,v%u)\n",
407 cl_init->hostname, rpc_ops->version);
408
409 /* see if the client already exists */ 399 /* see if the client already exists */
410 do { 400 do {
411 spin_lock(&nn->nfs_client_lock); 401 spin_lock(&nn->nfs_client_lock);
@@ -430,8 +420,6 @@ struct nfs_client *nfs_get_client(const struct nfs_client_initdata *cl_init)
430 new = rpc_ops->alloc_client(cl_init); 420 new = rpc_ops->alloc_client(cl_init);
431 } while (!IS_ERR(new)); 421 } while (!IS_ERR(new));
432 422
433 dprintk("<-- nfs_get_client() Failed to find %s (%ld)\n",
434 cl_init->hostname, PTR_ERR(new));
435 return new; 423 return new;
436} 424}
437EXPORT_SYMBOL_GPL(nfs_get_client); 425EXPORT_SYMBOL_GPL(nfs_get_client);
@@ -558,6 +546,7 @@ static int nfs_start_lockd(struct nfs_server *server)
558 .noresvport = server->flags & NFS_MOUNT_NORESVPORT ? 546 .noresvport = server->flags & NFS_MOUNT_NORESVPORT ?
559 1 : 0, 547 1 : 0,
560 .net = clp->cl_net, 548 .net = clp->cl_net,
549 .nlmclnt_ops = clp->cl_nfs_mod->rpc_ops->nlmclnt_ops,
561 }; 550 };
562 551
563 if (nlm_init.nfs_version > 3) 552 if (nlm_init.nfs_version > 3)
@@ -624,27 +613,21 @@ struct nfs_client *nfs_init_client(struct nfs_client *clp,
624{ 613{
625 int error; 614 int error;
626 615
627 if (clp->cl_cons_state == NFS_CS_READY) { 616 /* the client is already initialised */
628 /* the client is already initialised */ 617 if (clp->cl_cons_state == NFS_CS_READY)
629 dprintk("<-- nfs_init_client() = 0 [already %p]\n", clp);
630 return clp; 618 return clp;
631 }
632 619
633 /* 620 /*
634 * Create a client RPC handle for doing FSSTAT with UNIX auth only 621 * Create a client RPC handle for doing FSSTAT with UNIX auth only
635 * - RFC 2623, sec 2.3.2 622 * - RFC 2623, sec 2.3.2
636 */ 623 */
637 error = nfs_create_rpc_client(clp, cl_init, RPC_AUTH_UNIX); 624 error = nfs_create_rpc_client(clp, cl_init, RPC_AUTH_UNIX);
638 if (error < 0) 625 nfs_mark_client_ready(clp, error == 0 ? NFS_CS_READY : error);
639 goto error; 626 if (error < 0) {
640 nfs_mark_client_ready(clp, NFS_CS_READY); 627 nfs_put_client(clp);
628 clp = ERR_PTR(error);
629 }
641 return clp; 630 return clp;
642
643error:
644 nfs_mark_client_ready(clp, error);
645 nfs_put_client(clp);
646 dprintk("<-- nfs_init_client() = xerror %d\n", error);
647 return ERR_PTR(error);
648} 631}
649EXPORT_SYMBOL_GPL(nfs_init_client); 632EXPORT_SYMBOL_GPL(nfs_init_client);
650 633
@@ -668,8 +651,6 @@ static int nfs_init_server(struct nfs_server *server,
668 struct nfs_client *clp; 651 struct nfs_client *clp;
669 int error; 652 int error;
670 653
671 dprintk("--> nfs_init_server()\n");
672
673 nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, 654 nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
674 data->timeo, data->retrans); 655 data->timeo, data->retrans);
675 if (data->flags & NFS_MOUNT_NORESVPORT) 656 if (data->flags & NFS_MOUNT_NORESVPORT)
@@ -677,10 +658,8 @@ static int nfs_init_server(struct nfs_server *server,
677 658
678 /* Allocate or find a client reference we can use */ 659 /* Allocate or find a client reference we can use */
679 clp = nfs_get_client(&cl_init); 660 clp = nfs_get_client(&cl_init);
680 if (IS_ERR(clp)) { 661 if (IS_ERR(clp))
681 dprintk("<-- nfs_init_server() = error %ld\n", PTR_ERR(clp));
682 return PTR_ERR(clp); 662 return PTR_ERR(clp);
683 }
684 663
685 server->nfs_client = clp; 664 server->nfs_client = clp;
686 665
@@ -725,13 +704,11 @@ static int nfs_init_server(struct nfs_server *server,
725 server->mountd_protocol = data->mount_server.protocol; 704 server->mountd_protocol = data->mount_server.protocol;
726 705
727 server->namelen = data->namlen; 706 server->namelen = data->namlen;
728 dprintk("<-- nfs_init_server() = 0 [new %p]\n", clp);
729 return 0; 707 return 0;
730 708
731error: 709error:
732 server->nfs_client = NULL; 710 server->nfs_client = NULL;
733 nfs_put_client(clp); 711 nfs_put_client(clp);
734 dprintk("<-- nfs_init_server() = xerror %d\n", error);
735 return error; 712 return error;
736} 713}
737 714
@@ -798,12 +775,10 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
798 struct nfs_client *clp = server->nfs_client; 775 struct nfs_client *clp = server->nfs_client;
799 int error; 776 int error;
800 777
801 dprintk("--> nfs_probe_fsinfo()\n");
802
803 if (clp->rpc_ops->set_capabilities != NULL) { 778 if (clp->rpc_ops->set_capabilities != NULL) {
804 error = clp->rpc_ops->set_capabilities(server, mntfh); 779 error = clp->rpc_ops->set_capabilities(server, mntfh);
805 if (error < 0) 780 if (error < 0)
806 goto out_error; 781 return error;
807 } 782 }
808 783
809 fsinfo.fattr = fattr; 784 fsinfo.fattr = fattr;
@@ -811,7 +786,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
811 memset(fsinfo.layouttype, 0, sizeof(fsinfo.layouttype)); 786 memset(fsinfo.layouttype, 0, sizeof(fsinfo.layouttype));
812 error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo); 787 error = clp->rpc_ops->fsinfo(server, mntfh, &fsinfo);
813 if (error < 0) 788 if (error < 0)
814 goto out_error; 789 return error;
815 790
816 nfs_server_set_fsinfo(server, &fsinfo); 791 nfs_server_set_fsinfo(server, &fsinfo);
817 792
@@ -826,12 +801,7 @@ int nfs_probe_fsinfo(struct nfs_server *server, struct nfs_fh *mntfh, struct nfs
826 server->namelen = pathinfo.max_namelen; 801 server->namelen = pathinfo.max_namelen;
827 } 802 }
828 803
829 dprintk("<-- nfs_probe_fsinfo() = 0\n");
830 return 0; 804 return 0;
831
832out_error:
833 dprintk("nfs_probe_fsinfo: error = %d\n", -error);
834 return error;
835} 805}
836EXPORT_SYMBOL_GPL(nfs_probe_fsinfo); 806EXPORT_SYMBOL_GPL(nfs_probe_fsinfo);
837 807
@@ -927,8 +897,6 @@ EXPORT_SYMBOL_GPL(nfs_alloc_server);
927 */ 897 */
928void nfs_free_server(struct nfs_server *server) 898void nfs_free_server(struct nfs_server *server)
929{ 899{
930 dprintk("--> nfs_free_server()\n");
931
932 nfs_server_remove_lists(server); 900 nfs_server_remove_lists(server);
933 901
934 if (server->destroy != NULL) 902 if (server->destroy != NULL)
@@ -946,7 +914,6 @@ void nfs_free_server(struct nfs_server *server)
946 nfs_free_iostats(server->io_stats); 914 nfs_free_iostats(server->io_stats);
947 kfree(server); 915 kfree(server);
948 nfs_release_automount_timer(); 916 nfs_release_automount_timer();
949 dprintk("<-- nfs_free_server()\n");
950} 917}
951EXPORT_SYMBOL_GPL(nfs_free_server); 918EXPORT_SYMBOL_GPL(nfs_free_server);
952 919
@@ -1026,10 +993,6 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
1026 struct nfs_fattr *fattr_fsinfo; 993 struct nfs_fattr *fattr_fsinfo;
1027 int error; 994 int error;
1028 995
1029 dprintk("--> nfs_clone_server(,%llx:%llx,)\n",
1030 (unsigned long long) fattr->fsid.major,
1031 (unsigned long long) fattr->fsid.minor);
1032
1033 server = nfs_alloc_server(); 996 server = nfs_alloc_server();
1034 if (!server) 997 if (!server)
1035 return ERR_PTR(-ENOMEM); 998 return ERR_PTR(-ENOMEM);
@@ -1061,10 +1024,6 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
1061 if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN) 1024 if (server->namelen == 0 || server->namelen > NFS4_MAXNAMLEN)
1062 server->namelen = NFS4_MAXNAMLEN; 1025 server->namelen = NFS4_MAXNAMLEN;
1063 1026
1064 dprintk("Cloned FSID: %llx:%llx\n",
1065 (unsigned long long) server->fsid.major,
1066 (unsigned long long) server->fsid.minor);
1067
1068 error = nfs_start_lockd(server); 1027 error = nfs_start_lockd(server);
1069 if (error < 0) 1028 if (error < 0)
1070 goto out_free_server; 1029 goto out_free_server;
@@ -1073,13 +1032,11 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
1073 server->mount_time = jiffies; 1032 server->mount_time = jiffies;
1074 1033
1075 nfs_free_fattr(fattr_fsinfo); 1034 nfs_free_fattr(fattr_fsinfo);
1076 dprintk("<-- nfs_clone_server() = %p\n", server);
1077 return server; 1035 return server;
1078 1036
1079out_free_server: 1037out_free_server:
1080 nfs_free_fattr(fattr_fsinfo); 1038 nfs_free_fattr(fattr_fsinfo);
1081 nfs_free_server(server); 1039 nfs_free_server(server);
1082 dprintk("<-- nfs_clone_server() = error %d\n", error);
1083 return ERR_PTR(error); 1040 return ERR_PTR(error);
1084} 1041}
1085EXPORT_SYMBOL_GPL(nfs_clone_server); 1042EXPORT_SYMBOL_GPL(nfs_clone_server);
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index f92ba8d6c556..32ccd7754f8a 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -57,7 +57,7 @@ static void nfs_readdir_clear_array(struct page*);
57const struct file_operations nfs_dir_operations = { 57const struct file_operations nfs_dir_operations = {
58 .llseek = nfs_llseek_dir, 58 .llseek = nfs_llseek_dir,
59 .read = generic_read_dir, 59 .read = generic_read_dir,
60 .iterate_shared = nfs_readdir, 60 .iterate = nfs_readdir,
61 .open = nfs_opendir, 61 .open = nfs_opendir,
62 .release = nfs_closedir, 62 .release = nfs_closedir,
63 .fsync = nfs_fsync_dir, 63 .fsync = nfs_fsync_dir,
@@ -145,7 +145,6 @@ struct nfs_cache_array_entry {
145}; 145};
146 146
147struct nfs_cache_array { 147struct nfs_cache_array {
148 atomic_t refcount;
149 int size; 148 int size;
150 int eof_index; 149 int eof_index;
151 u64 last_cookie; 150 u64 last_cookie;
@@ -171,27 +170,6 @@ typedef struct {
171} nfs_readdir_descriptor_t; 170} nfs_readdir_descriptor_t;
172 171
173/* 172/*
174 * The caller is responsible for calling nfs_readdir_release_array(page)
175 */
176static
177struct nfs_cache_array *nfs_readdir_get_array(struct page *page)
178{
179 void *ptr;
180 if (page == NULL)
181 return ERR_PTR(-EIO);
182 ptr = kmap(page);
183 if (ptr == NULL)
184 return ERR_PTR(-ENOMEM);
185 return ptr;
186}
187
188static
189void nfs_readdir_release_array(struct page *page)
190{
191 kunmap(page);
192}
193
194/*
195 * we are freeing strings created by nfs_add_to_readdir_array() 173 * we are freeing strings created by nfs_add_to_readdir_array()
196 */ 174 */
197static 175static
@@ -201,18 +179,9 @@ void nfs_readdir_clear_array(struct page *page)
201 int i; 179 int i;
202 180
203 array = kmap_atomic(page); 181 array = kmap_atomic(page);
204 if (atomic_dec_and_test(&array->refcount)) 182 for (i = 0; i < array->size; i++)
205 for (i = 0; i < array->size; i++) 183 kfree(array->array[i].string.name);
206 kfree(array->array[i].string.name);
207 kunmap_atomic(array);
208}
209
210static bool grab_page(struct page *page)
211{
212 struct nfs_cache_array *array = kmap_atomic(page);
213 bool res = atomic_inc_not_zero(&array->refcount);
214 kunmap_atomic(array); 184 kunmap_atomic(array);
215 return res;
216} 185}
217 186
218/* 187/*
@@ -239,13 +208,10 @@ int nfs_readdir_make_qstr(struct qstr *string, const char *name, unsigned int le
239static 208static
240int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page) 209int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
241{ 210{
242 struct nfs_cache_array *array = nfs_readdir_get_array(page); 211 struct nfs_cache_array *array = kmap(page);
243 struct nfs_cache_array_entry *cache_entry; 212 struct nfs_cache_array_entry *cache_entry;
244 int ret; 213 int ret;
245 214
246 if (IS_ERR(array))
247 return PTR_ERR(array);
248
249 cache_entry = &array->array[array->size]; 215 cache_entry = &array->array[array->size];
250 216
251 /* Check that this entry lies within the page bounds */ 217 /* Check that this entry lies within the page bounds */
@@ -264,7 +230,7 @@ int nfs_readdir_add_to_array(struct nfs_entry *entry, struct page *page)
264 if (entry->eof != 0) 230 if (entry->eof != 0)
265 array->eof_index = array->size; 231 array->eof_index = array->size;
266out: 232out:
267 nfs_readdir_release_array(page); 233 kunmap(page);
268 return ret; 234 return ret;
269} 235}
270 236
@@ -353,11 +319,7 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
353 struct nfs_cache_array *array; 319 struct nfs_cache_array *array;
354 int status; 320 int status;
355 321
356 array = nfs_readdir_get_array(desc->page); 322 array = kmap(desc->page);
357 if (IS_ERR(array)) {
358 status = PTR_ERR(array);
359 goto out;
360 }
361 323
362 if (*desc->dir_cookie == 0) 324 if (*desc->dir_cookie == 0)
363 status = nfs_readdir_search_for_pos(array, desc); 325 status = nfs_readdir_search_for_pos(array, desc);
@@ -369,8 +331,7 @@ int nfs_readdir_search_array(nfs_readdir_descriptor_t *desc)
369 desc->current_index += array->size; 331 desc->current_index += array->size;
370 desc->page_index++; 332 desc->page_index++;
371 } 333 }
372 nfs_readdir_release_array(desc->page); 334 kunmap(desc->page);
373out:
374 return status; 335 return status;
375} 336}
376 337
@@ -606,13 +567,10 @@ int nfs_readdir_page_filler(nfs_readdir_descriptor_t *desc, struct nfs_entry *en
606 567
607out_nopages: 568out_nopages:
608 if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) { 569 if (count == 0 || (status == -EBADCOOKIE && entry->eof != 0)) {
609 array = nfs_readdir_get_array(page); 570 array = kmap(page);
610 if (!IS_ERR(array)) { 571 array->eof_index = array->size;
611 array->eof_index = array->size; 572 status = 0;
612 status = 0; 573 kunmap(page);
613 nfs_readdir_release_array(page);
614 } else
615 status = PTR_ERR(array);
616 } 574 }
617 575
618 put_page(scratch); 576 put_page(scratch);
@@ -674,13 +632,8 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
674 goto out; 632 goto out;
675 } 633 }
676 634
677 array = nfs_readdir_get_array(page); 635 array = kmap(page);
678 if (IS_ERR(array)) {
679 status = PTR_ERR(array);
680 goto out_label_free;
681 }
682 memset(array, 0, sizeof(struct nfs_cache_array)); 636 memset(array, 0, sizeof(struct nfs_cache_array));
683 atomic_set(&array->refcount, 1);
684 array->eof_index = -1; 637 array->eof_index = -1;
685 638
686 status = nfs_readdir_alloc_pages(pages, array_size); 639 status = nfs_readdir_alloc_pages(pages, array_size);
@@ -703,8 +656,7 @@ int nfs_readdir_xdr_to_array(nfs_readdir_descriptor_t *desc, struct page *page,
703 656
704 nfs_readdir_free_pages(pages, array_size); 657 nfs_readdir_free_pages(pages, array_size);
705out_release_array: 658out_release_array:
706 nfs_readdir_release_array(page); 659 kunmap(page);
707out_label_free:
708 nfs4_label_free(entry.label); 660 nfs4_label_free(entry.label);
709out: 661out:
710 nfs_free_fattr(entry.fattr); 662 nfs_free_fattr(entry.fattr);
@@ -743,7 +695,8 @@ int nfs_readdir_filler(nfs_readdir_descriptor_t *desc, struct page* page)
743static 695static
744void cache_page_release(nfs_readdir_descriptor_t *desc) 696void cache_page_release(nfs_readdir_descriptor_t *desc)
745{ 697{
746 nfs_readdir_clear_array(desc->page); 698 if (!desc->page->mapping)
699 nfs_readdir_clear_array(desc->page);
747 put_page(desc->page); 700 put_page(desc->page);
748 desc->page = NULL; 701 desc->page = NULL;
749} 702}
@@ -751,16 +704,8 @@ void cache_page_release(nfs_readdir_descriptor_t *desc)
751static 704static
752struct page *get_cache_page(nfs_readdir_descriptor_t *desc) 705struct page *get_cache_page(nfs_readdir_descriptor_t *desc)
753{ 706{
754 struct page *page; 707 return read_cache_page(desc->file->f_mapping,
755
756 for (;;) {
757 page = read_cache_page(desc->file->f_mapping,
758 desc->page_index, (filler_t *)nfs_readdir_filler, desc); 708 desc->page_index, (filler_t *)nfs_readdir_filler, desc);
759 if (IS_ERR(page) || grab_page(page))
760 break;
761 put_page(page);
762 }
763 return page;
764} 709}
765 710
766/* 711/*
@@ -809,12 +754,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
809 struct nfs_cache_array *array = NULL; 754 struct nfs_cache_array *array = NULL;
810 struct nfs_open_dir_context *ctx = file->private_data; 755 struct nfs_open_dir_context *ctx = file->private_data;
811 756
812 array = nfs_readdir_get_array(desc->page); 757 array = kmap(desc->page);
813 if (IS_ERR(array)) {
814 res = PTR_ERR(array);
815 goto out;
816 }
817
818 for (i = desc->cache_entry_index; i < array->size; i++) { 758 for (i = desc->cache_entry_index; i < array->size; i++) {
819 struct nfs_cache_array_entry *ent; 759 struct nfs_cache_array_entry *ent;
820 760
@@ -835,8 +775,7 @@ int nfs_do_filldir(nfs_readdir_descriptor_t *desc)
835 if (array->eof_index >= 0) 775 if (array->eof_index >= 0)
836 desc->eof = 1; 776 desc->eof = 1;
837 777
838 nfs_readdir_release_array(desc->page); 778 kunmap(desc->page);
839out:
840 cache_page_release(desc); 779 cache_page_release(desc);
841 dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n", 780 dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %Lu; returning = %d\n",
842 (unsigned long long)*desc->dir_cookie, res); 781 (unsigned long long)*desc->dir_cookie, res);
@@ -966,11 +905,13 @@ out:
966 905
967static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence) 906static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
968{ 907{
908 struct inode *inode = file_inode(filp);
969 struct nfs_open_dir_context *dir_ctx = filp->private_data; 909 struct nfs_open_dir_context *dir_ctx = filp->private_data;
970 910
971 dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n", 911 dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n",
972 filp, offset, whence); 912 filp, offset, whence);
973 913
914 inode_lock(inode);
974 switch (whence) { 915 switch (whence) {
975 case 1: 916 case 1:
976 offset += filp->f_pos; 917 offset += filp->f_pos;
@@ -978,13 +919,16 @@ static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
978 if (offset >= 0) 919 if (offset >= 0)
979 break; 920 break;
980 default: 921 default:
981 return -EINVAL; 922 offset = -EINVAL;
923 goto out;
982 } 924 }
983 if (offset != filp->f_pos) { 925 if (offset != filp->f_pos) {
984 filp->f_pos = offset; 926 filp->f_pos = offset;
985 dir_ctx->dir_cookie = 0; 927 dir_ctx->dir_cookie = 0;
986 dir_ctx->duped = 0; 928 dir_ctx->duped = 0;
987 } 929 }
930out:
931 inode_unlock(inode);
988 return offset; 932 return offset;
989} 933}
990 934
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index c1b5fed7c863..6fb9fad2d1e6 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -392,16 +392,6 @@ static void nfs_direct_complete(struct nfs_direct_req *dreq)
392 nfs_direct_req_release(dreq); 392 nfs_direct_req_release(dreq);
393} 393}
394 394
395static void nfs_direct_readpage_release(struct nfs_page *req)
396{
397 dprintk("NFS: direct read done (%s/%llu %d@%lld)\n",
398 req->wb_context->dentry->d_sb->s_id,
399 (unsigned long long)NFS_FILEID(d_inode(req->wb_context->dentry)),
400 req->wb_bytes,
401 (long long)req_offset(req));
402 nfs_release_request(req);
403}
404
405static void nfs_direct_read_completion(struct nfs_pgio_header *hdr) 395static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
406{ 396{
407 unsigned long bytes = 0; 397 unsigned long bytes = 0;
@@ -426,7 +416,7 @@ static void nfs_direct_read_completion(struct nfs_pgio_header *hdr)
426 set_page_dirty(page); 416 set_page_dirty(page);
427 bytes += req->wb_bytes; 417 bytes += req->wb_bytes;
428 nfs_list_remove_request(req); 418 nfs_list_remove_request(req);
429 nfs_direct_readpage_release(req); 419 nfs_release_request(req);
430 } 420 }
431out_put: 421out_put:
432 if (put_dreq(dreq)) 422 if (put_dreq(dreq))
@@ -700,16 +690,9 @@ static void nfs_direct_commit_complete(struct nfs_commit_data *data)
700 int status = data->task.tk_status; 690 int status = data->task.tk_status;
701 691
702 nfs_init_cinfo_from_dreq(&cinfo, dreq); 692 nfs_init_cinfo_from_dreq(&cinfo, dreq);
703 if (status < 0) { 693 if (status < 0 || nfs_direct_cmp_commit_data_verf(dreq, data))
704 dprintk("NFS: %5u commit failed with error %d.\n",
705 data->task.tk_pid, status);
706 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
707 } else if (nfs_direct_cmp_commit_data_verf(dreq, data)) {
708 dprintk("NFS: %5u commit verify failed\n", data->task.tk_pid);
709 dreq->flags = NFS_ODIRECT_RESCHED_WRITES; 694 dreq->flags = NFS_ODIRECT_RESCHED_WRITES;
710 }
711 695
712 dprintk("NFS: %5u commit returned %d\n", data->task.tk_pid, status);
713 while (!list_empty(&data->pages)) { 696 while (!list_empty(&data->pages)) {
714 req = nfs_list_entry(data->pages.next); 697 req = nfs_list_entry(data->pages.next);
715 nfs_list_remove_request(req); 698 nfs_list_remove_request(req);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 668213984d68..5713eb32a45e 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -482,7 +482,7 @@ static int nfs_launder_page(struct page *page)
482 inode->i_ino, (long long)page_offset(page)); 482 inode->i_ino, (long long)page_offset(page));
483 483
484 nfs_fscache_wait_on_page_write(nfsi, page); 484 nfs_fscache_wait_on_page_write(nfsi, page);
485 return nfs_wb_launder_page(inode, page); 485 return nfs_wb_page(inode, page);
486} 486}
487 487
488static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file, 488static int nfs_swap_activate(struct swap_info_struct *sis, struct file *file,
@@ -697,14 +697,14 @@ do_unlk(struct file *filp, int cmd, struct file_lock *fl, int is_local)
697 if (!IS_ERR(l_ctx)) { 697 if (!IS_ERR(l_ctx)) {
698 status = nfs_iocounter_wait(l_ctx); 698 status = nfs_iocounter_wait(l_ctx);
699 nfs_put_lock_context(l_ctx); 699 nfs_put_lock_context(l_ctx);
700 if (status < 0) 700 /* NOTE: special case
701 * If we're signalled while cleaning up locks on process exit, we
702 * still need to complete the unlock.
703 */
704 if (status < 0 && !(fl->fl_flags & FL_CLOSE))
701 return status; 705 return status;
702 } 706 }
703 707
704 /* NOTE: special case
705 * If we're signalled while cleaning up locks on process exit, we
706 * still need to complete the unlock.
707 */
708 /* 708 /*
709 * Use local locking if mounted with "-onolock" or with appropriate 709 * Use local locking if mounted with "-onolock" or with appropriate
710 * "-olocal_lock=" 710 * "-olocal_lock="
@@ -820,9 +820,23 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl)
820 if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK) 820 if (NFS_SERVER(inode)->flags & NFS_MOUNT_LOCAL_FLOCK)
821 is_local = 1; 821 is_local = 1;
822 822
823 /* We're simulating flock() locks using posix locks on the server */ 823 /*
824 if (fl->fl_type == F_UNLCK) 824 * VFS doesn't require the open mode to match a flock() lock's type.
825 * NFS, however, may simulate flock() locking with posix locking which
826 * requires the open mode to match the lock type.
827 */
828 switch (fl->fl_type) {
829 case F_UNLCK:
825 return do_unlk(filp, cmd, fl, is_local); 830 return do_unlk(filp, cmd, fl, is_local);
831 case F_RDLCK:
832 if (!(filp->f_mode & FMODE_READ))
833 return -EBADF;
834 break;
835 case F_WRLCK:
836 if (!(filp->f_mode & FMODE_WRITE))
837 return -EBADF;
838 }
839
826 return do_setlk(filp, cmd, fl, is_local); 840 return do_setlk(filp, cmd, fl, is_local);
827} 841}
828EXPORT_SYMBOL_GPL(nfs_flock); 842EXPORT_SYMBOL_GPL(nfs_flock);
diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c
index acd30baca461..1cf85d65b748 100644
--- a/fs/nfs/filelayout/filelayout.c
+++ b/fs/nfs/filelayout/filelayout.c
@@ -921,11 +921,11 @@ fl_pnfs_update_layout(struct inode *ino,
921 fl = FILELAYOUT_LSEG(lseg); 921 fl = FILELAYOUT_LSEG(lseg);
922 922
923 status = filelayout_check_deviceid(lo, fl, gfp_flags); 923 status = filelayout_check_deviceid(lo, fl, gfp_flags);
924 if (status) 924 if (status) {
925 pnfs_put_lseg(lseg);
925 lseg = ERR_PTR(status); 926 lseg = ERR_PTR(status);
927 }
926out: 928out:
927 if (IS_ERR(lseg))
928 pnfs_put_lseg(lseg);
929 return lseg; 929 return lseg;
930} 930}
931 931
@@ -933,6 +933,7 @@ static void
933filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, 933filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio,
934 struct nfs_page *req) 934 struct nfs_page *req)
935{ 935{
936 pnfs_generic_pg_check_layout(pgio);
936 if (!pgio->pg_lseg) { 937 if (!pgio->pg_lseg) {
937 pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode, 938 pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
938 req->wb_context, 939 req->wb_context,
@@ -959,6 +960,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
959 struct nfs_commit_info cinfo; 960 struct nfs_commit_info cinfo;
960 int status; 961 int status;
961 962
963 pnfs_generic_pg_check_layout(pgio);
962 if (!pgio->pg_lseg) { 964 if (!pgio->pg_lseg) {
963 pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode, 965 pgio->pg_lseg = fl_pnfs_update_layout(pgio->pg_inode,
964 req->wb_context, 966 req->wb_context,
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 42dedf2d625f..f5714ee01000 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -846,6 +846,7 @@ ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio,
846 int ds_idx; 846 int ds_idx;
847 847
848retry: 848retry:
849 pnfs_generic_pg_check_layout(pgio);
849 /* Use full layout for now */ 850 /* Use full layout for now */
850 if (!pgio->pg_lseg) 851 if (!pgio->pg_lseg)
851 ff_layout_pg_get_read(pgio, req, false); 852 ff_layout_pg_get_read(pgio, req, false);
@@ -894,6 +895,7 @@ ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio,
894 int status; 895 int status;
895 896
896retry: 897retry:
898 pnfs_generic_pg_check_layout(pgio);
897 if (!pgio->pg_lseg) { 899 if (!pgio->pg_lseg) {
898 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 900 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
899 req->wb_context, 901 req->wb_context,
@@ -1800,16 +1802,16 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
1800 1802
1801 ds = nfs4_ff_layout_prepare_ds(lseg, idx, true); 1803 ds = nfs4_ff_layout_prepare_ds(lseg, idx, true);
1802 if (!ds) 1804 if (!ds)
1803 return PNFS_NOT_ATTEMPTED; 1805 goto out_failed;
1804 1806
1805 ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp, 1807 ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp,
1806 hdr->inode); 1808 hdr->inode);
1807 if (IS_ERR(ds_clnt)) 1809 if (IS_ERR(ds_clnt))
1808 return PNFS_NOT_ATTEMPTED; 1810 goto out_failed;
1809 1811
1810 ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred); 1812 ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred);
1811 if (!ds_cred) 1813 if (!ds_cred)
1812 return PNFS_NOT_ATTEMPTED; 1814 goto out_failed;
1813 1815
1814 vers = nfs4_ff_layout_ds_version(lseg, idx); 1816 vers = nfs4_ff_layout_ds_version(lseg, idx);
1815 1817
@@ -1839,6 +1841,11 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
1839 sync, RPC_TASK_SOFTCONN); 1841 sync, RPC_TASK_SOFTCONN);
1840 put_rpccred(ds_cred); 1842 put_rpccred(ds_cred);
1841 return PNFS_ATTEMPTED; 1843 return PNFS_ATTEMPTED;
1844
1845out_failed:
1846 if (ff_layout_avoid_mds_available_ds(lseg))
1847 return PNFS_TRY_AGAIN;
1848 return PNFS_NOT_ATTEMPTED;
1842} 1849}
1843 1850
1844static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) 1851static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i)
@@ -2354,10 +2361,21 @@ ff_layout_prepare_layoutstats(struct nfs42_layoutstat_args *args)
2354 return 0; 2361 return 0;
2355} 2362}
2356 2363
2364static int
2365ff_layout_set_layoutdriver(struct nfs_server *server,
2366 const struct nfs_fh *dummy)
2367{
2368#if IS_ENABLED(CONFIG_NFS_V4_2)
2369 server->caps |= NFS_CAP_LAYOUTSTATS;
2370#endif
2371 return 0;
2372}
2373
2357static struct pnfs_layoutdriver_type flexfilelayout_type = { 2374static struct pnfs_layoutdriver_type flexfilelayout_type = {
2358 .id = LAYOUT_FLEX_FILES, 2375 .id = LAYOUT_FLEX_FILES,
2359 .name = "LAYOUT_FLEX_FILES", 2376 .name = "LAYOUT_FLEX_FILES",
2360 .owner = THIS_MODULE, 2377 .owner = THIS_MODULE,
2378 .set_layoutdriver = ff_layout_set_layoutdriver,
2361 .alloc_layout_hdr = ff_layout_alloc_layout_hdr, 2379 .alloc_layout_hdr = ff_layout_alloc_layout_hdr,
2362 .free_layout_hdr = ff_layout_free_layout_hdr, 2380 .free_layout_hdr = ff_layout_free_layout_hdr,
2363 .alloc_lseg = ff_layout_alloc_lseg, 2381 .alloc_lseg = ff_layout_alloc_lseg,
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index 457cfeb1d5c1..6df7a0cf5660 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -119,7 +119,13 @@ nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
119 if (ds_versions[i].wsize > NFS_MAX_FILE_IO_SIZE) 119 if (ds_versions[i].wsize > NFS_MAX_FILE_IO_SIZE)
120 ds_versions[i].wsize = NFS_MAX_FILE_IO_SIZE; 120 ds_versions[i].wsize = NFS_MAX_FILE_IO_SIZE;
121 121
122 if (ds_versions[i].version != 3 || ds_versions[i].minor_version != 0) { 122 /*
123 * check for valid major/minor combination.
124 * currently we support dataserver which talk:
125 * v3, v4.0, v4.1, v4.2
126 */
127 if (!((ds_versions[i].version == 3 && ds_versions[i].minor_version == 0) ||
128 (ds_versions[i].version == 4 && ds_versions[i].minor_version < 3))) {
123 dprintk("%s: [%d] unsupported ds version %d-%d\n", __func__, 129 dprintk("%s: [%d] unsupported ds version %d-%d\n", __func__,
124 i, ds_versions[i].version, 130 i, ds_versions[i].version,
125 ds_versions[i].minor_version); 131 ds_versions[i].minor_version);
@@ -415,7 +421,7 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
415 mirror->mirror_ds->ds_versions[0].minor_version); 421 mirror->mirror_ds->ds_versions[0].minor_version);
416 422
417 /* connect success, check rsize/wsize limit */ 423 /* connect success, check rsize/wsize limit */
418 if (ds->ds_clp) { 424 if (!status) {
419 max_payload = 425 max_payload =
420 nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient), 426 nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient),
421 NULL); 427 NULL);
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index f489a5a71bd5..1de93ba78dc9 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -734,7 +734,10 @@ int nfs_getattr(const struct path *path, struct kstat *stat,
734 if (need_atime || nfs_need_revalidate_inode(inode)) { 734 if (need_atime || nfs_need_revalidate_inode(inode)) {
735 struct nfs_server *server = NFS_SERVER(inode); 735 struct nfs_server *server = NFS_SERVER(inode);
736 736
737 nfs_readdirplus_parent_cache_miss(path->dentry); 737 if (!(server->flags & NFS_MOUNT_NOAC))
738 nfs_readdirplus_parent_cache_miss(path->dentry);
739 else
740 nfs_readdirplus_parent_cache_hit(path->dentry);
738 err = __nfs_revalidate_inode(server, inode); 741 err = __nfs_revalidate_inode(server, inode);
739 } else 742 } else
740 nfs_readdirplus_parent_cache_hit(path->dentry); 743 nfs_readdirplus_parent_cache_hit(path->dentry);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 7b38fedb7e03..e9b4c3320e37 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -495,7 +495,6 @@ void nfs_mark_request_commit(struct nfs_page *req,
495 u32 ds_commit_idx); 495 u32 ds_commit_idx);
496int nfs_write_need_commit(struct nfs_pgio_header *); 496int nfs_write_need_commit(struct nfs_pgio_header *);
497void nfs_writeback_update_inode(struct nfs_pgio_header *hdr); 497void nfs_writeback_update_inode(struct nfs_pgio_header *hdr);
498int nfs_commit_file(struct file *file, struct nfs_write_verifier *verf);
499int nfs_generic_commit_list(struct inode *inode, struct list_head *head, 498int nfs_generic_commit_list(struct inode *inode, struct list_head *head,
500 int how, struct nfs_commit_info *cinfo); 499 int how, struct nfs_commit_info *cinfo);
501void nfs_retry_commit(struct list_head *page_list, 500void nfs_retry_commit(struct list_head *page_list,
@@ -756,9 +755,13 @@ static inline bool nfs_error_is_fatal(int err)
756{ 755{
757 switch (err) { 756 switch (err) {
758 case -ERESTARTSYS: 757 case -ERESTARTSYS:
758 case -EACCES:
759 case -EDQUOT:
760 case -EFBIG:
759 case -EIO: 761 case -EIO:
760 case -ENOSPC: 762 case -ENOSPC:
761 case -EROFS: 763 case -EROFS:
764 case -ESTALE:
762 case -E2BIG: 765 case -E2BIG:
763 return true; 766 return true;
764 default: 767 default:
diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c
index 786f17580582..1a224a33a6c2 100644
--- a/fs/nfs/namespace.c
+++ b/fs/nfs/namespace.c
@@ -143,11 +143,8 @@ struct vfsmount *nfs_d_automount(struct path *path)
143 struct nfs_fh *fh = NULL; 143 struct nfs_fh *fh = NULL;
144 struct nfs_fattr *fattr = NULL; 144 struct nfs_fattr *fattr = NULL;
145 145
146 dprintk("--> nfs_d_automount()\n");
147
148 mnt = ERR_PTR(-ESTALE);
149 if (IS_ROOT(path->dentry)) 146 if (IS_ROOT(path->dentry))
150 goto out_nofree; 147 return ERR_PTR(-ESTALE);
151 148
152 mnt = ERR_PTR(-ENOMEM); 149 mnt = ERR_PTR(-ENOMEM);
153 fh = nfs_alloc_fhandle(); 150 fh = nfs_alloc_fhandle();
@@ -155,13 +152,10 @@ struct vfsmount *nfs_d_automount(struct path *path)
155 if (fh == NULL || fattr == NULL) 152 if (fh == NULL || fattr == NULL)
156 goto out; 153 goto out;
157 154
158 dprintk("%s: enter\n", __func__);
159
160 mnt = server->nfs_client->rpc_ops->submount(server, path->dentry, fh, fattr); 155 mnt = server->nfs_client->rpc_ops->submount(server, path->dentry, fh, fattr);
161 if (IS_ERR(mnt)) 156 if (IS_ERR(mnt))
162 goto out; 157 goto out;
163 158
164 dprintk("%s: done, success\n", __func__);
165 mntget(mnt); /* prevent immediate expiration */ 159 mntget(mnt); /* prevent immediate expiration */
166 mnt_set_expiry(mnt, &nfs_automount_list); 160 mnt_set_expiry(mnt, &nfs_automount_list);
167 schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout); 161 schedule_delayed_work(&nfs_automount_task, nfs_mountpoint_expiry_timeout);
@@ -169,11 +163,6 @@ struct vfsmount *nfs_d_automount(struct path *path)
169out: 163out:
170 nfs_free_fattr(fattr); 164 nfs_free_fattr(fattr);
171 nfs_free_fhandle(fh); 165 nfs_free_fhandle(fh);
172out_nofree:
173 if (IS_ERR(mnt))
174 dprintk("<-- %s(): error %ld\n", __func__, PTR_ERR(mnt));
175 else
176 dprintk("<-- %s() = %p\n", __func__, mnt);
177 return mnt; 166 return mnt;
178} 167}
179 168
@@ -248,27 +237,20 @@ struct vfsmount *nfs_do_submount(struct dentry *dentry, struct nfs_fh *fh,
248 .fattr = fattr, 237 .fattr = fattr,
249 .authflavor = authflavor, 238 .authflavor = authflavor,
250 }; 239 };
251 struct vfsmount *mnt = ERR_PTR(-ENOMEM); 240 struct vfsmount *mnt;
252 char *page = (char *) __get_free_page(GFP_USER); 241 char *page = (char *) __get_free_page(GFP_USER);
253 char *devname; 242 char *devname;
254 243
255 dprintk("--> nfs_do_submount()\n");
256
257 dprintk("%s: submounting on %pd2\n", __func__,
258 dentry);
259 if (page == NULL) 244 if (page == NULL)
260 goto out; 245 return ERR_PTR(-ENOMEM);
246
261 devname = nfs_devname(dentry, page, PAGE_SIZE); 247 devname = nfs_devname(dentry, page, PAGE_SIZE);
262 mnt = (struct vfsmount *)devname;
263 if (IS_ERR(devname)) 248 if (IS_ERR(devname))
264 goto free_page; 249 mnt = (struct vfsmount *)devname;
265 mnt = nfs_do_clone_mount(NFS_SB(dentry->d_sb), devname, &mountdata); 250 else
266free_page: 251 mnt = nfs_do_clone_mount(NFS_SB(dentry->d_sb), devname, &mountdata);
267 free_page((unsigned long)page);
268out:
269 dprintk("%s: done\n", __func__);
270 252
271 dprintk("<-- nfs_do_submount() = %p\n", mnt); 253 free_page((unsigned long)page);
272 return mnt; 254 return mnt;
273} 255}
274EXPORT_SYMBOL_GPL(nfs_do_submount); 256EXPORT_SYMBOL_GPL(nfs_do_submount);
diff --git a/fs/nfs/nfs3proc.c b/fs/nfs/nfs3proc.c
index dc925b531f32..0c07b567118d 100644
--- a/fs/nfs/nfs3proc.c
+++ b/fs/nfs/nfs3proc.c
@@ -865,12 +865,63 @@ static void nfs3_proc_commit_setup(struct nfs_commit_data *data, struct rpc_mess
865 msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT]; 865 msg->rpc_proc = &nfs3_procedures[NFS3PROC_COMMIT];
866} 866}
867 867
868static void nfs3_nlm_alloc_call(void *data)
869{
870 struct nfs_lock_context *l_ctx = data;
871 if (l_ctx && test_bit(NFS_CONTEXT_UNLOCK, &l_ctx->open_context->flags)) {
872 get_nfs_open_context(l_ctx->open_context);
873 nfs_get_lock_context(l_ctx->open_context);
874 }
875}
876
877static bool nfs3_nlm_unlock_prepare(struct rpc_task *task, void *data)
878{
879 struct nfs_lock_context *l_ctx = data;
880 if (l_ctx && test_bit(NFS_CONTEXT_UNLOCK, &l_ctx->open_context->flags))
881 return nfs_async_iocounter_wait(task, l_ctx);
882 return false;
883
884}
885
886static void nfs3_nlm_release_call(void *data)
887{
888 struct nfs_lock_context *l_ctx = data;
889 struct nfs_open_context *ctx;
890 if (l_ctx && test_bit(NFS_CONTEXT_UNLOCK, &l_ctx->open_context->flags)) {
891 ctx = l_ctx->open_context;
892 nfs_put_lock_context(l_ctx);
893 put_nfs_open_context(ctx);
894 }
895}
896
897const struct nlmclnt_operations nlmclnt_fl_close_lock_ops = {
898 .nlmclnt_alloc_call = nfs3_nlm_alloc_call,
899 .nlmclnt_unlock_prepare = nfs3_nlm_unlock_prepare,
900 .nlmclnt_release_call = nfs3_nlm_release_call,
901};
902
868static int 903static int
869nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl) 904nfs3_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
870{ 905{
871 struct inode *inode = file_inode(filp); 906 struct inode *inode = file_inode(filp);
907 struct nfs_lock_context *l_ctx = NULL;
908 struct nfs_open_context *ctx = nfs_file_open_context(filp);
909 int status;
872 910
873 return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl); 911 if (fl->fl_flags & FL_CLOSE) {
912 l_ctx = nfs_get_lock_context(ctx);
913 if (IS_ERR(l_ctx))
914 l_ctx = NULL;
915 else
916 set_bit(NFS_CONTEXT_UNLOCK, &ctx->flags);
917 }
918
919 status = nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl, l_ctx);
920
921 if (l_ctx)
922 nfs_put_lock_context(l_ctx);
923
924 return status;
874} 925}
875 926
876static int nfs3_have_delegation(struct inode *inode, fmode_t flags) 927static int nfs3_have_delegation(struct inode *inode, fmode_t flags)
@@ -921,6 +972,7 @@ const struct nfs_rpc_ops nfs_v3_clientops = {
921 .dir_inode_ops = &nfs3_dir_inode_operations, 972 .dir_inode_ops = &nfs3_dir_inode_operations,
922 .file_inode_ops = &nfs3_file_inode_operations, 973 .file_inode_ops = &nfs3_file_inode_operations,
923 .file_ops = &nfs_file_operations, 974 .file_ops = &nfs_file_operations,
975 .nlmclnt_ops = &nlmclnt_fl_close_lock_ops,
924 .getroot = nfs3_proc_get_root, 976 .getroot = nfs3_proc_get_root,
925 .submount = nfs_submount, 977 .submount = nfs_submount,
926 .try_mount = nfs_try_mount, 978 .try_mount = nfs_try_mount,
diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c
index 1e486c73ec94..929d09a5310a 100644
--- a/fs/nfs/nfs42proc.c
+++ b/fs/nfs/nfs42proc.c
@@ -167,23 +167,29 @@ static ssize_t _nfs42_proc_copy(struct file *src,
167 if (status) 167 if (status)
168 return status; 168 return status;
169 169
170 res->commit_res.verf = kzalloc(sizeof(struct nfs_writeverf), GFP_NOFS);
171 if (!res->commit_res.verf)
172 return -ENOMEM;
170 status = nfs4_call_sync(server->client, server, &msg, 173 status = nfs4_call_sync(server->client, server, &msg,
171 &args->seq_args, &res->seq_res, 0); 174 &args->seq_args, &res->seq_res, 0);
172 if (status == -ENOTSUPP) 175 if (status == -ENOTSUPP)
173 server->caps &= ~NFS_CAP_COPY; 176 server->caps &= ~NFS_CAP_COPY;
174 if (status) 177 if (status)
175 return status; 178 goto out;
176 179
177 if (res->write_res.verifier.committed != NFS_FILE_SYNC) { 180 if (!nfs_write_verifier_cmp(&res->write_res.verifier.verifier,
178 status = nfs_commit_file(dst, &res->write_res.verifier.verifier); 181 &res->commit_res.verf->verifier)) {
179 if (status) 182 status = -EAGAIN;
180 return status; 183 goto out;
181 } 184 }
182 185
183 truncate_pagecache_range(dst_inode, pos_dst, 186 truncate_pagecache_range(dst_inode, pos_dst,
184 pos_dst + res->write_res.count); 187 pos_dst + res->write_res.count);
185 188
186 return res->write_res.count; 189 status = res->write_res.count;
190out:
191 kfree(res->commit_res.verf);
192 return status;
187} 193}
188 194
189ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, 195ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
@@ -240,6 +246,9 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src,
240 if (err == -ENOTSUPP) { 246 if (err == -ENOTSUPP) {
241 err = -EOPNOTSUPP; 247 err = -EOPNOTSUPP;
242 break; 248 break;
249 } if (err == -EAGAIN) {
250 dst_exception.retry = 1;
251 continue;
243 } 252 }
244 253
245 err2 = nfs4_handle_exception(server, err, &src_exception); 254 err2 = nfs4_handle_exception(server, err, &src_exception);
@@ -379,6 +388,7 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
379 pnfs_mark_layout_stateid_invalid(lo, &head); 388 pnfs_mark_layout_stateid_invalid(lo, &head);
380 spin_unlock(&inode->i_lock); 389 spin_unlock(&inode->i_lock);
381 pnfs_free_lseg_list(&head); 390 pnfs_free_lseg_list(&head);
391 nfs_commit_inode(inode, 0);
382 } else 392 } else
383 spin_unlock(&inode->i_lock); 393 spin_unlock(&inode->i_lock);
384 break; 394 break;
@@ -400,8 +410,6 @@ nfs42_layoutstat_done(struct rpc_task *task, void *calldata)
400 case -EOPNOTSUPP: 410 case -EOPNOTSUPP:
401 NFS_SERVER(inode)->caps &= ~NFS_CAP_LAYOUTSTATS; 411 NFS_SERVER(inode)->caps &= ~NFS_CAP_LAYOUTSTATS;
402 } 412 }
403
404 dprintk("%s server returns %d\n", __func__, task->tk_status);
405} 413}
406 414
407static void 415static void
diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c
index 6c7296454bbc..528362f69cc1 100644
--- a/fs/nfs/nfs42xdr.c
+++ b/fs/nfs/nfs42xdr.c
@@ -66,12 +66,14 @@
66 encode_putfh_maxsz + \ 66 encode_putfh_maxsz + \
67 encode_savefh_maxsz + \ 67 encode_savefh_maxsz + \
68 encode_putfh_maxsz + \ 68 encode_putfh_maxsz + \
69 encode_copy_maxsz) 69 encode_copy_maxsz + \
70 encode_commit_maxsz)
70#define NFS4_dec_copy_sz (compound_decode_hdr_maxsz + \ 71#define NFS4_dec_copy_sz (compound_decode_hdr_maxsz + \
71 decode_putfh_maxsz + \ 72 decode_putfh_maxsz + \
72 decode_savefh_maxsz + \ 73 decode_savefh_maxsz + \
73 decode_putfh_maxsz + \ 74 decode_putfh_maxsz + \
74 decode_copy_maxsz) 75 decode_copy_maxsz + \
76 decode_commit_maxsz)
75#define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \ 77#define NFS4_enc_deallocate_sz (compound_encode_hdr_maxsz + \
76 encode_putfh_maxsz + \ 78 encode_putfh_maxsz + \
77 encode_deallocate_maxsz + \ 79 encode_deallocate_maxsz + \
@@ -222,6 +224,18 @@ static void nfs4_xdr_enc_allocate(struct rpc_rqst *req,
222 encode_nops(&hdr); 224 encode_nops(&hdr);
223} 225}
224 226
227static void encode_copy_commit(struct xdr_stream *xdr,
228 struct nfs42_copy_args *args,
229 struct compound_hdr *hdr)
230{
231 __be32 *p;
232
233 encode_op_hdr(xdr, OP_COMMIT, decode_commit_maxsz, hdr);
234 p = reserve_space(xdr, 12);
235 p = xdr_encode_hyper(p, args->dst_pos);
236 *p = cpu_to_be32(args->count);
237}
238
225/* 239/*
226 * Encode COPY request 240 * Encode COPY request
227 */ 241 */
@@ -239,6 +253,7 @@ static void nfs4_xdr_enc_copy(struct rpc_rqst *req,
239 encode_savefh(xdr, &hdr); 253 encode_savefh(xdr, &hdr);
240 encode_putfh(xdr, args->dst_fh, &hdr); 254 encode_putfh(xdr, args->dst_fh, &hdr);
241 encode_copy(xdr, args, &hdr); 255 encode_copy(xdr, args, &hdr);
256 encode_copy_commit(xdr, args, &hdr);
242 encode_nops(&hdr); 257 encode_nops(&hdr);
243} 258}
244 259
@@ -481,6 +496,9 @@ static int nfs4_xdr_dec_copy(struct rpc_rqst *rqstp,
481 if (status) 496 if (status)
482 goto out; 497 goto out;
483 status = decode_copy(xdr, res); 498 status = decode_copy(xdr, res);
499 if (status)
500 goto out;
501 status = decode_commit(xdr, &res->commit_res);
484out: 502out:
485 return status; 503 return status;
486} 504}
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 8346ccbf2d52..692a7a8bfc7a 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -359,11 +359,9 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
359 struct nfs_client *old; 359 struct nfs_client *old;
360 int error; 360 int error;
361 361
362 if (clp->cl_cons_state == NFS_CS_READY) { 362 if (clp->cl_cons_state == NFS_CS_READY)
363 /* the client is initialised already */ 363 /* the client is initialised already */
364 dprintk("<-- nfs4_init_client() = 0 [already %p]\n", clp);
365 return clp; 364 return clp;
366 }
367 365
368 /* Check NFS protocol revision and initialize RPC op vector */ 366 /* Check NFS protocol revision and initialize RPC op vector */
369 clp->rpc_ops = &nfs_v4_clientops; 367 clp->rpc_ops = &nfs_v4_clientops;
@@ -421,7 +419,6 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
421error: 419error:
422 nfs_mark_client_ready(clp, error); 420 nfs_mark_client_ready(clp, error);
423 nfs_put_client(clp); 421 nfs_put_client(clp);
424 dprintk("<-- nfs4_init_client() = xerror %d\n", error);
425 return ERR_PTR(error); 422 return ERR_PTR(error);
426} 423}
427 424
@@ -469,6 +466,50 @@ static bool nfs4_same_verifier(nfs4_verifier *v1, nfs4_verifier *v2)
469 return memcmp(v1->data, v2->data, sizeof(v1->data)) == 0; 466 return memcmp(v1->data, v2->data, sizeof(v1->data)) == 0;
470} 467}
471 468
469static int nfs4_match_client(struct nfs_client *pos, struct nfs_client *new,
470 struct nfs_client **prev, struct nfs_net *nn)
471{
472 int status;
473
474 if (pos->rpc_ops != new->rpc_ops)
475 return 1;
476
477 if (pos->cl_minorversion != new->cl_minorversion)
478 return 1;
479
480 /* If "pos" isn't marked ready, we can't trust the
481 * remaining fields in "pos", especially the client
482 * ID and serverowner fields. Wait for CREATE_SESSION
483 * to finish. */
484 if (pos->cl_cons_state > NFS_CS_READY) {
485 atomic_inc(&pos->cl_count);
486 spin_unlock(&nn->nfs_client_lock);
487
488 nfs_put_client(*prev);
489 *prev = pos;
490
491 status = nfs_wait_client_init_complete(pos);
492 spin_lock(&nn->nfs_client_lock);
493
494 if (status < 0)
495 return status;
496 }
497
498 if (pos->cl_cons_state != NFS_CS_READY)
499 return 1;
500
501 if (pos->cl_clientid != new->cl_clientid)
502 return 1;
503
504 /* NFSv4.1 always uses the uniform string, however someone
505 * might switch the uniquifier string on us.
506 */
507 if (!nfs4_match_client_owner_id(pos, new))
508 return 1;
509
510 return 0;
511}
512
472/** 513/**
473 * nfs40_walk_client_list - Find server that recognizes a client ID 514 * nfs40_walk_client_list - Find server that recognizes a client ID
474 * 515 *
@@ -497,34 +538,10 @@ int nfs40_walk_client_list(struct nfs_client *new,
497 spin_lock(&nn->nfs_client_lock); 538 spin_lock(&nn->nfs_client_lock);
498 list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) { 539 list_for_each_entry(pos, &nn->nfs_client_list, cl_share_link) {
499 540
500 if (pos->rpc_ops != new->rpc_ops) 541 status = nfs4_match_client(pos, new, &prev, nn);
501 continue; 542 if (status < 0)
502 543 goto out_unlock;
503 if (pos->cl_minorversion != new->cl_minorversion) 544 if (status != 0)
504 continue;
505
506 /* If "pos" isn't marked ready, we can't trust the
507 * remaining fields in "pos" */
508 if (pos->cl_cons_state > NFS_CS_READY) {
509 atomic_inc(&pos->cl_count);
510 spin_unlock(&nn->nfs_client_lock);
511
512 nfs_put_client(prev);
513 prev = pos;
514
515 status = nfs_wait_client_init_complete(pos);
516 if (status < 0)
517 goto out;
518 status = -NFS4ERR_STALE_CLIENTID;
519 spin_lock(&nn->nfs_client_lock);
520 }
521 if (pos->cl_cons_state != NFS_CS_READY)
522 continue;
523
524 if (pos->cl_clientid != new->cl_clientid)
525 continue;
526
527 if (!nfs4_match_client_owner_id(pos, new))
528 continue; 545 continue;
529 /* 546 /*
530 * We just sent a new SETCLIENTID, which should have 547 * We just sent a new SETCLIENTID, which should have
@@ -557,8 +574,6 @@ int nfs40_walk_client_list(struct nfs_client *new,
557 574
558 prev = NULL; 575 prev = NULL;
559 *result = pos; 576 *result = pos;
560 dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
561 __func__, pos, atomic_read(&pos->cl_count));
562 goto out; 577 goto out;
563 case -ERESTARTSYS: 578 case -ERESTARTSYS:
564 case -ETIMEDOUT: 579 case -ETIMEDOUT:
@@ -567,37 +582,23 @@ int nfs40_walk_client_list(struct nfs_client *new,
567 */ 582 */
568 nfs4_schedule_path_down_recovery(pos); 583 nfs4_schedule_path_down_recovery(pos);
569 default: 584 default:
585 spin_lock(&nn->nfs_client_lock);
570 goto out; 586 goto out;
571 } 587 }
572 588
573 spin_lock(&nn->nfs_client_lock); 589 spin_lock(&nn->nfs_client_lock);
574 } 590 }
591out_unlock:
575 spin_unlock(&nn->nfs_client_lock); 592 spin_unlock(&nn->nfs_client_lock);
576 593
577 /* No match found. The server lost our clientid */ 594 /* No match found. The server lost our clientid */
578out: 595out:
579 nfs_put_client(prev); 596 nfs_put_client(prev);
580 dprintk("NFS: <-- %s status = %d\n", __func__, status);
581 return status; 597 return status;
582} 598}
583 599
584#ifdef CONFIG_NFS_V4_1 600#ifdef CONFIG_NFS_V4_1
585/* 601/*
586 * Returns true if the client IDs match
587 */
588static bool nfs4_match_clientids(u64 a, u64 b)
589{
590 if (a != b) {
591 dprintk("NFS: --> %s client ID %llx does not match %llx\n",
592 __func__, a, b);
593 return false;
594 }
595 dprintk("NFS: --> %s client ID %llx matches %llx\n",
596 __func__, a, b);
597 return true;
598}
599
600/*
601 * Returns true if the server major ids match 602 * Returns true if the server major ids match
602 */ 603 */
603static bool 604static bool
@@ -605,36 +606,8 @@ nfs4_check_serverowner_major_id(struct nfs41_server_owner *o1,
605 struct nfs41_server_owner *o2) 606 struct nfs41_server_owner *o2)
606{ 607{
607 if (o1->major_id_sz != o2->major_id_sz) 608 if (o1->major_id_sz != o2->major_id_sz)
608 goto out_major_mismatch; 609 return false;
609 if (memcmp(o1->major_id, o2->major_id, o1->major_id_sz) != 0) 610 return memcmp(o1->major_id, o2->major_id, o1->major_id_sz) == 0;
610 goto out_major_mismatch;
611
612 dprintk("NFS: --> %s server owner major IDs match\n", __func__);
613 return true;
614
615out_major_mismatch:
616 dprintk("NFS: --> %s server owner major IDs do not match\n",
617 __func__);
618 return false;
619}
620
621/*
622 * Returns true if server minor ids match
623 */
624static bool
625nfs4_check_serverowner_minor_id(struct nfs41_server_owner *o1,
626 struct nfs41_server_owner *o2)
627{
628 /* Check eir_server_owner so_minor_id */
629 if (o1->minor_id != o2->minor_id)
630 goto out_minor_mismatch;
631
632 dprintk("NFS: --> %s server owner minor IDs match\n", __func__);
633 return true;
634
635out_minor_mismatch:
636 dprintk("NFS: --> %s server owner minor IDs do not match\n", __func__);
637 return false;
638} 611}
639 612
640/* 613/*
@@ -645,18 +618,9 @@ nfs4_check_server_scope(struct nfs41_server_scope *s1,
645 struct nfs41_server_scope *s2) 618 struct nfs41_server_scope *s2)
646{ 619{
647 if (s1->server_scope_sz != s2->server_scope_sz) 620 if (s1->server_scope_sz != s2->server_scope_sz)
648 goto out_scope_mismatch; 621 return false;
649 if (memcmp(s1->server_scope, s2->server_scope, 622 return memcmp(s1->server_scope, s2->server_scope,
650 s1->server_scope_sz) != 0) 623 s1->server_scope_sz) == 0;
651 goto out_scope_mismatch;
652
653 dprintk("NFS: --> %s server scopes match\n", __func__);
654 return true;
655
656out_scope_mismatch:
657 dprintk("NFS: --> %s server scopes do not match\n",
658 __func__);
659 return false;
660} 624}
661 625
662/** 626/**
@@ -680,7 +644,7 @@ int nfs4_detect_session_trunking(struct nfs_client *clp,
680 struct rpc_xprt *xprt) 644 struct rpc_xprt *xprt)
681{ 645{
682 /* Check eir_clientid */ 646 /* Check eir_clientid */
683 if (!nfs4_match_clientids(clp->cl_clientid, res->clientid)) 647 if (clp->cl_clientid != res->clientid)
684 goto out_err; 648 goto out_err;
685 649
686 /* Check eir_server_owner so_major_id */ 650 /* Check eir_server_owner so_major_id */
@@ -689,8 +653,7 @@ int nfs4_detect_session_trunking(struct nfs_client *clp,
689 goto out_err; 653 goto out_err;
690 654
691 /* Check eir_server_owner so_minor_id */ 655 /* Check eir_server_owner so_minor_id */
692 if (!nfs4_check_serverowner_minor_id(clp->cl_serverowner, 656 if (clp->cl_serverowner->minor_id != res->server_owner->minor_id)
693 res->server_owner))
694 goto out_err; 657 goto out_err;
695 658
696 /* Check eir_server_scope */ 659 /* Check eir_server_scope */
@@ -739,33 +702,10 @@ int nfs41_walk_client_list(struct nfs_client *new,
739 if (pos == new) 702 if (pos == new)
740 goto found; 703 goto found;
741 704
742 if (pos->rpc_ops != new->rpc_ops) 705 status = nfs4_match_client(pos, new, &prev, nn);
743 continue; 706 if (status < 0)
744 707 goto out;
745 if (pos->cl_minorversion != new->cl_minorversion) 708 if (status != 0)
746 continue;
747
748 /* If "pos" isn't marked ready, we can't trust the
749 * remaining fields in "pos", especially the client
750 * ID and serverowner fields. Wait for CREATE_SESSION
751 * to finish. */
752 if (pos->cl_cons_state > NFS_CS_READY) {
753 atomic_inc(&pos->cl_count);
754 spin_unlock(&nn->nfs_client_lock);
755
756 nfs_put_client(prev);
757 prev = pos;
758
759 status = nfs_wait_client_init_complete(pos);
760 spin_lock(&nn->nfs_client_lock);
761 if (status < 0)
762 break;
763 status = -NFS4ERR_STALE_CLIENTID;
764 }
765 if (pos->cl_cons_state != NFS_CS_READY)
766 continue;
767
768 if (!nfs4_match_clientids(pos->cl_clientid, new->cl_clientid))
769 continue; 709 continue;
770 710
771 /* 711 /*
@@ -777,23 +717,15 @@ int nfs41_walk_client_list(struct nfs_client *new,
777 new->cl_serverowner)) 717 new->cl_serverowner))
778 continue; 718 continue;
779 719
780 /* Unlike NFSv4.0, we know that NFSv4.1 always uses the
781 * uniform string, however someone might switch the
782 * uniquifier string on us.
783 */
784 if (!nfs4_match_client_owner_id(pos, new))
785 continue;
786found: 720found:
787 atomic_inc(&pos->cl_count); 721 atomic_inc(&pos->cl_count);
788 *result = pos; 722 *result = pos;
789 status = 0; 723 status = 0;
790 dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
791 __func__, pos, atomic_read(&pos->cl_count));
792 break; 724 break;
793 } 725 }
794 726
727out:
795 spin_unlock(&nn->nfs_client_lock); 728 spin_unlock(&nn->nfs_client_lock);
796 dprintk("NFS: <-- %s status = %d\n", __func__, status);
797 nfs_put_client(prev); 729 nfs_put_client(prev);
798 return status; 730 return status;
799} 731}
@@ -916,9 +848,6 @@ static int nfs4_set_client(struct nfs_server *server,
916 .timeparms = timeparms, 848 .timeparms = timeparms,
917 }; 849 };
918 struct nfs_client *clp; 850 struct nfs_client *clp;
919 int error;
920
921 dprintk("--> nfs4_set_client()\n");
922 851
923 if (server->flags & NFS_MOUNT_NORESVPORT) 852 if (server->flags & NFS_MOUNT_NORESVPORT)
924 set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); 853 set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
@@ -927,15 +856,11 @@ static int nfs4_set_client(struct nfs_server *server,
927 856
928 /* Allocate or find a client reference we can use */ 857 /* Allocate or find a client reference we can use */
929 clp = nfs_get_client(&cl_init); 858 clp = nfs_get_client(&cl_init);
930 if (IS_ERR(clp)) { 859 if (IS_ERR(clp))
931 error = PTR_ERR(clp); 860 return PTR_ERR(clp);
932 goto error;
933 }
934 861
935 if (server->nfs_client == clp) { 862 if (server->nfs_client == clp)
936 error = -ELOOP; 863 return -ELOOP;
937 goto error;
938 }
939 864
940 /* 865 /*
941 * Query for the lease time on clientid setup or renewal 866 * Query for the lease time on clientid setup or renewal
@@ -947,11 +872,7 @@ static int nfs4_set_client(struct nfs_server *server,
947 set_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state); 872 set_bit(NFS_CS_CHECK_LEASE_TIME, &clp->cl_res_state);
948 873
949 server->nfs_client = clp; 874 server->nfs_client = clp;
950 dprintk("<-- nfs4_set_client() = 0 [new %p]\n", clp);
951 return 0; 875 return 0;
952error:
953 dprintk("<-- nfs4_set_client() = xerror %d\n", error);
954 return error;
955} 876}
956 877
957/* 878/*
@@ -982,7 +903,6 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
982 .net = mds_clp->cl_net, 903 .net = mds_clp->cl_net,
983 .timeparms = &ds_timeout, 904 .timeparms = &ds_timeout,
984 }; 905 };
985 struct nfs_client *clp;
986 char buf[INET6_ADDRSTRLEN + 1]; 906 char buf[INET6_ADDRSTRLEN + 1];
987 907
988 if (rpc_ntop(ds_addr, buf, sizeof(buf)) <= 0) 908 if (rpc_ntop(ds_addr, buf, sizeof(buf)) <= 0)
@@ -998,10 +918,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv,
998 * (section 13.1 RFC 5661). 918 * (section 13.1 RFC 5661).
999 */ 919 */
1000 nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans); 920 nfs_init_timeout_values(&ds_timeout, ds_proto, ds_timeo, ds_retrans);
1001 clp = nfs_get_client(&cl_init); 921 return nfs_get_client(&cl_init);
1002
1003 dprintk("<-- %s %p\n", __func__, clp);
1004 return clp;
1005} 922}
1006EXPORT_SYMBOL_GPL(nfs4_set_ds_client); 923EXPORT_SYMBOL_GPL(nfs4_set_ds_client);
1007 924
@@ -1098,8 +1015,6 @@ static int nfs4_init_server(struct nfs_server *server,
1098 struct rpc_timeout timeparms; 1015 struct rpc_timeout timeparms;
1099 int error; 1016 int error;
1100 1017
1101 dprintk("--> nfs4_init_server()\n");
1102
1103 nfs_init_timeout_values(&timeparms, data->nfs_server.protocol, 1018 nfs_init_timeout_values(&timeparms, data->nfs_server.protocol,
1104 data->timeo, data->retrans); 1019 data->timeo, data->retrans);
1105 1020
@@ -1127,7 +1042,7 @@ static int nfs4_init_server(struct nfs_server *server,
1127 data->minorversion, 1042 data->minorversion,
1128 data->net); 1043 data->net);
1129 if (error < 0) 1044 if (error < 0)
1130 goto error; 1045 return error;
1131 1046
1132 if (data->rsize) 1047 if (data->rsize)
1133 server->rsize = nfs_block_size(data->rsize, NULL); 1048 server->rsize = nfs_block_size(data->rsize, NULL);
@@ -1138,16 +1053,10 @@ static int nfs4_init_server(struct nfs_server *server,
1138 server->acregmax = data->acregmax * HZ; 1053 server->acregmax = data->acregmax * HZ;
1139 server->acdirmin = data->acdirmin * HZ; 1054 server->acdirmin = data->acdirmin * HZ;
1140 server->acdirmax = data->acdirmax * HZ; 1055 server->acdirmax = data->acdirmax * HZ;
1056 server->port = data->nfs_server.port;
1141 1057
1142 server->port = data->nfs_server.port; 1058 return nfs_init_server_rpcclient(server, &timeparms,
1143 1059 data->selected_flavor);
1144 error = nfs_init_server_rpcclient(server, &timeparms,
1145 data->selected_flavor);
1146
1147error:
1148 /* Done */
1149 dprintk("<-- nfs4_init_server() = %d\n", error);
1150 return error;
1151} 1060}
1152 1061
1153/* 1062/*
@@ -1163,8 +1072,6 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info,
1163 bool auth_probe; 1072 bool auth_probe;
1164 int error; 1073 int error;
1165 1074
1166 dprintk("--> nfs4_create_server()\n");
1167
1168 server = nfs_alloc_server(); 1075 server = nfs_alloc_server();
1169 if (!server) 1076 if (!server)
1170 return ERR_PTR(-ENOMEM); 1077 return ERR_PTR(-ENOMEM);
@@ -1180,12 +1087,10 @@ struct nfs_server *nfs4_create_server(struct nfs_mount_info *mount_info,
1180 if (error < 0) 1087 if (error < 0)
1181 goto error; 1088 goto error;
1182 1089
1183 dprintk("<-- nfs4_create_server() = %p\n", server);
1184 return server; 1090 return server;
1185 1091
1186error: 1092error:
1187 nfs_free_server(server); 1093 nfs_free_server(server);
1188 dprintk("<-- nfs4_create_server() = error %d\n", error);
1189 return ERR_PTR(error); 1094 return ERR_PTR(error);
1190} 1095}
1191 1096
@@ -1200,8 +1105,6 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1200 bool auth_probe; 1105 bool auth_probe;
1201 int error; 1106 int error;
1202 1107
1203 dprintk("--> nfs4_create_referral_server()\n");
1204
1205 server = nfs_alloc_server(); 1108 server = nfs_alloc_server();
1206 if (!server) 1109 if (!server)
1207 return ERR_PTR(-ENOMEM); 1110 return ERR_PTR(-ENOMEM);
@@ -1235,12 +1138,10 @@ struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *data,
1235 if (error < 0) 1138 if (error < 0)
1236 goto error; 1139 goto error;
1237 1140
1238 dprintk("<-- nfs_create_referral_server() = %p\n", server);
1239 return server; 1141 return server;
1240 1142
1241error: 1143error:
1242 nfs_free_server(server); 1144 nfs_free_server(server);
1243 dprintk("<-- nfs4_create_referral_server() = error %d\n", error);
1244 return ERR_PTR(error); 1145 return ERR_PTR(error);
1245} 1146}
1246 1147
@@ -1300,31 +1201,16 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
1300 struct sockaddr *localaddr = (struct sockaddr *)&address; 1201 struct sockaddr *localaddr = (struct sockaddr *)&address;
1301 int error; 1202 int error;
1302 1203
1303 dprintk("--> %s: move FSID %llx:%llx to \"%s\")\n", __func__,
1304 (unsigned long long)server->fsid.major,
1305 (unsigned long long)server->fsid.minor,
1306 hostname);
1307
1308 error = rpc_switch_client_transport(clnt, &xargs, clnt->cl_timeout); 1204 error = rpc_switch_client_transport(clnt, &xargs, clnt->cl_timeout);
1309 if (error != 0) { 1205 if (error != 0)
1310 dprintk("<-- %s(): rpc_switch_client_transport returned %d\n", 1206 return error;
1311 __func__, error);
1312 goto out;
1313 }
1314 1207
1315 error = rpc_localaddr(clnt, localaddr, sizeof(address)); 1208 error = rpc_localaddr(clnt, localaddr, sizeof(address));
1316 if (error != 0) { 1209 if (error != 0)
1317 dprintk("<-- %s(): rpc_localaddr returned %d\n", 1210 return error;
1318 __func__, error);
1319 goto out;
1320 }
1321 1211
1322 error = -EAFNOSUPPORT; 1212 if (rpc_ntop(localaddr, buf, sizeof(buf)) == 0)
1323 if (rpc_ntop(localaddr, buf, sizeof(buf)) == 0) { 1213 return -EAFNOSUPPORT;
1324 dprintk("<-- %s(): rpc_ntop returned %d\n",
1325 __func__, error);
1326 goto out;
1327 }
1328 1214
1329 nfs_server_remove_lists(server); 1215 nfs_server_remove_lists(server);
1330 error = nfs4_set_client(server, hostname, sap, salen, buf, 1216 error = nfs4_set_client(server, hostname, sap, salen, buf,
@@ -1333,21 +1219,12 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname,
1333 nfs_put_client(clp); 1219 nfs_put_client(clp);
1334 if (error != 0) { 1220 if (error != 0) {
1335 nfs_server_insert_lists(server); 1221 nfs_server_insert_lists(server);
1336 dprintk("<-- %s(): nfs4_set_client returned %d\n", 1222 return error;
1337 __func__, error);
1338 goto out;
1339 } 1223 }
1340 1224
1341 if (server->nfs_client->cl_hostname == NULL) 1225 if (server->nfs_client->cl_hostname == NULL)
1342 server->nfs_client->cl_hostname = kstrdup(hostname, GFP_KERNEL); 1226 server->nfs_client->cl_hostname = kstrdup(hostname, GFP_KERNEL);
1343 nfs_server_insert_lists(server); 1227 nfs_server_insert_lists(server);
1344 1228
1345 error = nfs_probe_destination(server); 1229 return nfs_probe_destination(server);
1346 if (error < 0)
1347 goto out;
1348
1349 dprintk("<-- %s() succeeded\n", __func__);
1350
1351out:
1352 return error;
1353} 1230}
diff --git a/fs/nfs/nfs4getroot.c b/fs/nfs/nfs4getroot.c
index 039b3eb6d834..ac8406018962 100644
--- a/fs/nfs/nfs4getroot.c
+++ b/fs/nfs/nfs4getroot.c
@@ -14,8 +14,6 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_p
14 struct nfs_fsinfo fsinfo; 14 struct nfs_fsinfo fsinfo;
15 int ret = -ENOMEM; 15 int ret = -ENOMEM;
16 16
17 dprintk("--> nfs4_get_rootfh()\n");
18
19 fsinfo.fattr = nfs_alloc_fattr(); 17 fsinfo.fattr = nfs_alloc_fattr();
20 if (fsinfo.fattr == NULL) 18 if (fsinfo.fattr == NULL)
21 goto out; 19 goto out;
@@ -38,6 +36,5 @@ int nfs4_get_rootfh(struct nfs_server *server, struct nfs_fh *mntfh, bool auth_p
38 memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid)); 36 memcpy(&server->fsid, &fsinfo.fattr->fsid, sizeof(server->fsid));
39out: 37out:
40 nfs_free_fattr(fsinfo.fattr); 38 nfs_free_fattr(fsinfo.fattr);
41 dprintk("<-- nfs4_get_rootfh() = %d\n", ret);
42 return ret; 39 return ret;
43} 40}
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index d8b040bd9814..7d531da1bae3 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -340,7 +340,6 @@ static struct vfsmount *nfs_follow_referral(struct dentry *dentry,
340out: 340out:
341 free_page((unsigned long) page); 341 free_page((unsigned long) page);
342 free_page((unsigned long) page2); 342 free_page((unsigned long) page2);
343 dprintk("%s: done\n", __func__);
344 return mnt; 343 return mnt;
345} 344}
346 345
@@ -358,11 +357,9 @@ static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *
358 int err; 357 int err;
359 358
360 /* BUG_ON(IS_ROOT(dentry)); */ 359 /* BUG_ON(IS_ROOT(dentry)); */
361 dprintk("%s: enter\n", __func__);
362
363 page = alloc_page(GFP_KERNEL); 360 page = alloc_page(GFP_KERNEL);
364 if (page == NULL) 361 if (page == NULL)
365 goto out; 362 return mnt;
366 363
367 fs_locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL); 364 fs_locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL);
368 if (fs_locations == NULL) 365 if (fs_locations == NULL)
@@ -386,8 +383,6 @@ static struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *
386out_free: 383out_free:
387 __free_page(page); 384 __free_page(page);
388 kfree(fs_locations); 385 kfree(fs_locations);
389out:
390 dprintk("%s: done\n", __func__);
391 return mnt; 386 return mnt;
392} 387}
393 388
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 201ca3f2c4ba..c08c46a3b8cd 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -698,7 +698,8 @@ static int nfs41_sequence_process(struct rpc_task *task,
698 session = slot->table->session; 698 session = slot->table->session;
699 699
700 if (slot->interrupted) { 700 if (slot->interrupted) {
701 slot->interrupted = 0; 701 if (res->sr_status != -NFS4ERR_DELAY)
702 slot->interrupted = 0;
702 interrupted = true; 703 interrupted = true;
703 } 704 }
704 705
@@ -2300,8 +2301,10 @@ static int _nfs4_proc_open(struct nfs4_opendata *data)
2300 if (status != 0) 2301 if (status != 0)
2301 return status; 2302 return status;
2302 } 2303 }
2303 if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) 2304 if (!(o_res->f_attr->valid & NFS_ATTR_FATTR)) {
2305 nfs4_sequence_free_slot(&o_res->seq_res);
2304 nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label); 2306 nfs4_proc_getattr(server, &o_res->fh, o_res->f_attr, o_res->f_label);
2307 }
2305 return 0; 2308 return 0;
2306} 2309}
2307 2310
@@ -3265,6 +3268,7 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
3265 .rpc_resp = &res, 3268 .rpc_resp = &res,
3266 }; 3269 };
3267 int status; 3270 int status;
3271 int i;
3268 3272
3269 bitmask[0] = FATTR4_WORD0_SUPPORTED_ATTRS | 3273 bitmask[0] = FATTR4_WORD0_SUPPORTED_ATTRS |
3270 FATTR4_WORD0_FH_EXPIRE_TYPE | 3274 FATTR4_WORD0_FH_EXPIRE_TYPE |
@@ -3330,8 +3334,13 @@ static int _nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *f
3330 server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE; 3334 server->cache_consistency_bitmask[0] &= FATTR4_WORD0_CHANGE|FATTR4_WORD0_SIZE;
3331 server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY; 3335 server->cache_consistency_bitmask[1] &= FATTR4_WORD1_TIME_METADATA|FATTR4_WORD1_TIME_MODIFY;
3332 server->cache_consistency_bitmask[2] = 0; 3336 server->cache_consistency_bitmask[2] = 0;
3337
3338 /* Avoid a regression due to buggy server */
3339 for (i = 0; i < ARRAY_SIZE(res.exclcreat_bitmask); i++)
3340 res.exclcreat_bitmask[i] &= res.attr_bitmask[i];
3333 memcpy(server->exclcreat_bitmask, res.exclcreat_bitmask, 3341 memcpy(server->exclcreat_bitmask, res.exclcreat_bitmask,
3334 sizeof(server->exclcreat_bitmask)); 3342 sizeof(server->exclcreat_bitmask));
3343
3335 server->acl_bitmask = res.acl_bitmask; 3344 server->acl_bitmask = res.acl_bitmask;
3336 server->fh_expire_type = res.fh_expire_type; 3345 server->fh_expire_type = res.fh_expire_type;
3337 } 3346 }
@@ -4610,7 +4619,7 @@ static int nfs4_proc_pgio_rpc_prepare(struct rpc_task *task,
4610 return 0; 4619 return 0;
4611 if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, 4620 if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context,
4612 hdr->args.lock_context, 4621 hdr->args.lock_context,
4613 hdr->rw_ops->rw_mode) == -EIO) 4622 hdr->rw_mode) == -EIO)
4614 return -EIO; 4623 return -EIO;
4615 if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) 4624 if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags)))
4616 return -EIO; 4625 return -EIO;
@@ -4804,8 +4813,10 @@ static int nfs4_proc_async_renew(struct nfs_client *clp, struct rpc_cred *cred,
4804 if (!atomic_inc_not_zero(&clp->cl_count)) 4813 if (!atomic_inc_not_zero(&clp->cl_count))
4805 return -EIO; 4814 return -EIO;
4806 data = kmalloc(sizeof(*data), GFP_NOFS); 4815 data = kmalloc(sizeof(*data), GFP_NOFS);
4807 if (data == NULL) 4816 if (data == NULL) {
4817 nfs_put_client(clp);
4808 return -ENOMEM; 4818 return -ENOMEM;
4819 }
4809 data->client = clp; 4820 data->client = clp;
4810 data->timestamp = jiffies; 4821 data->timestamp = jiffies;
4811 return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT, 4822 return rpc_call_async(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT,
@@ -5782,6 +5793,7 @@ struct nfs4_unlockdata {
5782 struct nfs_locku_res res; 5793 struct nfs_locku_res res;
5783 struct nfs4_lock_state *lsp; 5794 struct nfs4_lock_state *lsp;
5784 struct nfs_open_context *ctx; 5795 struct nfs_open_context *ctx;
5796 struct nfs_lock_context *l_ctx;
5785 struct file_lock fl; 5797 struct file_lock fl;
5786 struct nfs_server *server; 5798 struct nfs_server *server;
5787 unsigned long timestamp; 5799 unsigned long timestamp;
@@ -5806,6 +5818,7 @@ static struct nfs4_unlockdata *nfs4_alloc_unlockdata(struct file_lock *fl,
5806 atomic_inc(&lsp->ls_count); 5818 atomic_inc(&lsp->ls_count);
5807 /* Ensure we don't close file until we're done freeing locks! */ 5819 /* Ensure we don't close file until we're done freeing locks! */
5808 p->ctx = get_nfs_open_context(ctx); 5820 p->ctx = get_nfs_open_context(ctx);
5821 p->l_ctx = nfs_get_lock_context(ctx);
5809 memcpy(&p->fl, fl, sizeof(p->fl)); 5822 memcpy(&p->fl, fl, sizeof(p->fl));
5810 p->server = NFS_SERVER(inode); 5823 p->server = NFS_SERVER(inode);
5811 return p; 5824 return p;
@@ -5816,6 +5829,7 @@ static void nfs4_locku_release_calldata(void *data)
5816 struct nfs4_unlockdata *calldata = data; 5829 struct nfs4_unlockdata *calldata = data;
5817 nfs_free_seqid(calldata->arg.seqid); 5830 nfs_free_seqid(calldata->arg.seqid);
5818 nfs4_put_lock_state(calldata->lsp); 5831 nfs4_put_lock_state(calldata->lsp);
5832 nfs_put_lock_context(calldata->l_ctx);
5819 put_nfs_open_context(calldata->ctx); 5833 put_nfs_open_context(calldata->ctx);
5820 kfree(calldata); 5834 kfree(calldata);
5821} 5835}
@@ -5857,6 +5871,10 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
5857{ 5871{
5858 struct nfs4_unlockdata *calldata = data; 5872 struct nfs4_unlockdata *calldata = data;
5859 5873
5874 if (test_bit(NFS_CONTEXT_UNLOCK, &calldata->l_ctx->open_context->flags) &&
5875 nfs_async_iocounter_wait(task, calldata->l_ctx))
5876 return;
5877
5860 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) 5878 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
5861 goto out_wait; 5879 goto out_wait;
5862 nfs4_stateid_copy(&calldata->arg.stateid, &calldata->lsp->ls_stateid); 5880 nfs4_stateid_copy(&calldata->arg.stateid, &calldata->lsp->ls_stateid);
@@ -5908,6 +5926,8 @@ static struct rpc_task *nfs4_do_unlck(struct file_lock *fl,
5908 * canceled lock is passed in, and it won't be an unlock. 5926 * canceled lock is passed in, and it won't be an unlock.
5909 */ 5927 */
5910 fl->fl_type = F_UNLCK; 5928 fl->fl_type = F_UNLCK;
5929 if (fl->fl_flags & FL_CLOSE)
5930 set_bit(NFS_CONTEXT_UNLOCK, &ctx->flags);
5911 5931
5912 data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid); 5932 data = nfs4_alloc_unlockdata(fl, ctx, lsp, seqid);
5913 if (data == NULL) { 5933 if (data == NULL) {
@@ -6445,9 +6465,6 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
6445 ctx = nfs_file_open_context(filp); 6465 ctx = nfs_file_open_context(filp);
6446 state = ctx->state; 6466 state = ctx->state;
6447 6467
6448 if (request->fl_start < 0 || request->fl_end < 0)
6449 return -EINVAL;
6450
6451 if (IS_GETLK(cmd)) { 6468 if (IS_GETLK(cmd)) {
6452 if (state != NULL) 6469 if (state != NULL)
6453 return nfs4_proc_getlk(state, F_GETLK, request); 6470 return nfs4_proc_getlk(state, F_GETLK, request);
@@ -6470,20 +6487,6 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request)
6470 !test_bit(NFS_STATE_POSIX_LOCKS, &state->flags)) 6487 !test_bit(NFS_STATE_POSIX_LOCKS, &state->flags))
6471 return -ENOLCK; 6488 return -ENOLCK;
6472 6489
6473 /*
6474 * Don't rely on the VFS having checked the file open mode,
6475 * since it won't do this for flock() locks.
6476 */
6477 switch (request->fl_type) {
6478 case F_RDLCK:
6479 if (!(filp->f_mode & FMODE_READ))
6480 return -EBADF;
6481 break;
6482 case F_WRLCK:
6483 if (!(filp->f_mode & FMODE_WRITE))
6484 return -EBADF;
6485 }
6486
6487 status = nfs4_set_lock_state(state, request); 6490 status = nfs4_set_lock_state(state, request);
6488 if (status != 0) 6491 if (status != 0)
6489 return status; 6492 return status;
@@ -7155,8 +7158,6 @@ int nfs4_proc_bind_one_conn_to_session(struct rpc_clnt *clnt,
7155 }; 7158 };
7156 struct rpc_task *task; 7159 struct rpc_task *task;
7157 7160
7158 dprintk("--> %s\n", __func__);
7159
7160 nfs4_copy_sessionid(&args.sessionid, &clp->cl_session->sess_id); 7161 nfs4_copy_sessionid(&args.sessionid, &clp->cl_session->sess_id);
7161 if (!(clp->cl_session->flags & SESSION4_BACK_CHAN)) 7162 if (!(clp->cl_session->flags & SESSION4_BACK_CHAN))
7162 args.dir = NFS4_CDFC4_FORE; 7163 args.dir = NFS4_CDFC4_FORE;
@@ -7176,24 +7177,20 @@ int nfs4_proc_bind_one_conn_to_session(struct rpc_clnt *clnt,
7176 if (memcmp(res.sessionid.data, 7177 if (memcmp(res.sessionid.data,
7177 clp->cl_session->sess_id.data, NFS4_MAX_SESSIONID_LEN)) { 7178 clp->cl_session->sess_id.data, NFS4_MAX_SESSIONID_LEN)) {
7178 dprintk("NFS: %s: Session ID mismatch\n", __func__); 7179 dprintk("NFS: %s: Session ID mismatch\n", __func__);
7179 status = -EIO; 7180 return -EIO;
7180 goto out;
7181 } 7181 }
7182 if ((res.dir & args.dir) != res.dir || res.dir == 0) { 7182 if ((res.dir & args.dir) != res.dir || res.dir == 0) {
7183 dprintk("NFS: %s: Unexpected direction from server\n", 7183 dprintk("NFS: %s: Unexpected direction from server\n",
7184 __func__); 7184 __func__);
7185 status = -EIO; 7185 return -EIO;
7186 goto out;
7187 } 7186 }
7188 if (res.use_conn_in_rdma_mode != args.use_conn_in_rdma_mode) { 7187 if (res.use_conn_in_rdma_mode != args.use_conn_in_rdma_mode) {
7189 dprintk("NFS: %s: Server returned RDMA mode = true\n", 7188 dprintk("NFS: %s: Server returned RDMA mode = true\n",
7190 __func__); 7189 __func__);
7191 status = -EIO; 7190 return -EIO;
7192 goto out;
7193 } 7191 }
7194 } 7192 }
7195out: 7193
7196 dprintk("<-- %s status= %d\n", __func__, status);
7197 return status; 7194 return status;
7198} 7195}
7199 7196
@@ -7459,15 +7456,16 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
7459 }; 7456 };
7460 struct nfs41_exchange_id_data *calldata; 7457 struct nfs41_exchange_id_data *calldata;
7461 struct rpc_task *task; 7458 struct rpc_task *task;
7462 int status = -EIO; 7459 int status;
7463 7460
7464 if (!atomic_inc_not_zero(&clp->cl_count)) 7461 if (!atomic_inc_not_zero(&clp->cl_count))
7465 goto out; 7462 return -EIO;
7466 7463
7467 status = -ENOMEM;
7468 calldata = kzalloc(sizeof(*calldata), GFP_NOFS); 7464 calldata = kzalloc(sizeof(*calldata), GFP_NOFS);
7469 if (!calldata) 7465 if (!calldata) {
7470 goto out; 7466 nfs_put_client(clp);
7467 return -ENOMEM;
7468 }
7471 7469
7472 if (!xprt) 7470 if (!xprt)
7473 nfs4_init_boot_verifier(clp, &verifier); 7471 nfs4_init_boot_verifier(clp, &verifier);
@@ -7476,10 +7474,6 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
7476 if (status) 7474 if (status)
7477 goto out_calldata; 7475 goto out_calldata;
7478 7476
7479 dprintk("NFS call exchange_id auth=%s, '%s'\n",
7480 clp->cl_rpcclient->cl_auth->au_ops->au_name,
7481 clp->cl_owner_id);
7482
7483 calldata->res.server_owner = kzalloc(sizeof(struct nfs41_server_owner), 7477 calldata->res.server_owner = kzalloc(sizeof(struct nfs41_server_owner),
7484 GFP_NOFS); 7478 GFP_NOFS);
7485 status = -ENOMEM; 7479 status = -ENOMEM;
@@ -7545,13 +7539,6 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred,
7545 7539
7546 rpc_put_task(task); 7540 rpc_put_task(task);
7547out: 7541out:
7548 if (clp->cl_implid != NULL)
7549 dprintk("NFS reply exchange_id: Server Implementation ID: "
7550 "domain: %s, name: %s, date: %llu,%u\n",
7551 clp->cl_implid->domain, clp->cl_implid->name,
7552 clp->cl_implid->date.seconds,
7553 clp->cl_implid->date.nseconds);
7554 dprintk("NFS reply exchange_id: %d\n", status);
7555 return status; 7542 return status;
7556 7543
7557out_impl_id: 7544out_impl_id:
@@ -7769,17 +7756,13 @@ int nfs4_proc_get_lease_time(struct nfs_client *clp, struct nfs_fsinfo *fsinfo)
7769 7756
7770 nfs4_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0); 7757 nfs4_init_sequence(&args.la_seq_args, &res.lr_seq_res, 0);
7771 nfs4_set_sequence_privileged(&args.la_seq_args); 7758 nfs4_set_sequence_privileged(&args.la_seq_args);
7772 dprintk("--> %s\n", __func__);
7773 task = rpc_run_task(&task_setup); 7759 task = rpc_run_task(&task_setup);
7774 7760
7775 if (IS_ERR(task)) 7761 if (IS_ERR(task))
7776 status = PTR_ERR(task); 7762 return PTR_ERR(task);
7777 else {
7778 status = task->tk_status;
7779 rpc_put_task(task);
7780 }
7781 dprintk("<-- %s return %d\n", __func__, status);
7782 7763
7764 status = task->tk_status;
7765 rpc_put_task(task);
7783 return status; 7766 return status;
7784} 7767}
7785 7768
@@ -8180,6 +8163,12 @@ static int nfs41_reclaim_complete_handle_errors(struct rpc_task *task, struct nf
8180 /* fall through */ 8163 /* fall through */
8181 case -NFS4ERR_RETRY_UNCACHED_REP: 8164 case -NFS4ERR_RETRY_UNCACHED_REP:
8182 return -EAGAIN; 8165 return -EAGAIN;
8166 case -NFS4ERR_BADSESSION:
8167 case -NFS4ERR_DEADSESSION:
8168 case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
8169 nfs4_schedule_session_recovery(clp->cl_session,
8170 task->tk_status);
8171 break;
8183 default: 8172 default:
8184 nfs4_schedule_lease_recovery(clp); 8173 nfs4_schedule_lease_recovery(clp);
8185 } 8174 }
@@ -8258,7 +8247,6 @@ static int nfs41_proc_reclaim_complete(struct nfs_client *clp,
8258 if (status == 0) 8247 if (status == 0)
8259 status = task->tk_status; 8248 status = task->tk_status;
8260 rpc_put_task(task); 8249 rpc_put_task(task);
8261 return 0;
8262out: 8250out:
8263 dprintk("<-- %s status=%d\n", __func__, status); 8251 dprintk("<-- %s status=%d\n", __func__, status);
8264 return status; 8252 return status;
@@ -8357,6 +8345,7 @@ nfs4_layoutget_handle_exception(struct rpc_task *task,
8357 */ 8345 */
8358 pnfs_mark_layout_stateid_invalid(lo, &head); 8346 pnfs_mark_layout_stateid_invalid(lo, &head);
8359 spin_unlock(&inode->i_lock); 8347 spin_unlock(&inode->i_lock);
8348 nfs_commit_inode(inode, 0);
8360 pnfs_free_lseg_list(&head); 8349 pnfs_free_lseg_list(&head);
8361 status = -EAGAIN; 8350 status = -EAGAIN;
8362 goto out; 8351 goto out;
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 8156bad6b441..b34de036501b 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -1649,13 +1649,14 @@ static void nfs4_state_start_reclaim_reboot(struct nfs_client *clp)
1649 nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_reboot); 1649 nfs4_state_mark_reclaim_helper(clp, nfs4_state_mark_reclaim_reboot);
1650} 1650}
1651 1651
1652static void nfs4_reclaim_complete(struct nfs_client *clp, 1652static int nfs4_reclaim_complete(struct nfs_client *clp,
1653 const struct nfs4_state_recovery_ops *ops, 1653 const struct nfs4_state_recovery_ops *ops,
1654 struct rpc_cred *cred) 1654 struct rpc_cred *cred)
1655{ 1655{
1656 /* Notify the server we're done reclaiming our state */ 1656 /* Notify the server we're done reclaiming our state */
1657 if (ops->reclaim_complete) 1657 if (ops->reclaim_complete)
1658 (void)ops->reclaim_complete(clp, cred); 1658 return ops->reclaim_complete(clp, cred);
1659 return 0;
1659} 1660}
1660 1661
1661static void nfs4_clear_reclaim_server(struct nfs_server *server) 1662static void nfs4_clear_reclaim_server(struct nfs_server *server)
@@ -1702,13 +1703,16 @@ static void nfs4_state_end_reclaim_reboot(struct nfs_client *clp)
1702{ 1703{
1703 const struct nfs4_state_recovery_ops *ops; 1704 const struct nfs4_state_recovery_ops *ops;
1704 struct rpc_cred *cred; 1705 struct rpc_cred *cred;
1706 int err;
1705 1707
1706 if (!nfs4_state_clear_reclaim_reboot(clp)) 1708 if (!nfs4_state_clear_reclaim_reboot(clp))
1707 return; 1709 return;
1708 ops = clp->cl_mvops->reboot_recovery_ops; 1710 ops = clp->cl_mvops->reboot_recovery_ops;
1709 cred = nfs4_get_clid_cred(clp); 1711 cred = nfs4_get_clid_cred(clp);
1710 nfs4_reclaim_complete(clp, ops, cred); 1712 err = nfs4_reclaim_complete(clp, ops, cred);
1711 put_rpccred(cred); 1713 put_rpccred(cred);
1714 if (err == -NFS4ERR_CONN_NOT_BOUND_TO_SESSION)
1715 set_bit(NFS4CLNT_RECLAIM_REBOOT, &clp->cl_state);
1712} 1716}
1713 1717
1714static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp) 1718static void nfs4_state_start_reclaim_nograce(struct nfs_client *clp)
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 80ce289eea05..3aebfdc82b30 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -1000,8 +1000,9 @@ static void encode_nfs4_verifier(struct xdr_stream *xdr, const nfs4_verifier *ve
1000 1000
1001static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap, 1001static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
1002 const struct nfs4_label *label, 1002 const struct nfs4_label *label,
1003 const umode_t *umask,
1003 const struct nfs_server *server, 1004 const struct nfs_server *server,
1004 bool excl_check, const umode_t *umask) 1005 const uint32_t attrmask[])
1005{ 1006{
1006 char owner_name[IDMAP_NAMESZ]; 1007 char owner_name[IDMAP_NAMESZ];
1007 char owner_group[IDMAP_NAMESZ]; 1008 char owner_group[IDMAP_NAMESZ];
@@ -1016,22 +1017,20 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
1016 /* 1017 /*
1017 * We reserve enough space to write the entire attribute buffer at once. 1018 * We reserve enough space to write the entire attribute buffer at once.
1018 */ 1019 */
1019 if (iap->ia_valid & ATTR_SIZE) { 1020 if ((iap->ia_valid & ATTR_SIZE) && (attrmask[0] & FATTR4_WORD0_SIZE)) {
1020 bmval[0] |= FATTR4_WORD0_SIZE; 1021 bmval[0] |= FATTR4_WORD0_SIZE;
1021 len += 8; 1022 len += 8;
1022 } 1023 }
1023 if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK))
1024 umask = NULL;
1025 if (iap->ia_valid & ATTR_MODE) { 1024 if (iap->ia_valid & ATTR_MODE) {
1026 if (umask) { 1025 if (umask && (attrmask[2] & FATTR4_WORD2_MODE_UMASK)) {
1027 bmval[2] |= FATTR4_WORD2_MODE_UMASK; 1026 bmval[2] |= FATTR4_WORD2_MODE_UMASK;
1028 len += 8; 1027 len += 8;
1029 } else { 1028 } else if (attrmask[1] & FATTR4_WORD1_MODE) {
1030 bmval[1] |= FATTR4_WORD1_MODE; 1029 bmval[1] |= FATTR4_WORD1_MODE;
1031 len += 4; 1030 len += 4;
1032 } 1031 }
1033 } 1032 }
1034 if (iap->ia_valid & ATTR_UID) { 1033 if ((iap->ia_valid & ATTR_UID) && (attrmask[1] & FATTR4_WORD1_OWNER)) {
1035 owner_namelen = nfs_map_uid_to_name(server, iap->ia_uid, owner_name, IDMAP_NAMESZ); 1034 owner_namelen = nfs_map_uid_to_name(server, iap->ia_uid, owner_name, IDMAP_NAMESZ);
1036 if (owner_namelen < 0) { 1035 if (owner_namelen < 0) {
1037 dprintk("nfs: couldn't resolve uid %d to string\n", 1036 dprintk("nfs: couldn't resolve uid %d to string\n",
@@ -1044,7 +1043,8 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
1044 bmval[1] |= FATTR4_WORD1_OWNER; 1043 bmval[1] |= FATTR4_WORD1_OWNER;
1045 len += 4 + (XDR_QUADLEN(owner_namelen) << 2); 1044 len += 4 + (XDR_QUADLEN(owner_namelen) << 2);
1046 } 1045 }
1047 if (iap->ia_valid & ATTR_GID) { 1046 if ((iap->ia_valid & ATTR_GID) &&
1047 (attrmask[1] & FATTR4_WORD1_OWNER_GROUP)) {
1048 owner_grouplen = nfs_map_gid_to_group(server, iap->ia_gid, owner_group, IDMAP_NAMESZ); 1048 owner_grouplen = nfs_map_gid_to_group(server, iap->ia_gid, owner_group, IDMAP_NAMESZ);
1049 if (owner_grouplen < 0) { 1049 if (owner_grouplen < 0) {
1050 dprintk("nfs: couldn't resolve gid %d to string\n", 1050 dprintk("nfs: couldn't resolve gid %d to string\n",
@@ -1056,32 +1056,26 @@ static void encode_attrs(struct xdr_stream *xdr, const struct iattr *iap,
1056 bmval[1] |= FATTR4_WORD1_OWNER_GROUP; 1056 bmval[1] |= FATTR4_WORD1_OWNER_GROUP;
1057 len += 4 + (XDR_QUADLEN(owner_grouplen) << 2); 1057 len += 4 + (XDR_QUADLEN(owner_grouplen) << 2);
1058 } 1058 }
1059 if (iap->ia_valid & ATTR_ATIME_SET) { 1059 if (attrmask[1] & FATTR4_WORD1_TIME_ACCESS_SET) {
1060 bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET; 1060 if (iap->ia_valid & ATTR_ATIME_SET) {
1061 len += 16; 1061 bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET;
1062 } else if (iap->ia_valid & ATTR_ATIME) { 1062 len += 16;
1063 bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET; 1063 } else if (iap->ia_valid & ATTR_ATIME) {
1064 len += 4; 1064 bmval[1] |= FATTR4_WORD1_TIME_ACCESS_SET;
1065 } 1065 len += 4;
1066 if (iap->ia_valid & ATTR_MTIME_SET) { 1066 }
1067 bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET;
1068 len += 16;
1069 } else if (iap->ia_valid & ATTR_MTIME) {
1070 bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET;
1071 len += 4;
1072 } 1067 }
1073 1068 if (attrmask[1] & FATTR4_WORD1_TIME_MODIFY_SET) {
1074 if (excl_check) { 1069 if (iap->ia_valid & ATTR_MTIME_SET) {
1075 const u32 *excl_bmval = server->exclcreat_bitmask; 1070 bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET;
1076 bmval[0] &= excl_bmval[0]; 1071 len += 16;
1077 bmval[1] &= excl_bmval[1]; 1072 } else if (iap->ia_valid & ATTR_MTIME) {
1078 bmval[2] &= excl_bmval[2]; 1073 bmval[1] |= FATTR4_WORD1_TIME_MODIFY_SET;
1079 1074 len += 4;
1080 if (!(excl_bmval[2] & FATTR4_WORD2_SECURITY_LABEL)) 1075 }
1081 label = NULL;
1082 } 1076 }
1083 1077
1084 if (label) { 1078 if (label && (attrmask[2] & FATTR4_WORD2_SECURITY_LABEL)) {
1085 len += 4 + 4 + 4 + (XDR_QUADLEN(label->len) << 2); 1079 len += 4 + 4 + 4 + (XDR_QUADLEN(label->len) << 2);
1086 bmval[2] |= FATTR4_WORD2_SECURITY_LABEL; 1080 bmval[2] |= FATTR4_WORD2_SECURITY_LABEL;
1087 } 1081 }
@@ -1188,8 +1182,8 @@ static void encode_create(struct xdr_stream *xdr, const struct nfs4_create_arg *
1188 } 1182 }
1189 1183
1190 encode_string(xdr, create->name->len, create->name->name); 1184 encode_string(xdr, create->name->len, create->name->name);
1191 encode_attrs(xdr, create->attrs, create->label, create->server, false, 1185 encode_attrs(xdr, create->attrs, create->label, &create->umask,
1192 &create->umask); 1186 create->server, create->server->attr_bitmask);
1193} 1187}
1194 1188
1195static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct compound_hdr *hdr) 1189static void encode_getattr_one(struct xdr_stream *xdr, uint32_t bitmap, struct compound_hdr *hdr)
@@ -1409,13 +1403,13 @@ static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_op
1409 switch(arg->createmode) { 1403 switch(arg->createmode) {
1410 case NFS4_CREATE_UNCHECKED: 1404 case NFS4_CREATE_UNCHECKED:
1411 *p = cpu_to_be32(NFS4_CREATE_UNCHECKED); 1405 *p = cpu_to_be32(NFS4_CREATE_UNCHECKED);
1412 encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, false, 1406 encode_attrs(xdr, arg->u.attrs, arg->label, &arg->umask,
1413 &arg->umask); 1407 arg->server, arg->server->attr_bitmask);
1414 break; 1408 break;
1415 case NFS4_CREATE_GUARDED: 1409 case NFS4_CREATE_GUARDED:
1416 *p = cpu_to_be32(NFS4_CREATE_GUARDED); 1410 *p = cpu_to_be32(NFS4_CREATE_GUARDED);
1417 encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, false, 1411 encode_attrs(xdr, arg->u.attrs, arg->label, &arg->umask,
1418 &arg->umask); 1412 arg->server, arg->server->attr_bitmask);
1419 break; 1413 break;
1420 case NFS4_CREATE_EXCLUSIVE: 1414 case NFS4_CREATE_EXCLUSIVE:
1421 *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE); 1415 *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE);
@@ -1424,8 +1418,8 @@ static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_op
1424 case NFS4_CREATE_EXCLUSIVE4_1: 1418 case NFS4_CREATE_EXCLUSIVE4_1:
1425 *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE4_1); 1419 *p = cpu_to_be32(NFS4_CREATE_EXCLUSIVE4_1);
1426 encode_nfs4_verifier(xdr, &arg->u.verifier); 1420 encode_nfs4_verifier(xdr, &arg->u.verifier);
1427 encode_attrs(xdr, arg->u.attrs, arg->label, arg->server, true, 1421 encode_attrs(xdr, arg->u.attrs, arg->label, &arg->umask,
1428 &arg->umask); 1422 arg->server, arg->server->exclcreat_bitmask);
1429 } 1423 }
1430} 1424}
1431 1425
@@ -1681,7 +1675,8 @@ static void encode_setattr(struct xdr_stream *xdr, const struct nfs_setattrargs
1681{ 1675{
1682 encode_op_hdr(xdr, OP_SETATTR, decode_setattr_maxsz, hdr); 1676 encode_op_hdr(xdr, OP_SETATTR, decode_setattr_maxsz, hdr);
1683 encode_nfs4_stateid(xdr, &arg->stateid); 1677 encode_nfs4_stateid(xdr, &arg->stateid);
1684 encode_attrs(xdr, arg->iap, arg->label, server, false, NULL); 1678 encode_attrs(xdr, arg->iap, arg->label, NULL, server,
1679 server->attr_bitmask);
1685} 1680}
1686 1681
1687static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid, struct compound_hdr *hdr) 1682static void encode_setclientid(struct xdr_stream *xdr, const struct nfs4_setclientid *setclientid, struct compound_hdr *hdr)
@@ -2005,16 +2000,10 @@ encode_layoutcommit(struct xdr_stream *xdr,
2005 *p++ = cpu_to_be32(0); /* Never send time_modify_changed */ 2000 *p++ = cpu_to_be32(0); /* Never send time_modify_changed */
2006 *p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */ 2001 *p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */
2007 2002
2008 if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit) { 2003 encode_uint32(xdr, args->layoutupdate_len);
2009 NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit( 2004 if (args->layoutupdate_pages)
2010 NFS_I(inode)->layout, xdr, args); 2005 xdr_write_pages(xdr, args->layoutupdate_pages, 0,
2011 } else { 2006 args->layoutupdate_len);
2012 encode_uint32(xdr, args->layoutupdate_len);
2013 if (args->layoutupdate_pages) {
2014 xdr_write_pages(xdr, args->layoutupdate_pages, 0,
2015 args->layoutupdate_len);
2016 }
2017 }
2018 2007
2019 return 0; 2008 return 0;
2020} 2009}
@@ -2024,7 +2013,6 @@ encode_layoutreturn(struct xdr_stream *xdr,
2024 const struct nfs4_layoutreturn_args *args, 2013 const struct nfs4_layoutreturn_args *args,
2025 struct compound_hdr *hdr) 2014 struct compound_hdr *hdr)
2026{ 2015{
2027 const struct pnfs_layoutdriver_type *lr_ops = NFS_SERVER(args->inode)->pnfs_curr_ld;
2028 __be32 *p; 2016 __be32 *p;
2029 2017
2030 encode_op_hdr(xdr, OP_LAYOUTRETURN, decode_layoutreturn_maxsz, hdr); 2018 encode_op_hdr(xdr, OP_LAYOUTRETURN, decode_layoutreturn_maxsz, hdr);
@@ -2041,8 +2029,6 @@ encode_layoutreturn(struct xdr_stream *xdr,
2041 spin_unlock(&args->inode->i_lock); 2029 spin_unlock(&args->inode->i_lock);
2042 if (args->ld_private->ops && args->ld_private->ops->encode) 2030 if (args->ld_private->ops && args->ld_private->ops->encode)
2043 args->ld_private->ops->encode(xdr, args, args->ld_private); 2031 args->ld_private->ops->encode(xdr, args, args->ld_private);
2044 else if (lr_ops->encode_layoutreturn)
2045 lr_ops->encode_layoutreturn(xdr, args);
2046 else 2032 else
2047 encode_uint32(xdr, 0); 2033 encode_uint32(xdr, 0);
2048} 2034}
@@ -5579,6 +5565,8 @@ static int decode_op_map(struct xdr_stream *xdr, struct nfs4_op_map *op_map)
5579 unsigned int i; 5565 unsigned int i;
5580 5566
5581 p = xdr_inline_decode(xdr, 4); 5567 p = xdr_inline_decode(xdr, 4);
5568 if (!p)
5569 return -EIO;
5582 bitmap_words = be32_to_cpup(p++); 5570 bitmap_words = be32_to_cpup(p++);
5583 if (bitmap_words > NFS4_OP_MAP_NUM_WORDS) 5571 if (bitmap_words > NFS4_OP_MAP_NUM_WORDS)
5584 return -EIO; 5572 return -EIO;
diff --git a/fs/nfs/objlayout/Kbuild b/fs/nfs/objlayout/Kbuild
deleted file mode 100644
index ed30ea072bb8..000000000000
--- a/fs/nfs/objlayout/Kbuild
+++ /dev/null
@@ -1,5 +0,0 @@
1#
2# Makefile for the pNFS Objects Layout Driver kernel module
3#
4objlayoutdriver-y := objio_osd.o pnfs_osd_xdr_cli.o objlayout.o
5obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayoutdriver.o
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
deleted file mode 100644
index 049c1b1f2932..000000000000
--- a/fs/nfs/objlayout/objio_osd.c
+++ /dev/null
@@ -1,675 +0,0 @@
1/*
2 * pNFS Objects layout implementation over open-osd initiator library
3 *
4 * Copyright (C) 2009 Panasas Inc. [year of first publication]
5 * All rights reserved.
6 *
7 * Benny Halevy <bhalevy@panasas.com>
8 * Boaz Harrosh <ooo@electrozaur.com>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2
12 * See the file COPYING included with this distribution for more details.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 *
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in the
22 * documentation and/or other materials provided with the distribution.
23 * 3. Neither the name of the Panasas company nor the names of its
24 * contributors may be used to endorse or promote products derived
25 * from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
28 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
29 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
30 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
34 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 */
39
40#include <linux/module.h>
41#include <scsi/osd_ore.h>
42
43#include "objlayout.h"
44#include "../internal.h"
45
46#define NFSDBG_FACILITY NFSDBG_PNFS_LD
47
48struct objio_dev_ent {
49 struct nfs4_deviceid_node id_node;
50 struct ore_dev od;
51};
52
53static void
54objio_free_deviceid_node(struct nfs4_deviceid_node *d)
55{
56 struct objio_dev_ent *de = container_of(d, struct objio_dev_ent, id_node);
57
58 dprintk("%s: free od=%p\n", __func__, de->od.od);
59 osduld_put_device(de->od.od);
60 kfree_rcu(d, rcu);
61}
62
63struct objio_segment {
64 struct pnfs_layout_segment lseg;
65
66 struct ore_layout layout;
67 struct ore_components oc;
68};
69
70static inline struct objio_segment *
71OBJIO_LSEG(struct pnfs_layout_segment *lseg)
72{
73 return container_of(lseg, struct objio_segment, lseg);
74}
75
76struct objio_state {
77 /* Generic layer */
78 struct objlayout_io_res oir;
79
80 bool sync;
81 /*FIXME: Support for extra_bytes at ore_get_rw_state() */
82 struct ore_io_state *ios;
83};
84
85/* Send and wait for a get_device_info of devices in the layout,
86 then look them up with the osd_initiator library */
87struct nfs4_deviceid_node *
88objio_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev,
89 gfp_t gfp_flags)
90{
91 struct pnfs_osd_deviceaddr *deviceaddr;
92 struct objio_dev_ent *ode = NULL;
93 struct osd_dev *od;
94 struct osd_dev_info odi;
95 bool retry_flag = true;
96 __be32 *p;
97 int err;
98
99 deviceaddr = kzalloc(sizeof(*deviceaddr), gfp_flags);
100 if (!deviceaddr)
101 return NULL;
102
103 p = page_address(pdev->pages[0]);
104 pnfs_osd_xdr_decode_deviceaddr(deviceaddr, p);
105
106 odi.systemid_len = deviceaddr->oda_systemid.len;
107 if (odi.systemid_len > sizeof(odi.systemid)) {
108 dprintk("%s: odi.systemid_len > sizeof(systemid=%zd)\n",
109 __func__, sizeof(odi.systemid));
110 err = -EINVAL;
111 goto out;
112 } else if (odi.systemid_len)
113 memcpy(odi.systemid, deviceaddr->oda_systemid.data,
114 odi.systemid_len);
115 odi.osdname_len = deviceaddr->oda_osdname.len;
116 odi.osdname = (u8 *)deviceaddr->oda_osdname.data;
117
118 if (!odi.osdname_len && !odi.systemid_len) {
119 dprintk("%s: !odi.osdname_len && !odi.systemid_len\n",
120 __func__);
121 err = -ENODEV;
122 goto out;
123 }
124
125retry_lookup:
126 od = osduld_info_lookup(&odi);
127 if (IS_ERR(od)) {
128 err = PTR_ERR(od);
129 dprintk("%s: osduld_info_lookup => %d\n", __func__, err);
130 if (err == -ENODEV && retry_flag) {
131 err = objlayout_autologin(deviceaddr);
132 if (likely(!err)) {
133 retry_flag = false;
134 goto retry_lookup;
135 }
136 }
137 goto out;
138 }
139
140 dprintk("Adding new dev_id(%llx:%llx)\n",
141 _DEVID_LO(&pdev->dev_id), _DEVID_HI(&pdev->dev_id));
142
143 ode = kzalloc(sizeof(*ode), gfp_flags);
144 if (!ode) {
145 dprintk("%s: -ENOMEM od=%p\n", __func__, od);
146 goto out;
147 }
148
149 nfs4_init_deviceid_node(&ode->id_node, server, &pdev->dev_id);
150 kfree(deviceaddr);
151
152 ode->od.od = od;
153 return &ode->id_node;
154
155out:
156 kfree(deviceaddr);
157 return NULL;
158}
159
160static void copy_single_comp(struct ore_components *oc, unsigned c,
161 struct pnfs_osd_object_cred *src_comp)
162{
163 struct ore_comp *ocomp = &oc->comps[c];
164
165 WARN_ON(src_comp->oc_cap_key.cred_len > 0); /* libosd is NO_SEC only */
166 WARN_ON(src_comp->oc_cap.cred_len > sizeof(ocomp->cred));
167
168 ocomp->obj.partition = src_comp->oc_object_id.oid_partition_id;
169 ocomp->obj.id = src_comp->oc_object_id.oid_object_id;
170
171 memcpy(ocomp->cred, src_comp->oc_cap.cred, sizeof(ocomp->cred));
172}
173
174static int __alloc_objio_seg(unsigned numdevs, gfp_t gfp_flags,
175 struct objio_segment **pseg)
176{
177/* This is the in memory structure of the objio_segment
178 *
179 * struct __alloc_objio_segment {
180 * struct objio_segment olseg;
181 * struct ore_dev *ods[numdevs];
182 * struct ore_comp comps[numdevs];
183 * } *aolseg;
184 * NOTE: The code as above compiles and runs perfectly. It is elegant,
185 * type safe and compact. At some Past time Linus has decided he does not
186 * like variable length arrays, For the sake of this principal we uglify
187 * the code as below.
188 */
189 struct objio_segment *lseg;
190 size_t lseg_size = sizeof(*lseg) +
191 numdevs * sizeof(lseg->oc.ods[0]) +
192 numdevs * sizeof(*lseg->oc.comps);
193
194 lseg = kzalloc(lseg_size, gfp_flags);
195 if (unlikely(!lseg)) {
196 dprintk("%s: Failed allocation numdevs=%d size=%zd\n", __func__,
197 numdevs, lseg_size);
198 return -ENOMEM;
199 }
200
201 lseg->oc.numdevs = numdevs;
202 lseg->oc.single_comp = EC_MULTPLE_COMPS;
203 lseg->oc.ods = (void *)(lseg + 1);
204 lseg->oc.comps = (void *)(lseg->oc.ods + numdevs);
205
206 *pseg = lseg;
207 return 0;
208}
209
210int objio_alloc_lseg(struct pnfs_layout_segment **outp,
211 struct pnfs_layout_hdr *pnfslay,
212 struct pnfs_layout_range *range,
213 struct xdr_stream *xdr,
214 gfp_t gfp_flags)
215{
216 struct nfs_server *server = NFS_SERVER(pnfslay->plh_inode);
217 struct objio_segment *objio_seg;
218 struct pnfs_osd_xdr_decode_layout_iter iter;
219 struct pnfs_osd_layout layout;
220 struct pnfs_osd_object_cred src_comp;
221 unsigned cur_comp;
222 int err;
223
224 err = pnfs_osd_xdr_decode_layout_map(&layout, &iter, xdr);
225 if (unlikely(err))
226 return err;
227
228 err = __alloc_objio_seg(layout.olo_num_comps, gfp_flags, &objio_seg);
229 if (unlikely(err))
230 return err;
231
232 objio_seg->layout.stripe_unit = layout.olo_map.odm_stripe_unit;
233 objio_seg->layout.group_width = layout.olo_map.odm_group_width;
234 objio_seg->layout.group_depth = layout.olo_map.odm_group_depth;
235 objio_seg->layout.mirrors_p1 = layout.olo_map.odm_mirror_cnt + 1;
236 objio_seg->layout.raid_algorithm = layout.olo_map.odm_raid_algorithm;
237
238 err = ore_verify_layout(layout.olo_map.odm_num_comps,
239 &objio_seg->layout);
240 if (unlikely(err))
241 goto err;
242
243 objio_seg->oc.first_dev = layout.olo_comps_index;
244 cur_comp = 0;
245 while (pnfs_osd_xdr_decode_layout_comp(&src_comp, &iter, xdr, &err)) {
246 struct nfs4_deviceid_node *d;
247 struct objio_dev_ent *ode;
248
249 copy_single_comp(&objio_seg->oc, cur_comp, &src_comp);
250
251 d = nfs4_find_get_deviceid(server,
252 &src_comp.oc_object_id.oid_device_id,
253 pnfslay->plh_lc_cred, gfp_flags);
254 if (!d) {
255 err = -ENXIO;
256 goto err;
257 }
258
259 ode = container_of(d, struct objio_dev_ent, id_node);
260 objio_seg->oc.ods[cur_comp++] = &ode->od;
261 }
262 /* pnfs_osd_xdr_decode_layout_comp returns false on error */
263 if (unlikely(err))
264 goto err;
265
266 *outp = &objio_seg->lseg;
267 return 0;
268
269err:
270 kfree(objio_seg);
271 dprintk("%s: Error: return %d\n", __func__, err);
272 *outp = NULL;
273 return err;
274}
275
276void objio_free_lseg(struct pnfs_layout_segment *lseg)
277{
278 int i;
279 struct objio_segment *objio_seg = OBJIO_LSEG(lseg);
280
281 for (i = 0; i < objio_seg->oc.numdevs; i++) {
282 struct ore_dev *od = objio_seg->oc.ods[i];
283 struct objio_dev_ent *ode;
284
285 if (!od)
286 break;
287 ode = container_of(od, typeof(*ode), od);
288 nfs4_put_deviceid_node(&ode->id_node);
289 }
290 kfree(objio_seg);
291}
292
293static int
294objio_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, bool is_reading,
295 struct pnfs_layout_segment *lseg, struct page **pages, unsigned pgbase,
296 loff_t offset, size_t count, void *rpcdata, gfp_t gfp_flags,
297 struct objio_state **outp)
298{
299 struct objio_segment *objio_seg = OBJIO_LSEG(lseg);
300 struct ore_io_state *ios;
301 int ret;
302 struct __alloc_objio_state {
303 struct objio_state objios;
304 struct pnfs_osd_ioerr ioerrs[objio_seg->oc.numdevs];
305 } *aos;
306
307 aos = kzalloc(sizeof(*aos), gfp_flags);
308 if (unlikely(!aos))
309 return -ENOMEM;
310
311 objlayout_init_ioerrs(&aos->objios.oir, objio_seg->oc.numdevs,
312 aos->ioerrs, rpcdata, pnfs_layout_type);
313
314 ret = ore_get_rw_state(&objio_seg->layout, &objio_seg->oc, is_reading,
315 offset, count, &ios);
316 if (unlikely(ret)) {
317 kfree(aos);
318 return ret;
319 }
320
321 ios->pages = pages;
322 ios->pgbase = pgbase;
323 ios->private = aos;
324 BUG_ON(ios->nr_pages > (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT);
325
326 aos->objios.sync = 0;
327 aos->objios.ios = ios;
328 *outp = &aos->objios;
329 return 0;
330}
331
332void objio_free_result(struct objlayout_io_res *oir)
333{
334 struct objio_state *objios = container_of(oir, struct objio_state, oir);
335
336 ore_put_io_state(objios->ios);
337 kfree(objios);
338}
339
340static enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep)
341{
342 switch (oep) {
343 case OSD_ERR_PRI_NO_ERROR:
344 return (enum pnfs_osd_errno)0;
345
346 case OSD_ERR_PRI_CLEAR_PAGES:
347 BUG_ON(1);
348 return 0;
349
350 case OSD_ERR_PRI_RESOURCE:
351 return PNFS_OSD_ERR_RESOURCE;
352 case OSD_ERR_PRI_BAD_CRED:
353 return PNFS_OSD_ERR_BAD_CRED;
354 case OSD_ERR_PRI_NO_ACCESS:
355 return PNFS_OSD_ERR_NO_ACCESS;
356 case OSD_ERR_PRI_UNREACHABLE:
357 return PNFS_OSD_ERR_UNREACHABLE;
358 case OSD_ERR_PRI_NOT_FOUND:
359 return PNFS_OSD_ERR_NOT_FOUND;
360 case OSD_ERR_PRI_NO_SPACE:
361 return PNFS_OSD_ERR_NO_SPACE;
362 default:
363 WARN_ON(1);
364 /* fallthrough */
365 case OSD_ERR_PRI_EIO:
366 return PNFS_OSD_ERR_EIO;
367 }
368}
369
370static void __on_dev_error(struct ore_io_state *ios,
371 struct ore_dev *od, unsigned dev_index, enum osd_err_priority oep,
372 u64 dev_offset, u64 dev_len)
373{
374 struct objio_state *objios = ios->private;
375 struct pnfs_osd_objid pooid;
376 struct objio_dev_ent *ode = container_of(od, typeof(*ode), od);
377 /* FIXME: what to do with more-then-one-group layouts. We need to
378 * translate from ore_io_state index to oc->comps index
379 */
380 unsigned comp = dev_index;
381
382 pooid.oid_device_id = ode->id_node.deviceid;
383 pooid.oid_partition_id = ios->oc->comps[comp].obj.partition;
384 pooid.oid_object_id = ios->oc->comps[comp].obj.id;
385
386 objlayout_io_set_result(&objios->oir, comp,
387 &pooid, osd_pri_2_pnfs_err(oep),
388 dev_offset, dev_len, !ios->reading);
389}
390
391/*
392 * read
393 */
394static void _read_done(struct ore_io_state *ios, void *private)
395{
396 struct objio_state *objios = private;
397 ssize_t status;
398 int ret = ore_check_io(ios, &__on_dev_error);
399
400 /* FIXME: _io_free(ios) can we dealocate the libosd resources; */
401
402 if (likely(!ret))
403 status = ios->length;
404 else
405 status = ret;
406
407 objlayout_read_done(&objios->oir, status, objios->sync);
408}
409
410int objio_read_pagelist(struct nfs_pgio_header *hdr)
411{
412 struct objio_state *objios;
413 int ret;
414
415 ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, true,
416 hdr->lseg, hdr->args.pages, hdr->args.pgbase,
417 hdr->args.offset, hdr->args.count, hdr,
418 GFP_KERNEL, &objios);
419 if (unlikely(ret))
420 return ret;
421
422 objios->ios->done = _read_done;
423 dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
424 hdr->args.offset, hdr->args.count);
425 ret = ore_read(objios->ios);
426 if (unlikely(ret))
427 objio_free_result(&objios->oir);
428 return ret;
429}
430
431/*
432 * write
433 */
434static void _write_done(struct ore_io_state *ios, void *private)
435{
436 struct objio_state *objios = private;
437 ssize_t status;
438 int ret = ore_check_io(ios, &__on_dev_error);
439
440 /* FIXME: _io_free(ios) can we dealocate the libosd resources; */
441
442 if (likely(!ret)) {
443 /* FIXME: should be based on the OSD's persistence model
444 * See OSD2r05 Section 4.13 Data persistence model */
445 objios->oir.committed = NFS_FILE_SYNC;
446 status = ios->length;
447 } else {
448 status = ret;
449 }
450
451 objlayout_write_done(&objios->oir, status, objios->sync);
452}
453
454static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate)
455{
456 struct objio_state *objios = priv;
457 struct nfs_pgio_header *hdr = objios->oir.rpcdata;
458 struct address_space *mapping = hdr->inode->i_mapping;
459 pgoff_t index = offset / PAGE_SIZE;
460 struct page *page;
461 loff_t i_size = i_size_read(hdr->inode);
462
463 if (offset >= i_size) {
464 *uptodate = true;
465 dprintk("%s: g_zero_page index=0x%lx\n", __func__, index);
466 return ZERO_PAGE(0);
467 }
468
469 page = find_get_page(mapping, index);
470 if (!page) {
471 page = find_or_create_page(mapping, index, GFP_NOFS);
472 if (unlikely(!page)) {
473 dprintk("%s: grab_cache_page Failed index=0x%lx\n",
474 __func__, index);
475 return NULL;
476 }
477 unlock_page(page);
478 }
479 *uptodate = PageUptodate(page);
480 dprintk("%s: index=0x%lx uptodate=%d\n", __func__, index, *uptodate);
481 return page;
482}
483
484static void __r4w_put_page(void *priv, struct page *page)
485{
486 dprintk("%s: index=0x%lx\n", __func__,
487 (page == ZERO_PAGE(0)) ? -1UL : page->index);
488 if (ZERO_PAGE(0) != page)
489 put_page(page);
490 return;
491}
492
493static const struct _ore_r4w_op _r4w_op = {
494 .get_page = &__r4w_get_page,
495 .put_page = &__r4w_put_page,
496};
497
498int objio_write_pagelist(struct nfs_pgio_header *hdr, int how)
499{
500 struct objio_state *objios;
501 int ret;
502
503 ret = objio_alloc_io_state(NFS_I(hdr->inode)->layout, false,
504 hdr->lseg, hdr->args.pages, hdr->args.pgbase,
505 hdr->args.offset, hdr->args.count, hdr, GFP_NOFS,
506 &objios);
507 if (unlikely(ret))
508 return ret;
509
510 objios->sync = 0 != (how & FLUSH_SYNC);
511 objios->ios->r4w = &_r4w_op;
512
513 if (!objios->sync)
514 objios->ios->done = _write_done;
515
516 dprintk("%s: offset=0x%llx length=0x%x\n", __func__,
517 hdr->args.offset, hdr->args.count);
518 ret = ore_write(objios->ios);
519 if (unlikely(ret)) {
520 objio_free_result(&objios->oir);
521 return ret;
522 }
523
524 if (objios->sync)
525 _write_done(objios->ios, objios);
526
527 return 0;
528}
529
530/*
531 * Return 0 if @req cannot be coalesced into @pgio, otherwise return the number
532 * of bytes (maximum @req->wb_bytes) that can be coalesced.
533 */
534static size_t objio_pg_test(struct nfs_pageio_descriptor *pgio,
535 struct nfs_page *prev, struct nfs_page *req)
536{
537 struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(pgio);
538 unsigned int size;
539
540 size = pnfs_generic_pg_test(pgio, prev, req);
541
542 if (!size || mirror->pg_count + req->wb_bytes >
543 (unsigned long)pgio->pg_layout_private)
544 return 0;
545
546 return min(size, req->wb_bytes);
547}
548
549static void objio_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
550{
551 pnfs_generic_pg_init_read(pgio, req);
552 if (unlikely(pgio->pg_lseg == NULL))
553 return; /* Not pNFS */
554
555 pgio->pg_layout_private = (void *)
556 OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
557}
558
559static bool aligned_on_raid_stripe(u64 offset, struct ore_layout *layout,
560 unsigned long *stripe_end)
561{
562 u32 stripe_off;
563 unsigned stripe_size;
564
565 if (layout->raid_algorithm == PNFS_OSD_RAID_0)
566 return true;
567
568 stripe_size = layout->stripe_unit *
569 (layout->group_width - layout->parity);
570
571 div_u64_rem(offset, stripe_size, &stripe_off);
572 if (!stripe_off)
573 return true;
574
575 *stripe_end = stripe_size - stripe_off;
576 return false;
577}
578
579static void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
580{
581 unsigned long stripe_end = 0;
582 u64 wb_size;
583
584 if (pgio->pg_dreq == NULL)
585 wb_size = i_size_read(pgio->pg_inode) - req_offset(req);
586 else
587 wb_size = nfs_dreq_bytes_left(pgio->pg_dreq);
588
589 pnfs_generic_pg_init_write(pgio, req, wb_size);
590 if (unlikely(pgio->pg_lseg == NULL))
591 return; /* Not pNFS */
592
593 if (req->wb_offset ||
594 !aligned_on_raid_stripe(req->wb_index * PAGE_SIZE,
595 &OBJIO_LSEG(pgio->pg_lseg)->layout,
596 &stripe_end)) {
597 pgio->pg_layout_private = (void *)stripe_end;
598 } else {
599 pgio->pg_layout_private = (void *)
600 OBJIO_LSEG(pgio->pg_lseg)->layout.max_io_length;
601 }
602}
603
604static const struct nfs_pageio_ops objio_pg_read_ops = {
605 .pg_init = objio_init_read,
606 .pg_test = objio_pg_test,
607 .pg_doio = pnfs_generic_pg_readpages,
608 .pg_cleanup = pnfs_generic_pg_cleanup,
609};
610
611static const struct nfs_pageio_ops objio_pg_write_ops = {
612 .pg_init = objio_init_write,
613 .pg_test = objio_pg_test,
614 .pg_doio = pnfs_generic_pg_writepages,
615 .pg_cleanup = pnfs_generic_pg_cleanup,
616};
617
618static struct pnfs_layoutdriver_type objlayout_type = {
619 .id = LAYOUT_OSD2_OBJECTS,
620 .name = "LAYOUT_OSD2_OBJECTS",
621 .flags = PNFS_LAYOUTRET_ON_SETATTR |
622 PNFS_LAYOUTRET_ON_ERROR,
623
624 .max_deviceinfo_size = PAGE_SIZE,
625 .owner = THIS_MODULE,
626 .alloc_layout_hdr = objlayout_alloc_layout_hdr,
627 .free_layout_hdr = objlayout_free_layout_hdr,
628
629 .alloc_lseg = objlayout_alloc_lseg,
630 .free_lseg = objlayout_free_lseg,
631
632 .read_pagelist = objlayout_read_pagelist,
633 .write_pagelist = objlayout_write_pagelist,
634 .pg_read_ops = &objio_pg_read_ops,
635 .pg_write_ops = &objio_pg_write_ops,
636
637 .sync = pnfs_generic_sync,
638
639 .free_deviceid_node = objio_free_deviceid_node,
640
641 .encode_layoutcommit = objlayout_encode_layoutcommit,
642 .encode_layoutreturn = objlayout_encode_layoutreturn,
643};
644
645MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects");
646MODULE_AUTHOR("Benny Halevy <bhalevy@panasas.com>");
647MODULE_LICENSE("GPL");
648
649static int __init
650objlayout_init(void)
651{
652 int ret = pnfs_register_layoutdriver(&objlayout_type);
653
654 if (ret)
655 printk(KERN_INFO
656 "NFS: %s: Registering OSD pNFS Layout Driver failed: error=%d\n",
657 __func__, ret);
658 else
659 printk(KERN_INFO "NFS: %s: Registered OSD pNFS Layout Driver\n",
660 __func__);
661 return ret;
662}
663
664static void __exit
665objlayout_exit(void)
666{
667 pnfs_unregister_layoutdriver(&objlayout_type);
668 printk(KERN_INFO "NFS: %s: Unregistered OSD pNFS Layout Driver\n",
669 __func__);
670}
671
672MODULE_ALIAS("nfs-layouttype4-2");
673
674module_init(objlayout_init);
675module_exit(objlayout_exit);
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
deleted file mode 100644
index 8f3d2acb81c3..000000000000
--- a/fs/nfs/objlayout/objlayout.c
+++ /dev/null
@@ -1,706 +0,0 @@
1/*
2 * pNFS Objects layout driver high level definitions
3 *
4 * Copyright (C) 2007 Panasas Inc. [year of first publication]
5 * All rights reserved.
6 *
7 * Benny Halevy <bhalevy@panasas.com>
8 * Boaz Harrosh <ooo@electrozaur.com>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2
12 * See the file COPYING included with this distribution for more details.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 *
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in the
22 * documentation and/or other materials provided with the distribution.
23 * 3. Neither the name of the Panasas company nor the names of its
24 * contributors may be used to endorse or promote products derived
25 * from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
28 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
29 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
30 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
34 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 */
39
40#include <linux/kmod.h>
41#include <linux/moduleparam.h>
42#include <linux/ratelimit.h>
43#include <scsi/osd_initiator.h>
44#include "objlayout.h"
45
46#define NFSDBG_FACILITY NFSDBG_PNFS_LD
47/*
48 * Create a objlayout layout structure for the given inode and return it.
49 */
50struct pnfs_layout_hdr *
51objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
52{
53 struct objlayout *objlay;
54
55 objlay = kzalloc(sizeof(struct objlayout), gfp_flags);
56 if (!objlay)
57 return NULL;
58 spin_lock_init(&objlay->lock);
59 INIT_LIST_HEAD(&objlay->err_list);
60 dprintk("%s: Return %p\n", __func__, objlay);
61 return &objlay->pnfs_layout;
62}
63
64/*
65 * Free an objlayout layout structure
66 */
67void
68objlayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
69{
70 struct objlayout *objlay = OBJLAYOUT(lo);
71
72 dprintk("%s: objlay %p\n", __func__, objlay);
73
74 WARN_ON(!list_empty(&objlay->err_list));
75 kfree(objlay);
76}
77
78/*
79 * Unmarshall layout and store it in pnfslay.
80 */
81struct pnfs_layout_segment *
82objlayout_alloc_lseg(struct pnfs_layout_hdr *pnfslay,
83 struct nfs4_layoutget_res *lgr,
84 gfp_t gfp_flags)
85{
86 int status = -ENOMEM;
87 struct xdr_stream stream;
88 struct xdr_buf buf = {
89 .pages = lgr->layoutp->pages,
90 .page_len = lgr->layoutp->len,
91 .buflen = lgr->layoutp->len,
92 .len = lgr->layoutp->len,
93 };
94 struct page *scratch;
95 struct pnfs_layout_segment *lseg;
96
97 dprintk("%s: Begin pnfslay %p\n", __func__, pnfslay);
98
99 scratch = alloc_page(gfp_flags);
100 if (!scratch)
101 goto err_nofree;
102
103 xdr_init_decode(&stream, &buf, NULL);
104 xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE);
105
106 status = objio_alloc_lseg(&lseg, pnfslay, &lgr->range, &stream, gfp_flags);
107 if (unlikely(status)) {
108 dprintk("%s: objio_alloc_lseg Return err %d\n", __func__,
109 status);
110 goto err;
111 }
112
113 __free_page(scratch);
114
115 dprintk("%s: Return %p\n", __func__, lseg);
116 return lseg;
117
118err:
119 __free_page(scratch);
120err_nofree:
121 dprintk("%s: Err Return=>%d\n", __func__, status);
122 return ERR_PTR(status);
123}
124
125/*
126 * Free a layout segement
127 */
128void
129objlayout_free_lseg(struct pnfs_layout_segment *lseg)
130{
131 dprintk("%s: freeing layout segment %p\n", __func__, lseg);
132
133 if (unlikely(!lseg))
134 return;
135
136 objio_free_lseg(lseg);
137}
138
139/*
140 * I/O Operations
141 */
142static inline u64
143end_offset(u64 start, u64 len)
144{
145 u64 end;
146
147 end = start + len;
148 return end >= start ? end : NFS4_MAX_UINT64;
149}
150
151static void _fix_verify_io_params(struct pnfs_layout_segment *lseg,
152 struct page ***p_pages, unsigned *p_pgbase,
153 u64 offset, unsigned long count)
154{
155 u64 lseg_end_offset;
156
157 BUG_ON(offset < lseg->pls_range.offset);
158 lseg_end_offset = end_offset(lseg->pls_range.offset,
159 lseg->pls_range.length);
160 BUG_ON(offset >= lseg_end_offset);
161 WARN_ON(offset + count > lseg_end_offset);
162
163 if (*p_pgbase > PAGE_SIZE) {
164 dprintk("%s: pgbase(0x%x) > PAGE_SIZE\n", __func__, *p_pgbase);
165 *p_pages += *p_pgbase >> PAGE_SHIFT;
166 *p_pgbase &= ~PAGE_MASK;
167 }
168}
169
170/*
171 * I/O done common code
172 */
173static void
174objlayout_iodone(struct objlayout_io_res *oir)
175{
176 if (likely(oir->status >= 0)) {
177 objio_free_result(oir);
178 } else {
179 struct objlayout *objlay = oir->objlay;
180
181 spin_lock(&objlay->lock);
182 objlay->delta_space_valid = OBJ_DSU_INVALID;
183 list_add(&objlay->err_list, &oir->err_list);
184 spin_unlock(&objlay->lock);
185 }
186}
187
188/*
189 * objlayout_io_set_result - Set an osd_error code on a specific osd comp.
190 *
191 * The @index component IO failed (error returned from target). Register
192 * the error for later reporting at layout-return.
193 */
194void
195objlayout_io_set_result(struct objlayout_io_res *oir, unsigned index,
196 struct pnfs_osd_objid *pooid, int osd_error,
197 u64 offset, u64 length, bool is_write)
198{
199 struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[index];
200
201 BUG_ON(index >= oir->num_comps);
202 if (osd_error) {
203 ioerr->oer_component = *pooid;
204 ioerr->oer_comp_offset = offset;
205 ioerr->oer_comp_length = length;
206 ioerr->oer_iswrite = is_write;
207 ioerr->oer_errno = osd_error;
208
209 dprintk("%s: err[%d]: errno=%d is_write=%d dev(%llx:%llx) "
210 "par=0x%llx obj=0x%llx offset=0x%llx length=0x%llx\n",
211 __func__, index, ioerr->oer_errno,
212 ioerr->oer_iswrite,
213 _DEVID_LO(&ioerr->oer_component.oid_device_id),
214 _DEVID_HI(&ioerr->oer_component.oid_device_id),
215 ioerr->oer_component.oid_partition_id,
216 ioerr->oer_component.oid_object_id,
217 ioerr->oer_comp_offset,
218 ioerr->oer_comp_length);
219 } else {
220 /* User need not call if no error is reported */
221 ioerr->oer_errno = 0;
222 }
223}
224
225/* Function scheduled on rpc workqueue to call ->nfs_readlist_complete().
226 * This is because the osd completion is called with ints-off from
227 * the block layer
228 */
229static void _rpc_read_complete(struct work_struct *work)
230{
231 struct rpc_task *task;
232 struct nfs_pgio_header *hdr;
233
234 dprintk("%s enter\n", __func__);
235 task = container_of(work, struct rpc_task, u.tk_work);
236 hdr = container_of(task, struct nfs_pgio_header, task);
237
238 pnfs_ld_read_done(hdr);
239}
240
241void
242objlayout_read_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
243{
244 struct nfs_pgio_header *hdr = oir->rpcdata;
245
246 oir->status = hdr->task.tk_status = status;
247 if (status >= 0)
248 hdr->res.count = status;
249 else
250 hdr->pnfs_error = status;
251 objlayout_iodone(oir);
252 /* must not use oir after this point */
253
254 dprintk("%s: Return status=%zd eof=%d sync=%d\n", __func__,
255 status, hdr->res.eof, sync);
256
257 if (sync)
258 pnfs_ld_read_done(hdr);
259 else {
260 INIT_WORK(&hdr->task.u.tk_work, _rpc_read_complete);
261 schedule_work(&hdr->task.u.tk_work);
262 }
263}
264
265/*
266 * Perform sync or async reads.
267 */
268enum pnfs_try_status
269objlayout_read_pagelist(struct nfs_pgio_header *hdr)
270{
271 struct inode *inode = hdr->inode;
272 loff_t offset = hdr->args.offset;
273 size_t count = hdr->args.count;
274 int err;
275 loff_t eof;
276
277 eof = i_size_read(inode);
278 if (unlikely(offset + count > eof)) {
279 if (offset >= eof) {
280 err = 0;
281 hdr->res.count = 0;
282 hdr->res.eof = 1;
283 /*FIXME: do we need to call pnfs_ld_read_done() */
284 goto out;
285 }
286 count = eof - offset;
287 }
288
289 hdr->res.eof = (offset + count) >= eof;
290 _fix_verify_io_params(hdr->lseg, &hdr->args.pages,
291 &hdr->args.pgbase,
292 hdr->args.offset, hdr->args.count);
293
294 dprintk("%s: inode(%lx) offset 0x%llx count 0x%zx eof=%d\n",
295 __func__, inode->i_ino, offset, count, hdr->res.eof);
296
297 err = objio_read_pagelist(hdr);
298 out:
299 if (unlikely(err)) {
300 hdr->pnfs_error = err;
301 dprintk("%s: Returned Error %d\n", __func__, err);
302 return PNFS_NOT_ATTEMPTED;
303 }
304 return PNFS_ATTEMPTED;
305}
306
307/* Function scheduled on rpc workqueue to call ->nfs_writelist_complete().
308 * This is because the osd completion is called with ints-off from
309 * the block layer
310 */
311static void _rpc_write_complete(struct work_struct *work)
312{
313 struct rpc_task *task;
314 struct nfs_pgio_header *hdr;
315
316 dprintk("%s enter\n", __func__);
317 task = container_of(work, struct rpc_task, u.tk_work);
318 hdr = container_of(task, struct nfs_pgio_header, task);
319
320 pnfs_ld_write_done(hdr);
321}
322
323void
324objlayout_write_done(struct objlayout_io_res *oir, ssize_t status, bool sync)
325{
326 struct nfs_pgio_header *hdr = oir->rpcdata;
327
328 oir->status = hdr->task.tk_status = status;
329 if (status >= 0) {
330 hdr->res.count = status;
331 hdr->verf.committed = oir->committed;
332 } else {
333 hdr->pnfs_error = status;
334 }
335 objlayout_iodone(oir);
336 /* must not use oir after this point */
337
338 dprintk("%s: Return status %zd committed %d sync=%d\n", __func__,
339 status, hdr->verf.committed, sync);
340
341 if (sync)
342 pnfs_ld_write_done(hdr);
343 else {
344 INIT_WORK(&hdr->task.u.tk_work, _rpc_write_complete);
345 schedule_work(&hdr->task.u.tk_work);
346 }
347}
348
349/*
350 * Perform sync or async writes.
351 */
352enum pnfs_try_status
353objlayout_write_pagelist(struct nfs_pgio_header *hdr, int how)
354{
355 int err;
356
357 _fix_verify_io_params(hdr->lseg, &hdr->args.pages,
358 &hdr->args.pgbase,
359 hdr->args.offset, hdr->args.count);
360
361 err = objio_write_pagelist(hdr, how);
362 if (unlikely(err)) {
363 hdr->pnfs_error = err;
364 dprintk("%s: Returned Error %d\n", __func__, err);
365 return PNFS_NOT_ATTEMPTED;
366 }
367 return PNFS_ATTEMPTED;
368}
369
370void
371objlayout_encode_layoutcommit(struct pnfs_layout_hdr *pnfslay,
372 struct xdr_stream *xdr,
373 const struct nfs4_layoutcommit_args *args)
374{
375 struct objlayout *objlay = OBJLAYOUT(pnfslay);
376 struct pnfs_osd_layoutupdate lou;
377 __be32 *start;
378
379 dprintk("%s: Begin\n", __func__);
380
381 spin_lock(&objlay->lock);
382 lou.dsu_valid = (objlay->delta_space_valid == OBJ_DSU_VALID);
383 lou.dsu_delta = objlay->delta_space_used;
384 objlay->delta_space_used = 0;
385 objlay->delta_space_valid = OBJ_DSU_INIT;
386 lou.olu_ioerr_flag = !list_empty(&objlay->err_list);
387 spin_unlock(&objlay->lock);
388
389 start = xdr_reserve_space(xdr, 4);
390
391 BUG_ON(pnfs_osd_xdr_encode_layoutupdate(xdr, &lou));
392
393 *start = cpu_to_be32((xdr->p - start - 1) * 4);
394
395 dprintk("%s: Return delta_space_used %lld err %d\n", __func__,
396 lou.dsu_delta, lou.olu_ioerr_flag);
397}
398
399static int
400err_prio(u32 oer_errno)
401{
402 switch (oer_errno) {
403 case 0:
404 return 0;
405
406 case PNFS_OSD_ERR_RESOURCE:
407 return OSD_ERR_PRI_RESOURCE;
408 case PNFS_OSD_ERR_BAD_CRED:
409 return OSD_ERR_PRI_BAD_CRED;
410 case PNFS_OSD_ERR_NO_ACCESS:
411 return OSD_ERR_PRI_NO_ACCESS;
412 case PNFS_OSD_ERR_UNREACHABLE:
413 return OSD_ERR_PRI_UNREACHABLE;
414 case PNFS_OSD_ERR_NOT_FOUND:
415 return OSD_ERR_PRI_NOT_FOUND;
416 case PNFS_OSD_ERR_NO_SPACE:
417 return OSD_ERR_PRI_NO_SPACE;
418 default:
419 WARN_ON(1);
420 /* fallthrough */
421 case PNFS_OSD_ERR_EIO:
422 return OSD_ERR_PRI_EIO;
423 }
424}
425
/*
 * Fold @src_err into @dest_err so that one pnfs_osd_ioerr can represent
 * the union of several per-component errors (used by
 * encode_accumulated_error() when xdr space runs out).
 */
static void
merge_ioerr(struct pnfs_osd_ioerr *dest_err,
	    const struct pnfs_osd_ioerr *src_err)
{
	u64 dest_end, src_end;

	/* First error folded in: take it verbatim, but blank the device id
	 * since the accumulated record cannot name a single device.
	 */
	if (!dest_err->oer_errno) {
		*dest_err = *src_err;
		/* accumulated device must be blank */
		memset(&dest_err->oer_component.oid_device_id, 0,
		       sizeof(dest_err->oer_component.oid_device_id));

		return;
	}

	/* Differing partition/object ids degrade to 0 ("unspecified"). */
	if (dest_err->oer_component.oid_partition_id !=
	    src_err->oer_component.oid_partition_id)
		dest_err->oer_component.oid_partition_id = 0;

	if (dest_err->oer_component.oid_object_id !=
	    src_err->oer_component.oid_object_id)
		dest_err->oer_component.oid_object_id = 0;

	/* Widen [offset, offset+length) to the union of both ranges. */
	if (dest_err->oer_comp_offset > src_err->oer_comp_offset)
		dest_err->oer_comp_offset = src_err->oer_comp_offset;

	dest_end = end_offset(dest_err->oer_comp_offset,
			      dest_err->oer_comp_length);
	src_end = end_offset(src_err->oer_comp_offset,
			     src_err->oer_comp_length);
	if (dest_end < src_end)
		dest_end = src_end;

	dest_err->oer_comp_length = dest_end - dest_err->oer_comp_offset;

	/* Same direction: keep the higher-priority errno (see err_prio()).
	 * Otherwise a write error always supersedes a read error.
	 */
	if ((src_err->oer_iswrite == dest_err->oer_iswrite) &&
	    (err_prio(src_err->oer_errno) > err_prio(dest_err->oer_errno))) {
		dest_err->oer_errno = src_err->oer_errno;
	} else if (src_err->oer_iswrite) {
		dest_err->oer_iswrite = true;
		dest_err->oer_errno = src_err->oer_errno;
	}
}
469
/*
 * Overflow path for objlayout_encode_layoutreturn(): the xdr buffer is
 * out of room, so fold every remaining queued error into a single
 * pnfs_osd_ioerr (via merge_ioerr()) and encode that union into the one
 * already-reserved slot @p. Dequeues and frees all remaining entries on
 * objlay->err_list; called with objlay->lock held.
 */
static void
encode_accumulated_error(struct objlayout *objlay, __be32 *p)
{
	struct objlayout_io_res *oir, *tmp;
	struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0};

	list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) {
		unsigned i;

		for (i = 0; i < oir->num_comps; i++) {
			struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i];

			/* Slots with no recorded error are skipped. */
			if (!ioerr->oer_errno)
				continue;

			printk(KERN_ERR "NFS: %s: err[%d]: errno=%d "
				"is_write=%d dev(%llx:%llx) par=0x%llx "
				"obj=0x%llx offset=0x%llx length=0x%llx\n",
				__func__, i, ioerr->oer_errno,
				ioerr->oer_iswrite,
				_DEVID_LO(&ioerr->oer_component.oid_device_id),
				_DEVID_HI(&ioerr->oer_component.oid_device_id),
				ioerr->oer_component.oid_partition_id,
				ioerr->oer_component.oid_object_id,
				ioerr->oer_comp_offset,
				ioerr->oer_comp_length);

			merge_ioerr(&accumulated_err, ioerr);
		}
		list_del(&oir->err_list);
		objio_free_result(oir);
	}

	pnfs_osd_xdr_encode_ioerr(p, &accumulated_err);
}
505
/*
 * Encode the object-layout LAYOUTRETURN payload: a length-prefixed list
 * of pnfs_osd_ioerr records accumulated on objlay->err_list. If the xdr
 * buffer fills up mid-way, the last successfully reserved slot is reused
 * to hold the merged union of all remaining errors (see
 * encode_accumulated_error()).
 */
void
objlayout_encode_layoutreturn(struct xdr_stream *xdr,
			      const struct nfs4_layoutreturn_args *args)
{
	struct pnfs_layout_hdr *pnfslay = args->layout;
	struct objlayout *objlay = OBJLAYOUT(pnfslay);
	struct objlayout_io_res *oir, *tmp;
	__be32 *start;

	dprintk("%s: Begin\n", __func__);
	/* Placeholder for the byte length of everything that follows. */
	start = xdr_reserve_space(xdr, 4);
	BUG_ON(!start);

	spin_lock(&objlay->lock);

	list_for_each_entry_safe(oir, tmp, &objlay->err_list, err_list) {
		/* last_xdr tracks the most recent reserved slot so the
		 * overflow path can overwrite it with the accumulated error.
		 */
		__be32 *last_xdr = NULL, *p;
		unsigned i;
		int res = 0;

		for (i = 0; i < oir->num_comps; i++) {
			struct pnfs_osd_ioerr *ioerr = &oir->ioerrs[i];

			/* only components that actually failed are encoded */
			if (!ioerr->oer_errno)
				continue;

			dprintk("%s: err[%d]: errno=%d is_write=%d "
				"dev(%llx:%llx) par=0x%llx obj=0x%llx "
				"offset=0x%llx length=0x%llx\n",
				__func__, i, ioerr->oer_errno,
				ioerr->oer_iswrite,
				_DEVID_LO(&ioerr->oer_component.oid_device_id),
				_DEVID_HI(&ioerr->oer_component.oid_device_id),
				ioerr->oer_component.oid_partition_id,
				ioerr->oer_component.oid_object_id,
				ioerr->oer_comp_offset,
				ioerr->oer_comp_length);

			p = pnfs_osd_xdr_ioerr_reserve_space(xdr);
			if (unlikely(!p)) {
				res = -E2BIG;
				break; /* accumulated_error */
			}

			last_xdr = p;
			pnfs_osd_xdr_encode_ioerr(p, &oir->ioerrs[i]);
		}

		/* TODO: use xdr_write_pages */
		if (unlikely(res)) {
			/* no space for even one error descriptor */
			BUG_ON(!last_xdr);

			/* we've encountered a situation with lots and lots of
			 * errors and no space to encode them all. Use the last
			 * available slot to report the union of all the
			 * remaining errors.
			 */
			encode_accumulated_error(objlay, last_xdr);
			goto loop_done;
		}
		/* Fully encoded: this result is done, drop it. */
		list_del(&oir->err_list);
		objio_free_result(oir);
	}
loop_done:
	spin_unlock(&objlay->lock);

	/* Back-patch the byte length after the placeholder. */
	*start = cpu_to_be32((xdr->p - start - 1) * 4);
	dprintk("%s: Return\n", __func__);
}
576
/* Buffer-size limits for the osd_login upcall arguments. */
enum {
	OBJLAYOUT_MAX_URI_LEN = 256, OBJLAYOUT_MAX_OSDNAME_LEN = 64,
	OBJLAYOUT_MAX_SYSID_HEX_LEN = OSD_SYSTEMID_LEN * 2 + 1,
	OSD_LOGIN_UPCALL_PATHLEN = 256
};

/* Path of the user-mode login helper. Cleared at runtime by
 * __objlayout_upcall() on ENOENT/EACCES to disable further upcalls;
 * the admin can set it again via the module parameter below.
 */
static char osd_login_prog[OSD_LOGIN_UPCALL_PATHLEN] = "/sbin/osd_login";

module_param_string(osd_login_prog, osd_login_prog, sizeof(osd_login_prog),
		    0600);
MODULE_PARM_DESC(osd_login_prog, "Path to the osd_login upcall program");

/* Argument block handed to __objlayout_upcall(); zero-filled by the
 * caller so the strings are always NUL-terminated.
 */
struct __auto_login {
	char uri[OBJLAYOUT_MAX_URI_LEN];
	char osdname[OBJLAYOUT_MAX_OSDNAME_LEN];
	char systemid_hex[OBJLAYOUT_MAX_SYSID_HEX_LEN];
};
594
/*
 * Run the user-mode osd_login helper synchronously as:
 *   osd_login_prog -u <uri> -o <osdname> -s <systemid_hex>
 * Returns the helper's exit status (or a negative errno). On
 * ENOENT/EACCES the upcall is disabled by blanking osd_login_prog.
 */
static int __objlayout_upcall(struct __auto_login *login)
{
	static char *envp[] = { "HOME=/",
		"TERM=linux",
		"PATH=/sbin:/usr/sbin:/bin:/usr/bin",
		NULL
	};
	char *argv[8];
	int ret;

	/* Empty path == upcall administratively (or automatically) disabled */
	if (unlikely(!osd_login_prog[0])) {
		dprintk("%s: osd_login_prog is disabled\n", __func__);
		return -EACCES;
	}

	dprintk("%s uri: %s\n", __func__, login->uri);
	dprintk("%s osdname %s\n", __func__, login->osdname);
	dprintk("%s systemid_hex %s\n", __func__, login->systemid_hex);

	argv[0] = (char *)osd_login_prog;
	argv[1] = "-u";
	argv[2] = login->uri;
	argv[3] = "-o";
	argv[4] = login->osdname;
	argv[5] = "-s";
	argv[6] = login->systemid_hex;
	argv[7] = NULL;

	/* UMH_WAIT_PROC: block until the helper process exits */
	ret = call_usermodehelper(argv[0], argv, envp, UMH_WAIT_PROC);
	/*
	 * Disable the upcall mechanism if we're getting an ENOENT or
	 * EACCES error. The admin can re-enable it on the fly by using
	 * sysfs to set the objlayoutdriver.osd_login_prog module parameter once
	 * the problem has been fixed.
	 */
	if (ret == -ENOENT || ret == -EACCES) {
		printk(KERN_ERR "PNFS-OBJ: %s was not found please set "
			"objlayoutdriver.osd_login_prog kernel parameter!\n",
			osd_login_prog);
		osd_login_prog[0] = '\0';
	}
	dprintk("%s %s return value: %d\n", __func__, osd_login_prog, ret);

	return ret;
}
640
/* Assume dest is all zeros */
/*
 * Copy an nfs4_string into a fixed-size buffer. The copy itself never
 * writes a NUL: termination relies on @dest being pre-zeroed by the
 * caller. Oversized input is truncated to max_len - 1 with a
 * rate-limited warning naming @var_name.
 */
static void __copy_nfsS_and_zero_terminate(struct nfs4_string s,
					   char *dest, int max_len,
					   const char *var_name)
{
	if (!s.len)
		return;

	if (s.len >= max_len) {
		pr_warn_ratelimited(
			"objlayout_autologin: %s: s.len(%d) >= max_len(%d)",
			var_name, s.len, max_len);
		s.len = max_len - 1; /* space for null terminator */
	}

	memcpy(dest, s.data, s.len);
}
658
659/* Assume sysid is all zeros */
660static void _sysid_2_hex(struct nfs4_string s,
661 char sysid[OBJLAYOUT_MAX_SYSID_HEX_LEN])
662{
663 int i;
664 char *cur;
665
666 if (!s.len)
667 return;
668
669 if (s.len != OSD_SYSTEMID_LEN) {
670 pr_warn_ratelimited(
671 "objlayout_autologin: systemid_len(%d) != OSD_SYSTEMID_LEN",
672 s.len);
673 if (s.len > OSD_SYSTEMID_LEN)
674 s.len = OSD_SYSTEMID_LEN;
675 }
676
677 cur = sysid;
678 for (i = 0; i < s.len; i++)
679 cur = hex_byte_pack(cur, s.data[i]);
680}
681
682int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr)
683{
684 int rc;
685 struct __auto_login login;
686
687 if (!deviceaddr->oda_targetaddr.ota_netaddr.r_addr.len)
688 return -ENODEV;
689
690 memset(&login, 0, sizeof(login));
691 __copy_nfsS_and_zero_terminate(
692 deviceaddr->oda_targetaddr.ota_netaddr.r_addr,
693 login.uri, sizeof(login.uri), "URI");
694
695 __copy_nfsS_and_zero_terminate(
696 deviceaddr->oda_osdname,
697 login.osdname, sizeof(login.osdname), "OSDNAME");
698
699 _sysid_2_hex(deviceaddr->oda_systemid, login.systemid_hex);
700
701 rc = __objlayout_upcall(&login);
702 if (rc > 0) /* script returns positive values */
703 rc = -ENODEV;
704
705 return rc;
706}
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h
deleted file mode 100644
index fc94a5872ed4..000000000000
--- a/fs/nfs/objlayout/objlayout.h
+++ /dev/null
@@ -1,183 +0,0 @@
1/*
 2 * Data types and function declarations for interfacing with the
3 * pNFS standard object layout driver.
4 *
5 * Copyright (C) 2007 Panasas Inc. [year of first publication]
6 * All rights reserved.
7 *
8 * Benny Halevy <bhalevy@panasas.com>
9 * Boaz Harrosh <ooo@electrozaur.com>
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License version 2
13 * See the file COPYING included with this distribution for more details.
14 *
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
17 * are met:
18 *
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution.
24 * 3. Neither the name of the Panasas company nor the names of its
25 * contributors may be used to endorse or promote products derived
26 * from this software without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
29 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
30 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
31 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
33 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
34 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
35 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
36 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
37 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
38 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
39 */
40
41#ifndef _OBJLAYOUT_H
42#define _OBJLAYOUT_H
43
44#include <linux/nfs_fs.h>
45#include <linux/pnfs_osd_xdr.h>
46#include "../pnfs.h"
47
/*
 * per-inode layout: the object-layout driver's extension of the generic
 * pnfs layout header.
 */
struct objlayout {
	struct pnfs_layout_hdr pnfs_layout;	/* generic part; see OBJLAYOUT() */

	/* for layout_commit */
	enum osd_delta_space_valid_enum {
		OBJ_DSU_INIT = 0,	/* nothing accumulated yet */
		OBJ_DSU_VALID,		/* delta_space_used is accurate */
		OBJ_DSU_INVALID,	/* an I/O invalidated the accounting */
	} delta_space_valid;
	s64 delta_space_used; /* consumed by write ops */

	/* for layout_return */
	spinlock_t lock;		/* guards err_list and the DSU fields above */
	struct list_head err_list;	/* objlayout_io_res with recorded errors */
};
66
/* Map a generic pnfs layout header back to its containing objlayout. */
static inline struct objlayout *
OBJLAYOUT(struct pnfs_layout_hdr *lo)
{
	return container_of(lo, struct objlayout, pnfs_layout);
}
72
/*
 * per-I/O operation state
 * embedded in objects provider io_state data structure
 */
struct objlayout_io_res {
	struct objlayout *objlay;	/* owning per-inode layout */

	void *rpcdata;		/* opaque per-request data (io engine's) */
	int status;		/* res */
	int committed;		/* res */

	/* Error reporting (layout_return) */
	struct list_head err_list;	/* link on objlay->err_list */
	unsigned num_comps;
	/* Pointer to array of error descriptors of size num_comps.
	 * It should contain as many entries as devices in the osd_layout
	 * that participate in the I/O. It is up to the io_engine to allocate
	 * needed space and set num_comps.
	 */
	struct pnfs_osd_ioerr *ioerrs;
};
94
/*
 * Initialize the error-reporting part of an objlayout_io_res for a new
 * I/O: attach it to its layout, record the engine's private data, and
 * adopt the engine-allocated @ioerrs array of @num_comps entries.
 * Note: status/committed are deliberately left untouched here.
 */
static inline
void objlayout_init_ioerrs(struct objlayout_io_res *oir, unsigned num_comps,
			struct pnfs_osd_ioerr *ioerrs, void *rpcdata,
			struct pnfs_layout_hdr *pnfs_layout_type)
{
	oir->objlay = OBJLAYOUT(pnfs_layout_type);
	oir->rpcdata = rpcdata;
	INIT_LIST_HEAD(&oir->err_list);
	oir->num_comps = num_comps;
	oir->ioerrs = ioerrs;
}
106
107/*
108 * Raid engine I/O API
109 */
110extern int objio_alloc_lseg(struct pnfs_layout_segment **outp,
111 struct pnfs_layout_hdr *pnfslay,
112 struct pnfs_layout_range *range,
113 struct xdr_stream *xdr,
114 gfp_t gfp_flags);
115extern void objio_free_lseg(struct pnfs_layout_segment *lseg);
116
117/* objio_free_result will free these @oir structs received from
118 * objlayout_{read,write}_done
119 */
120extern void objio_free_result(struct objlayout_io_res *oir);
121
122extern int objio_read_pagelist(struct nfs_pgio_header *rdata);
123extern int objio_write_pagelist(struct nfs_pgio_header *wdata, int how);
124
125/*
126 * callback API
127 */
128extern void objlayout_io_set_result(struct objlayout_io_res *oir,
129 unsigned index, struct pnfs_osd_objid *pooid,
130 int osd_error, u64 offset, u64 length, bool is_write);
131
/* Accumulate @space_used into the layout's DSU counter under the lock. */
static inline void
objlayout_add_delta_space_used(struct objlayout *objlay, s64 space_used)
{
	/* If one of the I/Os errored out and the delta_space_used was
	 * invalid we render the complete report as invalid. Protocol mandate
	 * the DSU be accurate or not reported.
	 */
	spin_lock(&objlay->lock);
	if (objlay->delta_space_valid != OBJ_DSU_INVALID) {
		objlay->delta_space_valid = OBJ_DSU_VALID;
		objlay->delta_space_used += space_used;
	}
	spin_unlock(&objlay->lock);
}
146
147extern void objlayout_read_done(struct objlayout_io_res *oir,
148 ssize_t status, bool sync);
149extern void objlayout_write_done(struct objlayout_io_res *oir,
150 ssize_t status, bool sync);
151
152/*
153 * exported generic objects function vectors
154 */
155
156extern struct pnfs_layout_hdr *objlayout_alloc_layout_hdr(struct inode *, gfp_t gfp_flags);
157extern void objlayout_free_layout_hdr(struct pnfs_layout_hdr *);
158
159extern struct pnfs_layout_segment *objlayout_alloc_lseg(
160 struct pnfs_layout_hdr *,
161 struct nfs4_layoutget_res *,
162 gfp_t gfp_flags);
163extern void objlayout_free_lseg(struct pnfs_layout_segment *);
164
165extern enum pnfs_try_status objlayout_read_pagelist(
166 struct nfs_pgio_header *);
167
168extern enum pnfs_try_status objlayout_write_pagelist(
169 struct nfs_pgio_header *,
170 int how);
171
172extern void objlayout_encode_layoutcommit(
173 struct pnfs_layout_hdr *,
174 struct xdr_stream *,
175 const struct nfs4_layoutcommit_args *);
176
177extern void objlayout_encode_layoutreturn(
178 struct xdr_stream *,
179 const struct nfs4_layoutreturn_args *);
180
181extern int objlayout_autologin(struct pnfs_osd_deviceaddr *deviceaddr);
182
183#endif /* _OBJLAYOUT_H */
diff --git a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c b/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
deleted file mode 100644
index f093c7ec983b..000000000000
--- a/fs/nfs/objlayout/pnfs_osd_xdr_cli.c
+++ /dev/null
@@ -1,415 +0,0 @@
1/*
2 * Object-Based pNFS Layout XDR layer
3 *
4 * Copyright (C) 2007 Panasas Inc. [year of first publication]
5 * All rights reserved.
6 *
7 * Benny Halevy <bhalevy@panasas.com>
8 * Boaz Harrosh <ooo@electrozaur.com>
9 *
10 * This program is free software; you can redistribute it and/or modify
11 * it under the terms of the GNU General Public License version 2
12 * See the file COPYING included with this distribution for more details.
13 *
14 * Redistribution and use in source and binary forms, with or without
15 * modification, are permitted provided that the following conditions
16 * are met:
17 *
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice, this list of conditions and the following disclaimer in the
22 * documentation and/or other materials provided with the distribution.
23 * 3. Neither the name of the Panasas company nor the names of its
24 * contributors may be used to endorse or promote products derived
25 * from this software without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED
28 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
29 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
30 * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
34 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
35 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
36 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
37 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
38 */
39
40#include <linux/pnfs_osd_xdr.h>
41
42#define NFSDBG_FACILITY NFSDBG_PNFS_LD
43
44/*
45 * The following implementation is based on RFC5664
46 */
47
48/*
49 * struct pnfs_osd_objid {
50 * struct nfs4_deviceid oid_device_id;
51 * u64 oid_partition_id;
52 * u64 oid_object_id;
53 * }; // xdr size 32 bytes
54 */
55static __be32 *
56_osd_xdr_decode_objid(__be32 *p, struct pnfs_osd_objid *objid)
57{
58 p = xdr_decode_opaque_fixed(p, objid->oid_device_id.data,
59 sizeof(objid->oid_device_id.data));
60
61 p = xdr_decode_hyper(p, &objid->oid_partition_id);
62 p = xdr_decode_hyper(p, &objid->oid_object_id);
63 return p;
64}
65/*
66 * struct pnfs_osd_opaque_cred {
67 * u32 cred_len;
68 * void *cred;
69 * }; // xdr size [variable]
70 * The return pointers are from the xdr buffer
71 */
72static int
73_osd_xdr_decode_opaque_cred(struct pnfs_osd_opaque_cred *opaque_cred,
74 struct xdr_stream *xdr)
75{
76 __be32 *p = xdr_inline_decode(xdr, 1);
77
78 if (!p)
79 return -EINVAL;
80
81 opaque_cred->cred_len = be32_to_cpu(*p++);
82
83 p = xdr_inline_decode(xdr, opaque_cred->cred_len);
84 if (!p)
85 return -EINVAL;
86
87 opaque_cred->cred = p;
88 return 0;
89}
90
91/*
92 * struct pnfs_osd_object_cred {
93 * struct pnfs_osd_objid oc_object_id;
94 * u32 oc_osd_version;
95 * u32 oc_cap_key_sec;
96 * struct pnfs_osd_opaque_cred oc_cap_key
97 * struct pnfs_osd_opaque_cred oc_cap;
98 * }; // xdr size 32 + 4 + 4 + [variable] + [variable]
99 */
/*
 * Decode a pnfs_osd_object_cred: the fixed 40-byte head (objid + two
 * u32s) followed by two variable-length opaque credentials. Returns 0,
 * -EIO if the fixed head cannot be read, or the error from
 * _osd_xdr_decode_opaque_cred().
 */
static int
_osd_xdr_decode_object_cred(struct pnfs_osd_object_cred *comp,
			    struct xdr_stream *xdr)
{
	__be32 *p = xdr_inline_decode(xdr, 32 + 4 + 4);
	int ret;

	if (!p)
		return -EIO;

	p = _osd_xdr_decode_objid(p, &comp->oc_object_id);
	comp->oc_osd_version = be32_to_cpup(p++);
	comp->oc_cap_key_sec = be32_to_cpup(p);

	ret = _osd_xdr_decode_opaque_cred(&comp->oc_cap_key, xdr);
	if (unlikely(ret))
		return ret;

	ret = _osd_xdr_decode_opaque_cred(&comp->oc_cap, xdr);
	return ret;
}
121
122/*
123 * struct pnfs_osd_data_map {
124 * u32 odm_num_comps;
125 * u64 odm_stripe_unit;
126 * u32 odm_group_width;
127 * u32 odm_group_depth;
128 * u32 odm_mirror_cnt;
129 * u32 odm_raid_algorithm;
130 * }; // xdr size 4 + 8 + 4 + 4 + 4 + 4
131 */
/*
 * On-the-wire size of struct pnfs_osd_data_map: five 32-bit fields
 * (num_comps, group_width, group_depth, mirror_cnt, raid_algorithm)
 * plus the 64-bit stripe unit.
 */
static inline int
_osd_data_map_xdr_sz(void)
{
	return 5 * 4 + 8;
}
137
/*
 * Decode a pnfs_osd_data_map from an already-validated buffer (caller
 * must have reserved _osd_data_map_xdr_sz() bytes). Returns the advanced
 * pointer.
 */
static __be32 *
_osd_xdr_decode_data_map(__be32 *p, struct pnfs_osd_data_map *data_map)
{
	data_map->odm_num_comps = be32_to_cpup(p++);
	p = xdr_decode_hyper(p, &data_map->odm_stripe_unit);
	data_map->odm_group_width = be32_to_cpup(p++);
	data_map->odm_group_depth = be32_to_cpup(p++);
	data_map->odm_mirror_cnt = be32_to_cpup(p++);
	data_map->odm_raid_algorithm = be32_to_cpup(p++);
	dprintk("%s: odm_num_comps=%u odm_stripe_unit=%llu odm_group_width=%u "
		"odm_group_depth=%u odm_mirror_cnt=%u odm_raid_algorithm=%u\n",
		__func__,
		data_map->odm_num_comps,
		(unsigned long long)data_map->odm_stripe_unit,
		data_map->odm_group_width,
		data_map->odm_group_depth,
		data_map->odm_mirror_cnt,
		data_map->odm_raid_algorithm);
	return p;
}
158
/*
 * Decode the fixed head of an object layout: the data map plus the
 * component index and count. Resets @iter for the subsequent
 * pnfs_osd_xdr_decode_layout_comp() loop. Returns 0, or -EINVAL if the
 * stream is too short.
 */
int pnfs_osd_xdr_decode_layout_map(struct pnfs_osd_layout *layout,
	struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr)
{
	__be32 *p;

	memset(iter, 0, sizeof(*iter));

	/* data map + olo_comps_index + olo_num_comps */
	p = xdr_inline_decode(xdr, _osd_data_map_xdr_sz() + 4 + 4);
	if (unlikely(!p))
		return -EINVAL;

	p = _osd_xdr_decode_data_map(p, &layout->olo_map);
	layout->olo_comps_index = be32_to_cpup(p++);
	layout->olo_num_comps = be32_to_cpup(p++);
	dprintk("%s: olo_comps_index=%d olo_num_comps=%d\n", __func__,
		layout->olo_comps_index, layout->olo_num_comps);

	iter->total_comps = layout->olo_num_comps;
	return 0;
}
179
/*
 * Decode the next component credential of a layout. Returns true when a
 * component was decoded; false when all iter->total_comps components
 * have been consumed, or on decode failure (then *err holds the error
 * and the caller must stop iterating).
 */
bool pnfs_osd_xdr_decode_layout_comp(struct pnfs_osd_object_cred *comp,
	struct pnfs_osd_xdr_decode_layout_iter *iter, struct xdr_stream *xdr,
	int *err)
{
	/* the iterator can never legitimately run past total_comps */
	BUG_ON(iter->decoded_comps > iter->total_comps);
	if (iter->decoded_comps == iter->total_comps)
		return false;

	*err = _osd_xdr_decode_object_cred(comp, xdr);
	if (unlikely(*err)) {
		dprintk("%s: _osd_xdr_decode_object_cred=>%d decoded_comps=%d "
			"total_comps=%d\n", __func__, *err,
			iter->decoded_comps, iter->total_comps);
		return false; /* stop the loop */
	}
	dprintk("%s: dev(%llx:%llx) par=0x%llx obj=0x%llx "
		"key_len=%u cap_len=%u\n",
		__func__,
		_DEVID_LO(&comp->oc_object_id.oid_device_id),
		_DEVID_HI(&comp->oc_object_id.oid_device_id),
		comp->oc_object_id.oid_partition_id,
		comp->oc_object_id.oid_object_id,
		comp->oc_cap_key.cred_len, comp->oc_cap.cred_len);

	iter->decoded_comps++;
	return true;
}
207
208/*
209 * Get Device Information Decoding
210 *
211 * Note: since Device Information is currently done synchronously, all
212 * variable strings fields are left inside the rpc buffer and are only
213 * pointed to by the pnfs_osd_deviceaddr members. So the read buffer
214 * should not be freed while the returned information is in use.
215 */
216/*
217 *struct nfs4_string {
218 * unsigned int len;
219 * char *data;
220 *}; // size [variable]
221 * NOTE: Returned string points to inside the XDR buffer
222 */
223static __be32 *
224__read_u8_opaque(__be32 *p, struct nfs4_string *str)
225{
226 str->len = be32_to_cpup(p++);
227 str->data = (char *)p;
228
229 p += XDR_QUADLEN(str->len);
230 return p;
231}
232
233/*
234 * struct pnfs_osd_targetid {
235 * u32 oti_type;
236 * struct nfs4_string oti_scsi_device_id;
237 * };// size 4 + [variable]
238 */
/*
 * struct pnfs_osd_targetid {
 *	u32 oti_type;
 *	struct nfs4_string oti_scsi_device_id;
 * };// size 4 + [variable]
 */
static __be32 *
__read_targetid(__be32 *p, struct pnfs_osd_targetid* targetid)
{
	u32 oti_type;

	oti_type = be32_to_cpup(p++);
	targetid->oti_type = oti_type;

	/* only the SCSI flavors carry a trailing opaque id; any other
	 * type leaves oti_scsi_device_id untouched */
	switch (oti_type) {
	case OBJ_TARGET_SCSI_NAME:
	case OBJ_TARGET_SCSI_DEVICE_ID:
		p = __read_u8_opaque(p, &targetid->oti_scsi_device_id);
	}

	return p;
}
255
256/*
257 * struct pnfs_osd_net_addr {
258 * struct nfs4_string r_netid;
259 * struct nfs4_string r_addr;
260 * };
261 */
262static __be32 *
263__read_net_addr(__be32 *p, struct pnfs_osd_net_addr* netaddr)
264{
265 p = __read_u8_opaque(p, &netaddr->r_netid);
266 p = __read_u8_opaque(p, &netaddr->r_addr);
267
268 return p;
269}
270
271/*
272 * struct pnfs_osd_targetaddr {
273 * u32 ota_available;
274 * struct pnfs_osd_net_addr ota_netaddr;
275 * };
276 */
277static __be32 *
278__read_targetaddr(__be32 *p, struct pnfs_osd_targetaddr *targetaddr)
279{
280 u32 ota_available;
281
282 ota_available = be32_to_cpup(p++);
283 targetaddr->ota_available = ota_available;
284
285 if (ota_available)
286 p = __read_net_addr(p, &targetaddr->ota_netaddr);
287
288
289 return p;
290}
291
292/*
293 * struct pnfs_osd_deviceaddr {
294 * struct pnfs_osd_targetid oda_targetid;
295 * struct pnfs_osd_targetaddr oda_targetaddr;
296 * u8 oda_lun[8];
297 * struct nfs4_string oda_systemid;
298 * struct pnfs_osd_object_cred oda_root_obj_cred;
299 * struct nfs4_string oda_osdname;
300 * };
301 */
302
303/* We need this version for the pnfs_osd_xdr_decode_deviceaddr which does
304 * not have an xdr_stream
305 */
306static __be32 *
307__read_opaque_cred(__be32 *p,
308 struct pnfs_osd_opaque_cred *opaque_cred)
309{
310 opaque_cred->cred_len = be32_to_cpu(*p++);
311 opaque_cred->cred = p;
312 return p + XDR_QUADLEN(opaque_cred->cred_len);
313}
314
/* Raw-buffer variant of _osd_xdr_decode_object_cred() for callers that
 * have a flat buffer rather than an xdr_stream (no bounds checking).
 */
static __be32 *
__read_object_cred(__be32 *p, struct pnfs_osd_object_cred *comp)
{
	p = _osd_xdr_decode_objid(p, &comp->oc_object_id);
	comp->oc_osd_version = be32_to_cpup(p++);
	comp->oc_cap_key_sec = be32_to_cpup(p++);

	p = __read_opaque_cred(p, &comp->oc_cap_key);
	p = __read_opaque_cred(p, &comp->oc_cap);
	return p;
}
326
/*
 * Decode a full pnfs_osd_deviceaddr from a raw XDR buffer. All
 * variable-length fields point back into the buffer behind @p, so that
 * buffer must stay allocated while @deviceaddr is in use.
 */
void pnfs_osd_xdr_decode_deviceaddr(
	struct pnfs_osd_deviceaddr *deviceaddr, __be32 *p)
{
	p = __read_targetid(p, &deviceaddr->oda_targetid);

	p = __read_targetaddr(p, &deviceaddr->oda_targetaddr);

	p = xdr_decode_opaque_fixed(p, deviceaddr->oda_lun,
				    sizeof(deviceaddr->oda_lun));

	p = __read_u8_opaque(p, &deviceaddr->oda_systemid);

	p = __read_object_cred(p, &deviceaddr->oda_root_obj_cred);

	p = __read_u8_opaque(p, &deviceaddr->oda_osdname);

	/* libosd likes this terminated in dbg. It's last, so no problems */
	deviceaddr->oda_osdname.data[deviceaddr->oda_osdname.len] = 0;
}
346
347/*
348 * struct pnfs_osd_layoutupdate {
349 * u32 dsu_valid;
350 * s64 dsu_delta;
351 * u32 olu_ioerr_flag;
352 * }; xdr size 4 + 8 + 4
353 */
354int
355pnfs_osd_xdr_encode_layoutupdate(struct xdr_stream *xdr,
356 struct pnfs_osd_layoutupdate *lou)
357{
358 __be32 *p = xdr_reserve_space(xdr, 4 + 8 + 4);
359
360 if (!p)
361 return -E2BIG;
362
363 *p++ = cpu_to_be32(lou->dsu_valid);
364 if (lou->dsu_valid)
365 p = xdr_encode_hyper(p, lou->dsu_delta);
366 *p++ = cpu_to_be32(lou->olu_ioerr_flag);
367 return 0;
368}
369
370/*
371 * struct pnfs_osd_objid {
372 * struct nfs4_deviceid oid_device_id;
373 * u64 oid_partition_id;
374 * u64 oid_object_id;
375 * }; // xdr size 32 bytes
376 */
377static inline __be32 *
378pnfs_osd_xdr_encode_objid(__be32 *p, struct pnfs_osd_objid *object_id)
379{
380 p = xdr_encode_opaque_fixed(p, &object_id->oid_device_id.data,
381 sizeof(object_id->oid_device_id.data));
382 p = xdr_encode_hyper(p, object_id->oid_partition_id);
383 p = xdr_encode_hyper(p, object_id->oid_object_id);
384
385 return p;
386}
387
388/*
389 * struct pnfs_osd_ioerr {
390 * struct pnfs_osd_objid oer_component;
391 * u64 oer_comp_offset;
392 * u64 oer_comp_length;
393 * u32 oer_iswrite;
394 * u32 oer_errno;
395 * }; // xdr size 32 + 24 bytes
396 */
397void pnfs_osd_xdr_encode_ioerr(__be32 *p, struct pnfs_osd_ioerr *ioerr)
398{
399 p = pnfs_osd_xdr_encode_objid(p, &ioerr->oer_component);
400 p = xdr_encode_hyper(p, ioerr->oer_comp_offset);
401 p = xdr_encode_hyper(p, ioerr->oer_comp_length);
402 *p++ = cpu_to_be32(ioerr->oer_iswrite);
403 *p = cpu_to_be32(ioerr->oer_errno);
404}
405
406__be32 *pnfs_osd_xdr_ioerr_reserve_space(struct xdr_stream *xdr)
407{
408 __be32 *p;
409
410 p = xdr_reserve_space(xdr, 32 + 24);
411 if (unlikely(!p))
412 dprintk("%s: out of xdr space\n", __func__);
413
414 return p;
415}
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 6e629b856a00..ad92b401326c 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -29,19 +29,6 @@
29static struct kmem_cache *nfs_page_cachep; 29static struct kmem_cache *nfs_page_cachep;
30static const struct rpc_call_ops nfs_pgio_common_ops; 30static const struct rpc_call_ops nfs_pgio_common_ops;
31 31
32static bool nfs_pgarray_set(struct nfs_page_array *p, unsigned int pagecount)
33{
34 p->npages = pagecount;
35 if (pagecount <= ARRAY_SIZE(p->page_array))
36 p->pagevec = p->page_array;
37 else {
38 p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
39 if (!p->pagevec)
40 p->npages = 0;
41 }
42 return p->pagevec != NULL;
43}
44
45struct nfs_pgio_mirror * 32struct nfs_pgio_mirror *
46nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc) 33nfs_pgio_current_mirror(struct nfs_pageio_descriptor *desc)
47{ 34{
@@ -115,6 +102,35 @@ nfs_iocounter_wait(struct nfs_lock_context *l_ctx)
115 TASK_KILLABLE); 102 TASK_KILLABLE);
116} 103}
117 104
105/**
106 * nfs_async_iocounter_wait - wait on a rpc_waitqueue for I/O
107 * to complete
108 * @task: the rpc_task that should wait
109 * @l_ctx: nfs_lock_context with io_counter to check
110 *
111 * Returns true if there is outstanding I/O to wait on and the
112 * task has been put to sleep.
113 */
114bool
115nfs_async_iocounter_wait(struct rpc_task *task, struct nfs_lock_context *l_ctx)
116{
117 struct inode *inode = d_inode(l_ctx->open_context->dentry);
118 bool ret = false;
119
120 if (atomic_read(&l_ctx->io_count) > 0) {
121 rpc_sleep_on(&NFS_SERVER(inode)->uoc_rpcwaitq, task, NULL);
122 ret = true;
123 }
124
125 if (atomic_read(&l_ctx->io_count) == 0) {
126 rpc_wake_up_queued_task(&NFS_SERVER(inode)->uoc_rpcwaitq, task);
127 ret = false;
128 }
129
130 return ret;
131}
132EXPORT_SYMBOL_GPL(nfs_async_iocounter_wait);
133
118/* 134/*
119 * nfs_page_group_lock - lock the head of the page group 135 * nfs_page_group_lock - lock the head of the page group
120 * @req - request in group that is to be locked 136 * @req - request in group that is to be locked
@@ -398,8 +414,11 @@ static void nfs_clear_request(struct nfs_page *req)
398 req->wb_page = NULL; 414 req->wb_page = NULL;
399 } 415 }
400 if (l_ctx != NULL) { 416 if (l_ctx != NULL) {
401 if (atomic_dec_and_test(&l_ctx->io_count)) 417 if (atomic_dec_and_test(&l_ctx->io_count)) {
402 wake_up_atomic_t(&l_ctx->io_count); 418 wake_up_atomic_t(&l_ctx->io_count);
419 if (test_bit(NFS_CONTEXT_UNLOCK, &ctx->flags))
420 rpc_wake_up(&NFS_SERVER(d_inode(ctx->dentry))->uoc_rpcwaitq);
421 }
403 nfs_put_lock_context(l_ctx); 422 nfs_put_lock_context(l_ctx);
404 req->wb_lock_context = NULL; 423 req->wb_lock_context = NULL;
405 } 424 }
@@ -677,7 +696,8 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
677 const struct nfs_pgio_completion_ops *compl_ops, 696 const struct nfs_pgio_completion_ops *compl_ops,
678 const struct nfs_rw_ops *rw_ops, 697 const struct nfs_rw_ops *rw_ops,
679 size_t bsize, 698 size_t bsize,
680 int io_flags) 699 int io_flags,
700 gfp_t gfp_flags)
681{ 701{
682 struct nfs_pgio_mirror *new; 702 struct nfs_pgio_mirror *new;
683 int i; 703 int i;
@@ -701,7 +721,7 @@ void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
701 /* until we have a request, we don't have an lseg and no 721 /* until we have a request, we don't have an lseg and no
702 * idea how many mirrors there will be */ 722 * idea how many mirrors there will be */
703 new = kcalloc(NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX, 723 new = kcalloc(NFS_PAGEIO_DESCRIPTOR_MIRROR_MAX,
704 sizeof(struct nfs_pgio_mirror), GFP_KERNEL); 724 sizeof(struct nfs_pgio_mirror), gfp_flags);
705 desc->pg_mirrors_dynamic = new; 725 desc->pg_mirrors_dynamic = new;
706 desc->pg_mirrors = new; 726 desc->pg_mirrors = new;
707 727
@@ -754,13 +774,24 @@ int nfs_generic_pgio(struct nfs_pageio_descriptor *desc,
754 *last_page; 774 *last_page;
755 struct list_head *head = &mirror->pg_list; 775 struct list_head *head = &mirror->pg_list;
756 struct nfs_commit_info cinfo; 776 struct nfs_commit_info cinfo;
777 struct nfs_page_array *pg_array = &hdr->page_array;
757 unsigned int pagecount, pageused; 778 unsigned int pagecount, pageused;
779 gfp_t gfp_flags = GFP_KERNEL;
758 780
759 pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count); 781 pagecount = nfs_page_array_len(mirror->pg_base, mirror->pg_count);
760 if (!nfs_pgarray_set(&hdr->page_array, pagecount)) { 782
761 nfs_pgio_error(hdr); 783 if (pagecount <= ARRAY_SIZE(pg_array->page_array))
762 desc->pg_error = -ENOMEM; 784 pg_array->pagevec = pg_array->page_array;
763 return desc->pg_error; 785 else {
786 if (hdr->rw_mode == FMODE_WRITE)
787 gfp_flags = GFP_NOIO;
788 pg_array->pagevec = kcalloc(pagecount, sizeof(struct page *), gfp_flags);
789 if (!pg_array->pagevec) {
790 pg_array->npages = 0;
791 nfs_pgio_error(hdr);
792 desc->pg_error = -ENOMEM;
793 return desc->pg_error;
794 }
764 } 795 }
765 796
766 nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq); 797 nfs_init_cinfo(&cinfo, desc->pg_inode, desc->pg_dreq);
@@ -1256,8 +1287,10 @@ void nfs_pageio_cond_complete(struct nfs_pageio_descriptor *desc, pgoff_t index)
1256 mirror = &desc->pg_mirrors[midx]; 1287 mirror = &desc->pg_mirrors[midx];
1257 if (!list_empty(&mirror->pg_list)) { 1288 if (!list_empty(&mirror->pg_list)) {
1258 prev = nfs_list_entry(mirror->pg_list.prev); 1289 prev = nfs_list_entry(mirror->pg_list.prev);
1259 if (index != prev->wb_index + 1) 1290 if (index != prev->wb_index + 1) {
1260 nfs_pageio_complete_mirror(desc, midx); 1291 nfs_pageio_complete(desc);
1292 break;
1293 }
1261 } 1294 }
1262 } 1295 }
1263} 1296}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index dd042498ce7c..adc6ec28d4b5 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -322,9 +322,15 @@ pnfs_set_plh_return_info(struct pnfs_layout_hdr *lo, enum pnfs_iomode iomode,
322static void 322static void
323pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo) 323pnfs_clear_layoutreturn_info(struct pnfs_layout_hdr *lo)
324{ 324{
325 struct pnfs_layout_segment *lseg;
325 lo->plh_return_iomode = 0; 326 lo->plh_return_iomode = 0;
326 lo->plh_return_seq = 0; 327 lo->plh_return_seq = 0;
327 clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags); 328 clear_bit(NFS_LAYOUT_RETURN_REQUESTED, &lo->plh_flags);
329 list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
330 if (!test_bit(NFS_LSEG_LAYOUTRETURN, &lseg->pls_flags))
331 continue;
332 pnfs_set_plh_return_info(lo, lseg->pls_range.iomode, 0);
333 }
328} 334}
329 335
330static void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) 336static void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo)
@@ -367,9 +373,9 @@ pnfs_mark_layout_stateid_invalid(struct pnfs_layout_hdr *lo,
367 struct pnfs_layout_segment *lseg, *next; 373 struct pnfs_layout_segment *lseg, *next;
368 374
369 set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags); 375 set_bit(NFS_LAYOUT_INVALID_STID, &lo->plh_flags);
370 pnfs_clear_layoutreturn_info(lo);
371 list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) 376 list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
372 pnfs_clear_lseg_state(lseg, lseg_list); 377 pnfs_clear_lseg_state(lseg, lseg_list);
378 pnfs_clear_layoutreturn_info(lo);
373 pnfs_free_returned_lsegs(lo, lseg_list, &range, 0); 379 pnfs_free_returned_lsegs(lo, lseg_list, &range, 0);
374 if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) && 380 if (test_bit(NFS_LAYOUT_RETURN, &lo->plh_flags) &&
375 !test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags)) 381 !test_and_set_bit(NFS_LAYOUT_RETURN_LOCK, &lo->plh_flags))
@@ -563,7 +569,6 @@ pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg)
563 } 569 }
564 } 570 }
565} 571}
566EXPORT_SYMBOL_GPL(pnfs_put_lseg_locked);
567 572
568/* 573/*
569 * is l2 fully contained in l1? 574 * is l2 fully contained in l1?
@@ -728,6 +733,7 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
728 pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED); 733 pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED);
729 spin_unlock(&nfsi->vfs_inode.i_lock); 734 spin_unlock(&nfsi->vfs_inode.i_lock);
730 pnfs_free_lseg_list(&tmp_list); 735 pnfs_free_lseg_list(&tmp_list);
736 nfs_commit_inode(&nfsi->vfs_inode, 0);
731 pnfs_put_layout_hdr(lo); 737 pnfs_put_layout_hdr(lo);
732 } else 738 } else
733 spin_unlock(&nfsi->vfs_inode.i_lock); 739 spin_unlock(&nfsi->vfs_inode.i_lock);
@@ -1209,7 +1215,6 @@ out:
1209 dprintk("<-- %s status: %d\n", __func__, status); 1215 dprintk("<-- %s status: %d\n", __func__, status);
1210 return status; 1216 return status;
1211} 1217}
1212EXPORT_SYMBOL_GPL(_pnfs_return_layout);
1213 1218
1214int 1219int
1215pnfs_commit_and_return_layout(struct inode *inode) 1220pnfs_commit_and_return_layout(struct inode *inode)
@@ -1991,6 +1996,8 @@ out_forget:
1991 spin_unlock(&ino->i_lock); 1996 spin_unlock(&ino->i_lock);
1992 lseg->pls_layout = lo; 1997 lseg->pls_layout = lo;
1993 NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); 1998 NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
1999 if (!pnfs_layout_is_valid(lo))
2000 nfs_commit_inode(ino, 0);
1994 return ERR_PTR(-EAGAIN); 2001 return ERR_PTR(-EAGAIN);
1995} 2002}
1996 2003
@@ -2051,9 +2058,11 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
2051 bool return_now = false; 2058 bool return_now = false;
2052 2059
2053 spin_lock(&inode->i_lock); 2060 spin_lock(&inode->i_lock);
2061 if (!pnfs_layout_is_valid(lo)) {
2062 spin_unlock(&inode->i_lock);
2063 return;
2064 }
2054 pnfs_set_plh_return_info(lo, range.iomode, 0); 2065 pnfs_set_plh_return_info(lo, range.iomode, 0);
2055 /* Block LAYOUTGET */
2056 set_bit(NFS_LAYOUT_RETURN, &lo->plh_flags);
2057 /* 2066 /*
2058 * mark all matching lsegs so that we are sure to have no live 2067 * mark all matching lsegs so that we are sure to have no live
2059 * segments at hand when sending layoutreturn. See pnfs_put_lseg() 2068 * segments at hand when sending layoutreturn. See pnfs_put_lseg()
@@ -2075,10 +2084,22 @@ void pnfs_error_mark_layout_for_return(struct inode *inode,
2075EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return); 2084EXPORT_SYMBOL_GPL(pnfs_error_mark_layout_for_return);
2076 2085
2077void 2086void
2087pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio)
2088{
2089 if (pgio->pg_lseg == NULL ||
2090 test_bit(NFS_LSEG_VALID, &pgio->pg_lseg->pls_flags))
2091 return;
2092 pnfs_put_lseg(pgio->pg_lseg);
2093 pgio->pg_lseg = NULL;
2094}
2095EXPORT_SYMBOL_GPL(pnfs_generic_pg_check_layout);
2096
2097void
2078pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) 2098pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
2079{ 2099{
2080 u64 rd_size = req->wb_bytes; 2100 u64 rd_size = req->wb_bytes;
2081 2101
2102 pnfs_generic_pg_check_layout(pgio);
2082 if (pgio->pg_lseg == NULL) { 2103 if (pgio->pg_lseg == NULL) {
2083 if (pgio->pg_dreq == NULL) 2104 if (pgio->pg_dreq == NULL)
2084 rd_size = i_size_read(pgio->pg_inode) - req_offset(req); 2105 rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
@@ -2109,6 +2130,7 @@ void
2109pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, 2130pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
2110 struct nfs_page *req, u64 wb_size) 2131 struct nfs_page *req, u64 wb_size)
2111{ 2132{
2133 pnfs_generic_pg_check_layout(pgio);
2112 if (pgio->pg_lseg == NULL) { 2134 if (pgio->pg_lseg == NULL) {
2113 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 2135 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
2114 req->wb_context, 2136 req->wb_context,
@@ -2277,8 +2299,20 @@ pnfs_do_write(struct nfs_pageio_descriptor *desc,
2277 enum pnfs_try_status trypnfs; 2299 enum pnfs_try_status trypnfs;
2278 2300
2279 trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how); 2301 trypnfs = pnfs_try_to_write_data(hdr, call_ops, lseg, how);
2280 if (trypnfs == PNFS_NOT_ATTEMPTED) 2302 switch (trypnfs) {
2303 case PNFS_NOT_ATTEMPTED:
2281 pnfs_write_through_mds(desc, hdr); 2304 pnfs_write_through_mds(desc, hdr);
2305 case PNFS_ATTEMPTED:
2306 break;
2307 case PNFS_TRY_AGAIN:
2308 /* cleanup hdr and prepare to redo pnfs */
2309 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
2310 struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
2311 list_splice_init(&hdr->pages, &mirror->pg_list);
2312 mirror->pg_recoalesce = 1;
2313 }
2314 hdr->mds_ops->rpc_release(hdr);
2315 }
2282} 2316}
2283 2317
2284static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) 2318static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
@@ -2408,10 +2442,20 @@ pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
2408 enum pnfs_try_status trypnfs; 2442 enum pnfs_try_status trypnfs;
2409 2443
2410 trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg); 2444 trypnfs = pnfs_try_to_read_data(hdr, call_ops, lseg);
2411 if (trypnfs == PNFS_TRY_AGAIN) 2445 switch (trypnfs) {
2412 pnfs_read_resend_pnfs(hdr); 2446 case PNFS_NOT_ATTEMPTED:
2413 if (trypnfs == PNFS_NOT_ATTEMPTED || hdr->task.tk_status)
2414 pnfs_read_through_mds(desc, hdr); 2447 pnfs_read_through_mds(desc, hdr);
2448 case PNFS_ATTEMPTED:
2449 break;
2450 case PNFS_TRY_AGAIN:
2451 /* cleanup hdr and prepare to redo pnfs */
2452 if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) {
2453 struct nfs_pgio_mirror *mirror = nfs_pgio_current_mirror(desc);
2454 list_splice_init(&hdr->pages, &mirror->pg_list);
2455 mirror->pg_recoalesce = 1;
2456 }
2457 hdr->mds_ops->rpc_release(hdr);
2458 }
2415} 2459}
2416 2460
2417static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) 2461static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 590e1e35781f..2d05b756a8d6 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -173,14 +173,9 @@ struct pnfs_layoutdriver_type {
173 gfp_t gfp_flags); 173 gfp_t gfp_flags);
174 174
175 int (*prepare_layoutreturn) (struct nfs4_layoutreturn_args *); 175 int (*prepare_layoutreturn) (struct nfs4_layoutreturn_args *);
176 void (*encode_layoutreturn) (struct xdr_stream *xdr,
177 const struct nfs4_layoutreturn_args *args);
178 176
179 void (*cleanup_layoutcommit) (struct nfs4_layoutcommit_data *data); 177 void (*cleanup_layoutcommit) (struct nfs4_layoutcommit_data *data);
180 int (*prepare_layoutcommit) (struct nfs4_layoutcommit_args *args); 178 int (*prepare_layoutcommit) (struct nfs4_layoutcommit_args *args);
181 void (*encode_layoutcommit) (struct pnfs_layout_hdr *lo,
182 struct xdr_stream *xdr,
183 const struct nfs4_layoutcommit_args *args);
184 int (*prepare_layoutstats) (struct nfs42_layoutstat_args *args); 179 int (*prepare_layoutstats) (struct nfs42_layoutstat_args *args);
185}; 180};
186 181
@@ -239,6 +234,7 @@ void pnfs_put_lseg_locked(struct pnfs_layout_segment *lseg);
239 234
240void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, struct nfs_fsinfo *); 235void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, struct nfs_fsinfo *);
241void unset_pnfs_layoutdriver(struct nfs_server *); 236void unset_pnfs_layoutdriver(struct nfs_server *);
237void pnfs_generic_pg_check_layout(struct nfs_pageio_descriptor *pgio);
242void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); 238void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
243int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); 239int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
244void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, 240void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 7250b95549ec..d40755a0984b 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -217,7 +217,14 @@ pnfs_generic_alloc_ds_commits(struct nfs_commit_info *cinfo,
217 for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) { 217 for (i = 0; i < fl_cinfo->nbuckets; i++, bucket++) {
218 if (list_empty(&bucket->committing)) 218 if (list_empty(&bucket->committing))
219 continue; 219 continue;
220 data = nfs_commitdata_alloc(); 220 /*
221 * If the layout segment is invalid, then let
222 * pnfs_generic_retry_commit() clean up the bucket.
223 */
224 if (bucket->clseg && !pnfs_is_valid_lseg(bucket->clseg) &&
225 !test_bit(NFS_LSEG_LAYOUTRETURN, &bucket->clseg->pls_flags))
226 break;
227 data = nfs_commitdata_alloc(false);
221 if (!data) 228 if (!data)
222 break; 229 break;
223 data->ds_commit_index = i; 230 data->ds_commit_index = i;
@@ -283,16 +290,10 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
283 unsigned int nreq = 0; 290 unsigned int nreq = 0;
284 291
285 if (!list_empty(mds_pages)) { 292 if (!list_empty(mds_pages)) {
286 data = nfs_commitdata_alloc(); 293 data = nfs_commitdata_alloc(true);
287 if (data != NULL) { 294 data->ds_commit_index = -1;
288 data->ds_commit_index = -1; 295 list_add(&data->pages, &list);
289 list_add(&data->pages, &list); 296 nreq++;
290 nreq++;
291 } else {
292 nfs_retry_commit(mds_pages, NULL, cinfo, 0);
293 pnfs_generic_retry_commit(cinfo, 0);
294 return -ENOMEM;
295 }
296 } 297 }
297 298
298 nreq += pnfs_generic_alloc_ds_commits(cinfo, &list); 299 nreq += pnfs_generic_alloc_ds_commits(cinfo, &list);
@@ -619,7 +620,6 @@ void nfs4_pnfs_v3_ds_connect_unload(void)
619 get_v3_ds_connect = NULL; 620 get_v3_ds_connect = NULL;
620 } 621 }
621} 622}
622EXPORT_SYMBOL_GPL(nfs4_pnfs_v3_ds_connect_unload);
623 623
624static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, 624static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
625 struct nfs4_pnfs_ds *ds, 625 struct nfs4_pnfs_ds *ds,
diff --git a/fs/nfs/proc.c b/fs/nfs/proc.c
index b7bca8303989..9872cf676a50 100644
--- a/fs/nfs/proc.c
+++ b/fs/nfs/proc.c
@@ -638,7 +638,7 @@ nfs_proc_lock(struct file *filp, int cmd, struct file_lock *fl)
638{ 638{
639 struct inode *inode = file_inode(filp); 639 struct inode *inode = file_inode(filp);
640 640
641 return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl); 641 return nlmclnt_proc(NFS_SERVER(inode)->nlm_host, cmd, fl, NULL);
642} 642}
643 643
644/* Helper functions for NFS lock bounds checking */ 644/* Helper functions for NFS lock bounds checking */
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index defc9233e985..a8421d9dab6a 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -35,7 +35,11 @@ static struct kmem_cache *nfs_rdata_cachep;
35 35
36static struct nfs_pgio_header *nfs_readhdr_alloc(void) 36static struct nfs_pgio_header *nfs_readhdr_alloc(void)
37{ 37{
38 return kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); 38 struct nfs_pgio_header *p = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
39
40 if (p)
41 p->rw_mode = FMODE_READ;
42 return p;
39} 43}
40 44
41static void nfs_readhdr_free(struct nfs_pgio_header *rhdr) 45static void nfs_readhdr_free(struct nfs_pgio_header *rhdr)
@@ -64,7 +68,7 @@ void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
64 pg_ops = server->pnfs_curr_ld->pg_read_ops; 68 pg_ops = server->pnfs_curr_ld->pg_read_ops;
65#endif 69#endif
66 nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_read_ops, 70 nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_read_ops,
67 server->rsize, 0); 71 server->rsize, 0, GFP_KERNEL);
68} 72}
69EXPORT_SYMBOL_GPL(nfs_pageio_init_read); 73EXPORT_SYMBOL_GPL(nfs_pageio_init_read);
70 74
@@ -451,7 +455,6 @@ void nfs_destroy_readpagecache(void)
451} 455}
452 456
453static const struct nfs_rw_ops nfs_rw_read_ops = { 457static const struct nfs_rw_ops nfs_rw_read_ops = {
454 .rw_mode = FMODE_READ,
455 .rw_alloc_header = nfs_readhdr_alloc, 458 .rw_alloc_header = nfs_readhdr_alloc,
456 .rw_free_header = nfs_readhdr_free, 459 .rw_free_header = nfs_readhdr_free,
457 .rw_done = nfs_readpage_done, 460 .rw_done = nfs_readpage_done,
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index cc341fc7fd44..db7ba542559e 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -60,14 +60,28 @@ static mempool_t *nfs_wdata_mempool;
60static struct kmem_cache *nfs_cdata_cachep; 60static struct kmem_cache *nfs_cdata_cachep;
61static mempool_t *nfs_commit_mempool; 61static mempool_t *nfs_commit_mempool;
62 62
63struct nfs_commit_data *nfs_commitdata_alloc(void) 63struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail)
64{ 64{
65 struct nfs_commit_data *p = mempool_alloc(nfs_commit_mempool, GFP_NOIO); 65 struct nfs_commit_data *p;
66 66
67 if (p) { 67 if (never_fail)
68 memset(p, 0, sizeof(*p)); 68 p = mempool_alloc(nfs_commit_mempool, GFP_NOIO);
69 INIT_LIST_HEAD(&p->pages); 69 else {
70 /* It is OK to do some reclaim, not no safe to wait
71 * for anything to be returned to the pool.
72 * mempool_alloc() cannot handle that particular combination,
73 * so we need two separate attempts.
74 */
75 p = mempool_alloc(nfs_commit_mempool, GFP_NOWAIT);
76 if (!p)
77 p = kmem_cache_alloc(nfs_cdata_cachep, GFP_NOIO |
78 __GFP_NOWARN | __GFP_NORETRY);
79 if (!p)
80 return NULL;
70 } 81 }
82
83 memset(p, 0, sizeof(*p));
84 INIT_LIST_HEAD(&p->pages);
71 return p; 85 return p;
72} 86}
73EXPORT_SYMBOL_GPL(nfs_commitdata_alloc); 87EXPORT_SYMBOL_GPL(nfs_commitdata_alloc);
@@ -82,8 +96,10 @@ static struct nfs_pgio_header *nfs_writehdr_alloc(void)
82{ 96{
83 struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO); 97 struct nfs_pgio_header *p = mempool_alloc(nfs_wdata_mempool, GFP_NOIO);
84 98
85 if (p) 99 if (p) {
86 memset(p, 0, sizeof(*p)); 100 memset(p, 0, sizeof(*p));
101 p->rw_mode = FMODE_WRITE;
102 }
87 return p; 103 return p;
88} 104}
89 105
@@ -547,9 +563,21 @@ static void nfs_write_error_remove_page(struct nfs_page *req)
547{ 563{
548 nfs_unlock_request(req); 564 nfs_unlock_request(req);
549 nfs_end_page_writeback(req); 565 nfs_end_page_writeback(req);
550 nfs_release_request(req);
551 generic_error_remove_page(page_file_mapping(req->wb_page), 566 generic_error_remove_page(page_file_mapping(req->wb_page),
552 req->wb_page); 567 req->wb_page);
568 nfs_release_request(req);
569}
570
571static bool
572nfs_error_is_fatal_on_server(int err)
573{
574 switch (err) {
575 case 0:
576 case -ERESTARTSYS:
577 case -EINTR:
578 return false;
579 }
580 return nfs_error_is_fatal(err);
553} 581}
554 582
555/* 583/*
@@ -557,8 +585,7 @@ static void nfs_write_error_remove_page(struct nfs_page *req)
557 * May return an error if the user signalled nfs_wait_on_request(). 585 * May return an error if the user signalled nfs_wait_on_request().
558 */ 586 */
559static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio, 587static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
560 struct page *page, bool nonblock, 588 struct page *page, bool nonblock)
561 bool launder)
562{ 589{
563 struct nfs_page *req; 590 struct nfs_page *req;
564 int ret = 0; 591 int ret = 0;
@@ -574,19 +601,19 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
574 WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags)); 601 WARN_ON_ONCE(test_bit(PG_CLEAN, &req->wb_flags));
575 602
576 ret = 0; 603 ret = 0;
604 /* If there is a fatal error that covers this write, just exit */
605 if (nfs_error_is_fatal_on_server(req->wb_context->error))
606 goto out_launder;
607
577 if (!nfs_pageio_add_request(pgio, req)) { 608 if (!nfs_pageio_add_request(pgio, req)) {
578 ret = pgio->pg_error; 609 ret = pgio->pg_error;
579 /* 610 /*
580 * Remove the problematic req upon fatal errors 611 * Remove the problematic req upon fatal errors on the server
581 * in launder case, while other dirty pages can
582 * still be around until they get flushed.
583 */ 612 */
584 if (nfs_error_is_fatal(ret)) { 613 if (nfs_error_is_fatal(ret)) {
585 nfs_context_set_write_error(req->wb_context, ret); 614 nfs_context_set_write_error(req->wb_context, ret);
586 if (launder) { 615 if (nfs_error_is_fatal_on_server(ret))
587 nfs_write_error_remove_page(req); 616 goto out_launder;
588 goto out;
589 }
590 } 617 }
591 nfs_redirty_request(req); 618 nfs_redirty_request(req);
592 ret = -EAGAIN; 619 ret = -EAGAIN;
@@ -595,16 +622,18 @@ static int nfs_page_async_flush(struct nfs_pageio_descriptor *pgio,
595 NFSIOS_WRITEPAGES, 1); 622 NFSIOS_WRITEPAGES, 1);
596out: 623out:
597 return ret; 624 return ret;
625out_launder:
626 nfs_write_error_remove_page(req);
627 return ret;
598} 628}
599 629
600static int nfs_do_writepage(struct page *page, struct writeback_control *wbc, 630static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
601 struct nfs_pageio_descriptor *pgio, bool launder) 631 struct nfs_pageio_descriptor *pgio)
602{ 632{
603 int ret; 633 int ret;
604 634
605 nfs_pageio_cond_complete(pgio, page_index(page)); 635 nfs_pageio_cond_complete(pgio, page_index(page));
606 ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE, 636 ret = nfs_page_async_flush(pgio, page, wbc->sync_mode == WB_SYNC_NONE);
607 launder);
608 if (ret == -EAGAIN) { 637 if (ret == -EAGAIN) {
609 redirty_page_for_writepage(wbc, page); 638 redirty_page_for_writepage(wbc, page);
610 ret = 0; 639 ret = 0;
@@ -616,8 +645,7 @@ static int nfs_do_writepage(struct page *page, struct writeback_control *wbc,
616 * Write an mmapped page to the server. 645 * Write an mmapped page to the server.
617 */ 646 */
618static int nfs_writepage_locked(struct page *page, 647static int nfs_writepage_locked(struct page *page,
619 struct writeback_control *wbc, 648 struct writeback_control *wbc)
620 bool launder)
621{ 649{
622 struct nfs_pageio_descriptor pgio; 650 struct nfs_pageio_descriptor pgio;
623 struct inode *inode = page_file_mapping(page)->host; 651 struct inode *inode = page_file_mapping(page)->host;
@@ -626,7 +654,7 @@ static int nfs_writepage_locked(struct page *page,
626 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE); 654 nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGE);
627 nfs_pageio_init_write(&pgio, inode, 0, 655 nfs_pageio_init_write(&pgio, inode, 0,
628 false, &nfs_async_write_completion_ops); 656 false, &nfs_async_write_completion_ops);
629 err = nfs_do_writepage(page, wbc, &pgio, launder); 657 err = nfs_do_writepage(page, wbc, &pgio);
630 nfs_pageio_complete(&pgio); 658 nfs_pageio_complete(&pgio);
631 if (err < 0) 659 if (err < 0)
632 return err; 660 return err;
@@ -639,7 +667,7 @@ int nfs_writepage(struct page *page, struct writeback_control *wbc)
639{ 667{
640 int ret; 668 int ret;
641 669
642 ret = nfs_writepage_locked(page, wbc, false); 670 ret = nfs_writepage_locked(page, wbc);
643 unlock_page(page); 671 unlock_page(page);
644 return ret; 672 return ret;
645} 673}
@@ -648,7 +676,7 @@ static int nfs_writepages_callback(struct page *page, struct writeback_control *
648{ 676{
649 int ret; 677 int ret;
650 678
651 ret = nfs_do_writepage(page, wbc, data, false); 679 ret = nfs_do_writepage(page, wbc, data);
652 unlock_page(page); 680 unlock_page(page);
653 return ret; 681 return ret;
654} 682}
@@ -1367,7 +1395,7 @@ void nfs_pageio_init_write(struct nfs_pageio_descriptor *pgio,
1367 pg_ops = server->pnfs_curr_ld->pg_write_ops; 1395 pg_ops = server->pnfs_curr_ld->pg_write_ops;
1368#endif 1396#endif
1369 nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops, 1397 nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_write_ops,
1370 server->wsize, ioflags); 1398 server->wsize, ioflags, GFP_NOIO);
1371} 1399}
1372EXPORT_SYMBOL_GPL(nfs_pageio_init_write); 1400EXPORT_SYMBOL_GPL(nfs_pageio_init_write);
1373 1401
@@ -1704,50 +1732,14 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
1704 if (list_empty(head)) 1732 if (list_empty(head))
1705 return 0; 1733 return 0;
1706 1734
1707 data = nfs_commitdata_alloc(); 1735 data = nfs_commitdata_alloc(true);
1708
1709 if (!data)
1710 goto out_bad;
1711 1736
1712 /* Set up the argument struct */ 1737 /* Set up the argument struct */
1713 nfs_init_commit(data, head, NULL, cinfo); 1738 nfs_init_commit(data, head, NULL, cinfo);
1714 atomic_inc(&cinfo->mds->rpcs_out); 1739 atomic_inc(&cinfo->mds->rpcs_out);
1715 return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode), 1740 return nfs_initiate_commit(NFS_CLIENT(inode), data, NFS_PROTO(inode),
1716 data->mds_ops, how, 0); 1741 data->mds_ops, how, 0);
1717 out_bad:
1718 nfs_retry_commit(head, NULL, cinfo, 0);
1719 return -ENOMEM;
1720}
1721
1722int nfs_commit_file(struct file *file, struct nfs_write_verifier *verf)
1723{
1724 struct inode *inode = file_inode(file);
1725 struct nfs_open_context *open;
1726 struct nfs_commit_info cinfo;
1727 struct nfs_page *req;
1728 int ret;
1729
1730 open = get_nfs_open_context(nfs_file_open_context(file));
1731 req = nfs_create_request(open, NULL, NULL, 0, i_size_read(inode));
1732 if (IS_ERR(req)) {
1733 ret = PTR_ERR(req);
1734 goto out_put;
1735 }
1736
1737 nfs_init_cinfo_from_inode(&cinfo, inode);
1738
1739 memcpy(&req->wb_verf, verf, sizeof(struct nfs_write_verifier));
1740 nfs_request_add_commit_list(req, &cinfo);
1741 ret = nfs_commit_inode(inode, FLUSH_SYNC);
1742 if (ret > 0)
1743 ret = 0;
1744
1745 nfs_free_request(req);
1746out_put:
1747 put_nfs_open_context(open);
1748 return ret;
1749} 1742}
1750EXPORT_SYMBOL_GPL(nfs_commit_file);
1751 1743
1752/* 1744/*
1753 * COMMIT call returned 1745 * COMMIT call returned
@@ -1985,7 +1977,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
1985/* 1977/*
1986 * Write back all requests on one page - we do this before reading it. 1978 * Write back all requests on one page - we do this before reading it.
1987 */ 1979 */
1988int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder) 1980int nfs_wb_page(struct inode *inode, struct page *page)
1989{ 1981{
1990 loff_t range_start = page_file_offset(page); 1982 loff_t range_start = page_file_offset(page);
1991 loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1); 1983 loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
@@ -2002,7 +1994,7 @@ int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder)
2002 for (;;) { 1994 for (;;) {
2003 wait_on_page_writeback(page); 1995 wait_on_page_writeback(page);
2004 if (clear_page_dirty_for_io(page)) { 1996 if (clear_page_dirty_for_io(page)) {
2005 ret = nfs_writepage_locked(page, &wbc, launder); 1997 ret = nfs_writepage_locked(page, &wbc);
2006 if (ret < 0) 1998 if (ret < 0)
2007 goto out_error; 1999 goto out_error;
2008 continue; 2000 continue;
@@ -2107,7 +2099,6 @@ void nfs_destroy_writepagecache(void)
2107} 2099}
2108 2100
2109static const struct nfs_rw_ops nfs_rw_write_ops = { 2101static const struct nfs_rw_ops nfs_rw_write_ops = {
2110 .rw_mode = FMODE_WRITE,
2111 .rw_alloc_header = nfs_writehdr_alloc, 2102 .rw_alloc_header = nfs_writehdr_alloc,
2112 .rw_free_header = nfs_writehdr_free, 2103 .rw_free_header = nfs_writehdr_free,
2113 .rw_done = nfs_writeback_done, 2104 .rw_done = nfs_writeback_done,
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 26488b419965..0ad325ed71e8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -909,6 +909,8 @@ static inline struct file *get_file(struct file *f)
909#define FL_OFDLCK 1024 /* lock is "owned" by struct file */ 909#define FL_OFDLCK 1024 /* lock is "owned" by struct file */
910#define FL_LAYOUT 2048 /* outstanding pNFS layout */ 910#define FL_LAYOUT 2048 /* outstanding pNFS layout */
911 911
912#define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE)
913
912/* 914/*
913 * Special return value from posix_lock_file() and vfs_lock_file() for 915 * Special return value from posix_lock_file() and vfs_lock_file() for
914 * asynchronous locking. 916 * asynchronous locking.
diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h
index 140edab64446..05728396a1a1 100644
--- a/include/linux/lockd/bind.h
+++ b/include/linux/lockd/bind.h
@@ -18,6 +18,7 @@
18 18
19/* Dummy declarations */ 19/* Dummy declarations */
20struct svc_rqst; 20struct svc_rqst;
21struct rpc_task;
21 22
22/* 23/*
23 * This is the set of functions for lockd->nfsd communication 24 * This is the set of functions for lockd->nfsd communication
@@ -43,6 +44,7 @@ struct nlmclnt_initdata {
43 u32 nfs_version; 44 u32 nfs_version;
44 int noresvport; 45 int noresvport;
45 struct net *net; 46 struct net *net;
47 const struct nlmclnt_operations *nlmclnt_ops;
46}; 48};
47 49
48/* 50/*
@@ -52,8 +54,26 @@ struct nlmclnt_initdata {
52extern struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init); 54extern struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init);
53extern void nlmclnt_done(struct nlm_host *host); 55extern void nlmclnt_done(struct nlm_host *host);
54 56
55extern int nlmclnt_proc(struct nlm_host *host, int cmd, 57/*
56 struct file_lock *fl); 58 * NLM client operations provide a means to modify RPC processing of NLM
59 * requests. Callbacks receive a pointer to data passed into the call to
60 * nlmclnt_proc().
61 */
62struct nlmclnt_operations {
63 /* Called on successful allocation of nlm_rqst, use for allocation or
64 * reference counting. */
65 void (*nlmclnt_alloc_call)(void *);
66
67 /* Called in rpc_task_prepare for unlock. A return value of true
68 * indicates the callback has put the task to sleep on a waitqueue
69 * and NLM should not call rpc_call_start(). */
70 bool (*nlmclnt_unlock_prepare)(struct rpc_task*, void *);
71
72 /* Called when the nlm_rqst is freed, callbacks should clean up here */
73 void (*nlmclnt_release_call)(void *);
74};
75
76extern int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl, void *data);
57extern int lockd_up(struct net *net); 77extern int lockd_up(struct net *net);
58extern void lockd_down(struct net *net); 78extern void lockd_down(struct net *net);
59 79
diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h
index b37dee3acaba..41f7b6a04d69 100644
--- a/include/linux/lockd/lockd.h
+++ b/include/linux/lockd/lockd.h
@@ -69,6 +69,7 @@ struct nlm_host {
69 char *h_addrbuf; /* address eyecatcher */ 69 char *h_addrbuf; /* address eyecatcher */
70 struct net *net; /* host net */ 70 struct net *net; /* host net */
71 char nodename[UNX_MAXNODENAME + 1]; 71 char nodename[UNX_MAXNODENAME + 1];
72 const struct nlmclnt_operations *h_nlmclnt_ops; /* Callback ops for NLM users */
72}; 73};
73 74
74/* 75/*
@@ -142,6 +143,7 @@ struct nlm_rqst {
142 struct nlm_block * a_block; 143 struct nlm_block * a_block;
143 unsigned int a_retries; /* Retry count */ 144 unsigned int a_retries; /* Retry count */
144 u8 a_owner[NLMCLNT_OHSIZE]; 145 u8 a_owner[NLMCLNT_OHSIZE];
146 void * a_callback_data; /* sent to nlmclnt_operations callbacks */
145}; 147};
146 148
147/* 149/*
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 287f34161086..bb0eb2c9acca 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -76,6 +76,7 @@ struct nfs_open_context {
76#define NFS_CONTEXT_ERROR_WRITE (0) 76#define NFS_CONTEXT_ERROR_WRITE (0)
77#define NFS_CONTEXT_RESEND_WRITES (1) 77#define NFS_CONTEXT_RESEND_WRITES (1)
78#define NFS_CONTEXT_BAD (2) 78#define NFS_CONTEXT_BAD (2)
79#define NFS_CONTEXT_UNLOCK (3)
79 int error; 80 int error;
80 81
81 struct list_head list; 82 struct list_head list;
@@ -499,25 +500,13 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned
499 */ 500 */
500extern int nfs_sync_inode(struct inode *inode); 501extern int nfs_sync_inode(struct inode *inode);
501extern int nfs_wb_all(struct inode *inode); 502extern int nfs_wb_all(struct inode *inode);
502extern int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder); 503extern int nfs_wb_page(struct inode *inode, struct page *page);
503extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); 504extern int nfs_wb_page_cancel(struct inode *inode, struct page* page);
504extern int nfs_commit_inode(struct inode *, int); 505extern int nfs_commit_inode(struct inode *, int);
505extern struct nfs_commit_data *nfs_commitdata_alloc(void); 506extern struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail);
506extern void nfs_commit_free(struct nfs_commit_data *data); 507extern void nfs_commit_free(struct nfs_commit_data *data);
507 508
508static inline int 509static inline int
509nfs_wb_launder_page(struct inode *inode, struct page *page)
510{
511 return nfs_wb_single_page(inode, page, true);
512}
513
514static inline int
515nfs_wb_page(struct inode *inode, struct page *page)
516{
517 return nfs_wb_single_page(inode, page, false);
518}
519
520static inline int
521nfs_have_writebacks(struct inode *inode) 510nfs_have_writebacks(struct inode *inode)
522{ 511{
523 return NFS_I(inode)->nrequests != 0; 512 return NFS_I(inode)->nrequests != 0;
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index e1502c55741e..e418a1096662 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -221,6 +221,7 @@ struct nfs_server {
221 u32 mountd_version; 221 u32 mountd_version;
222 unsigned short mountd_port; 222 unsigned short mountd_port;
223 unsigned short mountd_protocol; 223 unsigned short mountd_protocol;
224 struct rpc_wait_queue uoc_rpcwaitq;
224}; 225};
225 226
226/* Server capabilities */ 227/* Server capabilities */
diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h
index 957049f72290..247cc3d3498f 100644
--- a/include/linux/nfs_page.h
+++ b/include/linux/nfs_page.h
@@ -64,7 +64,6 @@ struct nfs_pageio_ops {
64}; 64};
65 65
66struct nfs_rw_ops { 66struct nfs_rw_ops {
67 const fmode_t rw_mode;
68 struct nfs_pgio_header *(*rw_alloc_header)(void); 67 struct nfs_pgio_header *(*rw_alloc_header)(void);
69 void (*rw_free_header)(struct nfs_pgio_header *); 68 void (*rw_free_header)(struct nfs_pgio_header *);
70 int (*rw_done)(struct rpc_task *, struct nfs_pgio_header *, 69 int (*rw_done)(struct rpc_task *, struct nfs_pgio_header *,
@@ -124,7 +123,8 @@ extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc,
124 const struct nfs_pgio_completion_ops *compl_ops, 123 const struct nfs_pgio_completion_ops *compl_ops,
125 const struct nfs_rw_ops *rw_ops, 124 const struct nfs_rw_ops *rw_ops,
126 size_t bsize, 125 size_t bsize,
127 int how); 126 int how,
127 gfp_t gfp_flags);
128extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, 128extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *,
129 struct nfs_page *); 129 struct nfs_page *);
130extern int nfs_pageio_resend(struct nfs_pageio_descriptor *, 130extern int nfs_pageio_resend(struct nfs_pageio_descriptor *,
@@ -141,6 +141,7 @@ extern int nfs_page_group_lock(struct nfs_page *, bool);
141extern void nfs_page_group_lock_wait(struct nfs_page *); 141extern void nfs_page_group_lock_wait(struct nfs_page *);
142extern void nfs_page_group_unlock(struct nfs_page *); 142extern void nfs_page_group_unlock(struct nfs_page *);
143extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); 143extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int);
144extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *);
144 145
145/* 146/*
146 * Lock the page of an asynchronous request 147 * Lock the page of an asynchronous request
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index 348f7c158084..b28c83475ee8 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -1383,6 +1383,7 @@ struct nfs42_copy_res {
1383 struct nfs42_write_res write_res; 1383 struct nfs42_write_res write_res;
1384 bool consecutive; 1384 bool consecutive;
1385 bool synchronous; 1385 bool synchronous;
1386 struct nfs_commitres commit_res;
1386}; 1387};
1387 1388
1388struct nfs42_seek_args { 1389struct nfs42_seek_args {
@@ -1427,6 +1428,7 @@ struct nfs_pgio_header {
1427 struct list_head pages; 1428 struct list_head pages;
1428 struct nfs_page *req; 1429 struct nfs_page *req;
1429 struct nfs_writeverf verf; /* Used for writes */ 1430 struct nfs_writeverf verf; /* Used for writes */
1431 fmode_t rw_mode;
1430 struct pnfs_layout_segment *lseg; 1432 struct pnfs_layout_segment *lseg;
1431 loff_t io_start; 1433 loff_t io_start;
1432 const struct rpc_call_ops *mds_ops; 1434 const struct rpc_call_ops *mds_ops;
@@ -1550,6 +1552,7 @@ struct nfs_rpc_ops {
1550 const struct inode_operations *dir_inode_ops; 1552 const struct inode_operations *dir_inode_ops;
1551 const struct inode_operations *file_inode_ops; 1553 const struct inode_operations *file_inode_ops;
1552 const struct file_operations *file_ops; 1554 const struct file_operations *file_ops;
1555 const struct nlmclnt_operations *nlmclnt_ops;
1553 1556
1554 int (*getroot) (struct nfs_server *, struct nfs_fh *, 1557 int (*getroot) (struct nfs_server *, struct nfs_fh *,
1555 struct nfs_fsinfo *); 1558 struct nfs_fsinfo *);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 52da3ce54bb5..b5cb921775a0 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -1042,8 +1042,6 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
1042 struct rpc_task *task; 1042 struct rpc_task *task;
1043 1043
1044 task = rpc_new_task(task_setup_data); 1044 task = rpc_new_task(task_setup_data);
1045 if (IS_ERR(task))
1046 goto out;
1047 1045
1048 rpc_task_set_client(task, task_setup_data->rpc_client); 1046 rpc_task_set_client(task, task_setup_data->rpc_client);
1049 rpc_task_set_rpc_message(task, task_setup_data->rpc_message); 1047 rpc_task_set_rpc_message(task, task_setup_data->rpc_message);
@@ -1053,7 +1051,6 @@ struct rpc_task *rpc_run_task(const struct rpc_task_setup *task_setup_data)
1053 1051
1054 atomic_inc(&task->tk_count); 1052 atomic_inc(&task->tk_count);
1055 rpc_execute(task); 1053 rpc_execute(task);
1056out:
1057 return task; 1054 return task;
1058} 1055}
1059EXPORT_SYMBOL_GPL(rpc_run_task); 1056EXPORT_SYMBOL_GPL(rpc_run_task);
@@ -1140,10 +1137,6 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
1140 * Create an rpc_task to send the data 1137 * Create an rpc_task to send the data
1141 */ 1138 */
1142 task = rpc_new_task(&task_setup_data); 1139 task = rpc_new_task(&task_setup_data);
1143 if (IS_ERR(task)) {
1144 xprt_free_bc_request(req);
1145 goto out;
1146 }
1147 task->tk_rqstp = req; 1140 task->tk_rqstp = req;
1148 1141
1149 /* 1142 /*
@@ -1158,7 +1151,6 @@ struct rpc_task *rpc_run_bc_task(struct rpc_rqst *req)
1158 WARN_ON_ONCE(atomic_read(&task->tk_count) != 2); 1151 WARN_ON_ONCE(atomic_read(&task->tk_count) != 2);
1159 rpc_execute(task); 1152 rpc_execute(task);
1160 1153
1161out:
1162 dprintk("RPC: rpc_run_bc_task: task= %p\n", task); 1154 dprintk("RPC: rpc_run_bc_task: task= %p\n", task);
1163 return task; 1155 return task;
1164} 1156}
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 5db68b371db2..0cc83839c13c 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -965,11 +965,6 @@ struct rpc_task *rpc_new_task(const struct rpc_task_setup *setup_data)
965 965
966 if (task == NULL) { 966 if (task == NULL) {
967 task = rpc_alloc_task(); 967 task = rpc_alloc_task();
968 if (task == NULL) {
969 rpc_release_calldata(setup_data->callback_ops,
970 setup_data->callback_data);
971 return ERR_PTR(-ENOMEM);
972 }
973 flags = RPC_TASK_DYNAMIC; 968 flags = RPC_TASK_DYNAMIC;
974 } 969 }
975 970
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 1f7082144e01..e34f4ee7f2b6 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -807,7 +807,7 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p)
807EXPORT_SYMBOL_GPL(xdr_init_decode); 807EXPORT_SYMBOL_GPL(xdr_init_decode);
808 808
809/** 809/**
810 * xdr_init_decode - Initialize an xdr_stream for decoding data. 810 * xdr_init_decode_pages - Initialize an xdr_stream for decoding into pages
811 * @xdr: pointer to xdr_stream struct 811 * @xdr: pointer to xdr_stream struct
812 * @buf: pointer to XDR buffer from which to decode data 812 * @buf: pointer to XDR buffer from which to decode data
813 * @pages: list of pages to decode into 813 * @pages: list of pages to decode into
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index b530a2852ba8..3e63c5e97ebe 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -651,6 +651,7 @@ void xprt_force_disconnect(struct rpc_xprt *xprt)
651 xprt_wake_pending_tasks(xprt, -EAGAIN); 651 xprt_wake_pending_tasks(xprt, -EAGAIN);
652 spin_unlock_bh(&xprt->transport_lock); 652 spin_unlock_bh(&xprt->transport_lock);
653} 653}
654EXPORT_SYMBOL_GPL(xprt_force_disconnect);
654 655
655/** 656/**
656 * xprt_conditional_disconnect - force a transport to disconnect 657 * xprt_conditional_disconnect - force a transport to disconnect
diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c
index a044be2d6ad7..694e9b13ecf0 100644
--- a/net/sunrpc/xprtrdma/rpc_rdma.c
+++ b/net/sunrpc/xprtrdma/rpc_rdma.c
@@ -494,7 +494,7 @@ rpcrdma_prepare_hdr_sge(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
494 } 494 }
495 sge->length = len; 495 sge->length = len;
496 496
497 ib_dma_sync_single_for_device(ia->ri_device, sge->addr, 497 ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr,
498 sge->length, DMA_TO_DEVICE); 498 sge->length, DMA_TO_DEVICE);
499 req->rl_send_wr.num_sge++; 499 req->rl_send_wr.num_sge++;
500 return true; 500 return true;
@@ -523,7 +523,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req,
523 sge[sge_no].addr = rdmab_addr(rb); 523 sge[sge_no].addr = rdmab_addr(rb);
524 sge[sge_no].length = xdr->head[0].iov_len; 524 sge[sge_no].length = xdr->head[0].iov_len;
525 sge[sge_no].lkey = rdmab_lkey(rb); 525 sge[sge_no].lkey = rdmab_lkey(rb);
526 ib_dma_sync_single_for_device(device, sge[sge_no].addr, 526 ib_dma_sync_single_for_device(rdmab_device(rb), sge[sge_no].addr,
527 sge[sge_no].length, DMA_TO_DEVICE); 527 sge[sge_no].length, DMA_TO_DEVICE);
528 528
529 /* If there is a Read chunk, the page list is being handled 529 /* If there is a Read chunk, the page list is being handled
@@ -781,9 +781,11 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst)
781 return 0; 781 return 0;
782 782
783out_err: 783out_err:
784 pr_err("rpcrdma: rpcrdma_marshal_req failed, status %ld\n", 784 if (PTR_ERR(iptr) != -ENOBUFS) {
785 PTR_ERR(iptr)); 785 pr_err("rpcrdma: rpcrdma_marshal_req failed, status %ld\n",
786 r_xprt->rx_stats.failed_marshal_count++; 786 PTR_ERR(iptr));
787 r_xprt->rx_stats.failed_marshal_count++;
788 }
787 return PTR_ERR(iptr); 789 return PTR_ERR(iptr);
788} 790}
789 791
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index c717f5410776..62ecbccd9748 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -66,8 +66,8 @@ static unsigned int xprt_rdma_slot_table_entries = RPCRDMA_DEF_SLOT_TABLE;
66unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE; 66unsigned int xprt_rdma_max_inline_read = RPCRDMA_DEF_INLINE;
67static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE; 67static unsigned int xprt_rdma_max_inline_write = RPCRDMA_DEF_INLINE;
68static unsigned int xprt_rdma_inline_write_padding; 68static unsigned int xprt_rdma_inline_write_padding;
69static unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR; 69unsigned int xprt_rdma_memreg_strategy = RPCRDMA_FRMR;
70 int xprt_rdma_pad_optimize = 0; 70int xprt_rdma_pad_optimize;
71 71
72#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) 72#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
73 73
@@ -396,7 +396,7 @@ xprt_setup_rdma(struct xprt_create *args)
396 396
397 new_xprt = rpcx_to_rdmax(xprt); 397 new_xprt = rpcx_to_rdmax(xprt);
398 398
399 rc = rpcrdma_ia_open(new_xprt, sap, xprt_rdma_memreg_strategy); 399 rc = rpcrdma_ia_open(new_xprt, sap);
400 if (rc) 400 if (rc)
401 goto out1; 401 goto out1;
402 402
@@ -457,19 +457,33 @@ out1:
457 return ERR_PTR(rc); 457 return ERR_PTR(rc);
458} 458}
459 459
460/* 460/**
461 * Close a connection, during shutdown or timeout/reconnect 461 * xprt_rdma_close - Close down RDMA connection
462 * @xprt: generic transport to be closed
463 *
464 * Called during transport shutdown reconnect, or device
465 * removal. Caller holds the transport's write lock.
462 */ 466 */
463static void 467static void
464xprt_rdma_close(struct rpc_xprt *xprt) 468xprt_rdma_close(struct rpc_xprt *xprt)
465{ 469{
466 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 470 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
471 struct rpcrdma_ep *ep = &r_xprt->rx_ep;
472 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
473
474 dprintk("RPC: %s: closing xprt %p\n", __func__, xprt);
467 475
468 dprintk("RPC: %s: closing\n", __func__); 476 if (test_and_clear_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags)) {
469 if (r_xprt->rx_ep.rep_connected > 0) 477 xprt_clear_connected(xprt);
478 rpcrdma_ia_remove(ia);
479 return;
480 }
481 if (ep->rep_connected == -ENODEV)
482 return;
483 if (ep->rep_connected > 0)
470 xprt->reestablish_timeout = 0; 484 xprt->reestablish_timeout = 0;
471 xprt_disconnect_done(xprt); 485 xprt_disconnect_done(xprt);
472 rpcrdma_ep_disconnect(&r_xprt->rx_ep, &r_xprt->rx_ia); 486 rpcrdma_ep_disconnect(ep, ia);
473} 487}
474 488
475static void 489static void
@@ -484,6 +498,27 @@ xprt_rdma_set_port(struct rpc_xprt *xprt, u16 port)
484 dprintk("RPC: %s: %u\n", __func__, port); 498 dprintk("RPC: %s: %u\n", __func__, port);
485} 499}
486 500
501/**
502 * xprt_rdma_timer - invoked when an RPC times out
503 * @xprt: controlling RPC transport
504 * @task: RPC task that timed out
505 *
506 * Invoked when the transport is still connected, but an RPC
507 * retransmit timeout occurs.
508 *
509 * Since RDMA connections don't have a keep-alive, forcibly
510 * disconnect and retry to connect. This drives full
511 * detection of the network path, and retransmissions of
512 * all pending RPCs.
513 */
514static void
515xprt_rdma_timer(struct rpc_xprt *xprt, struct rpc_task *task)
516{
517 dprintk("RPC: %5u %s: xprt = %p\n", task->tk_pid, __func__, xprt);
518
519 xprt_force_disconnect(xprt);
520}
521
487static void 522static void
488xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task) 523xprt_rdma_connect(struct rpc_xprt *xprt, struct rpc_task *task)
489{ 524{
@@ -659,6 +694,8 @@ xprt_rdma_free(struct rpc_task *task)
659 * xprt_rdma_send_request - marshal and send an RPC request 694 * xprt_rdma_send_request - marshal and send an RPC request
660 * @task: RPC task with an RPC message in rq_snd_buf 695 * @task: RPC task with an RPC message in rq_snd_buf
661 * 696 *
697 * Caller holds the transport's write lock.
698 *
662 * Return values: 699 * Return values:
663 * 0: The request has been sent 700 * 0: The request has been sent
664 * ENOTCONN: Caller needs to invoke connect logic then call again 701 * ENOTCONN: Caller needs to invoke connect logic then call again
@@ -685,6 +722,9 @@ xprt_rdma_send_request(struct rpc_task *task)
685 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); 722 struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt);
686 int rc = 0; 723 int rc = 0;
687 724
725 if (!xprt_connected(xprt))
726 goto drop_connection;
727
688 /* On retransmit, remove any previously registered chunks */ 728 /* On retransmit, remove any previously registered chunks */
689 if (unlikely(!list_empty(&req->rl_registered))) 729 if (unlikely(!list_empty(&req->rl_registered)))
690 r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false); 730 r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false);
@@ -776,6 +816,7 @@ static struct rpc_xprt_ops xprt_rdma_procs = {
776 .alloc_slot = xprt_alloc_slot, 816 .alloc_slot = xprt_alloc_slot,
777 .release_request = xprt_release_rqst_cong, /* ditto */ 817 .release_request = xprt_release_rqst_cong, /* ditto */
778 .set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */ 818 .set_retrans_timeout = xprt_set_retrans_timeout_def, /* ditto */
819 .timer = xprt_rdma_timer,
779 .rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */ 820 .rpcbind = rpcb_getport_async, /* sunrpc/rpcb_clnt.c */
780 .set_port = xprt_rdma_set_port, 821 .set_port = xprt_rdma_set_port,
781 .connect = xprt_rdma_connect, 822 .connect = xprt_rdma_connect,
diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c
index 3b332b395045..3dbce9ac4327 100644
--- a/net/sunrpc/xprtrdma/verbs.c
+++ b/net/sunrpc/xprtrdma/verbs.c
@@ -53,7 +53,7 @@
53#include <linux/sunrpc/addr.h> 53#include <linux/sunrpc/addr.h>
54#include <linux/sunrpc/svc_rdma.h> 54#include <linux/sunrpc/svc_rdma.h>
55#include <asm/bitops.h> 55#include <asm/bitops.h>
56#include <linux/module.h> /* try_module_get()/module_put() */ 56
57#include <rdma/ib_cm.h> 57#include <rdma/ib_cm.h>
58 58
59#include "xprt_rdma.h" 59#include "xprt_rdma.h"
@@ -69,8 +69,11 @@
69/* 69/*
70 * internal functions 70 * internal functions
71 */ 71 */
72static void rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt);
73static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf);
74static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb);
72 75
73static struct workqueue_struct *rpcrdma_receive_wq; 76static struct workqueue_struct *rpcrdma_receive_wq __read_mostly;
74 77
75int 78int
76rpcrdma_alloc_wq(void) 79rpcrdma_alloc_wq(void)
@@ -180,7 +183,7 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
180 rep->rr_wc_flags = wc->wc_flags; 183 rep->rr_wc_flags = wc->wc_flags;
181 rep->rr_inv_rkey = wc->ex.invalidate_rkey; 184 rep->rr_inv_rkey = wc->ex.invalidate_rkey;
182 185
183 ib_dma_sync_single_for_cpu(rep->rr_device, 186 ib_dma_sync_single_for_cpu(rdmab_device(rep->rr_rdmabuf),
184 rdmab_addr(rep->rr_rdmabuf), 187 rdmab_addr(rep->rr_rdmabuf),
185 rep->rr_len, DMA_FROM_DEVICE); 188 rep->rr_len, DMA_FROM_DEVICE);
186 189
@@ -262,6 +265,21 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
262 __func__, ep); 265 __func__, ep);
263 complete(&ia->ri_done); 266 complete(&ia->ri_done);
264 break; 267 break;
268 case RDMA_CM_EVENT_DEVICE_REMOVAL:
269#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
270 pr_info("rpcrdma: removing device for %pIS:%u\n",
271 sap, rpc_get_port(sap));
272#endif
273 set_bit(RPCRDMA_IAF_REMOVING, &ia->ri_flags);
274 ep->rep_connected = -ENODEV;
275 xprt_force_disconnect(&xprt->rx_xprt);
276 wait_for_completion(&ia->ri_remove_done);
277
278 ia->ri_id = NULL;
279 ia->ri_pd = NULL;
280 ia->ri_device = NULL;
281 /* Return 1 to ensure the core destroys the id. */
282 return 1;
265 case RDMA_CM_EVENT_ESTABLISHED: 283 case RDMA_CM_EVENT_ESTABLISHED:
266 connstate = 1; 284 connstate = 1;
267 ib_query_qp(ia->ri_id->qp, attr, 285 ib_query_qp(ia->ri_id->qp, attr,
@@ -291,9 +309,6 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event)
291 goto connected; 309 goto connected;
292 case RDMA_CM_EVENT_DISCONNECTED: 310 case RDMA_CM_EVENT_DISCONNECTED:
293 connstate = -ECONNABORTED; 311 connstate = -ECONNABORTED;
294 goto connected;
295 case RDMA_CM_EVENT_DEVICE_REMOVAL:
296 connstate = -ENODEV;
297connected: 312connected:
298 dprintk("RPC: %s: %sconnected\n", 313 dprintk("RPC: %s: %sconnected\n",
299 __func__, connstate > 0 ? "" : "dis"); 314 __func__, connstate > 0 ? "" : "dis");
@@ -329,14 +344,6 @@ connected:
329 return 0; 344 return 0;
330} 345}
331 346
332static void rpcrdma_destroy_id(struct rdma_cm_id *id)
333{
334 if (id) {
335 module_put(id->device->owner);
336 rdma_destroy_id(id);
337 }
338}
339
340static struct rdma_cm_id * 347static struct rdma_cm_id *
341rpcrdma_create_id(struct rpcrdma_xprt *xprt, 348rpcrdma_create_id(struct rpcrdma_xprt *xprt,
342 struct rpcrdma_ia *ia, struct sockaddr *addr) 349 struct rpcrdma_ia *ia, struct sockaddr *addr)
@@ -346,6 +353,7 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
346 int rc; 353 int rc;
347 354
348 init_completion(&ia->ri_done); 355 init_completion(&ia->ri_done);
356 init_completion(&ia->ri_remove_done);
349 357
350 id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP, 358 id = rdma_create_id(&init_net, rpcrdma_conn_upcall, xprt, RDMA_PS_TCP,
351 IB_QPT_RC); 359 IB_QPT_RC);
@@ -370,16 +378,6 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
370 goto out; 378 goto out;
371 } 379 }
372 380
373 /* FIXME:
374 * Until xprtrdma supports DEVICE_REMOVAL, the provider must
375 * be pinned while there are active NFS/RDMA mounts to prevent
376 * hangs and crashes at umount time.
377 */
378 if (!ia->ri_async_rc && !try_module_get(id->device->owner)) {
379 dprintk("RPC: %s: Failed to get device module\n",
380 __func__);
381 ia->ri_async_rc = -ENODEV;
382 }
383 rc = ia->ri_async_rc; 381 rc = ia->ri_async_rc;
384 if (rc) 382 if (rc)
385 goto out; 383 goto out;
@@ -389,21 +387,20 @@ rpcrdma_create_id(struct rpcrdma_xprt *xprt,
389 if (rc) { 387 if (rc) {
390 dprintk("RPC: %s: rdma_resolve_route() failed %i\n", 388 dprintk("RPC: %s: rdma_resolve_route() failed %i\n",
391 __func__, rc); 389 __func__, rc);
392 goto put; 390 goto out;
393 } 391 }
394 rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout); 392 rc = wait_for_completion_interruptible_timeout(&ia->ri_done, wtimeout);
395 if (rc < 0) { 393 if (rc < 0) {
396 dprintk("RPC: %s: wait() exited: %i\n", 394 dprintk("RPC: %s: wait() exited: %i\n",
397 __func__, rc); 395 __func__, rc);
398 goto put; 396 goto out;
399 } 397 }
400 rc = ia->ri_async_rc; 398 rc = ia->ri_async_rc;
401 if (rc) 399 if (rc)
402 goto put; 400 goto out;
403 401
404 return id; 402 return id;
405put: 403
406 module_put(id->device->owner);
407out: 404out:
408 rdma_destroy_id(id); 405 rdma_destroy_id(id);
409 return ERR_PTR(rc); 406 return ERR_PTR(rc);
@@ -413,13 +410,16 @@ out:
413 * Exported functions. 410 * Exported functions.
414 */ 411 */
415 412
416/* 413/**
417 * Open and initialize an Interface Adapter. 414 * rpcrdma_ia_open - Open and initialize an Interface Adapter.
418 * o initializes fields of struct rpcrdma_ia, including 415 * @xprt: controlling transport
419 * interface and provider attributes and protection zone. 416 * @addr: IP address of remote peer
417 *
418 * Returns 0 on success, negative errno if an appropriate
419 * Interface Adapter could not be found and opened.
420 */ 420 */
421int 421int
422rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg) 422rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr)
423{ 423{
424 struct rpcrdma_ia *ia = &xprt->rx_ia; 424 struct rpcrdma_ia *ia = &xprt->rx_ia;
425 int rc; 425 int rc;
@@ -427,7 +427,7 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
427 ia->ri_id = rpcrdma_create_id(xprt, ia, addr); 427 ia->ri_id = rpcrdma_create_id(xprt, ia, addr);
428 if (IS_ERR(ia->ri_id)) { 428 if (IS_ERR(ia->ri_id)) {
429 rc = PTR_ERR(ia->ri_id); 429 rc = PTR_ERR(ia->ri_id);
430 goto out1; 430 goto out_err;
431 } 431 }
432 ia->ri_device = ia->ri_id->device; 432 ia->ri_device = ia->ri_id->device;
433 433
@@ -435,10 +435,10 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
435 if (IS_ERR(ia->ri_pd)) { 435 if (IS_ERR(ia->ri_pd)) {
436 rc = PTR_ERR(ia->ri_pd); 436 rc = PTR_ERR(ia->ri_pd);
437 pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc); 437 pr_err("rpcrdma: ib_alloc_pd() returned %d\n", rc);
438 goto out2; 438 goto out_err;
439 } 439 }
440 440
441 switch (memreg) { 441 switch (xprt_rdma_memreg_strategy) {
442 case RPCRDMA_FRMR: 442 case RPCRDMA_FRMR:
443 if (frwr_is_supported(ia)) { 443 if (frwr_is_supported(ia)) {
444 ia->ri_ops = &rpcrdma_frwr_memreg_ops; 444 ia->ri_ops = &rpcrdma_frwr_memreg_ops;
@@ -452,28 +452,73 @@ rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr, int memreg)
452 } 452 }
453 /*FALLTHROUGH*/ 453 /*FALLTHROUGH*/
454 default: 454 default:
455 pr_err("rpcrdma: Unsupported memory registration mode: %d\n", 455 pr_err("rpcrdma: Device %s does not support memreg mode %d\n",
456 memreg); 456 ia->ri_device->name, xprt_rdma_memreg_strategy);
457 rc = -EINVAL; 457 rc = -EINVAL;
458 goto out3; 458 goto out_err;
459 } 459 }
460 460
461 return 0; 461 return 0;
462 462
463out3: 463out_err:
464 ib_dealloc_pd(ia->ri_pd); 464 rpcrdma_ia_close(ia);
465 ia->ri_pd = NULL;
466out2:
467 rpcrdma_destroy_id(ia->ri_id);
468 ia->ri_id = NULL;
469out1:
470 return rc; 465 return rc;
471} 466}
472 467
473/* 468/**
474 * Clean up/close an IA. 469 * rpcrdma_ia_remove - Handle device driver unload
475 * o if event handles and PD have been initialized, free them. 470 * @ia: interface adapter being removed
476 * o close the IA 471 *
472 * Divest transport H/W resources associated with this adapter,
473 * but allow it to be restored later.
474 */
475void
476rpcrdma_ia_remove(struct rpcrdma_ia *ia)
477{
478 struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
479 rx_ia);
480 struct rpcrdma_ep *ep = &r_xprt->rx_ep;
481 struct rpcrdma_buffer *buf = &r_xprt->rx_buf;
482 struct rpcrdma_req *req;
483 struct rpcrdma_rep *rep;
484
485 cancel_delayed_work_sync(&buf->rb_refresh_worker);
486
487 /* This is similar to rpcrdma_ep_destroy, but:
488 * - Don't cancel the connect worker.
489 * - Don't call rpcrdma_ep_disconnect, which waits
490 * for another conn upcall, which will deadlock.
491 * - rdma_disconnect is unneeded, the underlying
492 * connection is already gone.
493 */
494 if (ia->ri_id->qp) {
495 ib_drain_qp(ia->ri_id->qp);
496 rdma_destroy_qp(ia->ri_id);
497 ia->ri_id->qp = NULL;
498 }
499 ib_free_cq(ep->rep_attr.recv_cq);
500 ib_free_cq(ep->rep_attr.send_cq);
501
502 /* The ULP is responsible for ensuring all DMA
503 * mappings and MRs are gone.
504 */
505 list_for_each_entry(rep, &buf->rb_recv_bufs, rr_list)
506 rpcrdma_dma_unmap_regbuf(rep->rr_rdmabuf);
507 list_for_each_entry(req, &buf->rb_allreqs, rl_all) {
508 rpcrdma_dma_unmap_regbuf(req->rl_rdmabuf);
509 rpcrdma_dma_unmap_regbuf(req->rl_sendbuf);
510 rpcrdma_dma_unmap_regbuf(req->rl_recvbuf);
511 }
512 rpcrdma_destroy_mrs(buf);
513
514 /* Allow waiters to continue */
515 complete(&ia->ri_remove_done);
516}
517
518/**
519 * rpcrdma_ia_close - Clean up/close an IA.
520 * @ia: interface adapter to close
521 *
477 */ 522 */
478void 523void
479rpcrdma_ia_close(struct rpcrdma_ia *ia) 524rpcrdma_ia_close(struct rpcrdma_ia *ia)
@@ -482,13 +527,15 @@ rpcrdma_ia_close(struct rpcrdma_ia *ia)
482 if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) { 527 if (ia->ri_id != NULL && !IS_ERR(ia->ri_id)) {
483 if (ia->ri_id->qp) 528 if (ia->ri_id->qp)
484 rdma_destroy_qp(ia->ri_id); 529 rdma_destroy_qp(ia->ri_id);
485 rpcrdma_destroy_id(ia->ri_id); 530 rdma_destroy_id(ia->ri_id);
486 ia->ri_id = NULL;
487 } 531 }
532 ia->ri_id = NULL;
533 ia->ri_device = NULL;
488 534
489 /* If the pd is still busy, xprtrdma missed freeing a resource */ 535 /* If the pd is still busy, xprtrdma missed freeing a resource */
490 if (ia->ri_pd && !IS_ERR(ia->ri_pd)) 536 if (ia->ri_pd && !IS_ERR(ia->ri_pd))
491 ib_dealloc_pd(ia->ri_pd); 537 ib_dealloc_pd(ia->ri_pd);
538 ia->ri_pd = NULL;
492} 539}
493 540
494/* 541/*
@@ -646,6 +693,99 @@ rpcrdma_ep_destroy(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
646 ib_free_cq(ep->rep_attr.send_cq); 693 ib_free_cq(ep->rep_attr.send_cq);
647} 694}
648 695
696/* Re-establish a connection after a device removal event.
697 * Unlike a normal reconnection, a fresh PD and a new set
698 * of MRs and buffers is needed.
699 */
700static int
701rpcrdma_ep_recreate_xprt(struct rpcrdma_xprt *r_xprt,
702 struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
703{
704 struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr;
705 int rc, err;
706
707 pr_info("%s: r_xprt = %p\n", __func__, r_xprt);
708
709 rc = -EHOSTUNREACH;
710 if (rpcrdma_ia_open(r_xprt, sap))
711 goto out1;
712
713 rc = -ENOMEM;
714 err = rpcrdma_ep_create(ep, ia, &r_xprt->rx_data);
715 if (err) {
716 pr_err("rpcrdma: rpcrdma_ep_create returned %d\n", err);
717 goto out2;
718 }
719
720 rc = -ENETUNREACH;
721 err = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
722 if (err) {
723 pr_err("rpcrdma: rdma_create_qp returned %d\n", err);
724 goto out3;
725 }
726
727 rpcrdma_create_mrs(r_xprt);
728 return 0;
729
730out3:
731 rpcrdma_ep_destroy(ep, ia);
732out2:
733 rpcrdma_ia_close(ia);
734out1:
735 return rc;
736}
737
738static int
739rpcrdma_ep_reconnect(struct rpcrdma_xprt *r_xprt, struct rpcrdma_ep *ep,
740 struct rpcrdma_ia *ia)
741{
742 struct sockaddr *sap = (struct sockaddr *)&r_xprt->rx_data.addr;
743 struct rdma_cm_id *id, *old;
744 int err, rc;
745
746 dprintk("RPC: %s: reconnecting...\n", __func__);
747
748 rpcrdma_ep_disconnect(ep, ia);
749
750 rc = -EHOSTUNREACH;
751 id = rpcrdma_create_id(r_xprt, ia, sap);
752 if (IS_ERR(id))
753 goto out;
754
755 /* As long as the new ID points to the same device as the
756 * old ID, we can reuse the transport's existing PD and all
757 * previously allocated MRs. Also, the same device means
758 * the transport's previous DMA mappings are still valid.
759 *
760 * This is a sanity check only. There should be no way these
761 * point to two different devices here.
762 */
763 old = id;
764 rc = -ENETUNREACH;
765 if (ia->ri_device != id->device) {
766 pr_err("rpcrdma: can't reconnect on different device!\n");
767 goto out_destroy;
768 }
769
770 err = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
771 if (err) {
772 dprintk("RPC: %s: rdma_create_qp returned %d\n",
773 __func__, err);
774 goto out_destroy;
775 }
776
777 /* Atomically replace the transport's ID and QP. */
778 rc = 0;
779 old = ia->ri_id;
780 ia->ri_id = id;
781 rdma_destroy_qp(old);
782
783out_destroy:
784 rdma_destroy_id(old);
785out:
786 return rc;
787}
788
649/* 789/*
650 * Connect unconnected endpoint. 790 * Connect unconnected endpoint.
651 */ 791 */
@@ -654,61 +794,30 @@ rpcrdma_ep_connect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia)
654{ 794{
655 struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt, 795 struct rpcrdma_xprt *r_xprt = container_of(ia, struct rpcrdma_xprt,
656 rx_ia); 796 rx_ia);
657 struct rdma_cm_id *id, *old;
658 struct sockaddr *sap;
659 unsigned int extras; 797 unsigned int extras;
660 int rc = 0; 798 int rc;
661 799
662 if (ep->rep_connected != 0) {
663retry: 800retry:
664 dprintk("RPC: %s: reconnecting...\n", __func__); 801 switch (ep->rep_connected) {
665 802 case 0:
666 rpcrdma_ep_disconnect(ep, ia);
667
668 sap = (struct sockaddr *)&r_xprt->rx_data.addr;
669 id = rpcrdma_create_id(r_xprt, ia, sap);
670 if (IS_ERR(id)) {
671 rc = -EHOSTUNREACH;
672 goto out;
673 }
674 /* TEMP TEMP TEMP - fail if new device:
675 * Deregister/remarshal *all* requests!
676 * Close and recreate adapter, pd, etc!
677 * Re-determine all attributes still sane!
678 * More stuff I haven't thought of!
679 * Rrrgh!
680 */
681 if (ia->ri_device != id->device) {
682 printk("RPC: %s: can't reconnect on "
683 "different device!\n", __func__);
684 rpcrdma_destroy_id(id);
685 rc = -ENETUNREACH;
686 goto out;
687 }
688 /* END TEMP */
689 rc = rdma_create_qp(id, ia->ri_pd, &ep->rep_attr);
690 if (rc) {
691 dprintk("RPC: %s: rdma_create_qp failed %i\n",
692 __func__, rc);
693 rpcrdma_destroy_id(id);
694 rc = -ENETUNREACH;
695 goto out;
696 }
697
698 old = ia->ri_id;
699 ia->ri_id = id;
700
701 rdma_destroy_qp(old);
702 rpcrdma_destroy_id(old);
703 } else {
704 dprintk("RPC: %s: connecting...\n", __func__); 803 dprintk("RPC: %s: connecting...\n", __func__);
705 rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr); 804 rc = rdma_create_qp(ia->ri_id, ia->ri_pd, &ep->rep_attr);
706 if (rc) { 805 if (rc) {
707 dprintk("RPC: %s: rdma_create_qp failed %i\n", 806 dprintk("RPC: %s: rdma_create_qp failed %i\n",
708 __func__, rc); 807 __func__, rc);
709 /* do not update ep->rep_connected */ 808 rc = -ENETUNREACH;
710 return -ENETUNREACH; 809 goto out_noupdate;
711 } 810 }
811 break;
812 case -ENODEV:
813 rc = rpcrdma_ep_recreate_xprt(r_xprt, ep, ia);
814 if (rc)
815 goto out_noupdate;
816 break;
817 default:
818 rc = rpcrdma_ep_reconnect(r_xprt, ep, ia);
819 if (rc)
820 goto out;
712 } 821 }
713 822
714 ep->rep_connected = 0; 823 ep->rep_connected = 0;
@@ -736,6 +845,8 @@ retry:
736out: 845out:
737 if (rc) 846 if (rc)
738 ep->rep_connected = rc; 847 ep->rep_connected = rc;
848
849out_noupdate:
739 return rc; 850 return rc;
740} 851}
741 852
@@ -878,7 +989,6 @@ struct rpcrdma_rep *
878rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) 989rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
879{ 990{
880 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data; 991 struct rpcrdma_create_data_internal *cdata = &r_xprt->rx_data;
881 struct rpcrdma_ia *ia = &r_xprt->rx_ia;
882 struct rpcrdma_rep *rep; 992 struct rpcrdma_rep *rep;
883 int rc; 993 int rc;
884 994
@@ -894,7 +1004,6 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt)
894 goto out_free; 1004 goto out_free;
895 } 1005 }
896 1006
897 rep->rr_device = ia->ri_device;
898 rep->rr_cqe.done = rpcrdma_wc_receive; 1007 rep->rr_cqe.done = rpcrdma_wc_receive;
899 rep->rr_rxprt = r_xprt; 1008 rep->rr_rxprt = r_xprt;
900 INIT_WORK(&rep->rr_work, rpcrdma_reply_handler); 1009 INIT_WORK(&rep->rr_work, rpcrdma_reply_handler);
@@ -1037,6 +1146,7 @@ void
1037rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) 1146rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf)
1038{ 1147{
1039 cancel_delayed_work_sync(&buf->rb_recovery_worker); 1148 cancel_delayed_work_sync(&buf->rb_recovery_worker);
1149 cancel_delayed_work_sync(&buf->rb_refresh_worker);
1040 1150
1041 while (!list_empty(&buf->rb_recv_bufs)) { 1151 while (!list_empty(&buf->rb_recv_bufs)) {
1042 struct rpcrdma_rep *rep; 1152 struct rpcrdma_rep *rep;
@@ -1081,7 +1191,8 @@ rpcrdma_get_mw(struct rpcrdma_xprt *r_xprt)
1081 1191
1082out_nomws: 1192out_nomws:
1083 dprintk("RPC: %s: no MWs available\n", __func__); 1193 dprintk("RPC: %s: no MWs available\n", __func__);
1084 schedule_delayed_work(&buf->rb_refresh_worker, 0); 1194 if (r_xprt->rx_ep.rep_connected != -ENODEV)
1195 schedule_delayed_work(&buf->rb_refresh_worker, 0);
1085 1196
1086 /* Allow the reply handler and refresh worker to run */ 1197 /* Allow the reply handler and refresh worker to run */
1087 cond_resched(); 1198 cond_resched();
@@ -1231,17 +1342,19 @@ rpcrdma_alloc_regbuf(size_t size, enum dma_data_direction direction,
1231bool 1342bool
1232__rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb) 1343__rpcrdma_dma_map_regbuf(struct rpcrdma_ia *ia, struct rpcrdma_regbuf *rb)
1233{ 1344{
1345 struct ib_device *device = ia->ri_device;
1346
1234 if (rb->rg_direction == DMA_NONE) 1347 if (rb->rg_direction == DMA_NONE)
1235 return false; 1348 return false;
1236 1349
1237 rb->rg_iov.addr = ib_dma_map_single(ia->ri_device, 1350 rb->rg_iov.addr = ib_dma_map_single(device,
1238 (void *)rb->rg_base, 1351 (void *)rb->rg_base,
1239 rdmab_length(rb), 1352 rdmab_length(rb),
1240 rb->rg_direction); 1353 rb->rg_direction);
1241 if (ib_dma_mapping_error(ia->ri_device, rdmab_addr(rb))) 1354 if (ib_dma_mapping_error(device, rdmab_addr(rb)))
1242 return false; 1355 return false;
1243 1356
1244 rb->rg_device = ia->ri_device; 1357 rb->rg_device = device;
1245 rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey; 1358 rb->rg_iov.lkey = ia->ri_pd->local_dma_lkey;
1246 return true; 1359 return true;
1247} 1360}
diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h
index 171a35116de9..1d66acf1a723 100644
--- a/net/sunrpc/xprtrdma/xprt_rdma.h
+++ b/net/sunrpc/xprtrdma/xprt_rdma.h
@@ -69,6 +69,7 @@ struct rpcrdma_ia {
69 struct rdma_cm_id *ri_id; 69 struct rdma_cm_id *ri_id;
70 struct ib_pd *ri_pd; 70 struct ib_pd *ri_pd;
71 struct completion ri_done; 71 struct completion ri_done;
72 struct completion ri_remove_done;
72 int ri_async_rc; 73 int ri_async_rc;
73 unsigned int ri_max_segs; 74 unsigned int ri_max_segs;
74 unsigned int ri_max_frmr_depth; 75 unsigned int ri_max_frmr_depth;
@@ -78,10 +79,15 @@ struct rpcrdma_ia {
78 bool ri_reminv_expected; 79 bool ri_reminv_expected;
79 bool ri_implicit_roundup; 80 bool ri_implicit_roundup;
80 enum ib_mr_type ri_mrtype; 81 enum ib_mr_type ri_mrtype;
82 unsigned long ri_flags;
81 struct ib_qp_attr ri_qp_attr; 83 struct ib_qp_attr ri_qp_attr;
82 struct ib_qp_init_attr ri_qp_init_attr; 84 struct ib_qp_init_attr ri_qp_init_attr;
83}; 85};
84 86
87enum {
88 RPCRDMA_IAF_REMOVING = 0,
89};
90
85/* 91/*
86 * RDMA Endpoint -- one per transport instance 92 * RDMA Endpoint -- one per transport instance
87 */ 93 */
@@ -164,6 +170,12 @@ rdmab_to_msg(struct rpcrdma_regbuf *rb)
164 return (struct rpcrdma_msg *)rb->rg_base; 170 return (struct rpcrdma_msg *)rb->rg_base;
165} 171}
166 172
173static inline struct ib_device *
174rdmab_device(struct rpcrdma_regbuf *rb)
175{
176 return rb->rg_device;
177}
178
167#define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN) 179#define RPCRDMA_DEF_GFP (GFP_NOIO | __GFP_NOWARN)
168 180
169/* To ensure a transport can always make forward progress, 181/* To ensure a transport can always make forward progress,
@@ -209,7 +221,6 @@ struct rpcrdma_rep {
209 unsigned int rr_len; 221 unsigned int rr_len;
210 int rr_wc_flags; 222 int rr_wc_flags;
211 u32 rr_inv_rkey; 223 u32 rr_inv_rkey;
212 struct ib_device *rr_device;
213 struct rpcrdma_xprt *rr_rxprt; 224 struct rpcrdma_xprt *rr_rxprt;
214 struct work_struct rr_work; 225 struct work_struct rr_work;
215 struct list_head rr_list; 226 struct list_head rr_list;
@@ -380,7 +391,6 @@ struct rpcrdma_buffer {
380 spinlock_t rb_mwlock; /* protect rb_mws list */ 391 spinlock_t rb_mwlock; /* protect rb_mws list */
381 struct list_head rb_mws; 392 struct list_head rb_mws;
382 struct list_head rb_all; 393 struct list_head rb_all;
383 char *rb_pool;
384 394
385 spinlock_t rb_lock; /* protect buf lists */ 395 spinlock_t rb_lock; /* protect buf lists */
386 int rb_send_count, rb_recv_count; 396 int rb_send_count, rb_recv_count;
@@ -497,10 +507,16 @@ struct rpcrdma_xprt {
497 * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */ 507 * Default is 0, see sysctl entry and rpc_rdma.c rpcrdma_convert_iovs() */
498extern int xprt_rdma_pad_optimize; 508extern int xprt_rdma_pad_optimize;
499 509
510/* This setting controls the hunt for a supported memory
511 * registration strategy.
512 */
513extern unsigned int xprt_rdma_memreg_strategy;
514
500/* 515/*
501 * Interface Adapter calls - xprtrdma/verbs.c 516 * Interface Adapter calls - xprtrdma/verbs.c
502 */ 517 */
503int rpcrdma_ia_open(struct rpcrdma_xprt *, struct sockaddr *, int); 518int rpcrdma_ia_open(struct rpcrdma_xprt *xprt, struct sockaddr *addr);
519void rpcrdma_ia_remove(struct rpcrdma_ia *ia);
504void rpcrdma_ia_close(struct rpcrdma_ia *); 520void rpcrdma_ia_close(struct rpcrdma_ia *);
505bool frwr_is_supported(struct rpcrdma_ia *); 521bool frwr_is_supported(struct rpcrdma_ia *);
506bool fmr_is_supported(struct rpcrdma_ia *); 522bool fmr_is_supported(struct rpcrdma_ia *);