author     Linus Torvalds <torvalds@linux-foundation.org>  2012-10-10 10:52:35 -0400
committer  Linus Torvalds <torvalds@linux-foundation.org>  2012-10-10 10:52:35 -0400
commit     df632d3ce7eacf92ad9b642301c7b53a1d95b8d8 (patch)
tree       848c39ed4f7cfdb582bf2e0a0a03147efaa5198d
parent     2474542f64432398f503373f53bdf620491bcfa8 (diff)
parent     af283885b70248268617955a5ea5476647bd556b (diff)
Merge tag 'nfs-for-3.7-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs
Pull NFS client updates from Trond Myklebust:
 "Features include:

   - Remove CONFIG_EXPERIMENTAL dependency from NFSv4.1
     Aside from the issues discussed at the LKS, distros are shipping
     NFSv4.1 with all the trimmings.
   - Fix fdatasync()/fsync() for the corner case of a server reboot.
   - NFSv4 OPEN access fix: finally distinguish correctly between
     open-for-read and open-for-execute permissions in all situations.
   - Ensure that the TCP socket is closed when we're in CLOSE_WAIT
   - More idmapper bugfixes
   - Lots of pNFS bugfixes and cleanups to remove unnecessary state and
     make the code easier to read.
   - In cases where a pNFS read or write fails, allow the client to
     resume trying layoutgets after two minutes of read/write-through-mds.
   - More net namespace fixes to the NFSv4 callback code.
   - More net namespace fixes to the NFSv3 locking code.
   - More NFSv4 migration preparatory patches.
     Including patches to detect network trunking in both NFSv4 and NFSv4.1
   - pNFS block updates to optimise LAYOUTGET calls."

* tag 'nfs-for-3.7-1' of git://git.linux-nfs.org/projects/trondmy/linux-nfs: (113 commits)
  pnfsblock: cleanup nfs4_blkdev_get
  NFS41: send real read size in layoutget
  NFS41: send real write size in layoutget
  NFS: track direct IO left bytes
  NFSv4.1: Cleanup ugliness in pnfs_layoutgets_blocked()
  NFSv4.1: Ensure that the layout sequence id stays 'close' to the current
  NFSv4.1: Deal with seqid wraparound in the pNFS return-on-close code
  NFSv4 set open access operation call flag in nfs4_init_opendata_res
  NFSv4.1: Remove the dependency on CONFIG_EXPERIMENTAL
  NFSv4 reduce attribute requests for open reclaim
  NFSv4: nfs4_open_done first must check that GETATTR decoded a file type
  NFSv4.1: Deal with wraparound when updating the layout "barrier" seqid
  NFSv4.1: Deal with wraparound issues when updating the layout stateid
  NFSv4.1: Always set the layout stateid if this is the first layoutget
  NFSv4.1: Fix another refcount issue in pnfs_find_alloc_layout
  NFSv4: don't put ACCESS in OPEN compound if O_EXCL
  NFSv4: don't check MAY_WRITE access bit in OPEN
  NFS: Set key construction data for the legacy upcall
  NFSv4.1: don't do two EXCHANGE_IDs on mount
  NFS: nfs41_walk_client_list(): re-lock before iterating
  ...
-rw-r--r--  Documentation/filesystems/nfs/nfs.txt | 44
-rw-r--r--  Documentation/kernel-parameters.txt | 5
-rw-r--r--  fs/lockd/mon.c | 86
-rw-r--r--  fs/lockd/netns.h | 4
-rw-r--r--  fs/lockd/svc.c | 1
-rw-r--r--  fs/nfs/Kconfig | 4
-rw-r--r--  fs/nfs/blocklayout/blocklayout.c | 306
-rw-r--r--  fs/nfs/blocklayout/blocklayout.h | 2
-rw-r--r--  fs/nfs/blocklayout/blocklayoutdev.c | 25
-rw-r--r--  fs/nfs/blocklayout/extents.c | 3
-rw-r--r--  fs/nfs/callback.c | 321
-rw-r--r--  fs/nfs/callback.h | 3
-rw-r--r--  fs/nfs/callback_proc.c | 31
-rw-r--r--  fs/nfs/client.c | 23
-rw-r--r--  fs/nfs/dir.c | 16
-rw-r--r--  fs/nfs/direct.c | 32
-rw-r--r--  fs/nfs/file.c | 41
-rw-r--r--  fs/nfs/getroot.c | 2
-rw-r--r--  fs/nfs/idmap.c | 114
-rw-r--r--  fs/nfs/inode.c | 10
-rw-r--r--  fs/nfs/internal.h | 15
-rw-r--r--  fs/nfs/netns.h | 4
-rw-r--r--  fs/nfs/nfs4_fs.h | 19
-rw-r--r--  fs/nfs/nfs4client.c | 256
-rw-r--r--  fs/nfs/nfs4file.c | 29
-rw-r--r--  fs/nfs/nfs4filelayout.c | 41
-rw-r--r--  fs/nfs/nfs4filelayout.h | 16
-rw-r--r--  fs/nfs/nfs4filelayoutdev.c | 17
-rw-r--r--  fs/nfs/nfs4namespace.c | 16
-rw-r--r--  fs/nfs/nfs4proc.c | 342
-rw-r--r--  fs/nfs/nfs4state.c | 228
-rw-r--r--  fs/nfs/nfs4sysctl.c | 1
-rw-r--r--  fs/nfs/nfs4xdr.c | 31
-rw-r--r--  fs/nfs/objlayout/objio_osd.c | 9
-rw-r--r--  fs/nfs/pagelist.c | 12
-rw-r--r--  fs/nfs/pnfs.c | 417
-rw-r--r--  fs/nfs/pnfs.h | 57
-rw-r--r--  fs/nfs/pnfs_dev.c | 27
-rw-r--r--  fs/nfs/super.c | 31
-rw-r--r--  fs/nfs/write.c | 11
-rw-r--r--  include/linux/nfs_fs.h | 11
-rw-r--r--  include/linux/nfs_fs_sb.h | 3
-rw-r--r--  include/linux/nfs_xdr.h | 5
-rw-r--r--  include/linux/sunrpc/clnt.h | 2
-rw-r--r--  include/linux/sunrpc/xprt.h | 3
-rw-r--r--  net/sunrpc/auth_gss/auth_gss.c | 58
-rw-r--r--  net/sunrpc/clnt.c | 105
-rw-r--r--  net/sunrpc/rpc_pipe.c | 8
-rw-r--r--  net/sunrpc/sched.c | 2
-rw-r--r--  net/sunrpc/xdr.c | 21
-rw-r--r--  net/sunrpc/xprt.c | 8
-rw-r--r--  net/sunrpc/xprtrdma/transport.c | 22
-rw-r--r--  net/sunrpc/xprtsock.c | 40
53 files changed, 2126 insertions, 814 deletions
diff --git a/Documentation/filesystems/nfs/nfs.txt b/Documentation/filesystems/nfs/nfs.txt
index f50f26ce6cd0..f2571c8bef74 100644
--- a/Documentation/filesystems/nfs/nfs.txt
+++ b/Documentation/filesystems/nfs/nfs.txt
@@ -12,9 +12,47 @@ and work is in progress on adding support for minor version 1 of the NFSv4
12protocol. 12protocol.
13 13
14The purpose of this document is to provide information on some of the 14The purpose of this document is to provide information on some of the
15upcall interfaces that are used in order to provide the NFS client with 15special features of the NFS client that can be configured by system
16some of the information that it requires in order to fully comply with 16administrators.
17the NFS spec. 17
18
19The nfs4_unique_id parameter
20============================
21
22NFSv4 requires clients to identify themselves to servers with a unique
23string. File open and lock state shared between one client and one server
24is associated with this identity. To support robust NFSv4 state recovery
25and transparent state migration, this identity string must not change
26across client reboots.
27
28Without any other intervention, the Linux client uses a string that contains
29the local system's node name. System administrators, however, often do not
30take care to ensure that node names are fully qualified and do not change
31over the lifetime of a client system. Node names can have other
32administrative requirements that require particular behavior that does not
33work well as part of an nfs_client_id4 string.
34
35The nfs.nfs4_unique_id boot parameter specifies a unique string that can be
36used instead of a system's node name when an NFS client identifies itself to
37a server. Thus, if the system's node name is not unique, or it changes, its
38nfs.nfs4_unique_id stays the same, preventing collision with other clients
39or loss of state during NFS reboot recovery or transparent state migration.
40
41The nfs.nfs4_unique_id string is typically a UUID, though it can contain
42anything that is believed to be unique across all NFS clients. An
43nfs4_unique_id string should be chosen when a client system is installed,
44just as a system's root file system gets a fresh UUID in its label at
45install time.
46
47The string should remain fixed for the lifetime of the client. It can be
48changed safely if care is taken that the client shuts down cleanly and all
49outstanding NFSv4 state has expired, to prevent loss of NFSv4 state.
50
51This string can be stored in an NFS client's grub.conf, or it can be provided
52via a net boot facility such as PXE. It may also be specified as an nfs.ko
53 module parameter. Specifying a uniquifier string is not supported for NFS
54clients running in containers.
55
18 56
19The DNS resolver 57The DNS resolver
20================ 58================
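
The documentation added above describes the new nfs.nfs4_unique_id parameter. As an illustration only (the UUID value and the modprobe file name below are made-up placeholders, not part of this patch), an administrator could pin the client identity either on the kernel command line or as an nfs.ko module option:

  # appended to the kernel line in grub.conf (UUID generated once at install time)
  nfs.nfs4_unique_id=0e5c1dbb-2a4f-4b6a-9f1e-7c8d3a5b6e42

  # or as a module option, e.g. in /etc/modprobe.d/nfs.conf
  options nfs nfs4_unique_id=0e5c1dbb-2a4f-4b6a-9f1e-7c8d3a5b6e42

Either form sets the same module parameter; what matters, per the documentation above, is that the string stays stable across reboots.
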
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index f777fa96243d..e2ed3360b708 100644
--- a/Documentation/kernel-parameters.txt
+++ b/Documentation/kernel-parameters.txt
@@ -1730,6 +1730,11 @@ bytes respectively. Such letter suffixes can also be entirely omitted.
1730 will be autodetected by the client, and it will fall 1730 will be autodetected by the client, and it will fall
1731 back to using the idmapper. 1731 back to using the idmapper.
1732 To turn off this behaviour, set the value to '0'. 1732 To turn off this behaviour, set the value to '0'.
1733 nfs.nfs4_unique_id=
1734 [NFS4] Specify an additional fixed unique ident-
1735 ification string that NFSv4 clients can insert into
1736 their nfs_client_id4 string. This is typically a
1737 UUID that is generated at system install time.
1733 1738
1734 nfs.send_implementation_id = 1739 nfs.send_implementation_id =
1735 [NFSv4.1] Send client implementation identification 1740 [NFSv4.1] Send client implementation identification
diff --git a/fs/lockd/mon.c b/fs/lockd/mon.c
index 7ef14b3c5bee..e4fb3ba5a58a 100644
--- a/fs/lockd/mon.c
+++ b/fs/lockd/mon.c
@@ -7,7 +7,6 @@
7 */ 7 */
8 8
9#include <linux/types.h> 9#include <linux/types.h>
10#include <linux/utsname.h>
11#include <linux/kernel.h> 10#include <linux/kernel.h>
12#include <linux/ktime.h> 11#include <linux/ktime.h>
13#include <linux/slab.h> 12#include <linux/slab.h>
@@ -19,6 +18,8 @@
19 18
20#include <asm/unaligned.h> 19#include <asm/unaligned.h>
21 20
21#include "netns.h"
22
22#define NLMDBG_FACILITY NLMDBG_MONITOR 23#define NLMDBG_FACILITY NLMDBG_MONITOR
23#define NSM_PROGRAM 100024 24#define NSM_PROGRAM 100024
24#define NSM_VERSION 1 25#define NSM_VERSION 1
@@ -40,6 +41,7 @@ struct nsm_args {
40 u32 proc; 41 u32 proc;
41 42
42 char *mon_name; 43 char *mon_name;
44 char *nodename;
43}; 45};
44 46
45struct nsm_res { 47struct nsm_res {
@@ -70,7 +72,7 @@ static struct rpc_clnt *nsm_create(struct net *net)
70 }; 72 };
71 struct rpc_create_args args = { 73 struct rpc_create_args args = {
72 .net = net, 74 .net = net,
73 .protocol = XPRT_TRANSPORT_UDP, 75 .protocol = XPRT_TRANSPORT_TCP,
74 .address = (struct sockaddr *)&sin, 76 .address = (struct sockaddr *)&sin,
75 .addrsize = sizeof(sin), 77 .addrsize = sizeof(sin),
76 .servername = "rpc.statd", 78 .servername = "rpc.statd",
@@ -83,10 +85,54 @@ static struct rpc_clnt *nsm_create(struct net *net)
83 return rpc_create(&args); 85 return rpc_create(&args);
84} 86}
85 87
86static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res, 88static struct rpc_clnt *nsm_client_get(struct net *net)
87 struct net *net)
88{ 89{
90 static DEFINE_MUTEX(nsm_create_mutex);
89 struct rpc_clnt *clnt; 91 struct rpc_clnt *clnt;
92 struct lockd_net *ln = net_generic(net, lockd_net_id);
93
94 spin_lock(&ln->nsm_clnt_lock);
95 if (ln->nsm_users) {
96 ln->nsm_users++;
97 clnt = ln->nsm_clnt;
98 spin_unlock(&ln->nsm_clnt_lock);
99 goto out;
100 }
101 spin_unlock(&ln->nsm_clnt_lock);
102
103 mutex_lock(&nsm_create_mutex);
104 clnt = nsm_create(net);
105 if (!IS_ERR(clnt)) {
106 ln->nsm_clnt = clnt;
107 smp_wmb();
108 ln->nsm_users = 1;
109 }
110 mutex_unlock(&nsm_create_mutex);
111out:
112 return clnt;
113}
114
115static void nsm_client_put(struct net *net)
116{
117 struct lockd_net *ln = net_generic(net, lockd_net_id);
118 struct rpc_clnt *clnt = ln->nsm_clnt;
119 int shutdown = 0;
120
121 spin_lock(&ln->nsm_clnt_lock);
122 if (ln->nsm_users) {
123 if (--ln->nsm_users)
124 ln->nsm_clnt = NULL;
125 shutdown = !ln->nsm_users;
126 }
127 spin_unlock(&ln->nsm_clnt_lock);
128
129 if (shutdown)
130 rpc_shutdown_client(clnt);
131}
132
133static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res,
134 struct rpc_clnt *clnt)
135{
90 int status; 136 int status;
91 struct nsm_args args = { 137 struct nsm_args args = {
92 .priv = &nsm->sm_priv, 138 .priv = &nsm->sm_priv,
@@ -94,31 +140,24 @@ static int nsm_mon_unmon(struct nsm_handle *nsm, u32 proc, struct nsm_res *res,
94 .vers = 3, 140 .vers = 3,
95 .proc = NLMPROC_NSM_NOTIFY, 141 .proc = NLMPROC_NSM_NOTIFY,
96 .mon_name = nsm->sm_mon_name, 142 .mon_name = nsm->sm_mon_name,
143 .nodename = clnt->cl_nodename,
97 }; 144 };
98 struct rpc_message msg = { 145 struct rpc_message msg = {
99 .rpc_argp = &args, 146 .rpc_argp = &args,
100 .rpc_resp = res, 147 .rpc_resp = res,
101 }; 148 };
102 149
103 clnt = nsm_create(net); 150 BUG_ON(clnt == NULL);
104 if (IS_ERR(clnt)) {
105 status = PTR_ERR(clnt);
106 dprintk("lockd: failed to create NSM upcall transport, "
107 "status=%d\n", status);
108 goto out;
109 }
110 151
111 memset(res, 0, sizeof(*res)); 152 memset(res, 0, sizeof(*res));
112 153
113 msg.rpc_proc = &clnt->cl_procinfo[proc]; 154 msg.rpc_proc = &clnt->cl_procinfo[proc];
114 status = rpc_call_sync(clnt, &msg, 0); 155 status = rpc_call_sync(clnt, &msg, RPC_TASK_SOFTCONN);
115 if (status < 0) 156 if (status < 0)
116 dprintk("lockd: NSM upcall RPC failed, status=%d\n", 157 dprintk("lockd: NSM upcall RPC failed, status=%d\n",
117 status); 158 status);
118 else 159 else
119 status = 0; 160 status = 0;
120 rpc_shutdown_client(clnt);
121 out:
122 return status; 161 return status;
123} 162}
124 163
@@ -138,6 +177,7 @@ int nsm_monitor(const struct nlm_host *host)
138 struct nsm_handle *nsm = host->h_nsmhandle; 177 struct nsm_handle *nsm = host->h_nsmhandle;
139 struct nsm_res res; 178 struct nsm_res res;
140 int status; 179 int status;
180 struct rpc_clnt *clnt;
141 181
142 dprintk("lockd: nsm_monitor(%s)\n", nsm->sm_name); 182 dprintk("lockd: nsm_monitor(%s)\n", nsm->sm_name);
143 183
@@ -150,7 +190,15 @@ int nsm_monitor(const struct nlm_host *host)
150 */ 190 */
151 nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf; 191 nsm->sm_mon_name = nsm_use_hostnames ? nsm->sm_name : nsm->sm_addrbuf;
152 192
153 status = nsm_mon_unmon(nsm, NSMPROC_MON, &res, host->net); 193 clnt = nsm_client_get(host->net);
194 if (IS_ERR(clnt)) {
195 status = PTR_ERR(clnt);
196 dprintk("lockd: failed to create NSM upcall transport, "
197 "status=%d, net=%p\n", status, host->net);
198 return status;
199 }
200
201 status = nsm_mon_unmon(nsm, NSMPROC_MON, &res, clnt);
154 if (unlikely(res.status != 0)) 202 if (unlikely(res.status != 0))
155 status = -EIO; 203 status = -EIO;
156 if (unlikely(status < 0)) { 204 if (unlikely(status < 0)) {
@@ -182,9 +230,11 @@ void nsm_unmonitor(const struct nlm_host *host)
182 230
183 if (atomic_read(&nsm->sm_count) == 1 231 if (atomic_read(&nsm->sm_count) == 1
184 && nsm->sm_monitored && !nsm->sm_sticky) { 232 && nsm->sm_monitored && !nsm->sm_sticky) {
233 struct lockd_net *ln = net_generic(host->net, lockd_net_id);
234
185 dprintk("lockd: nsm_unmonitor(%s)\n", nsm->sm_name); 235 dprintk("lockd: nsm_unmonitor(%s)\n", nsm->sm_name);
186 236
187 status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res, host->net); 237 status = nsm_mon_unmon(nsm, NSMPROC_UNMON, &res, ln->nsm_clnt);
188 if (res.status != 0) 238 if (res.status != 0)
189 status = -EIO; 239 status = -EIO;
190 if (status < 0) 240 if (status < 0)
@@ -192,6 +242,8 @@ void nsm_unmonitor(const struct nlm_host *host)
192 nsm->sm_name); 242 nsm->sm_name);
193 else 243 else
194 nsm->sm_monitored = 0; 244 nsm->sm_monitored = 0;
245
246 nsm_client_put(host->net);
195 } 247 }
196} 248}
197 249
@@ -430,7 +482,7 @@ static void encode_my_id(struct xdr_stream *xdr, const struct nsm_args *argp)
430{ 482{
431 __be32 *p; 483 __be32 *p;
432 484
433 encode_nsm_string(xdr, utsname()->nodename); 485 encode_nsm_string(xdr, argp->nodename);
434 p = xdr_reserve_space(xdr, 4 + 4 + 4); 486 p = xdr_reserve_space(xdr, 4 + 4 + 4);
435 *p++ = cpu_to_be32(argp->prog); 487 *p++ = cpu_to_be32(argp->prog);
436 *p++ = cpu_to_be32(argp->vers); 488 *p++ = cpu_to_be32(argp->vers);
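
The fs/lockd/mon.c hunk above replaces the old create-an-RPC-client-per-call behaviour with a single NSM client per network namespace, handed out by nsm_client_get() and released by nsm_client_put(). Below is a rough user-space analogue of that first-user-creates / last-user-destroys pattern, assuming a pthread mutex can stand in for the kernel's nsm_clnt_lock spinlock and nsm_create_mutex (the kernel splits the fast path from the creation step because an RPC client cannot be created while a spinlock is held). Every name here is hypothetical; this is a sketch, not the kernel code.

/* User-space sketch of a shared, reference-counted client. */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct shared_clnt {
	pthread_mutex_t lock;	/* protects users and clnt */
	unsigned int users;	/* reference count */
	char *clnt;		/* stand-in for struct rpc_clnt * */
};

static struct shared_clnt sc = { PTHREAD_MUTEX_INITIALIZER, 0, NULL };

static char *clnt_get(void)
{
	char *clnt;

	pthread_mutex_lock(&sc.lock);
	if (sc.users == 0)			/* first user creates the client */
		sc.clnt = strdup("rpc client");	/* stand-in for nsm_create() */
	if (sc.clnt)
		sc.users++;
	clnt = sc.clnt;
	pthread_mutex_unlock(&sc.lock);
	return clnt;
}

static void clnt_put(void)
{
	char *clnt = NULL;

	pthread_mutex_lock(&sc.lock);
	if (sc.users && --sc.users == 0) {
		clnt = sc.clnt;			/* last user tears it down */
		sc.clnt = NULL;
	}
	pthread_mutex_unlock(&sc.lock);
	free(clnt);				/* stand-in for rpc_shutdown_client() */
}

int main(void)
{
	char *c = clnt_get();

	if (c)
		printf("got shared client: %s\n", c);
	clnt_put();
	return 0;
}
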
diff --git a/fs/lockd/netns.h b/fs/lockd/netns.h
index 4eee248ba96e..5010b55628b4 100644
--- a/fs/lockd/netns.h
+++ b/fs/lockd/netns.h
@@ -12,6 +12,10 @@ struct lockd_net {
12 struct delayed_work grace_period_end; 12 struct delayed_work grace_period_end;
13 struct lock_manager lockd_manager; 13 struct lock_manager lockd_manager;
14 struct list_head grace_list; 14 struct list_head grace_list;
15
16 spinlock_t nsm_clnt_lock;
17 unsigned int nsm_users;
18 struct rpc_clnt *nsm_clnt;
15}; 19};
16 20
17extern int lockd_net_id; 21extern int lockd_net_id;
diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c
index 31a63f87b806..7e355870d519 100644
--- a/fs/lockd/svc.c
+++ b/fs/lockd/svc.c
@@ -596,6 +596,7 @@ static int lockd_init_net(struct net *net)
596 596
597 INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender); 597 INIT_DELAYED_WORK(&ln->grace_period_end, grace_ender);
598 INIT_LIST_HEAD(&ln->grace_list); 598 INIT_LIST_HEAD(&ln->grace_list);
599 spin_lock_init(&ln->nsm_clnt_lock);
599 return 0; 600 return 0;
600} 601}
601 602
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig
index db7ad719628a..13ca196385f5 100644
--- a/fs/nfs/Kconfig
+++ b/fs/nfs/Kconfig
@@ -95,8 +95,8 @@ config NFS_SWAP
95 This option enables swapon to work on files located on NFS mounts. 95 This option enables swapon to work on files located on NFS mounts.
96 96
97config NFS_V4_1 97config NFS_V4_1
98 bool "NFS client support for NFSv4.1 (EXPERIMENTAL)" 98 bool "NFS client support for NFSv4.1"
99 depends on NFS_V4 && EXPERIMENTAL 99 depends on NFS_V4
100 select SUNRPC_BACKCHANNEL 100 select SUNRPC_BACKCHANNEL
101 help 101 help
102 This option enables support for minor version 1 of the NFSv4 protocol 102 This option enables support for minor version 1 of the NFSv4 protocol
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index dd392ed5f2e2..f1027b06a1a9 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -37,6 +37,7 @@
37#include <linux/bio.h> /* struct bio */ 37#include <linux/bio.h> /* struct bio */
38#include <linux/buffer_head.h> /* various write calls */ 38#include <linux/buffer_head.h> /* various write calls */
39#include <linux/prefetch.h> 39#include <linux/prefetch.h>
40#include <linux/pagevec.h>
40 41
41#include "../pnfs.h" 42#include "../pnfs.h"
42#include "../internal.h" 43#include "../internal.h"
@@ -162,25 +163,39 @@ static struct bio *bl_alloc_init_bio(int npg, sector_t isect,
162 return bio; 163 return bio;
163} 164}
164 165
165static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw, 166static struct bio *do_add_page_to_bio(struct bio *bio, int npg, int rw,
166 sector_t isect, struct page *page, 167 sector_t isect, struct page *page,
167 struct pnfs_block_extent *be, 168 struct pnfs_block_extent *be,
168 void (*end_io)(struct bio *, int err), 169 void (*end_io)(struct bio *, int err),
169 struct parallel_io *par) 170 struct parallel_io *par,
171 unsigned int offset, int len)
170{ 172{
173 isect = isect + (offset >> SECTOR_SHIFT);
174 dprintk("%s: npg %d rw %d isect %llu offset %u len %d\n", __func__,
175 npg, rw, (unsigned long long)isect, offset, len);
171retry: 176retry:
172 if (!bio) { 177 if (!bio) {
173 bio = bl_alloc_init_bio(npg, isect, be, end_io, par); 178 bio = bl_alloc_init_bio(npg, isect, be, end_io, par);
174 if (!bio) 179 if (!bio)
175 return ERR_PTR(-ENOMEM); 180 return ERR_PTR(-ENOMEM);
176 } 181 }
177 if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) { 182 if (bio_add_page(bio, page, len, offset) < len) {
178 bio = bl_submit_bio(rw, bio); 183 bio = bl_submit_bio(rw, bio);
179 goto retry; 184 goto retry;
180 } 185 }
181 return bio; 186 return bio;
182} 187}
183 188
189static struct bio *bl_add_page_to_bio(struct bio *bio, int npg, int rw,
190 sector_t isect, struct page *page,
191 struct pnfs_block_extent *be,
192 void (*end_io)(struct bio *, int err),
193 struct parallel_io *par)
194{
195 return do_add_page_to_bio(bio, npg, rw, isect, page, be,
196 end_io, par, 0, PAGE_CACHE_SIZE);
197}
198
184/* This is basically copied from mpage_end_io_read */ 199/* This is basically copied from mpage_end_io_read */
185static void bl_end_io_read(struct bio *bio, int err) 200static void bl_end_io_read(struct bio *bio, int err)
186{ 201{
@@ -228,14 +243,6 @@ bl_end_par_io_read(void *data, int unused)
228 schedule_work(&rdata->task.u.tk_work); 243 schedule_work(&rdata->task.u.tk_work);
229} 244}
230 245
231static bool
232bl_check_alignment(u64 offset, u32 len, unsigned long blkmask)
233{
234 if ((offset & blkmask) || (len & blkmask))
235 return false;
236 return true;
237}
238
239static enum pnfs_try_status 246static enum pnfs_try_status
240bl_read_pagelist(struct nfs_read_data *rdata) 247bl_read_pagelist(struct nfs_read_data *rdata)
241{ 248{
@@ -246,15 +253,15 @@ bl_read_pagelist(struct nfs_read_data *rdata)
246 sector_t isect, extent_length = 0; 253 sector_t isect, extent_length = 0;
247 struct parallel_io *par; 254 struct parallel_io *par;
248 loff_t f_offset = rdata->args.offset; 255 loff_t f_offset = rdata->args.offset;
256 size_t bytes_left = rdata->args.count;
257 unsigned int pg_offset, pg_len;
249 struct page **pages = rdata->args.pages; 258 struct page **pages = rdata->args.pages;
250 int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT; 259 int pg_index = rdata->args.pgbase >> PAGE_CACHE_SHIFT;
260 const bool is_dio = (header->dreq != NULL);
251 261
252 dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__, 262 dprintk("%s enter nr_pages %u offset %lld count %u\n", __func__,
253 rdata->pages.npages, f_offset, (unsigned int)rdata->args.count); 263 rdata->pages.npages, f_offset, (unsigned int)rdata->args.count);
254 264
255 if (!bl_check_alignment(f_offset, rdata->args.count, PAGE_CACHE_MASK))
256 goto use_mds;
257
258 par = alloc_parallel(rdata); 265 par = alloc_parallel(rdata);
259 if (!par) 266 if (!par)
260 goto use_mds; 267 goto use_mds;
@@ -284,36 +291,53 @@ bl_read_pagelist(struct nfs_read_data *rdata)
284 extent_length = min(extent_length, cow_length); 291 extent_length = min(extent_length, cow_length);
285 } 292 }
286 } 293 }
294
295 if (is_dio) {
296 pg_offset = f_offset & ~PAGE_CACHE_MASK;
297 if (pg_offset + bytes_left > PAGE_CACHE_SIZE)
298 pg_len = PAGE_CACHE_SIZE - pg_offset;
299 else
300 pg_len = bytes_left;
301
302 f_offset += pg_len;
303 bytes_left -= pg_len;
304 isect += (pg_offset >> SECTOR_SHIFT);
305 } else {
306 pg_offset = 0;
307 pg_len = PAGE_CACHE_SIZE;
308 }
309
287 hole = is_hole(be, isect); 310 hole = is_hole(be, isect);
288 if (hole && !cow_read) { 311 if (hole && !cow_read) {
289 bio = bl_submit_bio(READ, bio); 312 bio = bl_submit_bio(READ, bio);
290 /* Fill hole w/ zeroes w/o accessing device */ 313 /* Fill hole w/ zeroes w/o accessing device */
291 dprintk("%s Zeroing page for hole\n", __func__); 314 dprintk("%s Zeroing page for hole\n", __func__);
292 zero_user_segment(pages[i], 0, PAGE_CACHE_SIZE); 315 zero_user_segment(pages[i], pg_offset, pg_len);
293 print_page(pages[i]); 316 print_page(pages[i]);
294 SetPageUptodate(pages[i]); 317 SetPageUptodate(pages[i]);
295 } else { 318 } else {
296 struct pnfs_block_extent *be_read; 319 struct pnfs_block_extent *be_read;
297 320
298 be_read = (hole && cow_read) ? cow_read : be; 321 be_read = (hole && cow_read) ? cow_read : be;
299 bio = bl_add_page_to_bio(bio, rdata->pages.npages - i, 322 bio = do_add_page_to_bio(bio, rdata->pages.npages - i,
300 READ, 323 READ,
301 isect, pages[i], be_read, 324 isect, pages[i], be_read,
302 bl_end_io_read, par); 325 bl_end_io_read, par,
326 pg_offset, pg_len);
303 if (IS_ERR(bio)) { 327 if (IS_ERR(bio)) {
304 header->pnfs_error = PTR_ERR(bio); 328 header->pnfs_error = PTR_ERR(bio);
305 bio = NULL; 329 bio = NULL;
306 goto out; 330 goto out;
307 } 331 }
308 } 332 }
309 isect += PAGE_CACHE_SECTORS; 333 isect += (pg_len >> SECTOR_SHIFT);
310 extent_length -= PAGE_CACHE_SECTORS; 334 extent_length -= PAGE_CACHE_SECTORS;
311 } 335 }
312 if ((isect << SECTOR_SHIFT) >= header->inode->i_size) { 336 if ((isect << SECTOR_SHIFT) >= header->inode->i_size) {
313 rdata->res.eof = 1; 337 rdata->res.eof = 1;
314 rdata->res.count = header->inode->i_size - f_offset; 338 rdata->res.count = header->inode->i_size - rdata->args.offset;
315 } else { 339 } else {
316 rdata->res.count = (isect << SECTOR_SHIFT) - f_offset; 340 rdata->res.count = (isect << SECTOR_SHIFT) - rdata->args.offset;
317 } 341 }
318out: 342out:
319 bl_put_extent(be); 343 bl_put_extent(be);
@@ -461,6 +485,106 @@ map_block(struct buffer_head *bh, sector_t isect, struct pnfs_block_extent *be)
461 return; 485 return;
462} 486}
463 487
488static void
489bl_read_single_end_io(struct bio *bio, int error)
490{
491 struct bio_vec *bvec = bio->bi_io_vec + bio->bi_vcnt - 1;
492 struct page *page = bvec->bv_page;
493
494 /* Only one page in bvec */
495 unlock_page(page);
496}
497
498static int
499bl_do_readpage_sync(struct page *page, struct pnfs_block_extent *be,
500 unsigned int offset, unsigned int len)
501{
502 struct bio *bio;
503 struct page *shadow_page;
504 sector_t isect;
505 char *kaddr, *kshadow_addr;
506 int ret = 0;
507
508 dprintk("%s: offset %u len %u\n", __func__, offset, len);
509
510 shadow_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
511 if (shadow_page == NULL)
512 return -ENOMEM;
513
514 bio = bio_alloc(GFP_NOIO, 1);
515 if (bio == NULL)
516 return -ENOMEM;
517
518 isect = (page->index << PAGE_CACHE_SECTOR_SHIFT) +
519 (offset / SECTOR_SIZE);
520
521 bio->bi_sector = isect - be->be_f_offset + be->be_v_offset;
522 bio->bi_bdev = be->be_mdev;
523 bio->bi_end_io = bl_read_single_end_io;
524
525 lock_page(shadow_page);
526 if (bio_add_page(bio, shadow_page,
527 SECTOR_SIZE, round_down(offset, SECTOR_SIZE)) == 0) {
528 unlock_page(shadow_page);
529 bio_put(bio);
530 return -EIO;
531 }
532
533 submit_bio(READ, bio);
534 wait_on_page_locked(shadow_page);
535 if (unlikely(!test_bit(BIO_UPTODATE, &bio->bi_flags))) {
536 ret = -EIO;
537 } else {
538 kaddr = kmap_atomic(page);
539 kshadow_addr = kmap_atomic(shadow_page);
540 memcpy(kaddr + offset, kshadow_addr + offset, len);
541 kunmap_atomic(kshadow_addr);
542 kunmap_atomic(kaddr);
543 }
544 __free_page(shadow_page);
545 bio_put(bio);
546
547 return ret;
548}
549
550static int
551bl_read_partial_page_sync(struct page *page, struct pnfs_block_extent *be,
552 unsigned int dirty_offset, unsigned int dirty_len,
553 bool full_page)
554{
555 int ret = 0;
556 unsigned int start, end;
557
558 if (full_page) {
559 start = 0;
560 end = PAGE_CACHE_SIZE;
561 } else {
562 start = round_down(dirty_offset, SECTOR_SIZE);
563 end = round_up(dirty_offset + dirty_len, SECTOR_SIZE);
564 }
565
566 dprintk("%s: offset %u len %d\n", __func__, dirty_offset, dirty_len);
567 if (!be) {
568 zero_user_segments(page, start, dirty_offset,
569 dirty_offset + dirty_len, end);
570 if (start == 0 && end == PAGE_CACHE_SIZE &&
571 trylock_page(page)) {
572 SetPageUptodate(page);
573 unlock_page(page);
574 }
575 return ret;
576 }
577
578 if (start != dirty_offset)
579 ret = bl_do_readpage_sync(page, be, start, dirty_offset - start);
580
581 if (!ret && (dirty_offset + dirty_len < end))
582 ret = bl_do_readpage_sync(page, be, dirty_offset + dirty_len,
583 end - dirty_offset - dirty_len);
584
585 return ret;
586}
587
464/* Given an unmapped page, zero it or read in page for COW, page is locked 588/* Given an unmapped page, zero it or read in page for COW, page is locked
465 * by caller. 589 * by caller.
466 */ 590 */
@@ -494,7 +618,6 @@ init_page_for_write(struct page *page, struct pnfs_block_extent *cow_read)
494 SetPageUptodate(page); 618 SetPageUptodate(page);
495 619
496cleanup: 620cleanup:
497 bl_put_extent(cow_read);
498 if (bh) 621 if (bh)
499 free_buffer_head(bh); 622 free_buffer_head(bh);
500 if (ret) { 623 if (ret) {
@@ -566,6 +689,7 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
566 struct parallel_io *par = NULL; 689 struct parallel_io *par = NULL;
567 loff_t offset = wdata->args.offset; 690 loff_t offset = wdata->args.offset;
568 size_t count = wdata->args.count; 691 size_t count = wdata->args.count;
692 unsigned int pg_offset, pg_len, saved_len;
569 struct page **pages = wdata->args.pages; 693 struct page **pages = wdata->args.pages;
570 struct page *page; 694 struct page *page;
571 pgoff_t index; 695 pgoff_t index;
@@ -574,10 +698,13 @@ bl_write_pagelist(struct nfs_write_data *wdata, int sync)
574 NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT; 698 NFS_SERVER(header->inode)->pnfs_blksize >> PAGE_CACHE_SHIFT;
575 699
576 dprintk("%s enter, %Zu@%lld\n", __func__, count, offset); 700 dprintk("%s enter, %Zu@%lld\n", __func__, count, offset);
577 /* Check for alignment first */
578 if (!bl_check_alignment(offset, count, PAGE_CACHE_MASK))
579 goto out_mds;
580 701
702 if (header->dreq != NULL &&
703 (!IS_ALIGNED(offset, NFS_SERVER(header->inode)->pnfs_blksize) ||
704 !IS_ALIGNED(count, NFS_SERVER(header->inode)->pnfs_blksize))) {
705 dprintk("pnfsblock nonblock aligned DIO writes. Resend MDS\n");
706 goto out_mds;
707 }
581 /* At this point, wdata->pages is a (sequential) list of nfs_pages. 708 /* At this point, wdata->pages is a (sequential) list of nfs_pages.
582 * We want to write each, and if there is an error set pnfs_error 709 * We want to write each, and if there is an error set pnfs_error
583 * to have it redone using nfs. 710 * to have it redone using nfs.
@@ -674,10 +801,11 @@ next_page:
674 if (!extent_length) { 801 if (!extent_length) {
675 /* We've used up the previous extent */ 802 /* We've used up the previous extent */
676 bl_put_extent(be); 803 bl_put_extent(be);
804 bl_put_extent(cow_read);
677 bio = bl_submit_bio(WRITE, bio); 805 bio = bl_submit_bio(WRITE, bio);
678 /* Get the next one */ 806 /* Get the next one */
679 be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg), 807 be = bl_find_get_extent(BLK_LSEG2EXT(header->lseg),
680 isect, NULL); 808 isect, &cow_read);
681 if (!be || !is_writable(be, isect)) { 809 if (!be || !is_writable(be, isect)) {
682 header->pnfs_error = -EINVAL; 810 header->pnfs_error = -EINVAL;
683 goto out; 811 goto out;
@@ -694,7 +822,26 @@ next_page:
694 extent_length = be->be_length - 822 extent_length = be->be_length -
695 (isect - be->be_f_offset); 823 (isect - be->be_f_offset);
696 } 824 }
697 if (be->be_state == PNFS_BLOCK_INVALID_DATA) { 825
826 dprintk("%s offset %lld count %Zu\n", __func__, offset, count);
827 pg_offset = offset & ~PAGE_CACHE_MASK;
828 if (pg_offset + count > PAGE_CACHE_SIZE)
829 pg_len = PAGE_CACHE_SIZE - pg_offset;
830 else
831 pg_len = count;
832
833 saved_len = pg_len;
834 if (be->be_state == PNFS_BLOCK_INVALID_DATA &&
835 !bl_is_sector_init(be->be_inval, isect)) {
836 ret = bl_read_partial_page_sync(pages[i], cow_read,
837 pg_offset, pg_len, true);
838 if (ret) {
839 dprintk("%s bl_read_partial_page_sync fail %d\n",
840 __func__, ret);
841 header->pnfs_error = ret;
842 goto out;
843 }
844
698 ret = bl_mark_sectors_init(be->be_inval, isect, 845 ret = bl_mark_sectors_init(be->be_inval, isect,
699 PAGE_CACHE_SECTORS); 846 PAGE_CACHE_SECTORS);
700 if (unlikely(ret)) { 847 if (unlikely(ret)) {
@@ -703,15 +850,35 @@ next_page:
703 header->pnfs_error = ret; 850 header->pnfs_error = ret;
704 goto out; 851 goto out;
705 } 852 }
853
854 /* Expand to full page write */
855 pg_offset = 0;
856 pg_len = PAGE_CACHE_SIZE;
857 } else if ((pg_offset & (SECTOR_SIZE - 1)) ||
858 (pg_len & (SECTOR_SIZE - 1))){
859 /* ahh, nasty case. We have to do sync full sector
860 * read-modify-write cycles.
861 */
862 unsigned int saved_offset = pg_offset;
863 ret = bl_read_partial_page_sync(pages[i], be, pg_offset,
864 pg_len, false);
865 pg_offset = round_down(pg_offset, SECTOR_SIZE);
866 pg_len = round_up(saved_offset + pg_len, SECTOR_SIZE)
867 - pg_offset;
706 } 868 }
707 bio = bl_add_page_to_bio(bio, wdata->pages.npages - i, WRITE, 869
870
871 bio = do_add_page_to_bio(bio, wdata->pages.npages - i, WRITE,
708 isect, pages[i], be, 872 isect, pages[i], be,
709 bl_end_io_write, par); 873 bl_end_io_write, par,
874 pg_offset, pg_len);
710 if (IS_ERR(bio)) { 875 if (IS_ERR(bio)) {
711 header->pnfs_error = PTR_ERR(bio); 876 header->pnfs_error = PTR_ERR(bio);
712 bio = NULL; 877 bio = NULL;
713 goto out; 878 goto out;
714 } 879 }
880 offset += saved_len;
881 count -= saved_len;
715 isect += PAGE_CACHE_SECTORS; 882 isect += PAGE_CACHE_SECTORS;
716 last_isect = isect; 883 last_isect = isect;
717 extent_length -= PAGE_CACHE_SECTORS; 884 extent_length -= PAGE_CACHE_SECTORS;
@@ -729,17 +896,16 @@ next_page:
729 } 896 }
730 897
731write_done: 898write_done:
732 wdata->res.count = (last_isect << SECTOR_SHIFT) - (offset); 899 wdata->res.count = wdata->args.count;
733 if (count < wdata->res.count) {
734 wdata->res.count = count;
735 }
736out: 900out:
737 bl_put_extent(be); 901 bl_put_extent(be);
902 bl_put_extent(cow_read);
738 bl_submit_bio(WRITE, bio); 903 bl_submit_bio(WRITE, bio);
739 put_parallel(par); 904 put_parallel(par);
740 return PNFS_ATTEMPTED; 905 return PNFS_ATTEMPTED;
741out_mds: 906out_mds:
742 bl_put_extent(be); 907 bl_put_extent(be);
908 bl_put_extent(cow_read);
743 kfree(par); 909 kfree(par);
744 return PNFS_NOT_ATTEMPTED; 910 return PNFS_NOT_ATTEMPTED;
745} 911}
@@ -874,7 +1040,7 @@ static void free_blk_mountid(struct block_mount_id *mid)
874 } 1040 }
875} 1041}
876 1042
877/* This is mostly copied from the filelayout's get_device_info function. 1043/* This is mostly copied from the filelayout_get_device_info function.
878 * It seems much of this should be at the generic pnfs level. 1044 * It seems much of this should be at the generic pnfs level.
879 */ 1045 */
880static struct pnfs_block_dev * 1046static struct pnfs_block_dev *
@@ -1011,33 +1177,95 @@ bl_clear_layoutdriver(struct nfs_server *server)
1011 return 0; 1177 return 0;
1012} 1178}
1013 1179
1180static bool
1181is_aligned_req(struct nfs_page *req, unsigned int alignment)
1182{
1183 return IS_ALIGNED(req->wb_offset, alignment) &&
1184 IS_ALIGNED(req->wb_bytes, alignment);
1185}
1186
1014static void 1187static void
1015bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) 1188bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
1016{ 1189{
1017 if (!bl_check_alignment(req->wb_offset, req->wb_bytes, PAGE_CACHE_MASK)) 1190 if (pgio->pg_dreq != NULL &&
1191 !is_aligned_req(req, SECTOR_SIZE))
1018 nfs_pageio_reset_read_mds(pgio); 1192 nfs_pageio_reset_read_mds(pgio);
1019 else 1193 else
1020 pnfs_generic_pg_init_read(pgio, req); 1194 pnfs_generic_pg_init_read(pgio, req);
1021} 1195}
1022 1196
1197static bool
1198bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
1199 struct nfs_page *req)
1200{
1201 if (pgio->pg_dreq != NULL &&
1202 !is_aligned_req(req, SECTOR_SIZE))
1203 return false;
1204
1205 return pnfs_generic_pg_test(pgio, prev, req);
1206}
1207
1208/*
1209 * Return the number of contiguous bytes for a given inode
1210 * starting at page frame idx.
1211 */
1212static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx)
1213{
1214 struct address_space *mapping = inode->i_mapping;
1215 pgoff_t end;
1216
1217 /* Optimize common case that writes from 0 to end of file */
1218 end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE);
1219 if (end != NFS_I(inode)->npages) {
1220 rcu_read_lock();
1221 end = radix_tree_next_hole(&mapping->page_tree, idx + 1, ULONG_MAX);
1222 rcu_read_unlock();
1223 }
1224
1225 if (!end)
1226 return i_size_read(inode) - (idx << PAGE_CACHE_SHIFT);
1227 else
1228 return (end - idx) << PAGE_CACHE_SHIFT;
1229}
1230
1023static void 1231static void
1024bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) 1232bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
1025{ 1233{
1026 if (!bl_check_alignment(req->wb_offset, req->wb_bytes, PAGE_CACHE_MASK)) 1234 if (pgio->pg_dreq != NULL &&
1235 !is_aligned_req(req, PAGE_CACHE_SIZE)) {
1027 nfs_pageio_reset_write_mds(pgio); 1236 nfs_pageio_reset_write_mds(pgio);
1028 else 1237 } else {
1029 pnfs_generic_pg_init_write(pgio, req); 1238 u64 wb_size;
1239 if (pgio->pg_dreq == NULL)
1240 wb_size = pnfs_num_cont_bytes(pgio->pg_inode,
1241 req->wb_index);
1242 else
1243 wb_size = nfs_dreq_bytes_left(pgio->pg_dreq);
1244
1245 pnfs_generic_pg_init_write(pgio, req, wb_size);
1246 }
1247}
1248
1249static bool
1250bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
1251 struct nfs_page *req)
1252{
1253 if (pgio->pg_dreq != NULL &&
1254 !is_aligned_req(req, PAGE_CACHE_SIZE))
1255 return false;
1256
1257 return pnfs_generic_pg_test(pgio, prev, req);
1030} 1258}
1031 1259
1032static const struct nfs_pageio_ops bl_pg_read_ops = { 1260static const struct nfs_pageio_ops bl_pg_read_ops = {
1033 .pg_init = bl_pg_init_read, 1261 .pg_init = bl_pg_init_read,
1034 .pg_test = pnfs_generic_pg_test, 1262 .pg_test = bl_pg_test_read,
1035 .pg_doio = pnfs_generic_pg_readpages, 1263 .pg_doio = pnfs_generic_pg_readpages,
1036}; 1264};
1037 1265
1038static const struct nfs_pageio_ops bl_pg_write_ops = { 1266static const struct nfs_pageio_ops bl_pg_write_ops = {
1039 .pg_init = bl_pg_init_write, 1267 .pg_init = bl_pg_init_write,
1040 .pg_test = pnfs_generic_pg_test, 1268 .pg_test = bl_pg_test_write,
1041 .pg_doio = pnfs_generic_pg_writepages, 1269 .pg_doio = pnfs_generic_pg_writepages,
1042}; 1270};
1043 1271
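
The write path above widens a sub-sector dirty range out to whole 512-byte sectors (round_down/round_up on SECTOR_SIZE) before doing the synchronous read-modify-write. A small stand-alone arithmetic check of that rounding, using hypothetical helpers rather than the kernel macros:

/* Worked example: dirty bytes [700, 800) within a page are widened to
 * the full-sector range [512, 1024) for the read-modify-write cycle.
 */
#include <stdio.h>

#define SECTOR_SIZE 512u

static unsigned int rnd_down(unsigned int x, unsigned int a) { return x - (x % a); }
static unsigned int rnd_up(unsigned int x, unsigned int a)   { return rnd_down(x + a - 1, a); }

int main(void)
{
	unsigned int dirty_offset = 700, dirty_len = 100;	/* bytes within the page */
	unsigned int start = rnd_down(dirty_offset, SECTOR_SIZE);
	unsigned int end = rnd_up(dirty_offset + dirty_len, SECTOR_SIZE);

	/* Prints: rmw range [512, 1024) for dirty bytes [700, 800) */
	printf("rmw range [%u, %u) for dirty bytes [%u, %u)\n",
	       start, end, dirty_offset, dirty_offset + dirty_len);
	return 0;
}
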
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index 03350690118e..f4891bde8851 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -41,6 +41,7 @@
41 41
42#define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT) 42#define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT)
43#define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT) 43#define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT)
44#define SECTOR_SIZE (1 << SECTOR_SHIFT)
44 45
45struct block_mount_id { 46struct block_mount_id {
46 spinlock_t bm_lock; /* protects list */ 47 spinlock_t bm_lock; /* protects list */
@@ -172,7 +173,6 @@ struct bl_msg_hdr {
172/* blocklayoutdev.c */ 173/* blocklayoutdev.c */
173ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t); 174ssize_t bl_pipe_downcall(struct file *, const char __user *, size_t);
174void bl_pipe_destroy_msg(struct rpc_pipe_msg *); 175void bl_pipe_destroy_msg(struct rpc_pipe_msg *);
175struct block_device *nfs4_blkdev_get(dev_t dev);
176int nfs4_blkdev_put(struct block_device *bdev); 176int nfs4_blkdev_put(struct block_device *bdev);
177struct pnfs_block_dev *nfs4_blk_decode_device(struct nfs_server *server, 177struct pnfs_block_dev *nfs4_blk_decode_device(struct nfs_server *server,
178 struct pnfs_device *dev); 178 struct pnfs_device *dev);
diff --git a/fs/nfs/blocklayout/blocklayoutdev.c b/fs/nfs/blocklayout/blocklayoutdev.c
index c96554245ccf..a86c5bdad9e3 100644
--- a/fs/nfs/blocklayout/blocklayoutdev.c
+++ b/fs/nfs/blocklayout/blocklayoutdev.c
@@ -53,22 +53,6 @@ static int decode_sector_number(__be32 **rp, sector_t *sp)
53 return 0; 53 return 0;
54} 54}
55 55
56/* Open a block_device by device number. */
57struct block_device *nfs4_blkdev_get(dev_t dev)
58{
59 struct block_device *bd;
60
61 dprintk("%s enter\n", __func__);
62 bd = blkdev_get_by_dev(dev, FMODE_READ, NULL);
63 if (IS_ERR(bd))
64 goto fail;
65 return bd;
66fail:
67 dprintk("%s failed to open device : %ld\n",
68 __func__, PTR_ERR(bd));
69 return NULL;
70}
71
72/* 56/*
73 * Release the block device 57 * Release the block device
74 */ 58 */
@@ -172,11 +156,12 @@ nfs4_blk_decode_device(struct nfs_server *server,
172 goto out; 156 goto out;
173 } 157 }
174 158
175 bd = nfs4_blkdev_get(MKDEV(reply->major, reply->minor)); 159 bd = blkdev_get_by_dev(MKDEV(reply->major, reply->minor),
160 FMODE_READ, NULL);
176 if (IS_ERR(bd)) { 161 if (IS_ERR(bd)) {
177 rc = PTR_ERR(bd); 162 dprintk("%s failed to open device : %ld\n", __func__,
178 dprintk("%s failed to open device : %d\n", __func__, rc); 163 PTR_ERR(bd));
179 rv = ERR_PTR(rc); 164 rv = ERR_CAST(bd);
180 goto out; 165 goto out;
181 } 166 }
182 167
diff --git a/fs/nfs/blocklayout/extents.c b/fs/nfs/blocklayout/extents.c
index 1f9a6032796b..9c3e117c3ed1 100644
--- a/fs/nfs/blocklayout/extents.c
+++ b/fs/nfs/blocklayout/extents.c
@@ -683,8 +683,7 @@ encode_pnfs_block_layoutupdate(struct pnfs_block_layout *bl,
683 p = xdr_encode_hyper(p, lce->bse_length << SECTOR_SHIFT); 683 p = xdr_encode_hyper(p, lce->bse_length << SECTOR_SHIFT);
684 p = xdr_encode_hyper(p, 0LL); 684 p = xdr_encode_hyper(p, 0LL);
685 *p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA); 685 *p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA);
686 list_del(&lce->bse_node); 686 list_move_tail(&lce->bse_node, &bl->bl_committing);
687 list_add_tail(&lce->bse_node, &bl->bl_committing);
688 bl->bl_count--; 687 bl->bl_count--;
689 count++; 688 count++;
690 } 689 }
diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c
index 4c8459e5bdee..2245bef50f37 100644
--- a/fs/nfs/callback.c
+++ b/fs/nfs/callback.c
@@ -12,6 +12,7 @@
12#include <linux/sunrpc/svc.h> 12#include <linux/sunrpc/svc.h>
13#include <linux/sunrpc/svcsock.h> 13#include <linux/sunrpc/svcsock.h>
14#include <linux/nfs_fs.h> 14#include <linux/nfs_fs.h>
15#include <linux/errno.h>
15#include <linux/mutex.h> 16#include <linux/mutex.h>
16#include <linux/freezer.h> 17#include <linux/freezer.h>
17#include <linux/kthread.h> 18#include <linux/kthread.h>
@@ -23,6 +24,7 @@
23#include "nfs4_fs.h" 24#include "nfs4_fs.h"
24#include "callback.h" 25#include "callback.h"
25#include "internal.h" 26#include "internal.h"
27#include "netns.h"
26 28
27#define NFSDBG_FACILITY NFSDBG_CALLBACK 29#define NFSDBG_FACILITY NFSDBG_CALLBACK
28 30
@@ -37,7 +39,32 @@ static struct nfs_callback_data nfs_callback_info[NFS4_MAX_MINOR_VERSION + 1];
37static DEFINE_MUTEX(nfs_callback_mutex); 39static DEFINE_MUTEX(nfs_callback_mutex);
38static struct svc_program nfs4_callback_program; 40static struct svc_program nfs4_callback_program;
39 41
40unsigned short nfs_callback_tcpport6; 42static int nfs4_callback_up_net(struct svc_serv *serv, struct net *net)
43{
44 int ret;
45 struct nfs_net *nn = net_generic(net, nfs_net_id);
46
47 ret = svc_create_xprt(serv, "tcp", net, PF_INET,
48 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
49 if (ret <= 0)
50 goto out_err;
51 nn->nfs_callback_tcpport = ret;
52 dprintk("NFS: Callback listener port = %u (af %u, net %p)\n",
53 nn->nfs_callback_tcpport, PF_INET, net);
54
55 ret = svc_create_xprt(serv, "tcp", net, PF_INET6,
56 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
57 if (ret > 0) {
58 nn->nfs_callback_tcpport6 = ret;
59 dprintk("NFS: Callback listener port = %u (af %u, net %p)\n",
60 nn->nfs_callback_tcpport6, PF_INET6, net);
61 } else if (ret != -EAFNOSUPPORT)
62 goto out_err;
63 return 0;
64
65out_err:
66 return (ret) ? ret : -ENOMEM;
67}
41 68
42/* 69/*
43 * This is the NFSv4 callback kernel thread. 70 * This is the NFSv4 callback kernel thread.
@@ -78,38 +105,23 @@ nfs4_callback_svc(void *vrqstp)
78 * Prepare to bring up the NFSv4 callback service 105 * Prepare to bring up the NFSv4 callback service
79 */ 106 */
80static struct svc_rqst * 107static struct svc_rqst *
81nfs4_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) 108nfs4_callback_up(struct svc_serv *serv)
82{ 109{
83 int ret;
84
85 ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET,
86 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
87 if (ret <= 0)
88 goto out_err;
89 nfs_callback_tcpport = ret;
90 dprintk("NFS: Callback listener port = %u (af %u)\n",
91 nfs_callback_tcpport, PF_INET);
92
93 ret = svc_create_xprt(serv, "tcp", &init_net, PF_INET6,
94 nfs_callback_set_tcpport, SVC_SOCK_ANONYMOUS);
95 if (ret > 0) {
96 nfs_callback_tcpport6 = ret;
97 dprintk("NFS: Callback listener port = %u (af %u)\n",
98 nfs_callback_tcpport6, PF_INET6);
99 } else if (ret == -EAFNOSUPPORT)
100 ret = 0;
101 else
102 goto out_err;
103
104 return svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE); 110 return svc_prepare_thread(serv, &serv->sv_pools[0], NUMA_NO_NODE);
105
106out_err:
107 if (ret == 0)
108 ret = -ENOMEM;
109 return ERR_PTR(ret);
110} 111}
111 112
112#if defined(CONFIG_NFS_V4_1) 113#if defined(CONFIG_NFS_V4_1)
114static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net)
115{
116 /*
117 * Create an svc_sock for the back channel service that shares the
118 * fore channel connection.
119 * Returns the input port (0) and sets the svc_serv bc_xprt on success
120 */
121 return svc_create_xprt(serv, "tcp-bc", net, PF_INET, 0,
122 SVC_SOCK_ANONYMOUS);
123}
124
113/* 125/*
114 * The callback service for NFSv4.1 callbacks 126 * The callback service for NFSv4.1 callbacks
115 */ 127 */
@@ -149,28 +161,9 @@ nfs41_callback_svc(void *vrqstp)
149 * Bring up the NFSv4.1 callback service 161 * Bring up the NFSv4.1 callback service
150 */ 162 */
151static struct svc_rqst * 163static struct svc_rqst *
152nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt) 164nfs41_callback_up(struct svc_serv *serv)
153{ 165{
154 struct svc_rqst *rqstp; 166 struct svc_rqst *rqstp;
155 int ret;
156
157 /*
158 * Create an svc_sock for the back channel service that shares the
159 * fore channel connection.
160 * Returns the input port (0) and sets the svc_serv bc_xprt on success
161 */
162 ret = svc_create_xprt(serv, "tcp-bc", &init_net, PF_INET, 0,
163 SVC_SOCK_ANONYMOUS);
164 if (ret < 0) {
165 rqstp = ERR_PTR(ret);
166 goto out;
167 }
168
169 /*
170 * Save the svc_serv in the transport so that it can
171 * be referenced when the session backchannel is initialized
172 */
173 xprt->bc_serv = serv;
174 167
175 INIT_LIST_HEAD(&serv->sv_cb_list); 168 INIT_LIST_HEAD(&serv->sv_cb_list);
176 spin_lock_init(&serv->sv_cb_lock); 169 spin_lock_init(&serv->sv_cb_lock);
@@ -180,90 +173,74 @@ nfs41_callback_up(struct svc_serv *serv, struct rpc_xprt *xprt)
180 svc_xprt_put(serv->sv_bc_xprt); 173 svc_xprt_put(serv->sv_bc_xprt);
181 serv->sv_bc_xprt = NULL; 174 serv->sv_bc_xprt = NULL;
182 } 175 }
183out:
184 dprintk("--> %s return %ld\n", __func__, 176 dprintk("--> %s return %ld\n", __func__,
185 IS_ERR(rqstp) ? PTR_ERR(rqstp) : 0); 177 IS_ERR(rqstp) ? PTR_ERR(rqstp) : 0);
186 return rqstp; 178 return rqstp;
187} 179}
188 180
189static inline int nfs_minorversion_callback_svc_setup(u32 minorversion, 181static void nfs_minorversion_callback_svc_setup(struct svc_serv *serv,
190 struct svc_serv *serv, struct rpc_xprt *xprt,
191 struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp)) 182 struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp))
192{ 183{
193 if (minorversion) { 184 *rqstpp = nfs41_callback_up(serv);
194 *rqstpp = nfs41_callback_up(serv, xprt); 185 *callback_svc = nfs41_callback_svc;
195 *callback_svc = nfs41_callback_svc;
196 }
197 return minorversion;
198} 186}
199 187
200static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, 188static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt,
201 struct nfs_callback_data *cb_info) 189 struct svc_serv *serv)
202{ 190{
203 if (minorversion) 191 if (minorversion)
204 xprt->bc_serv = cb_info->serv; 192 /*
193 * Save the svc_serv in the transport so that it can
194 * be referenced when the session backchannel is initialized
195 */
196 xprt->bc_serv = serv;
205} 197}
206#else 198#else
207static inline int nfs_minorversion_callback_svc_setup(u32 minorversion, 199static int nfs41_callback_up_net(struct svc_serv *serv, struct net *net)
208 struct svc_serv *serv, struct rpc_xprt *xprt,
209 struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp))
210{ 200{
211 return 0; 201 return 0;
212} 202}
213 203
204static void nfs_minorversion_callback_svc_setup(struct svc_serv *serv,
205 struct svc_rqst **rqstpp, int (**callback_svc)(void *vrqstp))
206{
207 *rqstpp = ERR_PTR(-ENOTSUPP);
208 *callback_svc = ERR_PTR(-ENOTSUPP);
209}
210
214static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt, 211static inline void nfs_callback_bc_serv(u32 minorversion, struct rpc_xprt *xprt,
215 struct nfs_callback_data *cb_info) 212 struct svc_serv *serv)
216{ 213{
217} 214}
218#endif /* CONFIG_NFS_V4_1 */ 215#endif /* CONFIG_NFS_V4_1 */
219 216
220/* 217static int nfs_callback_start_svc(int minorversion, struct rpc_xprt *xprt,
221 * Bring up the callback thread if it is not already up. 218 struct svc_serv *serv)
222 */
223int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)
224{ 219{
225 struct svc_serv *serv = NULL;
226 struct svc_rqst *rqstp; 220 struct svc_rqst *rqstp;
227 int (*callback_svc)(void *vrqstp); 221 int (*callback_svc)(void *vrqstp);
228 struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; 222 struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion];
229 char svc_name[12]; 223 char svc_name[12];
230 int ret = 0; 224 int ret;
231 int minorversion_setup;
232 struct net *net = &init_net;
233 225
234 mutex_lock(&nfs_callback_mutex); 226 nfs_callback_bc_serv(minorversion, xprt, serv);
235 if (cb_info->users++ || cb_info->task != NULL) {
236 nfs_callback_bc_serv(minorversion, xprt, cb_info);
237 goto out;
238 }
239 serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL);
240 if (!serv) {
241 ret = -ENOMEM;
242 goto out_err;
243 }
244 /* As there is only one thread we need to over-ride the
245 * default maximum of 80 connections
246 */
247 serv->sv_maxconn = 1024;
248 227
249 ret = svc_bind(serv, net); 228 if (cb_info->task)
250 if (ret < 0) { 229 return 0;
251 printk(KERN_WARNING "NFS: bind callback service failed\n");
252 goto out_err;
253 }
254 230
255 minorversion_setup = nfs_minorversion_callback_svc_setup(minorversion, 231 switch (minorversion) {
256 serv, xprt, &rqstp, &callback_svc); 232 case 0:
257 if (!minorversion_setup) {
258 /* v4.0 callback setup */ 233 /* v4.0 callback setup */
259 rqstp = nfs4_callback_up(serv, xprt); 234 rqstp = nfs4_callback_up(serv);
260 callback_svc = nfs4_callback_svc; 235 callback_svc = nfs4_callback_svc;
236 break;
237 default:
238 nfs_minorversion_callback_svc_setup(serv,
239 &rqstp, &callback_svc);
261 } 240 }
262 241
263 if (IS_ERR(rqstp)) { 242 if (IS_ERR(rqstp))
264 ret = PTR_ERR(rqstp); 243 return PTR_ERR(rqstp);
265 goto out_err;
266 }
267 244
268 svc_sock_update_bufs(serv); 245 svc_sock_update_bufs(serv);
269 246
@@ -276,41 +253,165 @@ int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)
276 svc_exit_thread(cb_info->rqst); 253 svc_exit_thread(cb_info->rqst);
277 cb_info->rqst = NULL; 254 cb_info->rqst = NULL;
278 cb_info->task = NULL; 255 cb_info->task = NULL;
279 goto out_err; 256 return PTR_ERR(cb_info->task);
257 }
258 dprintk("nfs_callback_up: service started\n");
259 return 0;
260}
261
262static void nfs_callback_down_net(u32 minorversion, struct svc_serv *serv, struct net *net)
263{
264 struct nfs_net *nn = net_generic(net, nfs_net_id);
265
266 if (--nn->cb_users[minorversion])
267 return;
268
269 dprintk("NFS: destroy per-net callback data; net=%p\n", net);
270 svc_shutdown_net(serv, net);
271}
272
273static int nfs_callback_up_net(int minorversion, struct svc_serv *serv, struct net *net)
274{
275 struct nfs_net *nn = net_generic(net, nfs_net_id);
276 int ret;
277
278 if (nn->cb_users[minorversion]++)
279 return 0;
280
281 dprintk("NFS: create per-net callback data; net=%p\n", net);
282
283 ret = svc_bind(serv, net);
284 if (ret < 0) {
285 printk(KERN_WARNING "NFS: bind callback service failed\n");
286 goto err_bind;
287 }
288
289 switch (minorversion) {
290 case 0:
291 ret = nfs4_callback_up_net(serv, net);
292 break;
293 case 1:
294 ret = nfs41_callback_up_net(serv, net);
295 break;
296 default:
297 printk(KERN_ERR "NFS: unknown callback version: %d\n",
298 minorversion);
299 ret = -EINVAL;
300 break;
280 } 301 }
281out: 302
303 if (ret < 0) {
304 printk(KERN_ERR "NFS: callback service start failed\n");
305 goto err_socks;
306 }
307 return 0;
308
309err_socks:
310 svc_rpcb_cleanup(serv, net);
311err_bind:
312 dprintk("NFS: Couldn't create callback socket: err = %d; "
313 "net = %p\n", ret, net);
314 return ret;
315}
316
317static struct svc_serv *nfs_callback_create_svc(int minorversion)
318{
319 struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion];
320 struct svc_serv *serv;
321
322 /*
323 * Check whether we're already up and running.
324 */
325 if (cb_info->task) {
326 /*
327 * Note: increase service usage, because later in case of error
328 * svc_destroy() will be called.
329 */
330 svc_get(cb_info->serv);
331 return cb_info->serv;
332 }
333
334 /*
335 * Sanity check: if there's no task,
336 * we should be the first user ...
337 */
338 if (cb_info->users)
339 printk(KERN_WARNING "nfs_callback_create_svc: no kthread, %d users??\n",
340 cb_info->users);
341
342 serv = svc_create(&nfs4_callback_program, NFS4_CALLBACK_BUFSIZE, NULL);
343 if (!serv) {
344 printk(KERN_ERR "nfs_callback_create_svc: create service failed\n");
345 return ERR_PTR(-ENOMEM);
346 }
347 /* As there is only one thread we need to over-ride the
348 * default maximum of 80 connections
349 */
350 serv->sv_maxconn = 1024;
351 dprintk("nfs_callback_create_svc: service created\n");
352 return serv;
353}
354
355/*
356 * Bring up the callback thread if it is not already up.
357 */
358int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt)
359{
360 struct svc_serv *serv;
361 struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion];
362 int ret;
363 struct net *net = xprt->xprt_net;
364
365 mutex_lock(&nfs_callback_mutex);
366
367 serv = nfs_callback_create_svc(minorversion);
368 if (IS_ERR(serv)) {
369 ret = PTR_ERR(serv);
370 goto err_create;
371 }
372
373 ret = nfs_callback_up_net(minorversion, serv, net);
374 if (ret < 0)
375 goto err_net;
376
377 ret = nfs_callback_start_svc(minorversion, xprt, serv);
378 if (ret < 0)
379 goto err_start;
380
381 cb_info->users++;
282 /* 382 /*
283 * svc_create creates the svc_serv with sv_nrthreads == 1, and then 383 * svc_create creates the svc_serv with sv_nrthreads == 1, and then
284 * svc_prepare_thread increments that. So we need to call svc_destroy 384 * svc_prepare_thread increments that. So we need to call svc_destroy
285 * on both success and failure so that the refcount is 1 when the 385 * on both success and failure so that the refcount is 1 when the
286 * thread exits. 386 * thread exits.
287 */ 387 */
288 if (serv) 388err_net:
289 svc_destroy(serv); 389 svc_destroy(serv);
390err_create:
290 mutex_unlock(&nfs_callback_mutex); 391 mutex_unlock(&nfs_callback_mutex);
291 return ret; 392 return ret;
292out_err: 393
293 dprintk("NFS: Couldn't create callback socket or server thread; " 394err_start:
294 "err = %d\n", ret); 395 nfs_callback_down_net(minorversion, serv, net);
295 cb_info->users--; 396 dprintk("NFS: Couldn't create server thread; err = %d\n", ret);
296 if (serv) 397 goto err_net;
297 svc_shutdown_net(serv, net);
298 goto out;
299} 398}
300 399
301/* 400/*
302 * Kill the callback thread if it's no longer being used. 401 * Kill the callback thread if it's no longer being used.
303 */ 402 */
304void nfs_callback_down(int minorversion) 403void nfs_callback_down(int minorversion, struct net *net)
305{ 404{
306 struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion]; 405 struct nfs_callback_data *cb_info = &nfs_callback_info[minorversion];
307 406
308 mutex_lock(&nfs_callback_mutex); 407 mutex_lock(&nfs_callback_mutex);
408 nfs_callback_down_net(minorversion, cb_info->serv, net);
309 cb_info->users--; 409 cb_info->users--;
310 if (cb_info->users == 0 && cb_info->task != NULL) { 410 if (cb_info->users == 0 && cb_info->task != NULL) {
311 kthread_stop(cb_info->task); 411 kthread_stop(cb_info->task);
312 svc_shutdown_net(cb_info->serv, &init_net); 412 dprintk("nfs_callback_down: service stopped\n");
313 svc_exit_thread(cb_info->rqst); 413 svc_exit_thread(cb_info->rqst);
414 dprintk("nfs_callback_down: service destroyed\n");
314 cb_info->serv = NULL; 415 cb_info->serv = NULL;
315 cb_info->rqst = NULL; 416 cb_info->rqst = NULL;
316 cb_info->task = NULL; 417 cb_info->task = NULL;
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index b44d7b128b71..4251c2ae06ad 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -194,7 +194,7 @@ extern __be32 nfs4_callback_recall(struct cb_recallargs *args, void *dummy,
194 struct cb_process_state *cps); 194 struct cb_process_state *cps);
195#if IS_ENABLED(CONFIG_NFS_V4) 195#if IS_ENABLED(CONFIG_NFS_V4)
196extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt); 196extern int nfs_callback_up(u32 minorversion, struct rpc_xprt *xprt);
197extern void nfs_callback_down(int minorversion); 197extern void nfs_callback_down(int minorversion, struct net *net);
198extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation, 198extern int nfs4_validate_delegation_stateid(struct nfs_delegation *delegation,
199 const nfs4_stateid *stateid); 199 const nfs4_stateid *stateid);
200extern int nfs4_set_callback_sessionid(struct nfs_client *clp); 200extern int nfs4_set_callback_sessionid(struct nfs_client *clp);
@@ -209,6 +209,5 @@ extern int nfs4_set_callback_sessionid(struct nfs_client *clp);
209 209
210extern unsigned int nfs_callback_set_tcpport; 210extern unsigned int nfs_callback_set_tcpport;
211extern unsigned short nfs_callback_tcpport; 211extern unsigned short nfs_callback_tcpport;
212extern unsigned short nfs_callback_tcpport6;
213 212
214#endif /* __LINUX_FS_NFS_CALLBACK_H */ 213#endif /* __LINUX_FS_NFS_CALLBACK_H */
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index 1b5d809a105e..76b4a7a3e559 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -122,7 +122,15 @@ static struct pnfs_layout_hdr * get_layout_by_fh_locked(struct nfs_client *clp,
122 ino = igrab(lo->plh_inode); 122 ino = igrab(lo->plh_inode);
123 if (!ino) 123 if (!ino)
124 continue; 124 continue;
125 get_layout_hdr(lo); 125 spin_lock(&ino->i_lock);
126 /* Is this layout in the process of being freed? */
127 if (NFS_I(ino)->layout != lo) {
128 spin_unlock(&ino->i_lock);
129 iput(ino);
130 continue;
131 }
132 pnfs_get_layout_hdr(lo);
133 spin_unlock(&ino->i_lock);
126 return lo; 134 return lo;
127 } 135 }
128 } 136 }
@@ -158,7 +166,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,
158 ino = lo->plh_inode; 166 ino = lo->plh_inode;
159 spin_lock(&ino->i_lock); 167 spin_lock(&ino->i_lock);
160 if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || 168 if (test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
161 mark_matching_lsegs_invalid(lo, &free_me_list, 169 pnfs_mark_matching_lsegs_invalid(lo, &free_me_list,
162 &args->cbl_range)) 170 &args->cbl_range))
163 rv = NFS4ERR_DELAY; 171 rv = NFS4ERR_DELAY;
164 else 172 else
@@ -166,7 +174,7 @@ static u32 initiate_file_draining(struct nfs_client *clp,
166 pnfs_set_layout_stateid(lo, &args->cbl_stateid, true); 174 pnfs_set_layout_stateid(lo, &args->cbl_stateid, true);
167 spin_unlock(&ino->i_lock); 175 spin_unlock(&ino->i_lock);
168 pnfs_free_lseg_list(&free_me_list); 176 pnfs_free_lseg_list(&free_me_list);
169 put_layout_hdr(lo); 177 pnfs_put_layout_hdr(lo);
170 iput(ino); 178 iput(ino);
171 return rv; 179 return rv;
172} 180}
@@ -196,9 +204,18 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
196 continue; 204 continue;
197 205
198 list_for_each_entry(lo, &server->layouts, plh_layouts) { 206 list_for_each_entry(lo, &server->layouts, plh_layouts) {
199 if (!igrab(lo->plh_inode)) 207 ino = igrab(lo->plh_inode);
208 if (!ino)
209 continue;
210 spin_lock(&ino->i_lock);
211 /* Is this layout in the process of being freed? */
212 if (NFS_I(ino)->layout != lo) {
213 spin_unlock(&ino->i_lock);
214 iput(ino);
200 continue; 215 continue;
201 get_layout_hdr(lo); 216 }
217 pnfs_get_layout_hdr(lo);
218 spin_unlock(&ino->i_lock);
202 BUG_ON(!list_empty(&lo->plh_bulk_recall)); 219 BUG_ON(!list_empty(&lo->plh_bulk_recall));
203 list_add(&lo->plh_bulk_recall, &recall_list); 220 list_add(&lo->plh_bulk_recall, &recall_list);
204 } 221 }
@@ -211,12 +228,12 @@ static u32 initiate_bulk_draining(struct nfs_client *clp,
211 ino = lo->plh_inode; 228 ino = lo->plh_inode;
212 spin_lock(&ino->i_lock); 229 spin_lock(&ino->i_lock);
213 set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags); 230 set_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
214 if (mark_matching_lsegs_invalid(lo, &free_me_list, &range)) 231 if (pnfs_mark_matching_lsegs_invalid(lo, &free_me_list, &range))
215 rv = NFS4ERR_DELAY; 232 rv = NFS4ERR_DELAY;
216 list_del_init(&lo->plh_bulk_recall); 233 list_del_init(&lo->plh_bulk_recall);
217 spin_unlock(&ino->i_lock); 234 spin_unlock(&ino->i_lock);
218 pnfs_free_lseg_list(&free_me_list); 235 pnfs_free_lseg_list(&free_me_list);
219 put_layout_hdr(lo); 236 pnfs_put_layout_hdr(lo);
220 iput(ino); 237 iput(ino);
221 } 238 }
222 return rv; 239 return rv;
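Both hunks above add the same guard: after igrab() pins the inode, the layout pointer is re-checked under i_lock so a header that is mid-teardown is skipped rather than referenced. A standalone model of that check-after-grab pattern, with a pthread mutex standing in for the spinlock and invented type names:

/* Model: pin the owner, then confirm under the owner's lock that it still
 * points at the object before taking a longer-lived reference. */
#include <pthread.h>
#include <stddef.h>

struct layout { int refcount; };       /* stand-in for pnfs_layout_hdr */

struct owner {                         /* stand-in for the NFS inode   */
    pthread_mutex_t lock;              /* stand-in for inode->i_lock   */
    struct layout *layout;             /* may be cleared concurrently  */
};

/* Returns lo with an extra reference if still attached, NULL otherwise. */
static struct layout *get_layout_if_live(struct owner *o, struct layout *lo)
{
    struct layout *ret = NULL;

    pthread_mutex_lock(&o->lock);
    if (o->layout == lo) {             /* not in the process of being freed */
        lo->refcount++;                /* plays the role of pnfs_get_layout_hdr() */
        ret = lo;
    }
    pthread_mutex_unlock(&o->lock);
    return ret;
}

int main(void)
{
    struct layout lo = { .refcount = 1 };
    struct owner o = { PTHREAD_MUTEX_INITIALIZER, &lo };

    return get_layout_if_live(&o, &lo) ? 0 : 1;
}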
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index 99694442b93f..8b39a42ac35e 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -93,10 +93,10 @@ static struct nfs_subversion *find_nfs_version(unsigned int version)
93 spin_unlock(&nfs_version_lock); 93 spin_unlock(&nfs_version_lock);
94 return nfs; 94 return nfs;
95 } 95 }
96 }; 96 }
97 97
98 spin_unlock(&nfs_version_lock); 98 spin_unlock(&nfs_version_lock);
99 return ERR_PTR(-EPROTONOSUPPORT);; 99 return ERR_PTR(-EPROTONOSUPPORT);
100} 100}
101 101
102struct nfs_subversion *get_nfs_version(unsigned int version) 102struct nfs_subversion *get_nfs_version(unsigned int version)
@@ -498,7 +498,8 @@ nfs_get_client(const struct nfs_client_initdata *cl_init,
498 return nfs_found_client(cl_init, clp); 498 return nfs_found_client(cl_init, clp);
499 } 499 }
500 if (new) { 500 if (new) {
501 list_add(&new->cl_share_link, &nn->nfs_client_list); 501 list_add_tail(&new->cl_share_link,
502 &nn->nfs_client_list);
502 spin_unlock(&nn->nfs_client_lock); 503 spin_unlock(&nn->nfs_client_lock);
503 new->cl_flags = cl_init->init_flags; 504 new->cl_flags = cl_init->init_flags;
504 return rpc_ops->init_client(new, timeparms, ip_addr, 505 return rpc_ops->init_client(new, timeparms, ip_addr,
@@ -668,7 +669,8 @@ int nfs_init_server_rpcclient(struct nfs_server *server,
668{ 669{
669 struct nfs_client *clp = server->nfs_client; 670 struct nfs_client *clp = server->nfs_client;
670 671
671 server->client = rpc_clone_client(clp->cl_rpcclient); 672 server->client = rpc_clone_client_set_auth(clp->cl_rpcclient,
673 pseudoflavour);
672 if (IS_ERR(server->client)) { 674 if (IS_ERR(server->client)) {
673 dprintk("%s: couldn't create rpc_client!\n", __func__); 675 dprintk("%s: couldn't create rpc_client!\n", __func__);
674 return PTR_ERR(server->client); 676 return PTR_ERR(server->client);
@@ -678,16 +680,6 @@ int nfs_init_server_rpcclient(struct nfs_server *server,
678 timeo, 680 timeo,
679 sizeof(server->client->cl_timeout_default)); 681 sizeof(server->client->cl_timeout_default));
680 server->client->cl_timeout = &server->client->cl_timeout_default; 682 server->client->cl_timeout = &server->client->cl_timeout_default;
681
682 if (pseudoflavour != clp->cl_rpcclient->cl_auth->au_flavor) {
683 struct rpc_auth *auth;
684
685 auth = rpcauth_create(pseudoflavour, server->client);
686 if (IS_ERR(auth)) {
687 dprintk("%s: couldn't create credcache!\n", __func__);
688 return PTR_ERR(auth);
689 }
690 }
691 server->client->cl_softrtry = 0; 683 server->client->cl_softrtry = 0;
692 if (server->flags & NFS_MOUNT_SOFT) 684 if (server->flags & NFS_MOUNT_SOFT)
693 server->client->cl_softrtry = 1; 685 server->client->cl_softrtry = 1;
@@ -761,6 +753,8 @@ static int nfs_init_server(struct nfs_server *server,
761 data->timeo, data->retrans); 753 data->timeo, data->retrans);
762 if (data->flags & NFS_MOUNT_NORESVPORT) 754 if (data->flags & NFS_MOUNT_NORESVPORT)
763 set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); 755 set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags);
756 if (server->options & NFS_OPTION_MIGRATION)
757 set_bit(NFS_CS_MIGRATION, &cl_init.init_flags);
764 758
765 /* Allocate or find a client reference we can use */ 759 /* Allocate or find a client reference we can use */
766 clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX); 760 clp = nfs_get_client(&cl_init, &timeparms, NULL, RPC_AUTH_UNIX);
@@ -855,7 +849,6 @@ static void nfs_server_set_fsinfo(struct nfs_server *server,
855 if (server->wsize > NFS_MAX_FILE_IO_SIZE) 849 if (server->wsize > NFS_MAX_FILE_IO_SIZE)
856 server->wsize = NFS_MAX_FILE_IO_SIZE; 850 server->wsize = NFS_MAX_FILE_IO_SIZE;
857 server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 851 server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
858 server->pnfs_blksize = fsinfo->blksize;
859 852
860 server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL); 853 server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
861 854
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 627f108ede23..ce8cb926526b 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -2072,7 +2072,7 @@ found:
2072 nfs_access_free_entry(entry); 2072 nfs_access_free_entry(entry);
2073} 2073}
2074 2074
2075static void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set) 2075void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set)
2076{ 2076{
2077 struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL); 2077 struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL);
2078 if (cache == NULL) 2078 if (cache == NULL)
@@ -2098,6 +2098,20 @@ static void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *s
2098 spin_unlock(&nfs_access_lru_lock); 2098 spin_unlock(&nfs_access_lru_lock);
2099 } 2099 }
2100} 2100}
2101EXPORT_SYMBOL_GPL(nfs_access_add_cache);
2102
2103void nfs_access_set_mask(struct nfs_access_entry *entry, u32 access_result)
2104{
2105 entry->mask = 0;
2106 if (access_result & NFS4_ACCESS_READ)
2107 entry->mask |= MAY_READ;
2108 if (access_result &
2109 (NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE))
2110 entry->mask |= MAY_WRITE;
2111 if (access_result & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE))
2112 entry->mask |= MAY_EXEC;
2113}
2114EXPORT_SYMBOL_GPL(nfs_access_set_mask);
2101 2115
2102static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask) 2116static int nfs_do_access(struct inode *inode, struct rpc_cred *cred, int mask)
2103{ 2117{
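nfs_access_set_mask() folds the server's ACCESS result bits into a MAY_READ/MAY_WRITE/MAY_EXEC style mask that the access cache can store. A self-contained model of that mapping; the constants below are local stand-ins for the sketch, not the kernel's NFS4_ACCESS_* and MAY_* definitions.

#include <assert.h>

#define ACCESS_READ    0x01
#define ACCESS_LOOKUP  0x02
#define ACCESS_MODIFY  0x04
#define ACCESS_EXTEND  0x08
#define ACCESS_DELETE  0x10
#define ACCESS_EXECUTE 0x20

#define MAY_EXEC  0x1
#define MAY_WRITE 0x2
#define MAY_READ  0x4

/* Same shape as the hunk above: read maps to MAY_READ, any of the three
 * write-like bits map to MAY_WRITE, lookup/execute map to MAY_EXEC. */
static unsigned int access_to_may(unsigned int access_result)
{
    unsigned int mask = 0;

    if (access_result & ACCESS_READ)
        mask |= MAY_READ;
    if (access_result & (ACCESS_MODIFY | ACCESS_EXTEND | ACCESS_DELETE))
        mask |= MAY_WRITE;
    if (access_result & (ACCESS_LOOKUP | ACCESS_EXECUTE))
        mask |= MAY_EXEC;
    return mask;
}

int main(void)
{
    /* a read+execute grant from the server becomes MAY_READ | MAY_EXEC */
    assert(access_to_may(ACCESS_READ | ACCESS_EXECUTE) ==
           (MAY_READ | MAY_EXEC));
    return 0;
}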
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 1ba385b7c90d..cae26cbd59ee 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -46,6 +46,7 @@
46#include <linux/kref.h> 46#include <linux/kref.h>
47#include <linux/slab.h> 47#include <linux/slab.h>
48#include <linux/task_io_accounting_ops.h> 48#include <linux/task_io_accounting_ops.h>
49#include <linux/module.h>
49 50
50#include <linux/nfs_fs.h> 51#include <linux/nfs_fs.h>
51#include <linux/nfs_page.h> 52#include <linux/nfs_page.h>
@@ -78,6 +79,7 @@ struct nfs_direct_req {
78 atomic_t io_count; /* i/os we're waiting for */ 79 atomic_t io_count; /* i/os we're waiting for */
79 spinlock_t lock; /* protect completion state */ 80 spinlock_t lock; /* protect completion state */
80 ssize_t count, /* bytes actually processed */ 81 ssize_t count, /* bytes actually processed */
82 bytes_left, /* bytes left to be sent */
81 error; /* any reported error */ 83 error; /* any reported error */
82 struct completion completion; /* wait for i/o completion */ 84 struct completion completion; /* wait for i/o completion */
83 85
@@ -190,6 +192,12 @@ static void nfs_direct_req_release(struct nfs_direct_req *dreq)
190 kref_put(&dreq->kref, nfs_direct_req_free); 192 kref_put(&dreq->kref, nfs_direct_req_free);
191} 193}
192 194
195ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq)
196{
197 return dreq->bytes_left;
198}
199EXPORT_SYMBOL_GPL(nfs_dreq_bytes_left);
200
193/* 201/*
194 * Collects and returns the final error value/byte-count. 202 * Collects and returns the final error value/byte-count.
195 */ 203 */
@@ -390,6 +398,7 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
390 user_addr += req_len; 398 user_addr += req_len;
391 pos += req_len; 399 pos += req_len;
392 count -= req_len; 400 count -= req_len;
401 dreq->bytes_left -= req_len;
393 } 402 }
394 /* The nfs_page now hold references to these pages */ 403 /* The nfs_page now hold references to these pages */
395 nfs_direct_release_pages(pagevec, npages); 404 nfs_direct_release_pages(pagevec, npages);
@@ -450,23 +459,28 @@ static ssize_t nfs_direct_read(struct kiocb *iocb, const struct iovec *iov,
450 ssize_t result = -ENOMEM; 459 ssize_t result = -ENOMEM;
451 struct inode *inode = iocb->ki_filp->f_mapping->host; 460 struct inode *inode = iocb->ki_filp->f_mapping->host;
452 struct nfs_direct_req *dreq; 461 struct nfs_direct_req *dreq;
462 struct nfs_lock_context *l_ctx;
453 463
454 dreq = nfs_direct_req_alloc(); 464 dreq = nfs_direct_req_alloc();
455 if (dreq == NULL) 465 if (dreq == NULL)
456 goto out; 466 goto out;
457 467
458 dreq->inode = inode; 468 dreq->inode = inode;
469 dreq->bytes_left = iov_length(iov, nr_segs);
459 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); 470 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
460 dreq->l_ctx = nfs_get_lock_context(dreq->ctx); 471 l_ctx = nfs_get_lock_context(dreq->ctx);
461 if (dreq->l_ctx == NULL) 472 if (IS_ERR(l_ctx)) {
473 result = PTR_ERR(l_ctx);
462 goto out_release; 474 goto out_release;
475 }
476 dreq->l_ctx = l_ctx;
463 if (!is_sync_kiocb(iocb)) 477 if (!is_sync_kiocb(iocb))
464 dreq->iocb = iocb; 478 dreq->iocb = iocb;
465 479
480 NFS_I(inode)->read_io += iov_length(iov, nr_segs);
466 result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio); 481 result = nfs_direct_read_schedule_iovec(dreq, iov, nr_segs, pos, uio);
467 if (!result) 482 if (!result)
468 result = nfs_direct_wait(dreq); 483 result = nfs_direct_wait(dreq);
469 NFS_I(inode)->read_io += result;
470out_release: 484out_release:
471 nfs_direct_req_release(dreq); 485 nfs_direct_req_release(dreq);
472out: 486out:
@@ -706,6 +720,7 @@ static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *d
706 user_addr += req_len; 720 user_addr += req_len;
707 pos += req_len; 721 pos += req_len;
708 count -= req_len; 722 count -= req_len;
723 dreq->bytes_left -= req_len;
709 } 724 }
710 /* The nfs_page now hold references to these pages */ 725 /* The nfs_page now hold references to these pages */
711 nfs_direct_release_pages(pagevec, npages); 726 nfs_direct_release_pages(pagevec, npages);
@@ -814,6 +829,7 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
814 get_dreq(dreq); 829 get_dreq(dreq);
815 atomic_inc(&inode->i_dio_count); 830 atomic_inc(&inode->i_dio_count);
816 831
832 NFS_I(dreq->inode)->write_io += iov_length(iov, nr_segs);
817 for (seg = 0; seg < nr_segs; seg++) { 833 for (seg = 0; seg < nr_segs; seg++) {
818 const struct iovec *vec = &iov[seg]; 834 const struct iovec *vec = &iov[seg];
819 result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio); 835 result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio);
@@ -825,7 +841,6 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
825 pos += vec->iov_len; 841 pos += vec->iov_len;
826 } 842 }
827 nfs_pageio_complete(&desc); 843 nfs_pageio_complete(&desc);
828 NFS_I(dreq->inode)->write_io += desc.pg_bytes_written;
829 844
830 /* 845 /*
831 * If no bytes were started, return the error, and let the 846 * If no bytes were started, return the error, and let the
@@ -849,16 +864,21 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, const struct iovec *iov,
849 ssize_t result = -ENOMEM; 864 ssize_t result = -ENOMEM;
850 struct inode *inode = iocb->ki_filp->f_mapping->host; 865 struct inode *inode = iocb->ki_filp->f_mapping->host;
851 struct nfs_direct_req *dreq; 866 struct nfs_direct_req *dreq;
867 struct nfs_lock_context *l_ctx;
852 868
853 dreq = nfs_direct_req_alloc(); 869 dreq = nfs_direct_req_alloc();
854 if (!dreq) 870 if (!dreq)
855 goto out; 871 goto out;
856 872
857 dreq->inode = inode; 873 dreq->inode = inode;
874 dreq->bytes_left = count;
858 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp)); 875 dreq->ctx = get_nfs_open_context(nfs_file_open_context(iocb->ki_filp));
859 dreq->l_ctx = nfs_get_lock_context(dreq->ctx); 876 l_ctx = nfs_get_lock_context(dreq->ctx);
860 if (dreq->l_ctx == NULL) 877 if (IS_ERR(l_ctx)) {
878 result = PTR_ERR(l_ctx);
861 goto out_release; 879 goto out_release;
880 }
881 dreq->l_ctx = l_ctx;
862 if (!is_sync_kiocb(iocb)) 882 if (!is_sync_kiocb(iocb))
863 dreq->iocb = iocb; 883 dreq->iocb = iocb;
864 884
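The new bytes_left field records how much of a direct I/O request has not yet been handed to lower layers, so a later stage can size its own request (a layout request, for instance) to what actually remains instead of a fixed maximum. A toy standalone model of the accounting; names and sizes here are illustrative only.

#include <stdio.h>
#include <stddef.h>

struct direct_req {
    size_t count;        /* bytes scheduled so far               */
    size_t bytes_left;   /* bytes not yet handed to lower layers */
};

static void schedule_segment(struct direct_req *dreq, size_t req_len)
{
    dreq->count += req_len;
    dreq->bytes_left -= req_len;    /* mirrors "dreq->bytes_left -= req_len" */
}

int main(void)
{
    /* the request starts out covering the full iovec length */
    struct direct_req dreq = { .count = 0, .bytes_left = 1 << 20 };

    schedule_segment(&dreq, 256 * 1024);
    /* a request issued now could ask for dreq.bytes_left bytes */
    printf("scheduled %zu, remaining %zu\n", dreq.count, dreq.bytes_left);
    return 0;
}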
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index f692be97676d..582bb8866131 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -259,7 +259,7 @@ nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync)
259 struct dentry *dentry = file->f_path.dentry; 259 struct dentry *dentry = file->f_path.dentry;
260 struct nfs_open_context *ctx = nfs_file_open_context(file); 260 struct nfs_open_context *ctx = nfs_file_open_context(file);
261 struct inode *inode = dentry->d_inode; 261 struct inode *inode = dentry->d_inode;
262 int have_error, status; 262 int have_error, do_resend, status;
263 int ret = 0; 263 int ret = 0;
264 264
265 dprintk("NFS: fsync file(%s/%s) datasync %d\n", 265 dprintk("NFS: fsync file(%s/%s) datasync %d\n",
@@ -267,15 +267,23 @@ nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync)
267 datasync); 267 datasync);
268 268
269 nfs_inc_stats(inode, NFSIOS_VFSFSYNC); 269 nfs_inc_stats(inode, NFSIOS_VFSFSYNC);
270 do_resend = test_and_clear_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
270 have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); 271 have_error = test_and_clear_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
271 status = nfs_commit_inode(inode, FLUSH_SYNC); 272 status = nfs_commit_inode(inode, FLUSH_SYNC);
272 if (status >= 0 && ret < 0)
273 status = ret;
274 have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags); 273 have_error |= test_bit(NFS_CONTEXT_ERROR_WRITE, &ctx->flags);
275 if (have_error) 274 if (have_error) {
276 ret = xchg(&ctx->error, 0); 275 ret = xchg(&ctx->error, 0);
277 if (!ret && status < 0) 276 if (ret)
277 goto out;
278 }
279 if (status < 0) {
278 ret = status; 280 ret = status;
281 goto out;
282 }
283 do_resend |= test_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags);
284 if (do_resend)
285 ret = -EAGAIN;
286out:
279 return ret; 287 return ret;
280} 288}
281EXPORT_SYMBOL_GPL(nfs_file_fsync_commit); 289EXPORT_SYMBOL_GPL(nfs_file_fsync_commit);
@@ -286,13 +294,22 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
286 int ret; 294 int ret;
287 struct inode *inode = file->f_path.dentry->d_inode; 295 struct inode *inode = file->f_path.dentry->d_inode;
288 296
289 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 297 do {
290 if (ret != 0) 298 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
291 goto out; 299 if (ret != 0)
292 mutex_lock(&inode->i_mutex); 300 break;
293 ret = nfs_file_fsync_commit(file, start, end, datasync); 301 mutex_lock(&inode->i_mutex);
294 mutex_unlock(&inode->i_mutex); 302 ret = nfs_file_fsync_commit(file, start, end, datasync);
295out: 303 mutex_unlock(&inode->i_mutex);
304 /*
305 * If nfs_file_fsync_commit detected a server reboot, then
306 * resend all dirty pages that might have been covered by
307 * the NFS_CONTEXT_RESEND_WRITES flag
308 */
309 start = 0;
310 end = LLONG_MAX;
311 } while (ret == -EAGAIN);
312
296 return ret; 313 return ret;
297} 314}
298 315
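nfs_file_fsync() now loops while nfs_file_fsync_commit() reports -EAGAIN, which happens when NFS_CONTEXT_RESEND_WRITES indicates the commit was invalidated (a server reboot, for example) and the dirty range must be written again from the start of the file. A standalone model of that flag-and-retry protocol, not the kernel API:

#include <errno.h>
#include <stdbool.h>

static bool resend_writes;          /* models NFS_CONTEXT_RESEND_WRITES */

static int commit_once(void)
{
    bool do_resend = resend_writes; /* test-and-clear the flag */

    resend_writes = false;
    /* ...flush dirty pages and send COMMIT here... */
    do_resend |= resend_writes;     /* a reboot may be detected meanwhile */
    return do_resend ? -EAGAIN : 0;
}

static int fsync_with_retry(void)
{
    int ret;

    do {
        /* a real caller also widens the range to the whole file
         * (start = 0, end = LLONG_MAX) before retrying */
        ret = commit_once();
    } while (ret == -EAGAIN);
    return ret;
}

int main(void)
{
    return fsync_with_retry();
}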
diff --git a/fs/nfs/getroot.c b/fs/nfs/getroot.c
index 4654ced096a6..033803c36644 100644
--- a/fs/nfs/getroot.c
+++ b/fs/nfs/getroot.c
@@ -32,6 +32,8 @@
32 32
33#include <asm/uaccess.h> 33#include <asm/uaccess.h>
34 34
35#include "internal.h"
36
35#define NFSDBG_FACILITY NFSDBG_CLIENT 37#define NFSDBG_FACILITY NFSDBG_CLIENT
36 38
37/* 39/*
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c
index a850079467d8..9cc4a3fbf4b0 100644
--- a/fs/nfs/idmap.c
+++ b/fs/nfs/idmap.c
@@ -55,18 +55,19 @@
55static const struct cred *id_resolver_cache; 55static const struct cred *id_resolver_cache;
56static struct key_type key_type_id_resolver_legacy; 56static struct key_type key_type_id_resolver_legacy;
57 57
58struct idmap {
59 struct rpc_pipe *idmap_pipe;
60 struct key_construction *idmap_key_cons;
61 struct mutex idmap_mutex;
62};
63
64struct idmap_legacy_upcalldata { 58struct idmap_legacy_upcalldata {
65 struct rpc_pipe_msg pipe_msg; 59 struct rpc_pipe_msg pipe_msg;
66 struct idmap_msg idmap_msg; 60 struct idmap_msg idmap_msg;
61 struct key_construction *key_cons;
67 struct idmap *idmap; 62 struct idmap *idmap;
68}; 63};
69 64
65struct idmap {
66 struct rpc_pipe *idmap_pipe;
67 struct idmap_legacy_upcalldata *idmap_upcall_data;
68 struct mutex idmap_mutex;
69};
70
70/** 71/**
71 * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields 72 * nfs_fattr_init_names - initialise the nfs_fattr owner_name/group_name fields
72 * @fattr: fully initialised struct nfs_fattr 73 * @fattr: fully initialised struct nfs_fattr
@@ -158,7 +159,7 @@ static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *re
158 return 0; 159 return 0;
159 memcpy(buf, name, namelen); 160 memcpy(buf, name, namelen);
160 buf[namelen] = '\0'; 161 buf[namelen] = '\0';
161 if (strict_strtoul(buf, 0, &val) != 0) 162 if (kstrtoul(buf, 0, &val) != 0)
162 return 0; 163 return 0;
163 *res = val; 164 *res = val;
164 return 1; 165 return 1;
@@ -330,7 +331,6 @@ static ssize_t nfs_idmap_get_key(const char *name, size_t namelen,
330 ret = nfs_idmap_request_key(&key_type_id_resolver_legacy, 331 ret = nfs_idmap_request_key(&key_type_id_resolver_legacy,
331 name, namelen, type, data, 332 name, namelen, type, data,
332 data_size, idmap); 333 data_size, idmap);
333 idmap->idmap_key_cons = NULL;
334 mutex_unlock(&idmap->idmap_mutex); 334 mutex_unlock(&idmap->idmap_mutex);
335 } 335 }
336 return ret; 336 return ret;
@@ -364,7 +364,7 @@ static int nfs_idmap_lookup_id(const char *name, size_t namelen, const char *typ
364 if (data_size <= 0) { 364 if (data_size <= 0) {
365 ret = -EINVAL; 365 ret = -EINVAL;
366 } else { 366 } else {
367 ret = strict_strtol(id_str, 10, &id_long); 367 ret = kstrtol(id_str, 10, &id_long);
368 *id = (__u32)id_long; 368 *id = (__u32)id_long;
369 } 369 }
370 return ret; 370 return ret;
@@ -465,8 +465,6 @@ nfs_idmap_new(struct nfs_client *clp)
465 struct rpc_pipe *pipe; 465 struct rpc_pipe *pipe;
466 int error; 466 int error;
467 467
468 BUG_ON(clp->cl_idmap != NULL);
469
470 idmap = kzalloc(sizeof(*idmap), GFP_KERNEL); 468 idmap = kzalloc(sizeof(*idmap), GFP_KERNEL);
471 if (idmap == NULL) 469 if (idmap == NULL)
472 return -ENOMEM; 470 return -ENOMEM;
@@ -510,7 +508,6 @@ static int __rpc_pipefs_event(struct nfs_client *clp, unsigned long event,
510 508
511 switch (event) { 509 switch (event) {
512 case RPC_PIPEFS_MOUNT: 510 case RPC_PIPEFS_MOUNT:
513 BUG_ON(clp->cl_rpcclient->cl_dentry == NULL);
514 err = __nfs_idmap_register(clp->cl_rpcclient->cl_dentry, 511 err = __nfs_idmap_register(clp->cl_rpcclient->cl_dentry,
515 clp->cl_idmap, 512 clp->cl_idmap,
516 clp->cl_idmap->idmap_pipe); 513 clp->cl_idmap->idmap_pipe);
@@ -632,9 +629,6 @@ static int nfs_idmap_prepare_message(char *desc, struct idmap *idmap,
632 substring_t substr; 629 substring_t substr;
633 int token, ret; 630 int token, ret;
634 631
635 memset(im, 0, sizeof(*im));
636 memset(msg, 0, sizeof(*msg));
637
638 im->im_type = IDMAP_TYPE_GROUP; 632 im->im_type = IDMAP_TYPE_GROUP;
639 token = match_token(desc, nfs_idmap_tokens, &substr); 633 token = match_token(desc, nfs_idmap_tokens, &substr);
640 634
@@ -665,6 +659,35 @@ out:
665 return ret; 659 return ret;
666} 660}
667 661
662static bool
663nfs_idmap_prepare_pipe_upcall(struct idmap *idmap,
664 struct idmap_legacy_upcalldata *data)
665{
666 if (idmap->idmap_upcall_data != NULL) {
667 WARN_ON_ONCE(1);
668 return false;
669 }
670 idmap->idmap_upcall_data = data;
671 return true;
672}
673
674static void
675nfs_idmap_complete_pipe_upcall_locked(struct idmap *idmap, int ret)
676{
677 struct key_construction *cons = idmap->idmap_upcall_data->key_cons;
678
679 kfree(idmap->idmap_upcall_data);
680 idmap->idmap_upcall_data = NULL;
681 complete_request_key(cons, ret);
682}
683
684static void
685nfs_idmap_abort_pipe_upcall(struct idmap *idmap, int ret)
686{
687 if (idmap->idmap_upcall_data != NULL)
688 nfs_idmap_complete_pipe_upcall_locked(idmap, ret);
689}
690
668static int nfs_idmap_legacy_upcall(struct key_construction *cons, 691static int nfs_idmap_legacy_upcall(struct key_construction *cons,
669 const char *op, 692 const char *op,
670 void *aux) 693 void *aux)
@@ -677,29 +700,28 @@ static int nfs_idmap_legacy_upcall(struct key_construction *cons,
677 int ret = -ENOMEM; 700 int ret = -ENOMEM;
678 701
679 /* msg and im are freed in idmap_pipe_destroy_msg */ 702 /* msg and im are freed in idmap_pipe_destroy_msg */
680 data = kmalloc(sizeof(*data), GFP_KERNEL); 703 data = kzalloc(sizeof(*data), GFP_KERNEL);
681 if (!data) 704 if (!data)
682 goto out1; 705 goto out1;
683 706
684 msg = &data->pipe_msg; 707 msg = &data->pipe_msg;
685 im = &data->idmap_msg; 708 im = &data->idmap_msg;
686 data->idmap = idmap; 709 data->idmap = idmap;
710 data->key_cons = cons;
687 711
688 ret = nfs_idmap_prepare_message(key->description, idmap, im, msg); 712 ret = nfs_idmap_prepare_message(key->description, idmap, im, msg);
689 if (ret < 0) 713 if (ret < 0)
690 goto out2; 714 goto out2;
691 715
692 BUG_ON(idmap->idmap_key_cons != NULL); 716 ret = -EAGAIN;
693 idmap->idmap_key_cons = cons; 717 if (!nfs_idmap_prepare_pipe_upcall(idmap, data))
718 goto out2;
694 719
695 ret = rpc_queue_upcall(idmap->idmap_pipe, msg); 720 ret = rpc_queue_upcall(idmap->idmap_pipe, msg);
696 if (ret < 0) 721 if (ret < 0)
697 goto out3; 722 nfs_idmap_abort_pipe_upcall(idmap, ret);
698 723
699 return ret; 724 return ret;
700
701out3:
702 idmap->idmap_key_cons = NULL;
703out2: 725out2:
704 kfree(data); 726 kfree(data);
705out1: 727out1:
@@ -714,21 +736,32 @@ static int nfs_idmap_instantiate(struct key *key, struct key *authkey, char *dat
714 authkey); 736 authkey);
715} 737}
716 738
717static int nfs_idmap_read_message(struct idmap_msg *im, struct key *key, struct key *authkey) 739static int nfs_idmap_read_and_verify_message(struct idmap_msg *im,
740 struct idmap_msg *upcall,
741 struct key *key, struct key *authkey)
718{ 742{
719 char id_str[NFS_UINT_MAXLEN]; 743 char id_str[NFS_UINT_MAXLEN];
720 int ret = -EINVAL; 744 int ret = -ENOKEY;
721 745
746 /* ret = -ENOKEY */
747 if (upcall->im_type != im->im_type || upcall->im_conv != im->im_conv)
748 goto out;
722 switch (im->im_conv) { 749 switch (im->im_conv) {
723 case IDMAP_CONV_NAMETOID: 750 case IDMAP_CONV_NAMETOID:
751 if (strcmp(upcall->im_name, im->im_name) != 0)
752 break;
724 sprintf(id_str, "%d", im->im_id); 753 sprintf(id_str, "%d", im->im_id);
725 ret = nfs_idmap_instantiate(key, authkey, id_str); 754 ret = nfs_idmap_instantiate(key, authkey, id_str);
726 break; 755 break;
727 case IDMAP_CONV_IDTONAME: 756 case IDMAP_CONV_IDTONAME:
757 if (upcall->im_id != im->im_id)
758 break;
728 ret = nfs_idmap_instantiate(key, authkey, im->im_name); 759 ret = nfs_idmap_instantiate(key, authkey, im->im_name);
729 break; 760 break;
761 default:
762 ret = -EINVAL;
730 } 763 }
731 764out:
732 return ret; 765 return ret;
733} 766}
734 767
@@ -740,14 +773,16 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
740 struct key_construction *cons; 773 struct key_construction *cons;
741 struct idmap_msg im; 774 struct idmap_msg im;
742 size_t namelen_in; 775 size_t namelen_in;
743 int ret; 776 int ret = -ENOKEY;
744 777
745 /* If instantiation is successful, anyone waiting for key construction 778 /* If instantiation is successful, anyone waiting for key construction
746 * will have been woken up and someone else may now have used 779 * will have been woken up and someone else may now have used
747 * idmap_key_cons - so after this point we may no longer touch it. 780 * idmap_key_cons - so after this point we may no longer touch it.
748 */ 781 */
749 cons = ACCESS_ONCE(idmap->idmap_key_cons); 782 if (idmap->idmap_upcall_data == NULL)
750 idmap->idmap_key_cons = NULL; 783 goto out_noupcall;
784
785 cons = idmap->idmap_upcall_data->key_cons;
751 786
752 if (mlen != sizeof(im)) { 787 if (mlen != sizeof(im)) {
753 ret = -ENOSPC; 788 ret = -ENOSPC;
@@ -768,16 +803,19 @@ idmap_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
768 if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) { 803 if (namelen_in == 0 || namelen_in == IDMAP_NAMESZ) {
769 ret = -EINVAL; 804 ret = -EINVAL;
770 goto out; 805 goto out;
771 } 806 }
772 807
773 ret = nfs_idmap_read_message(&im, cons->key, cons->authkey); 808 ret = nfs_idmap_read_and_verify_message(&im,
809 &idmap->idmap_upcall_data->idmap_msg,
810 cons->key, cons->authkey);
774 if (ret >= 0) { 811 if (ret >= 0) {
775 key_set_timeout(cons->key, nfs_idmap_cache_timeout); 812 key_set_timeout(cons->key, nfs_idmap_cache_timeout);
776 ret = mlen; 813 ret = mlen;
777 } 814 }
778 815
779out: 816out:
780 complete_request_key(cons, ret); 817 nfs_idmap_complete_pipe_upcall_locked(idmap, ret);
818out_noupcall:
781 return ret; 819 return ret;
782} 820}
783 821
@@ -788,14 +826,9 @@ idmap_pipe_destroy_msg(struct rpc_pipe_msg *msg)
788 struct idmap_legacy_upcalldata, 826 struct idmap_legacy_upcalldata,
789 pipe_msg); 827 pipe_msg);
790 struct idmap *idmap = data->idmap; 828 struct idmap *idmap = data->idmap;
791 struct key_construction *cons; 829
792 if (msg->errno) { 830 if (msg->errno)
793 cons = ACCESS_ONCE(idmap->idmap_key_cons); 831 nfs_idmap_abort_pipe_upcall(idmap, msg->errno);
794 idmap->idmap_key_cons = NULL;
795 complete_request_key(cons, msg->errno);
796 }
797 /* Free memory allocated in nfs_idmap_legacy_upcall() */
798 kfree(data);
799} 832}
800 833
801static void 834static void
@@ -803,7 +836,8 @@ idmap_release_pipe(struct inode *inode)
803{ 836{
804 struct rpc_inode *rpci = RPC_I(inode); 837 struct rpc_inode *rpci = RPC_I(inode);
805 struct idmap *idmap = (struct idmap *)rpci->private; 838 struct idmap *idmap = (struct idmap *)rpci->private;
806 idmap->idmap_key_cons = NULL; 839
840 nfs_idmap_abort_pipe_upcall(idmap, -EPIPE);
807} 841}
808 842
809int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid) 843int nfs_map_name_to_uid(const struct nfs_server *server, const char *name, size_t namelen, __u32 *uid)
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index e4c716d374a8..5c7325c5c5e6 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -547,8 +547,8 @@ EXPORT_SYMBOL_GPL(nfs_getattr);
547static void nfs_init_lock_context(struct nfs_lock_context *l_ctx) 547static void nfs_init_lock_context(struct nfs_lock_context *l_ctx)
548{ 548{
549 atomic_set(&l_ctx->count, 1); 549 atomic_set(&l_ctx->count, 1);
550 l_ctx->lockowner = current->files; 550 l_ctx->lockowner.l_owner = current->files;
551 l_ctx->pid = current->tgid; 551 l_ctx->lockowner.l_pid = current->tgid;
552 INIT_LIST_HEAD(&l_ctx->list); 552 INIT_LIST_HEAD(&l_ctx->list);
553} 553}
554 554
@@ -557,9 +557,9 @@ static struct nfs_lock_context *__nfs_find_lock_context(struct nfs_open_context
557 struct nfs_lock_context *pos; 557 struct nfs_lock_context *pos;
558 558
559 list_for_each_entry(pos, &ctx->lock_context.list, list) { 559 list_for_each_entry(pos, &ctx->lock_context.list, list) {
560 if (pos->lockowner != current->files) 560 if (pos->lockowner.l_owner != current->files)
561 continue; 561 continue;
562 if (pos->pid != current->tgid) 562 if (pos->lockowner.l_pid != current->tgid)
563 continue; 563 continue;
564 atomic_inc(&pos->count); 564 atomic_inc(&pos->count);
565 return pos; 565 return pos;
@@ -578,7 +578,7 @@ struct nfs_lock_context *nfs_get_lock_context(struct nfs_open_context *ctx)
578 spin_unlock(&inode->i_lock); 578 spin_unlock(&inode->i_lock);
579 new = kmalloc(sizeof(*new), GFP_KERNEL); 579 new = kmalloc(sizeof(*new), GFP_KERNEL);
580 if (new == NULL) 580 if (new == NULL)
581 return NULL; 581 return ERR_PTR(-ENOMEM);
582 nfs_init_lock_context(new); 582 nfs_init_lock_context(new);
583 spin_lock(&inode->i_lock); 583 spin_lock(&inode->i_lock);
584 res = __nfs_find_lock_context(ctx); 584 res = __nfs_find_lock_context(ctx);
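nfs_get_lock_context() now reports allocation failure as ERR_PTR(-ENOMEM) instead of NULL, which is why the direct I/O callers earlier in this diff switch to IS_ERR()/PTR_ERR(). A self-contained model of the encoded-errno pointer convention; the helpers below are local re-implementations for the sketch, not the kernel's.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>

/* Encode a small negative errno in the pointer value itself. */
static void *err_ptr(long err)        { return (void *)err; }
static long  ptr_err(const void *ptr) { return (long)ptr; }
static int   is_err(const void *ptr)
{
    return (uintptr_t)ptr >= (uintptr_t)-4095;   /* top page of addresses */
}

static int lock_context_object;       /* stands in for a real allocation */

static void *get_lock_context(int fail)
{
    return fail ? err_ptr(-ENOMEM) : (void *)&lock_context_object;
}

int main(void)
{
    void *l_ctx = get_lock_context(1);

    if (is_err(l_ctx)) {
        fprintf(stderr, "no lock context: error %ld\n", ptr_err(l_ctx));
        return 1;
    }
    return 0;
}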
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 31fdb03225cd..59b133c5d652 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -101,11 +101,11 @@ struct nfs_client_initdata {
101 */ 101 */
102struct nfs_parsed_mount_data { 102struct nfs_parsed_mount_data {
103 int flags; 103 int flags;
104 int rsize, wsize; 104 unsigned int rsize, wsize;
105 int timeo, retrans; 105 unsigned int timeo, retrans;
106 int acregmin, acregmax, 106 unsigned int acregmin, acregmax,
107 acdirmin, acdirmax; 107 acdirmin, acdirmax;
108 int namlen; 108 unsigned int namlen;
109 unsigned int options; 109 unsigned int options;
110 unsigned int bsize; 110 unsigned int bsize;
111 unsigned int auth_flavor_len; 111 unsigned int auth_flavor_len;
@@ -464,6 +464,7 @@ static inline void nfs_inode_dio_wait(struct inode *inode)
464{ 464{
465 inode_dio_wait(inode); 465 inode_dio_wait(inode);
466} 466}
467extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
467 468
468/* nfs4proc.c */ 469/* nfs4proc.c */
469extern void __nfs4_read_done_cb(struct nfs_read_data *); 470extern void __nfs4_read_done_cb(struct nfs_read_data *);
@@ -483,6 +484,12 @@ extern int _nfs4_call_sync_session(struct rpc_clnt *clnt,
483 struct nfs4_sequence_args *args, 484 struct nfs4_sequence_args *args,
484 struct nfs4_sequence_res *res, 485 struct nfs4_sequence_res *res,
485 int cache_reply); 486 int cache_reply);
487extern int nfs40_walk_client_list(struct nfs_client *clp,
488 struct nfs_client **result,
489 struct rpc_cred *cred);
490extern int nfs41_walk_client_list(struct nfs_client *clp,
491 struct nfs_client **result,
492 struct rpc_cred *cred);
486 493
487/* 494/*
488 * Determine the device name as a string 495 * Determine the device name as a string
diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h
index 0539de1b8d1f..8ee1fab83268 100644
--- a/fs/nfs/netns.h
+++ b/fs/nfs/netns.h
@@ -5,6 +5,7 @@
5#ifndef __NFS_NETNS_H__ 5#ifndef __NFS_NETNS_H__
6#define __NFS_NETNS_H__ 6#define __NFS_NETNS_H__
7 7
8#include <linux/nfs4.h>
8#include <net/net_namespace.h> 9#include <net/net_namespace.h>
9#include <net/netns/generic.h> 10#include <net/netns/generic.h>
10 11
@@ -22,6 +23,9 @@ struct nfs_net {
22 struct list_head nfs_volume_list; 23 struct list_head nfs_volume_list;
23#if IS_ENABLED(CONFIG_NFS_V4) 24#if IS_ENABLED(CONFIG_NFS_V4)
24 struct idr cb_ident_idr; /* Protected by nfs_client_lock */ 25 struct idr cb_ident_idr; /* Protected by nfs_client_lock */
26 unsigned short nfs_callback_tcpport;
27 unsigned short nfs_callback_tcpport6;
28 int cb_users[NFS4_MAX_MINOR_VERSION + 1];
25#endif 29#endif
26 spinlock_t nfs_client_lock; 30 spinlock_t nfs_client_lock;
27 struct timespec boot_time; 31 struct timespec boot_time;
diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h
index da0618aeeadb..a525fdefccde 100644
--- a/fs/nfs/nfs4_fs.h
+++ b/fs/nfs/nfs4_fs.h
@@ -132,8 +132,8 @@ struct nfs4_lock_owner {
132struct nfs4_lock_state { 132struct nfs4_lock_state {
133 struct list_head ls_locks; /* Other lock stateids */ 133 struct list_head ls_locks; /* Other lock stateids */
134 struct nfs4_state * ls_state; /* Pointer to open state */ 134 struct nfs4_state * ls_state; /* Pointer to open state */
135#define NFS_LOCK_INITIALIZED 1 135#define NFS_LOCK_INITIALIZED 0
136 int ls_flags; 136 unsigned long ls_flags;
137 struct nfs_seqid_counter ls_seqid; 137 struct nfs_seqid_counter ls_seqid;
138 nfs4_stateid ls_stateid; 138 nfs4_stateid ls_stateid;
139 atomic_t ls_count; 139 atomic_t ls_count;
@@ -191,6 +191,8 @@ struct nfs4_state_recovery_ops {
191 int (*establish_clid)(struct nfs_client *, struct rpc_cred *); 191 int (*establish_clid)(struct nfs_client *, struct rpc_cred *);
192 struct rpc_cred * (*get_clid_cred)(struct nfs_client *); 192 struct rpc_cred * (*get_clid_cred)(struct nfs_client *);
193 int (*reclaim_complete)(struct nfs_client *); 193 int (*reclaim_complete)(struct nfs_client *);
194 int (*detect_trunking)(struct nfs_client *, struct nfs_client **,
195 struct rpc_cred *);
194}; 196};
195 197
196struct nfs4_state_maintenance_ops { 198struct nfs4_state_maintenance_ops {
@@ -223,7 +225,7 @@ extern int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred);
223extern int nfs4_destroy_clientid(struct nfs_client *clp); 225extern int nfs4_destroy_clientid(struct nfs_client *clp);
224extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); 226extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *);
225extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); 227extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *);
226extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc); 228extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait);
227extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); 229extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle);
228extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struct qstr *, 230extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struct qstr *,
229 struct nfs4_fs_locations *, struct page *); 231 struct nfs4_fs_locations *, struct page *);
@@ -320,9 +322,15 @@ extern void nfs4_renew_state(struct work_struct *);
320/* nfs4state.c */ 322/* nfs4state.c */
321struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp); 323struct rpc_cred *nfs4_get_setclientid_cred(struct nfs_client *clp);
322struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp); 324struct rpc_cred *nfs4_get_renew_cred_locked(struct nfs_client *clp);
325int nfs4_discover_server_trunking(struct nfs_client *clp,
326 struct nfs_client **);
327int nfs40_discover_server_trunking(struct nfs_client *clp,
328 struct nfs_client **, struct rpc_cred *);
323#if defined(CONFIG_NFS_V4_1) 329#if defined(CONFIG_NFS_V4_1)
324struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp); 330struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp);
325struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp); 331struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp);
332int nfs41_discover_server_trunking(struct nfs_client *clp,
333 struct nfs_client **, struct rpc_cred *);
326extern void nfs4_schedule_session_recovery(struct nfs4_session *, int); 334extern void nfs4_schedule_session_recovery(struct nfs4_session *, int);
327#else 335#else
328static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err) 336static inline void nfs4_schedule_session_recovery(struct nfs4_session *session, int err)
@@ -351,7 +359,7 @@ extern void nfs41_handle_server_scope(struct nfs_client *,
351extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp); 359extern void nfs4_put_lock_state(struct nfs4_lock_state *lsp);
352extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl); 360extern int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl);
353extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *, 361extern void nfs4_select_rw_stateid(nfs4_stateid *, struct nfs4_state *,
354 fmode_t, fl_owner_t, pid_t); 362 fmode_t, const struct nfs_lockowner *);
355 363
356extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask); 364extern struct nfs_seqid *nfs_alloc_seqid(struct nfs_seqid_counter *counter, gfp_t gfp_mask);
357extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task); 365extern int nfs_wait_on_sequence(struct nfs_seqid *seqid, struct rpc_task *task);
@@ -372,6 +380,9 @@ extern bool nfs4_disable_idmapping;
372extern unsigned short max_session_slots; 380extern unsigned short max_session_slots;
373extern unsigned short send_implementation_id; 381extern unsigned short send_implementation_id;
374 382
383#define NFS4_CLIENT_ID_UNIQ_LEN (64)
384extern char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN];
385
375/* nfs4sysctl.c */ 386/* nfs4sysctl.c */
376#ifdef CONFIG_SYSCTL 387#ifdef CONFIG_SYSCTL
377int nfs4_register_sysctl(void); 388int nfs4_register_sysctl(void);
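NFS_LOCK_INITIALIZED changes from the mask value 1 to bit number 0, and ls_flags becomes unsigned long, presumably so the flag can be handled with the atomic set_bit()/test_bit() helpers, which take a bit position and operate on an unsigned long. A tiny standalone illustration of the mask-value versus bit-number distinction:

#include <assert.h>

#define FLAG_VALUE_INITIALIZED 1   /* old style: a mask value to OR in  */
#define FLAG_BIT_INITIALIZED   0   /* new style: a bit position to test */

int main(void)
{
    unsigned long flags = 0;

    flags |= FLAG_VALUE_INITIALIZED;                /* OR in the mask             */
    assert(flags & (1UL << FLAG_BIT_INITIALIZED));  /* same bit, tested by number */
    return 0;
}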
diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c
index 24eb663f8ed5..6bacfde1319a 100644
--- a/fs/nfs/nfs4client.c
+++ b/fs/nfs/nfs4client.c
@@ -84,7 +84,7 @@ error:
84static void nfs4_destroy_callback(struct nfs_client *clp) 84static void nfs4_destroy_callback(struct nfs_client *clp)
85{ 85{
86 if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state)) 86 if (__test_and_clear_bit(NFS_CS_CALLBACK, &clp->cl_res_state))
87 nfs_callback_down(clp->cl_mvops->minor_version); 87 nfs_callback_down(clp->cl_mvops->minor_version, clp->cl_net);
88} 88}
89 89
90static void nfs4_shutdown_client(struct nfs_client *clp) 90static void nfs4_shutdown_client(struct nfs_client *clp)
@@ -185,6 +185,7 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
185 rpc_authflavor_t authflavour) 185 rpc_authflavor_t authflavour)
186{ 186{
187 char buf[INET6_ADDRSTRLEN + 1]; 187 char buf[INET6_ADDRSTRLEN + 1];
188 struct nfs_client *old;
188 int error; 189 int error;
189 190
190 if (clp->cl_cons_state == NFS_CS_READY) { 191 if (clp->cl_cons_state == NFS_CS_READY) {
@@ -230,6 +231,17 @@ struct nfs_client *nfs4_init_client(struct nfs_client *clp,
230 231
231 if (!nfs4_has_session(clp)) 232 if (!nfs4_has_session(clp))
232 nfs_mark_client_ready(clp, NFS_CS_READY); 233 nfs_mark_client_ready(clp, NFS_CS_READY);
234
235 error = nfs4_discover_server_trunking(clp, &old);
236 if (error < 0)
237 goto error;
238 if (clp != old) {
239 clp->cl_preserve_clid = true;
240 nfs_put_client(clp);
241 clp = old;
242 atomic_inc(&clp->cl_count);
243 }
244
233 return clp; 245 return clp;
234 246
235error: 247error:
@@ -239,6 +251,248 @@ error:
239 return ERR_PTR(error); 251 return ERR_PTR(error);
240} 252}
241 253
254/*
255 * SETCLIENTID just did a callback update with the callback ident in
256 * "drop," but server trunking discovery claims "drop" and "keep" are
257 * actually the same server. Swap the callback IDs so that "keep"
258 * will continue to use the callback ident the server now knows about,
259 * and so that "keep"'s original callback ident is destroyed when
260 * "drop" is freed.
261 */
262static void nfs4_swap_callback_idents(struct nfs_client *keep,
263 struct nfs_client *drop)
264{
265 struct nfs_net *nn = net_generic(keep->cl_net, nfs_net_id);
266 unsigned int save = keep->cl_cb_ident;
267
268 if (keep->cl_cb_ident == drop->cl_cb_ident)
269 return;
270
271 dprintk("%s: keeping callback ident %u and dropping ident %u\n",
272 __func__, keep->cl_cb_ident, drop->cl_cb_ident);
273
274 spin_lock(&nn->nfs_client_lock);
275
276 idr_replace(&nn->cb_ident_idr, keep, drop->cl_cb_ident);
277 keep->cl_cb_ident = drop->cl_cb_ident;
278
279 idr_replace(&nn->cb_ident_idr, drop, save);
280 drop->cl_cb_ident = save;
281
282 spin_unlock(&nn->nfs_client_lock);
283}
284
285/**
286 * nfs40_walk_client_list - Find server that recognizes a client ID
287 *
288 * @new: nfs_client with client ID to test
289 * @result: OUT: found nfs_client, or new
290 * @cred: credential to use for trunking test
291 *
292 * Returns zero, a negative errno, or a negative NFS4ERR status.
293 * If zero is returned, an nfs_client pointer is planted in "result."
294 *
295 * NB: nfs40_walk_client_list() relies on the new nfs_client being
296 * the last nfs_client on the list.
297 */
298int nfs40_walk_client_list(struct nfs_client *new,
299 struct nfs_client **result,
300 struct rpc_cred *cred)
301{
302 struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id);
303 struct nfs_client *pos, *n, *prev = NULL;
304 struct nfs4_setclientid_res clid = {
305 .clientid = new->cl_clientid,
306 .confirm = new->cl_confirm,
307 };
308 int status;
309
310 spin_lock(&nn->nfs_client_lock);
311 list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) {
312 /* If "pos" isn't marked ready, we can't trust the
313 * remaining fields in "pos" */
314 if (pos->cl_cons_state < NFS_CS_READY)
315 continue;
316
317 if (pos->rpc_ops != new->rpc_ops)
318 continue;
319
320 if (pos->cl_proto != new->cl_proto)
321 continue;
322
323 if (pos->cl_minorversion != new->cl_minorversion)
324 continue;
325
326 if (pos->cl_clientid != new->cl_clientid)
327 continue;
328
329 atomic_inc(&pos->cl_count);
330 spin_unlock(&nn->nfs_client_lock);
331
332 if (prev)
333 nfs_put_client(prev);
334
335 status = nfs4_proc_setclientid_confirm(pos, &clid, cred);
336 if (status == 0) {
337 nfs4_swap_callback_idents(pos, new);
338
339 nfs_put_client(pos);
340 *result = pos;
341 dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
342 __func__, pos, atomic_read(&pos->cl_count));
343 return 0;
344 }
345 if (status != -NFS4ERR_STALE_CLIENTID) {
346 nfs_put_client(pos);
347 dprintk("NFS: <-- %s status = %d, no result\n",
348 __func__, status);
349 return status;
350 }
351
352 spin_lock(&nn->nfs_client_lock);
353 prev = pos;
354 }
355
356 /*
357 * No matching nfs_client found. This should be impossible,
358 * because the new nfs_client has already been added to
359 * nfs_client_list by nfs_get_client().
360 *
361 * Don't BUG(), since the caller is holding a mutex.
362 */
363 if (prev)
364 nfs_put_client(prev);
365 spin_unlock(&nn->nfs_client_lock);
366 pr_err("NFS: %s Error: no matching nfs_client found\n", __func__);
367 return -NFS4ERR_STALE_CLIENTID;
368}
369
370#ifdef CONFIG_NFS_V4_1
371/*
372 * Returns true if the client IDs match
373 */
374static bool nfs4_match_clientids(struct nfs_client *a, struct nfs_client *b)
375{
376 if (a->cl_clientid != b->cl_clientid) {
377 dprintk("NFS: --> %s client ID %llx does not match %llx\n",
378 __func__, a->cl_clientid, b->cl_clientid);
379 return false;
380 }
381 dprintk("NFS: --> %s client ID %llx matches %llx\n",
382 __func__, a->cl_clientid, b->cl_clientid);
383 return true;
384}
385
386/*
387 * Returns true if the server owners match
388 */
389static bool
390nfs4_match_serverowners(struct nfs_client *a, struct nfs_client *b)
391{
392 struct nfs41_server_owner *o1 = a->cl_serverowner;
393 struct nfs41_server_owner *o2 = b->cl_serverowner;
394
395 if (o1->minor_id != o2->minor_id) {
396 dprintk("NFS: --> %s server owner minor IDs do not match\n",
397 __func__);
398 return false;
399 }
400
401 if (o1->major_id_sz != o2->major_id_sz)
402 goto out_major_mismatch;
403 if (memcmp(o1->major_id, o2->major_id, o1->major_id_sz) != 0)
404 goto out_major_mismatch;
405
406 dprintk("NFS: --> %s server owners match\n", __func__);
407 return true;
408
409out_major_mismatch:
410 dprintk("NFS: --> %s server owner major IDs do not match\n",
411 __func__);
412 return false;
413}
414
415/**
416 * nfs41_walk_client_list - Find nfs_client that matches a client/server owner
417 *
418 * @new: nfs_client with client ID to test
419 * @result: OUT: found nfs_client, or new
420 * @cred: credential to use for trunking test
421 *
422 * Returns zero, a negative errno, or a negative NFS4ERR status.
423 * If zero is returned, an nfs_client pointer is planted in "result."
424 *
425 * NB: nfs41_walk_client_list() relies on the new nfs_client being
426 * the last nfs_client on the list.
427 */
428int nfs41_walk_client_list(struct nfs_client *new,
429 struct nfs_client **result,
430 struct rpc_cred *cred)
431{
432 struct nfs_net *nn = net_generic(new->cl_net, nfs_net_id);
433 struct nfs_client *pos, *n, *prev = NULL;
434 int error;
435
436 spin_lock(&nn->nfs_client_lock);
437 list_for_each_entry_safe(pos, n, &nn->nfs_client_list, cl_share_link) {
438 /* If "pos" isn't marked ready, we can't trust the
439 * remaining fields in "pos", especially the client
440 * ID and serverowner fields. Wait for CREATE_SESSION
441 * to finish. */
442 if (pos->cl_cons_state < NFS_CS_READY) {
443 atomic_inc(&pos->cl_count);
444 spin_unlock(&nn->nfs_client_lock);
445
446 if (prev)
447 nfs_put_client(prev);
448 prev = pos;
449
450 error = nfs_wait_client_init_complete(pos);
451 if (error < 0) {
452 nfs_put_client(pos);
453 spin_lock(&nn->nfs_client_lock);
454 continue;
455 }
456
457 spin_lock(&nn->nfs_client_lock);
458 }
459
460 if (pos->rpc_ops != new->rpc_ops)
461 continue;
462
463 if (pos->cl_proto != new->cl_proto)
464 continue;
465
466 if (pos->cl_minorversion != new->cl_minorversion)
467 continue;
468
469 if (!nfs4_match_clientids(pos, new))
470 continue;
471
472 if (!nfs4_match_serverowners(pos, new))
473 continue;
474
475 spin_unlock(&nn->nfs_client_lock);
476 dprintk("NFS: <-- %s using nfs_client = %p ({%d})\n",
477 __func__, pos, atomic_read(&pos->cl_count));
478
479 *result = pos;
480 return 0;
481 }
482
483 /*
484 * No matching nfs_client found. This should be impossible,
485 * because the new nfs_client has already been added to
486 * nfs_client_list by nfs_get_client().
487 *
488 * Don't BUG(), since the caller is holding a mutex.
489 */
490 spin_unlock(&nn->nfs_client_lock);
491 pr_err("NFS: %s Error: no matching nfs_client found\n", __func__);
492 return -NFS4ERR_STALE_CLIENTID;
493}
494#endif /* CONFIG_NFS_V4_1 */
495
242static void nfs4_destroy_server(struct nfs_server *server) 496static void nfs4_destroy_server(struct nfs_server *server)
243{ 497{
244 nfs_server_return_all_delegations(server); 498 nfs_server_return_all_delegations(server);
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index eb5eb8eef4d3..afddd6639afb 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -95,16 +95,25 @@ nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
95 int ret; 95 int ret;
96 struct inode *inode = file->f_path.dentry->d_inode; 96 struct inode *inode = file->f_path.dentry->d_inode;
97 97
98 ret = filemap_write_and_wait_range(inode->i_mapping, start, end); 98 do {
99 if (ret != 0) 99 ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
100 goto out; 100 if (ret != 0)
101 mutex_lock(&inode->i_mutex); 101 break;
102 ret = nfs_file_fsync_commit(file, start, end, datasync); 102 mutex_lock(&inode->i_mutex);
103 if (!ret && !datasync) 103 ret = nfs_file_fsync_commit(file, start, end, datasync);
104 /* application has asked for meta-data sync */ 104 if (!ret && !datasync)
105 ret = pnfs_layoutcommit_inode(inode, true); 105 /* application has asked for meta-data sync */
106 mutex_unlock(&inode->i_mutex); 106 ret = pnfs_layoutcommit_inode(inode, true);
107out: 107 mutex_unlock(&inode->i_mutex);
108 /*
109 * If nfs_file_fsync_commit detected a server reboot, then
110 * resend all dirty pages that might have been covered by
111 * the NFS_CONTEXT_RESEND_WRITES flag
112 */
113 start = 0;
114 end = LLONG_MAX;
115 } while (ret == -EAGAIN);
116
108 return ret; 117 return ret;
109} 118}
110 119
diff --git a/fs/nfs/nfs4filelayout.c b/fs/nfs/nfs4filelayout.c
index 53f94d915bd1..52d847212066 100644
--- a/fs/nfs/nfs4filelayout.c
+++ b/fs/nfs/nfs4filelayout.c
@@ -190,8 +190,6 @@ static int filelayout_async_handle_error(struct rpc_task *task,
190 * i/o and all i/o waiting on the slot table to the MDS until 190 * i/o and all i/o waiting on the slot table to the MDS until
191 * layout is destroyed and a new valid layout is obtained. 191 * layout is destroyed and a new valid layout is obtained.
192 */ 192 */
193 set_bit(NFS_LAYOUT_INVALID,
194 &NFS_I(inode)->layout->plh_flags);
195 pnfs_destroy_layout(NFS_I(inode)); 193 pnfs_destroy_layout(NFS_I(inode));
196 rpc_wake_up(&tbl->slot_tbl_waitq); 194 rpc_wake_up(&tbl->slot_tbl_waitq);
197 goto reset; 195 goto reset;
@@ -205,7 +203,7 @@ static int filelayout_async_handle_error(struct rpc_task *task,
205 case -EPIPE: 203 case -EPIPE:
206 dprintk("%s DS connection error %d\n", __func__, 204 dprintk("%s DS connection error %d\n", __func__,
207 task->tk_status); 205 task->tk_status);
208 filelayout_mark_devid_invalid(devid); 206 nfs4_mark_deviceid_unavailable(devid);
209 clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags); 207 clear_bit(NFS_INO_LAYOUTCOMMIT, &NFS_I(inode)->flags);
210 _pnfs_return_layout(inode); 208 _pnfs_return_layout(inode);
211 rpc_wake_up(&tbl->slot_tbl_waitq); 209 rpc_wake_up(&tbl->slot_tbl_waitq);
@@ -269,6 +267,21 @@ filelayout_set_layoutcommit(struct nfs_write_data *wdata)
269 (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); 267 (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb);
270} 268}
271 269
270bool
271filelayout_test_devid_unavailable(struct nfs4_deviceid_node *node)
272{
273 return filelayout_test_devid_invalid(node) ||
274 nfs4_test_deviceid_unavailable(node);
275}
276
277static bool
278filelayout_reset_to_mds(struct pnfs_layout_segment *lseg)
279{
280 struct nfs4_deviceid_node *node = FILELAYOUT_DEVID_NODE(lseg);
281
282 return filelayout_test_devid_unavailable(node);
283}
284
272/* 285/*
273 * Call ops for the async read/write cases 286 * Call ops for the async read/write cases
274 * In the case of dense layouts, the offset needs to be reset to its 287 * In the case of dense layouts, the offset needs to be reset to its
@@ -453,7 +466,7 @@ static void filelayout_commit_release(void *calldata)
453 struct nfs_commit_data *data = calldata; 466 struct nfs_commit_data *data = calldata;
454 467
455 data->completion_ops->completion(data); 468 data->completion_ops->completion(data);
456 put_lseg(data->lseg); 469 pnfs_put_lseg(data->lseg);
457 nfs_put_client(data->ds_clp); 470 nfs_put_client(data->ds_clp);
458 nfs_commitdata_release(data); 471 nfs_commitdata_release(data);
459} 472}
@@ -608,13 +621,13 @@ filelayout_check_layout(struct pnfs_layout_hdr *lo,
608 d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode)->pnfs_curr_ld, 621 d = nfs4_find_get_deviceid(NFS_SERVER(lo->plh_inode)->pnfs_curr_ld,
609 NFS_SERVER(lo->plh_inode)->nfs_client, id); 622 NFS_SERVER(lo->plh_inode)->nfs_client, id);
610 if (d == NULL) { 623 if (d == NULL) {
611 dsaddr = get_device_info(lo->plh_inode, id, gfp_flags); 624 dsaddr = filelayout_get_device_info(lo->plh_inode, id, gfp_flags);
612 if (dsaddr == NULL) 625 if (dsaddr == NULL)
613 goto out; 626 goto out;
614 } else 627 } else
615 dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node); 628 dsaddr = container_of(d, struct nfs4_file_layout_dsaddr, id_node);
616 /* Found deviceid is being reaped */ 629 /* Found deviceid is unavailable */
617 if (test_bit(NFS_DEVICEID_INVALID, &dsaddr->id_node.flags)) 630 if (filelayout_test_devid_unavailable(&dsaddr->id_node))
618 goto out_put; 631 goto out_put;
619 632
620 fl->dsaddr = dsaddr; 633 fl->dsaddr = dsaddr;
@@ -931,7 +944,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio,
931 nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq); 944 nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq);
932 status = filelayout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS); 945 status = filelayout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS);
933 if (status < 0) { 946 if (status < 0) {
934 put_lseg(pgio->pg_lseg); 947 pnfs_put_lseg(pgio->pg_lseg);
935 pgio->pg_lseg = NULL; 948 pgio->pg_lseg = NULL;
936 goto out_mds; 949 goto out_mds;
937 } 950 }
@@ -985,7 +998,7 @@ filelayout_clear_request_commit(struct nfs_page *req,
985out: 998out:
986 nfs_request_remove_commit_list(req, cinfo); 999 nfs_request_remove_commit_list(req, cinfo);
987 spin_unlock(cinfo->lock); 1000 spin_unlock(cinfo->lock);
988 put_lseg(freeme); 1001 pnfs_put_lseg(freeme);
989} 1002}
990 1003
991static struct list_head * 1004static struct list_head *
@@ -1018,7 +1031,7 @@ filelayout_choose_commit_list(struct nfs_page *req,
1018 * off due to a rewrite, in which case it will be done in 1031 * off due to a rewrite, in which case it will be done in
1019 * filelayout_clear_request_commit 1032 * filelayout_clear_request_commit
1020 */ 1033 */
1021 buckets[i].wlseg = get_lseg(lseg); 1034 buckets[i].wlseg = pnfs_get_lseg(lseg);
1022 } 1035 }
1023 set_bit(PG_COMMIT_TO_DS, &req->wb_flags); 1036 set_bit(PG_COMMIT_TO_DS, &req->wb_flags);
1024 cinfo->ds->nwritten++; 1037 cinfo->ds->nwritten++;
@@ -1128,7 +1141,7 @@ filelayout_scan_ds_commit_list(struct pnfs_commit_bucket *bucket,
1128 if (list_empty(src)) 1141 if (list_empty(src))
1129 bucket->wlseg = NULL; 1142 bucket->wlseg = NULL;
1130 else 1143 else
1131 get_lseg(bucket->clseg); 1144 pnfs_get_lseg(bucket->clseg);
1132 } 1145 }
1133 return ret; 1146 return ret;
1134} 1147}
@@ -1159,12 +1172,12 @@ static void filelayout_recover_commit_reqs(struct list_head *dst,
1159 1172
1160 /* NOTE cinfo->lock is NOT held, relying on fact that this is 1173 /* NOTE cinfo->lock is NOT held, relying on fact that this is
1161 * only called on single thread per dreq. 1174 * only called on single thread per dreq.
1162 * Can't take the lock because need to do put_lseg 1175 * Can't take the lock because need to do pnfs_put_lseg
1163 */ 1176 */
1164 for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) { 1177 for (i = 0, b = cinfo->ds->buckets; i < cinfo->ds->nbuckets; i++, b++) {
1165 if (transfer_commit_list(&b->written, dst, cinfo, 0)) { 1178 if (transfer_commit_list(&b->written, dst, cinfo, 0)) {
1166 BUG_ON(!list_empty(&b->written)); 1179 BUG_ON(!list_empty(&b->written));
1167 put_lseg(b->wlseg); 1180 pnfs_put_lseg(b->wlseg);
1168 b->wlseg = NULL; 1181 b->wlseg = NULL;
1169 } 1182 }
1170 } 1183 }
@@ -1200,7 +1213,7 @@ alloc_ds_commits(struct nfs_commit_info *cinfo, struct list_head *list)
1200 if (list_empty(&bucket->committing)) 1213 if (list_empty(&bucket->committing))
1201 continue; 1214 continue;
1202 nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo); 1215 nfs_retry_commit(&bucket->committing, bucket->clseg, cinfo);
1203 put_lseg(bucket->clseg); 1216 pnfs_put_lseg(bucket->clseg);
1204 bucket->clseg = NULL; 1217 bucket->clseg = NULL;
1205 } 1218 }
1206 /* Caller will clean up entries put on list */ 1219 /* Caller will clean up entries put on list */
diff --git a/fs/nfs/nfs4filelayout.h b/fs/nfs/nfs4filelayout.h
index 43fe802dd678..dca47d786710 100644
--- a/fs/nfs/nfs4filelayout.h
+++ b/fs/nfs/nfs4filelayout.h
@@ -129,23 +129,13 @@ filelayout_mark_devid_invalid(struct nfs4_deviceid_node *node)
129} 129}
130 130
131static inline bool 131static inline bool
132filelayout_test_layout_invalid(struct pnfs_layout_hdr *lo)
133{
134 return test_bit(NFS_LAYOUT_INVALID, &lo->plh_flags);
135}
136
137static inline bool
138filelayout_test_devid_invalid(struct nfs4_deviceid_node *node) 132filelayout_test_devid_invalid(struct nfs4_deviceid_node *node)
139{ 133{
140 return test_bit(NFS_DEVICEID_INVALID, &node->flags); 134 return test_bit(NFS_DEVICEID_INVALID, &node->flags);
141} 135}
142 136
143static inline bool 137extern bool
144filelayout_reset_to_mds(struct pnfs_layout_segment *lseg) 138filelayout_test_devid_unavailable(struct nfs4_deviceid_node *node);
145{
146 return filelayout_test_devid_invalid(FILELAYOUT_DEVID_NODE(lseg)) ||
147 filelayout_test_layout_invalid(lseg->pls_layout);
148}
149 139
150extern struct nfs_fh * 140extern struct nfs_fh *
151nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j); 141nfs4_fl_select_ds_fh(struct pnfs_layout_segment *lseg, u32 j);
@@ -158,7 +148,7 @@ struct nfs4_pnfs_ds *nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg,
158extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); 148extern void nfs4_fl_put_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
159extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr); 149extern void nfs4_fl_free_deviceid(struct nfs4_file_layout_dsaddr *dsaddr);
160struct nfs4_file_layout_dsaddr * 150struct nfs4_file_layout_dsaddr *
161get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags); 151filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags);
162void nfs4_ds_disconnect(struct nfs_client *clp); 152void nfs4_ds_disconnect(struct nfs_client *clp);
163 153
164#endif /* FS_NFS_NFS4FILELAYOUT_H */ 154#endif /* FS_NFS_NFS4FILELAYOUT_H */
diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c
index f81231f30d94..3336d5eaf879 100644
--- a/fs/nfs/nfs4filelayoutdev.c
+++ b/fs/nfs/nfs4filelayoutdev.c
@@ -690,7 +690,7 @@ decode_and_add_device(struct inode *inode, struct pnfs_device *dev, gfp_t gfp_fl
690 * of available devices, and return it. 690 * of available devices, and return it.
691 */ 691 */
692struct nfs4_file_layout_dsaddr * 692struct nfs4_file_layout_dsaddr *
693get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags) 693filelayout_get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_flags)
694{ 694{
695 struct pnfs_device *pdev = NULL; 695 struct pnfs_device *pdev = NULL;
696 u32 max_resp_sz; 696 u32 max_resp_sz;
@@ -804,13 +804,14 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
804 struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx]; 804 struct nfs4_pnfs_ds *ds = dsaddr->ds_list[ds_idx];
805 struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg); 805 struct nfs4_deviceid_node *devid = FILELAYOUT_DEVID_NODE(lseg);
806 806
807 if (filelayout_test_devid_invalid(devid)) 807 if (filelayout_test_devid_unavailable(devid))
808 return NULL; 808 return NULL;
809 809
810 if (ds == NULL) { 810 if (ds == NULL) {
811 printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", 811 printk(KERN_ERR "NFS: %s: No data server for offset index %d\n",
812 __func__, ds_idx); 812 __func__, ds_idx);
813 goto mark_dev_invalid; 813 filelayout_mark_devid_invalid(devid);
814 return NULL;
814 } 815 }
815 816
816 if (!ds->ds_clp) { 817 if (!ds->ds_clp) {
@@ -818,14 +819,12 @@ nfs4_fl_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx)
818 int err; 819 int err;
819 820
820 err = nfs4_ds_connect(s, ds); 821 err = nfs4_ds_connect(s, ds);
821 if (err) 822 if (err) {
822 goto mark_dev_invalid; 823 nfs4_mark_deviceid_unavailable(devid);
824 return NULL;
825 }
823 } 826 }
824 return ds; 827 return ds;
825
826mark_dev_invalid:
827 filelayout_mark_devid_invalid(devid);
828 return NULL;
829} 828}
830 829
831module_param(dataserver_retrans, uint, 0644); 830module_param(dataserver_retrans, uint, 0644);
diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c
index 017b4b01a69c..79fbb61ce202 100644
--- a/fs/nfs/nfs4namespace.c
+++ b/fs/nfs/nfs4namespace.c
@@ -192,25 +192,13 @@ out:
192struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *clnt, struct inode *inode, 192struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *clnt, struct inode *inode,
193 struct qstr *name) 193 struct qstr *name)
194{ 194{
195 struct rpc_clnt *clone;
196 struct rpc_auth *auth;
197 rpc_authflavor_t flavor; 195 rpc_authflavor_t flavor;
198 196
199 flavor = nfs4_negotiate_security(inode, name); 197 flavor = nfs4_negotiate_security(inode, name);
200 if ((int)flavor < 0) 198 if ((int)flavor < 0)
201 return ERR_PTR(flavor); 199 return ERR_PTR((int)flavor);
202 200
203 clone = rpc_clone_client(clnt); 201 return rpc_clone_client_set_auth(clnt, flavor);
204 if (IS_ERR(clone))
205 return clone;
206
207 auth = rpcauth_create(flavor, clone);
208 if (!auth) {
209 rpc_shutdown_client(clone);
210 clone = ERR_PTR(-EIO);
211 }
212
213 return clone;
214} 202}
215 203
216static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, 204static struct vfsmount *try_location(struct nfs_clone_mount *mountdata,
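The nfs4_create_sec_client() hunk folds the clone-then-rpcauth_create sequence into rpc_clone_client_set_auth() and adds an (int) cast before ERR_PTR(). The cast matters because rpc_authflavor_t is an unsigned 32-bit type: a negative status stored in it becomes a large positive value, and ERR_PTR() only treats small negative numbers as error pointers. A hedged userspace sketch, with ERR_PTR()/IS_ERR() re-implemented locally just for the demo:

/* Illustration of why ERR_PTR() needs a signed value; ERR_PTR/IS_ERR are
 * re-implemented here for a userspace demo, they are not the kernel headers. */
#include <stdio.h>

#define MAX_ERRNO 4095

static inline void *ERR_PTR(long error)
{
	return (void *)error;
}

static inline int IS_ERR(const void *ptr)
{
	return (unsigned long)ptr >= (unsigned long)-MAX_ERRNO;
}

int main(void)
{
	unsigned int flavor = (unsigned int)-13;	/* e.g. -EACCES stored in an unsigned flavor */

	/* On an LP64 build the uncast value zero-extends to a huge positive
	 * long and is not recognised as an error pointer. */
	printf("no cast:   IS_ERR=%d\n", IS_ERR(ERR_PTR((long)flavor)));
	/* With the (int) cast the value sign-extends back to -13. */
	printf("with cast: IS_ERR=%d\n", IS_ERR(ERR_PTR((int)flavor)));
	return 0;
}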
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 1e50326d00dd..68b21d81b7ac 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -104,6 +104,8 @@ static int nfs4_map_errors(int err)
104 return -EACCES; 104 return -EACCES;
105 case -NFS4ERR_MINOR_VERS_MISMATCH: 105 case -NFS4ERR_MINOR_VERS_MISMATCH:
106 return -EPROTONOSUPPORT; 106 return -EPROTONOSUPPORT;
107 case -NFS4ERR_ACCESS:
108 return -EACCES;
107 default: 109 default:
108 dprintk("%s could not handle NFSv4 error %d\n", 110 dprintk("%s could not handle NFSv4 error %d\n",
109 __func__, -err); 111 __func__, -err);
@@ -150,6 +152,12 @@ static const u32 nfs4_pnfs_open_bitmap[3] = {
150 FATTR4_WORD2_MDSTHRESHOLD 152 FATTR4_WORD2_MDSTHRESHOLD
151}; 153};
152 154
155static const u32 nfs4_open_noattr_bitmap[3] = {
156 FATTR4_WORD0_TYPE
157 | FATTR4_WORD0_CHANGE
158 | FATTR4_WORD0_FILEID,
159};
160
153const u32 nfs4_statfs_bitmap[2] = { 161const u32 nfs4_statfs_bitmap[2] = {
154 FATTR4_WORD0_FILES_AVAIL 162 FATTR4_WORD0_FILES_AVAIL
155 | FATTR4_WORD0_FILES_FREE 163 | FATTR4_WORD0_FILES_FREE
@@ -832,6 +840,7 @@ static void nfs4_init_opendata_res(struct nfs4_opendata *p)
832 p->o_res.seqid = p->o_arg.seqid; 840 p->o_res.seqid = p->o_arg.seqid;
833 p->c_res.seqid = p->c_arg.seqid; 841 p->c_res.seqid = p->c_arg.seqid;
834 p->o_res.server = p->o_arg.server; 842 p->o_res.server = p->o_arg.server;
843 p->o_res.access_request = p->o_arg.access;
835 nfs_fattr_init(&p->f_attr); 844 nfs_fattr_init(&p->f_attr);
836 nfs_fattr_init_names(&p->f_attr, &p->owner_name, &p->group_name); 845 nfs_fattr_init_names(&p->f_attr, &p->owner_name, &p->group_name);
837} 846}
@@ -860,6 +869,14 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry,
860 p->o_arg.fh = NFS_FH(dir); 869 p->o_arg.fh = NFS_FH(dir);
861 p->o_arg.open_flags = flags; 870 p->o_arg.open_flags = flags;
862 p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); 871 p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE);
872 /* don't put an ACCESS op in OPEN compound if O_EXCL, because ACCESS
873 * will return permission denied for all bits until close */
874 if (!(flags & O_EXCL)) {
875 /* ask server to check for all possible rights as results
876 * are cached */
877 p->o_arg.access = NFS4_ACCESS_READ | NFS4_ACCESS_MODIFY |
878 NFS4_ACCESS_EXTEND | NFS4_ACCESS_EXECUTE;
879 }
863 p->o_arg.clientid = server->nfs_client->cl_clientid; 880 p->o_arg.clientid = server->nfs_client->cl_clientid;
864 p->o_arg.id.create_time = ktime_to_ns(sp->so_seqid.create_time); 881 p->o_arg.id.create_time = ktime_to_ns(sp->so_seqid.create_time);
865 p->o_arg.id.uniquifier = sp->so_seqid.owner_id; 882 p->o_arg.id.uniquifier = sp->so_seqid.owner_id;
@@ -1115,11 +1132,80 @@ out_return_state:
1115 return state; 1132 return state;
1116} 1133}
1117 1134
1118static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data) 1135static void
1136nfs4_opendata_check_deleg(struct nfs4_opendata *data, struct nfs4_state *state)
1137{
1138 struct nfs_client *clp = NFS_SERVER(state->inode)->nfs_client;
1139 struct nfs_delegation *delegation;
1140 int delegation_flags = 0;
1141
1142 rcu_read_lock();
1143 delegation = rcu_dereference(NFS_I(state->inode)->delegation);
1144 if (delegation)
1145 delegation_flags = delegation->flags;
1146 rcu_read_unlock();
1147 if (data->o_arg.claim == NFS4_OPEN_CLAIM_DELEGATE_CUR) {
1148 pr_err_ratelimited("NFS: Broken NFSv4 server %s is "
1149 "returning a delegation for "
1150 "OPEN(CLAIM_DELEGATE_CUR)\n",
1151 clp->cl_hostname);
1152 } else if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0)
1153 nfs_inode_set_delegation(state->inode,
1154 data->owner->so_cred,
1155 &data->o_res);
1156 else
1157 nfs_inode_reclaim_delegation(state->inode,
1158 data->owner->so_cred,
1159 &data->o_res);
1160}
1161
1162/*
1163 * Check the inode attributes against the CLAIM_PREVIOUS returned attributes
1164 * and update the nfs4_state.
1165 */
1166static struct nfs4_state *
1167_nfs4_opendata_reclaim_to_nfs4_state(struct nfs4_opendata *data)
1168{
1169 struct inode *inode = data->state->inode;
1170 struct nfs4_state *state = data->state;
1171 int ret;
1172
1173 if (!data->rpc_done) {
1174 ret = data->rpc_status;
1175 goto err;
1176 }
1177
1178 ret = -ESTALE;
1179 if (!(data->f_attr.valid & NFS_ATTR_FATTR_TYPE) ||
1180 !(data->f_attr.valid & NFS_ATTR_FATTR_FILEID) ||
1181 !(data->f_attr.valid & NFS_ATTR_FATTR_CHANGE))
1182 goto err;
1183
1184 ret = -ENOMEM;
1185 state = nfs4_get_open_state(inode, data->owner);
1186 if (state == NULL)
1187 goto err;
1188
1189 ret = nfs_refresh_inode(inode, &data->f_attr);
1190 if (ret)
1191 goto err;
1192
1193 if (data->o_res.delegation_type != 0)
1194 nfs4_opendata_check_deleg(data, state);
1195 update_open_stateid(state, &data->o_res.stateid, NULL,
1196 data->o_arg.fmode);
1197
1198 return state;
1199err:
1200 return ERR_PTR(ret);
1201
1202}
1203
1204static struct nfs4_state *
1205_nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
1119{ 1206{
1120 struct inode *inode; 1207 struct inode *inode;
1121 struct nfs4_state *state = NULL; 1208 struct nfs4_state *state = NULL;
1122 struct nfs_delegation *delegation;
1123 int ret; 1209 int ret;
1124 1210
1125 if (!data->rpc_done) { 1211 if (!data->rpc_done) {
@@ -1138,30 +1224,8 @@ static struct nfs4_state *nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data
1138 state = nfs4_get_open_state(inode, data->owner); 1224 state = nfs4_get_open_state(inode, data->owner);
1139 if (state == NULL) 1225 if (state == NULL)
1140 goto err_put_inode; 1226 goto err_put_inode;
1141 if (data->o_res.delegation_type != 0) { 1227 if (data->o_res.delegation_type != 0)
1142 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; 1228 nfs4_opendata_check_deleg(data, state);
1143 int delegation_flags = 0;
1144
1145 rcu_read_lock();
1146 delegation = rcu_dereference(NFS_I(inode)->delegation);
1147 if (delegation)
1148 delegation_flags = delegation->flags;
1149 rcu_read_unlock();
1150 if (data->o_arg.claim == NFS4_OPEN_CLAIM_DELEGATE_CUR) {
1151 pr_err_ratelimited("NFS: Broken NFSv4 server %s is "
1152 "returning a delegation for "
1153 "OPEN(CLAIM_DELEGATE_CUR)\n",
1154 clp->cl_hostname);
1155 } else if ((delegation_flags & 1UL<<NFS_DELEGATION_NEED_RECLAIM) == 0)
1156 nfs_inode_set_delegation(state->inode,
1157 data->owner->so_cred,
1158 &data->o_res);
1159 else
1160 nfs_inode_reclaim_delegation(state->inode,
1161 data->owner->so_cred,
1162 &data->o_res);
1163 }
1164
1165 update_open_stateid(state, &data->o_res.stateid, NULL, 1229 update_open_stateid(state, &data->o_res.stateid, NULL,
1166 data->o_arg.fmode); 1230 data->o_arg.fmode);
1167 iput(inode); 1231 iput(inode);
@@ -1173,6 +1237,14 @@ err:
1173 return ERR_PTR(ret); 1237 return ERR_PTR(ret);
1174} 1238}
1175 1239
1240static struct nfs4_state *
1241nfs4_opendata_to_nfs4_state(struct nfs4_opendata *data)
1242{
1243 if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS)
1244 return _nfs4_opendata_reclaim_to_nfs4_state(data);
1245 return _nfs4_opendata_to_nfs4_state(data);
1246}
1247
1176static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state) 1248static struct nfs_open_context *nfs4_state_find_open_context(struct nfs4_state *state)
1177{ 1249{
1178 struct nfs_inode *nfsi = NFS_I(state->inode); 1250 struct nfs_inode *nfsi = NFS_I(state->inode);
@@ -1494,6 +1566,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata)
1494 data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid; 1566 data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid;
1495 if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) { 1567 if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) {
1496 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; 1568 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR];
1569 data->o_arg.open_bitmap = &nfs4_open_noattr_bitmap[0];
1497 nfs_copy_fh(&data->o_res.fh, data->o_arg.fh); 1570 nfs_copy_fh(&data->o_res.fh, data->o_arg.fh);
1498 } 1571 }
1499 data->timestamp = jiffies; 1572 data->timestamp = jiffies;
@@ -1526,7 +1599,8 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata)
1526 return; 1599 return;
1527 1600
1528 if (task->tk_status == 0) { 1601 if (task->tk_status == 0) {
1529 switch (data->o_res.f_attr->mode & S_IFMT) { 1602 if (data->o_res.f_attr->valid & NFS_ATTR_FATTR_TYPE) {
1603 switch (data->o_res.f_attr->mode & S_IFMT) {
1530 case S_IFREG: 1604 case S_IFREG:
1531 break; 1605 break;
1532 case S_IFLNK: 1606 case S_IFLNK:
@@ -1537,6 +1611,7 @@ static void nfs4_open_done(struct rpc_task *task, void *calldata)
1537 break; 1611 break;
1538 default: 1612 default:
1539 data->rpc_status = -ENOTDIR; 1613 data->rpc_status = -ENOTDIR;
1614 }
1540 } 1615 }
1541 renew_lease(data->o_res.server, data->timestamp); 1616 renew_lease(data->o_res.server, data->timestamp);
1542 if (!(data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM)) 1617 if (!(data->o_res.rflags & NFS4_OPEN_RESULT_CONFIRM))
@@ -1643,6 +1718,39 @@ static int _nfs4_recover_proc_open(struct nfs4_opendata *data)
1643 return status; 1718 return status;
1644} 1719}
1645 1720
1721static int nfs4_opendata_access(struct rpc_cred *cred,
1722 struct nfs4_opendata *opendata,
1723 struct nfs4_state *state, fmode_t fmode)
1724{
1725 struct nfs_access_entry cache;
1726 u32 mask;
1727
1728 /* access call failed or for some reason the server doesn't
1729 * support any access modes -- defer access call until later */
1730 if (opendata->o_res.access_supported == 0)
1731 return 0;
1732
1733 mask = 0;
1734 /* don't check MAY_WRITE - a newly created file may not have
1735 * write mode bits, but POSIX allows the creating process to write */
1736 if (fmode & FMODE_READ)
1737 mask |= MAY_READ;
1738 if (fmode & FMODE_EXEC)
1739 mask |= MAY_EXEC;
1740
1741 cache.cred = cred;
1742 cache.jiffies = jiffies;
1743 nfs_access_set_mask(&cache, opendata->o_res.access_result);
1744 nfs_access_add_cache(state->inode, &cache);
1745
1746 if ((mask & ~cache.mask & (MAY_READ | MAY_EXEC)) == 0)
1747 return 0;
1748
1749 /* even though OPEN succeeded, access is denied. Close the file */
1750 nfs4_close_state(state, fmode);
1751 return -NFS4ERR_ACCESS;
1752}
1753
1646/* 1754/*
1647 * Note: On error, nfs4_proc_open will free the struct nfs4_opendata 1755 * Note: On error, nfs4_proc_open will free the struct nfs4_opendata
1648 */ 1756 */
@@ -1774,7 +1882,11 @@ static void nfs41_clear_delegation_stateid(struct nfs4_state *state)
1774 * informs us the stateid is unrecognized. */ 1882 * informs us the stateid is unrecognized. */
1775 if (status != -NFS4ERR_BAD_STATEID) 1883 if (status != -NFS4ERR_BAD_STATEID)
1776 nfs41_free_stateid(server, stateid); 1884 nfs41_free_stateid(server, stateid);
1885 nfs_remove_bad_delegation(state->inode);
1777 1886
1887 write_seqlock(&state->seqlock);
1888 nfs4_stateid_copy(&state->stateid, &state->open_stateid);
1889 write_sequnlock(&state->seqlock);
1778 clear_bit(NFS_DELEGATED_STATE, &state->flags); 1890 clear_bit(NFS_DELEGATED_STATE, &state->flags);
1779 } 1891 }
1780} 1892}
@@ -1790,7 +1902,7 @@ static void nfs41_clear_delegation_stateid(struct nfs4_state *state)
1790static int nfs41_check_open_stateid(struct nfs4_state *state) 1902static int nfs41_check_open_stateid(struct nfs4_state *state)
1791{ 1903{
1792 struct nfs_server *server = NFS_SERVER(state->inode); 1904 struct nfs_server *server = NFS_SERVER(state->inode);
1793 nfs4_stateid *stateid = &state->stateid; 1905 nfs4_stateid *stateid = &state->open_stateid;
1794 int status; 1906 int status;
1795 1907
1796 /* If a state reset has been done, test_stateid is unneeded */ 1908 /* If a state reset has been done, test_stateid is unneeded */
@@ -1896,6 +2008,10 @@ static int _nfs4_do_open(struct inode *dir,
1896 if (server->caps & NFS_CAP_POSIX_LOCK) 2008 if (server->caps & NFS_CAP_POSIX_LOCK)
1897 set_bit(NFS_STATE_POSIX_LOCKS, &state->flags); 2009 set_bit(NFS_STATE_POSIX_LOCKS, &state->flags);
1898 2010
2011 status = nfs4_opendata_access(cred, opendata, state, fmode);
2012 if (status != 0)
2013 goto err_opendata_put;
2014
1899 if (opendata->o_arg.open_flags & O_EXCL) { 2015 if (opendata->o_arg.open_flags & O_EXCL) {
1900 nfs4_exclusive_attrset(opendata, sattr); 2016 nfs4_exclusive_attrset(opendata, sattr);
1901 2017
@@ -1941,7 +2057,7 @@ static struct nfs4_state *nfs4_do_open(struct inode *dir,
1941 struct nfs4_state *res; 2057 struct nfs4_state *res;
1942 int status; 2058 int status;
1943 2059
1944 fmode &= FMODE_READ|FMODE_WRITE; 2060 fmode &= FMODE_READ|FMODE_WRITE|FMODE_EXEC;
1945 do { 2061 do {
1946 status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred, 2062 status = _nfs4_do_open(dir, dentry, fmode, flags, sattr, cred,
1947 &res, ctx_th); 2063 &res, ctx_th);
@@ -2013,8 +2129,12 @@ static int _nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred,
2013 nfs_fattr_init(fattr); 2129 nfs_fattr_init(fattr);
2014 2130
2015 if (state != NULL) { 2131 if (state != NULL) {
2132 struct nfs_lockowner lockowner = {
2133 .l_owner = current->files,
2134 .l_pid = current->tgid,
2135 };
2016 nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE, 2136 nfs4_select_rw_stateid(&arg.stateid, state, FMODE_WRITE,
2017 current->files, current->tgid); 2137 &lockowner);
2018 } else if (nfs4_copy_delegation_stateid(&arg.stateid, inode, 2138 } else if (nfs4_copy_delegation_stateid(&arg.stateid, inode,
2019 FMODE_WRITE)) { 2139 FMODE_WRITE)) {
2020 /* Use that stateid */ 2140 /* Use that stateid */
@@ -2133,6 +2253,7 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2133{ 2253{
2134 struct nfs4_closedata *calldata = data; 2254 struct nfs4_closedata *calldata = data;
2135 struct nfs4_state *state = calldata->state; 2255 struct nfs4_state *state = calldata->state;
2256 struct inode *inode = calldata->inode;
2136 int call_close = 0; 2257 int call_close = 0;
2137 2258
2138 dprintk("%s: begin!\n", __func__); 2259 dprintk("%s: begin!\n", __func__);
@@ -2166,16 +2287,13 @@ static void nfs4_close_prepare(struct rpc_task *task, void *data)
2166 if (calldata->arg.fmode == 0) { 2287 if (calldata->arg.fmode == 0) {
2167 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE]; 2288 task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_CLOSE];
2168 if (calldata->roc && 2289 if (calldata->roc &&
2169 pnfs_roc_drain(calldata->inode, &calldata->roc_barrier)) { 2290 pnfs_roc_drain(inode, &calldata->roc_barrier, task))
2170 rpc_sleep_on(&NFS_SERVER(calldata->inode)->roc_rpcwaitq,
2171 task, NULL);
2172 goto out; 2291 goto out;
2173 }
2174 } 2292 }
2175 2293
2176 nfs_fattr_init(calldata->res.fattr); 2294 nfs_fattr_init(calldata->res.fattr);
2177 calldata->timestamp = jiffies; 2295 calldata->timestamp = jiffies;
2178 if (nfs4_setup_sequence(NFS_SERVER(calldata->inode), 2296 if (nfs4_setup_sequence(NFS_SERVER(inode),
2179 &calldata->arg.seq_args, 2297 &calldata->arg.seq_args,
2180 &calldata->res.seq_res, 2298 &calldata->res.seq_res,
2181 task)) 2299 task))
@@ -2202,7 +2320,7 @@ static const struct rpc_call_ops nfs4_close_ops = {
2202 * 2320 *
2203 * NOTE: Caller must be holding the sp->so_owner semaphore! 2321 * NOTE: Caller must be holding the sp->so_owner semaphore!
2204 */ 2322 */
2205int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc) 2323int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait)
2206{ 2324{
2207 struct nfs_server *server = NFS_SERVER(state->inode); 2325 struct nfs_server *server = NFS_SERVER(state->inode);
2208 struct nfs4_closedata *calldata; 2326 struct nfs4_closedata *calldata;
@@ -2238,7 +2356,7 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc)
2238 calldata->res.fattr = &calldata->fattr; 2356 calldata->res.fattr = &calldata->fattr;
2239 calldata->res.seqid = calldata->arg.seqid; 2357 calldata->res.seqid = calldata->arg.seqid;
2240 calldata->res.server = server; 2358 calldata->res.server = server;
2241 calldata->roc = roc; 2359 calldata->roc = pnfs_roc(state->inode);
2242 nfs_sb_active(calldata->inode->i_sb); 2360 nfs_sb_active(calldata->inode->i_sb);
2243 2361
2244 msg.rpc_argp = &calldata->arg; 2362 msg.rpc_argp = &calldata->arg;
@@ -2255,8 +2373,6 @@ int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc)
2255out_free_calldata: 2373out_free_calldata:
2256 kfree(calldata); 2374 kfree(calldata);
2257out: 2375out:
2258 if (roc)
2259 pnfs_roc_release(state->inode);
2260 nfs4_put_open_state(state); 2376 nfs4_put_open_state(state);
2261 nfs4_put_state_owner(sp); 2377 nfs4_put_state_owner(sp);
2262 return status; 2378 return status;
@@ -2399,7 +2515,7 @@ static int nfs4_lookup_root_sec(struct nfs_server *server, struct nfs_fh *fhandl
2399 int ret; 2515 int ret;
2400 2516
2401 auth = rpcauth_create(flavor, server->client); 2517 auth = rpcauth_create(flavor, server->client);
2402 if (!auth) { 2518 if (IS_ERR(auth)) {
2403 ret = -EIO; 2519 ret = -EIO;
2404 goto out; 2520 goto out;
2405 } 2521 }
@@ -2767,13 +2883,7 @@ static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry
2767 2883
2768 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); 2884 status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0);
2769 if (!status) { 2885 if (!status) {
2770 entry->mask = 0; 2886 nfs_access_set_mask(entry, res.access);
2771 if (res.access & NFS4_ACCESS_READ)
2772 entry->mask |= MAY_READ;
2773 if (res.access & (NFS4_ACCESS_MODIFY | NFS4_ACCESS_EXTEND | NFS4_ACCESS_DELETE))
2774 entry->mask |= MAY_WRITE;
2775 if (res.access & (NFS4_ACCESS_LOOKUP|NFS4_ACCESS_EXECUTE))
2776 entry->mask |= MAY_EXEC;
2777 nfs_refresh_inode(inode, res.fattr); 2887 nfs_refresh_inode(inode, res.fattr);
2778 } 2888 }
2779 nfs_free_fattr(res.fattr); 2889 nfs_free_fattr(res.fattr);
@@ -3362,8 +3472,11 @@ static int nfs4_proc_fsinfo(struct nfs_server *server, struct nfs_fh *fhandle, s
3362 3472
3363 nfs_fattr_init(fsinfo->fattr); 3473 nfs_fattr_init(fsinfo->fattr);
3364 error = nfs4_do_fsinfo(server, fhandle, fsinfo); 3474 error = nfs4_do_fsinfo(server, fhandle, fsinfo);
3365 if (error == 0) 3475 if (error == 0) {
3476 /* block layout checks this! */
3477 server->pnfs_blksize = fsinfo->blksize;
3366 set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype); 3478 set_pnfs_layoutdriver(server, fhandle, fsinfo->layouttype);
3479 }
3367 3480
3368 return error; 3481 return error;
3369} 3482}
@@ -4007,6 +4120,36 @@ static void nfs4_init_boot_verifier(const struct nfs_client *clp,
4007 memcpy(bootverf->data, verf, sizeof(bootverf->data)); 4120 memcpy(bootverf->data, verf, sizeof(bootverf->data));
4008} 4121}
4009 4122
4123static unsigned int
4124nfs4_init_nonuniform_client_string(const struct nfs_client *clp,
4125 char *buf, size_t len)
4126{
4127 unsigned int result;
4128
4129 rcu_read_lock();
4130 result = scnprintf(buf, len, "Linux NFSv4.0 %s/%s %s",
4131 clp->cl_ipaddr,
4132 rpc_peeraddr2str(clp->cl_rpcclient,
4133 RPC_DISPLAY_ADDR),
4134 rpc_peeraddr2str(clp->cl_rpcclient,
4135 RPC_DISPLAY_PROTO));
4136 rcu_read_unlock();
4137 return result;
4138}
4139
4140static unsigned int
4141nfs4_init_uniform_client_string(const struct nfs_client *clp,
4142 char *buf, size_t len)
4143{
4144 char *nodename = clp->cl_rpcclient->cl_nodename;
4145
4146 if (nfs4_client_id_uniquifier[0] != '\0')
4147 nodename = nfs4_client_id_uniquifier;
4148 return scnprintf(buf, len, "Linux NFSv%u.%u %s",
4149 clp->rpc_ops->version, clp->cl_minorversion,
4150 nodename);
4151}
4152
4010/** 4153/**
4011 * nfs4_proc_setclientid - Negotiate client ID 4154 * nfs4_proc_setclientid - Negotiate client ID
4012 * @clp: state data structure 4155 * @clp: state data structure
@@ -4037,15 +4180,18 @@ int nfs4_proc_setclientid(struct nfs_client *clp, u32 program,
4037 4180
4038 /* nfs_client_id4 */ 4181 /* nfs_client_id4 */
4039 nfs4_init_boot_verifier(clp, &sc_verifier); 4182 nfs4_init_boot_verifier(clp, &sc_verifier);
4040 rcu_read_lock(); 4183 if (test_bit(NFS_CS_MIGRATION, &clp->cl_flags))
4041 setclientid.sc_name_len = scnprintf(setclientid.sc_name, 4184 setclientid.sc_name_len =
4042 sizeof(setclientid.sc_name), "%s/%s %s", 4185 nfs4_init_uniform_client_string(clp,
4043 clp->cl_ipaddr, 4186 setclientid.sc_name,
4044 rpc_peeraddr2str(clp->cl_rpcclient, 4187 sizeof(setclientid.sc_name));
4045 RPC_DISPLAY_ADDR), 4188 else
4046 rpc_peeraddr2str(clp->cl_rpcclient, 4189 setclientid.sc_name_len =
4047 RPC_DISPLAY_PROTO)); 4190 nfs4_init_nonuniform_client_string(clp,
4191 setclientid.sc_name,
4192 sizeof(setclientid.sc_name));
4048 /* cb_client4 */ 4193 /* cb_client4 */
4194 rcu_read_lock();
4049 setclientid.sc_netid_len = scnprintf(setclientid.sc_netid, 4195 setclientid.sc_netid_len = scnprintf(setclientid.sc_netid,
4050 sizeof(setclientid.sc_netid), 4196 sizeof(setclientid.sc_netid),
4051 rpc_peeraddr2str(clp->cl_rpcclient, 4197 rpc_peeraddr2str(clp->cl_rpcclient,
@@ -4391,7 +4537,7 @@ static void nfs4_locku_prepare(struct rpc_task *task, void *data)
4391 4537
4392 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0) 4538 if (nfs_wait_on_sequence(calldata->arg.seqid, task) != 0)
4393 return; 4539 return;
4394 if ((calldata->lsp->ls_flags & NFS_LOCK_INITIALIZED) == 0) { 4540 if (test_bit(NFS_LOCK_INITIALIZED, &calldata->lsp->ls_flags) == 0) {
4395 /* Note: exit _without_ running nfs4_locku_done */ 4541 /* Note: exit _without_ running nfs4_locku_done */
4396 task->tk_action = NULL; 4542 task->tk_action = NULL;
4397 return; 4543 return;
@@ -4585,7 +4731,7 @@ static void nfs4_lock_done(struct rpc_task *task, void *calldata)
4585 } 4731 }
4586 if (data->rpc_status == 0) { 4732 if (data->rpc_status == 0) {
4587 nfs4_stateid_copy(&data->lsp->ls_stateid, &data->res.stateid); 4733 nfs4_stateid_copy(&data->lsp->ls_stateid, &data->res.stateid);
4588 data->lsp->ls_flags |= NFS_LOCK_INITIALIZED; 4734 set_bit(NFS_LOCK_INITIALIZED, &data->lsp->ls_flags);
4589 renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp); 4735 renew_lease(NFS_SERVER(data->ctx->dentry->d_inode), data->timestamp);
4590 } 4736 }
4591out: 4737out:
@@ -4632,7 +4778,7 @@ static void nfs4_handle_setlk_error(struct nfs_server *server, struct nfs4_lock_
4632 case -NFS4ERR_BAD_STATEID: 4778 case -NFS4ERR_BAD_STATEID:
4633 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED; 4779 lsp->ls_seqid.flags &= ~NFS_SEQID_CONFIRMED;
4634 if (new_lock_owner != 0 || 4780 if (new_lock_owner != 0 ||
4635 (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) 4781 test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0)
4636 nfs4_schedule_stateid_recovery(server, lsp->ls_state); 4782 nfs4_schedule_stateid_recovery(server, lsp->ls_state);
4637 break; 4783 break;
4638 case -NFS4ERR_STALE_STATEID: 4784 case -NFS4ERR_STALE_STATEID:
@@ -4756,7 +4902,7 @@ static int nfs41_check_expired_locks(struct nfs4_state *state)
4756 struct nfs_server *server = NFS_SERVER(state->inode); 4902 struct nfs_server *server = NFS_SERVER(state->inode);
4757 4903
4758 list_for_each_entry(lsp, &state->lock_states, ls_locks) { 4904 list_for_each_entry(lsp, &state->lock_states, ls_locks) {
4759 if (lsp->ls_flags & NFS_LOCK_INITIALIZED) { 4905 if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) {
4760 status = nfs41_test_stateid(server, &lsp->ls_stateid); 4906 status = nfs41_test_stateid(server, &lsp->ls_stateid);
4761 if (status != NFS_OK) { 4907 if (status != NFS_OK) {
4762 /* Free the stateid unless the server 4908 /* Free the stateid unless the server
@@ -4764,7 +4910,7 @@ static int nfs41_check_expired_locks(struct nfs4_state *state)
4764 if (status != -NFS4ERR_BAD_STATEID) 4910 if (status != -NFS4ERR_BAD_STATEID)
4765 nfs41_free_stateid(server, 4911 nfs41_free_stateid(server,
4766 &lsp->ls_stateid); 4912 &lsp->ls_stateid);
4767 lsp->ls_flags &= ~NFS_LOCK_INITIALIZED; 4913 clear_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags);
4768 ret = status; 4914 ret = status;
4769 } 4915 }
4770 } 4916 }
@@ -5267,10 +5413,8 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred)
5267 }; 5413 };
5268 5414
5269 nfs4_init_boot_verifier(clp, &verifier); 5415 nfs4_init_boot_verifier(clp, &verifier);
5270 args.id_len = scnprintf(args.id, sizeof(args.id), 5416 args.id_len = nfs4_init_uniform_client_string(clp, args.id,
5271 "%s/%s", 5417 sizeof(args.id));
5272 clp->cl_ipaddr,
5273 clp->cl_rpcclient->cl_nodename);
5274 dprintk("NFS call exchange_id auth=%s, '%.*s'\n", 5418 dprintk("NFS call exchange_id auth=%s, '%.*s'\n",
5275 clp->cl_rpcclient->cl_auth->au_ops->au_name, 5419 clp->cl_rpcclient->cl_auth->au_ops->au_name,
5276 args.id_len, args.id); 5420 args.id_len, args.id);
@@ -5391,6 +5535,8 @@ int nfs4_destroy_clientid(struct nfs_client *clp)
5391 goto out; 5535 goto out;
5392 if (clp->cl_exchange_flags == 0) 5536 if (clp->cl_exchange_flags == 0)
5393 goto out; 5537 goto out;
5538 if (clp->cl_preserve_clid)
5539 goto out;
5394 cred = nfs4_get_exchange_id_cred(clp); 5540 cred = nfs4_get_exchange_id_cred(clp);
5395 ret = nfs4_proc_destroy_clientid(clp, cred); 5541 ret = nfs4_proc_destroy_clientid(clp, cred);
5396 if (cred) 5542 if (cred)
@@ -6196,26 +6342,44 @@ nfs4_layoutget_prepare(struct rpc_task *task, void *calldata)
6196static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) 6342static void nfs4_layoutget_done(struct rpc_task *task, void *calldata)
6197{ 6343{
6198 struct nfs4_layoutget *lgp = calldata; 6344 struct nfs4_layoutget *lgp = calldata;
6199 struct nfs_server *server = NFS_SERVER(lgp->args.inode); 6345 struct inode *inode = lgp->args.inode;
6346 struct nfs_server *server = NFS_SERVER(inode);
6347 struct pnfs_layout_hdr *lo;
6348 struct nfs4_state *state = NULL;
6200 6349
6201 dprintk("--> %s\n", __func__); 6350 dprintk("--> %s\n", __func__);
6202 6351
6203 if (!nfs4_sequence_done(task, &lgp->res.seq_res)) 6352 if (!nfs4_sequence_done(task, &lgp->res.seq_res))
6204 return; 6353 goto out;
6205 6354
6206 switch (task->tk_status) { 6355 switch (task->tk_status) {
6207 case 0: 6356 case 0:
6208 break; 6357 goto out;
6209 case -NFS4ERR_LAYOUTTRYLATER: 6358 case -NFS4ERR_LAYOUTTRYLATER:
6210 case -NFS4ERR_RECALLCONFLICT: 6359 case -NFS4ERR_RECALLCONFLICT:
6211 task->tk_status = -NFS4ERR_DELAY; 6360 task->tk_status = -NFS4ERR_DELAY;
6212 /* Fall through */ 6361 break;
6213 default: 6362 case -NFS4ERR_EXPIRED:
6214 if (nfs4_async_handle_error(task, server, NULL) == -EAGAIN) { 6363 case -NFS4ERR_BAD_STATEID:
6215 rpc_restart_call_prepare(task); 6364 spin_lock(&inode->i_lock);
6216 return; 6365 lo = NFS_I(inode)->layout;
6366 if (!lo || list_empty(&lo->plh_segs)) {
6367 spin_unlock(&inode->i_lock);
6368 /* If the open stateid was bad, then recover it. */
6369 state = lgp->args.ctx->state;
6370 } else {
6371 LIST_HEAD(head);
6372
6373 pnfs_mark_matching_lsegs_invalid(lo, &head, NULL);
6374 spin_unlock(&inode->i_lock);
6375 /* Mark the bad layout state as invalid, then
6376 * retry using the open stateid. */
6377 pnfs_free_lseg_list(&head);
6217 } 6378 }
6218 } 6379 }
6380 if (nfs4_async_handle_error(task, server, state) == -EAGAIN)
6381 rpc_restart_call_prepare(task);
6382out:
6219 dprintk("<-- %s\n", __func__); 6383 dprintk("<-- %s\n", __func__);
6220} 6384}
6221 6385
@@ -6282,7 +6446,8 @@ static const struct rpc_call_ops nfs4_layoutget_call_ops = {
6282 .rpc_release = nfs4_layoutget_release, 6446 .rpc_release = nfs4_layoutget_release,
6283}; 6447};
6284 6448
6285void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags) 6449struct pnfs_layout_segment *
6450nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
6286{ 6451{
6287 struct nfs_server *server = NFS_SERVER(lgp->args.inode); 6452 struct nfs_server *server = NFS_SERVER(lgp->args.inode);
6288 size_t max_pages = max_response_pages(server); 6453 size_t max_pages = max_response_pages(server);
@@ -6299,6 +6464,7 @@ void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
6299 .callback_data = lgp, 6464 .callback_data = lgp,
6300 .flags = RPC_TASK_ASYNC, 6465 .flags = RPC_TASK_ASYNC,
6301 }; 6466 };
6467 struct pnfs_layout_segment *lseg = NULL;
6302 int status = 0; 6468 int status = 0;
6303 6469
6304 dprintk("--> %s\n", __func__); 6470 dprintk("--> %s\n", __func__);
@@ -6306,7 +6472,7 @@ void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
6306 lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags); 6472 lgp->args.layout.pages = nfs4_alloc_pages(max_pages, gfp_flags);
6307 if (!lgp->args.layout.pages) { 6473 if (!lgp->args.layout.pages) {
6308 nfs4_layoutget_release(lgp); 6474 nfs4_layoutget_release(lgp);
6309 return; 6475 return ERR_PTR(-ENOMEM);
6310 } 6476 }
6311 lgp->args.layout.pglen = max_pages * PAGE_SIZE; 6477 lgp->args.layout.pglen = max_pages * PAGE_SIZE;
6312 6478
@@ -6315,15 +6481,17 @@ void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags)
6315 nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0); 6481 nfs41_init_sequence(&lgp->args.seq_args, &lgp->res.seq_res, 0);
6316 task = rpc_run_task(&task_setup_data); 6482 task = rpc_run_task(&task_setup_data);
6317 if (IS_ERR(task)) 6483 if (IS_ERR(task))
6318 return; 6484 return ERR_CAST(task);
6319 status = nfs4_wait_for_completion_rpc_task(task); 6485 status = nfs4_wait_for_completion_rpc_task(task);
6320 if (status == 0) 6486 if (status == 0)
6321 status = task->tk_status; 6487 status = task->tk_status;
6322 if (status == 0) 6488 if (status == 0)
6323 status = pnfs_layout_process(lgp); 6489 lseg = pnfs_layout_process(lgp);
6324 rpc_put_task(task); 6490 rpc_put_task(task);
6325 dprintk("<-- %s status=%d\n", __func__, status); 6491 dprintk("<-- %s status=%d\n", __func__, status);
6326 return; 6492 if (status)
6493 return ERR_PTR(status);
6494 return lseg;
6327} 6495}
6328 6496
6329static void 6497static void
@@ -6342,7 +6510,6 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
6342{ 6510{
6343 struct nfs4_layoutreturn *lrp = calldata; 6511 struct nfs4_layoutreturn *lrp = calldata;
6344 struct nfs_server *server; 6512 struct nfs_server *server;
6345 struct pnfs_layout_hdr *lo = lrp->args.layout;
6346 6513
6347 dprintk("--> %s\n", __func__); 6514 dprintk("--> %s\n", __func__);
6348 6515
@@ -6354,20 +6521,21 @@ static void nfs4_layoutreturn_done(struct rpc_task *task, void *calldata)
6354 rpc_restart_call_prepare(task); 6521 rpc_restart_call_prepare(task);
6355 return; 6522 return;
6356 } 6523 }
6357 spin_lock(&lo->plh_inode->i_lock);
6358 if (task->tk_status == 0 && lrp->res.lrs_present)
6359 pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
6360 lo->plh_block_lgets--;
6361 spin_unlock(&lo->plh_inode->i_lock);
6362 dprintk("<-- %s\n", __func__); 6524 dprintk("<-- %s\n", __func__);
6363} 6525}
6364 6526
6365static void nfs4_layoutreturn_release(void *calldata) 6527static void nfs4_layoutreturn_release(void *calldata)
6366{ 6528{
6367 struct nfs4_layoutreturn *lrp = calldata; 6529 struct nfs4_layoutreturn *lrp = calldata;
6530 struct pnfs_layout_hdr *lo = lrp->args.layout;
6368 6531
6369 dprintk("--> %s\n", __func__); 6532 dprintk("--> %s\n", __func__);
6370 put_layout_hdr(lrp->args.layout); 6533 spin_lock(&lo->plh_inode->i_lock);
6534 if (lrp->res.lrs_present)
6535 pnfs_set_layout_stateid(lo, &lrp->res.stateid, true);
6536 lo->plh_block_lgets--;
6537 spin_unlock(&lo->plh_inode->i_lock);
6538 pnfs_put_layout_hdr(lrp->args.layout);
6371 kfree(calldata); 6539 kfree(calldata);
6372 dprintk("<-- %s\n", __func__); 6540 dprintk("<-- %s\n", __func__);
6373} 6541}
@@ -6541,7 +6709,7 @@ static void nfs4_layoutcommit_release(void *calldata)
6541 list_del_init(&lseg->pls_lc_list); 6709 list_del_init(&lseg->pls_lc_list);
6542 if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT, 6710 if (test_and_clear_bit(NFS_LSEG_LAYOUTCOMMIT,
6543 &lseg->pls_flags)) 6711 &lseg->pls_flags))
6544 put_lseg(lseg); 6712 pnfs_put_lseg(lseg);
6545 } 6713 }
6546 6714
6547 clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock); 6715 clear_bit_unlock(NFS_INO_LAYOUTCOMMITTING, bitlock);
@@ -6800,6 +6968,7 @@ static const struct nfs4_state_recovery_ops nfs40_reboot_recovery_ops = {
6800 .recover_lock = nfs4_lock_reclaim, 6968 .recover_lock = nfs4_lock_reclaim,
6801 .establish_clid = nfs4_init_clientid, 6969 .establish_clid = nfs4_init_clientid,
6802 .get_clid_cred = nfs4_get_setclientid_cred, 6970 .get_clid_cred = nfs4_get_setclientid_cred,
6971 .detect_trunking = nfs40_discover_server_trunking,
6803}; 6972};
6804 6973
6805#if defined(CONFIG_NFS_V4_1) 6974#if defined(CONFIG_NFS_V4_1)
@@ -6811,6 +6980,7 @@ static const struct nfs4_state_recovery_ops nfs41_reboot_recovery_ops = {
6811 .establish_clid = nfs41_init_clientid, 6980 .establish_clid = nfs41_init_clientid,
6812 .get_clid_cred = nfs4_get_exchange_id_cred, 6981 .get_clid_cred = nfs4_get_exchange_id_cred,
6813 .reclaim_complete = nfs41_proc_reclaim_complete, 6982 .reclaim_complete = nfs41_proc_reclaim_complete,
6983 .detect_trunking = nfs41_discover_server_trunking,
6814}; 6984};
6815#endif /* CONFIG_NFS_V4_1 */ 6985#endif /* CONFIG_NFS_V4_1 */
6816 6986
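Among other things, the nfs4proc.c hunks add nfs4_opendata_access(), which converts the open mode into a MAY_* mask (deliberately skipping MAY_WRITE, since a freshly created file may lack write mode bits yet still be writable by its creator) and fails the open only when a requested READ/EXEC right is missing from the cached ACCESS result. A standalone sketch of just that mask check, with simplified flag values standing in for the kernel constants:

/* Standalone sketch of the access-mask check done in nfs4_opendata_access();
 * the flag values below are simplified stand-ins, not the kernel definitions. */
#include <stdio.h>
#include <stdbool.h>

#define FMODE_READ  0x1
#define FMODE_WRITE 0x2
#define FMODE_EXEC  0x4

#define MAY_EXEC   0x1
#define MAY_WRITE  0x2
#define MAY_READ   0x4

/* Returns true if the rights granted by the server cover what the open needs. */
static bool open_access_ok(unsigned int fmode, unsigned int granted_mask)
{
	unsigned int mask = 0;

	/* MAY_WRITE is intentionally not checked: a newly created file may
	 * lack write mode bits, yet POSIX lets the creating process write. */
	if (fmode & FMODE_READ)
		mask |= MAY_READ;
	if (fmode & FMODE_EXEC)
		mask |= MAY_EXEC;

	/* Fail only if a needed READ/EXEC right is absent from the result. */
	return (mask & ~granted_mask & (MAY_READ | MAY_EXEC)) == 0;
}

int main(void)
{
	printf("read open, read granted: %d\n",
	       open_access_ok(FMODE_READ, MAY_READ));
	printf("exec open, read granted: %d\n",
	       open_access_ok(FMODE_READ | FMODE_EXEC, MAY_READ));
	return 0;
}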
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 55148def5540..c351e6b39838 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -51,18 +51,21 @@
51#include <linux/bitops.h> 51#include <linux/bitops.h>
52#include <linux/jiffies.h> 52#include <linux/jiffies.h>
53 53
54#include <linux/sunrpc/clnt.h>
55
54#include "nfs4_fs.h" 56#include "nfs4_fs.h"
55#include "callback.h" 57#include "callback.h"
56#include "delegation.h" 58#include "delegation.h"
57#include "internal.h" 59#include "internal.h"
58#include "pnfs.h" 60#include "pnfs.h"
61#include "netns.h"
59 62
60#define NFSDBG_FACILITY NFSDBG_STATE 63#define NFSDBG_FACILITY NFSDBG_STATE
61 64
62#define OPENOWNER_POOL_SIZE 8 65#define OPENOWNER_POOL_SIZE 8
63 66
64const nfs4_stateid zero_stateid; 67const nfs4_stateid zero_stateid;
65 68static DEFINE_MUTEX(nfs_clid_init_mutex);
66static LIST_HEAD(nfs4_clientid_list); 69static LIST_HEAD(nfs4_clientid_list);
67 70
68int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred) 71int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
@@ -73,12 +76,13 @@ int nfs4_init_clientid(struct nfs_client *clp, struct rpc_cred *cred)
73 }; 76 };
74 unsigned short port; 77 unsigned short port;
75 int status; 78 int status;
79 struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
76 80
77 if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state)) 81 if (test_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state))
78 goto do_confirm; 82 goto do_confirm;
79 port = nfs_callback_tcpport; 83 port = nn->nfs_callback_tcpport;
80 if (clp->cl_addr.ss_family == AF_INET6) 84 if (clp->cl_addr.ss_family == AF_INET6)
81 port = nfs_callback_tcpport6; 85 port = nn->nfs_callback_tcpport6;
82 86
83 status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid); 87 status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid);
84 if (status != 0) 88 if (status != 0)
@@ -96,6 +100,56 @@ out:
96 return status; 100 return status;
97} 101}
98 102
103/**
104 * nfs40_discover_server_trunking - Detect server IP address trunking (mv0)
105 *
106 * @clp: nfs_client under test
107 * @result: OUT: found nfs_client, or clp
108 * @cred: credential to use for trunking test
109 *
110 * Returns zero, a negative errno, or a negative NFS4ERR status.
111 * If zero is returned, an nfs_client pointer is planted in
112 * "result".
113 *
114 * Note: The returned client may not yet be marked ready.
115 */
116int nfs40_discover_server_trunking(struct nfs_client *clp,
117 struct nfs_client **result,
118 struct rpc_cred *cred)
119{
120 struct nfs4_setclientid_res clid = {
121 .clientid = clp->cl_clientid,
122 .confirm = clp->cl_confirm,
123 };
124 struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id);
125 unsigned short port;
126 int status;
127
128 port = nn->nfs_callback_tcpport;
129 if (clp->cl_addr.ss_family == AF_INET6)
130 port = nn->nfs_callback_tcpport6;
131
132 status = nfs4_proc_setclientid(clp, NFS4_CALLBACK, port, cred, &clid);
133 if (status != 0)
134 goto out;
135 clp->cl_clientid = clid.clientid;
136 clp->cl_confirm = clid.confirm;
137
138 status = nfs40_walk_client_list(clp, result, cred);
139 switch (status) {
140 case -NFS4ERR_STALE_CLIENTID:
141 set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
142 case 0:
143 /* Sustain the lease, even if it's empty. If the clientid4
144 * goes stale it's of no use for trunking discovery. */
145 nfs4_schedule_state_renewal(*result);
146 break;
147 }
148
149out:
150 return status;
151}
152
99struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp) 153struct rpc_cred *nfs4_get_machine_cred_locked(struct nfs_client *clp)
100{ 154{
101 struct rpc_cred *cred = NULL; 155 struct rpc_cred *cred = NULL;
@@ -275,6 +329,33 @@ out:
275 return status; 329 return status;
276} 330}
277 331
332/**
333 * nfs41_discover_server_trunking - Detect server IP address trunking (mv1)
334 *
335 * @clp: nfs_client under test
336 * @result: OUT: found nfs_client, or clp
337 * @cred: credential to use for trunking test
338 *
339 * Returns NFS4_OK, a negative errno, or a negative NFS4ERR status.
340 * If NFS4_OK is returned, an nfs_client pointer is planted in
341 * "result".
342 *
343 * Note: The returned client may not yet be marked ready.
344 */
345int nfs41_discover_server_trunking(struct nfs_client *clp,
346 struct nfs_client **result,
347 struct rpc_cred *cred)
348{
349 int status;
350
351 status = nfs4_proc_exchange_id(clp, cred);
352 if (status != NFS4_OK)
353 return status;
354 set_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
355
356 return nfs41_walk_client_list(clp, result, cred);
357}
358
278struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp) 359struct rpc_cred *nfs4_get_exchange_id_cred(struct nfs_client *clp)
279{ 360{
280 struct rpc_cred *cred; 361 struct rpc_cred *cred;
@@ -729,11 +810,8 @@ static void __nfs4_close(struct nfs4_state *state,
729 if (!call_close) { 810 if (!call_close) {
730 nfs4_put_open_state(state); 811 nfs4_put_open_state(state);
731 nfs4_put_state_owner(owner); 812 nfs4_put_state_owner(owner);
732 } else { 813 } else
733 bool roc = pnfs_roc(state->inode); 814 nfs4_do_close(state, gfp_mask, wait);
734
735 nfs4_do_close(state, gfp_mask, wait, roc);
736 }
737} 815}
738 816
739void nfs4_close_state(struct nfs4_state *state, fmode_t fmode) 817void nfs4_close_state(struct nfs4_state *state, fmode_t fmode)
@@ -865,7 +943,7 @@ void nfs4_put_lock_state(struct nfs4_lock_state *lsp)
865 if (list_empty(&state->lock_states)) 943 if (list_empty(&state->lock_states))
866 clear_bit(LK_STATE_IN_USE, &state->flags); 944 clear_bit(LK_STATE_IN_USE, &state->flags);
867 spin_unlock(&state->state_lock); 945 spin_unlock(&state->state_lock);
868 if (lsp->ls_flags & NFS_LOCK_INITIALIZED) { 946 if (test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags)) {
869 if (nfs4_release_lockowner(lsp) == 0) 947 if (nfs4_release_lockowner(lsp) == 0)
870 return; 948 return;
871 } 949 }
@@ -911,17 +989,25 @@ int nfs4_set_lock_state(struct nfs4_state *state, struct file_lock *fl)
911} 989}
912 990
913static bool nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state, 991static bool nfs4_copy_lock_stateid(nfs4_stateid *dst, struct nfs4_state *state,
914 fl_owner_t fl_owner, pid_t fl_pid) 992 const struct nfs_lockowner *lockowner)
915{ 993{
916 struct nfs4_lock_state *lsp; 994 struct nfs4_lock_state *lsp;
995 fl_owner_t fl_owner;
996 pid_t fl_pid;
917 bool ret = false; 997 bool ret = false;
918 998
999
1000 if (lockowner == NULL)
1001 goto out;
1002
919 if (test_bit(LK_STATE_IN_USE, &state->flags) == 0) 1003 if (test_bit(LK_STATE_IN_USE, &state->flags) == 0)
920 goto out; 1004 goto out;
921 1005
1006 fl_owner = lockowner->l_owner;
1007 fl_pid = lockowner->l_pid;
922 spin_lock(&state->state_lock); 1008 spin_lock(&state->state_lock);
923 lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE); 1009 lsp = __nfs4_find_lock_state(state, fl_owner, fl_pid, NFS4_ANY_LOCK_TYPE);
924 if (lsp != NULL && (lsp->ls_flags & NFS_LOCK_INITIALIZED) != 0) { 1010 if (lsp != NULL && test_bit(NFS_LOCK_INITIALIZED, &lsp->ls_flags) != 0) {
925 nfs4_stateid_copy(dst, &lsp->ls_stateid); 1011 nfs4_stateid_copy(dst, &lsp->ls_stateid);
926 ret = true; 1012 ret = true;
927 } 1013 }
@@ -946,11 +1032,11 @@ static void nfs4_copy_open_stateid(nfs4_stateid *dst, struct nfs4_state *state)
946 * requests. 1032 * requests.
947 */ 1033 */
948void nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, 1034void nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state,
949 fmode_t fmode, fl_owner_t fl_owner, pid_t fl_pid) 1035 fmode_t fmode, const struct nfs_lockowner *lockowner)
950{ 1036{
951 if (nfs4_copy_delegation_stateid(dst, state->inode, fmode)) 1037 if (nfs4_copy_delegation_stateid(dst, state->inode, fmode))
952 return; 1038 return;
953 if (nfs4_copy_lock_stateid(dst, state, fl_owner, fl_pid)) 1039 if (nfs4_copy_lock_stateid(dst, state, lockowner))
954 return; 1040 return;
955 nfs4_copy_open_stateid(dst, state); 1041 nfs4_copy_open_stateid(dst, state);
956} 1042}
@@ -1289,7 +1375,7 @@ restart:
1289 if (status >= 0) { 1375 if (status >= 0) {
1290 spin_lock(&state->state_lock); 1376 spin_lock(&state->state_lock);
1291 list_for_each_entry(lock, &state->lock_states, ls_locks) { 1377 list_for_each_entry(lock, &state->lock_states, ls_locks) {
1292 if (!(lock->ls_flags & NFS_LOCK_INITIALIZED)) 1378 if (!test_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags))
1293 pr_warn_ratelimited("NFS: " 1379 pr_warn_ratelimited("NFS: "
1294 "%s: Lock reclaim " 1380 "%s: Lock reclaim "
1295 "failed!\n", __func__); 1381 "failed!\n", __func__);
@@ -1361,7 +1447,7 @@ static void nfs4_clear_open_state(struct nfs4_state *state)
1361 spin_lock(&state->state_lock); 1447 spin_lock(&state->state_lock);
1362 list_for_each_entry(lock, &state->lock_states, ls_locks) { 1448 list_for_each_entry(lock, &state->lock_states, ls_locks) {
1363 lock->ls_seqid.flags = 0; 1449 lock->ls_seqid.flags = 0;
1364 lock->ls_flags &= ~NFS_LOCK_INITIALIZED; 1450 clear_bit(NFS_LOCK_INITIALIZED, &lock->ls_flags);
1365 } 1451 }
1366 spin_unlock(&state->state_lock); 1452 spin_unlock(&state->state_lock);
1367} 1453}
@@ -1595,8 +1681,8 @@ out:
1595 return nfs4_recovery_handle_error(clp, status); 1681 return nfs4_recovery_handle_error(clp, status);
1596} 1682}
1597 1683
1598/* Set NFS4CLNT_LEASE_EXPIRED for all v4.0 errors and for recoverable errors 1684/* Set NFS4CLNT_LEASE_EXPIRED and reclaim reboot state for all v4.0 errors
1599 * on EXCHANGE_ID for v4.1 1685 * and for recoverable errors on EXCHANGE_ID for v4.1
1600 */ 1686 */
1601static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status) 1687static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status)
1602{ 1688{
@@ -1606,8 +1692,12 @@ static int nfs4_handle_reclaim_lease_error(struct nfs_client *clp, int status)
1606 return -ESERVERFAULT; 1692 return -ESERVERFAULT;
1607 /* Lease confirmation error: retry after purging the lease */ 1693 /* Lease confirmation error: retry after purging the lease */
1608 ssleep(1); 1694 ssleep(1);
1695 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
1696 break;
1609 case -NFS4ERR_STALE_CLIENTID: 1697 case -NFS4ERR_STALE_CLIENTID:
1610 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state); 1698 clear_bit(NFS4CLNT_LEASE_CONFIRM, &clp->cl_state);
1699 nfs4_state_clear_reclaim_reboot(clp);
1700 nfs4_state_start_reclaim_reboot(clp);
1611 break; 1701 break;
1612 case -NFS4ERR_CLID_INUSE: 1702 case -NFS4ERR_CLID_INUSE:
1613 pr_err("NFS: Server %s reports our clientid is in use\n", 1703 pr_err("NFS: Server %s reports our clientid is in use\n",
@@ -1698,6 +1788,109 @@ static int nfs4_purge_lease(struct nfs_client *clp)
1698 return 0; 1788 return 0;
1699} 1789}
1700 1790
1791/**
1792 * nfs4_discover_server_trunking - Detect server IP address trunking
1793 *
1794 * @clp: nfs_client under test
1795 * @result: OUT: found nfs_client, or clp
1796 *
1797 * Returns zero or a negative errno. If zero is returned,
1798 * an nfs_client pointer is planted in "result".
1799 *
1800 * Note: since we are invoked in process context, and
1801 * not from inside the state manager, we cannot use
1802 * nfs4_handle_reclaim_lease_error().
1803 */
1804int nfs4_discover_server_trunking(struct nfs_client *clp,
1805 struct nfs_client **result)
1806{
1807 const struct nfs4_state_recovery_ops *ops =
1808 clp->cl_mvops->reboot_recovery_ops;
1809 rpc_authflavor_t *flavors, flav, save;
1810 struct rpc_clnt *clnt;
1811 struct rpc_cred *cred;
1812 int i, len, status;
1813
1814 dprintk("NFS: %s: testing '%s'\n", __func__, clp->cl_hostname);
1815
1816 len = NFS_MAX_SECFLAVORS;
1817 flavors = kcalloc(len, sizeof(*flavors), GFP_KERNEL);
1818 if (flavors == NULL) {
1819 status = -ENOMEM;
1820 goto out;
1821 }
1822 len = rpcauth_list_flavors(flavors, len);
1823 if (len < 0) {
1824 status = len;
1825 goto out_free;
1826 }
1827 clnt = clp->cl_rpcclient;
1828 save = clnt->cl_auth->au_flavor;
1829 i = 0;
1830
1831 mutex_lock(&nfs_clid_init_mutex);
1832 status = -ENOENT;
1833again:
1834 cred = ops->get_clid_cred(clp);
1835 if (cred == NULL)
1836 goto out_unlock;
1837
1838 status = ops->detect_trunking(clp, result, cred);
1839 put_rpccred(cred);
1840 switch (status) {
1841 case 0:
1842 break;
1843
1844 case -EACCES:
1845 if (clp->cl_machine_cred == NULL)
1846 break;
1847 /* Handle case where the user hasn't set up machine creds */
1848 nfs4_clear_machine_cred(clp);
1849 case -NFS4ERR_DELAY:
1850 case -ETIMEDOUT:
1851 case -EAGAIN:
1852 ssleep(1);
1853 dprintk("NFS: %s after status %d, retrying\n",
1854 __func__, status);
1855 goto again;
1856
1857 case -NFS4ERR_CLID_INUSE:
1858 case -NFS4ERR_WRONGSEC:
1859 status = -EPERM;
1860 if (i >= len)
1861 break;
1862
1863 flav = flavors[i++];
1864 if (flav == save)
1865 flav = flavors[i++];
1866 clnt = rpc_clone_client_set_auth(clnt, flav);
1867 if (IS_ERR(clnt)) {
1868 status = PTR_ERR(clnt);
1869 break;
1870 }
1871 clp->cl_rpcclient = clnt;
1872 goto again;
1873
1874 case -NFS4ERR_MINOR_VERS_MISMATCH:
1875 status = -EPROTONOSUPPORT;
1876 break;
1877
1878 case -EKEYEXPIRED:
1879 nfs4_warn_keyexpired(clp->cl_hostname);
1880 case -NFS4ERR_NOT_SAME: /* FixMe: implement recovery
1881 * in nfs4_exchange_id */
1882 status = -EKEYEXPIRED;
1883 }
1884
1885out_unlock:
1886 mutex_unlock(&nfs_clid_init_mutex);
1887out_free:
1888 kfree(flavors);
1889out:
1890 dprintk("NFS: %s: status = %d\n", __func__, status);
1891 return status;
1892}
1893
1701#ifdef CONFIG_NFS_V4_1 1894#ifdef CONFIG_NFS_V4_1
1702void nfs4_schedule_session_recovery(struct nfs4_session *session, int err) 1895void nfs4_schedule_session_recovery(struct nfs4_session *session, int err)
1703{ 1896{
@@ -2008,6 +2201,7 @@ out_error:
2008 pr_warn_ratelimited("NFS: state manager%s%s failed on NFSv4 server %s" 2201 pr_warn_ratelimited("NFS: state manager%s%s failed on NFSv4 server %s"
2009 " with error %d\n", section_sep, section, 2202 " with error %d\n", section_sep, section,
2010 clp->cl_hostname, -status); 2203 clp->cl_hostname, -status);
2204 ssleep(1);
2011 nfs4_end_drain_session(clp); 2205 nfs4_end_drain_session(clp);
2012 nfs4_clear_state_manager_bit(clp); 2206 nfs4_clear_state_manager_bit(clp);
2013} 2207}
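A recurring change in the nfs4state.c and nfs4proc.c hunks is turning NFS_LOCK_INITIALIZED from a mask tested with plain bitwise operators into a bit number used with test_bit()/set_bit()/clear_bit(), so updates to ls_flags go through the atomic bitops rather than non-atomic read-modify-write. A userspace analog using GCC/Clang __atomic builtins (the kernel helpers themselves cannot be used outside the kernel):

/* Userspace analog of the test_bit()/set_bit()/clear_bit() conversion;
 * uses GCC/Clang __atomic builtins instead of the kernel bitops. */
#include <stdio.h>

#define NFS_LOCK_INITIALIZED 0	/* now a bit number, not a mask */

static void set_bit_atomic(int nr, unsigned long *addr)
{
	__atomic_fetch_or(addr, 1UL << nr, __ATOMIC_SEQ_CST);
}

static void clear_bit_atomic(int nr, unsigned long *addr)
{
	__atomic_fetch_and(addr, ~(1UL << nr), __ATOMIC_SEQ_CST);
}

static int test_bit_atomic(int nr, const unsigned long *addr)
{
	return (__atomic_load_n(addr, __ATOMIC_SEQ_CST) >> nr) & 1;
}

int main(void)
{
	unsigned long ls_flags = 0;

	set_bit_atomic(NFS_LOCK_INITIALIZED, &ls_flags);
	printf("initialized: %d\n", test_bit_atomic(NFS_LOCK_INITIALIZED, &ls_flags));
	clear_bit_atomic(NFS_LOCK_INITIALIZED, &ls_flags);
	printf("initialized: %d\n", test_bit_atomic(NFS_LOCK_INITIALIZED, &ls_flags));
	return 0;
}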
diff --git a/fs/nfs/nfs4sysctl.c b/fs/nfs/nfs4sysctl.c
index 5729bc8aa75d..2628d921b7e3 100644
--- a/fs/nfs/nfs4sysctl.c
+++ b/fs/nfs/nfs4sysctl.c
@@ -9,6 +9,7 @@
9#include <linux/nfs_idmap.h> 9#include <linux/nfs_idmap.h>
10#include <linux/nfs_fs.h> 10#include <linux/nfs_fs.h>
11 11
12#include "nfs4_fs.h"
12#include "callback.h" 13#include "callback.h"
13 14
14static const int nfs_set_port_min = 0; 15static const int nfs_set_port_min = 0;
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 8dba6bd48557..40836ee5dc3a 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -447,12 +447,14 @@ static int nfs4_stat_to_errno(int);
447 encode_sequence_maxsz + \ 447 encode_sequence_maxsz + \
448 encode_putfh_maxsz + \ 448 encode_putfh_maxsz + \
449 encode_open_maxsz + \ 449 encode_open_maxsz + \
450 encode_access_maxsz + \
450 encode_getfh_maxsz + \ 451 encode_getfh_maxsz + \
451 encode_getattr_maxsz) 452 encode_getattr_maxsz)
452#define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \ 453#define NFS4_dec_open_sz (compound_decode_hdr_maxsz + \
453 decode_sequence_maxsz + \ 454 decode_sequence_maxsz + \
454 decode_putfh_maxsz + \ 455 decode_putfh_maxsz + \
455 decode_open_maxsz + \ 456 decode_open_maxsz + \
457 decode_access_maxsz + \
456 decode_getfh_maxsz + \ 458 decode_getfh_maxsz + \
457 decode_getattr_maxsz) 459 decode_getattr_maxsz)
458#define NFS4_enc_open_confirm_sz \ 460#define NFS4_enc_open_confirm_sz \
@@ -467,11 +469,13 @@ static int nfs4_stat_to_errno(int);
467 encode_sequence_maxsz + \ 469 encode_sequence_maxsz + \
468 encode_putfh_maxsz + \ 470 encode_putfh_maxsz + \
469 encode_open_maxsz + \ 471 encode_open_maxsz + \
472 encode_access_maxsz + \
470 encode_getattr_maxsz) 473 encode_getattr_maxsz)
471#define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \ 474#define NFS4_dec_open_noattr_sz (compound_decode_hdr_maxsz + \
472 decode_sequence_maxsz + \ 475 decode_sequence_maxsz + \
473 decode_putfh_maxsz + \ 476 decode_putfh_maxsz + \
474 decode_open_maxsz + \ 477 decode_open_maxsz + \
478 decode_access_maxsz + \
475 decode_getattr_maxsz) 479 decode_getattr_maxsz)
476#define NFS4_enc_open_downgrade_sz \ 480#define NFS4_enc_open_downgrade_sz \
477 (compound_encode_hdr_maxsz + \ 481 (compound_encode_hdr_maxsz + \
@@ -1509,8 +1513,12 @@ static void encode_open_stateid(struct xdr_stream *xdr,
1509 nfs4_stateid stateid; 1513 nfs4_stateid stateid;
1510 1514
1511 if (ctx->state != NULL) { 1515 if (ctx->state != NULL) {
1516 const struct nfs_lockowner *lockowner = NULL;
1517
1518 if (l_ctx != NULL)
1519 lockowner = &l_ctx->lockowner;
1512 nfs4_select_rw_stateid(&stateid, ctx->state, 1520 nfs4_select_rw_stateid(&stateid, ctx->state,
1513 fmode, l_ctx->lockowner, l_ctx->pid); 1521 fmode, lockowner);
1514 if (zero_seqid) 1522 if (zero_seqid)
1515 stateid.seqid = 0; 1523 stateid.seqid = 0;
1516 encode_nfs4_stateid(xdr, &stateid); 1524 encode_nfs4_stateid(xdr, &stateid);
@@ -2216,6 +2224,8 @@ static void nfs4_xdr_enc_open(struct rpc_rqst *req, struct xdr_stream *xdr,
2216 encode_putfh(xdr, args->fh, &hdr); 2224 encode_putfh(xdr, args->fh, &hdr);
2217 encode_open(xdr, args, &hdr); 2225 encode_open(xdr, args, &hdr);
2218 encode_getfh(xdr, &hdr); 2226 encode_getfh(xdr, &hdr);
2227 if (args->access)
2228 encode_access(xdr, args->access, &hdr);
2219 encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr); 2229 encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr);
2220 encode_nops(&hdr); 2230 encode_nops(&hdr);
2221} 2231}
@@ -2252,7 +2262,9 @@ static void nfs4_xdr_enc_open_noattr(struct rpc_rqst *req,
2252 encode_sequence(xdr, &args->seq_args, &hdr); 2262 encode_sequence(xdr, &args->seq_args, &hdr);
2253 encode_putfh(xdr, args->fh, &hdr); 2263 encode_putfh(xdr, args->fh, &hdr);
2254 encode_open(xdr, args, &hdr); 2264 encode_open(xdr, args, &hdr);
2255 encode_getfattr(xdr, args->bitmask, &hdr); 2265 if (args->access)
2266 encode_access(xdr, args->access, &hdr);
2267 encode_getfattr_open(xdr, args->bitmask, args->open_bitmap, &hdr);
2256 encode_nops(&hdr); 2268 encode_nops(&hdr);
2257} 2269}
2258 2270
@@ -4095,7 +4107,7 @@ out_overflow:
4095 return -EIO; 4107 return -EIO;
4096} 4108}
4097 4109
4098static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access) 4110static int decode_access(struct xdr_stream *xdr, u32 *supported, u32 *access)
4099{ 4111{
4100 __be32 *p; 4112 __be32 *p;
4101 uint32_t supp, acc; 4113 uint32_t supp, acc;
@@ -4109,8 +4121,8 @@ static int decode_access(struct xdr_stream *xdr, struct nfs4_accessres *access)
4109 goto out_overflow; 4121 goto out_overflow;
4110 supp = be32_to_cpup(p++); 4122 supp = be32_to_cpup(p++);
4111 acc = be32_to_cpup(p); 4123 acc = be32_to_cpup(p);
4112 access->supported = supp; 4124 *supported = supp;
4113 access->access = acc; 4125 *access = acc;
4114 return 0; 4126 return 0;
4115out_overflow: 4127out_overflow:
4116 print_overflow_msg(__func__, xdr); 4128 print_overflow_msg(__func__, xdr);
@@ -5642,7 +5654,8 @@ static int decode_getdeviceinfo(struct xdr_stream *xdr,
5642 * and places the remaining xdr data in xdr_buf->tail 5654 * and places the remaining xdr data in xdr_buf->tail
5643 */ 5655 */
5644 pdev->mincount = be32_to_cpup(p); 5656 pdev->mincount = be32_to_cpup(p);
5645 xdr_read_pages(xdr, pdev->mincount); /* include space for the length */ 5657 if (xdr_read_pages(xdr, pdev->mincount) != pdev->mincount)
5658 goto out_overflow;
5646 5659
5647 /* Parse notification bitmap, verifying that it is zero. */ 5660 /* Parse notification bitmap, verifying that it is zero. */
5648 p = xdr_inline_decode(xdr, 4); 5661 p = xdr_inline_decode(xdr, 4);
@@ -5887,7 +5900,7 @@ static int nfs4_xdr_dec_access(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
5887 status = decode_putfh(xdr); 5900 status = decode_putfh(xdr);
5888 if (status != 0) 5901 if (status != 0)
5889 goto out; 5902 goto out;
5890 status = decode_access(xdr, res); 5903 status = decode_access(xdr, &res->supported, &res->access);
5891 if (status != 0) 5904 if (status != 0)
5892 goto out; 5905 goto out;
5893 decode_getfattr(xdr, res->fattr, res->server); 5906 decode_getfattr(xdr, res->fattr, res->server);
@@ -6228,6 +6241,8 @@ static int nfs4_xdr_dec_open(struct rpc_rqst *rqstp, struct xdr_stream *xdr,
6228 status = decode_getfh(xdr, &res->fh); 6241 status = decode_getfh(xdr, &res->fh);
6229 if (status) 6242 if (status)
6230 goto out; 6243 goto out;
6244 if (res->access_request)
6245 decode_access(xdr, &res->access_supported, &res->access_result);
6231 decode_getfattr(xdr, res->f_attr, res->server); 6246 decode_getfattr(xdr, res->f_attr, res->server);
6232out: 6247out:
6233 return status; 6248 return status;
@@ -6276,6 +6291,8 @@ static int nfs4_xdr_dec_open_noattr(struct rpc_rqst *rqstp,
6276 status = decode_open(xdr, res); 6291 status = decode_open(xdr, res);
6277 if (status) 6292 if (status)
6278 goto out; 6293 goto out;
6294 if (res->access_request)
6295 decode_access(xdr, &res->access_supported, &res->access_result);
6279 decode_getfattr(xdr, res->f_attr, res->server); 6296 decode_getfattr(xdr, res->f_attr, res->server);
6280out: 6297out:
6281 return status; 6298 return status;
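
Two things fall out of the nfs4xdr.c hunks above: the OPEN and OPEN_NOATTR compounds can now carry an ACCESS operation (encoded only when args->access is set, decoded only when res->access_request is set, with the compound size macros grown by the ACCESS op, and the noattr encoder switched to encode_getfattr_open so it can pass the open bitmap), and decode_access() is refactored to write through two u32 pointers instead of a struct nfs4_accessres, so the same helper can fill either the standalone ACCESS result or the access fields embedded in the OPEN result. A standalone sketch of that out-parameter refactor, with invented names and no kernel dependencies:

/* Standalone sketch (not the kernel code): a decoder that writes through
 * out-parameters so two different result structs can share it. */
#include <stdint.h>
#include <stdio.h>

struct access_res { uint32_t supported, access; };
struct open_res   { uint32_t access_supported, access_result; /* ... */ };

/* Shared helper: callers decide where the two words land. */
static int decode_access_words(const uint32_t *wire, uint32_t *supported,
                               uint32_t *access)
{
    *supported = wire[0];
    *access    = wire[1];
    return 0;
}

int main(void)
{
    const uint32_t wire[2] = { 0x3f, 0x0d };
    struct access_res a;
    struct open_res o;

    decode_access_words(wire, &a.supported, &a.access);               /* ACCESS reply */
    decode_access_words(wire, &o.access_supported, &o.access_result); /* OPEN reply  */
    printf("supported=%#x access=%#x\n", (unsigned)a.supported, (unsigned)a.access);
    return 0;
}

The point is only that the decoder no longer dictates the shape of the caller's result structure.
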
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index ea6d111b03e9..be731e6b7b9c 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -41,6 +41,7 @@
41#include <scsi/osd_ore.h> 41#include <scsi/osd_ore.h>
42 42
43#include "objlayout.h" 43#include "objlayout.h"
44#include "../internal.h"
44 45
45#define NFSDBG_FACILITY NFSDBG_PNFS_LD 46#define NFSDBG_FACILITY NFSDBG_PNFS_LD
46 47
@@ -606,8 +607,14 @@ static bool aligned_on_raid_stripe(u64 offset, struct ore_layout *layout,
606void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) 607void objio_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
607{ 608{
608 unsigned long stripe_end = 0; 609 unsigned long stripe_end = 0;
610 u64 wb_size;
609 611
610 pnfs_generic_pg_init_write(pgio, req); 612 if (pgio->pg_dreq == NULL)
613 wb_size = i_size_read(pgio->pg_inode) - req_offset(req);
614 else
615 wb_size = nfs_dreq_bytes_left(pgio->pg_dreq);
616
617 pnfs_generic_pg_init_write(pgio, req, wb_size);
611 if (unlikely(pgio->pg_lseg == NULL)) 618 if (unlikely(pgio->pg_lseg == NULL))
612 return; /* Not pNFS */ 619 return; /* Not pNFS */
613 620
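
objio_init_write() now computes the expected write-back size itself and hands it to pnfs_generic_pg_init_write(), so the resulting LAYOUTGET can ask for a range that reflects the actual write: buffered writes cover everything from the request offset to the current end of file, direct I/O covers the bytes still outstanding in the direct request. A userspace analogue of that selection (i_size, req_offset and dreq_bytes_left are stand-ins for the kernel helpers used in the hunk above):

/* Illustrative only: how a layoutget write size might be chosen. */
#include <stdint.h>
#include <stdio.h>

static uint64_t pick_wb_size(int is_direct_io, uint64_t i_size,
                             uint64_t req_offset, uint64_t dreq_bytes_left)
{
    if (!is_direct_io)
        return i_size - req_offset;   /* buffered: to current end of file */
    return dreq_bytes_left;           /* O_DIRECT: what the request still owes */
}

int main(void)
{
    printf("buffered: %llu\n",
           (unsigned long long)pick_wb_size(0, 1 << 20, 4096, 0));
    printf("direct:   %llu\n",
           (unsigned long long)pick_wb_size(1, 1 << 20, 4096, 8192));
    return 0;
}
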
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 311a79681e2b..e56e846e9d2d 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -102,6 +102,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
102 unsigned int offset, unsigned int count) 102 unsigned int offset, unsigned int count)
103{ 103{
104 struct nfs_page *req; 104 struct nfs_page *req;
105 struct nfs_lock_context *l_ctx;
105 106
106 /* try to allocate the request struct */ 107 /* try to allocate the request struct */
107 req = nfs_page_alloc(); 108 req = nfs_page_alloc();
@@ -109,11 +110,12 @@ nfs_create_request(struct nfs_open_context *ctx, struct inode *inode,
109 return ERR_PTR(-ENOMEM); 110 return ERR_PTR(-ENOMEM);
110 111
111 /* get lock context early so we can deal with alloc failures */ 112 /* get lock context early so we can deal with alloc failures */
112 req->wb_lock_context = nfs_get_lock_context(ctx); 113 l_ctx = nfs_get_lock_context(ctx);
113 if (req->wb_lock_context == NULL) { 114 if (IS_ERR(l_ctx)) {
114 nfs_page_free(req); 115 nfs_page_free(req);
115 return ERR_PTR(-ENOMEM); 116 return ERR_CAST(l_ctx);
116 } 117 }
118 req->wb_lock_context = l_ctx;
117 119
118 /* Initialize the request struct. Initially, we assume a 120 /* Initialize the request struct. Initially, we assume a
119 * long write-back delay. This will be adjusted in 121 * long write-back delay. This will be adjusted in
@@ -290,7 +292,9 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
290{ 292{
291 if (req->wb_context->cred != prev->wb_context->cred) 293 if (req->wb_context->cred != prev->wb_context->cred)
292 return false; 294 return false;
293 if (req->wb_lock_context->lockowner != prev->wb_lock_context->lockowner) 295 if (req->wb_lock_context->lockowner.l_owner != prev->wb_lock_context->lockowner.l_owner)
296 return false;
297 if (req->wb_lock_context->lockowner.l_pid != prev->wb_lock_context->lockowner.l_pid)
294 return false; 298 return false;
295 if (req->wb_context->state != prev->wb_context->state) 299 if (req->wb_context->state != prev->wb_context->state)
296 return false; 300 return false;
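
nfs_create_request() stops collapsing every nfs_get_lock_context() failure into -ENOMEM: the lock context now comes back as an ERR_PTR value and the real error is forwarded with ERR_CAST. A minimal userspace imitation of that idiom (the ERR_PTR/IS_ERR/ERR_CAST macros below are simplified stand-ins for the kernel's):

/* Minimal userspace imitation of the ERR_PTR error-propagation idiom used
 * in the pagelist.c hunk above. Simplified, for illustration. */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#define MAX_ERRNO   4095
#define ERR_PTR(e)  ((void *)(long)(e))
#define PTR_ERR(p)  ((long)(p))
#define IS_ERR(p)   ((unsigned long)(p) >= (unsigned long)-MAX_ERRNO)
#define ERR_CAST(p) ((void *)(p))

struct lock_ctx { int id; };
struct request  { struct lock_ctx *l_ctx; };

static struct lock_ctx *get_lock_context(int fail_with)
{
    if (fail_with)
        return ERR_PTR(-fail_with);         /* e.g. -EINTR, not just -ENOMEM */
    struct lock_ctx *l = malloc(sizeof(*l));
    return l ? l : ERR_PTR(-ENOMEM);
}

static struct request *create_request(int fail_with)
{
    struct request *req = malloc(sizeof(*req));
    if (!req)
        return ERR_PTR(-ENOMEM);
    struct lock_ctx *l_ctx = get_lock_context(fail_with);
    if (IS_ERR(l_ctx)) {
        free(req);
        return ERR_CAST(l_ctx);             /* forward the real error */
    }
    req->l_ctx = l_ctx;
    return req;
}

int main(void)
{
    struct request *r = create_request(EINTR);
    if (IS_ERR(r))
        printf("create_request failed: %ld\n", PTR_ERR(r));
    return 0;
}
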
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 2e00feacd4be..fe624c91bd00 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -35,6 +35,7 @@
35#include "iostat.h" 35#include "iostat.h"
36 36
37#define NFSDBG_FACILITY NFSDBG_PNFS 37#define NFSDBG_FACILITY NFSDBG_PNFS
38#define PNFS_LAYOUTGET_RETRY_TIMEOUT (120*HZ)
38 39
39/* Locking: 40/* Locking:
40 * 41 *
@@ -190,7 +191,7 @@ EXPORT_SYMBOL_GPL(pnfs_unregister_layoutdriver);
190 191
191/* Need to hold i_lock if caller does not already hold reference */ 192/* Need to hold i_lock if caller does not already hold reference */
192void 193void
193get_layout_hdr(struct pnfs_layout_hdr *lo) 194pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo)
194{ 195{
195 atomic_inc(&lo->plh_refcount); 196 atomic_inc(&lo->plh_refcount);
196} 197}
@@ -199,43 +200,107 @@ static struct pnfs_layout_hdr *
199pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags) 200pnfs_alloc_layout_hdr(struct inode *ino, gfp_t gfp_flags)
200{ 201{
201 struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld; 202 struct pnfs_layoutdriver_type *ld = NFS_SERVER(ino)->pnfs_curr_ld;
202 return ld->alloc_layout_hdr ? ld->alloc_layout_hdr(ino, gfp_flags) : 203 return ld->alloc_layout_hdr(ino, gfp_flags);
203 kzalloc(sizeof(struct pnfs_layout_hdr), gfp_flags);
204} 204}
205 205
206static void 206static void
207pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo) 207pnfs_free_layout_hdr(struct pnfs_layout_hdr *lo)
208{ 208{
209 struct pnfs_layoutdriver_type *ld = NFS_SERVER(lo->plh_inode)->pnfs_curr_ld; 209 struct nfs_server *server = NFS_SERVER(lo->plh_inode);
210 struct pnfs_layoutdriver_type *ld = server->pnfs_curr_ld;
211
212 if (!list_empty(&lo->plh_layouts)) {
213 struct nfs_client *clp = server->nfs_client;
214
215 spin_lock(&clp->cl_lock);
216 list_del_init(&lo->plh_layouts);
217 spin_unlock(&clp->cl_lock);
218 }
210 put_rpccred(lo->plh_lc_cred); 219 put_rpccred(lo->plh_lc_cred);
211 return ld->alloc_layout_hdr ? ld->free_layout_hdr(lo) : kfree(lo); 220 return ld->free_layout_hdr(lo);
212} 221}
213 222
214static void 223static void
215destroy_layout_hdr(struct pnfs_layout_hdr *lo) 224pnfs_detach_layout_hdr(struct pnfs_layout_hdr *lo)
216{ 225{
226 struct nfs_inode *nfsi = NFS_I(lo->plh_inode);
217 dprintk("%s: freeing layout cache %p\n", __func__, lo); 227 dprintk("%s: freeing layout cache %p\n", __func__, lo);
218 BUG_ON(!list_empty(&lo->plh_layouts)); 228 nfsi->layout = NULL;
219 NFS_I(lo->plh_inode)->layout = NULL; 229 /* Reset MDS Threshold I/O counters */
220 pnfs_free_layout_hdr(lo); 230 nfsi->write_io = 0;
231 nfsi->read_io = 0;
232}
233
234void
235pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo)
236{
237 struct inode *inode = lo->plh_inode;
238
239 if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) {
240 pnfs_detach_layout_hdr(lo);
241 spin_unlock(&inode->i_lock);
242 pnfs_free_layout_hdr(lo);
243 }
244}
245
246static int
247pnfs_iomode_to_fail_bit(u32 iomode)
248{
249 return iomode == IOMODE_RW ?
250 NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED;
221} 251}
222 252
223static void 253static void
224put_layout_hdr_locked(struct pnfs_layout_hdr *lo) 254pnfs_layout_set_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
225{ 255{
226 if (atomic_dec_and_test(&lo->plh_refcount)) 256 lo->plh_retry_timestamp = jiffies;
227 destroy_layout_hdr(lo); 257 if (test_and_set_bit(fail_bit, &lo->plh_flags))
258 atomic_inc(&lo->plh_refcount);
228} 259}
229 260
230void 261static void
231put_layout_hdr(struct pnfs_layout_hdr *lo) 262pnfs_layout_clear_fail_bit(struct pnfs_layout_hdr *lo, int fail_bit)
263{
264 if (test_and_clear_bit(fail_bit, &lo->plh_flags))
265 atomic_dec(&lo->plh_refcount);
266}
267
268static void
269pnfs_layout_io_set_failed(struct pnfs_layout_hdr *lo, u32 iomode)
232{ 270{
233 struct inode *inode = lo->plh_inode; 271 struct inode *inode = lo->plh_inode;
272 struct pnfs_layout_range range = {
273 .iomode = iomode,
274 .offset = 0,
275 .length = NFS4_MAX_UINT64,
276 };
277 LIST_HEAD(head);
234 278
235 if (atomic_dec_and_lock(&lo->plh_refcount, &inode->i_lock)) { 279 spin_lock(&inode->i_lock);
236 destroy_layout_hdr(lo); 280 pnfs_layout_set_fail_bit(lo, pnfs_iomode_to_fail_bit(iomode));
237 spin_unlock(&inode->i_lock); 281 pnfs_mark_matching_lsegs_invalid(lo, &head, &range);
282 spin_unlock(&inode->i_lock);
283 pnfs_free_lseg_list(&head);
284 dprintk("%s Setting layout IOMODE_%s fail bit\n", __func__,
285 iomode == IOMODE_RW ? "RW" : "READ");
286}
287
288static bool
289pnfs_layout_io_test_failed(struct pnfs_layout_hdr *lo, u32 iomode)
290{
291 unsigned long start, end;
292 int fail_bit = pnfs_iomode_to_fail_bit(iomode);
293
294 if (test_bit(fail_bit, &lo->plh_flags) == 0)
295 return false;
296 end = jiffies;
297 start = end - PNFS_LAYOUTGET_RETRY_TIMEOUT;
298 if (!time_in_range(lo->plh_retry_timestamp, start, end)) {
299 /* It is time to retry the failed layoutgets */
300 pnfs_layout_clear_fail_bit(lo, fail_bit);
301 return false;
238 } 302 }
303 return true;
239} 304}
240 305
241static void 306static void
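
The fail bits set by pnfs_layout_io_set_failed() are no longer permanent: setting one records a timestamp, and pnfs_layout_io_test_failed() clears the bit again once the failure is older than PNFS_LAYOUTGET_RETRY_TIMEOUT, so a client that has fallen back to I/O through the MDS resumes trying LAYOUTGET after two minutes. A seconds-based userspace analogue of just the retry-window part (jiffies and the header refcount manipulation are left out):

/* Userspace analogue of the two-minute layoutget retry window.
 * time()/seconds replace jiffies; simplified, for illustration. */
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define RETRY_TIMEOUT 120   /* seconds, mirroring PNFS_LAYOUTGET_RETRY_TIMEOUT */

struct layout {
    bool   rw_failed;
    time_t retry_timestamp;
};

static void layout_set_failed(struct layout *lo)
{
    lo->retry_timestamp = time(NULL);
    lo->rw_failed = true;
}

/* Returns true while the failure is still "fresh"; clears it otherwise. */
static bool layout_test_failed(struct layout *lo)
{
    if (!lo->rw_failed)
        return false;
    time_t end = time(NULL), start = end - RETRY_TIMEOUT;
    if (lo->retry_timestamp >= start && lo->retry_timestamp <= end)
        return true;
    lo->rw_failed = false;   /* window expired: allow LAYOUTGET again */
    return false;
}

int main(void)
{
    struct layout lo = { 0 };
    layout_set_failed(&lo);
    printf("blocked now: %d\n", layout_test_failed(&lo));
    lo.retry_timestamp -= RETRY_TIMEOUT + 1;   /* pretend 2+ minutes passed */
    printf("blocked later: %d\n", layout_test_failed(&lo));
    return 0;
}
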
@@ -249,33 +314,32 @@ init_lseg(struct pnfs_layout_hdr *lo, struct pnfs_layout_segment *lseg)
249 lseg->pls_layout = lo; 314 lseg->pls_layout = lo;
250} 315}
251 316
252static void free_lseg(struct pnfs_layout_segment *lseg) 317static void pnfs_free_lseg(struct pnfs_layout_segment *lseg)
253{ 318{
254 struct inode *ino = lseg->pls_layout->plh_inode; 319 struct inode *ino = lseg->pls_layout->plh_inode;
255 320
256 NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg); 321 NFS_SERVER(ino)->pnfs_curr_ld->free_lseg(lseg);
257 /* Matched by get_layout_hdr in pnfs_insert_layout */
258 put_layout_hdr(NFS_I(ino)->layout);
259} 322}
260 323
261static void 324static void
262put_lseg_common(struct pnfs_layout_segment *lseg) 325pnfs_layout_remove_lseg(struct pnfs_layout_hdr *lo,
326 struct pnfs_layout_segment *lseg)
263{ 327{
264 struct inode *inode = lseg->pls_layout->plh_inode; 328 struct inode *inode = lo->plh_inode;
265 329
266 WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); 330 WARN_ON(test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
267 list_del_init(&lseg->pls_list); 331 list_del_init(&lseg->pls_list);
268 if (list_empty(&lseg->pls_layout->plh_segs)) { 332 /* Matched by pnfs_get_layout_hdr in pnfs_layout_insert_lseg */
269 set_bit(NFS_LAYOUT_DESTROYED, &lseg->pls_layout->plh_flags); 333 atomic_dec(&lo->plh_refcount);
270 /* Matched by initial refcount set in alloc_init_layout_hdr */ 334 if (list_empty(&lo->plh_segs))
271 put_layout_hdr_locked(lseg->pls_layout); 335 clear_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags);
272 }
273 rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq); 336 rpc_wake_up(&NFS_SERVER(inode)->roc_rpcwaitq);
274} 337}
275 338
276void 339void
277put_lseg(struct pnfs_layout_segment *lseg) 340pnfs_put_lseg(struct pnfs_layout_segment *lseg)
278{ 341{
342 struct pnfs_layout_hdr *lo;
279 struct inode *inode; 343 struct inode *inode;
280 344
281 if (!lseg) 345 if (!lseg)
@@ -284,17 +348,17 @@ put_lseg(struct pnfs_layout_segment *lseg)
284 dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg, 348 dprintk("%s: lseg %p ref %d valid %d\n", __func__, lseg,
285 atomic_read(&lseg->pls_refcount), 349 atomic_read(&lseg->pls_refcount),
286 test_bit(NFS_LSEG_VALID, &lseg->pls_flags)); 350 test_bit(NFS_LSEG_VALID, &lseg->pls_flags));
287 inode = lseg->pls_layout->plh_inode; 351 lo = lseg->pls_layout;
352 inode = lo->plh_inode;
288 if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) { 353 if (atomic_dec_and_lock(&lseg->pls_refcount, &inode->i_lock)) {
289 LIST_HEAD(free_me); 354 pnfs_get_layout_hdr(lo);
290 355 pnfs_layout_remove_lseg(lo, lseg);
291 put_lseg_common(lseg);
292 list_add(&lseg->pls_list, &free_me);
293 spin_unlock(&inode->i_lock); 356 spin_unlock(&inode->i_lock);
294 pnfs_free_lseg_list(&free_me); 357 pnfs_free_lseg(lseg);
358 pnfs_put_layout_hdr(lo);
295 } 359 }
296} 360}
297EXPORT_SYMBOL_GPL(put_lseg); 361EXPORT_SYMBOL_GPL(pnfs_put_lseg);
298 362
299static inline u64 363static inline u64
300end_offset(u64 start, u64 len) 364end_offset(u64 start, u64 len)
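
Segment teardown is reorganised so that nothing heavy runs under i_lock: pnfs_layout_remove_lseg() only unlinks the segment and drops the header reference while the lock is held, and pnfs_free_lseg(), which calls into the layout driver, runs after the unlock; pnfs_put_lseg() takes an extra header reference across that window so the header cannot disappear before its own pnfs_put_layout_hdr(). A very rough userspace sketch of the underlying "last put returns with the lock held" pattern (a pthread mutex and C11 atomics stand in for i_lock and atomic_dec_and_lock(); this is not the kernel code):

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

static pthread_mutex_t i_lock = PTHREAD_MUTEX_INITIALIZER;

struct obj {
    atomic_int ref;
    int linked;                 /* "still reachable from the inode" */
};

/* Drop a reference; if it was the last one, return 1 with i_lock held. */
static int put_and_lock(struct obj *o)
{
    int old = atomic_load(&o->ref);
    while (old > 1)             /* fast path: plenty of references left */
        if (atomic_compare_exchange_weak(&o->ref, &old, old - 1))
            return 0;
    pthread_mutex_lock(&i_lock);
    if (atomic_fetch_sub(&o->ref, 1) == 1)
        return 1;               /* caller unlinks, unlocks, then frees */
    pthread_mutex_unlock(&i_lock);
    return 0;
}

static void put(struct obj *o)
{
    if (put_and_lock(o)) {
        o->linked = 0;                    /* detach while still locked */
        pthread_mutex_unlock(&i_lock);
        free(o);                          /* heavy work outside the lock */
    }
}

int main(void)
{
    struct obj *o = calloc(1, sizeof(*o));
    atomic_store(&o->ref, 2);
    o->linked = 1;
    put(o);          /* not last: nothing to do */
    put(o);          /* last: detach under lock, then free */
    puts("done");
    return 0;
}
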
@@ -378,7 +442,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
378 dprintk("%s: lseg %p ref %d\n", __func__, lseg, 442 dprintk("%s: lseg %p ref %d\n", __func__, lseg,
379 atomic_read(&lseg->pls_refcount)); 443 atomic_read(&lseg->pls_refcount));
380 if (atomic_dec_and_test(&lseg->pls_refcount)) { 444 if (atomic_dec_and_test(&lseg->pls_refcount)) {
381 put_lseg_common(lseg); 445 pnfs_layout_remove_lseg(lseg->pls_layout, lseg);
382 list_add(&lseg->pls_list, tmp_list); 446 list_add(&lseg->pls_list, tmp_list);
383 rv = 1; 447 rv = 1;
384 } 448 }
@@ -390,7 +454,7 @@ static int mark_lseg_invalid(struct pnfs_layout_segment *lseg,
390 * after call. 454 * after call.
391 */ 455 */
392int 456int
393mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, 457pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
394 struct list_head *tmp_list, 458 struct list_head *tmp_list,
395 struct pnfs_layout_range *recall_range) 459 struct pnfs_layout_range *recall_range)
396{ 460{
@@ -399,14 +463,8 @@ mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
399 463
400 dprintk("%s:Begin lo %p\n", __func__, lo); 464 dprintk("%s:Begin lo %p\n", __func__, lo);
401 465
402 if (list_empty(&lo->plh_segs)) { 466 if (list_empty(&lo->plh_segs))
403 /* Reset MDS Threshold I/O counters */
404 NFS_I(lo->plh_inode)->write_io = 0;
405 NFS_I(lo->plh_inode)->read_io = 0;
406 if (!test_and_set_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags))
407 put_layout_hdr_locked(lo);
408 return 0; 467 return 0;
409 }
410 list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list) 468 list_for_each_entry_safe(lseg, next, &lo->plh_segs, pls_list)
411 if (!recall_range || 469 if (!recall_range ||
412 should_free_lseg(&lseg->pls_range, recall_range)) { 470 should_free_lseg(&lseg->pls_range, recall_range)) {
@@ -426,25 +484,13 @@ void
426pnfs_free_lseg_list(struct list_head *free_me) 484pnfs_free_lseg_list(struct list_head *free_me)
427{ 485{
428 struct pnfs_layout_segment *lseg, *tmp; 486 struct pnfs_layout_segment *lseg, *tmp;
429 struct pnfs_layout_hdr *lo;
430 487
431 if (list_empty(free_me)) 488 if (list_empty(free_me))
432 return; 489 return;
433 490
434 lo = list_first_entry(free_me, struct pnfs_layout_segment,
435 pls_list)->pls_layout;
436
437 if (test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags)) {
438 struct nfs_client *clp;
439
440 clp = NFS_SERVER(lo->plh_inode)->nfs_client;
441 spin_lock(&clp->cl_lock);
442 list_del_init(&lo->plh_layouts);
443 spin_unlock(&clp->cl_lock);
444 }
445 list_for_each_entry_safe(lseg, tmp, free_me, pls_list) { 491 list_for_each_entry_safe(lseg, tmp, free_me, pls_list) {
446 list_del(&lseg->pls_list); 492 list_del(&lseg->pls_list);
447 free_lseg(lseg); 493 pnfs_free_lseg(lseg);
448 } 494 }
449} 495}
450 496
@@ -458,10 +504,15 @@ pnfs_destroy_layout(struct nfs_inode *nfsi)
458 lo = nfsi->layout; 504 lo = nfsi->layout;
459 if (lo) { 505 if (lo) {
460 lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */ 506 lo->plh_block_lgets++; /* permanently block new LAYOUTGETs */
461 mark_matching_lsegs_invalid(lo, &tmp_list, NULL); 507 pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
462 } 508 pnfs_get_layout_hdr(lo);
463 spin_unlock(&nfsi->vfs_inode.i_lock); 509 pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RO_FAILED);
464 pnfs_free_lseg_list(&tmp_list); 510 pnfs_layout_clear_fail_bit(lo, NFS_LAYOUT_RW_FAILED);
511 spin_unlock(&nfsi->vfs_inode.i_lock);
512 pnfs_free_lseg_list(&tmp_list);
513 pnfs_put_layout_hdr(lo);
514 } else
515 spin_unlock(&nfsi->vfs_inode.i_lock);
465} 516}
466EXPORT_SYMBOL_GPL(pnfs_destroy_layout); 517EXPORT_SYMBOL_GPL(pnfs_destroy_layout);
467 518
@@ -498,46 +549,54 @@ pnfs_destroy_all_layouts(struct nfs_client *clp)
498 } 549 }
499} 550}
500 551
552/*
553 * Compare 2 layout stateid sequence ids, to see which is newer,
554 * taking into account wraparound issues.
555 */
556static bool pnfs_seqid_is_newer(u32 s1, u32 s2)
557{
558 return (s32)s1 - (s32)s2 > 0;
559}
560
501/* update lo->plh_stateid with new if is more recent */ 561/* update lo->plh_stateid with new if is more recent */
502void 562void
503pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new, 563pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, const nfs4_stateid *new,
504 bool update_barrier) 564 bool update_barrier)
505{ 565{
506 u32 oldseq, newseq; 566 u32 oldseq, newseq, new_barrier;
567 int empty = list_empty(&lo->plh_segs);
507 568
508 oldseq = be32_to_cpu(lo->plh_stateid.seqid); 569 oldseq = be32_to_cpu(lo->plh_stateid.seqid);
509 newseq = be32_to_cpu(new->seqid); 570 newseq = be32_to_cpu(new->seqid);
510 if ((int)(newseq - oldseq) > 0) { 571 if (empty || pnfs_seqid_is_newer(newseq, oldseq)) {
511 nfs4_stateid_copy(&lo->plh_stateid, new); 572 nfs4_stateid_copy(&lo->plh_stateid, new);
512 if (update_barrier) { 573 if (update_barrier) {
513 u32 new_barrier = be32_to_cpu(new->seqid); 574 new_barrier = be32_to_cpu(new->seqid);
514
515 if ((int)(new_barrier - lo->plh_barrier))
516 lo->plh_barrier = new_barrier;
517 } else { 575 } else {
518 /* Because of wraparound, we want to keep the barrier 576 /* Because of wraparound, we want to keep the barrier
519 * "close" to the current seqids. It needs to be 577 * "close" to the current seqids.
520 * within 2**31 to count as "behind", so if it
521 * gets too near that limit, give us a litle leeway
522 * and bring it to within 2**30.
523 * NOTE - and yes, this is all unsigned arithmetic.
524 */ 578 */
525 if (unlikely((newseq - lo->plh_barrier) > (3 << 29))) 579 new_barrier = newseq - atomic_read(&lo->plh_outstanding);
526 lo->plh_barrier = newseq - (1 << 30);
527 } 580 }
581 if (empty || pnfs_seqid_is_newer(new_barrier, lo->plh_barrier))
582 lo->plh_barrier = new_barrier;
528 } 583 }
529} 584}
530 585
586static bool
587pnfs_layout_stateid_blocked(const struct pnfs_layout_hdr *lo,
588 const nfs4_stateid *stateid)
589{
590 u32 seqid = be32_to_cpu(stateid->seqid);
591
592 return !pnfs_seqid_is_newer(seqid, lo->plh_barrier);
593}
594
531/* lget is set to 1 if called from inside send_layoutget call chain */ 595/* lget is set to 1 if called from inside send_layoutget call chain */
532static bool 596static bool
533pnfs_layoutgets_blocked(struct pnfs_layout_hdr *lo, nfs4_stateid *stateid, 597pnfs_layoutgets_blocked(const struct pnfs_layout_hdr *lo, int lget)
534 int lget)
535{ 598{
536 if ((stateid) &&
537 (int)(lo->plh_barrier - be32_to_cpu(stateid->seqid)) >= 0)
538 return true;
539 return lo->plh_block_lgets || 599 return lo->plh_block_lgets ||
540 test_bit(NFS_LAYOUT_DESTROYED, &lo->plh_flags) ||
541 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) || 600 test_bit(NFS_LAYOUT_BULK_RECALL, &lo->plh_flags) ||
542 (list_empty(&lo->plh_segs) && 601 (list_empty(&lo->plh_segs) &&
543 (atomic_read(&lo->plh_outstanding) > lget)); 602 (atomic_read(&lo->plh_outstanding) > lget));
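
pnfs_seqid_is_newer() is now the one place the layout code compares stateid sequence ids: s1 counts as newer than s2 when the signed 32-bit difference is positive, which stays correct across wraparound as long as the two ids are within 2^31 of each other. When no explicit barrier update is requested, the barrier is then derived directly from the newest seqid minus the number of outstanding LAYOUTGETs. The comparison can be checked in isolation (written in an overflow-safe form equivalent to the kernel's (s32)s1 - (s32)s2 > 0):

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool seqid_is_newer(uint32_t s1, uint32_t s2)
{
    return (int32_t)(s1 - s2) > 0;
}

int main(void)
{
    printf("%d\n", seqid_is_newer(5, 3));              /* 1: plainly newer       */
    printf("%d\n", seqid_is_newer(3, 5));              /* 0: older               */
    printf("%d\n", seqid_is_newer(2, 0xfffffffe));     /* 1: newer across wrap   */
    printf("%d\n", seqid_is_newer(0xfffffffe, 2));     /* 0: older across wrap   */
    return 0;
}
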
@@ -551,7 +610,7 @@ pnfs_choose_layoutget_stateid(nfs4_stateid *dst, struct pnfs_layout_hdr *lo,
551 610
552 dprintk("--> %s\n", __func__); 611 dprintk("--> %s\n", __func__);
553 spin_lock(&lo->plh_inode->i_lock); 612 spin_lock(&lo->plh_inode->i_lock);
554 if (pnfs_layoutgets_blocked(lo, NULL, 1)) { 613 if (pnfs_layoutgets_blocked(lo, 1)) {
555 status = -EAGAIN; 614 status = -EAGAIN;
556 } else if (list_empty(&lo->plh_segs)) { 615 } else if (list_empty(&lo->plh_segs)) {
557 int seq; 616 int seq;
@@ -582,7 +641,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
582 struct inode *ino = lo->plh_inode; 641 struct inode *ino = lo->plh_inode;
583 struct nfs_server *server = NFS_SERVER(ino); 642 struct nfs_server *server = NFS_SERVER(ino);
584 struct nfs4_layoutget *lgp; 643 struct nfs4_layoutget *lgp;
585 struct pnfs_layout_segment *lseg = NULL; 644 struct pnfs_layout_segment *lseg;
586 645
587 dprintk("--> %s\n", __func__); 646 dprintk("--> %s\n", __func__);
588 647
@@ -599,16 +658,22 @@ send_layoutget(struct pnfs_layout_hdr *lo,
599 lgp->args.type = server->pnfs_curr_ld->id; 658 lgp->args.type = server->pnfs_curr_ld->id;
600 lgp->args.inode = ino; 659 lgp->args.inode = ino;
601 lgp->args.ctx = get_nfs_open_context(ctx); 660 lgp->args.ctx = get_nfs_open_context(ctx);
602 lgp->lsegpp = &lseg;
603 lgp->gfp_flags = gfp_flags; 661 lgp->gfp_flags = gfp_flags;
604 662
605 /* Synchronously retrieve layout information from server and 663 /* Synchronously retrieve layout information from server and
606 * store in lseg. 664 * store in lseg.
607 */ 665 */
608 nfs4_proc_layoutget(lgp, gfp_flags); 666 lseg = nfs4_proc_layoutget(lgp, gfp_flags);
609 if (!lseg) { 667 if (IS_ERR(lseg)) {
610 /* remember that LAYOUTGET failed and suspend trying */ 668 switch (PTR_ERR(lseg)) {
611 set_bit(lo_fail_bit(range->iomode), &lo->plh_flags); 669 case -ENOMEM:
670 case -ERESTARTSYS:
671 break;
672 default:
673 /* remember that LAYOUTGET failed and suspend trying */
674 pnfs_layout_io_set_failed(lo, range->iomode);
675 }
676 return NULL;
612 } 677 }
613 678
614 return lseg; 679 return lseg;
@@ -636,25 +701,24 @@ _pnfs_return_layout(struct inode *ino)
636 701
637 spin_lock(&ino->i_lock); 702 spin_lock(&ino->i_lock);
638 lo = nfsi->layout; 703 lo = nfsi->layout;
639 if (!lo || pnfs_test_layout_returned(lo)) { 704 if (!lo) {
640 spin_unlock(&ino->i_lock); 705 spin_unlock(&ino->i_lock);
641 dprintk("NFS: %s no layout to return\n", __func__); 706 dprintk("NFS: %s no layout to return\n", __func__);
642 goto out; 707 goto out;
643 } 708 }
644 stateid = nfsi->layout->plh_stateid; 709 stateid = nfsi->layout->plh_stateid;
645 /* Reference matched in nfs4_layoutreturn_release */ 710 /* Reference matched in nfs4_layoutreturn_release */
646 get_layout_hdr(lo); 711 pnfs_get_layout_hdr(lo);
647 empty = list_empty(&lo->plh_segs); 712 empty = list_empty(&lo->plh_segs);
648 mark_matching_lsegs_invalid(lo, &tmp_list, NULL); 713 pnfs_mark_matching_lsegs_invalid(lo, &tmp_list, NULL);
649 /* Don't send a LAYOUTRETURN if list was initially empty */ 714 /* Don't send a LAYOUTRETURN if list was initially empty */
650 if (empty) { 715 if (empty) {
651 spin_unlock(&ino->i_lock); 716 spin_unlock(&ino->i_lock);
652 put_layout_hdr(lo); 717 pnfs_put_layout_hdr(lo);
653 dprintk("NFS: %s no layout segments to return\n", __func__); 718 dprintk("NFS: %s no layout segments to return\n", __func__);
654 goto out; 719 goto out;
655 } 720 }
656 lo->plh_block_lgets++; 721 lo->plh_block_lgets++;
657 pnfs_mark_layout_returned(lo);
658 spin_unlock(&ino->i_lock); 722 spin_unlock(&ino->i_lock);
659 pnfs_free_lseg_list(&tmp_list); 723 pnfs_free_lseg_list(&tmp_list);
660 724
@@ -663,10 +727,10 @@ _pnfs_return_layout(struct inode *ino)
663 lrp = kzalloc(sizeof(*lrp), GFP_KERNEL); 727 lrp = kzalloc(sizeof(*lrp), GFP_KERNEL);
664 if (unlikely(lrp == NULL)) { 728 if (unlikely(lrp == NULL)) {
665 status = -ENOMEM; 729 status = -ENOMEM;
666 set_bit(NFS_LAYOUT_RW_FAILED, &lo->plh_flags); 730 spin_lock(&ino->i_lock);
667 set_bit(NFS_LAYOUT_RO_FAILED, &lo->plh_flags); 731 lo->plh_block_lgets--;
668 pnfs_clear_layout_returned(lo); 732 spin_unlock(&ino->i_lock);
669 put_layout_hdr(lo); 733 pnfs_put_layout_hdr(lo);
670 goto out; 734 goto out;
671 } 735 }
672 736
@@ -703,7 +767,7 @@ bool pnfs_roc(struct inode *ino)
703 if (!found) 767 if (!found)
704 goto out_nolayout; 768 goto out_nolayout;
705 lo->plh_block_lgets++; 769 lo->plh_block_lgets++;
706 get_layout_hdr(lo); /* matched in pnfs_roc_release */ 770 pnfs_get_layout_hdr(lo); /* matched in pnfs_roc_release */
707 spin_unlock(&ino->i_lock); 771 spin_unlock(&ino->i_lock);
708 pnfs_free_lseg_list(&tmp_list); 772 pnfs_free_lseg_list(&tmp_list);
709 return true; 773 return true;
@@ -720,8 +784,12 @@ void pnfs_roc_release(struct inode *ino)
720 spin_lock(&ino->i_lock); 784 spin_lock(&ino->i_lock);
721 lo = NFS_I(ino)->layout; 785 lo = NFS_I(ino)->layout;
722 lo->plh_block_lgets--; 786 lo->plh_block_lgets--;
723 put_layout_hdr_locked(lo); 787 if (atomic_dec_and_test(&lo->plh_refcount)) {
724 spin_unlock(&ino->i_lock); 788 pnfs_detach_layout_hdr(lo);
789 spin_unlock(&ino->i_lock);
790 pnfs_free_layout_hdr(lo);
791 } else
792 spin_unlock(&ino->i_lock);
725} 793}
726 794
727void pnfs_roc_set_barrier(struct inode *ino, u32 barrier) 795void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
@@ -730,32 +798,34 @@ void pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
730 798
731 spin_lock(&ino->i_lock); 799 spin_lock(&ino->i_lock);
732 lo = NFS_I(ino)->layout; 800 lo = NFS_I(ino)->layout;
733 if ((int)(barrier - lo->plh_barrier) > 0) 801 if (pnfs_seqid_is_newer(barrier, lo->plh_barrier))
734 lo->plh_barrier = barrier; 802 lo->plh_barrier = barrier;
735 spin_unlock(&ino->i_lock); 803 spin_unlock(&ino->i_lock);
736} 804}
737 805
738bool pnfs_roc_drain(struct inode *ino, u32 *barrier) 806bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task)
739{ 807{
740 struct nfs_inode *nfsi = NFS_I(ino); 808 struct nfs_inode *nfsi = NFS_I(ino);
809 struct pnfs_layout_hdr *lo;
741 struct pnfs_layout_segment *lseg; 810 struct pnfs_layout_segment *lseg;
811 u32 current_seqid;
742 bool found = false; 812 bool found = false;
743 813
744 spin_lock(&ino->i_lock); 814 spin_lock(&ino->i_lock);
745 list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list) 815 list_for_each_entry(lseg, &nfsi->layout->plh_segs, pls_list)
746 if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) { 816 if (test_bit(NFS_LSEG_ROC, &lseg->pls_flags)) {
817 rpc_sleep_on(&NFS_SERVER(ino)->roc_rpcwaitq, task, NULL);
747 found = true; 818 found = true;
748 break; 819 goto out;
749 } 820 }
750 if (!found) { 821 lo = nfsi->layout;
751 struct pnfs_layout_hdr *lo = nfsi->layout; 822 current_seqid = be32_to_cpu(lo->plh_stateid.seqid);
752 u32 current_seqid = be32_to_cpu(lo->plh_stateid.seqid);
753 823
754 /* Since close does not return a layout stateid for use as 824 /* Since close does not return a layout stateid for use as
755 * a barrier, we choose the worst-case barrier. 825 * a barrier, we choose the worst-case barrier.
756 */ 826 */
757 *barrier = current_seqid + atomic_read(&lo->plh_outstanding); 827 *barrier = current_seqid + atomic_read(&lo->plh_outstanding);
758 } 828out:
759 spin_unlock(&ino->i_lock); 829 spin_unlock(&ino->i_lock);
760 return found; 830 return found;
761} 831}
@@ -786,14 +856,13 @@ cmp_layout(struct pnfs_layout_range *l1,
786} 856}
787 857
788static void 858static void
789pnfs_insert_layout(struct pnfs_layout_hdr *lo, 859pnfs_layout_insert_lseg(struct pnfs_layout_hdr *lo,
790 struct pnfs_layout_segment *lseg) 860 struct pnfs_layout_segment *lseg)
791{ 861{
792 struct pnfs_layout_segment *lp; 862 struct pnfs_layout_segment *lp;
793 863
794 dprintk("%s:Begin\n", __func__); 864 dprintk("%s:Begin\n", __func__);
795 865
796 assert_spin_locked(&lo->plh_inode->i_lock);
797 list_for_each_entry(lp, &lo->plh_segs, pls_list) { 866 list_for_each_entry(lp, &lo->plh_segs, pls_list) {
798 if (cmp_layout(&lseg->pls_range, &lp->pls_range) > 0) 867 if (cmp_layout(&lseg->pls_range, &lp->pls_range) > 0)
799 continue; 868 continue;
@@ -813,7 +882,7 @@ pnfs_insert_layout(struct pnfs_layout_hdr *lo,
813 __func__, lseg, lseg->pls_range.iomode, 882 __func__, lseg, lseg->pls_range.iomode,
814 lseg->pls_range.offset, lseg->pls_range.length); 883 lseg->pls_range.offset, lseg->pls_range.length);
815out: 884out:
816 get_layout_hdr(lo); 885 pnfs_get_layout_hdr(lo);
817 886
818 dprintk("%s:Return\n", __func__); 887 dprintk("%s:Return\n", __func__);
819} 888}
@@ -847,21 +916,19 @@ pnfs_find_alloc_layout(struct inode *ino,
847 916
848 dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout); 917 dprintk("%s Begin ino=%p layout=%p\n", __func__, ino, nfsi->layout);
849 918
850 assert_spin_locked(&ino->i_lock); 919 if (nfsi->layout != NULL)
851 if (nfsi->layout) { 920 goto out_existing;
852 if (test_bit(NFS_LAYOUT_DESTROYED, &nfsi->layout->plh_flags))
853 return NULL;
854 else
855 return nfsi->layout;
856 }
857 spin_unlock(&ino->i_lock); 921 spin_unlock(&ino->i_lock);
858 new = alloc_init_layout_hdr(ino, ctx, gfp_flags); 922 new = alloc_init_layout_hdr(ino, ctx, gfp_flags);
859 spin_lock(&ino->i_lock); 923 spin_lock(&ino->i_lock);
860 924
861 if (likely(nfsi->layout == NULL)) /* Won the race? */ 925 if (likely(nfsi->layout == NULL)) { /* Won the race? */
862 nfsi->layout = new; 926 nfsi->layout = new;
863 else 927 return new;
864 pnfs_free_layout_hdr(new); 928 }
929 pnfs_free_layout_hdr(new);
930out_existing:
931 pnfs_get_layout_hdr(nfsi->layout);
865 return nfsi->layout; 932 return nfsi->layout;
866} 933}
867 934
@@ -904,11 +971,10 @@ pnfs_find_lseg(struct pnfs_layout_hdr *lo,
904 971
905 dprintk("%s:Begin\n", __func__); 972 dprintk("%s:Begin\n", __func__);
906 973
907 assert_spin_locked(&lo->plh_inode->i_lock);
908 list_for_each_entry(lseg, &lo->plh_segs, pls_list) { 974 list_for_each_entry(lseg, &lo->plh_segs, pls_list) {
909 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) && 975 if (test_bit(NFS_LSEG_VALID, &lseg->pls_flags) &&
910 is_matching_lseg(&lseg->pls_range, range)) { 976 is_matching_lseg(&lseg->pls_range, range)) {
911 ret = get_lseg(lseg); 977 ret = pnfs_get_lseg(lseg);
912 break; 978 break;
913 } 979 }
914 if (lseg->pls_range.offset > range->offset) 980 if (lseg->pls_range.offset > range->offset)
@@ -1013,7 +1079,6 @@ pnfs_update_layout(struct inode *ino,
1013 .length = count, 1079 .length = count,
1014 }; 1080 };
1015 unsigned pg_offset; 1081 unsigned pg_offset;
1016 struct nfs_inode *nfsi = NFS_I(ino);
1017 struct nfs_server *server = NFS_SERVER(ino); 1082 struct nfs_server *server = NFS_SERVER(ino);
1018 struct nfs_client *clp = server->nfs_client; 1083 struct nfs_client *clp = server->nfs_client;
1019 struct pnfs_layout_hdr *lo; 1084 struct pnfs_layout_hdr *lo;
@@ -1021,16 +1086,16 @@ pnfs_update_layout(struct inode *ino,
1021 bool first = false; 1086 bool first = false;
1022 1087
1023 if (!pnfs_enabled_sb(NFS_SERVER(ino))) 1088 if (!pnfs_enabled_sb(NFS_SERVER(ino)))
1024 return NULL; 1089 goto out;
1025 1090
1026 if (pnfs_within_mdsthreshold(ctx, ino, iomode)) 1091 if (pnfs_within_mdsthreshold(ctx, ino, iomode))
1027 return NULL; 1092 goto out;
1028 1093
1029 spin_lock(&ino->i_lock); 1094 spin_lock(&ino->i_lock);
1030 lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags); 1095 lo = pnfs_find_alloc_layout(ino, ctx, gfp_flags);
1031 if (lo == NULL) { 1096 if (lo == NULL) {
1032 dprintk("%s ERROR: can't get pnfs_layout_hdr\n", __func__); 1097 spin_unlock(&ino->i_lock);
1033 goto out_unlock; 1098 goto out;
1034 } 1099 }
1035 1100
1036 /* Do we even need to bother with this? */ 1101 /* Do we even need to bother with this? */
@@ -1040,7 +1105,7 @@ pnfs_update_layout(struct inode *ino,
1040 } 1105 }
1041 1106
1042 /* if LAYOUTGET already failed once we don't try again */ 1107 /* if LAYOUTGET already failed once we don't try again */
1043 if (test_bit(lo_fail_bit(iomode), &nfsi->layout->plh_flags)) 1108 if (pnfs_layout_io_test_failed(lo, iomode))
1044 goto out_unlock; 1109 goto out_unlock;
1045 1110
1046 /* Check to see if the layout for the given range already exists */ 1111 /* Check to see if the layout for the given range already exists */
@@ -1048,17 +1113,13 @@ pnfs_update_layout(struct inode *ino,
1048 if (lseg) 1113 if (lseg)
1049 goto out_unlock; 1114 goto out_unlock;
1050 1115
1051 if (pnfs_layoutgets_blocked(lo, NULL, 0)) 1116 if (pnfs_layoutgets_blocked(lo, 0))
1052 goto out_unlock; 1117 goto out_unlock;
1053 atomic_inc(&lo->plh_outstanding); 1118 atomic_inc(&lo->plh_outstanding);
1054 1119
1055 get_layout_hdr(lo);
1056 if (list_empty(&lo->plh_segs)) 1120 if (list_empty(&lo->plh_segs))
1057 first = true; 1121 first = true;
1058 1122
1059 /* Enable LAYOUTRETURNs */
1060 pnfs_clear_layout_returned(lo);
1061
1062 spin_unlock(&ino->i_lock); 1123 spin_unlock(&ino->i_lock);
1063 if (first) { 1124 if (first) {
1064 /* The lo must be on the clp list if there is any 1125 /* The lo must be on the clp list if there is any
@@ -1079,24 +1140,26 @@ pnfs_update_layout(struct inode *ino,
1079 arg.length = PAGE_CACHE_ALIGN(arg.length); 1140 arg.length = PAGE_CACHE_ALIGN(arg.length);
1080 1141
1081 lseg = send_layoutget(lo, ctx, &arg, gfp_flags); 1142 lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
1082 if (!lseg && first) {
1083 spin_lock(&clp->cl_lock);
1084 list_del_init(&lo->plh_layouts);
1085 spin_unlock(&clp->cl_lock);
1086 }
1087 atomic_dec(&lo->plh_outstanding); 1143 atomic_dec(&lo->plh_outstanding);
1088 put_layout_hdr(lo); 1144out_put_layout_hdr:
1145 pnfs_put_layout_hdr(lo);
1089out: 1146out:
1090 dprintk("%s end, state 0x%lx lseg %p\n", __func__, 1147 dprintk("%s: inode %s/%llu pNFS layout segment %s for "
1091 nfsi->layout ? nfsi->layout->plh_flags : -1, lseg); 1148 "(%s, offset: %llu, length: %llu)\n",
1149 __func__, ino->i_sb->s_id,
1150 (unsigned long long)NFS_FILEID(ino),
1151 lseg == NULL ? "not found" : "found",
1152 iomode==IOMODE_RW ? "read/write" : "read-only",
1153 (unsigned long long)pos,
1154 (unsigned long long)count);
1092 return lseg; 1155 return lseg;
1093out_unlock: 1156out_unlock:
1094 spin_unlock(&ino->i_lock); 1157 spin_unlock(&ino->i_lock);
1095 goto out; 1158 goto out_put_layout_hdr;
1096} 1159}
1097EXPORT_SYMBOL_GPL(pnfs_update_layout); 1160EXPORT_SYMBOL_GPL(pnfs_update_layout);
1098 1161
1099int 1162struct pnfs_layout_segment *
1100pnfs_layout_process(struct nfs4_layoutget *lgp) 1163pnfs_layout_process(struct nfs4_layoutget *lgp)
1101{ 1164{
1102 struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout; 1165 struct pnfs_layout_hdr *lo = NFS_I(lgp->args.inode)->layout;
@@ -1123,25 +1186,29 @@ pnfs_layout_process(struct nfs4_layoutget *lgp)
1123 goto out_forget_reply; 1186 goto out_forget_reply;
1124 } 1187 }
1125 1188
1126 if (pnfs_layoutgets_blocked(lo, &res->stateid, 1)) { 1189 if (pnfs_layoutgets_blocked(lo, 1) ||
1190 pnfs_layout_stateid_blocked(lo, &res->stateid)) {
1127 dprintk("%s forget reply due to state\n", __func__); 1191 dprintk("%s forget reply due to state\n", __func__);
1128 goto out_forget_reply; 1192 goto out_forget_reply;
1129 } 1193 }
1194
1195 /* Done processing layoutget. Set the layout stateid */
1196 pnfs_set_layout_stateid(lo, &res->stateid, false);
1197
1130 init_lseg(lo, lseg); 1198 init_lseg(lo, lseg);
1131 lseg->pls_range = res->range; 1199 lseg->pls_range = res->range;
1132 *lgp->lsegpp = get_lseg(lseg); 1200 pnfs_get_lseg(lseg);
1133 pnfs_insert_layout(lo, lseg); 1201 pnfs_layout_insert_lseg(lo, lseg);
1134 1202
1135 if (res->return_on_close) { 1203 if (res->return_on_close) {
1136 set_bit(NFS_LSEG_ROC, &lseg->pls_flags); 1204 set_bit(NFS_LSEG_ROC, &lseg->pls_flags);
1137 set_bit(NFS_LAYOUT_ROC, &lo->plh_flags); 1205 set_bit(NFS_LAYOUT_ROC, &lo->plh_flags);
1138 } 1206 }
1139 1207
1140 /* Done processing layoutget. Set the layout stateid */
1141 pnfs_set_layout_stateid(lo, &res->stateid, false);
1142 spin_unlock(&ino->i_lock); 1208 spin_unlock(&ino->i_lock);
1209 return lseg;
1143out: 1210out:
1144 return status; 1211 return ERR_PTR(status);
1145 1212
1146out_forget_reply: 1213out_forget_reply:
1147 spin_unlock(&ino->i_lock); 1214 spin_unlock(&ino->i_lock);
@@ -1153,16 +1220,24 @@ out_forget_reply:
1153void 1220void
1154pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) 1221pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
1155{ 1222{
1223 u64 rd_size = req->wb_bytes;
1224
1156 BUG_ON(pgio->pg_lseg != NULL); 1225 BUG_ON(pgio->pg_lseg != NULL);
1157 1226
1158 if (req->wb_offset != req->wb_pgbase) { 1227 if (req->wb_offset != req->wb_pgbase) {
1159 nfs_pageio_reset_read_mds(pgio); 1228 nfs_pageio_reset_read_mds(pgio);
1160 return; 1229 return;
1161 } 1230 }
1231
1232 if (pgio->pg_dreq == NULL)
1233 rd_size = i_size_read(pgio->pg_inode) - req_offset(req);
1234 else
1235 rd_size = nfs_dreq_bytes_left(pgio->pg_dreq);
1236
1162 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1237 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
1163 req->wb_context, 1238 req->wb_context,
1164 req_offset(req), 1239 req_offset(req),
1165 req->wb_bytes, 1240 rd_size,
1166 IOMODE_READ, 1241 IOMODE_READ,
1167 GFP_KERNEL); 1242 GFP_KERNEL);
1168 /* If no lseg, fall back to read through mds */ 1243 /* If no lseg, fall back to read through mds */
@@ -1173,7 +1248,8 @@ pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *r
1173EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read); 1248EXPORT_SYMBOL_GPL(pnfs_generic_pg_init_read);
1174 1249
1175void 1250void
1176pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req) 1251pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
1252 struct nfs_page *req, u64 wb_size)
1177{ 1253{
1178 BUG_ON(pgio->pg_lseg != NULL); 1254 BUG_ON(pgio->pg_lseg != NULL);
1179 1255
@@ -1181,10 +1257,11 @@ pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *
1181 nfs_pageio_reset_write_mds(pgio); 1257 nfs_pageio_reset_write_mds(pgio);
1182 return; 1258 return;
1183 } 1259 }
1260
1184 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, 1261 pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode,
1185 req->wb_context, 1262 req->wb_context,
1186 req_offset(req), 1263 req_offset(req),
1187 req->wb_bytes, 1264 wb_size,
1188 IOMODE_RW, 1265 IOMODE_RW,
1189 GFP_NOFS); 1266 GFP_NOFS);
1190 /* If no lseg, fall back to write through mds */ 1267 /* If no lseg, fall back to write through mds */
@@ -1362,12 +1439,12 @@ pnfs_do_multiple_writes(struct nfs_pageio_descriptor *desc, struct list_head *he
1362 if (trypnfs == PNFS_NOT_ATTEMPTED) 1439 if (trypnfs == PNFS_NOT_ATTEMPTED)
1363 pnfs_write_through_mds(desc, data); 1440 pnfs_write_through_mds(desc, data);
1364 } 1441 }
1365 put_lseg(lseg); 1442 pnfs_put_lseg(lseg);
1366} 1443}
1367 1444
1368static void pnfs_writehdr_free(struct nfs_pgio_header *hdr) 1445static void pnfs_writehdr_free(struct nfs_pgio_header *hdr)
1369{ 1446{
1370 put_lseg(hdr->lseg); 1447 pnfs_put_lseg(hdr->lseg);
1371 nfs_writehdr_free(hdr); 1448 nfs_writehdr_free(hdr);
1372} 1449}
1373EXPORT_SYMBOL_GPL(pnfs_writehdr_free); 1450EXPORT_SYMBOL_GPL(pnfs_writehdr_free);
@@ -1382,17 +1459,17 @@ pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc)
1382 whdr = nfs_writehdr_alloc(); 1459 whdr = nfs_writehdr_alloc();
1383 if (!whdr) { 1460 if (!whdr) {
1384 desc->pg_completion_ops->error_cleanup(&desc->pg_list); 1461 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1385 put_lseg(desc->pg_lseg); 1462 pnfs_put_lseg(desc->pg_lseg);
1386 desc->pg_lseg = NULL; 1463 desc->pg_lseg = NULL;
1387 return -ENOMEM; 1464 return -ENOMEM;
1388 } 1465 }
1389 hdr = &whdr->header; 1466 hdr = &whdr->header;
1390 nfs_pgheader_init(desc, hdr, pnfs_writehdr_free); 1467 nfs_pgheader_init(desc, hdr, pnfs_writehdr_free);
1391 hdr->lseg = get_lseg(desc->pg_lseg); 1468 hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
1392 atomic_inc(&hdr->refcnt); 1469 atomic_inc(&hdr->refcnt);
1393 ret = nfs_generic_flush(desc, hdr); 1470 ret = nfs_generic_flush(desc, hdr);
1394 if (ret != 0) { 1471 if (ret != 0) {
1395 put_lseg(desc->pg_lseg); 1472 pnfs_put_lseg(desc->pg_lseg);
1396 desc->pg_lseg = NULL; 1473 desc->pg_lseg = NULL;
1397 } else 1474 } else
1398 pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags); 1475 pnfs_do_multiple_writes(desc, &hdr->rpc_list, desc->pg_ioflags);
@@ -1517,12 +1594,12 @@ pnfs_do_multiple_reads(struct nfs_pageio_descriptor *desc, struct list_head *hea
1517 if (trypnfs == PNFS_NOT_ATTEMPTED) 1594 if (trypnfs == PNFS_NOT_ATTEMPTED)
1518 pnfs_read_through_mds(desc, data); 1595 pnfs_read_through_mds(desc, data);
1519 } 1596 }
1520 put_lseg(lseg); 1597 pnfs_put_lseg(lseg);
1521} 1598}
1522 1599
1523static void pnfs_readhdr_free(struct nfs_pgio_header *hdr) 1600static void pnfs_readhdr_free(struct nfs_pgio_header *hdr)
1524{ 1601{
1525 put_lseg(hdr->lseg); 1602 pnfs_put_lseg(hdr->lseg);
1526 nfs_readhdr_free(hdr); 1603 nfs_readhdr_free(hdr);
1527} 1604}
1528EXPORT_SYMBOL_GPL(pnfs_readhdr_free); 1605EXPORT_SYMBOL_GPL(pnfs_readhdr_free);
@@ -1538,17 +1615,17 @@ pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
1538 if (!rhdr) { 1615 if (!rhdr) {
1539 desc->pg_completion_ops->error_cleanup(&desc->pg_list); 1616 desc->pg_completion_ops->error_cleanup(&desc->pg_list);
1540 ret = -ENOMEM; 1617 ret = -ENOMEM;
1541 put_lseg(desc->pg_lseg); 1618 pnfs_put_lseg(desc->pg_lseg);
1542 desc->pg_lseg = NULL; 1619 desc->pg_lseg = NULL;
1543 return ret; 1620 return ret;
1544 } 1621 }
1545 hdr = &rhdr->header; 1622 hdr = &rhdr->header;
1546 nfs_pgheader_init(desc, hdr, pnfs_readhdr_free); 1623 nfs_pgheader_init(desc, hdr, pnfs_readhdr_free);
1547 hdr->lseg = get_lseg(desc->pg_lseg); 1624 hdr->lseg = pnfs_get_lseg(desc->pg_lseg);
1548 atomic_inc(&hdr->refcnt); 1625 atomic_inc(&hdr->refcnt);
1549 ret = nfs_generic_pagein(desc, hdr); 1626 ret = nfs_generic_pagein(desc, hdr);
1550 if (ret != 0) { 1627 if (ret != 0) {
1551 put_lseg(desc->pg_lseg); 1628 pnfs_put_lseg(desc->pg_lseg);
1552 desc->pg_lseg = NULL; 1629 desc->pg_lseg = NULL;
1553 } else 1630 } else
1554 pnfs_do_multiple_reads(desc, &hdr->rpc_list); 1631 pnfs_do_multiple_reads(desc, &hdr->rpc_list);
@@ -1574,13 +1651,7 @@ static void pnfs_list_write_lseg(struct inode *inode, struct list_head *listp)
1574 1651
1575void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg) 1652void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg)
1576{ 1653{
1577 if (lseg->pls_range.iomode == IOMODE_RW) { 1654 pnfs_layout_io_set_failed(lseg->pls_layout, lseg->pls_range.iomode);
1578 dprintk("%s Setting layout IOMODE_RW fail bit\n", __func__);
1579 set_bit(lo_fail_bit(IOMODE_RW), &lseg->pls_layout->plh_flags);
1580 } else {
1581 dprintk("%s Setting layout IOMODE_READ fail bit\n", __func__);
1582 set_bit(lo_fail_bit(IOMODE_READ), &lseg->pls_layout->plh_flags);
1583 }
1584} 1655}
1585EXPORT_SYMBOL_GPL(pnfs_set_lo_fail); 1656EXPORT_SYMBOL_GPL(pnfs_set_lo_fail);
1586 1657
@@ -1601,7 +1672,7 @@ pnfs_set_layoutcommit(struct nfs_write_data *wdata)
1601 } 1672 }
1602 if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) { 1673 if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &hdr->lseg->pls_flags)) {
1603 /* references matched in nfs4_layoutcommit_release */ 1674 /* references matched in nfs4_layoutcommit_release */
1604 get_lseg(hdr->lseg); 1675 pnfs_get_lseg(hdr->lseg);
1605 } 1676 }
1606 if (end_pos > nfsi->layout->plh_lwb) 1677 if (end_pos > nfsi->layout->plh_lwb)
1607 nfsi->layout->plh_lwb = end_pos; 1678 nfsi->layout->plh_lwb = end_pos;
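
The reworked pnfs_update_layout() also standardises its error handling on the single-exit pattern: failures inside the locked region jump to out_unlock, which releases i_lock and then branches to out_put_layout_hdr, so the reference taken by pnfs_find_alloc_layout() is dropped exactly once on every path that acquired it, while the early checks that never got a header skip straight to out and the final dprintk. A generic sketch of that goto-based cleanup shape (resource names invented, nothing NFS-specific):

#include <stdio.h>
#include <stdlib.h>

struct hdr { int refs; };

static struct hdr *hdr_get(void)
{
    struct hdr *h = calloc(1, sizeof(*h));
    if (h)
        h->refs = 1;
    return h;
}

static void hdr_put(struct hdr *h)
{
    if (h && --h->refs == 0)
        free(h);
}

static int do_update(int fail_early, int fail_late)
{
    int ret = -1;
    struct hdr *lo;

    lo = hdr_get();
    if (!lo)
        goto out;                    /* nothing taken yet: plain exit */
    if (fail_early)
        goto out_put;                /* drop the reference exactly once */
    if (fail_late)
        goto out_put;
    ret = 0;                         /* success path falls through too */
out_put:
    hdr_put(lo);
out:
    printf("do_update -> %d\n", ret);
    return ret;
}

int main(void)
{
    do_update(0, 0);
    do_update(1, 0);
    do_update(0, 1);
    return 0;
}
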
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h
index 745aa1b39e7c..2d722dba1111 100644
--- a/fs/nfs/pnfs.h
+++ b/fs/nfs/pnfs.h
@@ -62,9 +62,6 @@ enum {
62 NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */ 62 NFS_LAYOUT_RW_FAILED, /* get rw layout failed stop trying */
63 NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */ 63 NFS_LAYOUT_BULK_RECALL, /* bulk recall affecting layout */
64 NFS_LAYOUT_ROC, /* some lseg had roc bit set */ 64 NFS_LAYOUT_ROC, /* some lseg had roc bit set */
65 NFS_LAYOUT_DESTROYED, /* no new use of layout allowed */
66 NFS_LAYOUT_INVALID, /* layout is being destroyed */
67 NFS_LAYOUT_RETURNED, /* layout has already been returned */
68}; 65};
69 66
70enum layoutdriver_policy_flags { 67enum layoutdriver_policy_flags {
@@ -140,6 +137,7 @@ struct pnfs_layout_hdr {
140 atomic_t plh_outstanding; /* number of RPCs out */ 137 atomic_t plh_outstanding; /* number of RPCs out */
141 unsigned long plh_block_lgets; /* block LAYOUTGET if >0 */ 138 unsigned long plh_block_lgets; /* block LAYOUTGET if >0 */
142 u32 plh_barrier; /* ignore lower seqids */ 139 u32 plh_barrier; /* ignore lower seqids */
140 unsigned long plh_retry_timestamp;
143 unsigned long plh_flags; 141 unsigned long plh_flags;
144 loff_t plh_lwb; /* last write byte for layoutcommit */ 142 loff_t plh_lwb; /* last write byte for layoutcommit */
145 struct rpc_cred *plh_lc_cred; /* layoutcommit cred */ 143 struct rpc_cred *plh_lc_cred; /* layoutcommit cred */
@@ -172,12 +170,12 @@ extern int nfs4_proc_getdevicelist(struct nfs_server *server,
172 struct pnfs_devicelist *devlist); 170 struct pnfs_devicelist *devlist);
173extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, 171extern int nfs4_proc_getdeviceinfo(struct nfs_server *server,
174 struct pnfs_device *dev); 172 struct pnfs_device *dev);
175extern void nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags); 173extern struct pnfs_layout_segment* nfs4_proc_layoutget(struct nfs4_layoutget *lgp, gfp_t gfp_flags);
176extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp); 174extern int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp);
177 175
178/* pnfs.c */ 176/* pnfs.c */
179void get_layout_hdr(struct pnfs_layout_hdr *lo); 177void pnfs_get_layout_hdr(struct pnfs_layout_hdr *lo);
180void put_lseg(struct pnfs_layout_segment *lseg); 178void pnfs_put_lseg(struct pnfs_layout_segment *lseg);
181 179
182void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *, 180void pnfs_pageio_init_read(struct nfs_pageio_descriptor *, struct inode *,
183 const struct nfs_pgio_completion_ops *); 181 const struct nfs_pgio_completion_ops *);
@@ -188,28 +186,29 @@ void set_pnfs_layoutdriver(struct nfs_server *, const struct nfs_fh *, u32);
188void unset_pnfs_layoutdriver(struct nfs_server *); 186void unset_pnfs_layoutdriver(struct nfs_server *);
189void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *); 187void pnfs_generic_pg_init_read(struct nfs_pageio_descriptor *, struct nfs_page *);
190int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc); 188int pnfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc);
191void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *, struct nfs_page *); 189void pnfs_generic_pg_init_write(struct nfs_pageio_descriptor *pgio,
190 struct nfs_page *req, u64 wb_size);
192int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc); 191int pnfs_generic_pg_writepages(struct nfs_pageio_descriptor *desc);
193bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req); 192bool pnfs_generic_pg_test(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev, struct nfs_page *req);
194void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg); 193void pnfs_set_lo_fail(struct pnfs_layout_segment *lseg);
195int pnfs_layout_process(struct nfs4_layoutget *lgp); 194struct pnfs_layout_segment *pnfs_layout_process(struct nfs4_layoutget *lgp);
196void pnfs_free_lseg_list(struct list_head *tmp_list); 195void pnfs_free_lseg_list(struct list_head *tmp_list);
197void pnfs_destroy_layout(struct nfs_inode *); 196void pnfs_destroy_layout(struct nfs_inode *);
198void pnfs_destroy_all_layouts(struct nfs_client *); 197void pnfs_destroy_all_layouts(struct nfs_client *);
199void put_layout_hdr(struct pnfs_layout_hdr *lo); 198void pnfs_put_layout_hdr(struct pnfs_layout_hdr *lo);
200void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo, 199void pnfs_set_layout_stateid(struct pnfs_layout_hdr *lo,
201 const nfs4_stateid *new, 200 const nfs4_stateid *new,
202 bool update_barrier); 201 bool update_barrier);
203int pnfs_choose_layoutget_stateid(nfs4_stateid *dst, 202int pnfs_choose_layoutget_stateid(nfs4_stateid *dst,
204 struct pnfs_layout_hdr *lo, 203 struct pnfs_layout_hdr *lo,
205 struct nfs4_state *open_state); 204 struct nfs4_state *open_state);
206int mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo, 205int pnfs_mark_matching_lsegs_invalid(struct pnfs_layout_hdr *lo,
207 struct list_head *tmp_list, 206 struct list_head *tmp_list,
208 struct pnfs_layout_range *recall_range); 207 struct pnfs_layout_range *recall_range);
209bool pnfs_roc(struct inode *ino); 208bool pnfs_roc(struct inode *ino);
210void pnfs_roc_release(struct inode *ino); 209void pnfs_roc_release(struct inode *ino);
211void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); 210void pnfs_roc_set_barrier(struct inode *ino, u32 barrier);
212bool pnfs_roc_drain(struct inode *ino, u32 *barrier); 211bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task);
213void pnfs_set_layoutcommit(struct nfs_write_data *wdata); 212void pnfs_set_layoutcommit(struct nfs_write_data *wdata);
214void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); 213void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data);
215int pnfs_layoutcommit_inode(struct inode *inode, bool sync); 214int pnfs_layoutcommit_inode(struct inode *inode, bool sync);
@@ -233,6 +232,7 @@ struct nfs4_threshold *pnfs_mdsthreshold_alloc(void);
233/* nfs4_deviceid_flags */ 232/* nfs4_deviceid_flags */
234enum { 233enum {
235 NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */ 234 NFS_DEVICEID_INVALID = 0, /* set when MDS clientid recalled */
235 NFS_DEVICEID_UNAVAILABLE, /* device temporarily unavailable */
236}; 236};
237 237
238/* pnfs_dev.c */ 238/* pnfs_dev.c */
@@ -242,6 +242,7 @@ struct nfs4_deviceid_node {
242 const struct pnfs_layoutdriver_type *ld; 242 const struct pnfs_layoutdriver_type *ld;
243 const struct nfs_client *nfs_client; 243 const struct nfs_client *nfs_client;
244 unsigned long flags; 244 unsigned long flags;
245 unsigned long timestamp_unavailable;
245 struct nfs4_deviceid deviceid; 246 struct nfs4_deviceid deviceid;
246 atomic_t ref; 247 atomic_t ref;
247}; 248};
@@ -254,34 +255,12 @@ void nfs4_init_deviceid_node(struct nfs4_deviceid_node *,
254 const struct nfs4_deviceid *); 255 const struct nfs4_deviceid *);
255struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *); 256struct nfs4_deviceid_node *nfs4_insert_deviceid_node(struct nfs4_deviceid_node *);
256bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *); 257bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *);
258void nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node);
259bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node);
257void nfs4_deviceid_purge_client(const struct nfs_client *); 260void nfs4_deviceid_purge_client(const struct nfs_client *);
258 261
259static inline void
260pnfs_mark_layout_returned(struct pnfs_layout_hdr *lo)
261{
262 set_bit(NFS_LAYOUT_RETURNED, &lo->plh_flags);
263}
264
265static inline void
266pnfs_clear_layout_returned(struct pnfs_layout_hdr *lo)
267{
268 clear_bit(NFS_LAYOUT_RETURNED, &lo->plh_flags);
269}
270
271static inline bool
272pnfs_test_layout_returned(struct pnfs_layout_hdr *lo)
273{
274 return test_bit(NFS_LAYOUT_RETURNED, &lo->plh_flags);
275}
276
277static inline int lo_fail_bit(u32 iomode)
278{
279 return iomode == IOMODE_RW ?
280 NFS_LAYOUT_RW_FAILED : NFS_LAYOUT_RO_FAILED;
281}
282
283static inline struct pnfs_layout_segment * 262static inline struct pnfs_layout_segment *
284get_lseg(struct pnfs_layout_segment *lseg) 263pnfs_get_lseg(struct pnfs_layout_segment *lseg)
285{ 264{
286 if (lseg) { 265 if (lseg) {
287 atomic_inc(&lseg->pls_refcount); 266 atomic_inc(&lseg->pls_refcount);
@@ -406,12 +385,12 @@ static inline void pnfs_destroy_layout(struct nfs_inode *nfsi)
406} 385}
407 386
408static inline struct pnfs_layout_segment * 387static inline struct pnfs_layout_segment *
409get_lseg(struct pnfs_layout_segment *lseg) 388pnfs_get_lseg(struct pnfs_layout_segment *lseg)
410{ 389{
411 return NULL; 390 return NULL;
412} 391}
413 392
414static inline void put_lseg(struct pnfs_layout_segment *lseg) 393static inline void pnfs_put_lseg(struct pnfs_layout_segment *lseg)
415{ 394{
416} 395}
417 396
@@ -443,7 +422,7 @@ pnfs_roc_set_barrier(struct inode *ino, u32 barrier)
443} 422}
444 423
445static inline bool 424static inline bool
446pnfs_roc_drain(struct inode *ino, u32 *barrier) 425pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task)
447{ 426{
448 return false; 427 return false;
449} 428}
diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c
index 73f701f1f4d3..d35b62e83ea6 100644
--- a/fs/nfs/pnfs_dev.c
+++ b/fs/nfs/pnfs_dev.c
@@ -40,6 +40,8 @@
40#define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS) 40#define NFS4_DEVICE_ID_HASH_SIZE (1 << NFS4_DEVICE_ID_HASH_BITS)
41#define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1) 41#define NFS4_DEVICE_ID_HASH_MASK (NFS4_DEVICE_ID_HASH_SIZE - 1)
42 42
43#define PNFS_DEVICE_RETRY_TIMEOUT (120*HZ)
44
43static struct hlist_head nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE]; 45static struct hlist_head nfs4_deviceid_cache[NFS4_DEVICE_ID_HASH_SIZE];
44static DEFINE_SPINLOCK(nfs4_deviceid_lock); 46static DEFINE_SPINLOCK(nfs4_deviceid_lock);
45 47
@@ -218,6 +220,30 @@ nfs4_put_deviceid_node(struct nfs4_deviceid_node *d)
218} 220}
219EXPORT_SYMBOL_GPL(nfs4_put_deviceid_node); 221EXPORT_SYMBOL_GPL(nfs4_put_deviceid_node);
220 222
223void
224nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node)
225{
226 node->timestamp_unavailable = jiffies;
227 set_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags);
228}
229EXPORT_SYMBOL_GPL(nfs4_mark_deviceid_unavailable);
230
231bool
232nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node)
233{
234 if (test_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags)) {
235 unsigned long start, end;
236
237 end = jiffies;
238 start = end - PNFS_DEVICE_RETRY_TIMEOUT;
239 if (time_in_range(node->timestamp_unavailable, start, end))
240 return true;
241 clear_bit(NFS_DEVICEID_UNAVAILABLE, &node->flags);
242 }
243 return false;
244}
245EXPORT_SYMBOL_GPL(nfs4_test_deviceid_unavailable);
246
221static void 247static void
222_deviceid_purge_client(const struct nfs_client *clp, long hash) 248_deviceid_purge_client(const struct nfs_client *clp, long hash)
223{ 249{
@@ -276,3 +302,4 @@ nfs4_deviceid_mark_client_invalid(struct nfs_client *clp)
276 } 302 }
277 rcu_read_unlock(); 303 rcu_read_unlock();
278} 304}
305
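
nfs4_mark_deviceid_unavailable()/nfs4_test_deviceid_unavailable() apply the same timestamped-flag idea to device ids: marking records when the device last failed, and the test reports "unavailable" only while that mark is younger than PNFS_DEVICE_RETRY_TIMEOUT, clearing the bit afterwards, so the fallback is temporary rather than permanent. A toy, seconds-based model of how an I/O path might consult it (all names invented):

#include <stdbool.h>
#include <stdio.h>
#include <time.h>

#define DEVICE_RETRY_TIMEOUT 120   /* seconds, mirroring 120*HZ above */

struct deviceid_node {
    bool   unavailable;
    time_t timestamp_unavailable;
};

static void mark_unavailable(struct deviceid_node *d)
{
    d->timestamp_unavailable = time(NULL);
    d->unavailable = true;
}

static bool test_unavailable(struct deviceid_node *d)
{
    if (!d->unavailable)
        return false;
    if (time(NULL) - d->timestamp_unavailable <= DEVICE_RETRY_TIMEOUT)
        return true;
    d->unavailable = false;       /* window expired: try the device again */
    return false;
}

/* Caller side: fall back to the MDS while the device is marked. */
static const char *pick_path(struct deviceid_node *d)
{
    return test_unavailable(d) ? "read through MDS" : "read from data server";
}

int main(void)
{
    struct deviceid_node d = { 0 };
    printf("%s\n", pick_path(&d));
    mark_unavailable(&d);
    printf("%s\n", pick_path(&d));
    d.timestamp_unavailable -= DEVICE_RETRY_TIMEOUT + 1;
    printf("%s\n", pick_path(&d));
    return 0;
}
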
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
index d2c7f5db0847..e831bce49766 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -88,6 +88,7 @@ enum {
88 Opt_sharecache, Opt_nosharecache, 88 Opt_sharecache, Opt_nosharecache,
89 Opt_resvport, Opt_noresvport, 89 Opt_resvport, Opt_noresvport,
90 Opt_fscache, Opt_nofscache, 90 Opt_fscache, Opt_nofscache,
91 Opt_migration, Opt_nomigration,
91 92
92 /* Mount options that take integer arguments */ 93 /* Mount options that take integer arguments */
93 Opt_port, 94 Opt_port,
@@ -147,6 +148,8 @@ static const match_table_t nfs_mount_option_tokens = {
147 { Opt_noresvport, "noresvport" }, 148 { Opt_noresvport, "noresvport" },
148 { Opt_fscache, "fsc" }, 149 { Opt_fscache, "fsc" },
149 { Opt_nofscache, "nofsc" }, 150 { Opt_nofscache, "nofsc" },
151 { Opt_migration, "migration" },
152 { Opt_nomigration, "nomigration" },
150 153
151 { Opt_port, "port=%s" }, 154 { Opt_port, "port=%s" },
152 { Opt_rsize, "rsize=%s" }, 155 { Opt_rsize, "rsize=%s" },
@@ -676,6 +679,9 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
676 if (nfss->options & NFS_OPTION_FSCACHE) 679 if (nfss->options & NFS_OPTION_FSCACHE)
677 seq_printf(m, ",fsc"); 680 seq_printf(m, ",fsc");
678 681
682 if (nfss->options & NFS_OPTION_MIGRATION)
683 seq_printf(m, ",migration");
684
679 if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) { 685 if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG) {
680 if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONE) 686 if (nfss->flags & NFS_MOUNT_LOOKUP_CACHE_NONE)
681 seq_printf(m, ",lookupcache=none"); 687 seq_printf(m, ",lookupcache=none");
@@ -1106,7 +1112,7 @@ static int nfs_get_option_ul(substring_t args[], unsigned long *option)
1106 string = match_strdup(args); 1112 string = match_strdup(args);
1107 if (string == NULL) 1113 if (string == NULL)
1108 return -ENOMEM; 1114 return -ENOMEM;
1109 rc = strict_strtoul(string, 10, option); 1115 rc = kstrtoul(string, 10, option);
1110 kfree(string); 1116 kfree(string);
1111 1117
1112 return rc; 1118 return rc;
@@ -1243,6 +1249,12 @@ static int nfs_parse_mount_options(char *raw,
1243 kfree(mnt->fscache_uniq); 1249 kfree(mnt->fscache_uniq);
1244 mnt->fscache_uniq = NULL; 1250 mnt->fscache_uniq = NULL;
1245 break; 1251 break;
1252 case Opt_migration:
1253 mnt->options |= NFS_OPTION_MIGRATION;
1254 break;
1255 case Opt_nomigration:
1256			mnt->options &= ~NFS_OPTION_MIGRATION;
1257 break;
1246 1258
1247 /* 1259 /*
1248 * options that take numeric values 1260 * options that take numeric values
@@ -1535,6 +1547,10 @@ static int nfs_parse_mount_options(char *raw,
1535 if (mnt->minorversion && mnt->version != 4) 1547 if (mnt->minorversion && mnt->version != 4)
1536 goto out_minorversion_mismatch; 1548 goto out_minorversion_mismatch;
1537 1549
1550 if (mnt->options & NFS_OPTION_MIGRATION &&
1551		    (mnt->version != 4 || mnt->minorversion != 0))
1552 goto out_migration_misuse;
1553
1538 /* 1554 /*
1539 * verify that any proto=/mountproto= options match the address 1555 * verify that any proto=/mountproto= options match the address
1540 * families in the addr=/mountaddr= options. 1556 * families in the addr=/mountaddr= options.
@@ -1572,6 +1588,10 @@ out_minorversion_mismatch:
1572 printk(KERN_INFO "NFS: mount option vers=%u does not support " 1588 printk(KERN_INFO "NFS: mount option vers=%u does not support "
1573 "minorversion=%u\n", mnt->version, mnt->minorversion); 1589 "minorversion=%u\n", mnt->version, mnt->minorversion);
1574 return 0; 1590 return 0;
1591out_migration_misuse:
1592 printk(KERN_INFO
1593 "NFS: 'migration' not supported for this NFS version\n");
1594 return 0;
1575out_nomem: 1595out_nomem:
1576 printk(KERN_INFO "NFS: not enough memory to parse option\n"); 1596 printk(KERN_INFO "NFS: not enough memory to parse option\n");
1577 return 0; 1597 return 0;
@@ -2494,7 +2514,7 @@ EXPORT_SYMBOL_GPL(nfs_kill_super);
2494/* 2514/*
2495 * Clone an NFS2/3/4 server record on xdev traversal (FSID-change) 2515 * Clone an NFS2/3/4 server record on xdev traversal (FSID-change)
2496 */ 2516 */
2497struct dentry * 2517static struct dentry *
2498nfs_xdev_mount(struct file_system_type *fs_type, int flags, 2518nfs_xdev_mount(struct file_system_type *fs_type, int flags,
2499 const char *dev_name, void *raw_data) 2519 const char *dev_name, void *raw_data)
2500{ 2520{
@@ -2642,6 +2662,7 @@ unsigned int nfs_idmap_cache_timeout = 600;
2642bool nfs4_disable_idmapping = true; 2662bool nfs4_disable_idmapping = true;
2643unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE; 2663unsigned short max_session_slots = NFS4_DEF_SLOT_TABLE_SIZE;
2644unsigned short send_implementation_id = 1; 2664unsigned short send_implementation_id = 1;
2665char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN] = "";
2645 2666
2646EXPORT_SYMBOL_GPL(nfs_callback_set_tcpport); 2667EXPORT_SYMBOL_GPL(nfs_callback_set_tcpport);
2647EXPORT_SYMBOL_GPL(nfs_callback_tcpport); 2668EXPORT_SYMBOL_GPL(nfs_callback_tcpport);
@@ -2649,6 +2670,7 @@ EXPORT_SYMBOL_GPL(nfs_idmap_cache_timeout);
2649EXPORT_SYMBOL_GPL(nfs4_disable_idmapping); 2670EXPORT_SYMBOL_GPL(nfs4_disable_idmapping);
2650EXPORT_SYMBOL_GPL(max_session_slots); 2671EXPORT_SYMBOL_GPL(max_session_slots);
2651EXPORT_SYMBOL_GPL(send_implementation_id); 2672EXPORT_SYMBOL_GPL(send_implementation_id);
2673EXPORT_SYMBOL_GPL(nfs4_client_id_uniquifier);
2652 2674
2653#define NFS_CALLBACK_MAXPORTNR (65535U) 2675#define NFS_CALLBACK_MAXPORTNR (65535U)
2654 2676
@@ -2659,7 +2681,7 @@ static int param_set_portnr(const char *val, const struct kernel_param *kp)
2659 2681
2660 if (!val) 2682 if (!val)
2661 return -EINVAL; 2683 return -EINVAL;
2662 ret = strict_strtoul(val, 0, &num); 2684 ret = kstrtoul(val, 0, &num);
2663 if (ret == -EINVAL || num > NFS_CALLBACK_MAXPORTNR) 2685 if (ret == -EINVAL || num > NFS_CALLBACK_MAXPORTNR)
2664 return -EINVAL; 2686 return -EINVAL;
2665 *((unsigned int *)kp->arg) = num; 2687 *((unsigned int *)kp->arg) = num;
@@ -2674,6 +2696,8 @@ static struct kernel_param_ops param_ops_portnr = {
2674module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644); 2696module_param_named(callback_tcpport, nfs_callback_set_tcpport, portnr, 0644);
2675module_param(nfs_idmap_cache_timeout, int, 0644); 2697module_param(nfs_idmap_cache_timeout, int, 0644);
2676module_param(nfs4_disable_idmapping, bool, 0644); 2698module_param(nfs4_disable_idmapping, bool, 0644);
2699module_param_string(nfs4_unique_id, nfs4_client_id_uniquifier,
2700 NFS4_CLIENT_ID_UNIQ_LEN, 0600);
2677MODULE_PARM_DESC(nfs4_disable_idmapping, 2701MODULE_PARM_DESC(nfs4_disable_idmapping,
2678 "Turn off NFSv4 idmapping when using 'sec=sys'"); 2702 "Turn off NFSv4 idmapping when using 'sec=sys'");
2679module_param(max_session_slots, ushort, 0644); 2703module_param(max_session_slots, ushort, 0644);
@@ -2682,6 +2706,7 @@ MODULE_PARM_DESC(max_session_slots, "Maximum number of outstanding NFSv4.1 "
2682module_param(send_implementation_id, ushort, 0644); 2706module_param(send_implementation_id, ushort, 0644);
2683MODULE_PARM_DESC(send_implementation_id, 2707MODULE_PARM_DESC(send_implementation_id,
2684 "Send implementation ID with NFSv4.1 exchange_id"); 2708 "Send implementation ID with NFSv4.1 exchange_id");
2709MODULE_PARM_DESC(nfs4_unique_id, "nfs_client_id4 uniquifier string");
2685MODULE_ALIAS("nfs4"); 2710MODULE_ALIAS("nfs4");
2686 2711
2687#endif /* CONFIG_NFS_V4 */ 2712#endif /* CONFIG_NFS_V4 */
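
The fs/nfs/super.c changes above add a "migration"/"nomigration" mount option pair backed by the NFS_OPTION_MIGRATION bit, move string parsing from strict_strtoul() to kstrtoul(), and expose the nfs_client_id4 uniquifier as the nfs4_unique_id module parameter. The flag handling is the usual set/clear idiom, where disabling an option means masking with the complement. A self-contained userspace illustration; the program itself is hypothetical, only the flag values mirror include/linux/nfs_fs_sb.h:

#include <stdio.h>

#define NFS_OPTION_FSCACHE	0x00000001
#define NFS_OPTION_MIGRATION	0x00000002

int main(void)
{
	unsigned int options = NFS_OPTION_FSCACHE;

	options |= NFS_OPTION_MIGRATION;	/* "migration" sets the bit */
	options &= ~NFS_OPTION_MIGRATION;	/* "nomigration" clears it  */

	printf("fsc=%d migration=%d\n",
	       !!(options & NFS_OPTION_FSCACHE),
	       !!(options & NFS_OPTION_MIGRATION));
	return 0;
}
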
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index e3b55372726c..9347ab7c9574 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -846,6 +846,7 @@ static int nfs_writepage_setup(struct nfs_open_context *ctx, struct page *page,
846int nfs_flush_incompatible(struct file *file, struct page *page) 846int nfs_flush_incompatible(struct file *file, struct page *page)
847{ 847{
848 struct nfs_open_context *ctx = nfs_file_open_context(file); 848 struct nfs_open_context *ctx = nfs_file_open_context(file);
849 struct nfs_lock_context *l_ctx;
849 struct nfs_page *req; 850 struct nfs_page *req;
850 int do_flush, status; 851 int do_flush, status;
851 /* 852 /*
@@ -860,9 +861,12 @@ int nfs_flush_incompatible(struct file *file, struct page *page)
860 req = nfs_page_find_request(page); 861 req = nfs_page_find_request(page);
861 if (req == NULL) 862 if (req == NULL)
862 return 0; 863 return 0;
863 do_flush = req->wb_page != page || req->wb_context != ctx || 864 l_ctx = req->wb_lock_context;
864 req->wb_lock_context->lockowner != current->files || 865 do_flush = req->wb_page != page || req->wb_context != ctx;
865 req->wb_lock_context->pid != current->tgid; 866 if (l_ctx) {
867 do_flush |= l_ctx->lockowner.l_owner != current->files
868 || l_ctx->lockowner.l_pid != current->tgid;
869 }
866 nfs_release_request(req); 870 nfs_release_request(req);
867 if (!do_flush) 871 if (!do_flush)
868 return 0; 872 return 0;
@@ -1576,6 +1580,7 @@ static void nfs_commit_release_pages(struct nfs_commit_data *data)
1576 /* We have a mismatch. Write the page again */ 1580 /* We have a mismatch. Write the page again */
1577 dprintk(" mismatch\n"); 1581 dprintk(" mismatch\n");
1578 nfs_mark_request_dirty(req); 1582 nfs_mark_request_dirty(req);
1583 set_bit(NFS_CONTEXT_RESEND_WRITES, &req->wb_context->flags);
1579 next: 1584 next:
1580 nfs_unlock_and_release_request(req); 1585 nfs_unlock_and_release_request(req);
1581 } 1586 }
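
In fs/nfs/write.c, nfs_flush_incompatible() now reads the lock owner out of the new struct nfs_lockowner (see the nfs_fs.h hunk below) and skips the ownership comparison entirely when the request carries no lock context. A simplified, hypothetical stand-in for that decision, using plain C types instead of the kernel structures:

#include <stdbool.h>

struct demo_lockowner { void *l_owner; int l_pid; };
struct demo_lock_context { struct demo_lockowner lockowner; };

static bool demo_flush_needed(const struct demo_lock_context *l_ctx,
			      void *current_files, int current_tgid,
			      bool page_or_ctx_mismatch)
{
	bool do_flush = page_or_ctx_mismatch;

	/* Only compare ownership when a lock context actually exists. */
	if (l_ctx)
		do_flush |= l_ctx->lockowner.l_owner != current_files ||
			    l_ctx->lockowner.l_pid != current_tgid;
	return do_flush;
}
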
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
index 4b03f56e280e..334a2f5f6bf1 100644
--- a/include/linux/nfs_fs.h
+++ b/include/linux/nfs_fs.h
@@ -81,12 +81,16 @@ struct nfs_access_entry {
81 int mask; 81 int mask;
82}; 82};
83 83
84struct nfs_lockowner {
85 fl_owner_t l_owner;
86 pid_t l_pid;
87};
88
84struct nfs_lock_context { 89struct nfs_lock_context {
85 atomic_t count; 90 atomic_t count;
86 struct list_head list; 91 struct list_head list;
87 struct nfs_open_context *open_context; 92 struct nfs_open_context *open_context;
88 fl_owner_t lockowner; 93 struct nfs_lockowner lockowner;
89 pid_t pid;
90}; 94};
91 95
92struct nfs4_state; 96struct nfs4_state;
@@ -99,6 +103,7 @@ struct nfs_open_context {
99 103
100 unsigned long flags; 104 unsigned long flags;
101#define NFS_CONTEXT_ERROR_WRITE (0) 105#define NFS_CONTEXT_ERROR_WRITE (0)
106#define NFS_CONTEXT_RESEND_WRITES (1)
102 int error; 107 int error;
103 108
104 struct list_head list; 109 struct list_head list;
@@ -355,6 +360,8 @@ extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *);
355extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr); 360extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr);
356extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr); 361extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr);
357extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); 362extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *);
363extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *);
364extern void nfs_access_set_mask(struct nfs_access_entry *, u32);
358extern int nfs_permission(struct inode *, int); 365extern int nfs_permission(struct inode *, int);
359extern int nfs_open(struct inode *, struct file *); 366extern int nfs_open(struct inode *, struct file *);
360extern int nfs_release(struct inode *, struct file *); 367extern int nfs_release(struct inode *, struct file *);
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 310c63c8ab2c..a9e76ee1adca 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -39,6 +39,7 @@ struct nfs_client {
39 unsigned long cl_flags; /* behavior switches */ 39 unsigned long cl_flags; /* behavior switches */
40#define NFS_CS_NORESVPORT 0 /* - use ephemeral src port */ 40#define NFS_CS_NORESVPORT 0 /* - use ephemeral src port */
41#define NFS_CS_DISCRTRY 1 /* - disconnect on RPC retry */ 41#define NFS_CS_DISCRTRY 1 /* - disconnect on RPC retry */
42#define NFS_CS_MIGRATION 2 /* - transparent state migr */
42 struct sockaddr_storage cl_addr; /* server identifier */ 43 struct sockaddr_storage cl_addr; /* server identifier */
43 size_t cl_addrlen; 44 size_t cl_addrlen;
44 char * cl_hostname; /* hostname of server */ 45 char * cl_hostname; /* hostname of server */
@@ -81,6 +82,7 @@ struct nfs_client {
81 /* The flags used for obtaining the clientid during EXCHANGE_ID */ 82 /* The flags used for obtaining the clientid during EXCHANGE_ID */
82 u32 cl_exchange_flags; 83 u32 cl_exchange_flags;
83 struct nfs4_session *cl_session; /* shared session */ 84 struct nfs4_session *cl_session; /* shared session */
85 bool cl_preserve_clid;
84 struct nfs41_server_owner *cl_serverowner; 86 struct nfs41_server_owner *cl_serverowner;
85 struct nfs41_server_scope *cl_serverscope; 87 struct nfs41_server_scope *cl_serverscope;
86 struct nfs41_impl_id *cl_implid; 88 struct nfs41_impl_id *cl_implid;
@@ -125,6 +127,7 @@ struct nfs_server {
125 unsigned int namelen; 127 unsigned int namelen;
126 unsigned int options; /* extra options enabled by mount */ 128 unsigned int options; /* extra options enabled by mount */
127#define NFS_OPTION_FSCACHE 0x00000001 /* - local caching enabled */ 129#define NFS_OPTION_FSCACHE 0x00000001 /* - local caching enabled */
130#define NFS_OPTION_MIGRATION 0x00000002 /* - NFSv4 migration enabled */
128 131
129 struct nfs_fsid fsid; 132 struct nfs_fsid fsid;
130 __u64 maxfilesize; /* maximum file size */ 133 __u64 maxfilesize; /* maximum file size */
diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h
index be9cf3c7e79e..a73ea89789d1 100644
--- a/include/linux/nfs_xdr.h
+++ b/include/linux/nfs_xdr.h
@@ -251,7 +251,6 @@ struct nfs4_layoutget_res {
251struct nfs4_layoutget { 251struct nfs4_layoutget {
252 struct nfs4_layoutget_args args; 252 struct nfs4_layoutget_args args;
253 struct nfs4_layoutget_res res; 253 struct nfs4_layoutget_res res;
254 struct pnfs_layout_segment **lsegpp;
255 gfp_t gfp_flags; 254 gfp_t gfp_flags;
256}; 255};
257 256
@@ -335,6 +334,7 @@ struct nfs_openargs {
335 struct nfs_seqid * seqid; 334 struct nfs_seqid * seqid;
336 int open_flags; 335 int open_flags;
337 fmode_t fmode; 336 fmode_t fmode;
337 u32 access;
338 __u64 clientid; 338 __u64 clientid;
339 struct stateowner_id id; 339 struct stateowner_id id;
340 union { 340 union {
@@ -369,6 +369,9 @@ struct nfs_openres {
369 struct nfs4_string *owner; 369 struct nfs4_string *owner;
370 struct nfs4_string *group_owner; 370 struct nfs4_string *group_owner;
371 struct nfs4_sequence_res seq_res; 371 struct nfs4_sequence_res seq_res;
372 __u32 access_request;
373 __u32 access_supported;
374 __u32 access_result;
372}; 375};
373 376
374/* 377/*
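
The nfs_xdr.h hunks add an access field to struct nfs_openargs and access_request/access_supported/access_result to struct nfs_openres, so an OPEN call can carry an access probe and the reply can report which bits the server actually evaluated. A hedged illustration of how a caller might read the result; the struct and helper below are simplified stand-ins, and the bit values are the standard ACCESS4 flags from the NFSv4 protocol.

#include <stdbool.h>
#include <stdint.h>

#define ACCESS4_READ	0x0001
#define ACCESS4_EXECUTE	0x0020

struct demo_openres {		/* mirrors the three new nfs_openres fields */
	uint32_t access_request;
	uint32_t access_supported;
	uint32_t access_result;
};

/* Trust the execute answer only if the server says it evaluated that bit. */
static bool demo_open_may_execute(const struct demo_openres *res)
{
	return (res->access_supported & ACCESS4_EXECUTE) &&
	       (res->access_result & ACCESS4_EXECUTE);
}
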
diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h
index 523547ecfee2..34206b84d8da 100644
--- a/include/linux/sunrpc/clnt.h
+++ b/include/linux/sunrpc/clnt.h
@@ -130,6 +130,8 @@ struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *,
130 const struct rpc_program *, u32); 130 const struct rpc_program *, u32);
131void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt); 131void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt);
132struct rpc_clnt *rpc_clone_client(struct rpc_clnt *); 132struct rpc_clnt *rpc_clone_client(struct rpc_clnt *);
133struct rpc_clnt *rpc_clone_client_set_auth(struct rpc_clnt *,
134 rpc_authflavor_t);
133void rpc_shutdown_client(struct rpc_clnt *); 135void rpc_shutdown_client(struct rpc_clnt *);
134void rpc_release_client(struct rpc_clnt *); 136void rpc_release_client(struct rpc_clnt *);
135void rpc_task_release_client(struct rpc_task *); 137void rpc_task_release_client(struct rpc_task *);
diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h
index bf8c49ff7530..951cb9b7d02b 100644
--- a/include/linux/sunrpc/xprt.h
+++ b/include/linux/sunrpc/xprt.h
@@ -173,8 +173,7 @@ struct rpc_xprt {
173 unsigned int min_reqs; /* min number of slots */ 173 unsigned int min_reqs; /* min number of slots */
174 atomic_t num_reqs; /* total slots */ 174 atomic_t num_reqs; /* total slots */
175 unsigned long state; /* transport state */ 175 unsigned long state; /* transport state */
176 unsigned char shutdown : 1, /* being shut down */ 176 unsigned char resvport : 1; /* use a reserved port */
177 resvport : 1; /* use a reserved port */
178 unsigned int swapper; /* we're swapping over this 177 unsigned int swapper; /* we're swapping over this
179 transport */ 178 transport */
180 unsigned int bind_index; /* bind function index */ 179 unsigned int bind_index; /* bind function index */
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 34c522021004..909dc0c31aab 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -239,7 +239,7 @@ gss_fill_context(const void *p, const void *end, struct gss_cl_ctx *ctx, struct
239 } 239 }
240 return q; 240 return q;
241err: 241err:
242 dprintk("RPC: gss_fill_context returning %ld\n", -PTR_ERR(p)); 242 dprintk("RPC: %s returning %ld\n", __func__, -PTR_ERR(p));
243 return p; 243 return p;
244} 244}
245 245
@@ -301,10 +301,10 @@ __gss_find_upcall(struct rpc_pipe *pipe, uid_t uid)
301 if (pos->uid != uid) 301 if (pos->uid != uid)
302 continue; 302 continue;
303 atomic_inc(&pos->count); 303 atomic_inc(&pos->count);
304 dprintk("RPC: gss_find_upcall found msg %p\n", pos); 304 dprintk("RPC: %s found msg %p\n", __func__, pos);
305 return pos; 305 return pos;
306 } 306 }
307 dprintk("RPC: gss_find_upcall found nothing\n"); 307 dprintk("RPC: %s found nothing\n", __func__);
308 return NULL; 308 return NULL;
309} 309}
310 310
@@ -507,8 +507,8 @@ gss_refresh_upcall(struct rpc_task *task)
507 struct rpc_pipe *pipe; 507 struct rpc_pipe *pipe;
508 int err = 0; 508 int err = 0;
509 509
510 dprintk("RPC: %5u gss_refresh_upcall for uid %u\n", task->tk_pid, 510 dprintk("RPC: %5u %s for uid %u\n",
511 cred->cr_uid); 511 task->tk_pid, __func__, cred->cr_uid);
512 gss_msg = gss_setup_upcall(task->tk_client, gss_auth, cred); 512 gss_msg = gss_setup_upcall(task->tk_client, gss_auth, cred);
513 if (PTR_ERR(gss_msg) == -EAGAIN) { 513 if (PTR_ERR(gss_msg) == -EAGAIN) {
514 /* XXX: warning on the first, under the assumption we 514 /* XXX: warning on the first, under the assumption we
@@ -539,8 +539,8 @@ gss_refresh_upcall(struct rpc_task *task)
539 spin_unlock(&pipe->lock); 539 spin_unlock(&pipe->lock);
540 gss_release_msg(gss_msg); 540 gss_release_msg(gss_msg);
541out: 541out:
542 dprintk("RPC: %5u gss_refresh_upcall for uid %u result %d\n", 542 dprintk("RPC: %5u %s for uid %u result %d\n",
543 task->tk_pid, cred->cr_uid, err); 543 task->tk_pid, __func__, cred->cr_uid, err);
544 return err; 544 return err;
545} 545}
546 546
@@ -553,7 +553,7 @@ gss_create_upcall(struct gss_auth *gss_auth, struct gss_cred *gss_cred)
553 DEFINE_WAIT(wait); 553 DEFINE_WAIT(wait);
554 int err = 0; 554 int err = 0;
555 555
556 dprintk("RPC: gss_upcall for uid %u\n", cred->cr_uid); 556 dprintk("RPC: %s for uid %u\n", __func__, cred->cr_uid);
557retry: 557retry:
558 gss_msg = gss_setup_upcall(gss_auth->client, gss_auth, cred); 558 gss_msg = gss_setup_upcall(gss_auth->client, gss_auth, cred);
559 if (PTR_ERR(gss_msg) == -EAGAIN) { 559 if (PTR_ERR(gss_msg) == -EAGAIN) {
@@ -594,8 +594,8 @@ out_intr:
594 finish_wait(&gss_msg->waitqueue, &wait); 594 finish_wait(&gss_msg->waitqueue, &wait);
595 gss_release_msg(gss_msg); 595 gss_release_msg(gss_msg);
596out: 596out:
597 dprintk("RPC: gss_create_upcall for uid %u result %d\n", 597 dprintk("RPC: %s for uid %u result %d\n",
598 cred->cr_uid, err); 598 __func__, cred->cr_uid, err);
599 return err; 599 return err;
600} 600}
601 601
@@ -681,7 +681,7 @@ err_put_ctx:
681err: 681err:
682 kfree(buf); 682 kfree(buf);
683out: 683out:
684 dprintk("RPC: gss_pipe_downcall returning %Zd\n", err); 684 dprintk("RPC: %s returning %Zd\n", __func__, err);
685 return err; 685 return err;
686} 686}
687 687
@@ -747,8 +747,8 @@ gss_pipe_destroy_msg(struct rpc_pipe_msg *msg)
747 struct gss_upcall_msg *gss_msg = container_of(msg, struct gss_upcall_msg, msg); 747 struct gss_upcall_msg *gss_msg = container_of(msg, struct gss_upcall_msg, msg);
748 748
749 if (msg->errno < 0) { 749 if (msg->errno < 0) {
750 dprintk("RPC: gss_pipe_destroy_msg releasing msg %p\n", 750 dprintk("RPC: %s releasing msg %p\n",
751 gss_msg); 751 __func__, gss_msg);
752 atomic_inc(&gss_msg->count); 752 atomic_inc(&gss_msg->count);
753 gss_unhash_msg(gss_msg); 753 gss_unhash_msg(gss_msg);
754 if (msg->errno == -ETIMEDOUT) 754 if (msg->errno == -ETIMEDOUT)
@@ -976,7 +976,7 @@ gss_destroying_context(struct rpc_cred *cred)
976static void 976static void
977gss_do_free_ctx(struct gss_cl_ctx *ctx) 977gss_do_free_ctx(struct gss_cl_ctx *ctx)
978{ 978{
979 dprintk("RPC: gss_free_ctx\n"); 979 dprintk("RPC: %s\n", __func__);
980 980
981 gss_delete_sec_context(&ctx->gc_gss_ctx); 981 gss_delete_sec_context(&ctx->gc_gss_ctx);
982 kfree(ctx->gc_wire_ctx.data); 982 kfree(ctx->gc_wire_ctx.data);
@@ -999,7 +999,7 @@ gss_free_ctx(struct gss_cl_ctx *ctx)
999static void 999static void
1000gss_free_cred(struct gss_cred *gss_cred) 1000gss_free_cred(struct gss_cred *gss_cred)
1001{ 1001{
1002 dprintk("RPC: gss_free_cred %p\n", gss_cred); 1002 dprintk("RPC: %s cred=%p\n", __func__, gss_cred);
1003 kfree(gss_cred); 1003 kfree(gss_cred);
1004} 1004}
1005 1005
@@ -1049,8 +1049,8 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
1049 struct gss_cred *cred = NULL; 1049 struct gss_cred *cred = NULL;
1050 int err = -ENOMEM; 1050 int err = -ENOMEM;
1051 1051
1052 dprintk("RPC: gss_create_cred for uid %d, flavor %d\n", 1052 dprintk("RPC: %s for uid %d, flavor %d\n",
1053 acred->uid, auth->au_flavor); 1053 __func__, acred->uid, auth->au_flavor);
1054 1054
1055 if (!(cred = kzalloc(sizeof(*cred), GFP_NOFS))) 1055 if (!(cred = kzalloc(sizeof(*cred), GFP_NOFS)))
1056 goto out_err; 1056 goto out_err;
@@ -1069,7 +1069,7 @@ gss_create_cred(struct rpc_auth *auth, struct auth_cred *acred, int flags)
1069 return &cred->gc_base; 1069 return &cred->gc_base;
1070 1070
1071out_err: 1071out_err:
1072 dprintk("RPC: gss_create_cred failed with error %d\n", err); 1072 dprintk("RPC: %s failed with error %d\n", __func__, err);
1073 return ERR_PTR(err); 1073 return ERR_PTR(err);
1074} 1074}
1075 1075
@@ -1127,7 +1127,7 @@ gss_marshal(struct rpc_task *task, __be32 *p)
1127 struct kvec iov; 1127 struct kvec iov;
1128 struct xdr_buf verf_buf; 1128 struct xdr_buf verf_buf;
1129 1129
1130 dprintk("RPC: %5u gss_marshal\n", task->tk_pid); 1130 dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
1131 1131
1132 *p++ = htonl(RPC_AUTH_GSS); 1132 *p++ = htonl(RPC_AUTH_GSS);
1133 cred_len = p++; 1133 cred_len = p++;
@@ -1253,7 +1253,7 @@ gss_validate(struct rpc_task *task, __be32 *p)
1253 u32 flav,len; 1253 u32 flav,len;
1254 u32 maj_stat; 1254 u32 maj_stat;
1255 1255
1256 dprintk("RPC: %5u gss_validate\n", task->tk_pid); 1256 dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
1257 1257
1258 flav = ntohl(*p++); 1258 flav = ntohl(*p++);
1259 if ((len = ntohl(*p++)) > RPC_MAX_AUTH_SIZE) 1259 if ((len = ntohl(*p++)) > RPC_MAX_AUTH_SIZE)
@@ -1271,20 +1271,20 @@ gss_validate(struct rpc_task *task, __be32 *p)
1271 if (maj_stat == GSS_S_CONTEXT_EXPIRED) 1271 if (maj_stat == GSS_S_CONTEXT_EXPIRED)
1272 clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); 1272 clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags);
1273 if (maj_stat) { 1273 if (maj_stat) {
1274 dprintk("RPC: %5u gss_validate: gss_verify_mic returned " 1274 dprintk("RPC: %5u %s: gss_verify_mic returned error 0x%08x\n",
1275 "error 0x%08x\n", task->tk_pid, maj_stat); 1275 task->tk_pid, __func__, maj_stat);
1276 goto out_bad; 1276 goto out_bad;
1277 } 1277 }
1278 /* We leave it to unwrap to calculate au_rslack. For now we just 1278 /* We leave it to unwrap to calculate au_rslack. For now we just
1279 * calculate the length of the verifier: */ 1279 * calculate the length of the verifier: */
1280 cred->cr_auth->au_verfsize = XDR_QUADLEN(len) + 2; 1280 cred->cr_auth->au_verfsize = XDR_QUADLEN(len) + 2;
1281 gss_put_ctx(ctx); 1281 gss_put_ctx(ctx);
1282 dprintk("RPC: %5u gss_validate: gss_verify_mic succeeded.\n", 1282 dprintk("RPC: %5u %s: gss_verify_mic succeeded.\n",
1283 task->tk_pid); 1283 task->tk_pid, __func__);
1284 return p + XDR_QUADLEN(len); 1284 return p + XDR_QUADLEN(len);
1285out_bad: 1285out_bad:
1286 gss_put_ctx(ctx); 1286 gss_put_ctx(ctx);
1287 dprintk("RPC: %5u gss_validate failed.\n", task->tk_pid); 1287 dprintk("RPC: %5u %s failed.\n", task->tk_pid, __func__);
1288 return NULL; 1288 return NULL;
1289} 1289}
1290 1290
@@ -1466,7 +1466,7 @@ gss_wrap_req(struct rpc_task *task,
1466 struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred); 1466 struct gss_cl_ctx *ctx = gss_cred_get_ctx(cred);
1467 int status = -EIO; 1467 int status = -EIO;
1468 1468
1469 dprintk("RPC: %5u gss_wrap_req\n", task->tk_pid); 1469 dprintk("RPC: %5u %s\n", task->tk_pid, __func__);
1470 if (ctx->gc_proc != RPC_GSS_PROC_DATA) { 1470 if (ctx->gc_proc != RPC_GSS_PROC_DATA) {
1471 /* The spec seems a little ambiguous here, but I think that not 1471 /* The spec seems a little ambiguous here, but I think that not
1472 * wrapping context destruction requests makes the most sense. 1472 * wrapping context destruction requests makes the most sense.
@@ -1489,7 +1489,7 @@ gss_wrap_req(struct rpc_task *task,
1489 } 1489 }
1490out: 1490out:
1491 gss_put_ctx(ctx); 1491 gss_put_ctx(ctx);
1492 dprintk("RPC: %5u gss_wrap_req returning %d\n", task->tk_pid, status); 1492 dprintk("RPC: %5u %s returning %d\n", task->tk_pid, __func__, status);
1493 return status; 1493 return status;
1494} 1494}
1495 1495
@@ -1604,8 +1604,8 @@ out_decode:
1604 status = gss_unwrap_req_decode(decode, rqstp, p, obj); 1604 status = gss_unwrap_req_decode(decode, rqstp, p, obj);
1605out: 1605out:
1606 gss_put_ctx(ctx); 1606 gss_put_ctx(ctx);
1607 dprintk("RPC: %5u gss_unwrap_resp returning %d\n", task->tk_pid, 1607 dprintk("RPC: %5u %s returning %d\n",
1608 status); 1608 task->tk_pid, __func__, status);
1609 return status; 1609 return status;
1610} 1610}
1611 1611
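
The auth_gss.c hunks are a mechanical cleanup: literal function names in dprintk() format strings are replaced by the __func__ identifier so the messages cannot go stale after a rename. A tiny self-contained demo of the pattern, with printf() standing in for dprintk(); only that substitution is an assumption.

#include <stdio.h>

static int gss_style_logging_example(int uid)
{
	/* The format string survives a function rename untouched. */
	printf("RPC: %s for uid %d\n", __func__, uid);
	return 0;
}

int main(void)
{
	return gss_style_logging_example(1000);
}
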
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index fa48c60aef23..cdc7564b4512 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -490,61 +490,86 @@ EXPORT_SYMBOL_GPL(rpc_create);
490 * same transport while varying parameters such as the authentication 490 * same transport while varying parameters such as the authentication
491 * flavour. 491 * flavour.
492 */ 492 */
493struct rpc_clnt * 493static struct rpc_clnt *__rpc_clone_client(struct rpc_create_args *args,
494rpc_clone_client(struct rpc_clnt *clnt) 494 struct rpc_clnt *clnt)
495{ 495{
496 struct rpc_clnt *new;
497 struct rpc_xprt *xprt; 496 struct rpc_xprt *xprt;
498 int err = -ENOMEM; 497 struct rpc_clnt *new;
498 int err;
499 499
500 new = kmemdup(clnt, sizeof(*new), GFP_KERNEL); 500 err = -ENOMEM;
501 if (!new)
502 goto out_no_clnt;
503 new->cl_parent = clnt;
504 /* Turn off autobind on clones */
505 new->cl_autobind = 0;
506 INIT_LIST_HEAD(&new->cl_tasks);
507 spin_lock_init(&new->cl_lock);
508 rpc_init_rtt(&new->cl_rtt_default, clnt->cl_timeout->to_initval);
509 new->cl_metrics = rpc_alloc_iostats(clnt);
510 if (new->cl_metrics == NULL)
511 goto out_no_stats;
512 if (clnt->cl_principal) {
513 new->cl_principal = kstrdup(clnt->cl_principal, GFP_KERNEL);
514 if (new->cl_principal == NULL)
515 goto out_no_principal;
516 }
517 rcu_read_lock(); 501 rcu_read_lock();
518 xprt = xprt_get(rcu_dereference(clnt->cl_xprt)); 502 xprt = xprt_get(rcu_dereference(clnt->cl_xprt));
519 rcu_read_unlock(); 503 rcu_read_unlock();
520 if (xprt == NULL) 504 if (xprt == NULL)
521 goto out_no_transport; 505 goto out_err;
522 rcu_assign_pointer(new->cl_xprt, xprt); 506 args->servername = xprt->servername;
523 atomic_set(&new->cl_count, 1); 507
524 err = rpc_setup_pipedir(new, clnt->cl_program->pipe_dir_name); 508 new = rpc_new_client(args, xprt);
525 if (err != 0) 509 if (IS_ERR(new)) {
526 goto out_no_path; 510 err = PTR_ERR(new);
527 rpc_clnt_set_nodename(new, utsname()->nodename); 511 goto out_put;
528 if (new->cl_auth) 512 }
529 atomic_inc(&new->cl_auth->au_count); 513
530 atomic_inc(&clnt->cl_count); 514 atomic_inc(&clnt->cl_count);
531 rpc_register_client(new); 515 new->cl_parent = clnt;
532 rpciod_up(); 516
517 /* Turn off autobind on clones */
518 new->cl_autobind = 0;
519 new->cl_softrtry = clnt->cl_softrtry;
520 new->cl_discrtry = clnt->cl_discrtry;
521 new->cl_chatty = clnt->cl_chatty;
533 return new; 522 return new;
534out_no_path: 523
524out_put:
535 xprt_put(xprt); 525 xprt_put(xprt);
536out_no_transport: 526out_err:
537 kfree(new->cl_principal);
538out_no_principal:
539 rpc_free_iostats(new->cl_metrics);
540out_no_stats:
541 kfree(new);
542out_no_clnt:
543 dprintk("RPC: %s: returned error %d\n", __func__, err); 527 dprintk("RPC: %s: returned error %d\n", __func__, err);
544 return ERR_PTR(err); 528 return ERR_PTR(err);
545} 529}
530
531/**
532 * rpc_clone_client - Clone an RPC client structure
533 *
534 * @clnt: RPC client whose parameters are copied
535 *
536 * Returns a fresh RPC client or an ERR_PTR.
537 */
538struct rpc_clnt *rpc_clone_client(struct rpc_clnt *clnt)
539{
540 struct rpc_create_args args = {
541 .program = clnt->cl_program,
542 .prognumber = clnt->cl_prog,
543 .version = clnt->cl_vers,
544 .authflavor = clnt->cl_auth->au_flavor,
545 .client_name = clnt->cl_principal,
546 };
547 return __rpc_clone_client(&args, clnt);
548}
546EXPORT_SYMBOL_GPL(rpc_clone_client); 549EXPORT_SYMBOL_GPL(rpc_clone_client);
547 550
551/**
552 * rpc_clone_client_set_auth - Clone an RPC client structure and set its auth
553 *
554 * @clnt: RPC client whose parameters are copied
555 * @auth: security flavor for new client
556 *
557 * Returns a fresh RPC client or an ERR_PTR.
558 */
559struct rpc_clnt *
560rpc_clone_client_set_auth(struct rpc_clnt *clnt, rpc_authflavor_t flavor)
561{
562 struct rpc_create_args args = {
563 .program = clnt->cl_program,
564 .prognumber = clnt->cl_prog,
565 .version = clnt->cl_vers,
566 .authflavor = flavor,
567 .client_name = clnt->cl_principal,
568 };
569 return __rpc_clone_client(&args, clnt);
570}
571EXPORT_SYMBOL_GPL(rpc_clone_client_set_auth);
572
548/* 573/*
549 * Kill all tasks for the given client. 574 * Kill all tasks for the given client.
550 * XXX: kill their descendants as well? 575 * XXX: kill their descendants as well?
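
The clnt.c rewrite funnels cloning through rpc_new_client() via the shared __rpc_clone_client() helper and adds rpc_clone_client_set_auth(), which clones a client while overriding only the security flavour. A hedged sketch of the new call, not a buildable unit on its own; my_clone_with_krb5i() is hypothetical and the flavour used here is only an example.

#include <linux/err.h>
#include <linux/sunrpc/clnt.h>

static struct rpc_clnt *my_clone_with_krb5i(struct rpc_clnt *clnt)
{
	struct rpc_clnt *new;

	/* Same program, version and transport as 'clnt', different flavour. */
	new = rpc_clone_client_set_auth(clnt, RPC_AUTH_GSS_KRB5I);
	if (IS_ERR(new))
		return new;	/* ERR_PTR-style error, as with rpc_clone_client() */
	return new;
}
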
diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
index 21fde99e5c56..80f5dd23417d 100644
--- a/net/sunrpc/rpc_pipe.c
+++ b/net/sunrpc/rpc_pipe.c
@@ -1119,8 +1119,8 @@ rpc_fill_super(struct super_block *sb, void *data, int silent)
1119 return -ENOMEM; 1119 return -ENOMEM;
1120 if (rpc_populate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF, NULL)) 1120 if (rpc_populate(root, files, RPCAUTH_lockd, RPCAUTH_RootEOF, NULL))
1121 return -ENOMEM; 1121 return -ENOMEM;
1122 dprintk("RPC: sending pipefs MOUNT notification for net %p%s\n", net, 1122 dprintk("RPC: sending pipefs MOUNT notification for net %p%s\n",
1123 NET_NAME(net)); 1123 net, NET_NAME(net));
1124 sn->pipefs_sb = sb; 1124 sn->pipefs_sb = sb;
1125 err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list, 1125 err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
1126 RPC_PIPEFS_MOUNT, 1126 RPC_PIPEFS_MOUNT,
@@ -1155,8 +1155,8 @@ static void rpc_kill_sb(struct super_block *sb)
1155 sn->pipefs_sb = NULL; 1155 sn->pipefs_sb = NULL;
1156 mutex_unlock(&sn->pipefs_sb_lock); 1156 mutex_unlock(&sn->pipefs_sb_lock);
1157 put_net(net); 1157 put_net(net);
1158 dprintk("RPC: sending pipefs UMOUNT notification for net %p%s\n", net, 1158 dprintk("RPC: sending pipefs UMOUNT notification for net %p%s\n",
1159 NET_NAME(net)); 1159 net, NET_NAME(net));
1160 blocking_notifier_call_chain(&rpc_pipefs_notifier_list, 1160 blocking_notifier_call_chain(&rpc_pipefs_notifier_list,
1161 RPC_PIPEFS_UMOUNT, 1161 RPC_PIPEFS_UMOUNT,
1162 sb); 1162 sb);
diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c
index 128494ec9a64..6357fcb00c7e 100644
--- a/net/sunrpc/sched.c
+++ b/net/sunrpc/sched.c
@@ -1022,7 +1022,7 @@ static int rpciod_start(void)
1022 * Create the rpciod thread and wait for it to start. 1022 * Create the rpciod thread and wait for it to start.
1023 */ 1023 */
1024 dprintk("RPC: creating workqueue rpciod\n"); 1024 dprintk("RPC: creating workqueue rpciod\n");
1025 wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 0); 1025 wq = alloc_workqueue("rpciod", WQ_MEM_RECLAIM, 1);
1026 rpciod_workqueue = wq; 1026 rpciod_workqueue = wq;
1027 return rpciod_workqueue != NULL; 1027 return rpciod_workqueue != NULL;
1028} 1028}
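
rpciod is now allocated with max_active set to 1 rather than 0. To the best of my understanding of the workqueue API, 0 selects the subsystem default concurrency while 1 caps the queue at one in-flight work item per CPU; the fragment below only restates the call shape for reference and is not an addition to the kernel code.

#include <linux/workqueue.h>

static struct workqueue_struct *example_rpciod_wq(void)
{
	/* WQ_MEM_RECLAIM keeps a rescuer thread; max_active = 1 serialises
	 * work items on each CPU. */
	return alloc_workqueue("example_rpciod", WQ_MEM_RECLAIM, 1);
}
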
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index 0afba1b4b656..08f50afd5f2a 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -730,19 +730,24 @@ static unsigned int xdr_align_pages(struct xdr_stream *xdr, unsigned int len)
730 730
731 if (xdr->nwords == 0) 731 if (xdr->nwords == 0)
732 return 0; 732 return 0;
733 if (nwords > xdr->nwords) {
734 nwords = xdr->nwords;
735 len = nwords << 2;
736 }
737 /* Realign pages to current pointer position */ 733 /* Realign pages to current pointer position */
738 iov = buf->head; 734 iov = buf->head;
739 if (iov->iov_len > cur) 735 if (iov->iov_len > cur) {
740 xdr_shrink_bufhead(buf, iov->iov_len - cur); 736 xdr_shrink_bufhead(buf, iov->iov_len - cur);
737 xdr->nwords = XDR_QUADLEN(buf->len - cur);
738 }
741 739
742 /* Truncate page data and move it into the tail */ 740 if (nwords > xdr->nwords) {
743 if (buf->page_len > len) 741 nwords = xdr->nwords;
742 len = nwords << 2;
743 }
744 if (buf->page_len <= len)
745 len = buf->page_len;
746 else if (nwords < xdr->nwords) {
747 /* Truncate page data and move it into the tail */
744 xdr_shrink_pagelen(buf, buf->page_len - len); 748 xdr_shrink_pagelen(buf, buf->page_len - len);
745 xdr->nwords = XDR_QUADLEN(buf->len - cur); 749 xdr->nwords = XDR_QUADLEN(buf->len - cur);
750 }
746 return len; 751 return len;
747} 752}
748 753
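
xdr_align_pages() now refreshes xdr->nwords after shrinking the head and clamps the returned length to buf->page_len before deciding whether page data must be shrunk into the tail. The clamping is done in 4-byte XDR words; a hypothetical, self-contained demo of that rounding, with XDR_QUADLEN mirroring the kernel macro:

#include <stdio.h>

#define XDR_QUADLEN(l)	(((l) + 3) >> 2)	/* bytes -> 32-bit words, rounded up */

int main(void)
{
	unsigned int lengths[] = { 0, 1, 4, 5, 1023 };
	unsigned int i;

	for (i = 0; i < sizeof(lengths) / sizeof(lengths[0]); i++)
		printf("len=%4u -> nwords=%3u (padded to %4u bytes)\n",
		       lengths[i], XDR_QUADLEN(lengths[i]),
		       XDR_QUADLEN(lengths[i]) << 2);
	return 0;
}
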
diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c
index 5d7f61d7559c..bd462a532acf 100644
--- a/net/sunrpc/xprt.c
+++ b/net/sunrpc/xprt.c
@@ -231,7 +231,7 @@ EXPORT_SYMBOL_GPL(xprt_reserve_xprt);
231static void xprt_clear_locked(struct rpc_xprt *xprt) 231static void xprt_clear_locked(struct rpc_xprt *xprt)
232{ 232{
233 xprt->snd_task = NULL; 233 xprt->snd_task = NULL;
234 if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state) || xprt->shutdown) { 234 if (!test_bit(XPRT_CLOSE_WAIT, &xprt->state)) {
235 smp_mb__before_clear_bit(); 235 smp_mb__before_clear_bit();
236 clear_bit(XPRT_LOCKED, &xprt->state); 236 clear_bit(XPRT_LOCKED, &xprt->state);
237 smp_mb__after_clear_bit(); 237 smp_mb__after_clear_bit();
@@ -504,9 +504,6 @@ EXPORT_SYMBOL_GPL(xprt_wait_for_buffer_space);
504 */ 504 */
505void xprt_write_space(struct rpc_xprt *xprt) 505void xprt_write_space(struct rpc_xprt *xprt)
506{ 506{
507 if (unlikely(xprt->shutdown))
508 return;
509
510 spin_lock_bh(&xprt->transport_lock); 507 spin_lock_bh(&xprt->transport_lock);
511 if (xprt->snd_task) { 508 if (xprt->snd_task) {
512 dprintk("RPC: write space: waking waiting task on " 509 dprintk("RPC: write space: waking waiting task on "
@@ -679,7 +676,7 @@ xprt_init_autodisconnect(unsigned long data)
679 struct rpc_xprt *xprt = (struct rpc_xprt *)data; 676 struct rpc_xprt *xprt = (struct rpc_xprt *)data;
680 677
681 spin_lock(&xprt->transport_lock); 678 spin_lock(&xprt->transport_lock);
682 if (!list_empty(&xprt->recv) || xprt->shutdown) 679 if (!list_empty(&xprt->recv))
683 goto out_abort; 680 goto out_abort;
684 if (test_and_set_bit(XPRT_LOCKED, &xprt->state)) 681 if (test_and_set_bit(XPRT_LOCKED, &xprt->state))
685 goto out_abort; 682 goto out_abort;
@@ -1262,7 +1259,6 @@ out:
1262static void xprt_destroy(struct rpc_xprt *xprt) 1259static void xprt_destroy(struct rpc_xprt *xprt)
1263{ 1260{
1264 dprintk("RPC: destroying transport %p\n", xprt); 1261 dprintk("RPC: destroying transport %p\n", xprt);
1265 xprt->shutdown = 1;
1266 del_timer_sync(&xprt->timer); 1262 del_timer_sync(&xprt->timer);
1267 1263
1268 rpc_destroy_wait_queue(&xprt->binding); 1264 rpc_destroy_wait_queue(&xprt->binding);
diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c
index 5d9202dc7cb1..c9aa7a35f3bf 100644
--- a/net/sunrpc/xprtrdma/transport.c
+++ b/net/sunrpc/xprtrdma/transport.c
@@ -199,21 +199,15 @@ xprt_rdma_connect_worker(struct work_struct *work)
199 struct rpc_xprt *xprt = &r_xprt->xprt; 199 struct rpc_xprt *xprt = &r_xprt->xprt;
200 int rc = 0; 200 int rc = 0;
201 201
202 if (!xprt->shutdown) { 202 current->flags |= PF_FSTRANS;
203 current->flags |= PF_FSTRANS; 203 xprt_clear_connected(xprt);
204 xprt_clear_connected(xprt); 204
205 205 dprintk("RPC: %s: %sconnect\n", __func__,
206 dprintk("RPC: %s: %sconnect\n", __func__, 206 r_xprt->rx_ep.rep_connected != 0 ? "re" : "");
207 r_xprt->rx_ep.rep_connected != 0 ? "re" : ""); 207 rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia);
208 rc = rpcrdma_ep_connect(&r_xprt->rx_ep, &r_xprt->rx_ia); 208 if (rc)
209 if (rc) 209 xprt_wake_pending_tasks(xprt, rc);
210 goto out;
211 }
212 goto out_clear;
213 210
214out:
215 xprt_wake_pending_tasks(xprt, rc);
216out_clear:
217 dprintk("RPC: %s: exit\n", __func__); 211 dprintk("RPC: %s: exit\n", __func__);
218 xprt_clear_connecting(xprt); 212 xprt_clear_connecting(xprt);
219 current->flags &= ~PF_FSTRANS; 213 current->flags &= ~PF_FSTRANS;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index a35b8e52e551..aaaadfbe36e9 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -917,9 +917,6 @@ static void xs_local_data_ready(struct sock *sk, int len)
917 if (skb == NULL) 917 if (skb == NULL)
918 goto out; 918 goto out;
919 919
920 if (xprt->shutdown)
921 goto dropit;
922
923 repsize = skb->len - sizeof(rpc_fraghdr); 920 repsize = skb->len - sizeof(rpc_fraghdr);
924 if (repsize < 4) { 921 if (repsize < 4) {
925 dprintk("RPC: impossible RPC reply size %d\n", repsize); 922 dprintk("RPC: impossible RPC reply size %d\n", repsize);
@@ -981,9 +978,6 @@ static void xs_udp_data_ready(struct sock *sk, int len)
981 if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL) 978 if ((skb = skb_recv_datagram(sk, 0, 1, &err)) == NULL)
982 goto out; 979 goto out;
983 980
984 if (xprt->shutdown)
985 goto dropit;
986
987 repsize = skb->len - sizeof(struct udphdr); 981 repsize = skb->len - sizeof(struct udphdr);
988 if (repsize < 4) { 982 if (repsize < 4) {
989 dprintk("RPC: impossible RPC reply size %d!\n", repsize); 983 dprintk("RPC: impossible RPC reply size %d!\n", repsize);
@@ -1025,6 +1019,16 @@ static void xs_udp_data_ready(struct sock *sk, int len)
1025 read_unlock_bh(&sk->sk_callback_lock); 1019 read_unlock_bh(&sk->sk_callback_lock);
1026} 1020}
1027 1021
1022/*
1023 * Helper function to force a TCP close if the server is sending
1024 * junk and/or it has put us in CLOSE_WAIT
1025 */
1026static void xs_tcp_force_close(struct rpc_xprt *xprt)
1027{
1028 set_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
1029 xprt_force_disconnect(xprt);
1030}
1031
1028static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_reader *desc) 1032static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_reader *desc)
1029{ 1033{
1030 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); 1034 struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
@@ -1051,7 +1055,7 @@ static inline void xs_tcp_read_fraghdr(struct rpc_xprt *xprt, struct xdr_skb_rea
1051 /* Sanity check of the record length */ 1055 /* Sanity check of the record length */
1052 if (unlikely(transport->tcp_reclen < 8)) { 1056 if (unlikely(transport->tcp_reclen < 8)) {
1053 dprintk("RPC: invalid TCP record fragment length\n"); 1057 dprintk("RPC: invalid TCP record fragment length\n");
1054 xprt_force_disconnect(xprt); 1058 xs_tcp_force_close(xprt);
1055 return; 1059 return;
1056 } 1060 }
1057 dprintk("RPC: reading TCP record fragment of length %d\n", 1061 dprintk("RPC: reading TCP record fragment of length %d\n",
@@ -1132,7 +1136,7 @@ static inline void xs_tcp_read_calldir(struct sock_xprt *transport,
1132 break; 1136 break;
1133 default: 1137 default:
1134 dprintk("RPC: invalid request message type\n"); 1138 dprintk("RPC: invalid request message type\n");
1135 xprt_force_disconnect(&transport->xprt); 1139 xs_tcp_force_close(&transport->xprt);
1136 } 1140 }
1137 xs_tcp_check_fraghdr(transport); 1141 xs_tcp_check_fraghdr(transport);
1138} 1142}
@@ -1402,9 +1406,6 @@ static void xs_tcp_data_ready(struct sock *sk, int bytes)
1402 read_lock_bh(&sk->sk_callback_lock); 1406 read_lock_bh(&sk->sk_callback_lock);
1403 if (!(xprt = xprt_from_sock(sk))) 1407 if (!(xprt = xprt_from_sock(sk)))
1404 goto out; 1408 goto out;
1405 if (xprt->shutdown)
1406 goto out;
1407
1408 /* Any data means we had a useful conversation, so 1409 /* Any data means we had a useful conversation, so
1409 * the we don't need to delay the next reconnect 1410 * the we don't need to delay the next reconnect
1410 */ 1411 */
@@ -1455,6 +1456,8 @@ static void xs_tcp_cancel_linger_timeout(struct rpc_xprt *xprt)
1455static void xs_sock_mark_closed(struct rpc_xprt *xprt) 1456static void xs_sock_mark_closed(struct rpc_xprt *xprt)
1456{ 1457{
1457 smp_mb__before_clear_bit(); 1458 smp_mb__before_clear_bit();
1459 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
1460 clear_bit(XPRT_CONNECTION_CLOSE, &xprt->state);
1458 clear_bit(XPRT_CLOSE_WAIT, &xprt->state); 1461 clear_bit(XPRT_CLOSE_WAIT, &xprt->state);
1459 clear_bit(XPRT_CLOSING, &xprt->state); 1462 clear_bit(XPRT_CLOSING, &xprt->state);
1460 smp_mb__after_clear_bit(); 1463 smp_mb__after_clear_bit();
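
The xprtsock.c hunks above consolidate the "server went away" handling in xs_tcp_force_close(), which records the intent to close (XPRT_CONNECTION_CLOSE) before forcing the disconnect, while xs_sock_mark_closed() now clears both that bit and XPRT_CONNECTION_ABORT once the socket is really down; the remaining hunks below convert the other callers. A self-contained, hypothetical model of that ordering; the bit values and printf() calls are stand-ins, not the kernel definitions.

#include <stdio.h>

#define DEMO_CONNECTION_CLOSE	(1UL << 0)
#define DEMO_CONNECTION_ABORT	(1UL << 1)

static unsigned long state;

static void demo_force_disconnect(void)		/* xprt_force_disconnect() */
{
	printf("disconnecting, state=%#lx\n", state);
}

static void demo_tcp_force_close(void)		/* xs_tcp_force_close() */
{
	state |= DEMO_CONNECTION_CLOSE;		/* mark first ...      */
	demo_force_disconnect();		/* ... then disconnect */
}

static void demo_sock_mark_closed(void)		/* xs_sock_mark_closed() */
{
	state &= ~(DEMO_CONNECTION_CLOSE | DEMO_CONNECTION_ABORT);
	printf("closed, state=%#lx\n", state);
}

int main(void)
{
	demo_tcp_force_close();
	demo_sock_mark_closed();
	return 0;
}
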
@@ -1512,8 +1515,8 @@ static void xs_tcp_state_change(struct sock *sk)
1512 break; 1515 break;
1513 case TCP_CLOSE_WAIT: 1516 case TCP_CLOSE_WAIT:
1514 /* The server initiated a shutdown of the socket */ 1517 /* The server initiated a shutdown of the socket */
1515 xprt_force_disconnect(xprt);
1516 xprt->connect_cookie++; 1518 xprt->connect_cookie++;
1519 xs_tcp_force_close(xprt);
1517 case TCP_CLOSING: 1520 case TCP_CLOSING:
1518 /* 1521 /*
1519 * If the server closed down the connection, make sure that 1522 * If the server closed down the connection, make sure that
@@ -1889,9 +1892,6 @@ static void xs_local_setup_socket(struct work_struct *work)
1889 struct socket *sock; 1892 struct socket *sock;
1890 int status = -EIO; 1893 int status = -EIO;
1891 1894
1892 if (xprt->shutdown)
1893 goto out;
1894
1895 current->flags |= PF_FSTRANS; 1895 current->flags |= PF_FSTRANS;
1896 1896
1897 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state); 1897 clear_bit(XPRT_CONNECTION_ABORT, &xprt->state);
@@ -2008,9 +2008,6 @@ static void xs_udp_setup_socket(struct work_struct *work)
2008 struct socket *sock = transport->sock; 2008 struct socket *sock = transport->sock;
2009 int status = -EIO; 2009 int status = -EIO;
2010 2010
2011 if (xprt->shutdown)
2012 goto out;
2013
2014 current->flags |= PF_FSTRANS; 2011 current->flags |= PF_FSTRANS;
2015 2012
2016 /* Start by resetting any existing state */ 2013 /* Start by resetting any existing state */
@@ -2156,9 +2153,6 @@ static void xs_tcp_setup_socket(struct work_struct *work)
2156 struct rpc_xprt *xprt = &transport->xprt; 2153 struct rpc_xprt *xprt = &transport->xprt;
2157 int status = -EIO; 2154 int status = -EIO;
2158 2155
2159 if (xprt->shutdown)
2160 goto out;
2161
2162 current->flags |= PF_FSTRANS; 2156 current->flags |= PF_FSTRANS;
2163 2157
2164 if (!sock) { 2158 if (!sock) {
@@ -2199,8 +2193,7 @@ static void xs_tcp_setup_socket(struct work_struct *work)
2199 /* We're probably in TIME_WAIT. Get rid of existing socket, 2193 /* We're probably in TIME_WAIT. Get rid of existing socket,
2200 * and retry 2194 * and retry
2201 */ 2195 */
2202 set_bit(XPRT_CONNECTION_CLOSE, &xprt->state); 2196 xs_tcp_force_close(xprt);
2203 xprt_force_disconnect(xprt);
2204 break; 2197 break;
2205 case -ECONNREFUSED: 2198 case -ECONNREFUSED:
2206 case -ECONNRESET: 2199 case -ECONNRESET:
@@ -2528,6 +2521,7 @@ static struct rpc_xprt_ops xs_tcp_ops = {
2528static struct rpc_xprt_ops bc_tcp_ops = { 2521static struct rpc_xprt_ops bc_tcp_ops = {
2529 .reserve_xprt = xprt_reserve_xprt, 2522 .reserve_xprt = xprt_reserve_xprt,
2530 .release_xprt = xprt_release_xprt, 2523 .release_xprt = xprt_release_xprt,
2524 .alloc_slot = xprt_alloc_slot,
2531 .rpcbind = xs_local_rpcbind, 2525 .rpcbind = xs_local_rpcbind,
2532 .buf_alloc = bc_malloc, 2526 .buf_alloc = bc_malloc,
2533 .buf_free = bc_free, 2527 .buf_free = bc_free,