summaryrefslogtreecommitdiffstats
path: root/fs/nfsd
diff options
context:
space:
mode:
authorChristoph Hellwig <hch@lst.de>2014-05-05 07:11:59 -0400
committerChristoph Hellwig <hch@lst.de>2015-02-02 12:09:42 -0500
commit9cf514ccfacb301f3b1b4509a8ce25dffad55880 (patch)
tree2d78e2ad865bfc77c910b100c1fadab8f5004def /fs/nfsd
parent4d227fca1b32f95f1246894ebef879efccb2ec15 (diff)
nfsd: implement pNFS operations
Add support for the GETDEVICEINFO, LAYOUTGET, LAYOUTCOMMIT and LAYOUTRETURN NFSv4.1 operations, as well as backing code to manage outstanding layouts and devices. Layout management is very straightforward, with a nfs4_layout_stateid structure that extends nfs4_stid to manage layout stateids as the top-level structure. It is linked into the nfs4_file and nfs4_client structures like the other stateids, and contains a linked list of layouts that hang off the stateid. The actual layout operations are implemented in layout drivers that are not part of this commit, but will be added later. The worst part of this commit is the management of the pNFS device IDs, which suffers from a specification that is not sanely implementable due to the fact that the device-IDs are global and not bound to an export, and have a small enough size so that we can't store the fsid portion of a file handle, and must never be reused. As we still do need to perform all export authentication and validation checks on a device ID passed to GETDEVICEINFO we are caught between a rock and a hard place. To work around this issue we add a new hash that maps from a 64-bit integer to a fsid so that we can look up the export to authenticate against it, a 32-bit integer as a generation that we can bump when changing the device, and a currently unused 32-bit integer that could be used in the future to handle more than a single device per export. Entries in this hash table are never deleted as we can't reuse the ids anyway, and would have a severe lifetime problem anyway as Linux export structures are temporary structures that can go away under load. Parts of the XDR data, structures and marshaling/unmarshaling code, as well as many concepts are derived from the old pNFS server implementation from Andy Adamson, Benny Halevy, Dean Hildebrand, Marc Eshel, Fred Isaman, Mike Sager, Ricardo Labiaga and many others. Signed-off-by: Christoph Hellwig <hch@lst.de>
Diffstat (limited to 'fs/nfsd')
-rw-r--r--fs/nfsd/Kconfig10
-rw-r--r--fs/nfsd/Makefile1
-rw-r--r--fs/nfsd/export.c8
-rw-r--r--fs/nfsd/export.h2
-rw-r--r--fs/nfsd/nfs4layouts.c487
-rw-r--r--fs/nfsd/nfs4proc.c302
-rw-r--r--fs/nfsd/nfs4state.c16
-rw-r--r--fs/nfsd/nfs4xdr.c312
-rw-r--r--fs/nfsd/nfsctl.c9
-rw-r--r--fs/nfsd/nfsd.h16
-rw-r--r--fs/nfsd/pnfs.h80
-rw-r--r--fs/nfsd/state.h21
-rw-r--r--fs/nfsd/xdr4.h59
13 files changed, 1319 insertions, 4 deletions
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 73395156bdb4..683bf718aead 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -82,6 +82,16 @@ config NFSD_V4
82 82
83 If unsure, say N. 83 If unsure, say N.
84 84
85config NFSD_PNFS
86 bool "NFSv4.1 server support for Parallel NFS (pNFS)"
87 depends on NFSD_V4
88 help
89 This option enables support for the parallel NFS features of the
90 minor version 1 of the NFSv4 protocol (RFC5661) in the kernel's NFS
91 server.
92
93 If unsure, say N.
94
85config NFSD_V4_SECURITY_LABEL 95config NFSD_V4_SECURITY_LABEL
86 bool "Provide Security Label support for NFSv4 server" 96 bool "Provide Security Label support for NFSv4 server"
87 depends on NFSD_V4 && SECURITY 97 depends on NFSD_V4 && SECURITY
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index af32ef06b4fe..5806270a8567 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -12,3 +12,4 @@ nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
12nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o 12nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
13nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ 13nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
14 nfs4acl.o nfs4callback.o nfs4recover.o 14 nfs4acl.o nfs4callback.o nfs4recover.o
15nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 30a739d896ff..c3e3b6e55ae2 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -20,6 +20,7 @@
20#include "nfsd.h" 20#include "nfsd.h"
21#include "nfsfh.h" 21#include "nfsfh.h"
22#include "netns.h" 22#include "netns.h"
23#include "pnfs.h"
23 24
24#define NFSDDBG_FACILITY NFSDDBG_EXPORT 25#define NFSDDBG_FACILITY NFSDDBG_EXPORT
25 26
@@ -545,6 +546,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
545 546
546 exp.ex_client = dom; 547 exp.ex_client = dom;
547 exp.cd = cd; 548 exp.cd = cd;
549 exp.ex_devid_map = NULL;
548 550
549 /* expiry */ 551 /* expiry */
550 err = -EINVAL; 552 err = -EINVAL;
@@ -621,6 +623,8 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
621 if (!gid_valid(exp.ex_anon_gid)) 623 if (!gid_valid(exp.ex_anon_gid))
622 goto out4; 624 goto out4;
623 err = 0; 625 err = 0;
626
627 nfsd4_setup_layout_type(&exp);
624 } 628 }
625 629
626 expp = svc_export_lookup(&exp); 630 expp = svc_export_lookup(&exp);
@@ -703,6 +707,7 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
703 new->ex_fslocs.locations = NULL; 707 new->ex_fslocs.locations = NULL;
704 new->ex_fslocs.locations_count = 0; 708 new->ex_fslocs.locations_count = 0;
705 new->ex_fslocs.migrated = 0; 709 new->ex_fslocs.migrated = 0;
710 new->ex_layout_type = 0;
706 new->ex_uuid = NULL; 711 new->ex_uuid = NULL;
707 new->cd = item->cd; 712 new->cd = item->cd;
708} 713}
@@ -717,6 +722,8 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem)
717 new->ex_anon_uid = item->ex_anon_uid; 722 new->ex_anon_uid = item->ex_anon_uid;
718 new->ex_anon_gid = item->ex_anon_gid; 723 new->ex_anon_gid = item->ex_anon_gid;
719 new->ex_fsid = item->ex_fsid; 724 new->ex_fsid = item->ex_fsid;
725 new->ex_devid_map = item->ex_devid_map;
726 item->ex_devid_map = NULL;
720 new->ex_uuid = item->ex_uuid; 727 new->ex_uuid = item->ex_uuid;
721 item->ex_uuid = NULL; 728 item->ex_uuid = NULL;
722 new->ex_fslocs.locations = item->ex_fslocs.locations; 729 new->ex_fslocs.locations = item->ex_fslocs.locations;
@@ -725,6 +732,7 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem)
725 item->ex_fslocs.locations_count = 0; 732 item->ex_fslocs.locations_count = 0;
726 new->ex_fslocs.migrated = item->ex_fslocs.migrated; 733 new->ex_fslocs.migrated = item->ex_fslocs.migrated;
727 item->ex_fslocs.migrated = 0; 734 item->ex_fslocs.migrated = 0;
735 new->ex_layout_type = item->ex_layout_type;
728 new->ex_nflavors = item->ex_nflavors; 736 new->ex_nflavors = item->ex_nflavors;
729 for (i = 0; i < MAX_SECINFO_LIST; i++) { 737 for (i = 0; i < MAX_SECINFO_LIST; i++) {
730 new->ex_flavors[i] = item->ex_flavors[i]; 738 new->ex_flavors[i] = item->ex_flavors[i];
diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
index 04dc8c167b0c..1f52bfcc436f 100644
--- a/fs/nfsd/export.h
+++ b/fs/nfsd/export.h
@@ -56,6 +56,8 @@ struct svc_export {
56 struct nfsd4_fs_locations ex_fslocs; 56 struct nfsd4_fs_locations ex_fslocs;
57 uint32_t ex_nflavors; 57 uint32_t ex_nflavors;
58 struct exp_flavor_info ex_flavors[MAX_SECINFO_LIST]; 58 struct exp_flavor_info ex_flavors[MAX_SECINFO_LIST];
59 enum pnfs_layouttype ex_layout_type;
60 struct nfsd4_deviceid_map *ex_devid_map;
59 struct cache_detail *cd; 61 struct cache_detail *cd;
60}; 62};
61 63
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
new file mode 100644
index 000000000000..8273270418b1
--- /dev/null
+++ b/fs/nfsd/nfs4layouts.c
@@ -0,0 +1,487 @@
1/*
2 * Copyright (c) 2014 Christoph Hellwig.
3 */
4#include <linux/jhash.h>
5#include <linux/sched.h>
6
7#include "pnfs.h"
8#include "netns.h"
9
10#define NFSDDBG_FACILITY NFSDDBG_PNFS
11
12struct nfs4_layout {
13 struct list_head lo_perstate;
14 struct nfs4_layout_stateid *lo_state;
15 struct nfsd4_layout_seg lo_seg;
16};
17
18static struct kmem_cache *nfs4_layout_cache;
19static struct kmem_cache *nfs4_layout_stateid_cache;
20
21const struct nfsd4_layout_ops *nfsd4_layout_ops[LAYOUT_TYPE_MAX] = {
22};
23
24/* pNFS device ID to export fsid mapping */
25#define DEVID_HASH_BITS 8
26#define DEVID_HASH_SIZE (1 << DEVID_HASH_BITS)
27#define DEVID_HASH_MASK (DEVID_HASH_SIZE - 1)
28static u64 nfsd_devid_seq = 1;
29static struct list_head nfsd_devid_hash[DEVID_HASH_SIZE];
30static DEFINE_SPINLOCK(nfsd_devid_lock);
31
32static inline u32 devid_hashfn(u64 idx)
33{
34 return jhash_2words(idx, idx >> 32, 0) & DEVID_HASH_MASK;
35}
36
37static void
38nfsd4_alloc_devid_map(const struct svc_fh *fhp)
39{
40 const struct knfsd_fh *fh = &fhp->fh_handle;
41 size_t fsid_len = key_len(fh->fh_fsid_type);
42 struct nfsd4_deviceid_map *map, *old;
43 int i;
44
45 map = kzalloc(sizeof(*map) + fsid_len, GFP_KERNEL);
46 if (!map)
47 return;
48
49 map->fsid_type = fh->fh_fsid_type;
50 memcpy(&map->fsid, fh->fh_fsid, fsid_len);
51
52 spin_lock(&nfsd_devid_lock);
53 if (fhp->fh_export->ex_devid_map)
54 goto out_unlock;
55
56 for (i = 0; i < DEVID_HASH_SIZE; i++) {
57 list_for_each_entry(old, &nfsd_devid_hash[i], hash) {
58 if (old->fsid_type != fh->fh_fsid_type)
59 continue;
60 if (memcmp(old->fsid, fh->fh_fsid,
61 key_len(old->fsid_type)))
62 continue;
63
64 fhp->fh_export->ex_devid_map = old;
65 goto out_unlock;
66 }
67 }
68
69 map->idx = nfsd_devid_seq++;
70 list_add_tail_rcu(&map->hash, &nfsd_devid_hash[devid_hashfn(map->idx)]);
71 fhp->fh_export->ex_devid_map = map;
72 map = NULL;
73
74out_unlock:
75 spin_unlock(&nfsd_devid_lock);
76 kfree(map);
77}
78
79struct nfsd4_deviceid_map *
80nfsd4_find_devid_map(int idx)
81{
82 struct nfsd4_deviceid_map *map, *ret = NULL;
83
84 rcu_read_lock();
85 list_for_each_entry_rcu(map, &nfsd_devid_hash[devid_hashfn(idx)], hash)
86 if (map->idx == idx)
87 ret = map;
88 rcu_read_unlock();
89
90 return ret;
91}
92
93int
94nfsd4_set_deviceid(struct nfsd4_deviceid *id, const struct svc_fh *fhp,
95 u32 device_generation)
96{
97 if (!fhp->fh_export->ex_devid_map) {
98 nfsd4_alloc_devid_map(fhp);
99 if (!fhp->fh_export->ex_devid_map)
100 return -ENOMEM;
101 }
102
103 id->fsid_idx = fhp->fh_export->ex_devid_map->idx;
104 id->generation = device_generation;
105 id->pad = 0;
106 return 0;
107}
108
109void nfsd4_setup_layout_type(struct svc_export *exp)
110{
111 if (exp->ex_flags & NFSEXP_NOPNFS)
112 return;
113}
114
115static void
116nfsd4_free_layout_stateid(struct nfs4_stid *stid)
117{
118 struct nfs4_layout_stateid *ls = layoutstateid(stid);
119 struct nfs4_client *clp = ls->ls_stid.sc_client;
120 struct nfs4_file *fp = ls->ls_stid.sc_file;
121
122 spin_lock(&clp->cl_lock);
123 list_del_init(&ls->ls_perclnt);
124 spin_unlock(&clp->cl_lock);
125
126 spin_lock(&fp->fi_lock);
127 list_del_init(&ls->ls_perfile);
128 spin_unlock(&fp->fi_lock);
129
130 kmem_cache_free(nfs4_layout_stateid_cache, ls);
131}
132
133static struct nfs4_layout_stateid *
134nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
135 struct nfs4_stid *parent, u32 layout_type)
136{
137 struct nfs4_client *clp = cstate->clp;
138 struct nfs4_file *fp = parent->sc_file;
139 struct nfs4_layout_stateid *ls;
140 struct nfs4_stid *stp;
141
142 stp = nfs4_alloc_stid(cstate->clp, nfs4_layout_stateid_cache);
143 if (!stp)
144 return NULL;
145 stp->sc_free = nfsd4_free_layout_stateid;
146 get_nfs4_file(fp);
147 stp->sc_file = fp;
148
149 ls = layoutstateid(stp);
150 INIT_LIST_HEAD(&ls->ls_perclnt);
151 INIT_LIST_HEAD(&ls->ls_perfile);
152 spin_lock_init(&ls->ls_lock);
153 INIT_LIST_HEAD(&ls->ls_layouts);
154 ls->ls_layout_type = layout_type;
155
156 spin_lock(&clp->cl_lock);
157 stp->sc_type = NFS4_LAYOUT_STID;
158 list_add(&ls->ls_perclnt, &clp->cl_lo_states);
159 spin_unlock(&clp->cl_lock);
160
161 spin_lock(&fp->fi_lock);
162 list_add(&ls->ls_perfile, &fp->fi_lo_states);
163 spin_unlock(&fp->fi_lock);
164
165 return ls;
166}
167
168__be32
169nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
170 struct nfsd4_compound_state *cstate, stateid_t *stateid,
171 bool create, u32 layout_type, struct nfs4_layout_stateid **lsp)
172{
173 struct nfs4_layout_stateid *ls;
174 struct nfs4_stid *stid;
175 unsigned char typemask = NFS4_LAYOUT_STID;
176 __be32 status;
177
178 if (create)
179 typemask |= (NFS4_OPEN_STID | NFS4_LOCK_STID | NFS4_DELEG_STID);
180
181 status = nfsd4_lookup_stateid(cstate, stateid, typemask, &stid,
182 net_generic(SVC_NET(rqstp), nfsd_net_id));
183 if (status)
184 goto out;
185
186 if (!fh_match(&cstate->current_fh.fh_handle,
187 &stid->sc_file->fi_fhandle)) {
188 status = nfserr_bad_stateid;
189 goto out_put_stid;
190 }
191
192 if (stid->sc_type != NFS4_LAYOUT_STID) {
193 ls = nfsd4_alloc_layout_stateid(cstate, stid, layout_type);
194 nfs4_put_stid(stid);
195
196 status = nfserr_jukebox;
197 if (!ls)
198 goto out;
199 } else {
200 ls = container_of(stid, struct nfs4_layout_stateid, ls_stid);
201
202 status = nfserr_bad_stateid;
203 if (stateid->si_generation > stid->sc_stateid.si_generation)
204 goto out_put_stid;
205 if (layout_type != ls->ls_layout_type)
206 goto out_put_stid;
207 }
208
209 *lsp = ls;
210 return 0;
211
212out_put_stid:
213 nfs4_put_stid(stid);
214out:
215 return status;
216}
217
218static inline u64
219layout_end(struct nfsd4_layout_seg *seg)
220{
221 u64 end = seg->offset + seg->length;
222 return end >= seg->offset ? end : NFS4_MAX_UINT64;
223}
224
225static void
226layout_update_len(struct nfsd4_layout_seg *lo, u64 end)
227{
228 if (end == NFS4_MAX_UINT64)
229 lo->length = NFS4_MAX_UINT64;
230 else
231 lo->length = end - lo->offset;
232}
233
234static bool
235layouts_overlapping(struct nfs4_layout *lo, struct nfsd4_layout_seg *s)
236{
237 if (s->iomode != IOMODE_ANY && s->iomode != lo->lo_seg.iomode)
238 return false;
239 if (layout_end(&lo->lo_seg) <= s->offset)
240 return false;
241 if (layout_end(s) <= lo->lo_seg.offset)
242 return false;
243 return true;
244}
245
246static bool
247layouts_try_merge(struct nfsd4_layout_seg *lo, struct nfsd4_layout_seg *new)
248{
249 if (lo->iomode != new->iomode)
250 return false;
251 if (layout_end(new) < lo->offset)
252 return false;
253 if (layout_end(lo) < new->offset)
254 return false;
255
256 lo->offset = min(lo->offset, new->offset);
257 layout_update_len(lo, max(layout_end(lo), layout_end(new)));
258 return true;
259}
260
261__be32
262nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls)
263{
264 struct nfsd4_layout_seg *seg = &lgp->lg_seg;
265 struct nfs4_layout *lp, *new = NULL;
266
267 spin_lock(&ls->ls_lock);
268 list_for_each_entry(lp, &ls->ls_layouts, lo_perstate) {
269 if (layouts_try_merge(&lp->lo_seg, seg))
270 goto done;
271 }
272 spin_unlock(&ls->ls_lock);
273
274 new = kmem_cache_alloc(nfs4_layout_cache, GFP_KERNEL);
275 if (!new)
276 return nfserr_jukebox;
277 memcpy(&new->lo_seg, seg, sizeof(lp->lo_seg));
278 new->lo_state = ls;
279
280 spin_lock(&ls->ls_lock);
281 list_for_each_entry(lp, &ls->ls_layouts, lo_perstate) {
282 if (layouts_try_merge(&lp->lo_seg, seg))
283 goto done;
284 }
285
286 atomic_inc(&ls->ls_stid.sc_count);
287 list_add_tail(&new->lo_perstate, &ls->ls_layouts);
288 new = NULL;
289done:
290 update_stateid(&ls->ls_stid.sc_stateid);
291 memcpy(&lgp->lg_sid, &ls->ls_stid.sc_stateid, sizeof(stateid_t));
292 spin_unlock(&ls->ls_lock);
293 if (new)
294 kmem_cache_free(nfs4_layout_cache, new);
295 return nfs_ok;
296}
297
298static void
299nfsd4_free_layouts(struct list_head *reaplist)
300{
301 while (!list_empty(reaplist)) {
302 struct nfs4_layout *lp = list_first_entry(reaplist,
303 struct nfs4_layout, lo_perstate);
304
305 list_del(&lp->lo_perstate);
306 nfs4_put_stid(&lp->lo_state->ls_stid);
307 kmem_cache_free(nfs4_layout_cache, lp);
308 }
309}
310
311static void
312nfsd4_return_file_layout(struct nfs4_layout *lp, struct nfsd4_layout_seg *seg,
313 struct list_head *reaplist)
314{
315 struct nfsd4_layout_seg *lo = &lp->lo_seg;
316 u64 end = layout_end(lo);
317
318 if (seg->offset <= lo->offset) {
319 if (layout_end(seg) >= end) {
320 list_move_tail(&lp->lo_perstate, reaplist);
321 return;
322 }
323 end = seg->offset;
324 } else {
325 /* retain the whole layout segment on a split. */
326 if (layout_end(seg) < end) {
327 dprintk("%s: split not supported\n", __func__);
328 return;
329 }
330
331 lo->offset = layout_end(seg);
332 }
333
334 layout_update_len(lo, end);
335}
336
337__be32
338nfsd4_return_file_layouts(struct svc_rqst *rqstp,
339 struct nfsd4_compound_state *cstate,
340 struct nfsd4_layoutreturn *lrp)
341{
342 struct nfs4_layout_stateid *ls;
343 struct nfs4_layout *lp, *n;
344 LIST_HEAD(reaplist);
345 __be32 nfserr;
346 int found = 0;
347
348 nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lrp->lr_sid,
349 false, lrp->lr_layout_type,
350 &ls);
351 if (nfserr)
352 return nfserr;
353
354 spin_lock(&ls->ls_lock);
355 list_for_each_entry_safe(lp, n, &ls->ls_layouts, lo_perstate) {
356 if (layouts_overlapping(lp, &lrp->lr_seg)) {
357 nfsd4_return_file_layout(lp, &lrp->lr_seg, &reaplist);
358 found++;
359 }
360 }
361 if (!list_empty(&ls->ls_layouts)) {
362 if (found) {
363 update_stateid(&ls->ls_stid.sc_stateid);
364 memcpy(&lrp->lr_sid, &ls->ls_stid.sc_stateid,
365 sizeof(stateid_t));
366 }
367 lrp->lrs_present = 1;
368 } else {
369 nfs4_unhash_stid(&ls->ls_stid);
370 lrp->lrs_present = 0;
371 }
372 spin_unlock(&ls->ls_lock);
373
374 nfs4_put_stid(&ls->ls_stid);
375 nfsd4_free_layouts(&reaplist);
376 return nfs_ok;
377}
378
379__be32
380nfsd4_return_client_layouts(struct svc_rqst *rqstp,
381 struct nfsd4_compound_state *cstate,
382 struct nfsd4_layoutreturn *lrp)
383{
384 struct nfs4_layout_stateid *ls, *n;
385 struct nfs4_client *clp = cstate->clp;
386 struct nfs4_layout *lp, *t;
387 LIST_HEAD(reaplist);
388
389 lrp->lrs_present = 0;
390
391 spin_lock(&clp->cl_lock);
392 list_for_each_entry_safe(ls, n, &clp->cl_lo_states, ls_perclnt) {
393 if (lrp->lr_return_type == RETURN_FSID &&
394 !fh_fsid_match(&ls->ls_stid.sc_file->fi_fhandle,
395 &cstate->current_fh.fh_handle))
396 continue;
397
398 spin_lock(&ls->ls_lock);
399 list_for_each_entry_safe(lp, t, &ls->ls_layouts, lo_perstate) {
400 if (lrp->lr_seg.iomode == IOMODE_ANY ||
401 lrp->lr_seg.iomode == lp->lo_seg.iomode)
402 list_move_tail(&lp->lo_perstate, &reaplist);
403 }
404 spin_unlock(&ls->ls_lock);
405 }
406 spin_unlock(&clp->cl_lock);
407
408 nfsd4_free_layouts(&reaplist);
409 return 0;
410}
411
412static void
413nfsd4_return_all_layouts(struct nfs4_layout_stateid *ls,
414 struct list_head *reaplist)
415{
416 spin_lock(&ls->ls_lock);
417 list_splice_init(&ls->ls_layouts, reaplist);
418 spin_unlock(&ls->ls_lock);
419}
420
421void
422nfsd4_return_all_client_layouts(struct nfs4_client *clp)
423{
424 struct nfs4_layout_stateid *ls, *n;
425 LIST_HEAD(reaplist);
426
427 spin_lock(&clp->cl_lock);
428 list_for_each_entry_safe(ls, n, &clp->cl_lo_states, ls_perclnt)
429 nfsd4_return_all_layouts(ls, &reaplist);
430 spin_unlock(&clp->cl_lock);
431
432 nfsd4_free_layouts(&reaplist);
433}
434
435void
436nfsd4_return_all_file_layouts(struct nfs4_client *clp, struct nfs4_file *fp)
437{
438 struct nfs4_layout_stateid *ls, *n;
439 LIST_HEAD(reaplist);
440
441 spin_lock(&fp->fi_lock);
442 list_for_each_entry_safe(ls, n, &fp->fi_lo_states, ls_perfile) {
443 if (ls->ls_stid.sc_client == clp)
444 nfsd4_return_all_layouts(ls, &reaplist);
445 }
446 spin_unlock(&fp->fi_lock);
447
448 nfsd4_free_layouts(&reaplist);
449}
450
451int
452nfsd4_init_pnfs(void)
453{
454 int i;
455
456 for (i = 0; i < DEVID_HASH_SIZE; i++)
457 INIT_LIST_HEAD(&nfsd_devid_hash[i]);
458
459 nfs4_layout_cache = kmem_cache_create("nfs4_layout",
460 sizeof(struct nfs4_layout), 0, 0, NULL);
461 if (!nfs4_layout_cache)
462 return -ENOMEM;
463
464 nfs4_layout_stateid_cache = kmem_cache_create("nfs4_layout_stateid",
465 sizeof(struct nfs4_layout_stateid), 0, 0, NULL);
466 if (!nfs4_layout_stateid_cache) {
467 kmem_cache_destroy(nfs4_layout_cache);
468 return -ENOMEM;
469 }
470 return 0;
471}
472
473void
474nfsd4_exit_pnfs(void)
475{
476 int i;
477
478 kmem_cache_destroy(nfs4_layout_cache);
479 kmem_cache_destroy(nfs4_layout_stateid_cache);
480
481 for (i = 0; i < DEVID_HASH_SIZE; i++) {
482 struct nfsd4_deviceid_map *map, *n;
483
484 list_for_each_entry_safe(map, n, &nfsd_devid_hash[i], hash)
485 kfree(map);
486 }
487}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index ac71d13c69ef..2b91443497cc 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -43,6 +43,7 @@
43#include "current_stateid.h" 43#include "current_stateid.h"
44#include "netns.h" 44#include "netns.h"
45#include "acl.h" 45#include "acl.h"
46#include "pnfs.h"
46 47
47#ifdef CONFIG_NFSD_V4_SECURITY_LABEL 48#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
48#include <linux/security.h> 49#include <linux/security.h>
@@ -1178,6 +1179,252 @@ nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1178 return status == nfserr_same ? nfs_ok : status; 1179 return status == nfserr_same ? nfs_ok : status;
1179} 1180}
1180 1181
1182#ifdef CONFIG_NFSD_PNFS
1183static const struct nfsd4_layout_ops *
1184nfsd4_layout_verify(struct svc_export *exp, unsigned int layout_type)
1185{
1186 if (!exp->ex_layout_type) {
1187 dprintk("%s: export does not support pNFS\n", __func__);
1188 return NULL;
1189 }
1190
1191 if (exp->ex_layout_type != layout_type) {
1192 dprintk("%s: layout type %d not supported\n",
1193 __func__, layout_type);
1194 return NULL;
1195 }
1196
1197 return nfsd4_layout_ops[layout_type];
1198}
1199
1200static __be32
1201nfsd4_getdeviceinfo(struct svc_rqst *rqstp,
1202 struct nfsd4_compound_state *cstate,
1203 struct nfsd4_getdeviceinfo *gdp)
1204{
1205 const struct nfsd4_layout_ops *ops;
1206 struct nfsd4_deviceid_map *map;
1207 struct svc_export *exp;
1208 __be32 nfserr;
1209
1210 dprintk("%s: layout_type %u dev_id [0x%llx:0x%x] maxcnt %u\n",
1211 __func__,
1212 gdp->gd_layout_type,
1213 gdp->gd_devid.fsid_idx, gdp->gd_devid.generation,
1214 gdp->gd_maxcount);
1215
1216 map = nfsd4_find_devid_map(gdp->gd_devid.fsid_idx);
1217 if (!map) {
1218 dprintk("%s: couldn't find device ID to export mapping!\n",
1219 __func__);
1220 return nfserr_noent;
1221 }
1222
1223 exp = rqst_exp_find(rqstp, map->fsid_type, map->fsid);
1224 if (IS_ERR(exp)) {
1225 dprintk("%s: could not find device id\n", __func__);
1226 return nfserr_noent;
1227 }
1228
1229 nfserr = nfserr_layoutunavailable;
1230 ops = nfsd4_layout_verify(exp, gdp->gd_layout_type);
1231 if (!ops)
1232 goto out;
1233
1234 nfserr = nfs_ok;
1235 if (gdp->gd_maxcount != 0)
1236 nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, gdp);
1237
1238 gdp->gd_notify_types &= ops->notify_types;
1239 exp_put(exp);
1240out:
1241 return nfserr;
1242}
1243
1244static __be32
1245nfsd4_layoutget(struct svc_rqst *rqstp,
1246 struct nfsd4_compound_state *cstate,
1247 struct nfsd4_layoutget *lgp)
1248{
1249 struct svc_fh *current_fh = &cstate->current_fh;
1250 const struct nfsd4_layout_ops *ops;
1251 struct nfs4_layout_stateid *ls;
1252 __be32 nfserr;
1253 int accmode;
1254
1255 switch (lgp->lg_seg.iomode) {
1256 case IOMODE_READ:
1257 accmode = NFSD_MAY_READ;
1258 break;
1259 case IOMODE_RW:
1260 accmode = NFSD_MAY_READ | NFSD_MAY_WRITE;
1261 break;
1262 default:
1263 dprintk("%s: invalid iomode %d\n",
1264 __func__, lgp->lg_seg.iomode);
1265 nfserr = nfserr_badiomode;
1266 goto out;
1267 }
1268
1269 nfserr = fh_verify(rqstp, current_fh, 0, accmode);
1270 if (nfserr)
1271 goto out;
1272
1273 nfserr = nfserr_layoutunavailable;
1274 ops = nfsd4_layout_verify(current_fh->fh_export, lgp->lg_layout_type);
1275 if (!ops)
1276 goto out;
1277
1278 /*
1279 * Verify minlength and range as per RFC5661:
1280 * o If loga_length is less than loga_minlength,
1281 * the metadata server MUST return NFS4ERR_INVAL.
1282 * o If the sum of loga_offset and loga_minlength exceeds
1283 * NFS4_UINT64_MAX, and loga_minlength is not
1284 * NFS4_UINT64_MAX, the error NFS4ERR_INVAL MUST result.
1285 * o If the sum of loga_offset and loga_length exceeds
1286 * NFS4_UINT64_MAX, and loga_length is not NFS4_UINT64_MAX,
1287 * the error NFS4ERR_INVAL MUST result.
1288 */
1289 nfserr = nfserr_inval;
1290 if (lgp->lg_seg.length < lgp->lg_minlength ||
1291 (lgp->lg_minlength != NFS4_MAX_UINT64 &&
1292 lgp->lg_minlength > NFS4_MAX_UINT64 - lgp->lg_seg.offset) ||
1293 (lgp->lg_seg.length != NFS4_MAX_UINT64 &&
1294 lgp->lg_seg.length > NFS4_MAX_UINT64 - lgp->lg_seg.offset))
1295 goto out;
1296 if (lgp->lg_seg.length == 0)
1297 goto out;
1298
1299 nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lgp->lg_sid,
1300 true, lgp->lg_layout_type, &ls);
1301 if (nfserr)
1302 goto out;
1303
1304 nfserr = ops->proc_layoutget(current_fh->fh_dentry->d_inode,
1305 current_fh, lgp);
1306 if (nfserr)
1307 goto out_put_stid;
1308
1309 nfserr = nfsd4_insert_layout(lgp, ls);
1310
1311out_put_stid:
1312 nfs4_put_stid(&ls->ls_stid);
1313out:
1314 return nfserr;
1315}
1316
1317static __be32
1318nfsd4_layoutcommit(struct svc_rqst *rqstp,
1319 struct nfsd4_compound_state *cstate,
1320 struct nfsd4_layoutcommit *lcp)
1321{
1322 const struct nfsd4_layout_seg *seg = &lcp->lc_seg;
1323 struct svc_fh *current_fh = &cstate->current_fh;
1324 const struct nfsd4_layout_ops *ops;
1325 loff_t new_size = lcp->lc_last_wr + 1;
1326 struct inode *inode;
1327 struct nfs4_layout_stateid *ls;
1328 __be32 nfserr;
1329
1330 nfserr = fh_verify(rqstp, current_fh, 0, NFSD_MAY_WRITE);
1331 if (nfserr)
1332 goto out;
1333
1334 nfserr = nfserr_layoutunavailable;
1335 ops = nfsd4_layout_verify(current_fh->fh_export, lcp->lc_layout_type);
1336 if (!ops)
1337 goto out;
1338 inode = current_fh->fh_dentry->d_inode;
1339
1340 nfserr = nfserr_inval;
1341 if (new_size <= seg->offset) {
1342 dprintk("pnfsd: last write before layout segment\n");
1343 goto out;
1344 }
1345 if (new_size > seg->offset + seg->length) {
1346 dprintk("pnfsd: last write beyond layout segment\n");
1347 goto out;
1348 }
1349 if (!lcp->lc_newoffset && new_size > i_size_read(inode)) {
1350 dprintk("pnfsd: layoutcommit beyond EOF\n");
1351 goto out;
1352 }
1353
1354 nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lcp->lc_sid,
1355 false, lcp->lc_layout_type,
1356 &ls);
1357 if (nfserr) {
1358 /* fixup error code as per RFC5661 */
1359 if (nfserr == nfserr_bad_stateid)
1360 nfserr = nfserr_badlayout;
1361 goto out;
1362 }
1363
1364 nfserr = ops->proc_layoutcommit(inode, lcp);
1365 if (nfserr)
1366 goto out_put_stid;
1367
1368 if (new_size > i_size_read(inode)) {
1369 lcp->lc_size_chg = 1;
1370 lcp->lc_newsize = new_size;
1371 } else {
1372 lcp->lc_size_chg = 0;
1373 }
1374
1375out_put_stid:
1376 nfs4_put_stid(&ls->ls_stid);
1377out:
1378 return nfserr;
1379}
1380
1381static __be32
1382nfsd4_layoutreturn(struct svc_rqst *rqstp,
1383 struct nfsd4_compound_state *cstate,
1384 struct nfsd4_layoutreturn *lrp)
1385{
1386 struct svc_fh *current_fh = &cstate->current_fh;
1387 __be32 nfserr;
1388
1389 nfserr = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
1390 if (nfserr)
1391 goto out;
1392
1393 nfserr = nfserr_layoutunavailable;
1394 if (!nfsd4_layout_verify(current_fh->fh_export, lrp->lr_layout_type))
1395 goto out;
1396
1397 switch (lrp->lr_seg.iomode) {
1398 case IOMODE_READ:
1399 case IOMODE_RW:
1400 case IOMODE_ANY:
1401 break;
1402 default:
1403 dprintk("%s: invalid iomode %d\n", __func__,
1404 lrp->lr_seg.iomode);
1405 nfserr = nfserr_inval;
1406 goto out;
1407 }
1408
1409 switch (lrp->lr_return_type) {
1410 case RETURN_FILE:
1411 nfserr = nfsd4_return_file_layouts(rqstp, cstate, lrp);
1412 break;
1413 case RETURN_FSID:
1414 case RETURN_ALL:
1415 nfserr = nfsd4_return_client_layouts(rqstp, cstate, lrp);
1416 break;
1417 default:
1418 dprintk("%s: invalid return_type %d\n", __func__,
1419 lrp->lr_return_type);
1420 nfserr = nfserr_inval;
1421 break;
1422 }
1423out:
1424 return nfserr;
1425}
1426#endif /* CONFIG_NFSD_PNFS */
1427
1181/* 1428/*
1182 * NULL call. 1429 * NULL call.
1183 */ 1430 */
@@ -1679,6 +1926,36 @@ static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd
1679 op_encode_channel_attrs_maxsz) * sizeof(__be32); 1926 op_encode_channel_attrs_maxsz) * sizeof(__be32);
1680} 1927}
1681 1928
1929#ifdef CONFIG_NFSD_PNFS
1930/*
1931 * At this stage we don't really know what layout driver will handle the request,
1932 * so we need to define an arbitrary upper bound here.
1933 */
1934#define MAX_LAYOUT_SIZE 128
1935static inline u32 nfsd4_layoutget_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1936{
1937 return (op_encode_hdr_size +
1938 1 /* logr_return_on_close */ +
1939 op_encode_stateid_maxsz +
1940 1 /* nr of layouts */ +
1941 MAX_LAYOUT_SIZE) * sizeof(__be32);
1942}
1943
1944static inline u32 nfsd4_layoutcommit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1945{
1946 return (op_encode_hdr_size +
1947 1 /* locr_newsize */ +
1948 2 /* ns_size */) * sizeof(__be32);
1949}
1950
1951static inline u32 nfsd4_layoutreturn_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1952{
1953 return (op_encode_hdr_size +
1954 1 /* lrs_stateid */ +
1955 op_encode_stateid_maxsz) * sizeof(__be32);
1956}
1957#endif /* CONFIG_NFSD_PNFS */
1958
1682static struct nfsd4_operation nfsd4_ops[] = { 1959static struct nfsd4_operation nfsd4_ops[] = {
1683 [OP_ACCESS] = { 1960 [OP_ACCESS] = {
1684 .op_func = (nfsd4op_func)nfsd4_access, 1961 .op_func = (nfsd4op_func)nfsd4_access,
@@ -1966,6 +2243,31 @@ static struct nfsd4_operation nfsd4_ops[] = {
1966 .op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid, 2243 .op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid,
1967 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, 2244 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
1968 }, 2245 },
2246#ifdef CONFIG_NFSD_PNFS
2247 [OP_GETDEVICEINFO] = {
2248 .op_func = (nfsd4op_func)nfsd4_getdeviceinfo,
2249 .op_flags = ALLOWED_WITHOUT_FH,
2250 .op_name = "OP_GETDEVICEINFO",
2251 },
2252 [OP_LAYOUTGET] = {
2253 .op_func = (nfsd4op_func)nfsd4_layoutget,
2254 .op_flags = OP_MODIFIES_SOMETHING,
2255 .op_name = "OP_LAYOUTGET",
2256 .op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutget_rsize,
2257 },
2258 [OP_LAYOUTCOMMIT] = {
2259 .op_func = (nfsd4op_func)nfsd4_layoutcommit,
2260 .op_flags = OP_MODIFIES_SOMETHING,
2261 .op_name = "OP_LAYOUTCOMMIT",
2262 .op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutcommit_rsize,
2263 },
2264 [OP_LAYOUTRETURN] = {
2265 .op_func = (nfsd4op_func)nfsd4_layoutreturn,
2266 .op_flags = OP_MODIFIES_SOMETHING,
2267 .op_name = "OP_LAYOUTRETURN",
2268 .op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutreturn_rsize,
2269 },
2270#endif /* CONFIG_NFSD_PNFS */
1969 2271
1970 /* NFSv4.2 operations */ 2272 /* NFSv4.2 operations */
1971 [OP_ALLOCATE] = { 2273 [OP_ALLOCATE] = {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index eefd29ec43f2..c89f79dc69e2 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -48,6 +48,7 @@
48#include "current_stateid.h" 48#include "current_stateid.h"
49 49
50#include "netns.h" 50#include "netns.h"
51#include "pnfs.h"
51 52
52#define NFSDDBG_FACILITY NFSDDBG_PROC 53#define NFSDDBG_FACILITY NFSDDBG_PROC
53 54
@@ -1539,6 +1540,9 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
1539 INIT_LIST_HEAD(&clp->cl_lru); 1540 INIT_LIST_HEAD(&clp->cl_lru);
1540 INIT_LIST_HEAD(&clp->cl_callbacks); 1541 INIT_LIST_HEAD(&clp->cl_callbacks);
1541 INIT_LIST_HEAD(&clp->cl_revoked); 1542 INIT_LIST_HEAD(&clp->cl_revoked);
1543#ifdef CONFIG_NFSD_PNFS
1544 INIT_LIST_HEAD(&clp->cl_lo_states);
1545#endif
1542 spin_lock_init(&clp->cl_lock); 1546 spin_lock_init(&clp->cl_lock);
1543 rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); 1547 rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
1544 return clp; 1548 return clp;
@@ -1643,6 +1647,7 @@ __destroy_client(struct nfs4_client *clp)
1643 nfs4_get_stateowner(&oo->oo_owner); 1647 nfs4_get_stateowner(&oo->oo_owner);
1644 release_openowner(oo); 1648 release_openowner(oo);
1645 } 1649 }
1650 nfsd4_return_all_client_layouts(clp);
1646 nfsd4_shutdown_callback(clp); 1651 nfsd4_shutdown_callback(clp);
1647 if (clp->cl_cb_conn.cb_xprt) 1652 if (clp->cl_cb_conn.cb_xprt)
1648 svc_xprt_put(clp->cl_cb_conn.cb_xprt); 1653 svc_xprt_put(clp->cl_cb_conn.cb_xprt);
@@ -2126,8 +2131,11 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
2126static void 2131static void
2127nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid) 2132nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid)
2128{ 2133{
2129 /* pNFS is not supported */ 2134#ifdef CONFIG_NFSD_PNFS
2135 new->cl_exchange_flags |= EXCHGID4_FLAG_USE_PNFS_MDS;
2136#else
2130 new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS; 2137 new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS;
2138#endif
2131 2139
2132 /* Referrals are supported, Migration is not. */ 2140 /* Referrals are supported, Migration is not. */
2133 new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER; 2141 new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER;
@@ -3055,6 +3063,9 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
3055 fp->fi_share_deny = 0; 3063 fp->fi_share_deny = 0;
3056 memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); 3064 memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
3057 memset(fp->fi_access, 0, sizeof(fp->fi_access)); 3065 memset(fp->fi_access, 0, sizeof(fp->fi_access));
3066#ifdef CONFIG_NFSD_PNFS
3067 INIT_LIST_HEAD(&fp->fi_lo_states);
3068#endif
3058 hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]); 3069 hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]);
3059} 3070}
3060 3071
@@ -4841,6 +4852,9 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4841 update_stateid(&stp->st_stid.sc_stateid); 4852 update_stateid(&stp->st_stid.sc_stateid);
4842 memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); 4853 memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
4843 4854
4855 nfsd4_return_all_file_layouts(stp->st_stateowner->so_client,
4856 stp->st_stid.sc_file);
4857
4844 nfsd4_close_open_stateid(stp); 4858 nfsd4_close_open_stateid(stp);
4845 4859
4846 /* put reference from nfs4_preprocess_seqid_op */ 4860 /* put reference from nfs4_preprocess_seqid_op */
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 974533e5a427..df5e66caf100 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -47,6 +47,7 @@
47#include "state.h" 47#include "state.h"
48#include "cache.h" 48#include "cache.h"
49#include "netns.h" 49#include "netns.h"
50#include "pnfs.h"
50 51
51#ifdef CONFIG_NFSD_V4_SECURITY_LABEL 52#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
52#include <linux/security.h> 53#include <linux/security.h>
@@ -1522,6 +1523,127 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, str
1522 DECODE_TAIL; 1523 DECODE_TAIL;
1523} 1524}
1524 1525
1526#ifdef CONFIG_NFSD_PNFS
1527static __be32
1528nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp,
1529 struct nfsd4_getdeviceinfo *gdev)
1530{
1531 DECODE_HEAD;
1532 u32 num, i;
1533
1534 READ_BUF(sizeof(struct nfsd4_deviceid) + 3 * 4);
1535 COPYMEM(&gdev->gd_devid, sizeof(struct nfsd4_deviceid));
1536 gdev->gd_layout_type = be32_to_cpup(p++);
1537 gdev->gd_maxcount = be32_to_cpup(p++);
1538 num = be32_to_cpup(p++);
1539 if (num) {
1540 READ_BUF(4 * num);
1541 gdev->gd_notify_types = be32_to_cpup(p++);
1542 for (i = 1; i < num; i++) {
1543 if (be32_to_cpup(p++)) {
1544 status = nfserr_inval;
1545 goto out;
1546 }
1547 }
1548 }
1549 DECODE_TAIL;
1550}
1551
1552static __be32
1553nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,
1554 struct nfsd4_layoutget *lgp)
1555{
1556 DECODE_HEAD;
1557
1558 READ_BUF(36);
1559 lgp->lg_signal = be32_to_cpup(p++);
1560 lgp->lg_layout_type = be32_to_cpup(p++);
1561 lgp->lg_seg.iomode = be32_to_cpup(p++);
1562 p = xdr_decode_hyper(p, &lgp->lg_seg.offset);
1563 p = xdr_decode_hyper(p, &lgp->lg_seg.length);
1564 p = xdr_decode_hyper(p, &lgp->lg_minlength);
1565 nfsd4_decode_stateid(argp, &lgp->lg_sid);
1566 READ_BUF(4);
1567 lgp->lg_maxcount = be32_to_cpup(p++);
1568
1569 DECODE_TAIL;
1570}
1571
1572static __be32
1573nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,
1574 struct nfsd4_layoutcommit *lcp)
1575{
1576 DECODE_HEAD;
1577 u32 timechange;
1578
1579 READ_BUF(20);
1580 p = xdr_decode_hyper(p, &lcp->lc_seg.offset);
1581 p = xdr_decode_hyper(p, &lcp->lc_seg.length);
1582 lcp->lc_reclaim = be32_to_cpup(p++);
1583 nfsd4_decode_stateid(argp, &lcp->lc_sid);
1584 READ_BUF(4);
1585 lcp->lc_newoffset = be32_to_cpup(p++);
1586 if (lcp->lc_newoffset) {
1587 READ_BUF(8);
1588 p = xdr_decode_hyper(p, &lcp->lc_last_wr);
1589 } else
1590 lcp->lc_last_wr = 0;
1591 READ_BUF(4);
1592 timechange = be32_to_cpup(p++);
1593 if (timechange) {
1594 status = nfsd4_decode_time(argp, &lcp->lc_mtime);
1595 if (status)
1596 return status;
1597 } else {
1598 lcp->lc_mtime.tv_nsec = UTIME_NOW;
1599 }
1600 READ_BUF(8);
1601 lcp->lc_layout_type = be32_to_cpup(p++);
1602
1603 /*
1604 * Save the layout update in XDR format and let the layout driver deal
1605 * with it later.
1606 */
1607 lcp->lc_up_len = be32_to_cpup(p++);
1608 if (lcp->lc_up_len > 0) {
1609 READ_BUF(lcp->lc_up_len);
1610 READMEM(lcp->lc_up_layout, lcp->lc_up_len);
1611 }
1612
1613 DECODE_TAIL;
1614}
1615
1616static __be32
1617nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp,
1618 struct nfsd4_layoutreturn *lrp)
1619{
1620 DECODE_HEAD;
1621
1622 READ_BUF(16);
1623 lrp->lr_reclaim = be32_to_cpup(p++);
1624 lrp->lr_layout_type = be32_to_cpup(p++);
1625 lrp->lr_seg.iomode = be32_to_cpup(p++);
1626 lrp->lr_return_type = be32_to_cpup(p++);
1627 if (lrp->lr_return_type == RETURN_FILE) {
1628 READ_BUF(16);
1629 p = xdr_decode_hyper(p, &lrp->lr_seg.offset);
1630 p = xdr_decode_hyper(p, &lrp->lr_seg.length);
1631 nfsd4_decode_stateid(argp, &lrp->lr_sid);
1632 READ_BUF(4);
1633 lrp->lrf_body_len = be32_to_cpup(p++);
1634 if (lrp->lrf_body_len > 0) {
1635 READ_BUF(lrp->lrf_body_len);
1636 READMEM(lrp->lrf_body, lrp->lrf_body_len);
1637 }
1638 } else {
1639 lrp->lr_seg.offset = 0;
1640 lrp->lr_seg.length = NFS4_MAX_UINT64;
1641 }
1642
1643 DECODE_TAIL;
1644}
1645#endif /* CONFIG_NFSD_PNFS */
1646
1525static __be32 1647static __be32
1526nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp, 1648nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp,
1527 struct nfsd4_fallocate *fallocate) 1649 struct nfsd4_fallocate *fallocate)
@@ -1616,11 +1738,19 @@ static nfsd4_dec nfsd4_dec_ops[] = {
1616 [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session, 1738 [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session,
1617 [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_free_stateid, 1739 [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_free_stateid,
1618 [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, 1740 [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp,
1741#ifdef CONFIG_NFSD_PNFS
1742 [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_getdeviceinfo,
1743 [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp,
1744 [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_layoutcommit,
1745 [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_layoutget,
1746 [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_layoutreturn,
1747#else
1619 [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp, 1748 [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp,
1620 [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp, 1749 [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp,
1621 [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp, 1750 [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp,
1622 [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp, 1751 [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp,
1623 [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp, 1752 [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp,
1753#endif
1624 [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_secinfo_no_name, 1754 [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_secinfo_no_name,
1625 [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence, 1755 [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence,
1626 [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp, 1756 [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp,
@@ -2548,6 +2678,30 @@ out_acl:
2548 get_parent_attributes(exp, &stat); 2678 get_parent_attributes(exp, &stat);
2549 p = xdr_encode_hyper(p, stat.ino); 2679 p = xdr_encode_hyper(p, stat.ino);
2550 } 2680 }
2681#ifdef CONFIG_NFSD_PNFS
2682 if ((bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) ||
2683 (bmval2 & FATTR4_WORD2_LAYOUT_TYPES)) {
2684 if (exp->ex_layout_type) {
2685 p = xdr_reserve_space(xdr, 8);
2686 if (!p)
2687 goto out_resource;
2688 *p++ = cpu_to_be32(1);
2689 *p++ = cpu_to_be32(exp->ex_layout_type);
2690 } else {
2691 p = xdr_reserve_space(xdr, 4);
2692 if (!p)
2693 goto out_resource;
2694 *p++ = cpu_to_be32(0);
2695 }
2696 }
2697
2698 if (bmval2 & FATTR4_WORD2_LAYOUT_BLKSIZE) {
2699 p = xdr_reserve_space(xdr, 4);
2700 if (!p)
2701 goto out_resource;
2702 *p++ = cpu_to_be32(stat.blksize);
2703 }
2704#endif /* CONFIG_NFSD_PNFS */
2551 if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { 2705 if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
2552 status = nfsd4_encode_security_label(xdr, rqstp, context, 2706 status = nfsd4_encode_security_label(xdr, rqstp, context,
2553 contextlen); 2707 contextlen);
@@ -3824,6 +3978,156 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
3824 return nfserr; 3978 return nfserr;
3825} 3979}
3826 3980
3981#ifdef CONFIG_NFSD_PNFS
3982static __be32
3983nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
3984 struct nfsd4_getdeviceinfo *gdev)
3985{
3986 struct xdr_stream *xdr = &resp->xdr;
3987 const struct nfsd4_layout_ops *ops =
3988 nfsd4_layout_ops[gdev->gd_layout_type];
3989 u32 starting_len = xdr->buf->len, needed_len;
3990 __be32 *p;
3991
3992 dprintk("%s: err %d\n", __func__, nfserr);
3993 if (nfserr)
3994 goto out;
3995
3996 nfserr = nfserr_resource;
3997 p = xdr_reserve_space(xdr, 4);
3998 if (!p)
3999 goto out;
4000
4001 *p++ = cpu_to_be32(gdev->gd_layout_type);
4002
4003 /* If maxcount is 0 then just update notifications */
4004 if (gdev->gd_maxcount != 0) {
4005 nfserr = ops->encode_getdeviceinfo(xdr, gdev);
4006 if (nfserr) {
4007 /*
4008 * We don't bother to burden the layout drivers with
4009 * enforcing gd_maxcount, just tell the client to
4010 * come back with a bigger buffer if it's not enough.
4011 */
4012 if (xdr->buf->len + 4 > gdev->gd_maxcount)
4013 goto toosmall;
4014 goto out;
4015 }
4016 }
4017
4018 nfserr = nfserr_resource;
4019 if (gdev->gd_notify_types) {
4020 p = xdr_reserve_space(xdr, 4 + 4);
4021 if (!p)
4022 goto out;
4023 *p++ = cpu_to_be32(1); /* bitmap length */
4024 *p++ = cpu_to_be32(gdev->gd_notify_types);
4025 } else {
4026 p = xdr_reserve_space(xdr, 4);
4027 if (!p)
4028 goto out;
4029 *p++ = 0;
4030 }
4031
4032 nfserr = 0;
4033out:
4034 kfree(gdev->gd_device);
4035 dprintk("%s: done: %d\n", __func__, be32_to_cpu(nfserr));
4036 return nfserr;
4037
4038toosmall:
4039 dprintk("%s: maxcount too small\n", __func__);
4040 needed_len = xdr->buf->len + 4 /* notifications */;
4041 xdr_truncate_encode(xdr, starting_len);
4042 p = xdr_reserve_space(xdr, 4);
4043 if (!p) {
4044 nfserr = nfserr_resource;
4045 } else {
4046 *p++ = cpu_to_be32(needed_len);
4047 nfserr = nfserr_toosmall;
4048 }
4049 goto out;
4050}
4051
4052static __be32
4053nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr,
4054 struct nfsd4_layoutget *lgp)
4055{
4056 struct xdr_stream *xdr = &resp->xdr;
4057 const struct nfsd4_layout_ops *ops =
4058 nfsd4_layout_ops[lgp->lg_layout_type];
4059 __be32 *p;
4060
4061 dprintk("%s: err %d\n", __func__, nfserr);
4062 if (nfserr)
4063 goto out;
4064
4065 nfserr = nfserr_resource;
4066 p = xdr_reserve_space(xdr, 36 + sizeof(stateid_opaque_t));
4067 if (!p)
4068 goto out;
4069
4070 *p++ = cpu_to_be32(1); /* we always set return-on-close */
4071 *p++ = cpu_to_be32(lgp->lg_sid.si_generation);
4072 p = xdr_encode_opaque_fixed(p, &lgp->lg_sid.si_opaque,
4073 sizeof(stateid_opaque_t));
4074
4075 *p++ = cpu_to_be32(1); /* we always return a single layout */
4076 p = xdr_encode_hyper(p, lgp->lg_seg.offset);
4077 p = xdr_encode_hyper(p, lgp->lg_seg.length);
4078 *p++ = cpu_to_be32(lgp->lg_seg.iomode);
4079 *p++ = cpu_to_be32(lgp->lg_layout_type);
4080
4081 nfserr = ops->encode_layoutget(xdr, lgp);
4082out:
4083 kfree(lgp->lg_content);
4084 return nfserr;
4085}
4086
4087static __be32
4088nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr,
4089 struct nfsd4_layoutcommit *lcp)
4090{
4091 struct xdr_stream *xdr = &resp->xdr;
4092 __be32 *p;
4093
4094 if (nfserr)
4095 return nfserr;
4096
4097 p = xdr_reserve_space(xdr, 4);
4098 if (!p)
4099 return nfserr_resource;
4100 *p++ = cpu_to_be32(lcp->lc_size_chg);
4101 if (lcp->lc_size_chg) {
4102 p = xdr_reserve_space(xdr, 8);
4103 if (!p)
4104 return nfserr_resource;
4105 p = xdr_encode_hyper(p, lcp->lc_newsize);
4106 }
4107
4108 return nfs_ok;
4109}
4110
4111static __be32
4112nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr,
4113 struct nfsd4_layoutreturn *lrp)
4114{
4115 struct xdr_stream *xdr = &resp->xdr;
4116 __be32 *p;
4117
4118 if (nfserr)
4119 return nfserr;
4120
4121 p = xdr_reserve_space(xdr, 4);
4122 if (!p)
4123 return nfserr_resource;
4124 *p++ = cpu_to_be32(lrp->lrs_present);
4125 if (lrp->lrs_present)
4126 nfsd4_encode_stateid(xdr, &lrp->lr_sid);
4127 return nfs_ok;
4128}
4129#endif /* CONFIG_NFSD_PNFS */
4130
3827static __be32 4131static __be32
3828nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, 4132nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
3829 struct nfsd4_seek *seek) 4133 struct nfsd4_seek *seek)
@@ -3900,11 +4204,19 @@ static nfsd4_enc nfsd4_enc_ops[] = {
3900 [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_noop, 4204 [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_noop,
3901 [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop, 4205 [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop,
3902 [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, 4206 [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop,
4207#ifdef CONFIG_NFSD_PNFS
4208 [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_getdeviceinfo,
4209 [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop,
4210 [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_layoutcommit,
4211 [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_layoutget,
4212 [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_layoutreturn,
4213#else
3903 [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop, 4214 [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop,
3904 [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, 4215 [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop,
3905 [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop, 4216 [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop,
3906 [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop, 4217 [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop,
3907 [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop, 4218 [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop,
4219#endif
3908 [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_secinfo_no_name, 4220 [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_secinfo_no_name,
3909 [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence, 4221 [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence,
3910 [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop, 4222 [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop,
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 19ace74d35f6..aa47d75ddb26 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -21,6 +21,7 @@
21#include "cache.h" 21#include "cache.h"
22#include "state.h" 22#include "state.h"
23#include "netns.h" 23#include "netns.h"
24#include "pnfs.h"
24 25
25/* 26/*
26 * We have a single directory with several nodes in it. 27 * We have a single directory with several nodes in it.
@@ -1258,9 +1259,12 @@ static int __init init_nfsd(void)
1258 retval = nfsd4_init_slabs(); 1259 retval = nfsd4_init_slabs();
1259 if (retval) 1260 if (retval)
1260 goto out_unregister_pernet; 1261 goto out_unregister_pernet;
1261 retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */ 1262 retval = nfsd4_init_pnfs();
1262 if (retval) 1263 if (retval)
1263 goto out_free_slabs; 1264 goto out_free_slabs;
1265 retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */
1266 if (retval)
1267 goto out_exit_pnfs;
1264 nfsd_stat_init(); /* Statistics */ 1268 nfsd_stat_init(); /* Statistics */
1265 retval = nfsd_reply_cache_init(); 1269 retval = nfsd_reply_cache_init();
1266 if (retval) 1270 if (retval)
@@ -1282,6 +1286,8 @@ out_free_lockd:
1282out_free_stat: 1286out_free_stat:
1283 nfsd_stat_shutdown(); 1287 nfsd_stat_shutdown();
1284 nfsd_fault_inject_cleanup(); 1288 nfsd_fault_inject_cleanup();
1289out_exit_pnfs:
1290 nfsd4_exit_pnfs();
1285out_free_slabs: 1291out_free_slabs:
1286 nfsd4_free_slabs(); 1292 nfsd4_free_slabs();
1287out_unregister_pernet: 1293out_unregister_pernet:
@@ -1299,6 +1305,7 @@ static void __exit exit_nfsd(void)
1299 nfsd_stat_shutdown(); 1305 nfsd_stat_shutdown();
1300 nfsd_lockd_shutdown(); 1306 nfsd_lockd_shutdown();
1301 nfsd4_free_slabs(); 1307 nfsd4_free_slabs();
1308 nfsd4_exit_pnfs();
1302 nfsd_fault_inject_cleanup(); 1309 nfsd_fault_inject_cleanup();
1303 unregister_filesystem(&nfsd_fs_type); 1310 unregister_filesystem(&nfsd_fs_type);
1304 unregister_pernet_subsys(&nfsd_net_ops); 1311 unregister_pernet_subsys(&nfsd_net_ops);
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 33a46a8dfaf7..565c4da1a9eb 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -325,15 +325,27 @@ void nfsd_lockd_shutdown(void);
325 325
326#define NFSD4_SUPPORTED_ATTRS_WORD2 0 326#define NFSD4_SUPPORTED_ATTRS_WORD2 0
327 327
328/* 4.1 */
329#ifdef CONFIG_NFSD_PNFS
330#define PNFSD_SUPPORTED_ATTRS_WORD1 FATTR4_WORD1_FS_LAYOUT_TYPES
331#define PNFSD_SUPPORTED_ATTRS_WORD2 \
332(FATTR4_WORD2_LAYOUT_BLKSIZE | FATTR4_WORD2_LAYOUT_TYPES)
333#else
334#define PNFSD_SUPPORTED_ATTRS_WORD1 0
335#define PNFSD_SUPPORTED_ATTRS_WORD2 0
336#endif /* CONFIG_NFSD_PNFS */
337
328#define NFSD4_1_SUPPORTED_ATTRS_WORD0 \ 338#define NFSD4_1_SUPPORTED_ATTRS_WORD0 \
329 NFSD4_SUPPORTED_ATTRS_WORD0 339 NFSD4_SUPPORTED_ATTRS_WORD0
330 340
331#define NFSD4_1_SUPPORTED_ATTRS_WORD1 \ 341#define NFSD4_1_SUPPORTED_ATTRS_WORD1 \
332 NFSD4_SUPPORTED_ATTRS_WORD1 342 (NFSD4_SUPPORTED_ATTRS_WORD1 | PNFSD_SUPPORTED_ATTRS_WORD1)
333 343
334#define NFSD4_1_SUPPORTED_ATTRS_WORD2 \ 344#define NFSD4_1_SUPPORTED_ATTRS_WORD2 \
335 (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT) 345 (NFSD4_SUPPORTED_ATTRS_WORD2 | PNFSD_SUPPORTED_ATTRS_WORD2 | \
346 FATTR4_WORD2_SUPPATTR_EXCLCREAT)
336 347
348/* 4.2 */
337#ifdef CONFIG_NFSD_V4_SECURITY_LABEL 349#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
338#define NFSD4_2_SECURITY_ATTRS FATTR4_WORD2_SECURITY_LABEL 350#define NFSD4_2_SECURITY_ATTRS FATTR4_WORD2_SECURITY_LABEL
339#else 351#else
diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h
new file mode 100644
index 000000000000..a9616a4e13cd
--- /dev/null
+++ b/fs/nfsd/pnfs.h
@@ -0,0 +1,80 @@
1#ifndef _FS_NFSD_PNFS_H
2#define _FS_NFSD_PNFS_H 1
3
4#include <linux/exportfs.h>
5#include <linux/nfsd/export.h>
6
7#include "state.h"
8#include "xdr4.h"
9
10struct xdr_stream;
11
/*
 * One entry of the hash that maps the 64-bit index carried in a pNFS
 * device ID back to a filesystem ID, so that the export can be looked up
 * and authenticated against.  Per the commit description, entries are
 * never deleted because device IDs must not be reused.
 *
 * hash:      linkage into the hash table
 * idx:       64-bit index handed out in device IDs
 * fsid_type: presumably the fsid type of the owning file handle
 *            -- TODO confirm against the code that fills it in
 * fsid:      variable-length fsid data (flexible array member)
 */
12struct nfsd4_deviceid_map {
13 struct list_head hash;
14 u64 idx;
15 int fsid_type;
16 u32 fsid[];
17};
18
19struct nfsd4_layout_ops {
20 u32 notify_types;
21
22 __be32 (*proc_getdeviceinfo)(struct super_block *sb,
23 struct nfsd4_getdeviceinfo *gdevp);
24 __be32 (*encode_getdeviceinfo)(struct xdr_stream *xdr,
25 struct nfsd4_getdeviceinfo *gdevp);
26
27 __be32 (*proc_layoutget)(struct inode *, const struct svc_fh *fhp,
28 struct nfsd4_layoutget *lgp);
29 __be32 (*encode_layoutget)(struct xdr_stream *,
30 struct nfsd4_layoutget *lgp);
31
32 __be32 (*proc_layoutcommit)(struct inode *inode,
33 struct nfsd4_layoutcommit *lcp);
34};
35
36extern const struct nfsd4_layout_ops *nfsd4_layout_ops[];
37
38__be32 nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
39 struct nfsd4_compound_state *cstate, stateid_t *stateid,
40 bool create, u32 layout_type, struct nfs4_layout_stateid **lsp);
41__be32 nfsd4_insert_layout(struct nfsd4_layoutget *lgp,
42 struct nfs4_layout_stateid *ls);
43__be32 nfsd4_return_file_layouts(struct svc_rqst *rqstp,
44 struct nfsd4_compound_state *cstate,
45 struct nfsd4_layoutreturn *lrp);
46__be32 nfsd4_return_client_layouts(struct svc_rqst *rqstp,
47 struct nfsd4_compound_state *cstate,
48 struct nfsd4_layoutreturn *lrp);
49int nfsd4_set_deviceid(struct nfsd4_deviceid *id, const struct svc_fh *fhp,
50 u32 device_generation);
51struct nfsd4_deviceid_map *nfsd4_find_devid_map(int idx);
52
53#ifdef CONFIG_NFSD_PNFS
54void nfsd4_setup_layout_type(struct svc_export *exp);
55void nfsd4_return_all_client_layouts(struct nfs4_client *);
56void nfsd4_return_all_file_layouts(struct nfs4_client *clp,
57 struct nfs4_file *fp);
58int nfsd4_init_pnfs(void);
59void nfsd4_exit_pnfs(void);
60#else
61static inline void nfsd4_setup_layout_type(struct svc_export *exp)
62{
63}
64
65static inline void nfsd4_return_all_client_layouts(struct nfs4_client *clp)
66{
67}
68static inline void nfsd4_return_all_file_layouts(struct nfs4_client *clp,
69 struct nfs4_file *fp)
70{
71}
72static inline void nfsd4_exit_pnfs(void)
73{
74}
75static inline int nfsd4_init_pnfs(void)
76{
77 return 0;
78}
79#endif /* CONFIG_NFSD_PNFS */
80#endif /* _FS_NFSD_PNFS_H */
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 38ebb1268b59..5f66b7fd0297 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -92,6 +92,7 @@ struct nfs4_stid {
92/* For a deleg stateid kept around only to process free_stateid's: */ 92/* For a deleg stateid kept around only to process free_stateid's: */
93#define NFS4_REVOKED_DELEG_STID 16 93#define NFS4_REVOKED_DELEG_STID 16
94#define NFS4_CLOSED_DELEG_STID 32 94#define NFS4_CLOSED_DELEG_STID 32
95#define NFS4_LAYOUT_STID 64
95 unsigned char sc_type; 96 unsigned char sc_type;
96 stateid_t sc_stateid; 97 stateid_t sc_stateid;
97 struct nfs4_client *sc_client; 98 struct nfs4_client *sc_client;
@@ -297,6 +298,9 @@ struct nfs4_client {
297 struct list_head cl_delegations; 298 struct list_head cl_delegations;
298 struct list_head cl_revoked; /* unacknowledged, revoked 4.1 state */ 299 struct list_head cl_revoked; /* unacknowledged, revoked 4.1 state */
299 struct list_head cl_lru; /* tail queue */ 300 struct list_head cl_lru; /* tail queue */
301#ifdef CONFIG_NFSD_PNFS
302 struct list_head cl_lo_states; /* outstanding layout states */
303#endif
300 struct xdr_netobj cl_name; /* id generated by client */ 304 struct xdr_netobj cl_name; /* id generated by client */
301 nfs4_verifier cl_verifier; /* generated by client */ 305 nfs4_verifier cl_verifier; /* generated by client */
302 time_t cl_time; /* time of last lease renewal */ 306 time_t cl_time; /* time of last lease renewal */
@@ -496,6 +500,9 @@ struct nfs4_file {
496 int fi_delegees; 500 int fi_delegees;
497 struct knfsd_fh fi_fhandle; 501 struct knfsd_fh fi_fhandle;
498 bool fi_had_conflict; 502 bool fi_had_conflict;
503#ifdef CONFIG_NFSD_PNFS
504 struct list_head fi_lo_states;
505#endif
499}; 506};
500 507
501/* 508/*
@@ -528,6 +535,20 @@ static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s)
528 return container_of(s, struct nfs4_ol_stateid, st_stid); 535 return container_of(s, struct nfs4_ol_stateid, st_stid);
529} 536}
530 537
/*
 * Layout stateid: extends struct nfs4_stid (embedded as ls_stid, see
 * layoutstateid()) and is the top-level structure for managing the
 * layouts handed out under one stateid.
 *
 * ls_perclnt / ls_perfile: list linkage; presumably onto the client's
 *     cl_lo_states and the file's fi_lo_states lists respectively
 *     -- TODO confirm against the code that adds them.
 * ls_lock:        spinlock; presumably protects ls_layouts -- TODO confirm.
 * ls_layouts:     list of layouts hanging off this stateid.
 * ls_layout_type: layout type associated with this stateid.
 */
538struct nfs4_layout_stateid {
539 struct nfs4_stid ls_stid;
540 struct list_head ls_perclnt;
541 struct list_head ls_perfile;
542 spinlock_t ls_lock;
543 struct list_head ls_layouts;
544 u32 ls_layout_type;
545};
546
547static inline struct nfs4_layout_stateid *layoutstateid(struct nfs4_stid *s)
548{
549 return container_of(s, struct nfs4_layout_stateid, ls_stid);
550}
551
531/* flags for preprocess_seqid_op() */ 552/* flags for preprocess_seqid_op() */
532#define RD_STATE 0x00000010 553#define RD_STATE 0x00000010
533#define WR_STATE 0x00000020 554#define WR_STATE 0x00000020
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 90a5925bd6ab..0bda93e58e1b 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -428,6 +428,61 @@ struct nfsd4_reclaim_complete {
428 u32 rca_one_fs; 428 u32 rca_one_fs;
429}; 429};
430 430
/*
 * pNFS device ID as exchanged with the client (copied to and from the
 * wire as one opaque blob of sizeof(struct nfsd4_deviceid) bytes).
 *
 * fsid_idx:   64-bit index that maps to a filesystem ID, used to find the
 *             export to authenticate against
 * generation: 32-bit generation, bumped when the device changes
 * pad:        currently unused; per the commit description it could later
 *             be used to support more than one device per export
 */
431struct nfsd4_deviceid {
432 u64 fsid_idx;
433 u32 generation;
434 u32 pad;
435};
436
437struct nfsd4_layout_seg {
438 u32 iomode;
439 u64 offset;
440 u64 length;
441};
442
443struct nfsd4_getdeviceinfo {
444 struct nfsd4_deviceid gd_devid; /* request */
445 u32 gd_layout_type; /* request */
446 u32 gd_maxcount; /* request */
447 u32 gd_notify_types;/* request - response */
448 void *gd_device; /* response */
449};
450
451struct nfsd4_layoutget {
452 u64 lg_minlength; /* request */
453 u32 lg_signal; /* request */
454 u32 lg_layout_type; /* request */
455 u32 lg_maxcount; /* request */
456 stateid_t lg_sid; /* request/response */
457 struct nfsd4_layout_seg lg_seg; /* request/response */
458 void *lg_content; /* response */
459};
460
461struct nfsd4_layoutcommit {
462 stateid_t lc_sid; /* request */
463 struct nfsd4_layout_seg lc_seg; /* request */
464 u32 lc_reclaim; /* request */
465 u32 lc_newoffset; /* request */
466 u64 lc_last_wr; /* request */
467 struct timespec lc_mtime; /* request */
468 u32 lc_layout_type; /* request */
469 u32 lc_up_len; /* layout length */
470 void *lc_up_layout; /* decoded by callback */
471 u32 lc_size_chg; /* boolean for response */
472 u64 lc_newsize; /* response */
473};
474
475struct nfsd4_layoutreturn {
476 u32 lr_return_type; /* request */
477 u32 lr_layout_type; /* request */
478 struct nfsd4_layout_seg lr_seg; /* request */
479 u32 lr_reclaim; /* request */
480 u32 lrf_body_len; /* request */
481 void *lrf_body; /* request */
482 stateid_t lr_sid; /* request/response */
483 u32 lrs_present; /* response */
484};
485
431struct nfsd4_fallocate { 486struct nfsd4_fallocate {
432 /* request */ 487 /* request */
433 stateid_t falloc_stateid; 488 stateid_t falloc_stateid;
@@ -491,6 +546,10 @@ struct nfsd4_op {
491 struct nfsd4_reclaim_complete reclaim_complete; 546 struct nfsd4_reclaim_complete reclaim_complete;
492 struct nfsd4_test_stateid test_stateid; 547 struct nfsd4_test_stateid test_stateid;
493 struct nfsd4_free_stateid free_stateid; 548 struct nfsd4_free_stateid free_stateid;
549 struct nfsd4_getdeviceinfo getdeviceinfo;
550 struct nfsd4_layoutget layoutget;
551 struct nfsd4_layoutcommit layoutcommit;
552 struct nfsd4_layoutreturn layoutreturn;
494 553
495 /* NFSv4.2 */ 554 /* NFSv4.2 */
496 struct nfsd4_fallocate allocate; 555 struct nfsd4_fallocate allocate;