summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--fs/nfsd/Kconfig10
-rw-r--r--fs/nfsd/Makefile1
-rw-r--r--fs/nfsd/export.c8
-rw-r--r--fs/nfsd/export.h2
-rw-r--r--fs/nfsd/nfs4layouts.c487
-rw-r--r--fs/nfsd/nfs4proc.c302
-rw-r--r--fs/nfsd/nfs4state.c16
-rw-r--r--fs/nfsd/nfs4xdr.c312
-rw-r--r--fs/nfsd/nfsctl.c9
-rw-r--r--fs/nfsd/nfsd.h16
-rw-r--r--fs/nfsd/pnfs.h80
-rw-r--r--fs/nfsd/state.h21
-rw-r--r--fs/nfsd/xdr4.h59
-rw-r--r--include/linux/nfs4.h1
-rw-r--r--include/uapi/linux/nfsd/debug.h1
-rw-r--r--include/uapi/linux/nfsd/export.h4
16 files changed, 1324 insertions, 5 deletions
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index 73395156bdb4..683bf718aead 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -82,6 +82,16 @@ config NFSD_V4
82 82
83 If unsure, say N. 83 If unsure, say N.
84 84
85config NFSD_PNFS
86 bool "NFSv4.1 server support for Parallel NFS (pNFS)"
87 depends on NFSD_V4
88 help
89 This option enables support for the parallel NFS features of the
90 minor version 1 of the NFSv4 protocol (RFC5661) in the kernel's NFS
91 server.
92
93 If unsure, say N.
94
85config NFSD_V4_SECURITY_LABEL 95config NFSD_V4_SECURITY_LABEL
86 bool "Provide Security Label support for NFSv4 server" 96 bool "Provide Security Label support for NFSv4 server"
87 depends on NFSD_V4 && SECURITY 97 depends on NFSD_V4 && SECURITY
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index af32ef06b4fe..5806270a8567 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -12,3 +12,4 @@ nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
12nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o 12nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
13nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \ 13nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
14 nfs4acl.o nfs4callback.o nfs4recover.o 14 nfs4acl.o nfs4callback.o nfs4recover.o
15nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o
diff --git a/fs/nfsd/export.c b/fs/nfsd/export.c
index 30a739d896ff..c3e3b6e55ae2 100644
--- a/fs/nfsd/export.c
+++ b/fs/nfsd/export.c
@@ -20,6 +20,7 @@
20#include "nfsd.h" 20#include "nfsd.h"
21#include "nfsfh.h" 21#include "nfsfh.h"
22#include "netns.h" 22#include "netns.h"
23#include "pnfs.h"
23 24
24#define NFSDDBG_FACILITY NFSDDBG_EXPORT 25#define NFSDDBG_FACILITY NFSDDBG_EXPORT
25 26
@@ -545,6 +546,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
545 546
546 exp.ex_client = dom; 547 exp.ex_client = dom;
547 exp.cd = cd; 548 exp.cd = cd;
549 exp.ex_devid_map = NULL;
548 550
549 /* expiry */ 551 /* expiry */
550 err = -EINVAL; 552 err = -EINVAL;
@@ -621,6 +623,8 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
621 if (!gid_valid(exp.ex_anon_gid)) 623 if (!gid_valid(exp.ex_anon_gid))
622 goto out4; 624 goto out4;
623 err = 0; 625 err = 0;
626
627 nfsd4_setup_layout_type(&exp);
624 } 628 }
625 629
626 expp = svc_export_lookup(&exp); 630 expp = svc_export_lookup(&exp);
@@ -703,6 +707,7 @@ static void svc_export_init(struct cache_head *cnew, struct cache_head *citem)
703 new->ex_fslocs.locations = NULL; 707 new->ex_fslocs.locations = NULL;
704 new->ex_fslocs.locations_count = 0; 708 new->ex_fslocs.locations_count = 0;
705 new->ex_fslocs.migrated = 0; 709 new->ex_fslocs.migrated = 0;
710 new->ex_layout_type = 0;
706 new->ex_uuid = NULL; 711 new->ex_uuid = NULL;
707 new->cd = item->cd; 712 new->cd = item->cd;
708} 713}
@@ -717,6 +722,8 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem)
717 new->ex_anon_uid = item->ex_anon_uid; 722 new->ex_anon_uid = item->ex_anon_uid;
718 new->ex_anon_gid = item->ex_anon_gid; 723 new->ex_anon_gid = item->ex_anon_gid;
719 new->ex_fsid = item->ex_fsid; 724 new->ex_fsid = item->ex_fsid;
725 new->ex_devid_map = item->ex_devid_map;
726 item->ex_devid_map = NULL;
720 new->ex_uuid = item->ex_uuid; 727 new->ex_uuid = item->ex_uuid;
721 item->ex_uuid = NULL; 728 item->ex_uuid = NULL;
722 new->ex_fslocs.locations = item->ex_fslocs.locations; 729 new->ex_fslocs.locations = item->ex_fslocs.locations;
@@ -725,6 +732,7 @@ static void export_update(struct cache_head *cnew, struct cache_head *citem)
725 item->ex_fslocs.locations_count = 0; 732 item->ex_fslocs.locations_count = 0;
726 new->ex_fslocs.migrated = item->ex_fslocs.migrated; 733 new->ex_fslocs.migrated = item->ex_fslocs.migrated;
727 item->ex_fslocs.migrated = 0; 734 item->ex_fslocs.migrated = 0;
735 new->ex_layout_type = item->ex_layout_type;
728 new->ex_nflavors = item->ex_nflavors; 736 new->ex_nflavors = item->ex_nflavors;
729 for (i = 0; i < MAX_SECINFO_LIST; i++) { 737 for (i = 0; i < MAX_SECINFO_LIST; i++) {
730 new->ex_flavors[i] = item->ex_flavors[i]; 738 new->ex_flavors[i] = item->ex_flavors[i];
diff --git a/fs/nfsd/export.h b/fs/nfsd/export.h
index 04dc8c167b0c..1f52bfcc436f 100644
--- a/fs/nfsd/export.h
+++ b/fs/nfsd/export.h
@@ -56,6 +56,8 @@ struct svc_export {
56 struct nfsd4_fs_locations ex_fslocs; 56 struct nfsd4_fs_locations ex_fslocs;
57 uint32_t ex_nflavors; 57 uint32_t ex_nflavors;
58 struct exp_flavor_info ex_flavors[MAX_SECINFO_LIST]; 58 struct exp_flavor_info ex_flavors[MAX_SECINFO_LIST];
59 enum pnfs_layouttype ex_layout_type;
60 struct nfsd4_deviceid_map *ex_devid_map;
59 struct cache_detail *cd; 61 struct cache_detail *cd;
60}; 62};
61 63
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
new file mode 100644
index 000000000000..8273270418b1
--- /dev/null
+++ b/fs/nfsd/nfs4layouts.c
@@ -0,0 +1,487 @@
1/*
2 * Copyright (c) 2014 Christoph Hellwig.
3 */
4#include <linux/jhash.h>
5#include <linux/sched.h>
6
7#include "pnfs.h"
8#include "netns.h"
9
10#define NFSDDBG_FACILITY NFSDDBG_PNFS
11
/*
 * One layout segment tracked by the server on behalf of a layout stateid.
 */
12struct nfs4_layout {
/* Link in the owning stateid's ls_layouts list, or in a temporary reap list. */
13 struct list_head lo_perstate;
/* Layout stateid this segment belongs to; the free path drops a stid reference through it. */
14 struct nfs4_layout_stateid *lo_state;
/* Segment description (iomode, offset, length), copied from the LAYOUTGET arguments. */
15 struct nfsd4_layout_seg lo_seg;
16};
17
18static struct kmem_cache *nfs4_layout_cache;
19static struct kmem_cache *nfs4_layout_stateid_cache;
20
21const struct nfsd4_layout_ops *nfsd4_layout_ops[LAYOUT_TYPE_MAX] = {
22};
23
24/* pNFS device ID to export fsid mapping */
25#define DEVID_HASH_BITS 8
26#define DEVID_HASH_SIZE (1 << DEVID_HASH_BITS)
27#define DEVID_HASH_MASK (DEVID_HASH_SIZE - 1)
28static u64 nfsd_devid_seq = 1;
29static struct list_head nfsd_devid_hash[DEVID_HASH_SIZE];
30static DEFINE_SPINLOCK(nfsd_devid_lock);
31
32static inline u32 devid_hashfn(u64 idx)
33{
34 return jhash_2words(idx, idx >> 32, 0) & DEVID_HASH_MASK;
35}
36
37static void
38nfsd4_alloc_devid_map(const struct svc_fh *fhp)
39{
40 const struct knfsd_fh *fh = &fhp->fh_handle;
41 size_t fsid_len = key_len(fh->fh_fsid_type);
42 struct nfsd4_deviceid_map *map, *old;
43 int i;
44
45 map = kzalloc(sizeof(*map) + fsid_len, GFP_KERNEL);
46 if (!map)
47 return;
48
49 map->fsid_type = fh->fh_fsid_type;
50 memcpy(&map->fsid, fh->fh_fsid, fsid_len);
51
52 spin_lock(&nfsd_devid_lock);
53 if (fhp->fh_export->ex_devid_map)
54 goto out_unlock;
55
56 for (i = 0; i < DEVID_HASH_SIZE; i++) {
57 list_for_each_entry(old, &nfsd_devid_hash[i], hash) {
58 if (old->fsid_type != fh->fh_fsid_type)
59 continue;
60 if (memcmp(old->fsid, fh->fh_fsid,
61 key_len(old->fsid_type)))
62 continue;
63
64 fhp->fh_export->ex_devid_map = old;
65 goto out_unlock;
66 }
67 }
68
69 map->idx = nfsd_devid_seq++;
70 list_add_tail_rcu(&map->hash, &nfsd_devid_hash[devid_hashfn(map->idx)]);
71 fhp->fh_export->ex_devid_map = map;
72 map = NULL;
73
74out_unlock:
75 spin_unlock(&nfsd_devid_lock);
76 kfree(map);
77}
78
79struct nfsd4_deviceid_map *
80nfsd4_find_devid_map(int idx)
81{
82 struct nfsd4_deviceid_map *map, *ret = NULL;
83
84 rcu_read_lock();
85 list_for_each_entry_rcu(map, &nfsd_devid_hash[devid_hashfn(idx)], hash)
86 if (map->idx == idx)
87 ret = map;
88 rcu_read_unlock();
89
90 return ret;
91}
92
93int
94nfsd4_set_deviceid(struct nfsd4_deviceid *id, const struct svc_fh *fhp,
95 u32 device_generation)
96{
97 if (!fhp->fh_export->ex_devid_map) {
98 nfsd4_alloc_devid_map(fhp);
99 if (!fhp->fh_export->ex_devid_map)
100 return -ENOMEM;
101 }
102
103 id->fsid_idx = fhp->fh_export->ex_devid_map->idx;
104 id->generation = device_generation;
105 id->pad = 0;
106 return 0;
107}
108
109void nfsd4_setup_layout_type(struct svc_export *exp)
110{
111 if (exp->ex_flags & NFSEXP_NOPNFS)
112 return;
113}
114
115static void
116nfsd4_free_layout_stateid(struct nfs4_stid *stid)
117{
118 struct nfs4_layout_stateid *ls = layoutstateid(stid);
119 struct nfs4_client *clp = ls->ls_stid.sc_client;
120 struct nfs4_file *fp = ls->ls_stid.sc_file;
121
122 spin_lock(&clp->cl_lock);
123 list_del_init(&ls->ls_perclnt);
124 spin_unlock(&clp->cl_lock);
125
126 spin_lock(&fp->fi_lock);
127 list_del_init(&ls->ls_perfile);
128 spin_unlock(&fp->fi_lock);
129
130 kmem_cache_free(nfs4_layout_stateid_cache, ls);
131}
132
133static struct nfs4_layout_stateid *
134nfsd4_alloc_layout_stateid(struct nfsd4_compound_state *cstate,
135 struct nfs4_stid *parent, u32 layout_type)
136{
137 struct nfs4_client *clp = cstate->clp;
138 struct nfs4_file *fp = parent->sc_file;
139 struct nfs4_layout_stateid *ls;
140 struct nfs4_stid *stp;
141
142 stp = nfs4_alloc_stid(cstate->clp, nfs4_layout_stateid_cache);
143 if (!stp)
144 return NULL;
145 stp->sc_free = nfsd4_free_layout_stateid;
146 get_nfs4_file(fp);
147 stp->sc_file = fp;
148
149 ls = layoutstateid(stp);
150 INIT_LIST_HEAD(&ls->ls_perclnt);
151 INIT_LIST_HEAD(&ls->ls_perfile);
152 spin_lock_init(&ls->ls_lock);
153 INIT_LIST_HEAD(&ls->ls_layouts);
154 ls->ls_layout_type = layout_type;
155
156 spin_lock(&clp->cl_lock);
157 stp->sc_type = NFS4_LAYOUT_STID;
158 list_add(&ls->ls_perclnt, &clp->cl_lo_states);
159 spin_unlock(&clp->cl_lock);
160
161 spin_lock(&fp->fi_lock);
162 list_add(&ls->ls_perfile, &fp->fi_lo_states);
163 spin_unlock(&fp->fi_lock);
164
165 return ls;
166}
167
168__be32
169nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
170 struct nfsd4_compound_state *cstate, stateid_t *stateid,
171 bool create, u32 layout_type, struct nfs4_layout_stateid **lsp)
172{
173 struct nfs4_layout_stateid *ls;
174 struct nfs4_stid *stid;
175 unsigned char typemask = NFS4_LAYOUT_STID;
176 __be32 status;
177
178 if (create)
179 typemask |= (NFS4_OPEN_STID | NFS4_LOCK_STID | NFS4_DELEG_STID);
180
181 status = nfsd4_lookup_stateid(cstate, stateid, typemask, &stid,
182 net_generic(SVC_NET(rqstp), nfsd_net_id));
183 if (status)
184 goto out;
185
186 if (!fh_match(&cstate->current_fh.fh_handle,
187 &stid->sc_file->fi_fhandle)) {
188 status = nfserr_bad_stateid;
189 goto out_put_stid;
190 }
191
192 if (stid->sc_type != NFS4_LAYOUT_STID) {
193 ls = nfsd4_alloc_layout_stateid(cstate, stid, layout_type);
194 nfs4_put_stid(stid);
195
196 status = nfserr_jukebox;
197 if (!ls)
198 goto out;
199 } else {
200 ls = container_of(stid, struct nfs4_layout_stateid, ls_stid);
201
202 status = nfserr_bad_stateid;
203 if (stateid->si_generation > stid->sc_stateid.si_generation)
204 goto out_put_stid;
205 if (layout_type != ls->ls_layout_type)
206 goto out_put_stid;
207 }
208
209 *lsp = ls;
210 return 0;
211
212out_put_stid:
213 nfs4_put_stid(stid);
214out:
215 return status;
216}
217
218static inline u64
219layout_end(struct nfsd4_layout_seg *seg)
220{
221 u64 end = seg->offset + seg->length;
222 return end >= seg->offset ? end : NFS4_MAX_UINT64;
223}
224
225static void
226layout_update_len(struct nfsd4_layout_seg *lo, u64 end)
227{
228 if (end == NFS4_MAX_UINT64)
229 lo->length = NFS4_MAX_UINT64;
230 else
231 lo->length = end - lo->offset;
232}
233
234static bool
235layouts_overlapping(struct nfs4_layout *lo, struct nfsd4_layout_seg *s)
236{
237 if (s->iomode != IOMODE_ANY && s->iomode != lo->lo_seg.iomode)
238 return false;
239 if (layout_end(&lo->lo_seg) <= s->offset)
240 return false;
241 if (layout_end(s) <= lo->lo_seg.offset)
242 return false;
243 return true;
244}
245
246static bool
247layouts_try_merge(struct nfsd4_layout_seg *lo, struct nfsd4_layout_seg *new)
248{
249 if (lo->iomode != new->iomode)
250 return false;
251 if (layout_end(new) < lo->offset)
252 return false;
253 if (layout_end(lo) < new->offset)
254 return false;
255
256 lo->offset = min(lo->offset, new->offset);
257 layout_update_len(lo, max(layout_end(lo), layout_end(new)));
258 return true;
259}
260
261__be32
262nfsd4_insert_layout(struct nfsd4_layoutget *lgp, struct nfs4_layout_stateid *ls)
263{
264 struct nfsd4_layout_seg *seg = &lgp->lg_seg;
265 struct nfs4_layout *lp, *new = NULL;
266
267 spin_lock(&ls->ls_lock);
268 list_for_each_entry(lp, &ls->ls_layouts, lo_perstate) {
269 if (layouts_try_merge(&lp->lo_seg, seg))
270 goto done;
271 }
272 spin_unlock(&ls->ls_lock);
273
274 new = kmem_cache_alloc(nfs4_layout_cache, GFP_KERNEL);
275 if (!new)
276 return nfserr_jukebox;
277 memcpy(&new->lo_seg, seg, sizeof(lp->lo_seg));
278 new->lo_state = ls;
279
280 spin_lock(&ls->ls_lock);
281 list_for_each_entry(lp, &ls->ls_layouts, lo_perstate) {
282 if (layouts_try_merge(&lp->lo_seg, seg))
283 goto done;
284 }
285
286 atomic_inc(&ls->ls_stid.sc_count);
287 list_add_tail(&new->lo_perstate, &ls->ls_layouts);
288 new = NULL;
289done:
290 update_stateid(&ls->ls_stid.sc_stateid);
291 memcpy(&lgp->lg_sid, &ls->ls_stid.sc_stateid, sizeof(stateid_t));
292 spin_unlock(&ls->ls_lock);
293 if (new)
294 kmem_cache_free(nfs4_layout_cache, new);
295 return nfs_ok;
296}
297
298static void
299nfsd4_free_layouts(struct list_head *reaplist)
300{
301 while (!list_empty(reaplist)) {
302 struct nfs4_layout *lp = list_first_entry(reaplist,
303 struct nfs4_layout, lo_perstate);
304
305 list_del(&lp->lo_perstate);
306 nfs4_put_stid(&lp->lo_state->ls_stid);
307 kmem_cache_free(nfs4_layout_cache, lp);
308 }
309}
310
311static void
312nfsd4_return_file_layout(struct nfs4_layout *lp, struct nfsd4_layout_seg *seg,
313 struct list_head *reaplist)
314{
315 struct nfsd4_layout_seg *lo = &lp->lo_seg;
316 u64 end = layout_end(lo);
317
318 if (seg->offset <= lo->offset) {
319 if (layout_end(seg) >= end) {
320 list_move_tail(&lp->lo_perstate, reaplist);
321 return;
322 }
323 end = seg->offset;
324 } else {
325 /* retain the whole layout segment on a split. */
326 if (layout_end(seg) < end) {
327 dprintk("%s: split not supported\n", __func__);
328 return;
329 }
330
331 lo->offset = layout_end(seg);
332 }
333
334 layout_update_len(lo, end);
335}
336
337__be32
338nfsd4_return_file_layouts(struct svc_rqst *rqstp,
339 struct nfsd4_compound_state *cstate,
340 struct nfsd4_layoutreturn *lrp)
341{
342 struct nfs4_layout_stateid *ls;
343 struct nfs4_layout *lp, *n;
344 LIST_HEAD(reaplist);
345 __be32 nfserr;
346 int found = 0;
347
348 nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lrp->lr_sid,
349 false, lrp->lr_layout_type,
350 &ls);
351 if (nfserr)
352 return nfserr;
353
354 spin_lock(&ls->ls_lock);
355 list_for_each_entry_safe(lp, n, &ls->ls_layouts, lo_perstate) {
356 if (layouts_overlapping(lp, &lrp->lr_seg)) {
357 nfsd4_return_file_layout(lp, &lrp->lr_seg, &reaplist);
358 found++;
359 }
360 }
361 if (!list_empty(&ls->ls_layouts)) {
362 if (found) {
363 update_stateid(&ls->ls_stid.sc_stateid);
364 memcpy(&lrp->lr_sid, &ls->ls_stid.sc_stateid,
365 sizeof(stateid_t));
366 }
367 lrp->lrs_present = 1;
368 } else {
369 nfs4_unhash_stid(&ls->ls_stid);
370 lrp->lrs_present = 0;
371 }
372 spin_unlock(&ls->ls_lock);
373
374 nfs4_put_stid(&ls->ls_stid);
375 nfsd4_free_layouts(&reaplist);
376 return nfs_ok;
377}
378
379__be32
380nfsd4_return_client_layouts(struct svc_rqst *rqstp,
381 struct nfsd4_compound_state *cstate,
382 struct nfsd4_layoutreturn *lrp)
383{
384 struct nfs4_layout_stateid *ls, *n;
385 struct nfs4_client *clp = cstate->clp;
386 struct nfs4_layout *lp, *t;
387 LIST_HEAD(reaplist);
388
389 lrp->lrs_present = 0;
390
391 spin_lock(&clp->cl_lock);
392 list_for_each_entry_safe(ls, n, &clp->cl_lo_states, ls_perclnt) {
393 if (lrp->lr_return_type == RETURN_FSID &&
394 !fh_fsid_match(&ls->ls_stid.sc_file->fi_fhandle,
395 &cstate->current_fh.fh_handle))
396 continue;
397
398 spin_lock(&ls->ls_lock);
399 list_for_each_entry_safe(lp, t, &ls->ls_layouts, lo_perstate) {
400 if (lrp->lr_seg.iomode == IOMODE_ANY ||
401 lrp->lr_seg.iomode == lp->lo_seg.iomode)
402 list_move_tail(&lp->lo_perstate, &reaplist);
403 }
404 spin_unlock(&ls->ls_lock);
405 }
406 spin_unlock(&clp->cl_lock);
407
408 nfsd4_free_layouts(&reaplist);
409 return 0;
410}
411
412static void
413nfsd4_return_all_layouts(struct nfs4_layout_stateid *ls,
414 struct list_head *reaplist)
415{
416 spin_lock(&ls->ls_lock);
417 list_splice_init(&ls->ls_layouts, reaplist);
418 spin_unlock(&ls->ls_lock);
419}
420
421void
422nfsd4_return_all_client_layouts(struct nfs4_client *clp)
423{
424 struct nfs4_layout_stateid *ls, *n;
425 LIST_HEAD(reaplist);
426
427 spin_lock(&clp->cl_lock);
428 list_for_each_entry_safe(ls, n, &clp->cl_lo_states, ls_perclnt)
429 nfsd4_return_all_layouts(ls, &reaplist);
430 spin_unlock(&clp->cl_lock);
431
432 nfsd4_free_layouts(&reaplist);
433}
434
435void
436nfsd4_return_all_file_layouts(struct nfs4_client *clp, struct nfs4_file *fp)
437{
438 struct nfs4_layout_stateid *ls, *n;
439 LIST_HEAD(reaplist);
440
441 spin_lock(&fp->fi_lock);
442 list_for_each_entry_safe(ls, n, &fp->fi_lo_states, ls_perfile) {
443 if (ls->ls_stid.sc_client == clp)
444 nfsd4_return_all_layouts(ls, &reaplist);
445 }
446 spin_unlock(&fp->fi_lock);
447
448 nfsd4_free_layouts(&reaplist);
449}
450
451int
452nfsd4_init_pnfs(void)
453{
454 int i;
455
456 for (i = 0; i < DEVID_HASH_SIZE; i++)
457 INIT_LIST_HEAD(&nfsd_devid_hash[i]);
458
459 nfs4_layout_cache = kmem_cache_create("nfs4_layout",
460 sizeof(struct nfs4_layout), 0, 0, NULL);
461 if (!nfs4_layout_cache)
462 return -ENOMEM;
463
464 nfs4_layout_stateid_cache = kmem_cache_create("nfs4_layout_stateid",
465 sizeof(struct nfs4_layout_stateid), 0, 0, NULL);
466 if (!nfs4_layout_stateid_cache) {
467 kmem_cache_destroy(nfs4_layout_cache);
468 return -ENOMEM;
469 }
470 return 0;
471}
472
473void
474nfsd4_exit_pnfs(void)
475{
476 int i;
477
478 kmem_cache_destroy(nfs4_layout_cache);
479 kmem_cache_destroy(nfs4_layout_stateid_cache);
480
481 for (i = 0; i < DEVID_HASH_SIZE; i++) {
482 struct nfsd4_deviceid_map *map, *n;
483
484 list_for_each_entry_safe(map, n, &nfsd_devid_hash[i], hash)
485 kfree(map);
486 }
487}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index ac71d13c69ef..2b91443497cc 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -43,6 +43,7 @@
43#include "current_stateid.h" 43#include "current_stateid.h"
44#include "netns.h" 44#include "netns.h"
45#include "acl.h" 45#include "acl.h"
46#include "pnfs.h"
46 47
47#ifdef CONFIG_NFSD_V4_SECURITY_LABEL 48#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
48#include <linux/security.h> 49#include <linux/security.h>
@@ -1178,6 +1179,252 @@ nfsd4_verify(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
1178 return status == nfserr_same ? nfs_ok : status; 1179 return status == nfserr_same ? nfs_ok : status;
1179} 1180}
1180 1181
1182#ifdef CONFIG_NFSD_PNFS
1183static const struct nfsd4_layout_ops *
1184nfsd4_layout_verify(struct svc_export *exp, unsigned int layout_type)
1185{
1186 if (!exp->ex_layout_type) {
1187 dprintk("%s: export does not support pNFS\n", __func__);
1188 return NULL;
1189 }
1190
1191 if (exp->ex_layout_type != layout_type) {
1192 dprintk("%s: layout type %d not supported\n",
1193 __func__, layout_type);
1194 return NULL;
1195 }
1196
1197 return nfsd4_layout_ops[layout_type];
1198}
1199
1200static __be32
1201nfsd4_getdeviceinfo(struct svc_rqst *rqstp,
1202 struct nfsd4_compound_state *cstate,
1203 struct nfsd4_getdeviceinfo *gdp)
1204{
1205 const struct nfsd4_layout_ops *ops;
1206 struct nfsd4_deviceid_map *map;
1207 struct svc_export *exp;
1208 __be32 nfserr;
1209
1210 dprintk("%s: layout_type %u dev_id [0x%llx:0x%x] maxcnt %u\n",
1211 __func__,
1212 gdp->gd_layout_type,
1213 gdp->gd_devid.fsid_idx, gdp->gd_devid.generation,
1214 gdp->gd_maxcount);
1215
1216 map = nfsd4_find_devid_map(gdp->gd_devid.fsid_idx);
1217 if (!map) {
1218 dprintk("%s: couldn't find device ID to export mapping!\n",
1219 __func__);
1220 return nfserr_noent;
1221 }
1222
1223 exp = rqst_exp_find(rqstp, map->fsid_type, map->fsid);
1224 if (IS_ERR(exp)) {
1225 dprintk("%s: could not find device id\n", __func__);
1226 return nfserr_noent;
1227 }
1228
1229 nfserr = nfserr_layoutunavailable;
1230 ops = nfsd4_layout_verify(exp, gdp->gd_layout_type);
1231 if (!ops)
1232 goto out;
1233
1234 nfserr = nfs_ok;
1235 if (gdp->gd_maxcount != 0)
1236 nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, gdp);
1237
1238 gdp->gd_notify_types &= ops->notify_types;
1239 exp_put(exp);
1240out:
1241 return nfserr;
1242}
1243
1244static __be32
1245nfsd4_layoutget(struct svc_rqst *rqstp,
1246 struct nfsd4_compound_state *cstate,
1247 struct nfsd4_layoutget *lgp)
1248{
1249 struct svc_fh *current_fh = &cstate->current_fh;
1250 const struct nfsd4_layout_ops *ops;
1251 struct nfs4_layout_stateid *ls;
1252 __be32 nfserr;
1253 int accmode;
1254
1255 switch (lgp->lg_seg.iomode) {
1256 case IOMODE_READ:
1257 accmode = NFSD_MAY_READ;
1258 break;
1259 case IOMODE_RW:
1260 accmode = NFSD_MAY_READ | NFSD_MAY_WRITE;
1261 break;
1262 default:
1263 dprintk("%s: invalid iomode %d\n",
1264 __func__, lgp->lg_seg.iomode);
1265 nfserr = nfserr_badiomode;
1266 goto out;
1267 }
1268
1269 nfserr = fh_verify(rqstp, current_fh, 0, accmode);
1270 if (nfserr)
1271 goto out;
1272
1273 nfserr = nfserr_layoutunavailable;
1274 ops = nfsd4_layout_verify(current_fh->fh_export, lgp->lg_layout_type);
1275 if (!ops)
1276 goto out;
1277
1278 /*
1279 * Verify minlength and range as per RFC5661:
1280 * o If loga_length is less than loga_minlength,
1281 * the metadata server MUST return NFS4ERR_INVAL.
1282 * o If the sum of loga_offset and loga_minlength exceeds
1283 * NFS4_UINT64_MAX, and loga_minlength is not
1284 * NFS4_UINT64_MAX, the error NFS4ERR_INVAL MUST result.
1285 * o If the sum of loga_offset and loga_length exceeds
1286 * NFS4_UINT64_MAX, and loga_length is not NFS4_UINT64_MAX,
1287 * the error NFS4ERR_INVAL MUST result.
1288 */
1289 nfserr = nfserr_inval;
1290 if (lgp->lg_seg.length < lgp->lg_minlength ||
1291 (lgp->lg_minlength != NFS4_MAX_UINT64 &&
1292 lgp->lg_minlength > NFS4_MAX_UINT64 - lgp->lg_seg.offset) ||
1293 (lgp->lg_seg.length != NFS4_MAX_UINT64 &&
1294 lgp->lg_seg.length > NFS4_MAX_UINT64 - lgp->lg_seg.offset))
1295 goto out;
1296 if (lgp->lg_seg.length == 0)
1297 goto out;
1298
1299 nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lgp->lg_sid,
1300 true, lgp->lg_layout_type, &ls);
1301 if (nfserr)
1302 goto out;
1303
1304 nfserr = ops->proc_layoutget(current_fh->fh_dentry->d_inode,
1305 current_fh, lgp);
1306 if (nfserr)
1307 goto out_put_stid;
1308
1309 nfserr = nfsd4_insert_layout(lgp, ls);
1310
1311out_put_stid:
1312 nfs4_put_stid(&ls->ls_stid);
1313out:
1314 return nfserr;
1315}
1316
1317static __be32
1318nfsd4_layoutcommit(struct svc_rqst *rqstp,
1319 struct nfsd4_compound_state *cstate,
1320 struct nfsd4_layoutcommit *lcp)
1321{
1322 const struct nfsd4_layout_seg *seg = &lcp->lc_seg;
1323 struct svc_fh *current_fh = &cstate->current_fh;
1324 const struct nfsd4_layout_ops *ops;
1325 loff_t new_size = lcp->lc_last_wr + 1;
1326 struct inode *inode;
1327 struct nfs4_layout_stateid *ls;
1328 __be32 nfserr;
1329
1330 nfserr = fh_verify(rqstp, current_fh, 0, NFSD_MAY_WRITE);
1331 if (nfserr)
1332 goto out;
1333
1334 nfserr = nfserr_layoutunavailable;
1335 ops = nfsd4_layout_verify(current_fh->fh_export, lcp->lc_layout_type);
1336 if (!ops)
1337 goto out;
1338 inode = current_fh->fh_dentry->d_inode;
1339
1340 nfserr = nfserr_inval;
1341 if (new_size <= seg->offset) {
1342 dprintk("pnfsd: last write before layout segment\n");
1343 goto out;
1344 }
1345 if (new_size > seg->offset + seg->length) {
1346 dprintk("pnfsd: last write beyond layout segment\n");
1347 goto out;
1348 }
1349 if (!lcp->lc_newoffset && new_size > i_size_read(inode)) {
1350 dprintk("pnfsd: layoutcommit beyond EOF\n");
1351 goto out;
1352 }
1353
1354 nfserr = nfsd4_preprocess_layout_stateid(rqstp, cstate, &lcp->lc_sid,
1355 false, lcp->lc_layout_type,
1356 &ls);
1357 if (nfserr) {
1358 /* fixup error code as per RFC5661 */
1359 if (nfserr == nfserr_bad_stateid)
1360 nfserr = nfserr_badlayout;
1361 goto out;
1362 }
1363
1364 nfserr = ops->proc_layoutcommit(inode, lcp);
1365 if (nfserr)
1366 goto out_put_stid;
1367
1368 if (new_size > i_size_read(inode)) {
1369 lcp->lc_size_chg = 1;
1370 lcp->lc_newsize = new_size;
1371 } else {
1372 lcp->lc_size_chg = 0;
1373 }
1374
1375out_put_stid:
1376 nfs4_put_stid(&ls->ls_stid);
1377out:
1378 return nfserr;
1379}
1380
1381static __be32
1382nfsd4_layoutreturn(struct svc_rqst *rqstp,
1383 struct nfsd4_compound_state *cstate,
1384 struct nfsd4_layoutreturn *lrp)
1385{
1386 struct svc_fh *current_fh = &cstate->current_fh;
1387 __be32 nfserr;
1388
1389 nfserr = fh_verify(rqstp, current_fh, 0, NFSD_MAY_NOP);
1390 if (nfserr)
1391 goto out;
1392
1393 nfserr = nfserr_layoutunavailable;
1394 if (!nfsd4_layout_verify(current_fh->fh_export, lrp->lr_layout_type))
1395 goto out;
1396
1397 switch (lrp->lr_seg.iomode) {
1398 case IOMODE_READ:
1399 case IOMODE_RW:
1400 case IOMODE_ANY:
1401 break;
1402 default:
1403 dprintk("%s: invalid iomode %d\n", __func__,
1404 lrp->lr_seg.iomode);
1405 nfserr = nfserr_inval;
1406 goto out;
1407 }
1408
1409 switch (lrp->lr_return_type) {
1410 case RETURN_FILE:
1411 nfserr = nfsd4_return_file_layouts(rqstp, cstate, lrp);
1412 break;
1413 case RETURN_FSID:
1414 case RETURN_ALL:
1415 nfserr = nfsd4_return_client_layouts(rqstp, cstate, lrp);
1416 break;
1417 default:
1418 dprintk("%s: invalid return_type %d\n", __func__,
1419 lrp->lr_return_type);
1420 nfserr = nfserr_inval;
1421 break;
1422 }
1423out:
1424 return nfserr;
1425}
1426#endif /* CONFIG_NFSD_PNFS */
1427
1181/* 1428/*
1182 * NULL call. 1429 * NULL call.
1183 */ 1430 */
@@ -1679,6 +1926,36 @@ static inline u32 nfsd4_create_session_rsize(struct svc_rqst *rqstp, struct nfsd
1679 op_encode_channel_attrs_maxsz) * sizeof(__be32); 1926 op_encode_channel_attrs_maxsz) * sizeof(__be32);
1680} 1927}
1681 1928
1929#ifdef CONFIG_NFSD_PNFS
1930/*
1931 * At this stage we don't really know what layout driver will handle the request,
1932 * so we need to define an arbitrary upper bound here.
1933 */
1934#define MAX_LAYOUT_SIZE 128
1935static inline u32 nfsd4_layoutget_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1936{
1937 return (op_encode_hdr_size +
1938 1 /* logr_return_on_close */ +
1939 op_encode_stateid_maxsz +
1940 1 /* nr of layouts */ +
1941 MAX_LAYOUT_SIZE) * sizeof(__be32);
1942}
1943
1944static inline u32 nfsd4_layoutcommit_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1945{
1946 return (op_encode_hdr_size +
1947 1 /* locr_newsize */ +
1948 2 /* ns_size */) * sizeof(__be32);
1949}
1950
1951static inline u32 nfsd4_layoutreturn_rsize(struct svc_rqst *rqstp, struct nfsd4_op *op)
1952{
1953 return (op_encode_hdr_size +
1954 1 /* lrs_stateid */ +
1955 op_encode_stateid_maxsz) * sizeof(__be32);
1956}
1957#endif /* CONFIG_NFSD_PNFS */
1958
1682static struct nfsd4_operation nfsd4_ops[] = { 1959static struct nfsd4_operation nfsd4_ops[] = {
1683 [OP_ACCESS] = { 1960 [OP_ACCESS] = {
1684 .op_func = (nfsd4op_func)nfsd4_access, 1961 .op_func = (nfsd4op_func)nfsd4_access,
@@ -1966,6 +2243,31 @@ static struct nfsd4_operation nfsd4_ops[] = {
1966 .op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid, 2243 .op_get_currentstateid = (stateid_getter)nfsd4_get_freestateid,
1967 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize, 2244 .op_rsize_bop = (nfsd4op_rsize)nfsd4_only_status_rsize,
1968 }, 2245 },
2246#ifdef CONFIG_NFSD_PNFS
2247 [OP_GETDEVICEINFO] = {
2248 .op_func = (nfsd4op_func)nfsd4_getdeviceinfo,
2249 .op_flags = ALLOWED_WITHOUT_FH,
2250 .op_name = "OP_GETDEVICEINFO",
2251 },
2252 [OP_LAYOUTGET] = {
2253 .op_func = (nfsd4op_func)nfsd4_layoutget,
2254 .op_flags = OP_MODIFIES_SOMETHING,
2255 .op_name = "OP_LAYOUTGET",
2256 .op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutget_rsize,
2257 },
2258 [OP_LAYOUTCOMMIT] = {
2259 .op_func = (nfsd4op_func)nfsd4_layoutcommit,
2260 .op_flags = OP_MODIFIES_SOMETHING,
2261 .op_name = "OP_LAYOUTCOMMIT",
2262 .op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutcommit_rsize,
2263 },
2264 [OP_LAYOUTRETURN] = {
2265 .op_func = (nfsd4op_func)nfsd4_layoutreturn,
2266 .op_flags = OP_MODIFIES_SOMETHING,
2267 .op_name = "OP_LAYOUTRETURN",
2268 .op_rsize_bop = (nfsd4op_rsize)nfsd4_layoutreturn_rsize,
2269 },
2270#endif /* CONFIG_NFSD_PNFS */
1969 2271
1970 /* NFSv4.2 operations */ 2272 /* NFSv4.2 operations */
1971 [OP_ALLOCATE] = { 2273 [OP_ALLOCATE] = {
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index eefd29ec43f2..c89f79dc69e2 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -48,6 +48,7 @@
48#include "current_stateid.h" 48#include "current_stateid.h"
49 49
50#include "netns.h" 50#include "netns.h"
51#include "pnfs.h"
51 52
52#define NFSDDBG_FACILITY NFSDDBG_PROC 53#define NFSDDBG_FACILITY NFSDDBG_PROC
53 54
@@ -1539,6 +1540,9 @@ static struct nfs4_client *alloc_client(struct xdr_netobj name)
1539 INIT_LIST_HEAD(&clp->cl_lru); 1540 INIT_LIST_HEAD(&clp->cl_lru);
1540 INIT_LIST_HEAD(&clp->cl_callbacks); 1541 INIT_LIST_HEAD(&clp->cl_callbacks);
1541 INIT_LIST_HEAD(&clp->cl_revoked); 1542 INIT_LIST_HEAD(&clp->cl_revoked);
1543#ifdef CONFIG_NFSD_PNFS
1544 INIT_LIST_HEAD(&clp->cl_lo_states);
1545#endif
1542 spin_lock_init(&clp->cl_lock); 1546 spin_lock_init(&clp->cl_lock);
1543 rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table"); 1547 rpc_init_wait_queue(&clp->cl_cb_waitq, "Backchannel slot table");
1544 return clp; 1548 return clp;
@@ -1643,6 +1647,7 @@ __destroy_client(struct nfs4_client *clp)
1643 nfs4_get_stateowner(&oo->oo_owner); 1647 nfs4_get_stateowner(&oo->oo_owner);
1644 release_openowner(oo); 1648 release_openowner(oo);
1645 } 1649 }
1650 nfsd4_return_all_client_layouts(clp);
1646 nfsd4_shutdown_callback(clp); 1651 nfsd4_shutdown_callback(clp);
1647 if (clp->cl_cb_conn.cb_xprt) 1652 if (clp->cl_cb_conn.cb_xprt)
1648 svc_xprt_put(clp->cl_cb_conn.cb_xprt); 1653 svc_xprt_put(clp->cl_cb_conn.cb_xprt);
@@ -2126,8 +2131,11 @@ nfsd4_replay_cache_entry(struct nfsd4_compoundres *resp,
2126static void 2131static void
2127nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid) 2132nfsd4_set_ex_flags(struct nfs4_client *new, struct nfsd4_exchange_id *clid)
2128{ 2133{
2129 /* pNFS is not supported */ 2134#ifdef CONFIG_NFSD_PNFS
2135 new->cl_exchange_flags |= EXCHGID4_FLAG_USE_PNFS_MDS;
2136#else
2130 new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS; 2137 new->cl_exchange_flags |= EXCHGID4_FLAG_USE_NON_PNFS;
2138#endif
2131 2139
2132 /* Referrals are supported, Migration is not. */ 2140 /* Referrals are supported, Migration is not. */
2133 new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER; 2141 new->cl_exchange_flags |= EXCHGID4_FLAG_SUPP_MOVED_REFER;
@@ -3055,6 +3063,9 @@ static void nfsd4_init_file(struct knfsd_fh *fh, unsigned int hashval,
3055 fp->fi_share_deny = 0; 3063 fp->fi_share_deny = 0;
3056 memset(fp->fi_fds, 0, sizeof(fp->fi_fds)); 3064 memset(fp->fi_fds, 0, sizeof(fp->fi_fds));
3057 memset(fp->fi_access, 0, sizeof(fp->fi_access)); 3065 memset(fp->fi_access, 0, sizeof(fp->fi_access));
3066#ifdef CONFIG_NFSD_PNFS
3067 INIT_LIST_HEAD(&fp->fi_lo_states);
3068#endif
3058 hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]); 3069 hlist_add_head_rcu(&fp->fi_hash, &file_hashtbl[hashval]);
3059} 3070}
3060 3071
@@ -4841,6 +4852,9 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
4841 update_stateid(&stp->st_stid.sc_stateid); 4852 update_stateid(&stp->st_stid.sc_stateid);
4842 memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t)); 4853 memcpy(&close->cl_stateid, &stp->st_stid.sc_stateid, sizeof(stateid_t));
4843 4854
4855 nfsd4_return_all_file_layouts(stp->st_stateowner->so_client,
4856 stp->st_stid.sc_file);
4857
4844 nfsd4_close_open_stateid(stp); 4858 nfsd4_close_open_stateid(stp);
4845 4859
4846 /* put reference from nfs4_preprocess_seqid_op */ 4860 /* put reference from nfs4_preprocess_seqid_op */
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 974533e5a427..df5e66caf100 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -47,6 +47,7 @@
47#include "state.h" 47#include "state.h"
48#include "cache.h" 48#include "cache.h"
49#include "netns.h" 49#include "netns.h"
50#include "pnfs.h"
50 51
51#ifdef CONFIG_NFSD_V4_SECURITY_LABEL 52#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
52#include <linux/security.h> 53#include <linux/security.h>
@@ -1522,6 +1523,127 @@ static __be32 nfsd4_decode_reclaim_complete(struct nfsd4_compoundargs *argp, str
1522 DECODE_TAIL; 1523 DECODE_TAIL;
1523} 1524}
1524 1525
1526#ifdef CONFIG_NFSD_PNFS
1527static __be32
1528nfsd4_decode_getdeviceinfo(struct nfsd4_compoundargs *argp,
1529 struct nfsd4_getdeviceinfo *gdev)
1530{
1531 DECODE_HEAD;
1532 u32 num, i;
1533
1534 READ_BUF(sizeof(struct nfsd4_deviceid) + 3 * 4);
1535 COPYMEM(&gdev->gd_devid, sizeof(struct nfsd4_deviceid));
1536 gdev->gd_layout_type = be32_to_cpup(p++);
1537 gdev->gd_maxcount = be32_to_cpup(p++);
1538 num = be32_to_cpup(p++);
1539 if (num) {
1540 READ_BUF(4 * num);
1541 gdev->gd_notify_types = be32_to_cpup(p++);
1542 for (i = 1; i < num; i++) {
1543 if (be32_to_cpup(p++)) {
1544 status = nfserr_inval;
1545 goto out;
1546 }
1547 }
1548 }
1549 DECODE_TAIL;
1550}
1551
1552static __be32
1553nfsd4_decode_layoutget(struct nfsd4_compoundargs *argp,
1554 struct nfsd4_layoutget *lgp)
1555{
1556 DECODE_HEAD;
1557
1558 READ_BUF(36);
1559 lgp->lg_signal = be32_to_cpup(p++);
1560 lgp->lg_layout_type = be32_to_cpup(p++);
1561 lgp->lg_seg.iomode = be32_to_cpup(p++);
1562 p = xdr_decode_hyper(p, &lgp->lg_seg.offset);
1563 p = xdr_decode_hyper(p, &lgp->lg_seg.length);
1564 p = xdr_decode_hyper(p, &lgp->lg_minlength);
1565 nfsd4_decode_stateid(argp, &lgp->lg_sid);
1566 READ_BUF(4);
1567 lgp->lg_maxcount = be32_to_cpup(p++);
1568
1569 DECODE_TAIL;
1570}
1571
1572static __be32
1573nfsd4_decode_layoutcommit(struct nfsd4_compoundargs *argp,
1574 struct nfsd4_layoutcommit *lcp)
1575{
1576 DECODE_HEAD;
1577 u32 timechange;
1578
1579 READ_BUF(20);
1580 p = xdr_decode_hyper(p, &lcp->lc_seg.offset);
1581 p = xdr_decode_hyper(p, &lcp->lc_seg.length);
1582 lcp->lc_reclaim = be32_to_cpup(p++);
1583 nfsd4_decode_stateid(argp, &lcp->lc_sid);
1584 READ_BUF(4);
1585 lcp->lc_newoffset = be32_to_cpup(p++);
1586 if (lcp->lc_newoffset) {
1587 READ_BUF(8);
1588 p = xdr_decode_hyper(p, &lcp->lc_last_wr);
1589 } else
1590 lcp->lc_last_wr = 0;
1591 READ_BUF(4);
1592 timechange = be32_to_cpup(p++);
1593 if (timechange) {
1594 status = nfsd4_decode_time(argp, &lcp->lc_mtime);
1595 if (status)
1596 return status;
1597 } else {
1598 lcp->lc_mtime.tv_nsec = UTIME_NOW;
1599 }
1600 READ_BUF(8);
1601 lcp->lc_layout_type = be32_to_cpup(p++);
1602
1603 /*
1604 * Save the layout update in XDR format and let the layout driver deal
1605 * with it later.
1606 */
1607 lcp->lc_up_len = be32_to_cpup(p++);
1608 if (lcp->lc_up_len > 0) {
1609 READ_BUF(lcp->lc_up_len);
1610 READMEM(lcp->lc_up_layout, lcp->lc_up_len);
1611 }
1612
1613 DECODE_TAIL;
1614}
1615
1616static __be32
1617nfsd4_decode_layoutreturn(struct nfsd4_compoundargs *argp,
1618 struct nfsd4_layoutreturn *lrp)
1619{
1620 DECODE_HEAD;
1621
1622 READ_BUF(16);
1623 lrp->lr_reclaim = be32_to_cpup(p++);
1624 lrp->lr_layout_type = be32_to_cpup(p++);
1625 lrp->lr_seg.iomode = be32_to_cpup(p++);
1626 lrp->lr_return_type = be32_to_cpup(p++);
1627 if (lrp->lr_return_type == RETURN_FILE) {
1628 READ_BUF(16);
1629 p = xdr_decode_hyper(p, &lrp->lr_seg.offset);
1630 p = xdr_decode_hyper(p, &lrp->lr_seg.length);
1631 nfsd4_decode_stateid(argp, &lrp->lr_sid);
1632 READ_BUF(4);
1633 lrp->lrf_body_len = be32_to_cpup(p++);
1634 if (lrp->lrf_body_len > 0) {
1635 READ_BUF(lrp->lrf_body_len);
1636 READMEM(lrp->lrf_body, lrp->lrf_body_len);
1637 }
1638 } else {
1639 lrp->lr_seg.offset = 0;
1640 lrp->lr_seg.length = NFS4_MAX_UINT64;
1641 }
1642
1643 DECODE_TAIL;
1644}
1645#endif /* CONFIG_NFSD_PNFS */
1646
1525static __be32 1647static __be32
1526nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp, 1648nfsd4_decode_fallocate(struct nfsd4_compoundargs *argp,
1527 struct nfsd4_fallocate *fallocate) 1649 struct nfsd4_fallocate *fallocate)
@@ -1616,11 +1738,19 @@ static nfsd4_dec nfsd4_dec_ops[] = {
1616 [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session, 1738 [OP_DESTROY_SESSION] = (nfsd4_dec)nfsd4_decode_destroy_session,
1617 [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_free_stateid, 1739 [OP_FREE_STATEID] = (nfsd4_dec)nfsd4_decode_free_stateid,
1618 [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp, 1740 [OP_GET_DIR_DELEGATION] = (nfsd4_dec)nfsd4_decode_notsupp,
1741#ifdef CONFIG_NFSD_PNFS
1742 [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_getdeviceinfo,
1743 [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp,
1744 [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_layoutcommit,
1745 [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_layoutget,
1746 [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_layoutreturn,
1747#else
1619 [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp, 1748 [OP_GETDEVICEINFO] = (nfsd4_dec)nfsd4_decode_notsupp,
1620 [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp, 1749 [OP_GETDEVICELIST] = (nfsd4_dec)nfsd4_decode_notsupp,
1621 [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp, 1750 [OP_LAYOUTCOMMIT] = (nfsd4_dec)nfsd4_decode_notsupp,
1622 [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp, 1751 [OP_LAYOUTGET] = (nfsd4_dec)nfsd4_decode_notsupp,
1623 [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp, 1752 [OP_LAYOUTRETURN] = (nfsd4_dec)nfsd4_decode_notsupp,
1753#endif
1624 [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_secinfo_no_name, 1754 [OP_SECINFO_NO_NAME] = (nfsd4_dec)nfsd4_decode_secinfo_no_name,
1625 [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence, 1755 [OP_SEQUENCE] = (nfsd4_dec)nfsd4_decode_sequence,
1626 [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp, 1756 [OP_SET_SSV] = (nfsd4_dec)nfsd4_decode_notsupp,
@@ -2548,6 +2678,30 @@ out_acl:
2548 get_parent_attributes(exp, &stat); 2678 get_parent_attributes(exp, &stat);
2549 p = xdr_encode_hyper(p, stat.ino); 2679 p = xdr_encode_hyper(p, stat.ino);
2550 } 2680 }
2681#ifdef CONFIG_NFSD_PNFS
2682 if ((bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) ||
2683 (bmval2 & FATTR4_WORD2_LAYOUT_TYPES)) {
2684 if (exp->ex_layout_type) {
2685 p = xdr_reserve_space(xdr, 8);
2686 if (!p)
2687 goto out_resource;
2688 *p++ = cpu_to_be32(1);
2689 *p++ = cpu_to_be32(exp->ex_layout_type);
2690 } else {
2691 p = xdr_reserve_space(xdr, 4);
2692 if (!p)
2693 goto out_resource;
2694 *p++ = cpu_to_be32(0);
2695 }
2696 }
2697
2698 if (bmval2 & FATTR4_WORD2_LAYOUT_BLKSIZE) {
2699 p = xdr_reserve_space(xdr, 4);
2700 if (!p)
2701 goto out_resource;
2702 *p++ = cpu_to_be32(stat.blksize);
2703 }
2704#endif /* CONFIG_NFSD_PNFS */
2551 if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) { 2705 if (bmval2 & FATTR4_WORD2_SECURITY_LABEL) {
2552 status = nfsd4_encode_security_label(xdr, rqstp, context, 2706 status = nfsd4_encode_security_label(xdr, rqstp, context,
2553 contextlen); 2707 contextlen);
@@ -3824,6 +3978,156 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, __be32 nfserr,
3824 return nfserr; 3978 return nfserr;
3825} 3979}
3826 3980
3981#ifdef CONFIG_NFSD_PNFS
3982static __be32
3983nfsd4_encode_getdeviceinfo(struct nfsd4_compoundres *resp, __be32 nfserr,
3984 struct nfsd4_getdeviceinfo *gdev)
3985{
3986 struct xdr_stream *xdr = &resp->xdr;
3987 const struct nfsd4_layout_ops *ops =
3988 nfsd4_layout_ops[gdev->gd_layout_type];
3989 u32 starting_len = xdr->buf->len, needed_len;
3990 __be32 *p;
3991
3992 dprintk("%s: err %d\n", __func__, nfserr);
3993 if (nfserr)
3994 goto out;
3995
3996 nfserr = nfserr_resource;
3997 p = xdr_reserve_space(xdr, 4);
3998 if (!p)
3999 goto out;
4000
4001 *p++ = cpu_to_be32(gdev->gd_layout_type);
4002
4003 /* If maxcount is 0 then just update notifications */
4004 if (gdev->gd_maxcount != 0) {
4005 nfserr = ops->encode_getdeviceinfo(xdr, gdev);
4006 if (nfserr) {
4007 /*
4008 * We don't bother to burden the layout drivers with
4009 * enforcing gd_maxcount, just tell the client to
4010 * come back with a bigger buffer if it's not enough.
4011 */
4012 if (xdr->buf->len + 4 > gdev->gd_maxcount)
4013 goto toosmall;
4014 goto out;
4015 }
4016 }
4017
4018 nfserr = nfserr_resource;
4019 if (gdev->gd_notify_types) {
4020 p = xdr_reserve_space(xdr, 4 + 4);
4021 if (!p)
4022 goto out;
4023 *p++ = cpu_to_be32(1); /* bitmap length */
4024 *p++ = cpu_to_be32(gdev->gd_notify_types);
4025 } else {
4026 p = xdr_reserve_space(xdr, 4);
4027 if (!p)
4028 goto out;
4029 *p++ = 0;
4030 }
4031
4032 nfserr = 0;
4033out:
4034 kfree(gdev->gd_device);
4035 dprintk("%s: done: %d\n", __func__, be32_to_cpu(nfserr));
4036 return nfserr;
4037
4038toosmall:
4039 dprintk("%s: maxcount too small\n", __func__);
4040 needed_len = xdr->buf->len + 4 /* notifications */;
4041 xdr_truncate_encode(xdr, starting_len);
4042 p = xdr_reserve_space(xdr, 4);
4043 if (!p) {
4044 nfserr = nfserr_resource;
4045 } else {
4046 *p++ = cpu_to_be32(needed_len);
4047 nfserr = nfserr_toosmall;
4048 }
4049 goto out;
4050}
4051
4052static __be32
4053nfsd4_encode_layoutget(struct nfsd4_compoundres *resp, __be32 nfserr,
4054 struct nfsd4_layoutget *lgp)
4055{
4056 struct xdr_stream *xdr = &resp->xdr;
4057 const struct nfsd4_layout_ops *ops =
4058 nfsd4_layout_ops[lgp->lg_layout_type];
4059 __be32 *p;
4060
4061 dprintk("%s: err %d\n", __func__, nfserr);
4062 if (nfserr)
4063 goto out;
4064
4065 nfserr = nfserr_resource;
4066 p = xdr_reserve_space(xdr, 36 + sizeof(stateid_opaque_t));
4067 if (!p)
4068 goto out;
4069
4070 *p++ = cpu_to_be32(1); /* we always set return-on-close */
4071 *p++ = cpu_to_be32(lgp->lg_sid.si_generation);
4072 p = xdr_encode_opaque_fixed(p, &lgp->lg_sid.si_opaque,
4073 sizeof(stateid_opaque_t));
4074
4075 *p++ = cpu_to_be32(1); /* we always return a single layout */
4076 p = xdr_encode_hyper(p, lgp->lg_seg.offset);
4077 p = xdr_encode_hyper(p, lgp->lg_seg.length);
4078 *p++ = cpu_to_be32(lgp->lg_seg.iomode);
4079 *p++ = cpu_to_be32(lgp->lg_layout_type);
4080
4081 nfserr = ops->encode_layoutget(xdr, lgp);
4082out:
4083 kfree(lgp->lg_content);
4084 return nfserr;
4085}
4086
4087static __be32
4088nfsd4_encode_layoutcommit(struct nfsd4_compoundres *resp, __be32 nfserr,
4089 struct nfsd4_layoutcommit *lcp)
4090{
4091 struct xdr_stream *xdr = &resp->xdr;
4092 __be32 *p;
4093
4094 if (nfserr)
4095 return nfserr;
4096
4097 p = xdr_reserve_space(xdr, 4);
4098 if (!p)
4099 return nfserr_resource;
4100 *p++ = cpu_to_be32(lcp->lc_size_chg);
4101 if (lcp->lc_size_chg) {
4102 p = xdr_reserve_space(xdr, 8);
4103 if (!p)
4104 return nfserr_resource;
4105 p = xdr_encode_hyper(p, lcp->lc_newsize);
4106 }
4107
4108 return nfs_ok;
4109}
4110
4111static __be32
4112nfsd4_encode_layoutreturn(struct nfsd4_compoundres *resp, __be32 nfserr,
4113 struct nfsd4_layoutreturn *lrp)
4114{
4115 struct xdr_stream *xdr = &resp->xdr;
4116 __be32 *p;
4117
4118 if (nfserr)
4119 return nfserr;
4120
4121 p = xdr_reserve_space(xdr, 4);
4122 if (!p)
4123 return nfserr_resource;
4124 *p++ = cpu_to_be32(lrp->lrs_present);
4125 if (lrp->lrs_present)
4126 nfsd4_encode_stateid(xdr, &lrp->lr_sid);
4127 return nfs_ok;
4128}
4129#endif /* CONFIG_NFSD_PNFS */
4130
3827static __be32 4131static __be32
3828nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr, 4132nfsd4_encode_seek(struct nfsd4_compoundres *resp, __be32 nfserr,
3829 struct nfsd4_seek *seek) 4133 struct nfsd4_seek *seek)
@@ -3900,11 +4204,19 @@ static nfsd4_enc nfsd4_enc_ops[] = {
3900 [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_noop, 4204 [OP_DESTROY_SESSION] = (nfsd4_enc)nfsd4_encode_noop,
3901 [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop, 4205 [OP_FREE_STATEID] = (nfsd4_enc)nfsd4_encode_noop,
3902 [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop, 4206 [OP_GET_DIR_DELEGATION] = (nfsd4_enc)nfsd4_encode_noop,
4207#ifdef CONFIG_NFSD_PNFS
4208 [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_getdeviceinfo,
4209 [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop,
4210 [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_layoutcommit,
4211 [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_layoutget,
4212 [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_layoutreturn,
4213#else
3903 [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop, 4214 [OP_GETDEVICEINFO] = (nfsd4_enc)nfsd4_encode_noop,
3904 [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop, 4215 [OP_GETDEVICELIST] = (nfsd4_enc)nfsd4_encode_noop,
3905 [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop, 4216 [OP_LAYOUTCOMMIT] = (nfsd4_enc)nfsd4_encode_noop,
3906 [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop, 4217 [OP_LAYOUTGET] = (nfsd4_enc)nfsd4_encode_noop,
3907 [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop, 4218 [OP_LAYOUTRETURN] = (nfsd4_enc)nfsd4_encode_noop,
4219#endif
3908 [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_secinfo_no_name, 4220 [OP_SECINFO_NO_NAME] = (nfsd4_enc)nfsd4_encode_secinfo_no_name,
3909 [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence, 4221 [OP_SEQUENCE] = (nfsd4_enc)nfsd4_encode_sequence,
3910 [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop, 4222 [OP_SET_SSV] = (nfsd4_enc)nfsd4_encode_noop,
diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c
index 19ace74d35f6..aa47d75ddb26 100644
--- a/fs/nfsd/nfsctl.c
+++ b/fs/nfsd/nfsctl.c
@@ -21,6 +21,7 @@
21#include "cache.h" 21#include "cache.h"
22#include "state.h" 22#include "state.h"
23#include "netns.h" 23#include "netns.h"
24#include "pnfs.h"
24 25
25/* 26/*
26 * We have a single directory with several nodes in it. 27 * We have a single directory with several nodes in it.
@@ -1258,9 +1259,12 @@ static int __init init_nfsd(void)
1258 retval = nfsd4_init_slabs(); 1259 retval = nfsd4_init_slabs();
1259 if (retval) 1260 if (retval)
1260 goto out_unregister_pernet; 1261 goto out_unregister_pernet;
1261 retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */ 1262 retval = nfsd4_init_pnfs();
1262 if (retval) 1263 if (retval)
1263 goto out_free_slabs; 1264 goto out_free_slabs;
1265 retval = nfsd_fault_inject_init(); /* nfsd fault injection controls */
1266 if (retval)
1267 goto out_exit_pnfs;
1264 nfsd_stat_init(); /* Statistics */ 1268 nfsd_stat_init(); /* Statistics */
1265 retval = nfsd_reply_cache_init(); 1269 retval = nfsd_reply_cache_init();
1266 if (retval) 1270 if (retval)
@@ -1282,6 +1286,8 @@ out_free_lockd:
1282out_free_stat: 1286out_free_stat:
1283 nfsd_stat_shutdown(); 1287 nfsd_stat_shutdown();
1284 nfsd_fault_inject_cleanup(); 1288 nfsd_fault_inject_cleanup();
1289out_exit_pnfs:
1290 nfsd4_exit_pnfs();
1285out_free_slabs: 1291out_free_slabs:
1286 nfsd4_free_slabs(); 1292 nfsd4_free_slabs();
1287out_unregister_pernet: 1293out_unregister_pernet:
@@ -1299,6 +1305,7 @@ static void __exit exit_nfsd(void)
1299 nfsd_stat_shutdown(); 1305 nfsd_stat_shutdown();
1300 nfsd_lockd_shutdown(); 1306 nfsd_lockd_shutdown();
1301 nfsd4_free_slabs(); 1307 nfsd4_free_slabs();
1308 nfsd4_exit_pnfs();
1302 nfsd_fault_inject_cleanup(); 1309 nfsd_fault_inject_cleanup();
1303 unregister_filesystem(&nfsd_fs_type); 1310 unregister_filesystem(&nfsd_fs_type);
1304 unregister_pernet_subsys(&nfsd_net_ops); 1311 unregister_pernet_subsys(&nfsd_net_ops);
diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h
index 33a46a8dfaf7..565c4da1a9eb 100644
--- a/fs/nfsd/nfsd.h
+++ b/fs/nfsd/nfsd.h
@@ -325,15 +325,27 @@ void nfsd_lockd_shutdown(void);
325 325
326#define NFSD4_SUPPORTED_ATTRS_WORD2 0 326#define NFSD4_SUPPORTED_ATTRS_WORD2 0
327 327
328/* 4.1 */
329#ifdef CONFIG_NFSD_PNFS
330#define PNFSD_SUPPORTED_ATTRS_WORD1 FATTR4_WORD1_FS_LAYOUT_TYPES
331#define PNFSD_SUPPORTED_ATTRS_WORD2 \
332(FATTR4_WORD2_LAYOUT_BLKSIZE | FATTR4_WORD2_LAYOUT_TYPES)
333#else
334#define PNFSD_SUPPORTED_ATTRS_WORD1 0
335#define PNFSD_SUPPORTED_ATTRS_WORD2 0
336#endif /* CONFIG_NFSD_PNFS */
337
328#define NFSD4_1_SUPPORTED_ATTRS_WORD0 \ 338#define NFSD4_1_SUPPORTED_ATTRS_WORD0 \
329 NFSD4_SUPPORTED_ATTRS_WORD0 339 NFSD4_SUPPORTED_ATTRS_WORD0
330 340
331#define NFSD4_1_SUPPORTED_ATTRS_WORD1 \ 341#define NFSD4_1_SUPPORTED_ATTRS_WORD1 \
332 NFSD4_SUPPORTED_ATTRS_WORD1 342 (NFSD4_SUPPORTED_ATTRS_WORD1 | PNFSD_SUPPORTED_ATTRS_WORD1)
333 343
334#define NFSD4_1_SUPPORTED_ATTRS_WORD2 \ 344#define NFSD4_1_SUPPORTED_ATTRS_WORD2 \
335 (NFSD4_SUPPORTED_ATTRS_WORD2 | FATTR4_WORD2_SUPPATTR_EXCLCREAT) 345 (NFSD4_SUPPORTED_ATTRS_WORD2 | PNFSD_SUPPORTED_ATTRS_WORD2 | \
346 FATTR4_WORD2_SUPPATTR_EXCLCREAT)
336 347
348/* 4.2 */
337#ifdef CONFIG_NFSD_V4_SECURITY_LABEL 349#ifdef CONFIG_NFSD_V4_SECURITY_LABEL
338#define NFSD4_2_SECURITY_ATTRS FATTR4_WORD2_SECURITY_LABEL 350#define NFSD4_2_SECURITY_ATTRS FATTR4_WORD2_SECURITY_LABEL
339#else 351#else
diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h
new file mode 100644
index 000000000000..a9616a4e13cd
--- /dev/null
+++ b/fs/nfsd/pnfs.h
@@ -0,0 +1,80 @@
1#ifndef _FS_NFSD_PNFS_H
2#define _FS_NFSD_PNFS_H 1
3
4#include <linux/exportfs.h>
5#include <linux/nfsd/export.h>
6
7#include "state.h"
8#include "xdr4.h"
9
10struct xdr_stream;
11
/*
 * One entry associating an index (idx) with a filesystem identifier of the
 * given fsid_type.  fsid is a flexible array member, so entries are
 * variable-sized.
 * NOTE(review): presumably idx corresponds to nfsd4_deviceid.fsid_idx and
 * entries are found via nfsd4_find_devid_map() -- confirm in the
 * implementation.
 */
12struct nfsd4_deviceid_map {
13 struct list_head hash;
14 u64 idx;
15 int fsid_type;
16 u32 fsid[];
17};
18
/*
 * Operations implemented by a layout driver.  Instances are reached through
 * the nfsd4_layout_ops[] table, which the XDR reply encoders index by
 * layout type before calling the encode_* hooks.
 * NOTE(review): the proc_* hooks are presumably invoked from the operation
 * handlers, and notify_types presumably lists the notification types the
 * driver supports -- confirm against the callers.
 */
19struct nfsd4_layout_ops {
20 u32 notify_types;
21
22 __be32 (*proc_getdeviceinfo)(struct super_block *sb,
23 struct nfsd4_getdeviceinfo *gdevp);
24 __be32 (*encode_getdeviceinfo)(struct xdr_stream *xdr,
25 struct nfsd4_getdeviceinfo *gdevp);
26
27 __be32 (*proc_layoutget)(struct inode *, const struct svc_fh *fhp,
28 struct nfsd4_layoutget *lgp);
29 __be32 (*encode_layoutget)(struct xdr_stream *,
30 struct nfsd4_layoutget *lgp);
31
32 __be32 (*proc_layoutcommit)(struct inode *inode,
33 struct nfsd4_layoutcommit *lcp);
34};
35
36extern const struct nfsd4_layout_ops *nfsd4_layout_ops[];
37
38__be32 nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
39 struct nfsd4_compound_state *cstate, stateid_t *stateid,
40 bool create, u32 layout_type, struct nfs4_layout_stateid **lsp);
41__be32 nfsd4_insert_layout(struct nfsd4_layoutget *lgp,
42 struct nfs4_layout_stateid *ls);
43__be32 nfsd4_return_file_layouts(struct svc_rqst *rqstp,
44 struct nfsd4_compound_state *cstate,
45 struct nfsd4_layoutreturn *lrp);
46__be32 nfsd4_return_client_layouts(struct svc_rqst *rqstp,
47 struct nfsd4_compound_state *cstate,
48 struct nfsd4_layoutreturn *lrp);
49int nfsd4_set_deviceid(struct nfsd4_deviceid *id, const struct svc_fh *fhp,
50 u32 device_generation);
51struct nfsd4_deviceid_map *nfsd4_find_devid_map(int idx);
52
53#ifdef CONFIG_NFSD_PNFS
54void nfsd4_setup_layout_type(struct svc_export *exp);
55void nfsd4_return_all_client_layouts(struct nfs4_client *);
56void nfsd4_return_all_file_layouts(struct nfs4_client *clp,
57 struct nfs4_file *fp);
58int nfsd4_init_pnfs(void);
59void nfsd4_exit_pnfs(void);
60#else
/*
 * Stubs used when CONFIG_NFSD_PNFS is not set, so callers do not need
 * their own #ifdefs: the void hooks do nothing and nfsd4_init_pnfs()
 * reports success.
 */
61static inline void nfsd4_setup_layout_type(struct svc_export *exp)
62{
63}
64
65static inline void nfsd4_return_all_client_layouts(struct nfs4_client *clp)
66{
67}
68static inline void nfsd4_return_all_file_layouts(struct nfs4_client *clp,
69 struct nfs4_file *fp)
70{
71}
72static inline void nfsd4_exit_pnfs(void)
73{
74}
/* Nothing to initialise without pNFS support, so this cannot fail. */
75static inline int nfsd4_init_pnfs(void)
76{
77 return 0;
78}
79#endif /* CONFIG_NFSD_PNFS */
80#endif /* _FS_NFSD_PNFS_H */
diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
index 38ebb1268b59..5f66b7fd0297 100644
--- a/fs/nfsd/state.h
+++ b/fs/nfsd/state.h
@@ -92,6 +92,7 @@ struct nfs4_stid {
92/* For a deleg stateid kept around only to process free_stateid's: */ 92/* For a deleg stateid kept around only to process free_stateid's: */
93#define NFS4_REVOKED_DELEG_STID 16 93#define NFS4_REVOKED_DELEG_STID 16
94#define NFS4_CLOSED_DELEG_STID 32 94#define NFS4_CLOSED_DELEG_STID 32
95#define NFS4_LAYOUT_STID 64
95 unsigned char sc_type; 96 unsigned char sc_type;
96 stateid_t sc_stateid; 97 stateid_t sc_stateid;
97 struct nfs4_client *sc_client; 98 struct nfs4_client *sc_client;
@@ -297,6 +298,9 @@ struct nfs4_client {
297 struct list_head cl_delegations; 298 struct list_head cl_delegations;
298 struct list_head cl_revoked; /* unacknowledged, revoked 4.1 state */ 299 struct list_head cl_revoked; /* unacknowledged, revoked 4.1 state */
299 struct list_head cl_lru; /* tail queue */ 300 struct list_head cl_lru; /* tail queue */
301#ifdef CONFIG_NFSD_PNFS
302 struct list_head cl_lo_states; /* outstanding layout states */
303#endif
300 struct xdr_netobj cl_name; /* id generated by client */ 304 struct xdr_netobj cl_name; /* id generated by client */
301 nfs4_verifier cl_verifier; /* generated by client */ 305 nfs4_verifier cl_verifier; /* generated by client */
302 time_t cl_time; /* time of last lease renewal */ 306 time_t cl_time; /* time of last lease renewal */
@@ -496,6 +500,9 @@ struct nfs4_file {
496 int fi_delegees; 500 int fi_delegees;
497 struct knfsd_fh fi_fhandle; 501 struct knfsd_fh fi_fhandle;
498 bool fi_had_conflict; 502 bool fi_had_conflict;
503#ifdef CONFIG_NFSD_PNFS
504 struct list_head fi_lo_states;
505#endif
499}; 506};
500 507
501/* 508/*
@@ -528,6 +535,20 @@ static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s)
528 return container_of(s, struct nfs4_ol_stateid, st_stid); 535 return container_of(s, struct nfs4_ol_stateid, st_stid);
529} 536}
530 537
/*
 * State for a layout stateid.  The generic stateid is embedded as ls_stid,
 * which is what layoutstateid() relies on to get from an nfs4_stid back to
 * this structure.
 * NOTE(review): ls_perclnt and ls_perfile presumably link into
 * nfs4_client.cl_lo_states and nfs4_file.fi_lo_states, and ls_lock
 * presumably protects ls_layouts -- confirm in nfs4layouts.c.
 */
538struct nfs4_layout_stateid {
539 struct nfs4_stid ls_stid;
540 struct list_head ls_perclnt;
541 struct list_head ls_perfile;
542 spinlock_t ls_lock;
543 struct list_head ls_layouts;
544 u32 ls_layout_type;
545};
546
547static inline struct nfs4_layout_stateid *layoutstateid(struct nfs4_stid *s)
548{
549 return container_of(s, struct nfs4_layout_stateid, ls_stid);
550}
551
531/* flags for preprocess_seqid_op() */ 552/* flags for preprocess_seqid_op() */
532#define RD_STATE 0x00000010 553#define RD_STATE 0x00000010
533#define WR_STATE 0x00000020 554#define WR_STATE 0x00000020
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index 90a5925bd6ab..0bda93e58e1b 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -428,6 +428,61 @@ struct nfsd4_reclaim_complete {
428 u32 rca_one_fs; 428 u32 rca_one_fs;
429}; 429};
430 430
/*
 * Device ID as handled by the server.  The GETDEVICEINFO decoder copies
 * sizeof(struct nfsd4_deviceid) bytes from the request directly into this
 * structure, so its size and field layout determine how the on-the-wire
 * ID is interpreted.
 */
431struct nfsd4_deviceid {
432 u64 fsid_idx;
433 u32 generation;
434 u32 pad;
435};
436
/*
 * A layout segment: an I/O mode plus a byte range given as offset and
 * length.  The LAYOUTRETURN decoder uses offset 0 with length
 * NFS4_MAX_UINT64 when the request does not name a specific range.
 */
437struct nfsd4_layout_seg {
438 u32 iomode;
439 u64 offset;
440 u64 length;
441};
442
/*
 * Arguments and results of a GETDEVICEINFO operation.  gd_device is
 * released with kfree() by the reply encoder, whether or not the operation
 * succeeded.
 */
443struct nfsd4_getdeviceinfo {
444 struct nfsd4_deviceid gd_devid; /* request */
445 u32 gd_layout_type; /* request */
446 u32 gd_maxcount; /* request */
447 u32 gd_notify_types;/* request - response */
448 void *gd_device; /* response */
449};
450
/*
 * Arguments and results of a LAYOUTGET operation.  lg_content is released
 * with kfree() by the reply encoder, whether or not the operation
 * succeeded.
 */
451struct nfsd4_layoutget {
452 u64 lg_minlength; /* request */
453 u32 lg_signal; /* request */
454 u32 lg_layout_type; /* request */
455 u32 lg_maxcount; /* request */
456 stateid_t lg_sid; /* request/response */
457 struct nfsd4_layout_seg lg_seg; /* request/response */
458 void *lg_content; /* response */
459};
460
/*
 * Arguments and results of a LAYOUTCOMMIT operation.  lc_last_wr is only
 * meaningful when lc_newoffset is set (the decoder zeroes it otherwise).
 * lc_up_layout refers to lc_up_len bytes of layout update that the decoder
 * leaves in XDR form for the layout driver to interpret.
 */
461struct nfsd4_layoutcommit {
462 stateid_t lc_sid; /* request */
463 struct nfsd4_layout_seg lc_seg; /* request */
464 u32 lc_reclaim; /* request */
465 u32 lc_newoffset; /* request */
466 u64 lc_last_wr; /* request */
467 struct timespec lc_mtime; /* request */
468 u32 lc_layout_type; /* request */
469 u32 lc_up_len; /* layout length */
470 void *lc_up_layout; /* decoded by callback */
471 u32 lc_size_chg; /* boolean for response */
472 u64 lc_newsize; /* response */
473};
474
/*
 * Arguments and results of a LAYOUTRETURN operation.  The decoder fills in
 * lr_sid, lrf_body_len and lrf_body only for RETURN_FILE requests.  In the
 * reply, lr_sid is sent only when lrs_present is non-zero.
 */
475struct nfsd4_layoutreturn {
476 u32 lr_return_type; /* request */
477 u32 lr_layout_type; /* request */
478 struct nfsd4_layout_seg lr_seg; /* request */
479 u32 lr_reclaim; /* request */
480 u32 lrf_body_len; /* request */
481 void *lrf_body; /* request */
482 stateid_t lr_sid; /* request/response */
483 u32 lrs_present; /* response */
484};
485
431struct nfsd4_fallocate { 486struct nfsd4_fallocate {
432 /* request */ 487 /* request */
433 stateid_t falloc_stateid; 488 stateid_t falloc_stateid;
@@ -491,6 +546,10 @@ struct nfsd4_op {
491 struct nfsd4_reclaim_complete reclaim_complete; 546 struct nfsd4_reclaim_complete reclaim_complete;
492 struct nfsd4_test_stateid test_stateid; 547 struct nfsd4_test_stateid test_stateid;
493 struct nfsd4_free_stateid free_stateid; 548 struct nfsd4_free_stateid free_stateid;
549 struct nfsd4_getdeviceinfo getdeviceinfo;
550 struct nfsd4_layoutget layoutget;
551 struct nfsd4_layoutcommit layoutcommit;
552 struct nfsd4_layoutreturn layoutreturn;
494 553
495 /* NFSv4.2 */ 554 /* NFSv4.2 */
496 struct nfsd4_fallocate allocate; 555 struct nfsd4_fallocate allocate;
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h
index 8a3589c2542c..bc10d687f2ce 100644
--- a/include/linux/nfs4.h
+++ b/include/linux/nfs4.h
@@ -411,6 +411,7 @@ enum lock_type4 {
411#define FATTR4_WORD1_TIME_MODIFY_SET (1UL << 22) 411#define FATTR4_WORD1_TIME_MODIFY_SET (1UL << 22)
412#define FATTR4_WORD1_MOUNTED_ON_FILEID (1UL << 23) 412#define FATTR4_WORD1_MOUNTED_ON_FILEID (1UL << 23)
413#define FATTR4_WORD1_FS_LAYOUT_TYPES (1UL << 30) 413#define FATTR4_WORD1_FS_LAYOUT_TYPES (1UL << 30)
414#define FATTR4_WORD2_LAYOUT_TYPES (1UL << 0)
414#define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1) 415#define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1)
415#define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4) 416#define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4)
416#define FATTR4_WORD2_SECURITY_LABEL (1UL << 16) 417#define FATTR4_WORD2_SECURITY_LABEL (1UL << 16)
diff --git a/include/uapi/linux/nfsd/debug.h b/include/uapi/linux/nfsd/debug.h
index 1fdc95bb2375..0bf130a1c58d 100644
--- a/include/uapi/linux/nfsd/debug.h
+++ b/include/uapi/linux/nfsd/debug.h
@@ -32,6 +32,7 @@
32#define NFSDDBG_REPCACHE 0x0080 32#define NFSDDBG_REPCACHE 0x0080
33#define NFSDDBG_XDR 0x0100 33#define NFSDDBG_XDR 0x0100
34#define NFSDDBG_LOCKD 0x0200 34#define NFSDDBG_LOCKD 0x0200
35#define NFSDDBG_PNFS 0x0400
35#define NFSDDBG_ALL 0x7FFF 36#define NFSDDBG_ALL 0x7FFF
36#define NFSDDBG_NOCHANGE 0xFFFF 37#define NFSDDBG_NOCHANGE 0xFFFF
37 38
diff --git a/include/uapi/linux/nfsd/export.h b/include/uapi/linux/nfsd/export.h
index 584b6ef3a5e8..4742f2cb42f2 100644
--- a/include/uapi/linux/nfsd/export.h
+++ b/include/uapi/linux/nfsd/export.h
@@ -47,8 +47,10 @@
47 * exported filesystem. 47 * exported filesystem.
48 */ 48 */
49#define NFSEXP_V4ROOT 0x10000 49#define NFSEXP_V4ROOT 0x10000
50#define NFSEXP_NOPNFS 0x20000
51
50/* All flags that we claim to support. (Note we don't support NOACL.) */ 52/* All flags that we claim to support. (Note we don't support NOACL.) */
51#define NFSEXP_ALLFLAGS 0x1FE7F 53#define NFSEXP_ALLFLAGS 0x3FE7F
52 54
53/* The flags that may vary depending on security flavor: */ 55/* The flags that may vary depending on security flavor: */
54#define NFSEXP_SECINFO_FLAGS (NFSEXP_READONLY | NFSEXP_ROOTSQUASH \ 56#define NFSEXP_SECINFO_FLAGS (NFSEXP_READONLY | NFSEXP_ROOTSQUASH \