diff options
author | Tom Haynes <loghyr@primarydata.com> | 2014-12-11 17:02:04 -0500 |
---|---|---|
committer | Tom Haynes <loghyr@primarydata.com> | 2015-02-03 14:06:52 -0500 |
commit | d67ae825a59d639e4d8b82413af84d854617a87e (patch) | |
tree | 6b11fc9afb214e02b9cf0b66a14817c57f0f9a05 | |
parent | 5fadeb47dcc5c30d4b6cf481b4a78689eab59443 (diff) |
pnfs/flexfiles: Add the FlexFile Layout Driver
The flexfile layout is a new layout that extends the
file layout. It is currently being drafted as a specification at
https://datatracker.ietf.org/doc/draft-ietf-nfsv4-layout-types/
Signed-off-by: Weston Andros Adamson <dros@primarydata.com>
Signed-off-by: Tom Haynes <loghyr@primarydata.com>
Signed-off-by: Tao Peng <bergwolf@primarydata.com>
-rw-r--r-- | fs/nfs/Kconfig | 5 | ||||
-rw-r--r-- | fs/nfs/Makefile | 1 | ||||
-rw-r--r-- | fs/nfs/flexfilelayout/Makefile | 5 | ||||
-rw-r--r-- | fs/nfs/flexfilelayout/flexfilelayout.c | 1574 | ||||
-rw-r--r-- | fs/nfs/flexfilelayout/flexfilelayout.h | 155 | ||||
-rw-r--r-- | fs/nfs/flexfilelayout/flexfilelayoutdev.c | 552 | ||||
-rw-r--r-- | fs/nfs/idmap.c | 3 | ||||
-rw-r--r-- | fs/nfs/nfs4proc.c | 4 | ||||
-rw-r--r-- | fs/nfs/pnfs.c | 32 | ||||
-rw-r--r-- | fs/nfs/pnfs.h | 1 | ||||
-rw-r--r-- | include/linux/nfs4.h | 1 | ||||
-rw-r--r-- | include/linux/nfs_idmap.h | 2 | ||||
-rw-r--r-- | include/linux/sunrpc/metrics.h | 2 |
13 files changed, 2325 insertions, 12 deletions
diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index 3dece03f2fc8..c7abc10279af 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig | |||
@@ -128,6 +128,11 @@ config PNFS_OBJLAYOUT | |||
128 | depends on NFS_V4_1 && SCSI_OSD_ULD | 128 | depends on NFS_V4_1 && SCSI_OSD_ULD |
129 | default NFS_V4 | 129 | default NFS_V4 |
130 | 130 | ||
131 | config PNFS_FLEXFILE_LAYOUT | ||
132 | tristate | ||
133 | depends on NFS_V4_1 && NFS_V3 | ||
134 | default m | ||
135 | |||
131 | config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN | 136 | config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN |
132 | string "NFSv4.1 Implementation ID Domain" | 137 | string "NFSv4.1 Implementation ID Domain" |
133 | depends on NFS_V4_1 | 138 | depends on NFS_V4_1 |
diff --git a/fs/nfs/Makefile b/fs/nfs/Makefile index 23abffa8a4ce..1e987acf20c9 100644 --- a/fs/nfs/Makefile +++ b/fs/nfs/Makefile | |||
@@ -33,3 +33,4 @@ nfsv4-$(CONFIG_NFS_V4_2) += nfs42proc.o | |||
33 | obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/ | 33 | obj-$(CONFIG_PNFS_FILE_LAYOUT) += filelayout/ |
34 | obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/ | 34 | obj-$(CONFIG_PNFS_OBJLAYOUT) += objlayout/ |
35 | obj-$(CONFIG_PNFS_BLOCK) += blocklayout/ | 35 | obj-$(CONFIG_PNFS_BLOCK) += blocklayout/ |
36 | obj-$(CONFIG_PNFS_FLEXFILE_LAYOUT) += flexfilelayout/ | ||
diff --git a/fs/nfs/flexfilelayout/Makefile b/fs/nfs/flexfilelayout/Makefile new file mode 100644 index 000000000000..1d2c9f6bbcd4 --- /dev/null +++ b/fs/nfs/flexfilelayout/Makefile | |||
@@ -0,0 +1,5 @@ | |||
1 | # | ||
2 | # Makefile for the pNFS Flexfile Layout Driver kernel module | ||
3 | # | ||
4 | obj-$(CONFIG_PNFS_FLEXFILE_LAYOUT) += nfs_layout_flexfiles.o | ||
5 | nfs_layout_flexfiles-y := flexfilelayout.o flexfilelayoutdev.o | ||
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c new file mode 100644 index 000000000000..f29fb7d7e8f8 --- /dev/null +++ b/fs/nfs/flexfilelayout/flexfilelayout.c | |||
@@ -0,0 +1,1574 @@ | |||
1 | /* | ||
2 | * Module for pnfs flexfile layout driver. | ||
3 | * | ||
4 | * Copyright (c) 2014, Primary Data, Inc. All rights reserved. | ||
5 | * | ||
6 | * Tao Peng <bergwolf@primarydata.com> | ||
7 | */ | ||
8 | |||
9 | #include <linux/nfs_fs.h> | ||
10 | #include <linux/nfs_page.h> | ||
11 | #include <linux/module.h> | ||
12 | |||
13 | #include <linux/sunrpc/metrics.h> | ||
14 | #include <linux/nfs_idmap.h> | ||
15 | |||
16 | #include "flexfilelayout.h" | ||
17 | #include "../nfs4session.h" | ||
18 | #include "../internal.h" | ||
19 | #include "../delegation.h" | ||
20 | #include "../nfs4trace.h" | ||
21 | #include "../iostat.h" | ||
22 | #include "../nfs.h" | ||
23 | |||
24 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
25 | |||
26 | #define FF_LAYOUT_POLL_RETRY_MAX (15*HZ) | ||
27 | |||
28 | static struct pnfs_layout_hdr * | ||
29 | ff_layout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) | ||
30 | { | ||
31 | struct nfs4_flexfile_layout *ffl; | ||
32 | |||
33 | ffl = kzalloc(sizeof(*ffl), gfp_flags); | ||
34 | if (ffl) { | ||
35 | INIT_LIST_HEAD(&ffl->error_list); | ||
36 | return &ffl->generic_hdr; | ||
37 | } else | ||
38 | return NULL; | ||
39 | } | ||
40 | |||
41 | static void | ||
42 | ff_layout_free_layout_hdr(struct pnfs_layout_hdr *lo) | ||
43 | { | ||
44 | struct nfs4_ff_layout_ds_err *err, *n; | ||
45 | |||
46 | list_for_each_entry_safe(err, n, &FF_LAYOUT_FROM_HDR(lo)->error_list, | ||
47 | list) { | ||
48 | list_del(&err->list); | ||
49 | kfree(err); | ||
50 | } | ||
51 | kfree(FF_LAYOUT_FROM_HDR(lo)); | ||
52 | } | ||
53 | |||
54 | static int decode_stateid(struct xdr_stream *xdr, nfs4_stateid *stateid) | ||
55 | { | ||
56 | __be32 *p; | ||
57 | |||
58 | p = xdr_inline_decode(xdr, NFS4_STATEID_SIZE); | ||
59 | if (unlikely(p == NULL)) | ||
60 | return -ENOBUFS; | ||
61 | memcpy(stateid, p, NFS4_STATEID_SIZE); | ||
62 | dprintk("%s: stateid id= [%x%x%x%x]\n", __func__, | ||
63 | p[0], p[1], p[2], p[3]); | ||
64 | return 0; | ||
65 | } | ||
66 | |||
67 | static int decode_deviceid(struct xdr_stream *xdr, struct nfs4_deviceid *devid) | ||
68 | { | ||
69 | __be32 *p; | ||
70 | |||
71 | p = xdr_inline_decode(xdr, NFS4_DEVICEID4_SIZE); | ||
72 | if (unlikely(!p)) | ||
73 | return -ENOBUFS; | ||
74 | memcpy(devid, p, NFS4_DEVICEID4_SIZE); | ||
75 | nfs4_print_deviceid(devid); | ||
76 | return 0; | ||
77 | } | ||
78 | |||
79 | static int decode_nfs_fh(struct xdr_stream *xdr, struct nfs_fh *fh) | ||
80 | { | ||
81 | __be32 *p; | ||
82 | |||
83 | p = xdr_inline_decode(xdr, 4); | ||
84 | if (unlikely(!p)) | ||
85 | return -ENOBUFS; | ||
86 | fh->size = be32_to_cpup(p++); | ||
87 | if (fh->size > sizeof(struct nfs_fh)) { | ||
88 | printk(KERN_ERR "NFS flexfiles: Too big fh received %d\n", | ||
89 | fh->size); | ||
90 | return -EOVERFLOW; | ||
91 | } | ||
92 | /* fh.data */ | ||
93 | p = xdr_inline_decode(xdr, fh->size); | ||
94 | if (unlikely(!p)) | ||
95 | return -ENOBUFS; | ||
96 | memcpy(&fh->data, p, fh->size); | ||
97 | dprintk("%s: fh len %d\n", __func__, fh->size); | ||
98 | |||
99 | return 0; | ||
100 | } | ||
101 | |||
102 | /* | ||
103 | * Currently only stringified uids and gids are accepted. | ||
104 | * I.e., kerberos is not supported to the DSes, so no pricipals. | ||
105 | * | ||
106 | * That means that one common function will suffice, but when | ||
107 | * principals are added, this should be split to accomodate | ||
108 | * calls to both nfs_map_name_to_uid() and nfs_map_group_to_gid(). | ||
109 | */ | ||
110 | static int | ||
111 | decode_name(struct xdr_stream *xdr, u32 *id) | ||
112 | { | ||
113 | __be32 *p; | ||
114 | int len; | ||
115 | |||
116 | /* opaque_length(4)*/ | ||
117 | p = xdr_inline_decode(xdr, 4); | ||
118 | if (unlikely(!p)) | ||
119 | return -ENOBUFS; | ||
120 | len = be32_to_cpup(p++); | ||
121 | if (len < 0) | ||
122 | return -EINVAL; | ||
123 | |||
124 | dprintk("%s: len %u\n", __func__, len); | ||
125 | |||
126 | /* opaque body */ | ||
127 | p = xdr_inline_decode(xdr, len); | ||
128 | if (unlikely(!p)) | ||
129 | return -ENOBUFS; | ||
130 | |||
131 | if (!nfs_map_string_to_numeric((char *)p, len, id)) | ||
132 | return -EINVAL; | ||
133 | |||
134 | return 0; | ||
135 | } | ||
136 | |||
137 | static void ff_layout_free_mirror_array(struct nfs4_ff_layout_segment *fls) | ||
138 | { | ||
139 | int i; | ||
140 | |||
141 | if (fls->mirror_array) { | ||
142 | for (i = 0; i < fls->mirror_array_cnt; i++) { | ||
143 | /* normally mirror_ds is freed in | ||
144 | * .free_deviceid_node but we still do it here | ||
145 | * for .alloc_lseg error path */ | ||
146 | if (fls->mirror_array[i]) { | ||
147 | kfree(fls->mirror_array[i]->fh_versions); | ||
148 | nfs4_ff_layout_put_deviceid(fls->mirror_array[i]->mirror_ds); | ||
149 | kfree(fls->mirror_array[i]); | ||
150 | } | ||
151 | } | ||
152 | kfree(fls->mirror_array); | ||
153 | fls->mirror_array = NULL; | ||
154 | } | ||
155 | } | ||
156 | |||
157 | static int ff_layout_check_layout(struct nfs4_layoutget_res *lgr) | ||
158 | { | ||
159 | int ret = 0; | ||
160 | |||
161 | dprintk("--> %s\n", __func__); | ||
162 | |||
163 | /* FIXME: remove this check when layout segment support is added */ | ||
164 | if (lgr->range.offset != 0 || | ||
165 | lgr->range.length != NFS4_MAX_UINT64) { | ||
166 | dprintk("%s Only whole file layouts supported. Use MDS i/o\n", | ||
167 | __func__); | ||
168 | ret = -EINVAL; | ||
169 | } | ||
170 | |||
171 | dprintk("--> %s returns %d\n", __func__, ret); | ||
172 | return ret; | ||
173 | } | ||
174 | |||
/*
 * Free a flexfile layout segment and its mirror array.
 * Passing NULL is allowed and does nothing.
 */
static void _ff_layout_free_lseg(struct nfs4_ff_layout_segment *fls)
{
	if (!fls)
		return;

	ff_layout_free_mirror_array(fls);
	kfree(fls);
}
182 | |||
183 | static void ff_layout_sort_mirrors(struct nfs4_ff_layout_segment *fls) | ||
184 | { | ||
185 | struct nfs4_ff_layout_mirror *tmp; | ||
186 | int i, j; | ||
187 | |||
188 | for (i = 0; i < fls->mirror_array_cnt - 1; i++) { | ||
189 | for (j = i + 1; j < fls->mirror_array_cnt; j++) | ||
190 | if (fls->mirror_array[i]->efficiency < | ||
191 | fls->mirror_array[j]->efficiency) { | ||
192 | tmp = fls->mirror_array[i]; | ||
193 | fls->mirror_array[i] = fls->mirror_array[j]; | ||
194 | fls->mirror_array[j] = tmp; | ||
195 | } | ||
196 | } | ||
197 | } | ||
198 | |||
199 | static struct pnfs_layout_segment * | ||
200 | ff_layout_alloc_lseg(struct pnfs_layout_hdr *lh, | ||
201 | struct nfs4_layoutget_res *lgr, | ||
202 | gfp_t gfp_flags) | ||
203 | { | ||
204 | struct pnfs_layout_segment *ret; | ||
205 | struct nfs4_ff_layout_segment *fls = NULL; | ||
206 | struct xdr_stream stream; | ||
207 | struct xdr_buf buf; | ||
208 | struct page *scratch; | ||
209 | u64 stripe_unit; | ||
210 | u32 mirror_array_cnt; | ||
211 | __be32 *p; | ||
212 | int i, rc; | ||
213 | |||
214 | dprintk("--> %s\n", __func__); | ||
215 | scratch = alloc_page(gfp_flags); | ||
216 | if (!scratch) | ||
217 | return ERR_PTR(-ENOMEM); | ||
218 | |||
219 | xdr_init_decode_pages(&stream, &buf, lgr->layoutp->pages, | ||
220 | lgr->layoutp->len); | ||
221 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); | ||
222 | |||
223 | /* stripe unit and mirror_array_cnt */ | ||
224 | rc = -EIO; | ||
225 | p = xdr_inline_decode(&stream, 8 + 4); | ||
226 | if (!p) | ||
227 | goto out_err_free; | ||
228 | |||
229 | p = xdr_decode_hyper(p, &stripe_unit); | ||
230 | mirror_array_cnt = be32_to_cpup(p++); | ||
231 | dprintk("%s: stripe_unit=%llu mirror_array_cnt=%u\n", __func__, | ||
232 | stripe_unit, mirror_array_cnt); | ||
233 | |||
234 | if (mirror_array_cnt > NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT || | ||
235 | mirror_array_cnt == 0) | ||
236 | goto out_err_free; | ||
237 | |||
238 | rc = -ENOMEM; | ||
239 | fls = kzalloc(sizeof(*fls), gfp_flags); | ||
240 | if (!fls) | ||
241 | goto out_err_free; | ||
242 | |||
243 | fls->mirror_array_cnt = mirror_array_cnt; | ||
244 | fls->stripe_unit = stripe_unit; | ||
245 | fls->mirror_array = kcalloc(fls->mirror_array_cnt, | ||
246 | sizeof(fls->mirror_array[0]), gfp_flags); | ||
247 | if (fls->mirror_array == NULL) | ||
248 | goto out_err_free; | ||
249 | |||
250 | for (i = 0; i < fls->mirror_array_cnt; i++) { | ||
251 | struct nfs4_deviceid devid; | ||
252 | struct nfs4_deviceid_node *idnode; | ||
253 | u32 ds_count; | ||
254 | u32 fh_count; | ||
255 | int j; | ||
256 | |||
257 | rc = -EIO; | ||
258 | p = xdr_inline_decode(&stream, 4); | ||
259 | if (!p) | ||
260 | goto out_err_free; | ||
261 | ds_count = be32_to_cpup(p); | ||
262 | |||
263 | /* FIXME: allow for striping? */ | ||
264 | if (ds_count != 1) | ||
265 | goto out_err_free; | ||
266 | |||
267 | fls->mirror_array[i] = | ||
268 | kzalloc(sizeof(struct nfs4_ff_layout_mirror), | ||
269 | gfp_flags); | ||
270 | if (fls->mirror_array[i] == NULL) { | ||
271 | rc = -ENOMEM; | ||
272 | goto out_err_free; | ||
273 | } | ||
274 | |||
275 | spin_lock_init(&fls->mirror_array[i]->lock); | ||
276 | fls->mirror_array[i]->ds_count = ds_count; | ||
277 | |||
278 | /* deviceid */ | ||
279 | rc = decode_deviceid(&stream, &devid); | ||
280 | if (rc) | ||
281 | goto out_err_free; | ||
282 | |||
283 | idnode = nfs4_find_get_deviceid(NFS_SERVER(lh->plh_inode), | ||
284 | &devid, lh->plh_lc_cred, | ||
285 | gfp_flags); | ||
286 | /* | ||
287 | * upon success, mirror_ds is allocated by previous | ||
288 | * getdeviceinfo, or newly by .alloc_deviceid_node | ||
289 | * nfs4_find_get_deviceid failure is indeed getdeviceinfo falure | ||
290 | */ | ||
291 | if (idnode) | ||
292 | fls->mirror_array[i]->mirror_ds = | ||
293 | FF_LAYOUT_MIRROR_DS(idnode); | ||
294 | else | ||
295 | goto out_err_free; | ||
296 | |||
297 | /* efficiency */ | ||
298 | rc = -EIO; | ||
299 | p = xdr_inline_decode(&stream, 4); | ||
300 | if (!p) | ||
301 | goto out_err_free; | ||
302 | fls->mirror_array[i]->efficiency = be32_to_cpup(p); | ||
303 | |||
304 | /* stateid */ | ||
305 | rc = decode_stateid(&stream, &fls->mirror_array[i]->stateid); | ||
306 | if (rc) | ||
307 | goto out_err_free; | ||
308 | |||
309 | /* fh */ | ||
310 | p = xdr_inline_decode(&stream, 4); | ||
311 | if (!p) | ||
312 | goto out_err_free; | ||
313 | fh_count = be32_to_cpup(p); | ||
314 | |||
315 | fls->mirror_array[i]->fh_versions = | ||
316 | kzalloc(fh_count * sizeof(struct nfs_fh), | ||
317 | gfp_flags); | ||
318 | if (fls->mirror_array[i]->fh_versions == NULL) { | ||
319 | rc = -ENOMEM; | ||
320 | goto out_err_free; | ||
321 | } | ||
322 | |||
323 | for (j = 0; j < fh_count; j++) { | ||
324 | rc = decode_nfs_fh(&stream, | ||
325 | &fls->mirror_array[i]->fh_versions[j]); | ||
326 | if (rc) | ||
327 | goto out_err_free; | ||
328 | } | ||
329 | |||
330 | fls->mirror_array[i]->fh_versions_cnt = fh_count; | ||
331 | |||
332 | /* user */ | ||
333 | rc = decode_name(&stream, &fls->mirror_array[i]->uid); | ||
334 | if (rc) | ||
335 | goto out_err_free; | ||
336 | |||
337 | /* group */ | ||
338 | rc = decode_name(&stream, &fls->mirror_array[i]->gid); | ||
339 | if (rc) | ||
340 | goto out_err_free; | ||
341 | |||
342 | dprintk("%s: uid %d gid %d\n", __func__, | ||
343 | fls->mirror_array[i]->uid, | ||
344 | fls->mirror_array[i]->gid); | ||
345 | } | ||
346 | |||
347 | ff_layout_sort_mirrors(fls); | ||
348 | rc = ff_layout_check_layout(lgr); | ||
349 | if (rc) | ||
350 | goto out_err_free; | ||
351 | |||
352 | ret = &fls->generic_hdr; | ||
353 | dprintk("<-- %s (success)\n", __func__); | ||
354 | out_free_page: | ||
355 | __free_page(scratch); | ||
356 | return ret; | ||
357 | out_err_free: | ||
358 | _ff_layout_free_lseg(fls); | ||
359 | ret = ERR_PTR(rc); | ||
360 | dprintk("<-- %s (%d)\n", __func__, rc); | ||
361 | goto out_free_page; | ||
362 | } | ||
363 | |||
364 | static bool ff_layout_has_rw_segments(struct pnfs_layout_hdr *layout) | ||
365 | { | ||
366 | struct pnfs_layout_segment *lseg; | ||
367 | |||
368 | list_for_each_entry(lseg, &layout->plh_segs, pls_list) | ||
369 | if (lseg->pls_range.iomode == IOMODE_RW) | ||
370 | return true; | ||
371 | |||
372 | return false; | ||
373 | } | ||
374 | |||
375 | static void | ||
376 | ff_layout_free_lseg(struct pnfs_layout_segment *lseg) | ||
377 | { | ||
378 | struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); | ||
379 | int i; | ||
380 | |||
381 | dprintk("--> %s\n", __func__); | ||
382 | |||
383 | for (i = 0; i < fls->mirror_array_cnt; i++) { | ||
384 | if (fls->mirror_array[i]) { | ||
385 | nfs4_ff_layout_put_deviceid(fls->mirror_array[i]->mirror_ds); | ||
386 | fls->mirror_array[i]->mirror_ds = NULL; | ||
387 | if (fls->mirror_array[i]->cred) { | ||
388 | put_rpccred(fls->mirror_array[i]->cred); | ||
389 | fls->mirror_array[i]->cred = NULL; | ||
390 | } | ||
391 | } | ||
392 | } | ||
393 | |||
394 | if (lseg->pls_range.iomode == IOMODE_RW) { | ||
395 | struct nfs4_flexfile_layout *ffl; | ||
396 | struct inode *inode; | ||
397 | |||
398 | ffl = FF_LAYOUT_FROM_HDR(lseg->pls_layout); | ||
399 | inode = ffl->generic_hdr.plh_inode; | ||
400 | spin_lock(&inode->i_lock); | ||
401 | if (!ff_layout_has_rw_segments(lseg->pls_layout)) { | ||
402 | ffl->commit_info.nbuckets = 0; | ||
403 | kfree(ffl->commit_info.buckets); | ||
404 | ffl->commit_info.buckets = NULL; | ||
405 | } | ||
406 | spin_unlock(&inode->i_lock); | ||
407 | } | ||
408 | _ff_layout_free_lseg(fls); | ||
409 | } | ||
410 | |||
/*
 * Number of layout segments to account for when sizing per-file
 * structures.  Multiple lsegs are not supported yet, so the answer is
 * always 1 and @fls is not consulted.
 */
static int
ff_layout_get_lseg_count(struct nfs4_ff_layout_segment *fls)
{
	return 1;
}
417 | |||
418 | static int | ||
419 | ff_layout_alloc_commit_info(struct pnfs_layout_segment *lseg, | ||
420 | struct nfs_commit_info *cinfo, | ||
421 | gfp_t gfp_flags) | ||
422 | { | ||
423 | struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg); | ||
424 | struct pnfs_commit_bucket *buckets; | ||
425 | int size; | ||
426 | |||
427 | if (cinfo->ds->nbuckets != 0) { | ||
428 | /* This assumes there is only one RW lseg per file. | ||
429 | * To support multiple lseg per file, we need to | ||
430 | * change struct pnfs_commit_bucket to allow dynamic | ||
431 | * increasing nbuckets. | ||
432 | */ | ||
433 | return 0; | ||
434 | } | ||
435 | |||
436 | size = ff_layout_get_lseg_count(fls) * FF_LAYOUT_MIRROR_COUNT(lseg); | ||
437 | |||
438 | buckets = kcalloc(size, sizeof(struct pnfs_commit_bucket), | ||
439 | gfp_flags); | ||
440 | if (!buckets) | ||
441 | return -ENOMEM; | ||
442 | else { | ||
443 | int i; | ||
444 | |||
445 | spin_lock(cinfo->lock); | ||
446 | if (cinfo->ds->nbuckets != 0) | ||
447 | kfree(buckets); | ||
448 | else { | ||
449 | cinfo->ds->buckets = buckets; | ||
450 | cinfo->ds->nbuckets = size; | ||
451 | for (i = 0; i < size; i++) { | ||
452 | INIT_LIST_HEAD(&buckets[i].written); | ||
453 | INIT_LIST_HEAD(&buckets[i].committing); | ||
454 | /* mark direct verifier as unset */ | ||
455 | buckets[i].direct_verf.committed = | ||
456 | NFS_INVALID_STABLE_HOW; | ||
457 | } | ||
458 | } | ||
459 | spin_unlock(cinfo->lock); | ||
460 | return 0; | ||
461 | } | ||
462 | } | ||
463 | |||
464 | static struct nfs4_pnfs_ds * | ||
465 | ff_layout_choose_best_ds_for_read(struct nfs_pageio_descriptor *pgio, | ||
466 | int *best_idx) | ||
467 | { | ||
468 | struct nfs4_ff_layout_segment *fls; | ||
469 | struct nfs4_pnfs_ds *ds; | ||
470 | int idx; | ||
471 | |||
472 | fls = FF_LAYOUT_LSEG(pgio->pg_lseg); | ||
473 | /* mirrors are sorted by efficiency */ | ||
474 | for (idx = 0; idx < fls->mirror_array_cnt; idx++) { | ||
475 | ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, idx, false); | ||
476 | if (ds) { | ||
477 | *best_idx = idx; | ||
478 | return ds; | ||
479 | } | ||
480 | } | ||
481 | |||
482 | return NULL; | ||
483 | } | ||
484 | |||
485 | static void | ||
486 | ff_layout_pg_init_read(struct nfs_pageio_descriptor *pgio, | ||
487 | struct nfs_page *req) | ||
488 | { | ||
489 | struct nfs_pgio_mirror *pgm; | ||
490 | struct nfs4_ff_layout_mirror *mirror; | ||
491 | struct nfs4_pnfs_ds *ds; | ||
492 | int ds_idx; | ||
493 | |||
494 | /* Use full layout for now */ | ||
495 | if (!pgio->pg_lseg) | ||
496 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | ||
497 | req->wb_context, | ||
498 | 0, | ||
499 | NFS4_MAX_UINT64, | ||
500 | IOMODE_READ, | ||
501 | GFP_KERNEL); | ||
502 | /* If no lseg, fall back to read through mds */ | ||
503 | if (pgio->pg_lseg == NULL) | ||
504 | goto out_mds; | ||
505 | |||
506 | ds = ff_layout_choose_best_ds_for_read(pgio, &ds_idx); | ||
507 | if (!ds) | ||
508 | goto out_mds; | ||
509 | mirror = FF_LAYOUT_COMP(pgio->pg_lseg, ds_idx); | ||
510 | |||
511 | pgio->pg_mirror_idx = ds_idx; | ||
512 | |||
513 | /* read always uses only one mirror - idx 0 for pgio layer */ | ||
514 | pgm = &pgio->pg_mirrors[0]; | ||
515 | pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].rsize; | ||
516 | |||
517 | return; | ||
518 | out_mds: | ||
519 | pnfs_put_lseg(pgio->pg_lseg); | ||
520 | pgio->pg_lseg = NULL; | ||
521 | nfs_pageio_reset_read_mds(pgio); | ||
522 | } | ||
523 | |||
524 | static void | ||
525 | ff_layout_pg_init_write(struct nfs_pageio_descriptor *pgio, | ||
526 | struct nfs_page *req) | ||
527 | { | ||
528 | struct nfs4_ff_layout_mirror *mirror; | ||
529 | struct nfs_pgio_mirror *pgm; | ||
530 | struct nfs_commit_info cinfo; | ||
531 | struct nfs4_pnfs_ds *ds; | ||
532 | int i; | ||
533 | int status; | ||
534 | |||
535 | if (!pgio->pg_lseg) | ||
536 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | ||
537 | req->wb_context, | ||
538 | 0, | ||
539 | NFS4_MAX_UINT64, | ||
540 | IOMODE_RW, | ||
541 | GFP_NOFS); | ||
542 | /* If no lseg, fall back to write through mds */ | ||
543 | if (pgio->pg_lseg == NULL) | ||
544 | goto out_mds; | ||
545 | |||
546 | nfs_init_cinfo(&cinfo, pgio->pg_inode, pgio->pg_dreq); | ||
547 | status = ff_layout_alloc_commit_info(pgio->pg_lseg, &cinfo, GFP_NOFS); | ||
548 | if (status < 0) | ||
549 | goto out_mds; | ||
550 | |||
551 | /* Use a direct mapping of ds_idx to pgio mirror_idx */ | ||
552 | if (WARN_ON_ONCE(pgio->pg_mirror_count != | ||
553 | FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg))) | ||
554 | goto out_mds; | ||
555 | |||
556 | for (i = 0; i < pgio->pg_mirror_count; i++) { | ||
557 | ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, i, true); | ||
558 | if (!ds) | ||
559 | goto out_mds; | ||
560 | pgm = &pgio->pg_mirrors[i]; | ||
561 | mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i); | ||
562 | pgm->pg_bsize = mirror->mirror_ds->ds_versions[0].wsize; | ||
563 | } | ||
564 | |||
565 | return; | ||
566 | |||
567 | out_mds: | ||
568 | pnfs_put_lseg(pgio->pg_lseg); | ||
569 | pgio->pg_lseg = NULL; | ||
570 | nfs_pageio_reset_write_mds(pgio); | ||
571 | } | ||
572 | |||
573 | static unsigned int | ||
574 | ff_layout_pg_get_mirror_count_write(struct nfs_pageio_descriptor *pgio, | ||
575 | struct nfs_page *req) | ||
576 | { | ||
577 | if (!pgio->pg_lseg) | ||
578 | pgio->pg_lseg = pnfs_update_layout(pgio->pg_inode, | ||
579 | req->wb_context, | ||
580 | 0, | ||
581 | NFS4_MAX_UINT64, | ||
582 | IOMODE_RW, | ||
583 | GFP_NOFS); | ||
584 | if (pgio->pg_lseg) | ||
585 | return FF_LAYOUT_MIRROR_COUNT(pgio->pg_lseg); | ||
586 | |||
587 | /* no lseg means that pnfs is not in use, so no mirroring here */ | ||
588 | pnfs_put_lseg(pgio->pg_lseg); | ||
589 | pgio->pg_lseg = NULL; | ||
590 | nfs_pageio_reset_write_mds(pgio); | ||
591 | return 1; | ||
592 | } | ||
593 | |||
594 | static const struct nfs_pageio_ops ff_layout_pg_read_ops = { | ||
595 | .pg_init = ff_layout_pg_init_read, | ||
596 | .pg_test = pnfs_generic_pg_test, | ||
597 | .pg_doio = pnfs_generic_pg_readpages, | ||
598 | .pg_cleanup = pnfs_generic_pg_cleanup, | ||
599 | }; | ||
600 | |||
601 | static const struct nfs_pageio_ops ff_layout_pg_write_ops = { | ||
602 | .pg_init = ff_layout_pg_init_write, | ||
603 | .pg_test = pnfs_generic_pg_test, | ||
604 | .pg_doio = pnfs_generic_pg_writepages, | ||
605 | .pg_get_mirror_count = ff_layout_pg_get_mirror_count_write, | ||
606 | .pg_cleanup = pnfs_generic_pg_cleanup, | ||
607 | }; | ||
608 | |||
609 | static void ff_layout_reset_write(struct nfs_pgio_header *hdr, bool retry_pnfs) | ||
610 | { | ||
611 | struct rpc_task *task = &hdr->task; | ||
612 | |||
613 | pnfs_layoutcommit_inode(hdr->inode, false); | ||
614 | |||
615 | if (retry_pnfs) { | ||
616 | dprintk("%s Reset task %5u for i/o through pNFS " | ||
617 | "(req %s/%llu, %u bytes @ offset %llu)\n", __func__, | ||
618 | hdr->task.tk_pid, | ||
619 | hdr->inode->i_sb->s_id, | ||
620 | (unsigned long long)NFS_FILEID(hdr->inode), | ||
621 | hdr->args.count, | ||
622 | (unsigned long long)hdr->args.offset); | ||
623 | |||
624 | if (!hdr->dreq) { | ||
625 | struct nfs_open_context *ctx; | ||
626 | |||
627 | ctx = nfs_list_entry(hdr->pages.next)->wb_context; | ||
628 | set_bit(NFS_CONTEXT_RESEND_WRITES, &ctx->flags); | ||
629 | hdr->completion_ops->error_cleanup(&hdr->pages); | ||
630 | } else { | ||
631 | nfs_direct_set_resched_writes(hdr->dreq); | ||
632 | /* fake unstable write to let common nfs resend pages */ | ||
633 | hdr->verf.committed = NFS_UNSTABLE; | ||
634 | hdr->good_bytes = 0; | ||
635 | } | ||
636 | return; | ||
637 | } | ||
638 | |||
639 | if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { | ||
640 | dprintk("%s Reset task %5u for i/o through MDS " | ||
641 | "(req %s/%llu, %u bytes @ offset %llu)\n", __func__, | ||
642 | hdr->task.tk_pid, | ||
643 | hdr->inode->i_sb->s_id, | ||
644 | (unsigned long long)NFS_FILEID(hdr->inode), | ||
645 | hdr->args.count, | ||
646 | (unsigned long long)hdr->args.offset); | ||
647 | |||
648 | task->tk_status = pnfs_write_done_resend_to_mds(hdr); | ||
649 | } | ||
650 | } | ||
651 | |||
652 | static void ff_layout_reset_read(struct nfs_pgio_header *hdr) | ||
653 | { | ||
654 | struct rpc_task *task = &hdr->task; | ||
655 | |||
656 | pnfs_layoutcommit_inode(hdr->inode, false); | ||
657 | |||
658 | if (!test_and_set_bit(NFS_IOHDR_REDO, &hdr->flags)) { | ||
659 | dprintk("%s Reset task %5u for i/o through MDS " | ||
660 | "(req %s/%llu, %u bytes @ offset %llu)\n", __func__, | ||
661 | hdr->task.tk_pid, | ||
662 | hdr->inode->i_sb->s_id, | ||
663 | (unsigned long long)NFS_FILEID(hdr->inode), | ||
664 | hdr->args.count, | ||
665 | (unsigned long long)hdr->args.offset); | ||
666 | |||
667 | task->tk_status = pnfs_read_done_resend_to_mds(hdr); | ||
668 | } | ||
669 | } | ||
670 | |||
671 | static int ff_layout_async_handle_error_v4(struct rpc_task *task, | ||
672 | struct nfs4_state *state, | ||
673 | struct nfs_client *clp, | ||
674 | struct pnfs_layout_segment *lseg, | ||
675 | int idx) | ||
676 | { | ||
677 | struct pnfs_layout_hdr *lo = lseg->pls_layout; | ||
678 | struct inode *inode = lo->plh_inode; | ||
679 | struct nfs_server *mds_server = NFS_SERVER(inode); | ||
680 | |||
681 | struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); | ||
682 | struct nfs_client *mds_client = mds_server->nfs_client; | ||
683 | struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table; | ||
684 | |||
685 | if (task->tk_status >= 0) | ||
686 | return 0; | ||
687 | |||
688 | switch (task->tk_status) { | ||
689 | /* MDS state errors */ | ||
690 | case -NFS4ERR_DELEG_REVOKED: | ||
691 | case -NFS4ERR_ADMIN_REVOKED: | ||
692 | case -NFS4ERR_BAD_STATEID: | ||
693 | if (state == NULL) | ||
694 | break; | ||
695 | nfs_remove_bad_delegation(state->inode); | ||
696 | case -NFS4ERR_OPENMODE: | ||
697 | if (state == NULL) | ||
698 | break; | ||
699 | if (nfs4_schedule_stateid_recovery(mds_server, state) < 0) | ||
700 | goto out_bad_stateid; | ||
701 | goto wait_on_recovery; | ||
702 | case -NFS4ERR_EXPIRED: | ||
703 | if (state != NULL) { | ||
704 | if (nfs4_schedule_stateid_recovery(mds_server, state) < 0) | ||
705 | goto out_bad_stateid; | ||
706 | } | ||
707 | nfs4_schedule_lease_recovery(mds_client); | ||
708 | goto wait_on_recovery; | ||
709 | /* DS session errors */ | ||
710 | case -NFS4ERR_BADSESSION: | ||
711 | case -NFS4ERR_BADSLOT: | ||
712 | case -NFS4ERR_BAD_HIGH_SLOT: | ||
713 | case -NFS4ERR_DEADSESSION: | ||
714 | case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION: | ||
715 | case -NFS4ERR_SEQ_FALSE_RETRY: | ||
716 | case -NFS4ERR_SEQ_MISORDERED: | ||
717 | dprintk("%s ERROR %d, Reset session. Exchangeid " | ||
718 | "flags 0x%x\n", __func__, task->tk_status, | ||
719 | clp->cl_exchange_flags); | ||
720 | nfs4_schedule_session_recovery(clp->cl_session, task->tk_status); | ||
721 | break; | ||
722 | case -NFS4ERR_DELAY: | ||
723 | case -NFS4ERR_GRACE: | ||
724 | rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX); | ||
725 | break; | ||
726 | case -NFS4ERR_RETRY_UNCACHED_REP: | ||
727 | break; | ||
728 | /* Invalidate Layout errors */ | ||
729 | case -NFS4ERR_PNFS_NO_LAYOUT: | ||
730 | case -ESTALE: /* mapped NFS4ERR_STALE */ | ||
731 | case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */ | ||
732 | case -EISDIR: /* mapped NFS4ERR_ISDIR */ | ||
733 | case -NFS4ERR_FHEXPIRED: | ||
734 | case -NFS4ERR_WRONG_TYPE: | ||
735 | dprintk("%s Invalid layout error %d\n", __func__, | ||
736 | task->tk_status); | ||
737 | /* | ||
738 | * Destroy layout so new i/o will get a new layout. | ||
739 | * Layout will not be destroyed until all current lseg | ||
740 | * references are put. Mark layout as invalid to resend failed | ||
741 | * i/o and all i/o waiting on the slot table to the MDS until | ||
742 | * layout is destroyed and a new valid layout is obtained. | ||
743 | */ | ||
744 | pnfs_destroy_layout(NFS_I(inode)); | ||
745 | rpc_wake_up(&tbl->slot_tbl_waitq); | ||
746 | goto reset; | ||
747 | /* RPC connection errors */ | ||
748 | case -ECONNREFUSED: | ||
749 | case -EHOSTDOWN: | ||
750 | case -EHOSTUNREACH: | ||
751 | case -ENETUNREACH: | ||
752 | case -EIO: | ||
753 | case -ETIMEDOUT: | ||
754 | case -EPIPE: | ||
755 | dprintk("%s DS connection error %d\n", __func__, | ||
756 | task->tk_status); | ||
757 | nfs4_mark_deviceid_unavailable(devid); | ||
758 | rpc_wake_up(&tbl->slot_tbl_waitq); | ||
759 | /* fall through */ | ||
760 | default: | ||
761 | if (ff_layout_has_available_ds(lseg)) | ||
762 | return -NFS4ERR_RESET_TO_PNFS; | ||
763 | reset: | ||
764 | dprintk("%s Retry through MDS. Error %d\n", __func__, | ||
765 | task->tk_status); | ||
766 | return -NFS4ERR_RESET_TO_MDS; | ||
767 | } | ||
768 | out: | ||
769 | task->tk_status = 0; | ||
770 | return -EAGAIN; | ||
771 | out_bad_stateid: | ||
772 | task->tk_status = -EIO; | ||
773 | return 0; | ||
774 | wait_on_recovery: | ||
775 | rpc_sleep_on(&mds_client->cl_rpcwaitq, task, NULL); | ||
776 | if (test_bit(NFS4CLNT_MANAGER_RUNNING, &mds_client->cl_state) == 0) | ||
777 | rpc_wake_up_queued_task(&mds_client->cl_rpcwaitq, task); | ||
778 | goto out; | ||
779 | } | ||
780 | |||
781 | /* Retry all errors through either pNFS or MDS except for -EJUKEBOX */ | ||
782 | static int ff_layout_async_handle_error_v3(struct rpc_task *task, | ||
783 | struct pnfs_layout_segment *lseg, | ||
784 | int idx) | ||
785 | { | ||
786 | struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx); | ||
787 | |||
788 | if (task->tk_status >= 0) | ||
789 | return 0; | ||
790 | |||
791 | if (task->tk_status != -EJUKEBOX) { | ||
792 | dprintk("%s DS connection error %d\n", __func__, | ||
793 | task->tk_status); | ||
794 | nfs4_mark_deviceid_unavailable(devid); | ||
795 | if (ff_layout_has_available_ds(lseg)) | ||
796 | return -NFS4ERR_RESET_TO_PNFS; | ||
797 | else | ||
798 | return -NFS4ERR_RESET_TO_MDS; | ||
799 | } | ||
800 | |||
801 | if (task->tk_status == -EJUKEBOX) | ||
802 | nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY); | ||
803 | task->tk_status = 0; | ||
804 | rpc_restart_call(task); | ||
805 | rpc_delay(task, NFS_JUKEBOX_RETRY_TIME); | ||
806 | return -EAGAIN; | ||
807 | } | ||
808 | |||
809 | static int ff_layout_async_handle_error(struct rpc_task *task, | ||
810 | struct nfs4_state *state, | ||
811 | struct nfs_client *clp, | ||
812 | struct pnfs_layout_segment *lseg, | ||
813 | int idx) | ||
814 | { | ||
815 | int vers = clp->cl_nfs_mod->rpc_vers->number; | ||
816 | |||
817 | switch (vers) { | ||
818 | case 3: | ||
819 | return ff_layout_async_handle_error_v3(task, lseg, idx); | ||
820 | case 4: | ||
821 | return ff_layout_async_handle_error_v4(task, state, clp, | ||
822 | lseg, idx); | ||
823 | default: | ||
824 | /* should never happen */ | ||
825 | WARN_ON_ONCE(1); | ||
826 | return 0; | ||
827 | } | ||
828 | } | ||
829 | |||
830 | static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg, | ||
831 | int idx, u64 offset, u64 length, | ||
832 | u32 status, int opnum) | ||
833 | { | ||
834 | struct nfs4_ff_layout_mirror *mirror; | ||
835 | int err; | ||
836 | |||
837 | mirror = FF_LAYOUT_COMP(lseg, idx); | ||
838 | err = ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), | ||
839 | mirror, offset, length, status, opnum, | ||
840 | GFP_NOIO); | ||
841 | dprintk("%s: err %d op %d status %u\n", __func__, err, opnum, status); | ||
842 | } | ||
843 | |||
844 | /* NFS_PROTO call done callback routines */ | ||
845 | |||
846 | static int ff_layout_read_done_cb(struct rpc_task *task, | ||
847 | struct nfs_pgio_header *hdr) | ||
848 | { | ||
849 | struct inode *inode; | ||
850 | int err; | ||
851 | |||
852 | trace_nfs4_pnfs_read(hdr, task->tk_status); | ||
853 | if (task->tk_status == -ETIMEDOUT && !hdr->res.op_status) | ||
854 | hdr->res.op_status = NFS4ERR_NXIO; | ||
855 | if (task->tk_status < 0 && hdr->res.op_status) | ||
856 | ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx, | ||
857 | hdr->args.offset, hdr->args.count, | ||
858 | hdr->res.op_status, OP_READ); | ||
859 | err = ff_layout_async_handle_error(task, hdr->args.context->state, | ||
860 | hdr->ds_clp, hdr->lseg, | ||
861 | hdr->pgio_mirror_idx); | ||
862 | |||
863 | switch (err) { | ||
864 | case -NFS4ERR_RESET_TO_PNFS: | ||
865 | set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, | ||
866 | &hdr->lseg->pls_layout->plh_flags); | ||
867 | pnfs_read_resend_pnfs(hdr); | ||
868 | return task->tk_status; | ||
869 | case -NFS4ERR_RESET_TO_MDS: | ||
870 | inode = hdr->lseg->pls_layout->plh_inode; | ||
871 | pnfs_error_mark_layout_for_return(inode, hdr->lseg); | ||
872 | ff_layout_reset_read(hdr); | ||
873 | return task->tk_status; | ||
874 | case -EAGAIN: | ||
875 | rpc_restart_call_prepare(task); | ||
876 | return -EAGAIN; | ||
877 | } | ||
878 | |||
879 | return 0; | ||
880 | } | ||
881 | |||
882 | /* | ||
883 | * We reference the rpc_cred of the first WRITE that triggers the need for | ||
884 | * a LAYOUTCOMMIT, and use it to send the layoutcommit compound. | ||
885 | * rfc5661 is not clear about which credential should be used. | ||
886 | * | ||
887 | * Flexlayout client should treat DS replied FILE_SYNC as DATA_SYNC, so | ||
888 | * to follow http://www.rfc-editor.org/errata_search.php?rfc=5661&eid=2751 | ||
889 | * we always send layoutcommit after DS writes. | ||
890 | */ | ||
891 | static void | ||
892 | ff_layout_set_layoutcommit(struct nfs_pgio_header *hdr) | ||
893 | { | ||
894 | pnfs_set_layoutcommit(hdr); | ||
895 | dprintk("%s inode %lu pls_end_pos %lu\n", __func__, hdr->inode->i_ino, | ||
896 | (unsigned long) NFS_I(hdr->inode)->layout->plh_lwb); | ||
897 | } | ||
898 | |||
899 | static bool | ||
900 | ff_layout_reset_to_mds(struct pnfs_layout_segment *lseg, int idx) | ||
901 | { | ||
902 | /* No mirroring for now */ | ||
903 | struct nfs4_deviceid_node *node = FF_LAYOUT_DEVID_NODE(lseg, idx); | ||
904 | |||
905 | return ff_layout_test_devid_unavailable(node); | ||
906 | } | ||
907 | |||
908 | static int ff_layout_read_prepare_common(struct rpc_task *task, | ||
909 | struct nfs_pgio_header *hdr) | ||
910 | { | ||
911 | if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { | ||
912 | rpc_exit(task, -EIO); | ||
913 | return -EIO; | ||
914 | } | ||
915 | if (ff_layout_reset_to_mds(hdr->lseg, hdr->pgio_mirror_idx)) { | ||
916 | dprintk("%s task %u reset io to MDS\n", __func__, task->tk_pid); | ||
917 | if (ff_layout_has_available_ds(hdr->lseg)) | ||
918 | pnfs_read_resend_pnfs(hdr); | ||
919 | else | ||
920 | ff_layout_reset_read(hdr); | ||
921 | rpc_exit(task, 0); | ||
922 | return -EAGAIN; | ||
923 | } | ||
924 | hdr->pgio_done_cb = ff_layout_read_done_cb; | ||
925 | |||
926 | return 0; | ||
927 | } | ||
928 | |||
/*
 * rpc_call_prepare for NFSv3 DS reads: run the common checks and start the
 * call only when they all pass.
 */
static void ff_layout_read_prepare_v3(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (ff_layout_read_prepare_common(task, hdr) == 0)
		rpc_call_start(task);
}
943 | |||
944 | static int ff_layout_setup_sequence(struct nfs_client *ds_clp, | ||
945 | struct nfs4_sequence_args *args, | ||
946 | struct nfs4_sequence_res *res, | ||
947 | struct rpc_task *task) | ||
948 | { | ||
949 | if (ds_clp->cl_session) | ||
950 | return nfs41_setup_sequence(ds_clp->cl_session, | ||
951 | args, | ||
952 | res, | ||
953 | task); | ||
954 | return nfs40_setup_sequence(ds_clp->cl_slot_tbl, | ||
955 | args, | ||
956 | res, | ||
957 | task); | ||
958 | } | ||
959 | |||
960 | static void ff_layout_read_prepare_v4(struct rpc_task *task, void *data) | ||
961 | { | ||
962 | struct nfs_pgio_header *hdr = data; | ||
963 | |||
964 | if (ff_layout_read_prepare_common(task, hdr)) | ||
965 | return; | ||
966 | |||
967 | if (ff_layout_setup_sequence(hdr->ds_clp, | ||
968 | &hdr->args.seq_args, | ||
969 | &hdr->res.seq_res, | ||
970 | task)) | ||
971 | return; | ||
972 | |||
973 | if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, | ||
974 | hdr->args.lock_context, FMODE_READ) == -EIO) | ||
975 | rpc_exit(task, -EIO); /* lost lock, terminate I/O */ | ||
976 | } | ||
977 | |||
978 | static void ff_layout_read_call_done(struct rpc_task *task, void *data) | ||
979 | { | ||
980 | struct nfs_pgio_header *hdr = data; | ||
981 | |||
982 | dprintk("--> %s task->tk_status %d\n", __func__, task->tk_status); | ||
983 | |||
984 | if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && | ||
985 | task->tk_status == 0) { | ||
986 | nfs4_sequence_done(task, &hdr->res.seq_res); | ||
987 | return; | ||
988 | } | ||
989 | |||
990 | /* Note this may cause RPC to be resent */ | ||
991 | hdr->mds_ops->rpc_call_done(task, hdr); | ||
992 | } | ||
993 | |||
994 | static void ff_layout_read_count_stats(struct rpc_task *task, void *data) | ||
995 | { | ||
996 | struct nfs_pgio_header *hdr = data; | ||
997 | |||
998 | rpc_count_iostats_metrics(task, | ||
999 | &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_READ]); | ||
1000 | } | ||
1001 | |||
1002 | static int ff_layout_write_done_cb(struct rpc_task *task, | ||
1003 | struct nfs_pgio_header *hdr) | ||
1004 | { | ||
1005 | struct inode *inode; | ||
1006 | int err; | ||
1007 | |||
1008 | trace_nfs4_pnfs_write(hdr, task->tk_status); | ||
1009 | if (task->tk_status == -ETIMEDOUT && !hdr->res.op_status) | ||
1010 | hdr->res.op_status = NFS4ERR_NXIO; | ||
1011 | if (task->tk_status < 0 && hdr->res.op_status) | ||
1012 | ff_layout_io_track_ds_error(hdr->lseg, hdr->pgio_mirror_idx, | ||
1013 | hdr->args.offset, hdr->args.count, | ||
1014 | hdr->res.op_status, OP_WRITE); | ||
1015 | err = ff_layout_async_handle_error(task, hdr->args.context->state, | ||
1016 | hdr->ds_clp, hdr->lseg, | ||
1017 | hdr->pgio_mirror_idx); | ||
1018 | |||
1019 | switch (err) { | ||
1020 | case -NFS4ERR_RESET_TO_PNFS: | ||
1021 | case -NFS4ERR_RESET_TO_MDS: | ||
1022 | inode = hdr->lseg->pls_layout->plh_inode; | ||
1023 | pnfs_error_mark_layout_for_return(inode, hdr->lseg); | ||
1024 | if (err == -NFS4ERR_RESET_TO_PNFS) { | ||
1025 | pnfs_set_retry_layoutget(hdr->lseg->pls_layout); | ||
1026 | ff_layout_reset_write(hdr, true); | ||
1027 | } else { | ||
1028 | pnfs_clear_retry_layoutget(hdr->lseg->pls_layout); | ||
1029 | ff_layout_reset_write(hdr, false); | ||
1030 | } | ||
1031 | return task->tk_status; | ||
1032 | case -EAGAIN: | ||
1033 | rpc_restart_call_prepare(task); | ||
1034 | return -EAGAIN; | ||
1035 | } | ||
1036 | |||
1037 | if (hdr->res.verf->committed == NFS_FILE_SYNC || | ||
1038 | hdr->res.verf->committed == NFS_DATA_SYNC) | ||
1039 | ff_layout_set_layoutcommit(hdr); | ||
1040 | |||
1041 | return 0; | ||
1042 | } | ||
1043 | |||
1044 | static int ff_layout_commit_done_cb(struct rpc_task *task, | ||
1045 | struct nfs_commit_data *data) | ||
1046 | { | ||
1047 | struct inode *inode; | ||
1048 | int err; | ||
1049 | |||
1050 | trace_nfs4_pnfs_commit_ds(data, task->tk_status); | ||
1051 | if (task->tk_status == -ETIMEDOUT && !data->res.op_status) | ||
1052 | data->res.op_status = NFS4ERR_NXIO; | ||
1053 | if (task->tk_status < 0 && data->res.op_status) | ||
1054 | ff_layout_io_track_ds_error(data->lseg, data->ds_commit_index, | ||
1055 | data->args.offset, data->args.count, | ||
1056 | data->res.op_status, OP_COMMIT); | ||
1057 | err = ff_layout_async_handle_error(task, NULL, data->ds_clp, | ||
1058 | data->lseg, data->ds_commit_index); | ||
1059 | |||
1060 | switch (err) { | ||
1061 | case -NFS4ERR_RESET_TO_PNFS: | ||
1062 | case -NFS4ERR_RESET_TO_MDS: | ||
1063 | inode = data->lseg->pls_layout->plh_inode; | ||
1064 | pnfs_error_mark_layout_for_return(inode, data->lseg); | ||
1065 | if (err == -NFS4ERR_RESET_TO_PNFS) | ||
1066 | pnfs_set_retry_layoutget(data->lseg->pls_layout); | ||
1067 | else | ||
1068 | pnfs_clear_retry_layoutget(data->lseg->pls_layout); | ||
1069 | pnfs_generic_prepare_to_resend_writes(data); | ||
1070 | return -EAGAIN; | ||
1071 | case -EAGAIN: | ||
1072 | rpc_restart_call_prepare(task); | ||
1073 | return -EAGAIN; | ||
1074 | } | ||
1075 | |||
1076 | if (data->verf.committed == NFS_UNSTABLE) | ||
1077 | pnfs_commit_set_layoutcommit(data); | ||
1078 | |||
1079 | return 0; | ||
1080 | } | ||
1081 | |||
1082 | static int ff_layout_write_prepare_common(struct rpc_task *task, | ||
1083 | struct nfs_pgio_header *hdr) | ||
1084 | { | ||
1085 | if (unlikely(test_bit(NFS_CONTEXT_BAD, &hdr->args.context->flags))) { | ||
1086 | rpc_exit(task, -EIO); | ||
1087 | return -EIO; | ||
1088 | } | ||
1089 | |||
1090 | if (ff_layout_reset_to_mds(hdr->lseg, hdr->pgio_mirror_idx)) { | ||
1091 | bool retry_pnfs; | ||
1092 | |||
1093 | retry_pnfs = ff_layout_has_available_ds(hdr->lseg); | ||
1094 | dprintk("%s task %u reset io to %s\n", __func__, | ||
1095 | task->tk_pid, retry_pnfs ? "pNFS" : "MDS"); | ||
1096 | ff_layout_reset_write(hdr, retry_pnfs); | ||
1097 | rpc_exit(task, 0); | ||
1098 | return -EAGAIN; | ||
1099 | } | ||
1100 | |||
1101 | return 0; | ||
1102 | } | ||
1103 | |||
/*
 * rpc_call_prepare for NFSv3 DS writes: start the call only when the
 * common checks pass.
 */
static void ff_layout_write_prepare_v3(struct rpc_task *task, void *data)
{
	struct nfs_pgio_header *hdr = data;

	if (ff_layout_write_prepare_common(task, hdr) == 0)
		rpc_call_start(task);
}
1113 | |||
1114 | static void ff_layout_write_prepare_v4(struct rpc_task *task, void *data) | ||
1115 | { | ||
1116 | struct nfs_pgio_header *hdr = data; | ||
1117 | |||
1118 | if (ff_layout_write_prepare_common(task, hdr)) | ||
1119 | return; | ||
1120 | |||
1121 | if (ff_layout_setup_sequence(hdr->ds_clp, | ||
1122 | &hdr->args.seq_args, | ||
1123 | &hdr->res.seq_res, | ||
1124 | task)) | ||
1125 | return; | ||
1126 | |||
1127 | if (nfs4_set_rw_stateid(&hdr->args.stateid, hdr->args.context, | ||
1128 | hdr->args.lock_context, FMODE_WRITE) == -EIO) | ||
1129 | rpc_exit(task, -EIO); /* lost lock, terminate I/O */ | ||
1130 | } | ||
1131 | |||
1132 | static void ff_layout_write_call_done(struct rpc_task *task, void *data) | ||
1133 | { | ||
1134 | struct nfs_pgio_header *hdr = data; | ||
1135 | |||
1136 | if (test_bit(NFS_IOHDR_REDO, &hdr->flags) && | ||
1137 | task->tk_status == 0) { | ||
1138 | nfs4_sequence_done(task, &hdr->res.seq_res); | ||
1139 | return; | ||
1140 | } | ||
1141 | |||
1142 | /* Note this may cause RPC to be resent */ | ||
1143 | hdr->mds_ops->rpc_call_done(task, hdr); | ||
1144 | } | ||
1145 | |||
1146 | static void ff_layout_write_count_stats(struct rpc_task *task, void *data) | ||
1147 | { | ||
1148 | struct nfs_pgio_header *hdr = data; | ||
1149 | |||
1150 | rpc_count_iostats_metrics(task, | ||
1151 | &NFS_CLIENT(hdr->inode)->cl_metrics[NFSPROC4_CLNT_WRITE]); | ||
1152 | } | ||
1153 | |||
/*
 * rpc_call_prepare for NFSv3 DS commits: starts the call unconditionally;
 * @data is unused.
 */
1154 | static void ff_layout_commit_prepare_v3(struct rpc_task *task, void *data) | ||
1155 | { | ||
1156 | rpc_call_start(task); | ||
1157 | } | ||
1158 | |||
1159 | static void ff_layout_commit_prepare_v4(struct rpc_task *task, void *data) | ||
1160 | { | ||
1161 | struct nfs_commit_data *wdata = data; | ||
1162 | |||
1163 | ff_layout_setup_sequence(wdata->ds_clp, | ||
1164 | &wdata->args.seq_args, | ||
1165 | &wdata->res.seq_res, | ||
1166 | task); | ||
1167 | } | ||
1168 | |||
1169 | static void ff_layout_commit_count_stats(struct rpc_task *task, void *data) | ||
1170 | { | ||
1171 | struct nfs_commit_data *cdata = data; | ||
1172 | |||
1173 | rpc_count_iostats_metrics(task, | ||
1174 | &NFS_CLIENT(cdata->inode)->cl_metrics[NFSPROC4_CLNT_COMMIT]); | ||
1175 | } | ||
1176 | |||
/* RPC callbacks ff_layout_read_pagelist() uses when the DS version is 3. */
1177 | static const struct rpc_call_ops ff_layout_read_call_ops_v3 = { | ||
1178 | .rpc_call_prepare = ff_layout_read_prepare_v3, | ||
1179 | .rpc_call_done = ff_layout_read_call_done, | ||
1180 | .rpc_count_stats = ff_layout_read_count_stats, | ||
1181 | .rpc_release = pnfs_generic_rw_release, | ||
1182 | }; | ||
1183 | |||
/* RPC callbacks ff_layout_read_pagelist() uses when the DS version is not 3. */
1184 | static const struct rpc_call_ops ff_layout_read_call_ops_v4 = { | ||
1185 | .rpc_call_prepare = ff_layout_read_prepare_v4, | ||
1186 | .rpc_call_done = ff_layout_read_call_done, | ||
1187 | .rpc_count_stats = ff_layout_read_count_stats, | ||
1188 | .rpc_release = pnfs_generic_rw_release, | ||
1189 | }; | ||
1190 | |||
/* RPC callbacks ff_layout_write_pagelist() uses when the DS version is 3. */
1191 | static const struct rpc_call_ops ff_layout_write_call_ops_v3 = { | ||
1192 | .rpc_call_prepare = ff_layout_write_prepare_v3, | ||
1193 | .rpc_call_done = ff_layout_write_call_done, | ||
1194 | .rpc_count_stats = ff_layout_write_count_stats, | ||
1195 | .rpc_release = pnfs_generic_rw_release, | ||
1196 | }; | ||
1197 | |||
/* RPC callbacks ff_layout_write_pagelist() uses when the DS version is not 3. */
1198 | static const struct rpc_call_ops ff_layout_write_call_ops_v4 = { | ||
1199 | .rpc_call_prepare = ff_layout_write_prepare_v4, | ||
1200 | .rpc_call_done = ff_layout_write_call_done, | ||
1201 | .rpc_count_stats = ff_layout_write_count_stats, | ||
1202 | .rpc_release = pnfs_generic_rw_release, | ||
1203 | }; | ||
1204 | |||
/* RPC callbacks ff_layout_initiate_commit() uses when the DS version is 3. */
1205 | static const struct rpc_call_ops ff_layout_commit_call_ops_v3 = { | ||
1206 | .rpc_call_prepare = ff_layout_commit_prepare_v3, | ||
1207 | .rpc_call_done = pnfs_generic_write_commit_done, | ||
1208 | .rpc_count_stats = ff_layout_commit_count_stats, | ||
1209 | .rpc_release = pnfs_generic_commit_release, | ||
1210 | }; | ||
1211 | |||
/* RPC callbacks ff_layout_initiate_commit() uses when the DS version is not 3. */
1212 | static const struct rpc_call_ops ff_layout_commit_call_ops_v4 = { | ||
1213 | .rpc_call_prepare = ff_layout_commit_prepare_v4, | ||
1214 | .rpc_call_done = pnfs_generic_write_commit_done, | ||
1215 | .rpc_count_stats = ff_layout_commit_count_stats, | ||
1216 | .rpc_release = pnfs_generic_commit_release, | ||
1217 | }; | ||
1218 | |||
1219 | static enum pnfs_try_status | ||
1220 | ff_layout_read_pagelist(struct nfs_pgio_header *hdr) | ||
1221 | { | ||
1222 | struct pnfs_layout_segment *lseg = hdr->lseg; | ||
1223 | struct nfs4_pnfs_ds *ds; | ||
1224 | struct rpc_clnt *ds_clnt; | ||
1225 | struct rpc_cred *ds_cred; | ||
1226 | loff_t offset = hdr->args.offset; | ||
1227 | u32 idx = hdr->pgio_mirror_idx; | ||
1228 | int vers; | ||
1229 | struct nfs_fh *fh; | ||
1230 | |||
1231 | dprintk("--> %s ino %lu pgbase %u req %Zu@%llu\n", | ||
1232 | __func__, hdr->inode->i_ino, | ||
1233 | hdr->args.pgbase, (size_t)hdr->args.count, offset); | ||
1234 | |||
1235 | ds = nfs4_ff_layout_prepare_ds(lseg, idx, false); | ||
1236 | if (!ds) | ||
1237 | goto out_failed; | ||
1238 | |||
1239 | ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp, | ||
1240 | hdr->inode); | ||
1241 | if (IS_ERR(ds_clnt)) | ||
1242 | goto out_failed; | ||
1243 | |||
1244 | ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred); | ||
1245 | if (IS_ERR(ds_cred)) | ||
1246 | goto out_failed; | ||
1247 | |||
1248 | vers = nfs4_ff_layout_ds_version(lseg, idx); | ||
1249 | |||
1250 | dprintk("%s USE DS: %s cl_count %d vers %d\n", __func__, | ||
1251 | ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count), vers); | ||
1252 | |||
1253 | atomic_inc(&ds->ds_clp->cl_count); | ||
1254 | hdr->ds_clp = ds->ds_clp; | ||
1255 | fh = nfs4_ff_layout_select_ds_fh(lseg, idx); | ||
1256 | if (fh) | ||
1257 | hdr->args.fh = fh; | ||
1258 | |||
1259 | /* | ||
1260 | * Note that if we ever decide to split across DSes, | ||
1261 | * then we may need to handle dense-like offsets. | ||
1262 | */ | ||
1263 | hdr->args.offset = offset; | ||
1264 | hdr->mds_offset = offset; | ||
1265 | |||
1266 | /* Perform an asynchronous read to ds */ | ||
1267 | nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops, | ||
1268 | vers == 3 ? &ff_layout_read_call_ops_v3 : | ||
1269 | &ff_layout_read_call_ops_v4, | ||
1270 | 0, RPC_TASK_SOFTCONN); | ||
1271 | |||
1272 | return PNFS_ATTEMPTED; | ||
1273 | |||
1274 | out_failed: | ||
1275 | if (ff_layout_has_available_ds(lseg)) | ||
1276 | return PNFS_TRY_AGAIN; | ||
1277 | return PNFS_NOT_ATTEMPTED; | ||
1278 | } | ||
1279 | |||
1280 | /* Perform async writes. */ | ||
1281 | static enum pnfs_try_status | ||
1282 | ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync) | ||
1283 | { | ||
1284 | struct pnfs_layout_segment *lseg = hdr->lseg; | ||
1285 | struct nfs4_pnfs_ds *ds; | ||
1286 | struct rpc_clnt *ds_clnt; | ||
1287 | struct rpc_cred *ds_cred; | ||
1288 | loff_t offset = hdr->args.offset; | ||
1289 | int vers; | ||
1290 | struct nfs_fh *fh; | ||
1291 | int idx = hdr->pgio_mirror_idx; | ||
1292 | |||
1293 | ds = nfs4_ff_layout_prepare_ds(lseg, idx, true); | ||
1294 | if (!ds) | ||
1295 | return PNFS_NOT_ATTEMPTED; | ||
1296 | |||
1297 | ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp, | ||
1298 | hdr->inode); | ||
1299 | if (IS_ERR(ds_clnt)) | ||
1300 | return PNFS_NOT_ATTEMPTED; | ||
1301 | |||
1302 | ds_cred = ff_layout_get_ds_cred(lseg, idx, hdr->cred); | ||
1303 | if (IS_ERR(ds_cred)) | ||
1304 | return PNFS_NOT_ATTEMPTED; | ||
1305 | |||
1306 | vers = nfs4_ff_layout_ds_version(lseg, idx); | ||
1307 | |||
1308 | dprintk("%s ino %lu sync %d req %Zu@%llu DS: %s cl_count %d vers %d\n", | ||
1309 | __func__, hdr->inode->i_ino, sync, (size_t) hdr->args.count, | ||
1310 | offset, ds->ds_remotestr, atomic_read(&ds->ds_clp->cl_count), | ||
1311 | vers); | ||
1312 | |||
1313 | hdr->pgio_done_cb = ff_layout_write_done_cb; | ||
1314 | atomic_inc(&ds->ds_clp->cl_count); | ||
1315 | hdr->ds_clp = ds->ds_clp; | ||
1316 | hdr->ds_commit_idx = idx; | ||
1317 | fh = nfs4_ff_layout_select_ds_fh(lseg, idx); | ||
1318 | if (fh) | ||
1319 | hdr->args.fh = fh; | ||
1320 | |||
1321 | /* | ||
1322 | * Note that if we ever decide to split across DSes, | ||
1323 | * then we may need to handle dense-like offsets. | ||
1324 | */ | ||
1325 | hdr->args.offset = offset; | ||
1326 | |||
1327 | /* Perform an asynchronous write */ | ||
1328 | nfs_initiate_pgio(ds_clnt, hdr, ds_cred, ds->ds_clp->rpc_ops, | ||
1329 | vers == 3 ? &ff_layout_write_call_ops_v3 : | ||
1330 | &ff_layout_write_call_ops_v4, | ||
1331 | sync, RPC_TASK_SOFTCONN); | ||
1332 | return PNFS_ATTEMPTED; | ||
1333 | } | ||
1334 | |||
1335 | static void | ||
1336 | ff_layout_mark_request_commit(struct nfs_page *req, | ||
1337 | struct pnfs_layout_segment *lseg, | ||
1338 | struct nfs_commit_info *cinfo, | ||
1339 | u32 ds_commit_idx) | ||
1340 | { | ||
1341 | struct list_head *list; | ||
1342 | struct pnfs_commit_bucket *buckets; | ||
1343 | |||
1344 | spin_lock(cinfo->lock); | ||
1345 | buckets = cinfo->ds->buckets; | ||
1346 | list = &buckets[ds_commit_idx].written; | ||
1347 | if (list_empty(list)) { | ||
1348 | /* Non-empty buckets hold a reference on the lseg. That ref | ||
1349 | * is normally transferred to the COMMIT call and released | ||
1350 | * there. It could also be released if the last req is pulled | ||
1351 | * off due to a rewrite, in which case it will be done in | ||
1352 | * pnfs_common_clear_request_commit | ||
1353 | */ | ||
1354 | WARN_ON_ONCE(buckets[ds_commit_idx].wlseg != NULL); | ||
1355 | buckets[ds_commit_idx].wlseg = pnfs_get_lseg(lseg); | ||
1356 | } | ||
1357 | set_bit(PG_COMMIT_TO_DS, &req->wb_flags); | ||
1358 | cinfo->ds->nwritten++; | ||
1359 | |||
1360 | /* nfs_request_add_commit_list(). We need to add req to list without | ||
1361 | * dropping cinfo lock. | ||
1362 | */ | ||
1363 | set_bit(PG_CLEAN, &(req)->wb_flags); | ||
1364 | nfs_list_add_request(req, list); | ||
1365 | cinfo->mds->ncommit++; | ||
1366 | spin_unlock(cinfo->lock); | ||
1367 | if (!cinfo->dreq) { | ||
1368 | inc_zone_page_state(req->wb_page, NR_UNSTABLE_NFS); | ||
1369 | inc_bdi_stat(page_file_mapping(req->wb_page)->backing_dev_info, | ||
1370 | BDI_RECLAIMABLE); | ||
1371 | __mark_inode_dirty(req->wb_context->dentry->d_inode, | ||
1372 | I_DIRTY_DATASYNC); | ||
1373 | } | ||
1374 | } | ||
1375 | |||
/*
 * Map a commit bucket index to a mirror/DS index.  Currently the identity
 * mapping; @lseg is unused.
 */
1376 | static u32 calc_ds_index_from_commit(struct pnfs_layout_segment *lseg, u32 i) | ||
1377 | { | ||
1378 | return i; | ||
1379 | } | ||
1380 | |||
1381 | static struct nfs_fh * | ||
1382 | select_ds_fh_from_commit(struct pnfs_layout_segment *lseg, u32 i) | ||
1383 | { | ||
1384 | struct nfs4_ff_layout_segment *flseg = FF_LAYOUT_LSEG(lseg); | ||
1385 | |||
1386 | /* FIXME: Assume that there is only one NFS version available | ||
1387 | * for the DS. | ||
1388 | */ | ||
1389 | return &flseg->mirror_array[i]->fh_versions[0]; | ||
1390 | } | ||
1391 | |||
1392 | static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how) | ||
1393 | { | ||
1394 | struct pnfs_layout_segment *lseg = data->lseg; | ||
1395 | struct nfs4_pnfs_ds *ds; | ||
1396 | struct rpc_clnt *ds_clnt; | ||
1397 | struct rpc_cred *ds_cred; | ||
1398 | u32 idx; | ||
1399 | int vers; | ||
1400 | struct nfs_fh *fh; | ||
1401 | |||
1402 | idx = calc_ds_index_from_commit(lseg, data->ds_commit_index); | ||
1403 | ds = nfs4_ff_layout_prepare_ds(lseg, idx, true); | ||
1404 | if (!ds) | ||
1405 | goto out_err; | ||
1406 | |||
1407 | ds_clnt = nfs4_ff_find_or_create_ds_client(lseg, idx, ds->ds_clp, | ||
1408 | data->inode); | ||
1409 | if (IS_ERR(ds_clnt)) | ||
1410 | goto out_err; | ||
1411 | |||
1412 | ds_cred = ff_layout_get_ds_cred(lseg, idx, data->cred); | ||
1413 | if (IS_ERR(ds_cred)) | ||
1414 | goto out_err; | ||
1415 | |||
1416 | vers = nfs4_ff_layout_ds_version(lseg, idx); | ||
1417 | |||
1418 | dprintk("%s ino %lu, how %d cl_count %d vers %d\n", __func__, | ||
1419 | data->inode->i_ino, how, atomic_read(&ds->ds_clp->cl_count), | ||
1420 | vers); | ||
1421 | data->commit_done_cb = ff_layout_commit_done_cb; | ||
1422 | data->cred = ds_cred; | ||
1423 | atomic_inc(&ds->ds_clp->cl_count); | ||
1424 | data->ds_clp = ds->ds_clp; | ||
1425 | fh = select_ds_fh_from_commit(lseg, data->ds_commit_index); | ||
1426 | if (fh) | ||
1427 | data->args.fh = fh; | ||
1428 | return nfs_initiate_commit(ds_clnt, data, ds->ds_clp->rpc_ops, | ||
1429 | vers == 3 ? &ff_layout_commit_call_ops_v3 : | ||
1430 | &ff_layout_commit_call_ops_v4, | ||
1431 | how, RPC_TASK_SOFTCONN); | ||
1432 | out_err: | ||
1433 | pnfs_generic_prepare_to_resend_writes(data); | ||
1434 | pnfs_generic_commit_release(data); | ||
1435 | return -EAGAIN; | ||
1436 | } | ||
1437 | |||
/*
 * commit_pagelist hook: delegates to pnfs_generic_commit_pagelist(), with
 * ff_layout_initiate_commit() as the per-commit initiator.
 */
1438 | static int | ||
1439 | ff_layout_commit_pagelist(struct inode *inode, struct list_head *mds_pages, | ||
1440 | int how, struct nfs_commit_info *cinfo) | ||
1441 | { | ||
1442 | return pnfs_generic_commit_pagelist(inode, mds_pages, how, cinfo, | ||
1443 | ff_layout_initiate_commit); | ||
1444 | } | ||
1445 | |||
1446 | static struct pnfs_ds_commit_info * | ||
1447 | ff_layout_get_ds_info(struct inode *inode) | ||
1448 | { | ||
1449 | struct pnfs_layout_hdr *layout = NFS_I(inode)->layout; | ||
1450 | |||
1451 | if (layout == NULL) | ||
1452 | return NULL; | ||
1453 | |||
1454 | return &FF_LAYOUT_FROM_HDR(layout)->commit_info; | ||
1455 | } | ||
1456 | |||
1457 | static void | ||
1458 | ff_layout_free_deveiceid_node(struct nfs4_deviceid_node *d) | ||
1459 | { | ||
1460 | nfs4_ff_layout_free_deviceid(container_of(d, struct nfs4_ff_layout_ds, | ||
1461 | id_node)); | ||
1462 | } | ||
1463 | |||
1464 | static int ff_layout_encode_ioerr(struct nfs4_flexfile_layout *flo, | ||
1465 | struct xdr_stream *xdr, | ||
1466 | const struct nfs4_layoutreturn_args *args) | ||
1467 | { | ||
1468 | struct pnfs_layout_hdr *hdr = &flo->generic_hdr; | ||
1469 | __be32 *start; | ||
1470 | int count = 0, ret = 0; | ||
1471 | |||
1472 | start = xdr_reserve_space(xdr, 4); | ||
1473 | if (unlikely(!start)) | ||
1474 | return -E2BIG; | ||
1475 | |||
1476 | /* This assume we always return _ALL_ layouts */ | ||
1477 | spin_lock(&hdr->plh_inode->i_lock); | ||
1478 | ret = ff_layout_encode_ds_ioerr(flo, xdr, &count, &args->range); | ||
1479 | spin_unlock(&hdr->plh_inode->i_lock); | ||
1480 | |||
1481 | *start = cpu_to_be32(count); | ||
1482 | |||
1483 | return ret; | ||
1484 | } | ||
1485 | |||
1486 | /* report nothing for now */ | ||
1487 | static void ff_layout_encode_iostats(struct nfs4_flexfile_layout *flo, | ||
1488 | struct xdr_stream *xdr, | ||
1489 | const struct nfs4_layoutreturn_args *args) | ||
1490 | { | ||
1491 | __be32 *p; | ||
1492 | |||
1493 | p = xdr_reserve_space(xdr, 4); | ||
1494 | if (likely(p)) | ||
1495 | *p = cpu_to_be32(0); | ||
1496 | } | ||
1497 | |||
1498 | static struct nfs4_deviceid_node * | ||
1499 | ff_layout_alloc_deviceid_node(struct nfs_server *server, | ||
1500 | struct pnfs_device *pdev, gfp_t gfp_flags) | ||
1501 | { | ||
1502 | struct nfs4_ff_layout_ds *dsaddr; | ||
1503 | |||
1504 | dsaddr = nfs4_ff_alloc_deviceid_node(server, pdev, gfp_flags); | ||
1505 | if (!dsaddr) | ||
1506 | return NULL; | ||
1507 | return &dsaddr->id_node; | ||
1508 | } | ||
1509 | |||
1510 | static void | ||
1511 | ff_layout_encode_layoutreturn(struct pnfs_layout_hdr *lo, | ||
1512 | struct xdr_stream *xdr, | ||
1513 | const struct nfs4_layoutreturn_args *args) | ||
1514 | { | ||
1515 | struct nfs4_flexfile_layout *flo = FF_LAYOUT_FROM_HDR(lo); | ||
1516 | __be32 *start; | ||
1517 | |||
1518 | dprintk("%s: Begin\n", __func__); | ||
1519 | start = xdr_reserve_space(xdr, 4); | ||
1520 | BUG_ON(!start); | ||
1521 | |||
1522 | if (ff_layout_encode_ioerr(flo, xdr, args)) | ||
1523 | goto out; | ||
1524 | |||
1525 | ff_layout_encode_iostats(flo, xdr, args); | ||
1526 | out: | ||
1527 | *start = cpu_to_be32((xdr->p - start - 1) * 4); | ||
1528 | dprintk("%s: Return\n", __func__); | ||
1529 | } | ||
1530 | |||
/*
 * Layout driver descriptor for LAYOUT_FLEX_FILES, registered and
 * unregistered by the module init/exit functions.  Commit list handling
 * (clear/scan/recover) uses the pnfs_generic_* helpers; the remaining hooks
 * are the ff_layout_* implementations in this file.
 */
1531 | static struct pnfs_layoutdriver_type flexfilelayout_type = { | ||
1532 | .id = LAYOUT_FLEX_FILES, | ||
1533 | .name = "LAYOUT_FLEX_FILES", | ||
1534 | .owner = THIS_MODULE, | ||
1535 | .alloc_layout_hdr = ff_layout_alloc_layout_hdr, | ||
1536 | .free_layout_hdr = ff_layout_free_layout_hdr, | ||
1537 | .alloc_lseg = ff_layout_alloc_lseg, | ||
1538 | .free_lseg = ff_layout_free_lseg, | ||
1539 | .pg_read_ops = &ff_layout_pg_read_ops, | ||
1540 | .pg_write_ops = &ff_layout_pg_write_ops, | ||
1541 | .get_ds_info = ff_layout_get_ds_info, | ||
1542 | .free_deviceid_node = ff_layout_free_deveiceid_node, | ||
1543 | .mark_request_commit = ff_layout_mark_request_commit, | ||
1544 | .clear_request_commit = pnfs_generic_clear_request_commit, | ||
1545 | .scan_commit_lists = pnfs_generic_scan_commit_lists, | ||
1546 | .recover_commit_reqs = pnfs_generic_recover_commit_reqs, | ||
1547 | .commit_pagelist = ff_layout_commit_pagelist, | ||
1548 | .read_pagelist = ff_layout_read_pagelist, | ||
1549 | .write_pagelist = ff_layout_write_pagelist, | ||
1550 | .alloc_deviceid_node = ff_layout_alloc_deviceid_node, | ||
1551 | .encode_layoutreturn = ff_layout_encode_layoutreturn, | ||
1552 | }; | ||
1553 | |||
/*
 * Module entry point: log a banner and register the flexfile layout
 * driver.  Returns the result of pnfs_register_layoutdriver().
 */
1554 | static int __init nfs4flexfilelayout_init(void) | ||
1555 | { | ||
1556 | printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Registering...\n", | ||
1557 | __func__); | ||
1558 | return pnfs_register_layoutdriver(&flexfilelayout_type); | ||
1559 | } | ||
1560 | |||
/* Module exit point: log a banner and unregister the flexfile layout driver. */
1561 | static void __exit nfs4flexfilelayout_exit(void) | ||
1562 | { | ||
1563 | printk(KERN_INFO "%s: NFSv4 Flexfile Layout Driver Unregistering...\n", | ||
1564 | __func__); | ||
1565 | pnfs_unregister_layoutdriver(&flexfilelayout_type); | ||
1566 | } | ||
1567 | |||
1568 | MODULE_ALIAS("nfs-layouttype4-4"); | ||
1569 | |||
1570 | MODULE_LICENSE("GPL"); | ||
1571 | MODULE_DESCRIPTION("The NFSv4 flexfile layout driver"); | ||
1572 | |||
1573 | module_init(nfs4flexfilelayout_init); | ||
1574 | module_exit(nfs4flexfilelayout_exit); | ||
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.h b/fs/nfs/flexfilelayout/flexfilelayout.h new file mode 100644 index 000000000000..070f20445b2d --- /dev/null +++ b/fs/nfs/flexfilelayout/flexfilelayout.h | |||
@@ -0,0 +1,155 @@ | |||
1 | /* | ||
2 | * NFSv4 flexfile layout driver data structures. | ||
3 | * | ||
4 | * Copyright (c) 2014, Primary Data, Inc. All rights reserved. | ||
5 | * | ||
6 | * Tao Peng <bergwolf@primarydata.com> | ||
7 | */ | ||
8 | |||
9 | #ifndef FS_NFS_NFS4FLEXFILELAYOUT_H | ||
10 | #define FS_NFS_NFS4FLEXFILELAYOUT_H | ||
11 | |||
12 | #include "../pnfs.h" | ||
13 | |||
14 | /* XXX: Let's filter out insanely large mirror count for now to avoid oom | ||
15 | * due to network error etc. */ | ||
16 | #define NFS4_FLEXFILE_LAYOUT_MAX_MIRROR_CNT 4096 | ||
17 | |||
/*
 * One entry of a device's ds_versions array.  nfs4_ff_layout_ds_version()
 * reads .version of entry 0, and callers compare it with 3 to choose
 * between the v3 and v4 RPC callbacks.
 * NOTE(review): rsize/wsize are presumably per-version I/O size limits and
 * tightly_coupled a flag from the device address; confirm in the decoder.
 */
18 | struct nfs4_ff_ds_version { | ||
19 | u32 version; | ||
20 | u32 minor_version; | ||
21 | u32 rsize; | ||
22 | u32 wsize; | ||
23 | bool tightly_coupled; | ||
24 | }; | ||
25 | |||
/*
 * Flexfile device.  Embeds the generic device-id node (the enclosing
 * structure is recovered with container_of(), see FF_LAYOUT_MIRROR_DS()),
 * an array of ds_versions_cnt version entries, and the nfs4_pnfs_ds that
 * nfs4_ff_layout_free_deviceid() puts.
 */
26 | /* chained in global deviceid hlist */ | ||
27 | struct nfs4_ff_layout_ds { | ||
28 | struct nfs4_deviceid_node id_node; | ||
29 | u32 ds_versions_cnt; | ||
30 | struct nfs4_ff_ds_version *ds_versions; | ||
31 | struct nfs4_pnfs_ds *ds; | ||
32 | }; | ||
33 | |||
/*
 * One recorded data-server I/O error.  offset, length, status and opnum
 * correspond to the arguments of ff_layout_track_ds_error().
 * NOTE(review): stateid and deviceid are presumably copied from the mirror
 * the error occurred on; confirm in ff_layout_track_ds_error().
 */
34 | struct nfs4_ff_layout_ds_err { | ||
35 | struct list_head list; /* linked in nfs4_flexfile_layout error_list */ | ||
36 | u64 offset; | ||
37 | u64 length; | ||
38 | int status; | ||
39 | enum nfs_opnum4 opnum; | ||
40 | nfs4_stateid stateid; | ||
41 | struct nfs4_deviceid deviceid; | ||
42 | }; | ||
43 | |||
/*
 * Per-mirror state of a flexfile layout segment.  mirror_ds points to the
 * mirror's device (may be NULL, see FF_LAYOUT_DEVID_NODE()); fh_versions is
 * an array of fh_versions_cnt filehandles, of which
 * select_ds_fh_from_commit() uses entry 0.
 * NOTE(review): user_name/group_name, uid/gid and cred presumably describe
 * the identity used for DS I/O, and lock presumably guards cred; confirm
 * against ff_layout_get_ds_cred().
 */
44 | struct nfs4_ff_layout_mirror { | ||
45 | u32 ds_count; | ||
46 | u32 efficiency; | ||
47 | struct nfs4_ff_layout_ds *mirror_ds; | ||
48 | u32 fh_versions_cnt; | ||
49 | struct nfs_fh *fh_versions; | ||
50 | nfs4_stateid stateid; | ||
51 | struct nfs4_string user_name; | ||
52 | struct nfs4_string group_name; | ||
53 | u32 uid; | ||
54 | u32 gid; | ||
55 | struct rpc_cred *cred; | ||
56 | spinlock_t lock; | ||
57 | }; | ||
58 | |||
/*
 * Flexfile layout segment.  Wraps the generic pnfs_layout_segment (see
 * FF_LAYOUT_LSEG()); mirror_array holds mirror_array_cnt mirror pointers,
 * bounds-checked by FF_LAYOUT_COMP() and FF_LAYOUT_DEVID_NODE().
 */
59 | struct nfs4_ff_layout_segment { | ||
60 | struct pnfs_layout_segment generic_hdr; | ||
61 | u64 stripe_unit; | ||
62 | u32 mirror_array_cnt; | ||
63 | struct nfs4_ff_layout_mirror **mirror_array; | ||
64 | }; | ||
65 | |||
/*
 * Flexfile layout header.  Wraps the generic pnfs_layout_hdr (see
 * FF_LAYOUT_FROM_HDR()); commit_info is what ff_layout_get_ds_info()
 * returns.
 */
66 | struct nfs4_flexfile_layout { | ||
67 | struct pnfs_layout_hdr generic_hdr; | ||
68 | struct pnfs_ds_commit_info commit_info; | ||
69 | struct list_head error_list; /* nfs4_ff_layout_ds_err */ | ||
70 | }; | ||
71 | |||
/* Convert a generic layout header to the flexfile layout that embeds it. */
72 | static inline struct nfs4_flexfile_layout * | ||
73 | FF_LAYOUT_FROM_HDR(struct pnfs_layout_hdr *lo) | ||
74 | { | ||
75 | return container_of(lo, struct nfs4_flexfile_layout, generic_hdr); | ||
76 | } | ||
77 | |||
/* Convert a generic layout segment to the flexfile segment that embeds it. */
78 | static inline struct nfs4_ff_layout_segment * | ||
79 | FF_LAYOUT_LSEG(struct pnfs_layout_segment *lseg) | ||
80 | { | ||
81 | return container_of(lseg, | ||
82 | struct nfs4_ff_layout_segment, | ||
83 | generic_hdr); | ||
84 | } | ||
85 | |||
86 | static inline struct nfs4_deviceid_node * | ||
87 | FF_LAYOUT_DEVID_NODE(struct pnfs_layout_segment *lseg, u32 idx) | ||
88 | { | ||
89 | if (idx >= FF_LAYOUT_LSEG(lseg)->mirror_array_cnt || | ||
90 | FF_LAYOUT_LSEG(lseg)->mirror_array[idx] == NULL || | ||
91 | FF_LAYOUT_LSEG(lseg)->mirror_array[idx]->mirror_ds == NULL) | ||
92 | return NULL; | ||
93 | return &FF_LAYOUT_LSEG(lseg)->mirror_array[idx]->mirror_ds->id_node; | ||
94 | } | ||
95 | |||
/* Convert a generic device-id node to the flexfile device that embeds it. */
96 | static inline struct nfs4_ff_layout_ds * | ||
97 | FF_LAYOUT_MIRROR_DS(struct nfs4_deviceid_node *node) | ||
98 | { | ||
99 | return container_of(node, struct nfs4_ff_layout_ds, id_node); | ||
100 | } | ||
101 | |||
102 | static inline struct nfs4_ff_layout_mirror * | ||
103 | FF_LAYOUT_COMP(struct pnfs_layout_segment *lseg, u32 idx) | ||
104 | { | ||
105 | if (idx >= FF_LAYOUT_LSEG(lseg)->mirror_array_cnt) | ||
106 | return NULL; | ||
107 | return FF_LAYOUT_LSEG(lseg)->mirror_array[idx]; | ||
108 | } | ||
109 | |||
/* Number of entries in the mirror array of @lseg. */
110 | static inline u32 | ||
111 | FF_LAYOUT_MIRROR_COUNT(struct pnfs_layout_segment *lseg) | ||
112 | { | ||
113 | return FF_LAYOUT_LSEG(lseg)->mirror_array_cnt; | ||
114 | } | ||
115 | |||
/*
 * Thin wrapper around nfs4_test_deviceid_unavailable().  @node is passed
 * through unchecked.
 */
116 | static inline bool | ||
117 | ff_layout_test_devid_unavailable(struct nfs4_deviceid_node *node) | ||
118 | { | ||
119 | return nfs4_test_deviceid_unavailable(node); | ||
120 | } | ||
121 | |||
122 | static inline int | ||
123 | nfs4_ff_layout_ds_version(struct pnfs_layout_segment *lseg, u32 ds_idx) | ||
124 | { | ||
125 | return FF_LAYOUT_COMP(lseg, ds_idx)->mirror_ds->ds_versions[0].version; | ||
126 | } | ||
127 | |||
128 | struct nfs4_ff_layout_ds * | ||
129 | nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, | ||
130 | gfp_t gfp_flags); | ||
131 | void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds); | ||
132 | void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds); | ||
133 | int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo, | ||
134 | struct nfs4_ff_layout_mirror *mirror, u64 offset, | ||
135 | u64 length, int status, enum nfs_opnum4 opnum, | ||
136 | gfp_t gfp_flags); | ||
137 | int ff_layout_encode_ds_ioerr(struct nfs4_flexfile_layout *flo, | ||
138 | struct xdr_stream *xdr, int *count, | ||
139 | const struct pnfs_layout_range *range); | ||
140 | struct nfs_fh * | ||
141 | nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx); | ||
142 | |||
143 | struct nfs4_pnfs_ds * | ||
144 | nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, | ||
145 | bool fail_return); | ||
146 | |||
147 | struct rpc_clnt * | ||
148 | nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg, | ||
149 | u32 ds_idx, | ||
150 | struct nfs_client *ds_clp, | ||
151 | struct inode *inode); | ||
152 | struct rpc_cred *ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, | ||
153 | u32 ds_idx, struct rpc_cred *mdscred); | ||
154 | bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg); | ||
155 | #endif /* FS_NFS_NFS4FLEXFILELAYOUT_H */ | ||
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c new file mode 100644 index 000000000000..3bbb16b3066f --- /dev/null +++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c | |||
@@ -0,0 +1,552 @@ | |||
1 | /* | ||
2 | * Device operations for the pnfs nfs4 flexfile layout driver. | ||
3 | * | ||
4 | * Copyright (c) 2014, Primary Data, Inc. All rights reserved. | ||
5 | * | ||
6 | * Tao Peng <bergwolf@primarydata.com> | ||
7 | */ | ||
8 | |||
9 | #include <linux/nfs_fs.h> | ||
10 | #include <linux/vmalloc.h> | ||
11 | #include <linux/module.h> | ||
12 | #include <linux/sunrpc/addr.h> | ||
13 | |||
14 | #include "../internal.h" | ||
15 | #include "../nfs4session.h" | ||
16 | #include "flexfilelayout.h" | ||
17 | |||
18 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
19 | |||
20 | static unsigned int dataserver_timeo = NFS4_DEF_DS_TIMEO; | ||
21 | static unsigned int dataserver_retrans = NFS4_DEF_DS_RETRANS; | ||
22 | |||
23 | void nfs4_ff_layout_put_deviceid(struct nfs4_ff_layout_ds *mirror_ds) | ||
24 | { | ||
25 | if (mirror_ds) | ||
26 | nfs4_put_deviceid_node(&mirror_ds->id_node); | ||
27 | } | ||
28 | |||
29 | void nfs4_ff_layout_free_deviceid(struct nfs4_ff_layout_ds *mirror_ds) | ||
30 | { | ||
31 | nfs4_print_deviceid(&mirror_ds->id_node.deviceid); | ||
32 | nfs4_pnfs_ds_put(mirror_ds->ds); | ||
33 | kfree(mirror_ds); | ||
34 | } | ||
35 | |||
36 | /* Decode opaque device data and construct new_ds using it */ | ||
37 | struct nfs4_ff_layout_ds * | ||
38 | nfs4_ff_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, | ||
39 | gfp_t gfp_flags) | ||
40 | { | ||
41 | struct xdr_stream stream; | ||
42 | struct xdr_buf buf; | ||
43 | struct page *scratch; | ||
44 | struct list_head dsaddrs; | ||
45 | struct nfs4_pnfs_ds_addr *da; | ||
46 | struct nfs4_ff_layout_ds *new_ds = NULL; | ||
47 | struct nfs4_ff_ds_version *ds_versions = NULL; | ||
48 | u32 mp_count; | ||
49 | u32 version_count; | ||
50 | __be32 *p; | ||
51 | int i, ret = -ENOMEM; | ||
52 | |||
53 | /* set up xdr stream */ | ||
54 | scratch = alloc_page(gfp_flags); | ||
55 | if (!scratch) | ||
56 | goto out_err; | ||
57 | |||
58 | new_ds = kzalloc(sizeof(struct nfs4_ff_layout_ds), gfp_flags); | ||
59 | if (!new_ds) | ||
60 | goto out_scratch; | ||
61 | |||
62 | nfs4_init_deviceid_node(&new_ds->id_node, | ||
63 | server, | ||
64 | &pdev->dev_id); | ||
65 | INIT_LIST_HEAD(&dsaddrs); | ||
66 | |||
67 | xdr_init_decode_pages(&stream, &buf, pdev->pages, pdev->pglen); | ||
68 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); | ||
69 | |||
70 | /* multipath count */ | ||
71 | p = xdr_inline_decode(&stream, 4); | ||
72 | if (unlikely(!p)) | ||
73 | goto out_err_drain_dsaddrs; | ||
74 | mp_count = be32_to_cpup(p); | ||
75 | dprintk("%s: multipath ds count %d\n", __func__, mp_count); | ||
76 | |||
77 | for (i = 0; i < mp_count; i++) { | ||
78 | /* multipath ds */ | ||
79 | da = nfs4_decode_mp_ds_addr(server->nfs_client->cl_net, | ||
80 | &stream, gfp_flags); | ||
81 | if (da) | ||
82 | list_add_tail(&da->da_node, &dsaddrs); | ||
83 | } | ||
84 | if (list_empty(&dsaddrs)) { | ||
85 | dprintk("%s: no suitable DS addresses found\n", | ||
86 | __func__); | ||
87 | ret = -ENOMEDIUM; | ||
88 | goto out_err_drain_dsaddrs; | ||
89 | } | ||
90 | |||
91 | /* version count */ | ||
92 | p = xdr_inline_decode(&stream, 4); | ||
93 | if (unlikely(!p)) | ||
94 | goto out_err_drain_dsaddrs; | ||
95 | version_count = be32_to_cpup(p); | ||
96 | dprintk("%s: version count %d\n", __func__, version_count); | ||
97 | |||
98 | ds_versions = kzalloc(version_count * sizeof(struct nfs4_ff_ds_version), | ||
99 | gfp_flags); | ||
100 | if (!ds_versions) | ||
101 | goto out_scratch; | ||
102 | |||
103 | for (i = 0; i < version_count; i++) { | ||
104 | /* 20 = version(4) + minor_version(4) + rsize(4) + wsize(4) + | ||
105 | * tightly_coupled(4) */ | ||
106 | p = xdr_inline_decode(&stream, 20); | ||
107 | if (unlikely(!p)) | ||
108 | goto out_err_drain_dsaddrs; | ||
109 | ds_versions[i].version = be32_to_cpup(p++); | ||
110 | ds_versions[i].minor_version = be32_to_cpup(p++); | ||
111 | ds_versions[i].rsize = nfs_block_size(be32_to_cpup(p++), NULL); | ||
112 | ds_versions[i].wsize = nfs_block_size(be32_to_cpup(p++), NULL); | ||
113 | ds_versions[i].tightly_coupled = be32_to_cpup(p); | ||
114 | |||
115 | if (ds_versions[i].rsize > NFS_MAX_FILE_IO_SIZE) | ||
116 | ds_versions[i].rsize = NFS_MAX_FILE_IO_SIZE; | ||
117 | if (ds_versions[i].wsize > NFS_MAX_FILE_IO_SIZE) | ||
118 | ds_versions[i].wsize = NFS_MAX_FILE_IO_SIZE; | ||
119 | |||
120 | if (ds_versions[i].version != 3 || ds_versions[i].minor_version != 0) { | ||
121 | dprintk("%s: [%d] unsupported ds version %d-%d\n", __func__, | ||
122 | i, ds_versions[i].version, | ||
123 | ds_versions[i].minor_version); | ||
124 | ret = -EPROTONOSUPPORT; | ||
125 | goto out_err_drain_dsaddrs; | ||
126 | } | ||
127 | |||
128 | dprintk("%s: [%d] vers %u minor_ver %u rsize %u wsize %u coupled %d\n", | ||
129 | __func__, i, ds_versions[i].version, | ||
130 | ds_versions[i].minor_version, | ||
131 | ds_versions[i].rsize, | ||
132 | ds_versions[i].wsize, | ||
133 | ds_versions[i].tightly_coupled); | ||
134 | } | ||
135 | |||
136 | new_ds->ds_versions = ds_versions; | ||
137 | new_ds->ds_versions_cnt = version_count; | ||
138 | |||
139 | new_ds->ds = nfs4_pnfs_ds_add(&dsaddrs, gfp_flags); | ||
140 | if (!new_ds->ds) | ||
141 | goto out_err_drain_dsaddrs; | ||
142 | |||
143 | /* If DS was already in cache, free ds addrs */ | ||
144 | while (!list_empty(&dsaddrs)) { | ||
145 | da = list_first_entry(&dsaddrs, | ||
146 | struct nfs4_pnfs_ds_addr, | ||
147 | da_node); | ||
148 | list_del_init(&da->da_node); | ||
149 | kfree(da->da_remotestr); | ||
150 | kfree(da); | ||
151 | } | ||
152 | |||
153 | __free_page(scratch); | ||
154 | return new_ds; | ||
155 | |||
156 | out_err_drain_dsaddrs: | ||
157 | while (!list_empty(&dsaddrs)) { | ||
158 | da = list_first_entry(&dsaddrs, struct nfs4_pnfs_ds_addr, | ||
159 | da_node); | ||
160 | list_del_init(&da->da_node); | ||
161 | kfree(da->da_remotestr); | ||
162 | kfree(da); | ||
163 | } | ||
164 | |||
165 | kfree(ds_versions); | ||
166 | out_scratch: | ||
167 | __free_page(scratch); | ||
168 | out_err: | ||
169 | kfree(new_ds); | ||
170 | |||
171 | dprintk("%s ERROR: returning %d\n", __func__, ret); | ||
172 | return NULL; | ||
173 | } | ||
174 | |||
175 | static u64 | ||
176 | end_offset(u64 start, u64 len) | ||
177 | { | ||
178 | u64 end; | ||
179 | |||
180 | end = start + len; | ||
181 | return end >= start ? end : NFS4_MAX_UINT64; | ||
182 | } | ||
183 | |||
184 | static void extend_ds_error(struct nfs4_ff_layout_ds_err *err, | ||
185 | u64 offset, u64 length) | ||
186 | { | ||
187 | u64 end; | ||
188 | |||
189 | end = max_t(u64, end_offset(err->offset, err->length), | ||
190 | end_offset(offset, length)); | ||
191 | err->offset = min_t(u64, err->offset, offset); | ||
192 | err->length = end - err->offset; | ||
193 | } | ||
194 | |||
195 | static bool ds_error_can_merge(struct nfs4_ff_layout_ds_err *err, u64 offset, | ||
196 | u64 length, int status, enum nfs_opnum4 opnum, | ||
197 | nfs4_stateid *stateid, | ||
198 | struct nfs4_deviceid *deviceid) | ||
199 | { | ||
200 | return err->status == status && err->opnum == opnum && | ||
201 | nfs4_stateid_match(&err->stateid, stateid) && | ||
202 | !memcmp(&err->deviceid, deviceid, sizeof(*deviceid)) && | ||
203 | end_offset(err->offset, err->length) >= offset && | ||
204 | err->offset <= end_offset(offset, length); | ||
205 | } | ||
206 | |||
207 | static bool merge_ds_error(struct nfs4_ff_layout_ds_err *old, | ||
208 | struct nfs4_ff_layout_ds_err *new) | ||
209 | { | ||
210 | if (!ds_error_can_merge(old, new->offset, new->length, new->status, | ||
211 | new->opnum, &new->stateid, &new->deviceid)) | ||
212 | return false; | ||
213 | |||
214 | extend_ds_error(old, new->offset, new->length); | ||
215 | return true; | ||
216 | } | ||
217 | |||
218 | static bool | ||
219 | ff_layout_add_ds_error_locked(struct nfs4_flexfile_layout *flo, | ||
220 | struct nfs4_ff_layout_ds_err *dserr) | ||
221 | { | ||
222 | struct nfs4_ff_layout_ds_err *err; | ||
223 | |||
224 | list_for_each_entry(err, &flo->error_list, list) { | ||
225 | if (merge_ds_error(err, dserr)) { | ||
226 | return true; | ||
227 | } | ||
228 | } | ||
229 | |||
230 | list_add(&dserr->list, &flo->error_list); | ||
231 | return false; | ||
232 | } | ||
233 | |||
234 | static bool | ||
235 | ff_layout_update_ds_error(struct nfs4_flexfile_layout *flo, u64 offset, | ||
236 | u64 length, int status, enum nfs_opnum4 opnum, | ||
237 | nfs4_stateid *stateid, struct nfs4_deviceid *deviceid) | ||
238 | { | ||
239 | bool found = false; | ||
240 | struct nfs4_ff_layout_ds_err *err; | ||
241 | |||
242 | list_for_each_entry(err, &flo->error_list, list) { | ||
243 | if (ds_error_can_merge(err, offset, length, status, opnum, | ||
244 | stateid, deviceid)) { | ||
245 | found = true; | ||
246 | extend_ds_error(err, offset, length); | ||
247 | break; | ||
248 | } | ||
249 | } | ||
250 | |||
251 | return found; | ||
252 | } | ||
253 | |||
254 | int ff_layout_track_ds_error(struct nfs4_flexfile_layout *flo, | ||
255 | struct nfs4_ff_layout_mirror *mirror, u64 offset, | ||
256 | u64 length, int status, enum nfs_opnum4 opnum, | ||
257 | gfp_t gfp_flags) | ||
258 | { | ||
259 | struct nfs4_ff_layout_ds_err *dserr; | ||
260 | bool needfree; | ||
261 | |||
262 | if (status == 0) | ||
263 | return 0; | ||
264 | |||
265 | if (mirror->mirror_ds == NULL) | ||
266 | return -EINVAL; | ||
267 | |||
268 | spin_lock(&flo->generic_hdr.plh_inode->i_lock); | ||
269 | if (ff_layout_update_ds_error(flo, offset, length, status, opnum, | ||
270 | &mirror->stateid, | ||
271 | &mirror->mirror_ds->id_node.deviceid)) { | ||
272 | spin_unlock(&flo->generic_hdr.plh_inode->i_lock); | ||
273 | return 0; | ||
274 | } | ||
275 | spin_unlock(&flo->generic_hdr.plh_inode->i_lock); | ||
276 | dserr = kmalloc(sizeof(*dserr), gfp_flags); | ||
277 | if (!dserr) | ||
278 | return -ENOMEM; | ||
279 | |||
280 | INIT_LIST_HEAD(&dserr->list); | ||
281 | dserr->offset = offset; | ||
282 | dserr->length = length; | ||
283 | dserr->status = status; | ||
284 | dserr->opnum = opnum; | ||
285 | nfs4_stateid_copy(&dserr->stateid, &mirror->stateid); | ||
286 | memcpy(&dserr->deviceid, &mirror->mirror_ds->id_node.deviceid, | ||
287 | NFS4_DEVICEID4_SIZE); | ||
288 | |||
289 | spin_lock(&flo->generic_hdr.plh_inode->i_lock); | ||
290 | needfree = ff_layout_add_ds_error_locked(flo, dserr); | ||
291 | spin_unlock(&flo->generic_hdr.plh_inode->i_lock); | ||
292 | if (needfree) | ||
293 | kfree(dserr); | ||
294 | |||
295 | return 0; | ||
296 | } | ||
297 | |||
298 | /* currently we only support AUTH_NONE and AUTH_SYS */ | ||
299 | static rpc_authflavor_t | ||
300 | nfs4_ff_layout_choose_authflavor(struct nfs4_ff_layout_mirror *mirror) | ||
301 | { | ||
302 | if (mirror->uid == (u32)-1) | ||
303 | return RPC_AUTH_NULL; | ||
304 | return RPC_AUTH_UNIX; | ||
305 | } | ||
306 | |||
307 | /* fetch cred for NFSv3 DS */ | ||
308 | static int ff_layout_update_mirror_cred(struct nfs4_ff_layout_mirror *mirror, | ||
309 | struct nfs4_pnfs_ds *ds) | ||
310 | { | ||
311 | if (ds->ds_clp && !mirror->cred && | ||
312 | mirror->mirror_ds->ds_versions[0].version == 3) { | ||
313 | struct rpc_auth *auth = ds->ds_clp->cl_rpcclient->cl_auth; | ||
314 | struct rpc_cred *cred; | ||
315 | struct auth_cred acred = { | ||
316 | .uid = make_kuid(&init_user_ns, mirror->uid), | ||
317 | .gid = make_kgid(&init_user_ns, mirror->gid), | ||
318 | }; | ||
319 | |||
320 | /* AUTH_NULL ignores acred */ | ||
321 | cred = auth->au_ops->lookup_cred(auth, &acred, 0); | ||
322 | if (IS_ERR(cred)) { | ||
323 | dprintk("%s: lookup_cred failed with %ld\n", | ||
324 | __func__, PTR_ERR(cred)); | ||
325 | return PTR_ERR(cred); | ||
326 | } else { | ||
327 | mirror->cred = cred; | ||
328 | } | ||
329 | } | ||
330 | return 0; | ||
331 | } | ||
332 | |||
333 | struct nfs_fh * | ||
334 | nfs4_ff_layout_select_ds_fh(struct pnfs_layout_segment *lseg, u32 mirror_idx) | ||
335 | { | ||
336 | struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, mirror_idx); | ||
337 | struct nfs_fh *fh = NULL; | ||
338 | struct nfs4_deviceid_node *devid; | ||
339 | |||
340 | if (mirror == NULL || mirror->mirror_ds == NULL || | ||
341 | mirror->mirror_ds->ds == NULL) { | ||
342 | printk(KERN_ERR "NFS: %s: No data server for mirror offset index %d\n", | ||
343 | __func__, mirror_idx); | ||
344 | if (mirror && mirror->mirror_ds) { | ||
345 | devid = &mirror->mirror_ds->id_node; | ||
346 | pnfs_generic_mark_devid_invalid(devid); | ||
347 | } | ||
348 | goto out; | ||
349 | } | ||
350 | |||
351 | /* FIXME: For now assume there is only 1 version available for the DS */ | ||
352 | fh = &mirror->fh_versions[0]; | ||
353 | out: | ||
354 | return fh; | ||
355 | } | ||
356 | |||
357 | /* Upon return, either ds is connected, or ds is NULL */ | ||
358 | struct nfs4_pnfs_ds * | ||
359 | nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx, | ||
360 | bool fail_return) | ||
361 | { | ||
362 | struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); | ||
363 | struct nfs4_pnfs_ds *ds = NULL; | ||
364 | struct nfs4_deviceid_node *devid; | ||
365 | struct inode *ino = lseg->pls_layout->plh_inode; | ||
366 | struct nfs_server *s = NFS_SERVER(ino); | ||
367 | unsigned int max_payload; | ||
368 | rpc_authflavor_t flavor; | ||
369 | |||
370 | if (mirror == NULL || mirror->mirror_ds == NULL || | ||
371 | mirror->mirror_ds->ds == NULL) { | ||
372 | printk(KERN_ERR "NFS: %s: No data server for offset index %d\n", | ||
373 | __func__, ds_idx); | ||
374 | if (mirror && mirror->mirror_ds) { | ||
375 | devid = &mirror->mirror_ds->id_node; | ||
376 | pnfs_generic_mark_devid_invalid(devid); | ||
377 | } | ||
378 | goto out; | ||
379 | } | ||
380 | |||
381 | devid = &mirror->mirror_ds->id_node; | ||
382 | if (ff_layout_test_devid_unavailable(devid)) | ||
383 | goto out; | ||
384 | |||
385 | ds = mirror->mirror_ds->ds; | ||
386 | /* matching smp_wmb() in _nfs4_pnfs_v3/4_ds_connect */ | ||
387 | smp_rmb(); | ||
388 | if (ds->ds_clp) | ||
389 | goto out; | ||
390 | |||
391 | flavor = nfs4_ff_layout_choose_authflavor(mirror); | ||
392 | |||
393 | /* FIXME: For now we assume the server sent only one version of NFS | ||
394 | * to use for the DS. | ||
395 | */ | ||
396 | nfs4_pnfs_ds_connect(s, ds, devid, dataserver_timeo, | ||
397 | dataserver_retrans, | ||
398 | mirror->mirror_ds->ds_versions[0].version, | ||
399 | mirror->mirror_ds->ds_versions[0].minor_version, | ||
400 | flavor); | ||
401 | |||
402 | /* connect success, check rsize/wsize limit */ | ||
403 | if (ds->ds_clp) { | ||
404 | max_payload = | ||
405 | nfs_block_size(rpc_max_payload(ds->ds_clp->cl_rpcclient), | ||
406 | NULL); | ||
407 | if (mirror->mirror_ds->ds_versions[0].rsize > max_payload) | ||
408 | mirror->mirror_ds->ds_versions[0].rsize = max_payload; | ||
409 | if (mirror->mirror_ds->ds_versions[0].wsize > max_payload) | ||
410 | mirror->mirror_ds->ds_versions[0].wsize = max_payload; | ||
411 | } else { | ||
412 | ff_layout_track_ds_error(FF_LAYOUT_FROM_HDR(lseg->pls_layout), | ||
413 | mirror, lseg->pls_range.offset, | ||
414 | lseg->pls_range.length, NFS4ERR_NXIO, | ||
415 | OP_ILLEGAL, GFP_NOIO); | ||
416 | if (fail_return) { | ||
417 | pnfs_error_mark_layout_for_return(ino, lseg); | ||
418 | if (ff_layout_has_available_ds(lseg)) | ||
419 | pnfs_set_retry_layoutget(lseg->pls_layout); | ||
420 | else | ||
421 | pnfs_clear_retry_layoutget(lseg->pls_layout); | ||
422 | |||
423 | } else { | ||
424 | if (ff_layout_has_available_ds(lseg)) | ||
425 | set_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, | ||
426 | &lseg->pls_layout->plh_flags); | ||
427 | else { | ||
428 | pnfs_error_mark_layout_for_return(ino, lseg); | ||
429 | pnfs_clear_retry_layoutget(lseg->pls_layout); | ||
430 | } | ||
431 | } | ||
432 | } | ||
433 | |||
434 | if (ff_layout_update_mirror_cred(mirror, ds)) | ||
435 | ds = NULL; | ||
436 | out: | ||
437 | return ds; | ||
438 | } | ||
439 | |||
440 | struct rpc_cred * | ||
441 | ff_layout_get_ds_cred(struct pnfs_layout_segment *lseg, u32 ds_idx, | ||
442 | struct rpc_cred *mdscred) | ||
443 | { | ||
444 | struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); | ||
445 | struct rpc_cred *cred = ERR_PTR(-EINVAL); | ||
446 | |||
447 | if (!nfs4_ff_layout_prepare_ds(lseg, ds_idx, true)) | ||
448 | goto out; | ||
449 | |||
450 | if (mirror && mirror->cred) | ||
451 | cred = mirror->cred; | ||
452 | else | ||
453 | cred = mdscred; | ||
454 | out: | ||
455 | return cred; | ||
456 | } | ||
457 | |||
458 | /** | ||
459 | * Find or create a DS rpc client with th MDS server rpc client auth flavor | ||
460 | * in the nfs_client cl_ds_clients list. | ||
461 | */ | ||
462 | struct rpc_clnt * | ||
463 | nfs4_ff_find_or_create_ds_client(struct pnfs_layout_segment *lseg, u32 ds_idx, | ||
464 | struct nfs_client *ds_clp, struct inode *inode) | ||
465 | { | ||
466 | struct nfs4_ff_layout_mirror *mirror = FF_LAYOUT_COMP(lseg, ds_idx); | ||
467 | |||
468 | switch (mirror->mirror_ds->ds_versions[0].version) { | ||
469 | case 3: | ||
470 | /* For NFSv3 DS, flavor is set when creating DS connections */ | ||
471 | return ds_clp->cl_rpcclient; | ||
472 | case 4: | ||
473 | return nfs4_find_or_create_ds_client(ds_clp, inode); | ||
474 | default: | ||
475 | BUG(); | ||
476 | } | ||
477 | } | ||
478 | |||
479 | static bool is_range_intersecting(u64 offset1, u64 length1, | ||
480 | u64 offset2, u64 length2) | ||
481 | { | ||
482 | u64 end1 = end_offset(offset1, length1); | ||
483 | u64 end2 = end_offset(offset2, length2); | ||
484 | |||
485 | return (end1 == NFS4_MAX_UINT64 || end1 > offset2) && | ||
486 | (end2 == NFS4_MAX_UINT64 || end2 > offset1); | ||
487 | } | ||
488 | |||
489 | /* called with inode i_lock held */ | ||
490 | int ff_layout_encode_ds_ioerr(struct nfs4_flexfile_layout *flo, | ||
491 | struct xdr_stream *xdr, int *count, | ||
492 | const struct pnfs_layout_range *range) | ||
493 | { | ||
494 | struct nfs4_ff_layout_ds_err *err, *n; | ||
495 | __be32 *p; | ||
496 | |||
497 | list_for_each_entry_safe(err, n, &flo->error_list, list) { | ||
498 | if (!is_range_intersecting(err->offset, err->length, | ||
499 | range->offset, range->length)) | ||
500 | continue; | ||
501 | /* offset(8) + length(8) + stateid(NFS4_STATEID_SIZE) | ||
502 | * + deviceid(NFS4_DEVICEID4_SIZE) + status(4) + opnum(4) | ||
503 | */ | ||
504 | p = xdr_reserve_space(xdr, | ||
505 | 24 + NFS4_STATEID_SIZE + NFS4_DEVICEID4_SIZE); | ||
506 | if (unlikely(!p)) | ||
507 | return -ENOBUFS; | ||
508 | p = xdr_encode_hyper(p, err->offset); | ||
509 | p = xdr_encode_hyper(p, err->length); | ||
510 | p = xdr_encode_opaque_fixed(p, &err->stateid, | ||
511 | NFS4_STATEID_SIZE); | ||
512 | p = xdr_encode_opaque_fixed(p, &err->deviceid, | ||
513 | NFS4_DEVICEID4_SIZE); | ||
514 | *p++ = cpu_to_be32(err->status); | ||
515 | *p++ = cpu_to_be32(err->opnum); | ||
516 | *count += 1; | ||
517 | list_del(&err->list); | ||
518 | kfree(err); | ||
519 | dprintk("%s: offset %llu length %llu status %d op %d count %d\n", | ||
520 | __func__, err->offset, err->length, err->status, | ||
521 | err->opnum, *count); | ||
522 | } | ||
523 | |||
524 | return 0; | ||
525 | } | ||
526 | |||
527 | bool ff_layout_has_available_ds(struct pnfs_layout_segment *lseg) | ||
528 | { | ||
529 | struct nfs4_ff_layout_mirror *mirror; | ||
530 | struct nfs4_deviceid_node *devid; | ||
531 | int idx; | ||
532 | |||
533 | for (idx = 0; idx < FF_LAYOUT_MIRROR_COUNT(lseg); idx++) { | ||
534 | mirror = FF_LAYOUT_COMP(lseg, idx); | ||
535 | if (mirror && mirror->mirror_ds) { | ||
536 | devid = &mirror->mirror_ds->id_node; | ||
537 | if (!ff_layout_test_devid_unavailable(devid)) | ||
538 | return true; | ||
539 | } | ||
540 | } | ||
541 | |||
542 | return false; | ||
543 | } | ||
544 | |||
545 | module_param(dataserver_retrans, uint, 0644); | ||
546 | MODULE_PARM_DESC(dataserver_retrans, "The number of times the NFSv4.1 client " | ||
547 | "retries a request before it attempts further " | ||
548 | " recovery action."); | ||
549 | module_param(dataserver_timeo, uint, 0644); | ||
550 | MODULE_PARM_DESC(dataserver_timeo, "The time (in tenths of a second) the " | ||
551 | "NFSv4.1 client waits for a response from a " | ||
552 | " data server before it retries an NFS request."); | ||
diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index 2f5db844c172..857e2a99acc8 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c | |||
@@ -152,7 +152,7 @@ void nfs_fattr_map_and_free_names(struct nfs_server *server, struct nfs_fattr *f | |||
152 | nfs_fattr_free_group_name(fattr); | 152 | nfs_fattr_free_group_name(fattr); |
153 | } | 153 | } |
154 | 154 | ||
155 | static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res) | 155 | int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res) |
156 | { | 156 | { |
157 | unsigned long val; | 157 | unsigned long val; |
158 | char buf[16]; | 158 | char buf[16]; |
@@ -166,6 +166,7 @@ static int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *re | |||
166 | *res = val; | 166 | *res = val; |
167 | return 1; | 167 | return 1; |
168 | } | 168 | } |
169 | EXPORT_SYMBOL_GPL(nfs_map_string_to_numeric); | ||
169 | 170 | ||
170 | static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen) | 171 | static int nfs_map_numeric_to_string(__u32 id, char *buf, size_t buflen) |
171 | { | 172 | { |
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 44c600aac907..ca6dda0f68bb 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c | |||
@@ -7796,9 +7796,7 @@ static void nfs4_layoutreturn_release(void *calldata) | |||
7796 | spin_lock(&lo->plh_inode->i_lock); | 7796 | spin_lock(&lo->plh_inode->i_lock); |
7797 | if (lrp->res.lrs_present) | 7797 | if (lrp->res.lrs_present) |
7798 | pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); | 7798 | pnfs_set_layout_stateid(lo, &lrp->res.stateid, true); |
7799 | clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags); | 7799 | pnfs_clear_layoutreturn_waitbit(lo); |
7800 | smp_mb__after_atomic(); | ||
7801 | wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); | ||
7802 | clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags); | 7800 | clear_bit(NFS_LAYOUT_RETURN_BEFORE_CLOSE, &lo->plh_flags); |
7803 | rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); | 7801 | rpc_wake_up(&NFS_SERVER(lo->plh_inode)->roc_rpcwaitq); |
7804 | lo->plh_block_lgets--; | 7802 | lo->plh_block_lgets--; |
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index c4c9fe606ae6..0fb0f1920a1f 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c | |||
@@ -910,7 +910,9 @@ send_layoutget(struct pnfs_layout_hdr *lo, | |||
910 | pnfs_layout_io_set_failed(lo, range->iomode); | 910 | pnfs_layout_io_set_failed(lo, range->iomode); |
911 | } | 911 | } |
912 | return NULL; | 912 | return NULL; |
913 | } | 913 | } else |
914 | pnfs_layout_clear_fail_bit(lo, | ||
915 | pnfs_iomode_to_fail_bit(range->iomode)); | ||
914 | 916 | ||
915 | return lseg; | 917 | return lseg; |
916 | } | 918 | } |
@@ -930,6 +932,13 @@ static void pnfs_clear_layoutcommit(struct inode *inode, | |||
930 | } | 932 | } |
931 | } | 933 | } |
932 | 934 | ||
935 | void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo) | ||
936 | { | ||
937 | clear_bit_unlock(NFS_LAYOUT_RETURN, &lo->plh_flags); | ||
938 | smp_mb__after_atomic(); | ||
939 | wake_up_bit(&lo->plh_flags, NFS_LAYOUT_RETURN); | ||
940 | } | ||
941 | |||
933 | static int | 942 | static int |
934 | pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, | 943 | pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, |
935 | enum pnfs_iomode iomode, bool sync) | 944 | enum pnfs_iomode iomode, bool sync) |
@@ -943,6 +952,7 @@ pnfs_send_layoutreturn(struct pnfs_layout_hdr *lo, nfs4_stateid stateid, | |||
943 | status = -ENOMEM; | 952 | status = -ENOMEM; |
944 | spin_lock(&ino->i_lock); | 953 | spin_lock(&ino->i_lock); |
945 | lo->plh_block_lgets--; | 954 | lo->plh_block_lgets--; |
955 | pnfs_clear_layoutreturn_waitbit(lo); | ||
946 | rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq); | 956 | rpc_wake_up(&NFS_SERVER(ino)->roc_rpcwaitq); |
947 | spin_unlock(&ino->i_lock); | 957 | spin_unlock(&ino->i_lock); |
948 | pnfs_put_layout_hdr(lo); | 958 | pnfs_put_layout_hdr(lo); |
@@ -1418,6 +1428,15 @@ static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo) | |||
1418 | TASK_UNINTERRUPTIBLE); | 1428 | TASK_UNINTERRUPTIBLE); |
1419 | } | 1429 | } |
1420 | 1430 | ||
1431 | static void pnfs_clear_first_layoutget(struct pnfs_layout_hdr *lo) | ||
1432 | { | ||
1433 | unsigned long *bitlock = &lo->plh_flags; | ||
1434 | |||
1435 | clear_bit_unlock(NFS_LAYOUT_FIRST_LAYOUTGET, bitlock); | ||
1436 | smp_mb__after_atomic(); | ||
1437 | wake_up_bit(bitlock, NFS_LAYOUT_FIRST_LAYOUTGET); | ||
1438 | } | ||
1439 | |||
1421 | /* | 1440 | /* |
1422 | * Layout segment is retreived from the server if not cached. | 1441 | * Layout segment is retreived from the server if not cached. |
1423 | * The appropriate layout segment is referenced and returned to the caller. | 1442 | * The appropriate layout segment is referenced and returned to the caller. |
@@ -1499,6 +1518,8 @@ lookup_again: | |||
1499 | spin_unlock(&ino->i_lock); | 1518 | spin_unlock(&ino->i_lock); |
1500 | dprintk("%s wait for layoutreturn\n", __func__); | 1519 | dprintk("%s wait for layoutreturn\n", __func__); |
1501 | if (pnfs_prepare_to_retry_layoutget(lo)) { | 1520 | if (pnfs_prepare_to_retry_layoutget(lo)) { |
1521 | if (first) | ||
1522 | pnfs_clear_first_layoutget(lo); | ||
1502 | pnfs_put_layout_hdr(lo); | 1523 | pnfs_put_layout_hdr(lo); |
1503 | dprintk("%s retrying\n", __func__); | 1524 | dprintk("%s retrying\n", __func__); |
1504 | goto lookup_again; | 1525 | goto lookup_again; |
@@ -1533,13 +1554,8 @@ lookup_again: | |||
1533 | pnfs_clear_retry_layoutget(lo); | 1554 | pnfs_clear_retry_layoutget(lo); |
1534 | atomic_dec(&lo->plh_outstanding); | 1555 | atomic_dec(&lo->plh_outstanding); |
1535 | out_put_layout_hdr: | 1556 | out_put_layout_hdr: |
1536 | if (first) { | 1557 | if (first) |
1537 | unsigned long *bitlock = &lo->plh_flags; | 1558 | pnfs_clear_first_layoutget(lo); |
1538 | |||
1539 | clear_bit_unlock(NFS_LAYOUT_FIRST_LAYOUTGET, bitlock); | ||
1540 | smp_mb__after_atomic(); | ||
1541 | wake_up_bit(bitlock, NFS_LAYOUT_FIRST_LAYOUTGET); | ||
1542 | } | ||
1543 | pnfs_put_layout_hdr(lo); | 1559 | pnfs_put_layout_hdr(lo); |
1544 | out: | 1560 | out: |
1545 | dprintk("%s: inode %s/%llu pNFS layout segment %s for " | 1561 | dprintk("%s: inode %s/%llu pNFS layout segment %s for " |
diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 49a466708400..7642021484bf 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h | |||
@@ -278,6 +278,7 @@ struct pnfs_layout_segment *pnfs_update_layout(struct inode *ino, | |||
278 | u64 count, | 278 | u64 count, |
279 | enum pnfs_iomode iomode, | 279 | enum pnfs_iomode iomode, |
280 | gfp_t gfp_flags); | 280 | gfp_t gfp_flags); |
281 | void pnfs_clear_layoutreturn_waitbit(struct pnfs_layout_hdr *lo); | ||
281 | 282 | ||
282 | void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); | 283 | void nfs4_deviceid_mark_client_invalid(struct nfs_client *clp); |
283 | int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *); | 284 | int pnfs_read_done_resend_to_mds(struct nfs_pgio_header *); |
diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 022b761dbf0a..de7c91ca427e 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h | |||
@@ -516,6 +516,7 @@ enum pnfs_layouttype { | |||
516 | LAYOUT_NFSV4_1_FILES = 1, | 516 | LAYOUT_NFSV4_1_FILES = 1, |
517 | LAYOUT_OSD2_OBJECTS = 2, | 517 | LAYOUT_OSD2_OBJECTS = 2, |
518 | LAYOUT_BLOCK_VOLUME = 3, | 518 | LAYOUT_BLOCK_VOLUME = 3, |
519 | LAYOUT_FLEX_FILES = 4, | ||
519 | }; | 520 | }; |
520 | 521 | ||
521 | /* used for both layout return and recall */ | 522 | /* used for both layout return and recall */ |
diff --git a/include/linux/nfs_idmap.h b/include/linux/nfs_idmap.h index 0f4b79da6584..333844e38f66 100644 --- a/include/linux/nfs_idmap.h +++ b/include/linux/nfs_idmap.h | |||
@@ -73,5 +73,7 @@ int nfs_map_group_to_gid(const struct nfs_server *, const char *, size_t, kgid_t | |||
73 | int nfs_map_uid_to_name(const struct nfs_server *, kuid_t, char *, size_t); | 73 | int nfs_map_uid_to_name(const struct nfs_server *, kuid_t, char *, size_t); |
74 | int nfs_map_gid_to_group(const struct nfs_server *, kgid_t, char *, size_t); | 74 | int nfs_map_gid_to_group(const struct nfs_server *, kgid_t, char *, size_t); |
75 | 75 | ||
76 | int nfs_map_string_to_numeric(const char *name, size_t namelen, __u32 *res); | ||
77 | |||
76 | extern unsigned int nfs_idmap_cache_timeout; | 78 | extern unsigned int nfs_idmap_cache_timeout; |
77 | #endif /* NFS_IDMAP_H */ | 79 | #endif /* NFS_IDMAP_H */ |
diff --git a/include/linux/sunrpc/metrics.h b/include/linux/sunrpc/metrics.h index 89f2ca178873..7e61a17030a4 100644 --- a/include/linux/sunrpc/metrics.h +++ b/include/linux/sunrpc/metrics.h | |||
@@ -89,6 +89,8 @@ void rpc_free_iostats(struct rpc_iostats *); | |||
89 | static inline struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt) { return NULL; } | 89 | static inline struct rpc_iostats *rpc_alloc_iostats(struct rpc_clnt *clnt) { return NULL; } |
90 | static inline void rpc_count_iostats(const struct rpc_task *task, | 90 | static inline void rpc_count_iostats(const struct rpc_task *task, |
91 | struct rpc_iostats *stats) {} | 91 | struct rpc_iostats *stats) {} |
/*
 * No-op stub, defined next to the other empty iostats stubs in this block.
 * The parameters are named because a C function definition (unlike a
 * prototype) may not omit parameter names before C23.
 */
static inline void rpc_count_iostats_metrics(const struct rpc_task *task,
					     struct rpc_iostats *stats) {}
92 | static inline void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt) {} | 94 | static inline void rpc_print_iostats(struct seq_file *seq, struct rpc_clnt *clnt) {} |
93 | static inline void rpc_free_iostats(struct rpc_iostats *stats) {} | 95 | static inline void rpc_free_iostats(struct rpc_iostats *stats) {} |
94 | 96 | ||