diff options
Diffstat (limited to 'fs/nfs/objlayout/objlayout.c')
-rw-r--r-- | fs/nfs/objlayout/objlayout.c | 712 |
1 files changed, 712 insertions, 0 deletions
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c new file mode 100644 index 000000000000..dc3956c0de80 --- /dev/null +++ b/fs/nfs/objlayout/objlayout.c | |||
@@ -0,0 +1,712 @@ | |||
1 | /* | ||
2 | * pNFS Objects layout driver high level definitions | ||
3 | * | ||
4 | * Copyright (C) 2007 Panasas Inc. [year of first publication] | ||
5 | * All rights reserved. | ||
6 | * | ||
7 | * Benny Halevy <bhalevy@panasas.com> | ||
8 | * Boaz Harrosh <bharrosh@panasas.com> | ||
9 | * | ||
10 | * This program is free software; you can redistribute it and/or modify | ||
11 | * it under the terms of the GNU General Public License version 2 | ||
12 | * See the file COPYING included with this distribution for more details. | ||
13 | * | ||
14 | * Redistribution and use in source and binary forms, with or without | ||
15 | * modification, are permitted provided that the following conditions | ||
16 | * are met: | ||
17 | * | ||
18 | * 1. Redistributions of source code must retain the above copyright | ||
19 | * notice, this list of conditions and the following disclaimer. | ||
20 | * 2. Redistributions in binary form must reproduce the above copyright | ||
21 | * notice, this list of conditions and the following disclaimer in the | ||
22 | * documentation and/or other materials provided with the distribution. | ||
23 | * 3. Neither the name of the Panasas company nor the names of its | ||
24 | * contributors may be used to endorse or promote products derived | ||
25 | * from this software without specific prior written permission. | ||
26 | * | ||
27 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED | ||
28 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF | ||
29 | * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | ||
30 | * DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE | ||
31 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | ||
32 | * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | ||
33 | * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR | ||
34 | * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF | ||
35 | * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING | ||
36 | * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS | ||
37 | * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
38 | */ | ||
39 | |||
40 | #include <scsi/osd_initiator.h> | ||
41 | #include "objlayout.h" | ||
42 | |||
43 | #define NFSDBG_FACILITY NFSDBG_PNFS_LD | ||
44 | /* | ||
45 | * Create a objlayout layout structure for the given inode and return it. | ||
46 | */ | ||
47 | struct pnfs_layout_hdr * | ||
48 | objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) | ||
49 | { | ||
50 | struct objlayout *objlay; | ||
51 | |||
52 | objlay = kzalloc(sizeof(struct objlayout), gfp_flags); | ||
53 | if (objlay) { | ||
54 | spin_lock_init(&objlay->lock); | ||
55 | INIT_LIST_HEAD(&objlay->err_list); | ||
56 | } | ||
57 | dprintk("%s: Return %p\n", __func__, objlay); | ||
58 | return &objlay->pnfs_layout; | ||
59 | } | ||
60 | |||
61 | /* | ||
62 | * Free an objlayout layout structure | ||
63 | */ | ||
64 | void | ||
65 | objlayout_free_layout_hdr(struct pnfs_layout_hdr *lo) | ||
66 | { | ||
67 | struct objlayout *objlay = OBJLAYOUT(lo); | ||
68 | |||
69 | dprintk("%s: objlay %p\n", __func__, objlay); | ||
70 | |||
71 | WARN_ON(!list_empty(&objlay->err_list)); | ||
72 | kfree(objlay); | ||
73 | } | ||
74 | |||
75 | /* | ||
76 | * Unmarshall layout and store it in pnfslay. | ||
77 | */ | ||
78 | struct pnfs_layout_segment * | ||
79 | objlayout_alloc_lseg(struct pnfs_layout_hdr *pnfslay, | ||
80 | struct nfs4_layoutget_res *lgr, | ||
81 | gfp_t gfp_flags) | ||
82 | { | ||
83 | int status = -ENOMEM; | ||
84 | struct xdr_stream stream; | ||
85 | struct xdr_buf buf = { | ||
86 | .pages = lgr->layoutp->pages, | ||
87 | .page_len = lgr->layoutp->len, | ||
88 | .buflen = lgr->layoutp->len, | ||
89 | .len = lgr->layoutp->len, | ||
90 | }; | ||
91 | struct page *scratch; | ||
92 | struct pnfs_layout_segment *lseg; | ||
93 | |||
94 | dprintk("%s: Begin pnfslay %p\n", __func__, pnfslay); | ||
95 | |||
96 | scratch = alloc_page(gfp_flags); | ||
97 | if (!scratch) | ||
98 | goto err_nofree; | ||
99 | |||
100 | xdr_init_decode(&stream, &buf, NULL); | ||
101 | xdr_set_scratch_buffer(&stream, page_address(scratch), PAGE_SIZE); | ||
102 | |||
103 | status = objio_alloc_lseg(&lseg, pnfslay, &lgr->range, &stream, gfp_flags); | ||
104 | if (unlikely(status)) { | ||
105 | dprintk("%s: objio_alloc_lseg Return err %d\n", __func__, | ||
106 | status); | ||
107 | goto err; | ||
108 | } | ||
109 | |||
110 | __free_page(scratch); | ||
111 | |||
112 | dprintk("%s: Return %p\n", __func__, lseg); | ||
113 | return lseg; | ||
114 | |||
115 | err: | ||
116 | __free_page(scratch); | ||
117 | err_nofree: | ||
118 | dprintk("%s: Err Return=>%d\n", __func__, status); | ||
119 | return ERR_PTR(status); | ||
120 | } | ||
121 | |||
/*
 * Free a layout segment through the io engine; a NULL @lseg is ignored.
 */
void
objlayout_free_lseg(struct pnfs_layout_segment *lseg)
{
	dprintk("%s: freeing layout segment %p\n", __func__, lseg);

	if (likely(lseg))
		objio_free_lseg(lseg);
}
135 | |||
136 | /* | ||
137 | * I/O Operations | ||
138 | */ | ||
139 | static inline u64 | ||
140 | end_offset(u64 start, u64 len) | ||
141 | { | ||
142 | u64 end; | ||
143 | |||
144 | end = start + len; | ||
145 | return end >= start ? end : NFS4_MAX_UINT64; | ||
146 | } | ||
147 | |||
148 | /* last octet in a range */ | ||
149 | static inline u64 | ||
150 | last_byte_offset(u64 start, u64 len) | ||
151 | { | ||
152 | u64 end; | ||
153 | |||
154 | BUG_ON(!len); | ||
155 | end = start + len; | ||
156 | return end > start ? end - 1 : NFS4_MAX_UINT64; | ||
157 | } | ||
158 | |||
159 | static struct objlayout_io_state * | ||
160 | objlayout_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, | ||
161 | struct page **pages, | ||
162 | unsigned pgbase, | ||
163 | loff_t offset, | ||
164 | size_t count, | ||
165 | struct pnfs_layout_segment *lseg, | ||
166 | void *rpcdata, | ||
167 | gfp_t gfp_flags) | ||
168 | { | ||
169 | struct objlayout_io_state *state; | ||
170 | u64 lseg_end_offset; | ||
171 | |||
172 | dprintk("%s: allocating io_state\n", __func__); | ||
173 | if (objio_alloc_io_state(lseg, &state, gfp_flags)) | ||
174 | return NULL; | ||
175 | |||
176 | BUG_ON(offset < lseg->pls_range.offset); | ||
177 | lseg_end_offset = end_offset(lseg->pls_range.offset, | ||
178 | lseg->pls_range.length); | ||
179 | BUG_ON(offset >= lseg_end_offset); | ||
180 | if (offset + count > lseg_end_offset) { | ||
181 | count = lseg->pls_range.length - | ||
182 | (offset - lseg->pls_range.offset); | ||
183 | dprintk("%s: truncated count %Zd\n", __func__, count); | ||
184 | } | ||
185 | |||
186 | if (pgbase > PAGE_SIZE) { | ||
187 | pages += pgbase >> PAGE_SHIFT; | ||
188 | pgbase &= ~PAGE_MASK; | ||
189 | } | ||
190 | |||
191 | INIT_LIST_HEAD(&state->err_list); | ||
192 | state->lseg = lseg; | ||
193 | state->rpcdata = rpcdata; | ||
194 | state->pages = pages; | ||
195 | state->pgbase = pgbase; | ||
196 | state->nr_pages = (pgbase + count + PAGE_SIZE - 1) >> PAGE_SHIFT; | ||
197 | state->offset = offset; | ||
198 | state->count = count; | ||
199 | state->sync = 0; | ||
200 | |||
201 | return state; | ||
202 | } | ||
203 | |||
/* Hand @state back to the io engine for freeing; NULL is ignored. */
static void
objlayout_free_io_state(struct objlayout_io_state *state)
{
	dprintk("%s: freeing io_state\n", __func__);

	if (likely(state))
		objio_free_io_state(state);
}
213 | |||
214 | /* | ||
215 | * I/O done common code | ||
216 | */ | ||
217 | static void | ||
218 | objlayout_iodone(struct objlayout_io_state *state) | ||
219 | { | ||
220 | dprintk("%s: state %p status\n", __func__, state); | ||
221 | |||
222 | if (likely(state->status >= 0)) { | ||
223 | objlayout_free_io_state(state); | ||
224 | } else { | ||
225 | struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout); | ||
226 | |||
227 | spin_lock(&objlay->lock); | ||
228 | objlay->delta_space_valid = OBJ_DSU_INVALID; | ||
229 | list_add(&objlay->err_list, &state->err_list); | ||
230 | spin_unlock(&objlay->lock); | ||
231 | } | ||
232 | } | ||
233 | |||
234 | /* | ||
235 | * objlayout_io_set_result - Set an osd_error code on a specific osd comp. | ||
236 | * | ||
237 | * The @index component IO failed (error returned from target). Register | ||
238 | * the error for later reporting at layout-return. | ||
239 | */ | ||
240 | void | ||
241 | objlayout_io_set_result(struct objlayout_io_state *state, unsigned index, | ||
242 | struct pnfs_osd_objid *pooid, int osd_error, | ||
243 | u64 offset, u64 length, bool is_write) | ||
244 | { | ||
245 | struct pnfs_osd_ioerr *ioerr = &state->ioerrs[index]; | ||
246 | |||
247 | BUG_ON(index >= state->num_comps); | ||
248 | if (osd_error) { | ||
249 | ioerr->oer_component = *pooid; | ||
250 | ioerr->oer_comp_offset = offset; | ||
251 | ioerr->oer_comp_length = length; | ||
252 | ioerr->oer_iswrite = is_write; | ||
253 | ioerr->oer_errno = osd_error; | ||
254 | |||
255 | dprintk("%s: err[%d]: errno=%d is_write=%d dev(%llx:%llx) " | ||
256 | "par=0x%llx obj=0x%llx offset=0x%llx length=0x%llx\n", | ||
257 | __func__, index, ioerr->oer_errno, | ||
258 | ioerr->oer_iswrite, | ||
259 | _DEVID_LO(&ioerr->oer_component.oid_device_id), | ||
260 | _DEVID_HI(&ioerr->oer_component.oid_device_id), | ||
261 | ioerr->oer_component.oid_partition_id, | ||
262 | ioerr->oer_component.oid_object_id, | ||
263 | ioerr->oer_comp_offset, | ||
264 | ioerr->oer_comp_length); | ||
265 | } else { | ||
266 | /* User need not call if no error is reported */ | ||
267 | ioerr->oer_errno = 0; | ||
268 | } | ||
269 | } | ||
270 | |||
271 | /* Function scheduled on rpc workqueue to call ->nfs_readlist_complete(). | ||
272 | * This is because the osd completion is called with ints-off from | ||
273 | * the block layer | ||
274 | */ | ||
275 | static void _rpc_read_complete(struct work_struct *work) | ||
276 | { | ||
277 | struct rpc_task *task; | ||
278 | struct nfs_read_data *rdata; | ||
279 | |||
280 | dprintk("%s enter\n", __func__); | ||
281 | task = container_of(work, struct rpc_task, u.tk_work); | ||
282 | rdata = container_of(task, struct nfs_read_data, task); | ||
283 | |||
284 | pnfs_ld_read_done(rdata); | ||
285 | } | ||
286 | |||
287 | void | ||
288 | objlayout_read_done(struct objlayout_io_state *state, ssize_t status, bool sync) | ||
289 | { | ||
290 | int eof = state->eof; | ||
291 | struct nfs_read_data *rdata; | ||
292 | |||
293 | state->status = status; | ||
294 | dprintk("%s: Begin status=%ld eof=%d\n", __func__, status, eof); | ||
295 | rdata = state->rpcdata; | ||
296 | rdata->task.tk_status = status; | ||
297 | if (status >= 0) { | ||
298 | rdata->res.count = status; | ||
299 | rdata->res.eof = eof; | ||
300 | } | ||
301 | objlayout_iodone(state); | ||
302 | /* must not use state after this point */ | ||
303 | |||
304 | if (sync) | ||
305 | pnfs_ld_read_done(rdata); | ||
306 | else { | ||
307 | INIT_WORK(&rdata->task.u.tk_work, _rpc_read_complete); | ||
308 | schedule_work(&rdata->task.u.tk_work); | ||
309 | } | ||
310 | } | ||
311 | |||
312 | /* | ||
313 | * Perform sync or async reads. | ||
314 | */ | ||
315 | enum pnfs_try_status | ||
316 | objlayout_read_pagelist(struct nfs_read_data *rdata) | ||
317 | { | ||
318 | loff_t offset = rdata->args.offset; | ||
319 | size_t count = rdata->args.count; | ||
320 | struct objlayout_io_state *state; | ||
321 | ssize_t status = 0; | ||
322 | loff_t eof; | ||
323 | |||
324 | dprintk("%s: Begin inode %p offset %llu count %d\n", | ||
325 | __func__, rdata->inode, offset, (int)count); | ||
326 | |||
327 | eof = i_size_read(rdata->inode); | ||
328 | if (unlikely(offset + count > eof)) { | ||
329 | if (offset >= eof) { | ||
330 | status = 0; | ||
331 | rdata->res.count = 0; | ||
332 | rdata->res.eof = 1; | ||
333 | goto out; | ||
334 | } | ||
335 | count = eof - offset; | ||
336 | } | ||
337 | |||
338 | state = objlayout_alloc_io_state(NFS_I(rdata->inode)->layout, | ||
339 | rdata->args.pages, rdata->args.pgbase, | ||
340 | offset, count, | ||
341 | rdata->lseg, rdata, | ||
342 | GFP_KERNEL); | ||
343 | if (unlikely(!state)) { | ||
344 | status = -ENOMEM; | ||
345 | goto out; | ||
346 | } | ||
347 | |||
348 | state->eof = state->offset + state->count >= eof; | ||
349 | |||
350 | status = objio_read_pagelist(state); | ||
351 | out: | ||
352 | dprintk("%s: Return status %Zd\n", __func__, status); | ||
353 | rdata->pnfs_error = status; | ||
354 | return PNFS_ATTEMPTED; | ||
355 | } | ||
356 | |||
357 | /* Function scheduled on rpc workqueue to call ->nfs_writelist_complete(). | ||
358 | * This is because the osd completion is called with ints-off from | ||
359 | * the block layer | ||
360 | */ | ||
361 | static void _rpc_write_complete(struct work_struct *work) | ||
362 | { | ||
363 | struct rpc_task *task; | ||
364 | struct nfs_write_data *wdata; | ||
365 | |||
366 | dprintk("%s enter\n", __func__); | ||
367 | task = container_of(work, struct rpc_task, u.tk_work); | ||
368 | wdata = container_of(task, struct nfs_write_data, task); | ||
369 | |||
370 | pnfs_ld_write_done(wdata); | ||
371 | } | ||
372 | |||
373 | void | ||
374 | objlayout_write_done(struct objlayout_io_state *state, ssize_t status, | ||
375 | bool sync) | ||
376 | { | ||
377 | struct nfs_write_data *wdata; | ||
378 | |||
379 | dprintk("%s: Begin\n", __func__); | ||
380 | wdata = state->rpcdata; | ||
381 | state->status = status; | ||
382 | wdata->task.tk_status = status; | ||
383 | if (status >= 0) { | ||
384 | wdata->res.count = status; | ||
385 | wdata->verf.committed = state->committed; | ||
386 | dprintk("%s: Return status %d committed %d\n", | ||
387 | __func__, wdata->task.tk_status, | ||
388 | wdata->verf.committed); | ||
389 | } else | ||
390 | dprintk("%s: Return status %d\n", | ||
391 | __func__, wdata->task.tk_status); | ||
392 | objlayout_iodone(state); | ||
393 | /* must not use state after this point */ | ||
394 | |||
395 | if (sync) | ||
396 | pnfs_ld_write_done(wdata); | ||
397 | else { | ||
398 | INIT_WORK(&wdata->task.u.tk_work, _rpc_write_complete); | ||
399 | schedule_work(&wdata->task.u.tk_work); | ||
400 | } | ||
401 | } | ||
402 | |||
403 | /* | ||
404 | * Perform sync or async writes. | ||
405 | */ | ||
406 | enum pnfs_try_status | ||
407 | objlayout_write_pagelist(struct nfs_write_data *wdata, | ||
408 | int how) | ||
409 | { | ||
410 | struct objlayout_io_state *state; | ||
411 | ssize_t status; | ||
412 | |||
413 | dprintk("%s: Begin inode %p offset %llu count %u\n", | ||
414 | __func__, wdata->inode, wdata->args.offset, wdata->args.count); | ||
415 | |||
416 | state = objlayout_alloc_io_state(NFS_I(wdata->inode)->layout, | ||
417 | wdata->args.pages, | ||
418 | wdata->args.pgbase, | ||
419 | wdata->args.offset, | ||
420 | wdata->args.count, | ||
421 | wdata->lseg, wdata, | ||
422 | GFP_NOFS); | ||
423 | if (unlikely(!state)) { | ||
424 | status = -ENOMEM; | ||
425 | goto out; | ||
426 | } | ||
427 | |||
428 | state->sync = how & FLUSH_SYNC; | ||
429 | |||
430 | status = objio_write_pagelist(state, how & FLUSH_STABLE); | ||
431 | out: | ||
432 | dprintk("%s: Return status %Zd\n", __func__, status); | ||
433 | wdata->pnfs_error = status; | ||
434 | return PNFS_ATTEMPTED; | ||
435 | } | ||
436 | |||
437 | void | ||
438 | objlayout_encode_layoutcommit(struct pnfs_layout_hdr *pnfslay, | ||
439 | struct xdr_stream *xdr, | ||
440 | const struct nfs4_layoutcommit_args *args) | ||
441 | { | ||
442 | struct objlayout *objlay = OBJLAYOUT(pnfslay); | ||
443 | struct pnfs_osd_layoutupdate lou; | ||
444 | __be32 *start; | ||
445 | |||
446 | dprintk("%s: Begin\n", __func__); | ||
447 | |||
448 | spin_lock(&objlay->lock); | ||
449 | lou.dsu_valid = (objlay->delta_space_valid == OBJ_DSU_VALID); | ||
450 | lou.dsu_delta = objlay->delta_space_used; | ||
451 | objlay->delta_space_used = 0; | ||
452 | objlay->delta_space_valid = OBJ_DSU_INIT; | ||
453 | lou.olu_ioerr_flag = !list_empty(&objlay->err_list); | ||
454 | spin_unlock(&objlay->lock); | ||
455 | |||
456 | start = xdr_reserve_space(xdr, 4); | ||
457 | |||
458 | BUG_ON(pnfs_osd_xdr_encode_layoutupdate(xdr, &lou)); | ||
459 | |||
460 | *start = cpu_to_be32((xdr->p - start - 1) * 4); | ||
461 | |||
462 | dprintk("%s: Return delta_space_used %lld err %d\n", __func__, | ||
463 | lou.dsu_delta, lou.olu_ioerr_flag); | ||
464 | } | ||
465 | |||
466 | static int | ||
467 | err_prio(u32 oer_errno) | ||
468 | { | ||
469 | switch (oer_errno) { | ||
470 | case 0: | ||
471 | return 0; | ||
472 | |||
473 | case PNFS_OSD_ERR_RESOURCE: | ||
474 | return OSD_ERR_PRI_RESOURCE; | ||
475 | case PNFS_OSD_ERR_BAD_CRED: | ||
476 | return OSD_ERR_PRI_BAD_CRED; | ||
477 | case PNFS_OSD_ERR_NO_ACCESS: | ||
478 | return OSD_ERR_PRI_NO_ACCESS; | ||
479 | case PNFS_OSD_ERR_UNREACHABLE: | ||
480 | return OSD_ERR_PRI_UNREACHABLE; | ||
481 | case PNFS_OSD_ERR_NOT_FOUND: | ||
482 | return OSD_ERR_PRI_NOT_FOUND; | ||
483 | case PNFS_OSD_ERR_NO_SPACE: | ||
484 | return OSD_ERR_PRI_NO_SPACE; | ||
485 | default: | ||
486 | WARN_ON(1); | ||
487 | /* fallthrough */ | ||
488 | case PNFS_OSD_ERR_EIO: | ||
489 | return OSD_ERR_PRI_EIO; | ||
490 | } | ||
491 | } | ||
492 | |||
493 | static void | ||
494 | merge_ioerr(struct pnfs_osd_ioerr *dest_err, | ||
495 | const struct pnfs_osd_ioerr *src_err) | ||
496 | { | ||
497 | u64 dest_end, src_end; | ||
498 | |||
499 | if (!dest_err->oer_errno) { | ||
500 | *dest_err = *src_err; | ||
501 | /* accumulated device must be blank */ | ||
502 | memset(&dest_err->oer_component.oid_device_id, 0, | ||
503 | sizeof(dest_err->oer_component.oid_device_id)); | ||
504 | |||
505 | return; | ||
506 | } | ||
507 | |||
508 | if (dest_err->oer_component.oid_partition_id != | ||
509 | src_err->oer_component.oid_partition_id) | ||
510 | dest_err->oer_component.oid_partition_id = 0; | ||
511 | |||
512 | if (dest_err->oer_component.oid_object_id != | ||
513 | src_err->oer_component.oid_object_id) | ||
514 | dest_err->oer_component.oid_object_id = 0; | ||
515 | |||
516 | if (dest_err->oer_comp_offset > src_err->oer_comp_offset) | ||
517 | dest_err->oer_comp_offset = src_err->oer_comp_offset; | ||
518 | |||
519 | dest_end = end_offset(dest_err->oer_comp_offset, | ||
520 | dest_err->oer_comp_length); | ||
521 | src_end = end_offset(src_err->oer_comp_offset, | ||
522 | src_err->oer_comp_length); | ||
523 | if (dest_end < src_end) | ||
524 | dest_end = src_end; | ||
525 | |||
526 | dest_err->oer_comp_length = dest_end - dest_err->oer_comp_offset; | ||
527 | |||
528 | if ((src_err->oer_iswrite == dest_err->oer_iswrite) && | ||
529 | (err_prio(src_err->oer_errno) > err_prio(dest_err->oer_errno))) { | ||
530 | dest_err->oer_errno = src_err->oer_errno; | ||
531 | } else if (src_err->oer_iswrite) { | ||
532 | dest_err->oer_iswrite = true; | ||
533 | dest_err->oer_errno = src_err->oer_errno; | ||
534 | } | ||
535 | } | ||
536 | |||
537 | static void | ||
538 | encode_accumulated_error(struct objlayout *objlay, __be32 *p) | ||
539 | { | ||
540 | struct objlayout_io_state *state, *tmp; | ||
541 | struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0}; | ||
542 | |||
543 | list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) { | ||
544 | unsigned i; | ||
545 | |||
546 | for (i = 0; i < state->num_comps; i++) { | ||
547 | struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i]; | ||
548 | |||
549 | if (!ioerr->oer_errno) | ||
550 | continue; | ||
551 | |||
552 | printk(KERN_ERR "%s: err[%d]: errno=%d is_write=%d " | ||
553 | "dev(%llx:%llx) par=0x%llx obj=0x%llx " | ||
554 | "offset=0x%llx length=0x%llx\n", | ||
555 | __func__, i, ioerr->oer_errno, | ||
556 | ioerr->oer_iswrite, | ||
557 | _DEVID_LO(&ioerr->oer_component.oid_device_id), | ||
558 | _DEVID_HI(&ioerr->oer_component.oid_device_id), | ||
559 | ioerr->oer_component.oid_partition_id, | ||
560 | ioerr->oer_component.oid_object_id, | ||
561 | ioerr->oer_comp_offset, | ||
562 | ioerr->oer_comp_length); | ||
563 | |||
564 | merge_ioerr(&accumulated_err, ioerr); | ||
565 | } | ||
566 | list_del(&state->err_list); | ||
567 | objlayout_free_io_state(state); | ||
568 | } | ||
569 | |||
570 | pnfs_osd_xdr_encode_ioerr(p, &accumulated_err); | ||
571 | } | ||
572 | |||
573 | void | ||
574 | objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay, | ||
575 | struct xdr_stream *xdr, | ||
576 | const struct nfs4_layoutreturn_args *args) | ||
577 | { | ||
578 | struct objlayout *objlay = OBJLAYOUT(pnfslay); | ||
579 | struct objlayout_io_state *state, *tmp; | ||
580 | __be32 *start; | ||
581 | |||
582 | dprintk("%s: Begin\n", __func__); | ||
583 | start = xdr_reserve_space(xdr, 4); | ||
584 | BUG_ON(!start); | ||
585 | |||
586 | spin_lock(&objlay->lock); | ||
587 | |||
588 | list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) { | ||
589 | __be32 *last_xdr = NULL, *p; | ||
590 | unsigned i; | ||
591 | int res = 0; | ||
592 | |||
593 | for (i = 0; i < state->num_comps; i++) { | ||
594 | struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i]; | ||
595 | |||
596 | if (!ioerr->oer_errno) | ||
597 | continue; | ||
598 | |||
599 | dprintk("%s: err[%d]: errno=%d is_write=%d " | ||
600 | "dev(%llx:%llx) par=0x%llx obj=0x%llx " | ||
601 | "offset=0x%llx length=0x%llx\n", | ||
602 | __func__, i, ioerr->oer_errno, | ||
603 | ioerr->oer_iswrite, | ||
604 | _DEVID_LO(&ioerr->oer_component.oid_device_id), | ||
605 | _DEVID_HI(&ioerr->oer_component.oid_device_id), | ||
606 | ioerr->oer_component.oid_partition_id, | ||
607 | ioerr->oer_component.oid_object_id, | ||
608 | ioerr->oer_comp_offset, | ||
609 | ioerr->oer_comp_length); | ||
610 | |||
611 | p = pnfs_osd_xdr_ioerr_reserve_space(xdr); | ||
612 | if (unlikely(!p)) { | ||
613 | res = -E2BIG; | ||
614 | break; /* accumulated_error */ | ||
615 | } | ||
616 | |||
617 | last_xdr = p; | ||
618 | pnfs_osd_xdr_encode_ioerr(p, &state->ioerrs[i]); | ||
619 | } | ||
620 | |||
621 | /* TODO: use xdr_write_pages */ | ||
622 | if (unlikely(res)) { | ||
623 | /* no space for even one error descriptor */ | ||
624 | BUG_ON(!last_xdr); | ||
625 | |||
626 | /* we've encountered a situation with lots and lots of | ||
627 | * errors and no space to encode them all. Use the last | ||
628 | * available slot to report the union of all the | ||
629 | * remaining errors. | ||
630 | */ | ||
631 | encode_accumulated_error(objlay, last_xdr); | ||
632 | goto loop_done; | ||
633 | } | ||
634 | list_del(&state->err_list); | ||
635 | objlayout_free_io_state(state); | ||
636 | } | ||
637 | loop_done: | ||
638 | spin_unlock(&objlay->lock); | ||
639 | |||
640 | *start = cpu_to_be32((xdr->p - start - 1) * 4); | ||
641 | dprintk("%s: Return\n", __func__); | ||
642 | } | ||
643 | |||
644 | |||
645 | /* | ||
646 | * Get Device Info API for io engines | ||
647 | */ | ||
648 | struct objlayout_deviceinfo { | ||
649 | struct page *page; | ||
650 | struct pnfs_osd_deviceaddr da; /* This must be last */ | ||
651 | }; | ||
652 | |||
653 | /* Initialize and call nfs_getdeviceinfo, then decode and return a | ||
654 | * "struct pnfs_osd_deviceaddr *" Eventually objlayout_put_deviceinfo() | ||
655 | * should be called. | ||
656 | */ | ||
657 | int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay, | ||
658 | struct nfs4_deviceid *d_id, struct pnfs_osd_deviceaddr **deviceaddr, | ||
659 | gfp_t gfp_flags) | ||
660 | { | ||
661 | struct objlayout_deviceinfo *odi; | ||
662 | struct pnfs_device pd; | ||
663 | struct super_block *sb; | ||
664 | struct page *page, **pages; | ||
665 | u32 *p; | ||
666 | int err; | ||
667 | |||
668 | page = alloc_page(gfp_flags); | ||
669 | if (!page) | ||
670 | return -ENOMEM; | ||
671 | |||
672 | pages = &page; | ||
673 | pd.pages = pages; | ||
674 | |||
675 | memcpy(&pd.dev_id, d_id, sizeof(*d_id)); | ||
676 | pd.layout_type = LAYOUT_OSD2_OBJECTS; | ||
677 | pd.pages = &page; | ||
678 | pd.pgbase = 0; | ||
679 | pd.pglen = PAGE_SIZE; | ||
680 | pd.mincount = 0; | ||
681 | |||
682 | sb = pnfslay->plh_inode->i_sb; | ||
683 | err = nfs4_proc_getdeviceinfo(NFS_SERVER(pnfslay->plh_inode), &pd); | ||
684 | dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err); | ||
685 | if (err) | ||
686 | goto err_out; | ||
687 | |||
688 | p = page_address(page); | ||
689 | odi = kzalloc(sizeof(*odi), gfp_flags); | ||
690 | if (!odi) { | ||
691 | err = -ENOMEM; | ||
692 | goto err_out; | ||
693 | } | ||
694 | pnfs_osd_xdr_decode_deviceaddr(&odi->da, p); | ||
695 | odi->page = page; | ||
696 | *deviceaddr = &odi->da; | ||
697 | return 0; | ||
698 | |||
699 | err_out: | ||
700 | __free_page(page); | ||
701 | return err; | ||
702 | } | ||
703 | |||
704 | void objlayout_put_deviceinfo(struct pnfs_osd_deviceaddr *deviceaddr) | ||
705 | { | ||
706 | struct objlayout_deviceinfo *odi = container_of(deviceaddr, | ||
707 | struct objlayout_deviceinfo, | ||
708 | da); | ||
709 | |||
710 | __free_page(odi->page); | ||
711 | kfree(odi); | ||
712 | } | ||