diff options
author | Boaz Harrosh <bharrosh@panasas.com> | 2011-05-26 14:49:46 -0400 |
---|---|---|
committer | Boaz Harrosh <bharrosh@panasas.com> | 2011-05-29 13:54:45 -0400 |
commit | adb58535e604a564495a7d50dfb0afa0ddc21bcb (patch) | |
tree | 54af4fe9569650342e61dbb314334e3617c48dc2 /fs | |
parent | 04a555498e03b3804e2dec916a4669f5f560e503 (diff) |
pnfs-obj: report errors and .encode_layoutreturn Implementation.
An io_state pre-allocates an error information structure for each
possible osd-device that might error during IO. When IO is done, if all
was well, the io_state is freed (as today). If the I/O has ended with an
error, the io_state is queued on a per-layout err_list. When eventually
encode_layoutreturn() is called, each error is properly encoded on the
XDR buffer and only then the io_state is removed from err_list and
de-allocated.
It is up to the io_engine to fill in the segment that faulted and the type
of osd_error that occurred, by calling objlayout_io_set_result() for
each failing device.
In objio_osd:
* Allocate space for io-error descriptors as part of io_state
* Use generic objlayout error reporting at end of io.
Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Signed-off-by: Benny Halevy <bhalevy@panasas.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/nfs/objlayout/objio_osd.c | 44 | ||||
-rw-r--r-- | fs/nfs/objlayout/objlayout.c | 232 | ||||
-rw-r--r-- | fs/nfs/objlayout/objlayout.h | 23 |
3 files changed, 297 insertions, 2 deletions
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 4e8de3ec9a63..8bca5e13f3ef 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c | |||
@@ -396,12 +396,16 @@ int objio_alloc_io_state(struct pnfs_layout_segment *lseg, | |||
396 | struct objio_state *ios; | 396 | struct objio_state *ios; |
397 | const unsigned first_size = sizeof(*ios) + | 397 | const unsigned first_size = sizeof(*ios) + |
398 | objio_seg->num_comps * sizeof(ios->per_dev[0]); | 398 | objio_seg->num_comps * sizeof(ios->per_dev[0]); |
399 | const unsigned sec_size = objio_seg->num_comps * | ||
400 | sizeof(ios->ol_state.ioerrs[0]); | ||
399 | 401 | ||
400 | ios = kzalloc(first_size, gfp_flags); | 402 | ios = kzalloc(first_size + sec_size, gfp_flags); |
401 | if (unlikely(!ios)) | 403 | if (unlikely(!ios)) |
402 | return -ENOMEM; | 404 | return -ENOMEM; |
403 | 405 | ||
404 | ios->layout = objio_seg; | 406 | ios->layout = objio_seg; |
407 | ios->ol_state.ioerrs = ((void *)ios) + first_size; | ||
408 | ios->ol_state.num_comps = objio_seg->num_comps; | ||
405 | 409 | ||
406 | *outp = &ios->ol_state; | 410 | *outp = &ios->ol_state; |
407 | return 0; | 411 | return 0; |
@@ -415,6 +419,36 @@ void objio_free_io_state(struct objlayout_io_state *ol_state) | |||
415 | kfree(ios); | 419 | kfree(ios); |
416 | } | 420 | } |
417 | 421 | ||
422 | enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep) | ||
423 | { | ||
424 | switch (oep) { | ||
425 | case OSD_ERR_PRI_NO_ERROR: | ||
426 | return (enum pnfs_osd_errno)0; | ||
427 | |||
428 | case OSD_ERR_PRI_CLEAR_PAGES: | ||
429 | BUG_ON(1); | ||
430 | return 0; | ||
431 | |||
432 | case OSD_ERR_PRI_RESOURCE: | ||
433 | return PNFS_OSD_ERR_RESOURCE; | ||
434 | case OSD_ERR_PRI_BAD_CRED: | ||
435 | return PNFS_OSD_ERR_BAD_CRED; | ||
436 | case OSD_ERR_PRI_NO_ACCESS: | ||
437 | return PNFS_OSD_ERR_NO_ACCESS; | ||
438 | case OSD_ERR_PRI_UNREACHABLE: | ||
439 | return PNFS_OSD_ERR_UNREACHABLE; | ||
440 | case OSD_ERR_PRI_NOT_FOUND: | ||
441 | return PNFS_OSD_ERR_NOT_FOUND; | ||
442 | case OSD_ERR_PRI_NO_SPACE: | ||
443 | return PNFS_OSD_ERR_NO_SPACE; | ||
444 | default: | ||
445 | WARN_ON(1); | ||
446 | /* fallthrough */ | ||
447 | case OSD_ERR_PRI_EIO: | ||
448 | return PNFS_OSD_ERR_EIO; | ||
449 | } | ||
450 | } | ||
451 | |||
418 | static void _clear_bio(struct bio *bio) | 452 | static void _clear_bio(struct bio *bio) |
419 | { | 453 | { |
420 | struct bio_vec *bv; | 454 | struct bio_vec *bv; |
@@ -461,6 +495,12 @@ static int _io_check(struct objio_state *ios, bool is_write) | |||
461 | continue; /* we recovered */ | 495 | continue; /* we recovered */ |
462 | } | 496 | } |
463 | dev = ios->per_dev[i].dev; | 497 | dev = ios->per_dev[i].dev; |
498 | objlayout_io_set_result(&ios->ol_state, dev, | ||
499 | &ios->layout->comps[dev].oc_object_id, | ||
500 | osd_pri_2_pnfs_err(osi.osd_err_pri), | ||
501 | ios->per_dev[i].offset, | ||
502 | ios->per_dev[i].length, | ||
503 | is_write); | ||
464 | 504 | ||
465 | if (osi.osd_err_pri >= oep) { | 505 | if (osi.osd_err_pri >= oep) { |
466 | oep = osi.osd_err_pri; | 506 | oep = osi.osd_err_pri; |
@@ -977,6 +1017,8 @@ static struct pnfs_layoutdriver_type objlayout_type = { | |||
977 | .pg_test = objlayout_pg_test, | 1017 | .pg_test = objlayout_pg_test, |
978 | 1018 | ||
979 | .free_deviceid_node = objio_free_deviceid_node, | 1019 | .free_deviceid_node = objio_free_deviceid_node, |
1020 | |||
1021 | .encode_layoutreturn = objlayout_encode_layoutreturn, | ||
980 | }; | 1022 | }; |
981 | 1023 | ||
982 | MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects"); | 1024 | MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects"); |
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 5157ef6d0041..f7caecff6b4d 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c | |||
@@ -50,6 +50,10 @@ objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags) | |||
50 | struct objlayout *objlay; | 50 | struct objlayout *objlay; |
51 | 51 | ||
52 | objlay = kzalloc(sizeof(struct objlayout), gfp_flags); | 52 | objlay = kzalloc(sizeof(struct objlayout), gfp_flags); |
53 | if (objlay) { | ||
54 | spin_lock_init(&objlay->lock); | ||
55 | INIT_LIST_HEAD(&objlay->err_list); | ||
56 | } | ||
53 | dprintk("%s: Return %p\n", __func__, objlay); | 57 | dprintk("%s: Return %p\n", __func__, objlay); |
54 | return &objlay->pnfs_layout; | 58 | return &objlay->pnfs_layout; |
55 | } | 59 | } |
@@ -64,6 +68,7 @@ objlayout_free_layout_hdr(struct pnfs_layout_hdr *lo) | |||
64 | 68 | ||
65 | dprintk("%s: objlay %p\n", __func__, objlay); | 69 | dprintk("%s: objlay %p\n", __func__, objlay); |
66 | 70 | ||
71 | WARN_ON(!list_empty(&objlay->err_list)); | ||
67 | kfree(objlay); | 72 | kfree(objlay); |
68 | } | 73 | } |
69 | 74 | ||
@@ -183,6 +188,7 @@ objlayout_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type, | |||
183 | pgbase &= ~PAGE_MASK; | 188 | pgbase &= ~PAGE_MASK; |
184 | } | 189 | } |
185 | 190 | ||
191 | INIT_LIST_HEAD(&state->err_list); | ||
186 | state->lseg = lseg; | 192 | state->lseg = lseg; |
187 | state->rpcdata = rpcdata; | 193 | state->rpcdata = rpcdata; |
188 | state->pages = pages; | 194 | state->pages = pages; |
@@ -213,7 +219,52 @@ objlayout_iodone(struct objlayout_io_state *state) | |||
213 | { | 219 | { |
214 | dprintk("%s: state %p status\n", __func__, state); | 220 | dprintk("%s: state %p status\n", __func__, state); |
215 | 221 | ||
216 | objlayout_free_io_state(state); | 222 | if (likely(state->status >= 0)) { |
223 | objlayout_free_io_state(state); | ||
224 | } else { | ||
225 | struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout); | ||
226 | |||
227 | spin_lock(&objlay->lock); | ||
228 | list_add(&objlay->err_list, &state->err_list); | ||
229 | spin_unlock(&objlay->lock); | ||
230 | } | ||
231 | } | ||
232 | |||
233 | /* | ||
234 | * objlayout_io_set_result - Set an osd_error code on a specific osd comp. | ||
235 | * | ||
236 | * The @index component IO failed (error returned from target). Register | ||
237 | * the error for later reporting at layout-return. | ||
238 | */ | ||
239 | void | ||
240 | objlayout_io_set_result(struct objlayout_io_state *state, unsigned index, | ||
241 | struct pnfs_osd_objid *pooid, int osd_error, | ||
242 | u64 offset, u64 length, bool is_write) | ||
243 | { | ||
244 | struct pnfs_osd_ioerr *ioerr = &state->ioerrs[index]; | ||
245 | |||
246 | BUG_ON(index >= state->num_comps); | ||
247 | if (osd_error) { | ||
248 | ioerr->oer_component = *pooid; | ||
249 | ioerr->oer_comp_offset = offset; | ||
250 | ioerr->oer_comp_length = length; | ||
251 | ioerr->oer_iswrite = is_write; | ||
252 | ioerr->oer_errno = osd_error; | ||
253 | |||
254 | dprintk("%s: err[%d]: errno=%d is_write=%d dev(%llx:%llx) " | ||
255 | "par=0x%llx obj=0x%llx offset=0x%llx length=0x%llx\n", | ||
256 | __func__, index, ioerr->oer_errno, | ||
257 | ioerr->oer_iswrite, | ||
258 | _DEVID_LO(&ioerr->oer_component.oid_device_id), | ||
259 | _DEVID_HI(&ioerr->oer_component.oid_device_id), | ||
260 | ioerr->oer_component.oid_partition_id, | ||
261 | ioerr->oer_component.oid_object_id, | ||
262 | ioerr->oer_comp_offset, | ||
263 | ioerr->oer_comp_length); | ||
264 | } else { | ||
265 | /* User need not call if no error is reported */ | ||
266 | ioerr->oer_errno = 0; | ||
267 | } | ||
217 | } | 268 | } |
218 | 269 | ||
219 | /* Function scheduled on rpc workqueue to call ->nfs_readlist_complete(). | 270 | /* Function scheduled on rpc workqueue to call ->nfs_readlist_complete(). |
@@ -382,6 +433,185 @@ objlayout_write_pagelist(struct nfs_write_data *wdata, | |||
382 | return PNFS_ATTEMPTED; | 433 | return PNFS_ATTEMPTED; |
383 | } | 434 | } |
384 | 435 | ||
436 | static int | ||
437 | err_prio(u32 oer_errno) | ||
438 | { | ||
439 | switch (oer_errno) { | ||
440 | case 0: | ||
441 | return 0; | ||
442 | |||
443 | case PNFS_OSD_ERR_RESOURCE: | ||
444 | return OSD_ERR_PRI_RESOURCE; | ||
445 | case PNFS_OSD_ERR_BAD_CRED: | ||
446 | return OSD_ERR_PRI_BAD_CRED; | ||
447 | case PNFS_OSD_ERR_NO_ACCESS: | ||
448 | return OSD_ERR_PRI_NO_ACCESS; | ||
449 | case PNFS_OSD_ERR_UNREACHABLE: | ||
450 | return OSD_ERR_PRI_UNREACHABLE; | ||
451 | case PNFS_OSD_ERR_NOT_FOUND: | ||
452 | return OSD_ERR_PRI_NOT_FOUND; | ||
453 | case PNFS_OSD_ERR_NO_SPACE: | ||
454 | return OSD_ERR_PRI_NO_SPACE; | ||
455 | default: | ||
456 | WARN_ON(1); | ||
457 | /* fallthrough */ | ||
458 | case PNFS_OSD_ERR_EIO: | ||
459 | return OSD_ERR_PRI_EIO; | ||
460 | } | ||
461 | } | ||
462 | |||
463 | static void | ||
464 | merge_ioerr(struct pnfs_osd_ioerr *dest_err, | ||
465 | const struct pnfs_osd_ioerr *src_err) | ||
466 | { | ||
467 | u64 dest_end, src_end; | ||
468 | |||
469 | if (!dest_err->oer_errno) { | ||
470 | *dest_err = *src_err; | ||
471 | /* accumulated device must be blank */ | ||
472 | memset(&dest_err->oer_component.oid_device_id, 0, | ||
473 | sizeof(dest_err->oer_component.oid_device_id)); | ||
474 | |||
475 | return; | ||
476 | } | ||
477 | |||
478 | if (dest_err->oer_component.oid_partition_id != | ||
479 | src_err->oer_component.oid_partition_id) | ||
480 | dest_err->oer_component.oid_partition_id = 0; | ||
481 | |||
482 | if (dest_err->oer_component.oid_object_id != | ||
483 | src_err->oer_component.oid_object_id) | ||
484 | dest_err->oer_component.oid_object_id = 0; | ||
485 | |||
486 | if (dest_err->oer_comp_offset > src_err->oer_comp_offset) | ||
487 | dest_err->oer_comp_offset = src_err->oer_comp_offset; | ||
488 | |||
489 | dest_end = end_offset(dest_err->oer_comp_offset, | ||
490 | dest_err->oer_comp_length); | ||
491 | src_end = end_offset(src_err->oer_comp_offset, | ||
492 | src_err->oer_comp_length); | ||
493 | if (dest_end < src_end) | ||
494 | dest_end = src_end; | ||
495 | |||
496 | dest_err->oer_comp_length = dest_end - dest_err->oer_comp_offset; | ||
497 | |||
498 | if ((src_err->oer_iswrite == dest_err->oer_iswrite) && | ||
499 | (err_prio(src_err->oer_errno) > err_prio(dest_err->oer_errno))) { | ||
500 | dest_err->oer_errno = src_err->oer_errno; | ||
501 | } else if (src_err->oer_iswrite) { | ||
502 | dest_err->oer_iswrite = true; | ||
503 | dest_err->oer_errno = src_err->oer_errno; | ||
504 | } | ||
505 | } | ||
506 | |||
507 | static void | ||
508 | encode_accumulated_error(struct objlayout *objlay, __be32 *p) | ||
509 | { | ||
510 | struct objlayout_io_state *state, *tmp; | ||
511 | struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0}; | ||
512 | |||
513 | list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) { | ||
514 | unsigned i; | ||
515 | |||
516 | for (i = 0; i < state->num_comps; i++) { | ||
517 | struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i]; | ||
518 | |||
519 | if (!ioerr->oer_errno) | ||
520 | continue; | ||
521 | |||
522 | printk(KERN_ERR "%s: err[%d]: errno=%d is_write=%d " | ||
523 | "dev(%llx:%llx) par=0x%llx obj=0x%llx " | ||
524 | "offset=0x%llx length=0x%llx\n", | ||
525 | __func__, i, ioerr->oer_errno, | ||
526 | ioerr->oer_iswrite, | ||
527 | _DEVID_LO(&ioerr->oer_component.oid_device_id), | ||
528 | _DEVID_HI(&ioerr->oer_component.oid_device_id), | ||
529 | ioerr->oer_component.oid_partition_id, | ||
530 | ioerr->oer_component.oid_object_id, | ||
531 | ioerr->oer_comp_offset, | ||
532 | ioerr->oer_comp_length); | ||
533 | |||
534 | merge_ioerr(&accumulated_err, ioerr); | ||
535 | } | ||
536 | list_del(&state->err_list); | ||
537 | objlayout_free_io_state(state); | ||
538 | } | ||
539 | |||
540 | pnfs_osd_xdr_encode_ioerr(p, &accumulated_err); | ||
541 | } | ||
542 | |||
543 | void | ||
544 | objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay, | ||
545 | struct xdr_stream *xdr, | ||
546 | const struct nfs4_layoutreturn_args *args) | ||
547 | { | ||
548 | struct objlayout *objlay = OBJLAYOUT(pnfslay); | ||
549 | struct objlayout_io_state *state, *tmp; | ||
550 | __be32 *start; | ||
551 | |||
552 | dprintk("%s: Begin\n", __func__); | ||
553 | start = xdr_reserve_space(xdr, 4); | ||
554 | BUG_ON(!start); | ||
555 | |||
556 | spin_lock(&objlay->lock); | ||
557 | |||
558 | list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) { | ||
559 | __be32 *last_xdr = NULL, *p; | ||
560 | unsigned i; | ||
561 | int res = 0; | ||
562 | |||
563 | for (i = 0; i < state->num_comps; i++) { | ||
564 | struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i]; | ||
565 | |||
566 | if (!ioerr->oer_errno) | ||
567 | continue; | ||
568 | |||
569 | dprintk("%s: err[%d]: errno=%d is_write=%d " | ||
570 | "dev(%llx:%llx) par=0x%llx obj=0x%llx " | ||
571 | "offset=0x%llx length=0x%llx\n", | ||
572 | __func__, i, ioerr->oer_errno, | ||
573 | ioerr->oer_iswrite, | ||
574 | _DEVID_LO(&ioerr->oer_component.oid_device_id), | ||
575 | _DEVID_HI(&ioerr->oer_component.oid_device_id), | ||
576 | ioerr->oer_component.oid_partition_id, | ||
577 | ioerr->oer_component.oid_object_id, | ||
578 | ioerr->oer_comp_offset, | ||
579 | ioerr->oer_comp_length); | ||
580 | |||
581 | p = pnfs_osd_xdr_ioerr_reserve_space(xdr); | ||
582 | if (unlikely(!p)) { | ||
583 | res = -E2BIG; | ||
584 | break; /* accumulated_error */ | ||
585 | } | ||
586 | |||
587 | last_xdr = p; | ||
588 | pnfs_osd_xdr_encode_ioerr(p, &state->ioerrs[i]); | ||
589 | } | ||
590 | |||
591 | /* TODO: use xdr_write_pages */ | ||
592 | if (unlikely(res)) { | ||
593 | /* no space for even one error descriptor */ | ||
594 | BUG_ON(!last_xdr); | ||
595 | |||
596 | /* we've encountered a situation with lots and lots of | ||
597 | * errors and no space to encode them all. Use the last | ||
598 | * available slot to report the union of all the | ||
599 | * remaining errors. | ||
600 | */ | ||
601 | encode_accumulated_error(objlay, last_xdr); | ||
602 | goto loop_done; | ||
603 | } | ||
604 | list_del(&state->err_list); | ||
605 | objlayout_free_io_state(state); | ||
606 | } | ||
607 | loop_done: | ||
608 | spin_unlock(&objlay->lock); | ||
609 | |||
610 | *start = cpu_to_be32((xdr->p - start - 1) * 4); | ||
611 | dprintk("%s: Return\n", __func__); | ||
612 | } | ||
613 | |||
614 | |||
385 | /* | 615 | /* |
386 | * Get Device Info API for io engines | 616 | * Get Device Info API for io engines |
387 | */ | 617 | */ |
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h index 9a405e8069f3..b0bb975058e4 100644 --- a/fs/nfs/objlayout/objlayout.h +++ b/fs/nfs/objlayout/objlayout.h | |||
@@ -50,6 +50,10 @@ | |||
50 | */ | 50 | */ |
51 | struct objlayout { | 51 | struct objlayout { |
52 | struct pnfs_layout_hdr pnfs_layout; | 52 | struct pnfs_layout_hdr pnfs_layout; |
53 | |||
54 | /* for layout_return */ | ||
55 | spinlock_t lock; | ||
56 | struct list_head err_list; | ||
53 | }; | 57 | }; |
54 | 58 | ||
55 | static inline struct objlayout * | 59 | static inline struct objlayout * |
@@ -76,6 +80,16 @@ struct objlayout_io_state { | |||
76 | int status; /* res */ | 80 | int status; /* res */ |
77 | int eof; /* res */ | 81 | int eof; /* res */ |
78 | int committed; /* res */ | 82 | int committed; /* res */ |
83 | |||
84 | /* Error reporting (layout_return) */ | ||
85 | struct list_head err_list; | ||
86 | unsigned num_comps; | ||
87 | /* Pointer to array of error descriptors of size num_comps. | ||
88 | * It should contain as many entries as devices in the osd_layout | ||
89 | * that participate in the I/O. It is up to the io_engine to allocate | ||
90 | * needed space and set num_comps. | ||
91 | */ | ||
92 | struct pnfs_osd_ioerr *ioerrs; | ||
79 | }; | 93 | }; |
80 | 94 | ||
81 | /* | 95 | /* |
@@ -101,6 +115,10 @@ extern ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state, | |||
101 | /* | 115 | /* |
102 | * callback API | 116 | * callback API |
103 | */ | 117 | */ |
118 | extern void objlayout_io_set_result(struct objlayout_io_state *state, | ||
119 | unsigned index, struct pnfs_osd_objid *pooid, | ||
120 | int osd_error, u64 offset, u64 length, bool is_write); | ||
121 | |||
104 | extern void objlayout_read_done(struct objlayout_io_state *state, | 122 | extern void objlayout_read_done(struct objlayout_io_state *state, |
105 | ssize_t status, bool sync); | 123 | ssize_t status, bool sync); |
106 | extern void objlayout_write_done(struct objlayout_io_state *state, | 124 | extern void objlayout_write_done(struct objlayout_io_state *state, |
@@ -131,4 +149,9 @@ extern enum pnfs_try_status objlayout_write_pagelist( | |||
131 | struct nfs_write_data *, | 149 | struct nfs_write_data *, |
132 | int how); | 150 | int how); |
133 | 151 | ||
152 | extern void objlayout_encode_layoutreturn( | ||
153 | struct pnfs_layout_hdr *, | ||
154 | struct xdr_stream *, | ||
155 | const struct nfs4_layoutreturn_args *); | ||
156 | |||
134 | #endif /* _OBJLAYOUT_H */ | 157 | #endif /* _OBJLAYOUT_H */ |