aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorBoaz Harrosh <bharrosh@panasas.com>2011-05-26 14:49:46 -0400
committerBoaz Harrosh <bharrosh@panasas.com>2011-05-29 13:54:45 -0400
commitadb58535e604a564495a7d50dfb0afa0ddc21bcb (patch)
tree54af4fe9569650342e61dbb314334e3617c48dc2
parent04a555498e03b3804e2dec916a4669f5f560e503 (diff)
pnfs-obj: report errors and .encode_layoutreturn Implementation.
An io_state pre-allocates an error information structure for each possible osd-device that might error during IO. When IO is done, if all went well, the io_state is freed (as today). If the I/O ended with an error, the io_state is queued on a per-layout err_list. When encode_layoutreturn() is eventually called, each error is encoded into the XDR buffer, and only then is the io_state removed from err_list and de-allocated. It is up to the io_engine to fill in the segment that faulted and the type of osd_error that occurred, by calling objlayout_io_set_result() for each failing device. In objio_osd: * Allocate io-error descriptors space as part of io_state * Use generic objlayout error reporting at end of io. Signed-off-by: Boaz Harrosh <bharrosh@panasas.com> Signed-off-by: Benny Halevy <bhalevy@panasas.com>
-rw-r--r--fs/nfs/objlayout/objio_osd.c44
-rw-r--r--fs/nfs/objlayout/objlayout.c232
-rw-r--r--fs/nfs/objlayout/objlayout.h23
3 files changed, 297 insertions, 2 deletions
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 4e8de3ec9a63..8bca5e13f3ef 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -396,12 +396,16 @@ int objio_alloc_io_state(struct pnfs_layout_segment *lseg,
396 struct objio_state *ios; 396 struct objio_state *ios;
397 const unsigned first_size = sizeof(*ios) + 397 const unsigned first_size = sizeof(*ios) +
398 objio_seg->num_comps * sizeof(ios->per_dev[0]); 398 objio_seg->num_comps * sizeof(ios->per_dev[0]);
399 const unsigned sec_size = objio_seg->num_comps *
400 sizeof(ios->ol_state.ioerrs[0]);
399 401
400 ios = kzalloc(first_size, gfp_flags); 402 ios = kzalloc(first_size + sec_size, gfp_flags);
401 if (unlikely(!ios)) 403 if (unlikely(!ios))
402 return -ENOMEM; 404 return -ENOMEM;
403 405
404 ios->layout = objio_seg; 406 ios->layout = objio_seg;
407 ios->ol_state.ioerrs = ((void *)ios) + first_size;
408 ios->ol_state.num_comps = objio_seg->num_comps;
405 409
406 *outp = &ios->ol_state; 410 *outp = &ios->ol_state;
407 return 0; 411 return 0;
@@ -415,6 +419,36 @@ void objio_free_io_state(struct objlayout_io_state *ol_state)
415 kfree(ios); 419 kfree(ios);
416} 420}
417 421
422enum pnfs_osd_errno osd_pri_2_pnfs_err(enum osd_err_priority oep)
423{
424 switch (oep) {
425 case OSD_ERR_PRI_NO_ERROR:
426 return (enum pnfs_osd_errno)0;
427
428 case OSD_ERR_PRI_CLEAR_PAGES:
429 BUG_ON(1);
430 return 0;
431
432 case OSD_ERR_PRI_RESOURCE:
433 return PNFS_OSD_ERR_RESOURCE;
434 case OSD_ERR_PRI_BAD_CRED:
435 return PNFS_OSD_ERR_BAD_CRED;
436 case OSD_ERR_PRI_NO_ACCESS:
437 return PNFS_OSD_ERR_NO_ACCESS;
438 case OSD_ERR_PRI_UNREACHABLE:
439 return PNFS_OSD_ERR_UNREACHABLE;
440 case OSD_ERR_PRI_NOT_FOUND:
441 return PNFS_OSD_ERR_NOT_FOUND;
442 case OSD_ERR_PRI_NO_SPACE:
443 return PNFS_OSD_ERR_NO_SPACE;
444 default:
445 WARN_ON(1);
446 /* fallthrough */
447 case OSD_ERR_PRI_EIO:
448 return PNFS_OSD_ERR_EIO;
449 }
450}
451
418static void _clear_bio(struct bio *bio) 452static void _clear_bio(struct bio *bio)
419{ 453{
420 struct bio_vec *bv; 454 struct bio_vec *bv;
@@ -461,6 +495,12 @@ static int _io_check(struct objio_state *ios, bool is_write)
461 continue; /* we recovered */ 495 continue; /* we recovered */
462 } 496 }
463 dev = ios->per_dev[i].dev; 497 dev = ios->per_dev[i].dev;
498 objlayout_io_set_result(&ios->ol_state, dev,
499 &ios->layout->comps[dev].oc_object_id,
500 osd_pri_2_pnfs_err(osi.osd_err_pri),
501 ios->per_dev[i].offset,
502 ios->per_dev[i].length,
503 is_write);
464 504
465 if (osi.osd_err_pri >= oep) { 505 if (osi.osd_err_pri >= oep) {
466 oep = osi.osd_err_pri; 506 oep = osi.osd_err_pri;
@@ -977,6 +1017,8 @@ static struct pnfs_layoutdriver_type objlayout_type = {
977 .pg_test = objlayout_pg_test, 1017 .pg_test = objlayout_pg_test,
978 1018
979 .free_deviceid_node = objio_free_deviceid_node, 1019 .free_deviceid_node = objio_free_deviceid_node,
1020
1021 .encode_layoutreturn = objlayout_encode_layoutreturn,
980}; 1022};
981 1023
982MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects"); 1024MODULE_DESCRIPTION("pNFS Layout Driver for OSD2 objects");
diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c
index 5157ef6d0041..f7caecff6b4d 100644
--- a/fs/nfs/objlayout/objlayout.c
+++ b/fs/nfs/objlayout/objlayout.c
@@ -50,6 +50,10 @@ objlayout_alloc_layout_hdr(struct inode *inode, gfp_t gfp_flags)
50 struct objlayout *objlay; 50 struct objlayout *objlay;
51 51
52 objlay = kzalloc(sizeof(struct objlayout), gfp_flags); 52 objlay = kzalloc(sizeof(struct objlayout), gfp_flags);
53 if (objlay) {
54 spin_lock_init(&objlay->lock);
55 INIT_LIST_HEAD(&objlay->err_list);
56 }
53 dprintk("%s: Return %p\n", __func__, objlay); 57 dprintk("%s: Return %p\n", __func__, objlay);
54 return &objlay->pnfs_layout; 58 return &objlay->pnfs_layout;
55} 59}
@@ -64,6 +68,7 @@ objlayout_free_layout_hdr(struct pnfs_layout_hdr *lo)
64 68
65 dprintk("%s: objlay %p\n", __func__, objlay); 69 dprintk("%s: objlay %p\n", __func__, objlay);
66 70
71 WARN_ON(!list_empty(&objlay->err_list));
67 kfree(objlay); 72 kfree(objlay);
68} 73}
69 74
@@ -183,6 +188,7 @@ objlayout_alloc_io_state(struct pnfs_layout_hdr *pnfs_layout_type,
183 pgbase &= ~PAGE_MASK; 188 pgbase &= ~PAGE_MASK;
184 } 189 }
185 190
191 INIT_LIST_HEAD(&state->err_list);
186 state->lseg = lseg; 192 state->lseg = lseg;
187 state->rpcdata = rpcdata; 193 state->rpcdata = rpcdata;
188 state->pages = pages; 194 state->pages = pages;
@@ -213,7 +219,52 @@ objlayout_iodone(struct objlayout_io_state *state)
213{ 219{
214 dprintk("%s: state %p status\n", __func__, state); 220 dprintk("%s: state %p status\n", __func__, state);
215 221
216 objlayout_free_io_state(state); 222 if (likely(state->status >= 0)) {
223 objlayout_free_io_state(state);
224 } else {
225 struct objlayout *objlay = OBJLAYOUT(state->lseg->pls_layout);
226
227 spin_lock(&objlay->lock);
228 list_add(&objlay->err_list, &state->err_list);
229 spin_unlock(&objlay->lock);
230 }
231}
232
233/*
234 * objlayout_io_set_result - Set an osd_error code on a specific osd comp.
235 *
236 * The @index component IO failed (error returned from target). Register
237 * the error for later reporting at layout-return.
238 */
239void
240objlayout_io_set_result(struct objlayout_io_state *state, unsigned index,
241 struct pnfs_osd_objid *pooid, int osd_error,
242 u64 offset, u64 length, bool is_write)
243{
244 struct pnfs_osd_ioerr *ioerr = &state->ioerrs[index];
245
246 BUG_ON(index >= state->num_comps);
247 if (osd_error) {
248 ioerr->oer_component = *pooid;
249 ioerr->oer_comp_offset = offset;
250 ioerr->oer_comp_length = length;
251 ioerr->oer_iswrite = is_write;
252 ioerr->oer_errno = osd_error;
253
254 dprintk("%s: err[%d]: errno=%d is_write=%d dev(%llx:%llx) "
255 "par=0x%llx obj=0x%llx offset=0x%llx length=0x%llx\n",
256 __func__, index, ioerr->oer_errno,
257 ioerr->oer_iswrite,
258 _DEVID_LO(&ioerr->oer_component.oid_device_id),
259 _DEVID_HI(&ioerr->oer_component.oid_device_id),
260 ioerr->oer_component.oid_partition_id,
261 ioerr->oer_component.oid_object_id,
262 ioerr->oer_comp_offset,
263 ioerr->oer_comp_length);
264 } else {
265 /* User need not call if no error is reported */
266 ioerr->oer_errno = 0;
267 }
217} 268}
218 269
219/* Function scheduled on rpc workqueue to call ->nfs_readlist_complete(). 270/* Function scheduled on rpc workqueue to call ->nfs_readlist_complete().
@@ -382,6 +433,185 @@ objlayout_write_pagelist(struct nfs_write_data *wdata,
382 return PNFS_ATTEMPTED; 433 return PNFS_ATTEMPTED;
383} 434}
384 435
436static int
437err_prio(u32 oer_errno)
438{
439 switch (oer_errno) {
440 case 0:
441 return 0;
442
443 case PNFS_OSD_ERR_RESOURCE:
444 return OSD_ERR_PRI_RESOURCE;
445 case PNFS_OSD_ERR_BAD_CRED:
446 return OSD_ERR_PRI_BAD_CRED;
447 case PNFS_OSD_ERR_NO_ACCESS:
448 return OSD_ERR_PRI_NO_ACCESS;
449 case PNFS_OSD_ERR_UNREACHABLE:
450 return OSD_ERR_PRI_UNREACHABLE;
451 case PNFS_OSD_ERR_NOT_FOUND:
452 return OSD_ERR_PRI_NOT_FOUND;
453 case PNFS_OSD_ERR_NO_SPACE:
454 return OSD_ERR_PRI_NO_SPACE;
455 default:
456 WARN_ON(1);
457 /* fallthrough */
458 case PNFS_OSD_ERR_EIO:
459 return OSD_ERR_PRI_EIO;
460 }
461}
462
463static void
464merge_ioerr(struct pnfs_osd_ioerr *dest_err,
465 const struct pnfs_osd_ioerr *src_err)
466{
467 u64 dest_end, src_end;
468
469 if (!dest_err->oer_errno) {
470 *dest_err = *src_err;
471 /* accumulated device must be blank */
472 memset(&dest_err->oer_component.oid_device_id, 0,
473 sizeof(dest_err->oer_component.oid_device_id));
474
475 return;
476 }
477
478 if (dest_err->oer_component.oid_partition_id !=
479 src_err->oer_component.oid_partition_id)
480 dest_err->oer_component.oid_partition_id = 0;
481
482 if (dest_err->oer_component.oid_object_id !=
483 src_err->oer_component.oid_object_id)
484 dest_err->oer_component.oid_object_id = 0;
485
486 if (dest_err->oer_comp_offset > src_err->oer_comp_offset)
487 dest_err->oer_comp_offset = src_err->oer_comp_offset;
488
489 dest_end = end_offset(dest_err->oer_comp_offset,
490 dest_err->oer_comp_length);
491 src_end = end_offset(src_err->oer_comp_offset,
492 src_err->oer_comp_length);
493 if (dest_end < src_end)
494 dest_end = src_end;
495
496 dest_err->oer_comp_length = dest_end - dest_err->oer_comp_offset;
497
498 if ((src_err->oer_iswrite == dest_err->oer_iswrite) &&
499 (err_prio(src_err->oer_errno) > err_prio(dest_err->oer_errno))) {
500 dest_err->oer_errno = src_err->oer_errno;
501 } else if (src_err->oer_iswrite) {
502 dest_err->oer_iswrite = true;
503 dest_err->oer_errno = src_err->oer_errno;
504 }
505}
506
507static void
508encode_accumulated_error(struct objlayout *objlay, __be32 *p)
509{
510 struct objlayout_io_state *state, *tmp;
511 struct pnfs_osd_ioerr accumulated_err = {.oer_errno = 0};
512
513 list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) {
514 unsigned i;
515
516 for (i = 0; i < state->num_comps; i++) {
517 struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i];
518
519 if (!ioerr->oer_errno)
520 continue;
521
522 printk(KERN_ERR "%s: err[%d]: errno=%d is_write=%d "
523 "dev(%llx:%llx) par=0x%llx obj=0x%llx "
524 "offset=0x%llx length=0x%llx\n",
525 __func__, i, ioerr->oer_errno,
526 ioerr->oer_iswrite,
527 _DEVID_LO(&ioerr->oer_component.oid_device_id),
528 _DEVID_HI(&ioerr->oer_component.oid_device_id),
529 ioerr->oer_component.oid_partition_id,
530 ioerr->oer_component.oid_object_id,
531 ioerr->oer_comp_offset,
532 ioerr->oer_comp_length);
533
534 merge_ioerr(&accumulated_err, ioerr);
535 }
536 list_del(&state->err_list);
537 objlayout_free_io_state(state);
538 }
539
540 pnfs_osd_xdr_encode_ioerr(p, &accumulated_err);
541}
542
543void
544objlayout_encode_layoutreturn(struct pnfs_layout_hdr *pnfslay,
545 struct xdr_stream *xdr,
546 const struct nfs4_layoutreturn_args *args)
547{
548 struct objlayout *objlay = OBJLAYOUT(pnfslay);
549 struct objlayout_io_state *state, *tmp;
550 __be32 *start;
551
552 dprintk("%s: Begin\n", __func__);
553 start = xdr_reserve_space(xdr, 4);
554 BUG_ON(!start);
555
556 spin_lock(&objlay->lock);
557
558 list_for_each_entry_safe(state, tmp, &objlay->err_list, err_list) {
559 __be32 *last_xdr = NULL, *p;
560 unsigned i;
561 int res = 0;
562
563 for (i = 0; i < state->num_comps; i++) {
564 struct pnfs_osd_ioerr *ioerr = &state->ioerrs[i];
565
566 if (!ioerr->oer_errno)
567 continue;
568
569 dprintk("%s: err[%d]: errno=%d is_write=%d "
570 "dev(%llx:%llx) par=0x%llx obj=0x%llx "
571 "offset=0x%llx length=0x%llx\n",
572 __func__, i, ioerr->oer_errno,
573 ioerr->oer_iswrite,
574 _DEVID_LO(&ioerr->oer_component.oid_device_id),
575 _DEVID_HI(&ioerr->oer_component.oid_device_id),
576 ioerr->oer_component.oid_partition_id,
577 ioerr->oer_component.oid_object_id,
578 ioerr->oer_comp_offset,
579 ioerr->oer_comp_length);
580
581 p = pnfs_osd_xdr_ioerr_reserve_space(xdr);
582 if (unlikely(!p)) {
583 res = -E2BIG;
584 break; /* accumulated_error */
585 }
586
587 last_xdr = p;
588 pnfs_osd_xdr_encode_ioerr(p, &state->ioerrs[i]);
589 }
590
591 /* TODO: use xdr_write_pages */
592 if (unlikely(res)) {
593 /* no space for even one error descriptor */
594 BUG_ON(!last_xdr);
595
596 /* we've encountered a situation with lots and lots of
597 * errors and no space to encode them all. Use the last
598 * available slot to report the union of all the
599 * remaining errors.
600 */
601 encode_accumulated_error(objlay, last_xdr);
602 goto loop_done;
603 }
604 list_del(&state->err_list);
605 objlayout_free_io_state(state);
606 }
607loop_done:
608 spin_unlock(&objlay->lock);
609
610 *start = cpu_to_be32((xdr->p - start - 1) * 4);
611 dprintk("%s: Return\n", __func__);
612}
613
614
385/* 615/*
386 * Get Device Info API for io engines 616 * Get Device Info API for io engines
387 */ 617 */
diff --git a/fs/nfs/objlayout/objlayout.h b/fs/nfs/objlayout/objlayout.h
index 9a405e8069f3..b0bb975058e4 100644
--- a/fs/nfs/objlayout/objlayout.h
+++ b/fs/nfs/objlayout/objlayout.h
@@ -50,6 +50,10 @@
50 */ 50 */
51struct objlayout { 51struct objlayout {
52 struct pnfs_layout_hdr pnfs_layout; 52 struct pnfs_layout_hdr pnfs_layout;
53
54 /* for layout_return: io_states whose I/O ended with an error are queued here until objlayout_encode_layoutreturn() encodes and frees them */
55 spinlock_t lock; /* taken around every err_list access */
56 struct list_head err_list; /* linked through objlayout_io_state.err_list */
53}; 57};
54 58
55static inline struct objlayout * 59static inline struct objlayout *
@@ -76,6 +80,16 @@ struct objlayout_io_state {
76 int status; /* res */ 80 int status; /* res */
77 int eof; /* res */ 81 int eof; /* res */
78 int committed; /* res */ 82 int committed; /* res */
83
84 /* Error reporting (layout_return) */
85 struct list_head err_list;
86 unsigned num_comps;
87 /* Pointer to array of error descriptors of size num_comps.
88 * It should contain as many entries as devices in the osd_layout
89 * that participate in the I/O. It is up to the io_engine to allocate
90 * needed space and set num_comps.
91 */
92 struct pnfs_osd_ioerr *ioerrs;
79}; 93};
80 94
81/* 95/*
@@ -101,6 +115,10 @@ extern ssize_t objio_write_pagelist(struct objlayout_io_state *ol_state,
101/* 115/*
102 * callback API 116 * callback API
103 */ 117 */
118extern void objlayout_io_set_result(struct objlayout_io_state *state,
119 unsigned index, struct pnfs_osd_objid *pooid,
120 int osd_error, u64 offset, u64 length, bool is_write);
121
104extern void objlayout_read_done(struct objlayout_io_state *state, 122extern void objlayout_read_done(struct objlayout_io_state *state,
105 ssize_t status, bool sync); 123 ssize_t status, bool sync);
106extern void objlayout_write_done(struct objlayout_io_state *state, 124extern void objlayout_write_done(struct objlayout_io_state *state,
@@ -131,4 +149,9 @@ extern enum pnfs_try_status objlayout_write_pagelist(
131 struct nfs_write_data *, 149 struct nfs_write_data *,
132 int how); 150 int how);
133 151
152extern void objlayout_encode_layoutreturn(
153 struct pnfs_layout_hdr *,
154 struct xdr_stream *,
155 const struct nfs4_layoutreturn_args *);
156
134#endif /* _OBJLAYOUT_H */ 157#endif /* _OBJLAYOUT_H */