aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorBoaz Harrosh <bharrosh@panasas.com>2011-09-28 06:25:50 -0400
committerBoaz Harrosh <bharrosh@panasas.com>2011-10-14 12:54:42 -0400
commit4b46c9f5cf69505f0bc708995b88b0cc60317ffd (patch)
treee5369fe948509c230470f922a0cd89cda60f2692 /fs
parent5a51c0c7e9a913649aa65d8233470682bcbb7694 (diff)
ore/exofs: Change ore_check_io API
The current ore_check_io API receives a residual pointer to report partial IO. But it is not actually used, because in a multiple-device IO there is never any linearity in the IO failure. On the other hand, if every failing device is reported through a callback passed in by the caller, measures can be taken to handle only the failed devices, one at a time. This will also be needed by the objects-layout-driver for its error reporting facility. Exofs does not currently use the new information and keeps the old behaviour of failing the complete IO in case of an error (no partial completion). TODO: Use an ore_check_io callback to set_page_error only on the failing pages, and re-dirty write pages. Signed-off-by: Boaz Harrosh <bharrosh@panasas.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/exofs/inode.c14
-rw-r--r--fs/exofs/ore.c29
2 files changed, 20 insertions, 23 deletions
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 5a62420cbdb1..86c0ac87b8e3 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -194,19 +194,16 @@ static void update_write_page(struct page *page, int ret)
194static int __readpages_done(struct page_collect *pcol) 194static int __readpages_done(struct page_collect *pcol)
195{ 195{
196 int i; 196 int i;
197 u64 resid;
198 u64 good_bytes; 197 u64 good_bytes;
199 u64 length = 0; 198 u64 length = 0;
200 int ret = ore_check_io(pcol->ios, &resid); 199 int ret = ore_check_io(pcol->ios, NULL);
201 200
202 if (likely(!ret)) { 201 if (likely(!ret)) {
203 good_bytes = pcol->length; 202 good_bytes = pcol->length;
204 ret = PAGE_WAS_NOT_IN_IO; 203 ret = PAGE_WAS_NOT_IN_IO;
205 } else { 204 } else {
206 good_bytes = pcol->length - resid; 205 good_bytes = 0;
207 } 206 }
208 if (good_bytes > pcol->ios->length)
209 good_bytes = pcol->ios->length;
210 207
211 EXOFS_DBGMSG2("readpages_done(0x%lx) good_bytes=0x%llx" 208 EXOFS_DBGMSG2("readpages_done(0x%lx) good_bytes=0x%llx"
212 " length=0x%lx nr_pages=%u\n", 209 " length=0x%lx nr_pages=%u\n",
@@ -519,10 +516,9 @@ static void writepages_done(struct ore_io_state *ios, void *p)
519{ 516{
520 struct page_collect *pcol = p; 517 struct page_collect *pcol = p;
521 int i; 518 int i;
522 u64 resid;
523 u64 good_bytes; 519 u64 good_bytes;
524 u64 length = 0; 520 u64 length = 0;
525 int ret = ore_check_io(ios, &resid); 521 int ret = ore_check_io(ios, NULL);
526 522
527 atomic_dec(&pcol->sbi->s_curr_pending); 523 atomic_dec(&pcol->sbi->s_curr_pending);
528 524
@@ -530,10 +526,8 @@ static void writepages_done(struct ore_io_state *ios, void *p)
530 good_bytes = pcol->length; 526 good_bytes = pcol->length;
531 ret = PAGE_WAS_NOT_IN_IO; 527 ret = PAGE_WAS_NOT_IN_IO;
532 } else { 528 } else {
533 good_bytes = pcol->length - resid; 529 good_bytes = 0;
534 } 530 }
535 if (good_bytes > pcol->ios->length)
536 good_bytes = pcol->ios->length;
537 531
538 EXOFS_DBGMSG2("writepages_done(0x%lx) good_bytes=0x%llx" 532 EXOFS_DBGMSG2("writepages_done(0x%lx) good_bytes=0x%llx"
539 " length=0x%lx nr_pages=%u\n", 533 " length=0x%lx nr_pages=%u\n",
diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c
index 4ca59d492798..3b1cc3a132d7 100644
--- a/fs/exofs/ore.c
+++ b/fs/exofs/ore.c
@@ -317,7 +317,7 @@ static void _clear_bio(struct bio *bio)
317 } 317 }
318} 318}
319 319
320int ore_check_io(struct ore_io_state *ios, u64 *resid) 320int ore_check_io(struct ore_io_state *ios, ore_on_dev_error on_dev_error)
321{ 321{
322 enum osd_err_priority acumulated_osd_err = 0; 322 enum osd_err_priority acumulated_osd_err = 0;
323 int acumulated_lin_err = 0; 323 int acumulated_lin_err = 0;
@@ -325,7 +325,8 @@ int ore_check_io(struct ore_io_state *ios, u64 *resid)
325 325
326 for (i = 0; i < ios->numdevs; i++) { 326 for (i = 0; i < ios->numdevs; i++) {
327 struct osd_sense_info osi; 327 struct osd_sense_info osi;
328 struct osd_request *or = ios->per_dev[i].or; 328 struct ore_per_dev_state *per_dev = &ios->per_dev[i];
329 struct osd_request *or = per_dev->or;
329 int ret; 330 int ret;
330 331
331 if (unlikely(!or)) 332 if (unlikely(!or))
@@ -337,29 +338,31 @@ int ore_check_io(struct ore_io_state *ios, u64 *resid)
337 338
338 if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) { 339 if (OSD_ERR_PRI_CLEAR_PAGES == osi.osd_err_pri) {
339 /* start read offset passed endof file */ 340 /* start read offset passed endof file */
340 _clear_bio(ios->per_dev[i].bio); 341 _clear_bio(per_dev->bio);
341 ORE_DBGMSG("start read offset passed end of file " 342 ORE_DBGMSG("start read offset passed end of file "
342 "offset=0x%llx, length=0x%llx\n", 343 "offset=0x%llx, length=0x%llx\n",
343 _LLU(ios->per_dev[i].offset), 344 _LLU(per_dev->offset),
344 _LLU(ios->per_dev[i].length)); 345 _LLU(per_dev->length));
345 346
346 continue; /* we recovered */ 347 continue; /* we recovered */
347 } 348 }
348 349
350 if (on_dev_error) {
351 u64 residual = ios->reading ?
352 or->in.residual : or->out.residual;
353 u64 offset = (ios->offset + ios->length) - residual;
354 struct ore_dev *od = ios->oc->ods[
355 per_dev->dev - ios->oc->first_dev];
356
357 on_dev_error(ios, od, per_dev->dev, osi.osd_err_pri,
358 offset, residual);
359 }
349 if (osi.osd_err_pri >= acumulated_osd_err) { 360 if (osi.osd_err_pri >= acumulated_osd_err) {
350 acumulated_osd_err = osi.osd_err_pri; 361 acumulated_osd_err = osi.osd_err_pri;
351 acumulated_lin_err = ret; 362 acumulated_lin_err = ret;
352 } 363 }
353 } 364 }
354 365
355 /* TODO: raid specific residual calculations */
356 if (resid) {
357 if (likely(!acumulated_lin_err))
358 *resid = 0;
359 else
360 *resid = ios->length;
361 }
362
363 return acumulated_lin_err; 366 return acumulated_lin_err;
364} 367}
365EXPORT_SYMBOL(ore_check_io); 368EXPORT_SYMBOL(ore_check_io);