summaryrefslogtreecommitdiffstats
path: root/fs/btrfs/scrub.c
diff options
context:
space:
mode:
authorIlya Dryomov <idryomov@gmail.com>2011-04-09 07:27:01 -0400
committerArne Jansen <sensille@gmx.net>2011-05-12 08:48:28 -0400
commit96e369208e65a7d017a52361fd572df41fde8472 (patch)
treeb72e4aafce997d87934a15f3b5594d8d162b1eef /fs/btrfs/scrub.c
parent475f63874d739d7842a56da94687f18d583ae654 (diff)
btrfs scrub: make fixups sync
btrfs scrub - make fixups sync, don't reuse fixup bios: Fixups are already sync for csum failures; this patch makes them sync for the EIO case as well. Fixups now share pages with the parent sbio: instead of allocating a separate page to do a fixup, we grab the page from the sbio buffer. Fixup bios are no longer reused. struct scrub_fixup is no longer needed; instead we pass [sbio pointer, index]. Originally this was added to look at the possibility of sharing the code between drive swap and scrub, but it actually fixes a serious bug in the scrub code where errors that could be corrected were ignored and reported as uncorrectable. btrfs scrub - restore bios properly after media errors: The current code reallocates a bio after a media error. This is a temporary measure introduced in v3 after a serious problem related to bio reuse was found in v2 of the scrub patchset. Basically, we did not reset the bv_offset and bv_len fields of the bio_vec structure. They are changed when an I/O error happens at, for example, offset 512 or 1024 into the page. Also, the bi_flags field wasn't properly set up before reusing the bio. Signed-off-by: Arne Jansen <sensille@gmx.net>
Diffstat (limited to 'fs/btrfs/scrub.c')
-rw-r--r--fs/btrfs/scrub.c287
1 files changed, 80 insertions, 207 deletions
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 70f9fa772ee9..6a50801ecfa0 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -50,7 +50,6 @@
50struct scrub_bio; 50struct scrub_bio;
51struct scrub_page; 51struct scrub_page;
52struct scrub_dev; 52struct scrub_dev;
53struct scrub_fixup;
54static void scrub_bio_end_io(struct bio *bio, int err); 53static void scrub_bio_end_io(struct bio *bio, int err);
55static void scrub_checksum(struct btrfs_work *work); 54static void scrub_checksum(struct btrfs_work *work);
56static int scrub_checksum_data(struct scrub_dev *sdev, 55static int scrub_checksum_data(struct scrub_dev *sdev,
@@ -59,9 +58,11 @@ static int scrub_checksum_tree_block(struct scrub_dev *sdev,
59 struct scrub_page *spag, u64 logical, 58 struct scrub_page *spag, u64 logical,
60 void *buffer); 59 void *buffer);
61static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer); 60static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer);
62static void scrub_recheck_end_io(struct bio *bio, int err); 61static int scrub_fixup_check(struct scrub_bio *sbio, int ix);
63static void scrub_fixup_worker(struct btrfs_work *work); 62static void scrub_fixup_end_io(struct bio *bio, int err);
64static void scrub_fixup(struct scrub_fixup *fixup); 63static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
64 struct page *page);
65static void scrub_fixup(struct scrub_bio *sbio, int ix);
65 66
66#define SCRUB_PAGES_PER_BIO 16 /* 64k per bio */ 67#define SCRUB_PAGES_PER_BIO 16 /* 64k per bio */
67#define SCRUB_BIOS_PER_DEV 16 /* 1 MB per device in flight */ 68#define SCRUB_BIOS_PER_DEV 16 /* 1 MB per device in flight */
@@ -105,17 +106,6 @@ struct scrub_dev {
105 spinlock_t stat_lock; 106 spinlock_t stat_lock;
106}; 107};
107 108
108struct scrub_fixup {
109 struct scrub_dev *sdev;
110 struct bio *bio;
111 u64 logical;
112 u64 physical;
113 struct scrub_page spag;
114 struct btrfs_work work;
115 int err;
116 int recheck;
117};
118
119static void scrub_free_csums(struct scrub_dev *sdev) 109static void scrub_free_csums(struct scrub_dev *sdev)
120{ 110{
121 while (!list_empty(&sdev->csum_list)) { 111 while (!list_empty(&sdev->csum_list)) {
@@ -240,107 +230,34 @@ nomem:
240 */ 230 */
241static void scrub_recheck_error(struct scrub_bio *sbio, int ix) 231static void scrub_recheck_error(struct scrub_bio *sbio, int ix)
242{ 232{
243 struct scrub_dev *sdev = sbio->sdev; 233 if (sbio->err) {
244 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; 234 if (scrub_fixup_io(READ, sbio->sdev->dev->bdev,
245 struct bio *bio = NULL; 235 (sbio->physical + ix * PAGE_SIZE) >> 9,
246 struct page *page = NULL; 236 sbio->bio->bi_io_vec[ix].bv_page) == 0) {
247 struct scrub_fixup *fixup = NULL; 237 if (scrub_fixup_check(sbio, ix) == 0)
248 int ret; 238 return;
249 239 }
250 /*
251 * while we're in here we do not want the transaction to commit.
252 * To prevent it, we increment scrubs_running. scrub_pause will
253 * have to wait until we're finished
254 * we can safely increment scrubs_running here, because we're
255 * in the context of the original bio which is still marked in_flight
256 */
257 atomic_inc(&fs_info->scrubs_running);
258
259 fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
260 if (!fixup)
261 goto malloc_error;
262
263 fixup->logical = sbio->logical + ix * PAGE_SIZE;
264 fixup->physical = sbio->physical + ix * PAGE_SIZE;
265 fixup->spag = sbio->spag[ix];
266 fixup->sdev = sdev;
267
268 bio = bio_alloc(GFP_NOFS, 1);
269 if (!bio)
270 goto malloc_error;
271 bio->bi_private = fixup;
272 bio->bi_size = 0;
273 bio->bi_bdev = sdev->dev->bdev;
274 fixup->bio = bio;
275 fixup->recheck = 0;
276
277 page = alloc_page(GFP_NOFS);
278 if (!page)
279 goto malloc_error;
280
281 ret = bio_add_page(bio, page, PAGE_SIZE, 0);
282 if (!ret)
283 goto malloc_error;
284
285 if (!sbio->err) {
286 /*
287 * shorter path: just a checksum error, go ahead and correct it
288 */
289 scrub_fixup_worker(&fixup->work);
290 return;
291 } 240 }
292 241
293 /* 242 scrub_fixup(sbio, ix);
294 * an I/O-error occured for one of the blocks in the bio, not
295 * necessarily for this one, so first try to read it separately
296 */
297 fixup->work.func = scrub_fixup_worker;
298 fixup->recheck = 1;
299 bio->bi_end_io = scrub_recheck_end_io;
300 bio->bi_sector = fixup->physical >> 9;
301 bio->bi_bdev = sdev->dev->bdev;
302 submit_bio(0, bio);
303
304 return;
305
306malloc_error:
307 if (bio)
308 bio_put(bio);
309 if (page)
310 __free_page(page);
311 kfree(fixup);
312 spin_lock(&sdev->stat_lock);
313 ++sdev->stat.malloc_errors;
314 spin_unlock(&sdev->stat_lock);
315 atomic_dec(&fs_info->scrubs_running);
316 wake_up(&fs_info->scrub_pause_wait);
317} 243}
318 244
319static void scrub_recheck_end_io(struct bio *bio, int err) 245static int scrub_fixup_check(struct scrub_bio *sbio, int ix)
320{
321 struct scrub_fixup *fixup = bio->bi_private;
322 struct btrfs_fs_info *fs_info = fixup->sdev->dev->dev_root->fs_info;
323
324 fixup->err = err;
325 btrfs_queue_worker(&fs_info->scrub_workers, &fixup->work);
326}
327
328static int scrub_fixup_check(struct scrub_fixup *fixup)
329{ 246{
330 int ret = 1; 247 int ret = 1;
331 struct page *page; 248 struct page *page;
332 void *buffer; 249 void *buffer;
333 u64 flags = fixup->spag.flags; 250 u64 flags = sbio->spag[ix].flags;
334 251
335 page = fixup->bio->bi_io_vec[0].bv_page; 252 page = sbio->bio->bi_io_vec[ix].bv_page;
336 buffer = kmap_atomic(page, KM_USER0); 253 buffer = kmap_atomic(page, KM_USER0);
337 if (flags & BTRFS_EXTENT_FLAG_DATA) { 254 if (flags & BTRFS_EXTENT_FLAG_DATA) {
338 ret = scrub_checksum_data(fixup->sdev, 255 ret = scrub_checksum_data(sbio->sdev,
339 &fixup->spag, buffer); 256 sbio->spag + ix, buffer);
340 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { 257 } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
341 ret = scrub_checksum_tree_block(fixup->sdev, 258 ret = scrub_checksum_tree_block(sbio->sdev,
342 &fixup->spag, 259 sbio->spag + ix,
343 fixup->logical, 260 sbio->logical + ix * PAGE_SIZE,
344 buffer); 261 buffer);
345 } else { 262 } else {
346 WARN_ON(1); 263 WARN_ON(1);
@@ -350,51 +267,25 @@ static int scrub_fixup_check(struct scrub_fixup *fixup)
350 return ret; 267 return ret;
351} 268}
352 269
353static void scrub_fixup_worker(struct btrfs_work *work)
354{
355 struct scrub_fixup *fixup;
356 struct btrfs_fs_info *fs_info;
357 u64 flags;
358 int ret = 1;
359
360 fixup = container_of(work, struct scrub_fixup, work);
361 fs_info = fixup->sdev->dev->dev_root->fs_info;
362 flags = fixup->spag.flags;
363
364 if (fixup->recheck && fixup->err == 0)
365 ret = scrub_fixup_check(fixup);
366
367 if (ret || fixup->err)
368 scrub_fixup(fixup);
369
370 __free_page(fixup->bio->bi_io_vec[0].bv_page);
371 bio_put(fixup->bio);
372
373 atomic_dec(&fs_info->scrubs_running);
374 wake_up(&fs_info->scrub_pause_wait);
375
376 kfree(fixup);
377}
378
379static void scrub_fixup_end_io(struct bio *bio, int err) 270static void scrub_fixup_end_io(struct bio *bio, int err)
380{ 271{
381 complete((struct completion *)bio->bi_private); 272 complete((struct completion *)bio->bi_private);
382} 273}
383 274
384static void scrub_fixup(struct scrub_fixup *fixup) 275static void scrub_fixup(struct scrub_bio *sbio, int ix)
385{ 276{
386 struct scrub_dev *sdev = fixup->sdev; 277 struct scrub_dev *sdev = sbio->sdev;
387 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; 278 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
388 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; 279 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
389 struct btrfs_multi_bio *multi = NULL; 280 struct btrfs_multi_bio *multi = NULL;
390 struct bio *bio = fixup->bio; 281 u64 logical = sbio->logical + ix * PAGE_SIZE;
391 u64 length; 282 u64 length;
392 int i; 283 int i;
393 int ret; 284 int ret;
394 DECLARE_COMPLETION_ONSTACK(complete); 285 DECLARE_COMPLETION_ONSTACK(complete);
395 286
396 if ((fixup->spag.flags & BTRFS_EXTENT_FLAG_DATA) && 287 if ((sbio->spag[ix].flags & BTRFS_EXTENT_FLAG_DATA) &&
397 (fixup->spag.have_csum == 0)) { 288 (sbio->spag[ix].have_csum == 0)) {
398 /* 289 /*
399 * nodatasum, don't try to fix anything 290 * nodatasum, don't try to fix anything
400 * FIXME: we can do better, open the inode and trigger a 291 * FIXME: we can do better, open the inode and trigger a
@@ -404,71 +295,49 @@ static void scrub_fixup(struct scrub_fixup *fixup)
404 } 295 }
405 296
406 length = PAGE_SIZE; 297 length = PAGE_SIZE;
407 ret = btrfs_map_block(map_tree, REQ_WRITE, fixup->logical, &length, 298 ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length,
408 &multi, 0); 299 &multi, 0);
409 if (ret || !multi || length < PAGE_SIZE) { 300 if (ret || !multi || length < PAGE_SIZE) {
410 printk(KERN_ERR 301 printk(KERN_ERR
411 "scrub_fixup: btrfs_map_block failed us for %llu\n", 302 "scrub_fixup: btrfs_map_block failed us for %llu\n",
412 (unsigned long long)fixup->logical); 303 (unsigned long long)logical);
413 WARN_ON(1); 304 WARN_ON(1);
414 return; 305 return;
415 } 306 }
416 307
417 if (multi->num_stripes == 1) { 308 if (multi->num_stripes == 1)
418 /* there aren't any replicas */ 309 /* there aren't any replicas */
419 goto uncorrectable; 310 goto uncorrectable;
420 }
421 311
422 /* 312 /*
423 * first find a good copy 313 * first find a good copy
424 */ 314 */
425 for (i = 0; i < multi->num_stripes; ++i) { 315 for (i = 0; i < multi->num_stripes; ++i) {
426 if (i == fixup->spag.mirror_num) 316 if (i == sbio->spag[ix].mirror_num)
427 continue; 317 continue;
428 318
429 bio->bi_sector = multi->stripes[i].physical >> 9; 319 if (scrub_fixup_io(READ, multi->stripes[i].dev->bdev,
430 bio->bi_bdev = multi->stripes[i].dev->bdev; 320 multi->stripes[i].physical >> 9,
431 bio->bi_size = PAGE_SIZE; 321 sbio->bio->bi_io_vec[ix].bv_page)) {
432 bio->bi_next = NULL;
433 bio->bi_flags |= 1 << BIO_UPTODATE;
434 bio->bi_comp_cpu = -1;
435 bio->bi_end_io = scrub_fixup_end_io;
436 bio->bi_private = &complete;
437
438 submit_bio(0, bio);
439
440 wait_for_completion(&complete);
441
442 if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
443 /* I/O-error, this is not a good copy */ 322 /* I/O-error, this is not a good copy */
444 continue; 323 continue;
324 }
445 325
446 ret = scrub_fixup_check(fixup); 326 if (scrub_fixup_check(sbio, ix) == 0)
447 if (ret == 0)
448 break; 327 break;
449 } 328 }
450 if (i == multi->num_stripes) 329 if (i == multi->num_stripes)
451 goto uncorrectable; 330 goto uncorrectable;
452 331
453 /* 332 /*
454 * the bio now contains good data, write it back 333 * bi_io_vec[ix].bv_page now contains good data, write it back
455 */ 334 */
456 bio->bi_sector = fixup->physical >> 9; 335 if (scrub_fixup_io(WRITE, sdev->dev->bdev,
457 bio->bi_bdev = sdev->dev->bdev; 336 (sbio->physical + ix * PAGE_SIZE) >> 9,
458 bio->bi_size = PAGE_SIZE; 337 sbio->bio->bi_io_vec[ix].bv_page)) {
459 bio->bi_next = NULL;
460 bio->bi_flags |= 1 << BIO_UPTODATE;
461 bio->bi_comp_cpu = -1;
462 bio->bi_end_io = scrub_fixup_end_io;
463 bio->bi_private = &complete;
464
465 submit_bio(REQ_WRITE, bio);
466
467 wait_for_completion(&complete);
468
469 if (!test_bit(BIO_UPTODATE, &bio->bi_flags))
470 /* I/O-error, writeback failed, give up */ 338 /* I/O-error, writeback failed, give up */
471 goto uncorrectable; 339 goto uncorrectable;
340 }
472 341
473 kfree(multi); 342 kfree(multi);
474 spin_lock(&sdev->stat_lock); 343 spin_lock(&sdev->stat_lock);
@@ -477,7 +346,7 @@ static void scrub_fixup(struct scrub_fixup *fixup)
477 346
478 if (printk_ratelimit()) 347 if (printk_ratelimit())
479 printk(KERN_ERR "btrfs: fixed up at %llu\n", 348 printk(KERN_ERR "btrfs: fixed up at %llu\n",
480 (unsigned long long)fixup->logical); 349 (unsigned long long)logical);
481 return; 350 return;
482 351
483uncorrectable: 352uncorrectable:
@@ -488,7 +357,32 @@ uncorrectable:
488 357
489 if (printk_ratelimit()) 358 if (printk_ratelimit())
490 printk(KERN_ERR "btrfs: unable to fixup at %llu\n", 359 printk(KERN_ERR "btrfs: unable to fixup at %llu\n",
491 (unsigned long long)fixup->logical); 360 (unsigned long long)logical);
361}
362
363static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
364 struct page *page)
365{
366 struct bio *bio = NULL;
367 int ret;
368 DECLARE_COMPLETION_ONSTACK(complete);
369
370 /* we are going to wait on this IO */
371 rw |= REQ_SYNC | REQ_UNPLUG;
372
373 bio = bio_alloc(GFP_NOFS, 1);
374 bio->bi_bdev = bdev;
375 bio->bi_sector = sector;
376 bio_add_page(bio, page, PAGE_SIZE, 0);
377 bio->bi_end_io = scrub_fixup_end_io;
378 bio->bi_private = &complete;
379 submit_bio(rw, bio);
380
381 wait_for_completion(&complete);
382
383 ret = !test_bit(BIO_UPTODATE, &bio->bi_flags);
384 bio_put(bio);
385 return ret;
492} 386}
493 387
494static void scrub_bio_end_io(struct bio *bio, int err) 388static void scrub_bio_end_io(struct bio *bio, int err)
@@ -514,44 +408,24 @@ static void scrub_checksum(struct btrfs_work *work)
514 int ret; 408 int ret;
515 409
516 if (sbio->err) { 410 if (sbio->err) {
517 struct bio *bio;
518 struct bio *old_bio;
519
520 for (i = 0; i < sbio->count; ++i) 411 for (i = 0; i < sbio->count; ++i)
521 scrub_recheck_error(sbio, i); 412 scrub_recheck_error(sbio, i);
413
414 sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1);
415 sbio->bio->bi_flags |= 1 << BIO_UPTODATE;
416 sbio->bio->bi_phys_segments = 0;
417 sbio->bio->bi_idx = 0;
418
419 for (i = 0; i < sbio->count; i++) {
420 struct bio_vec *bi;
421 bi = &sbio->bio->bi_io_vec[i];
422 bi->bv_offset = 0;
423 bi->bv_len = PAGE_SIZE;
424 }
425
522 spin_lock(&sdev->stat_lock); 426 spin_lock(&sdev->stat_lock);
523 ++sdev->stat.read_errors; 427 ++sdev->stat.read_errors;
524 spin_unlock(&sdev->stat_lock); 428 spin_unlock(&sdev->stat_lock);
525
526 /*
527 * FIXME: allocate a new bio after a media error. I haven't
528 * figured out how to reuse this one
529 */
530 old_bio = sbio->bio;
531 bio = bio_kmalloc(GFP_NOFS, SCRUB_PAGES_PER_BIO);
532 if (!bio) {
533 /*
534 * alloc failed. cancel the scrub and don't requeue
535 * this sbio
536 */
537 printk(KERN_ERR "btrfs scrub: allocation failure, "
538 "cancelling scrub\n");
539 atomic_inc(&sdev->dev->dev_root->fs_info->
540 scrub_cancel_req);
541 goto out_no_enqueue;
542 }
543 sbio->bio = bio;
544 bio->bi_private = sbio;
545 bio->bi_end_io = scrub_bio_end_io;
546 bio->bi_sector = 0;
547 bio->bi_bdev = sbio->sdev->dev->bdev;
548 bio->bi_size = 0;
549 for (i = 0; i < SCRUB_PAGES_PER_BIO; ++i) {
550 struct page *page;
551 page = old_bio->bi_io_vec[i].bv_page;
552 bio_add_page(bio, page, PAGE_SIZE, 0);
553 }
554 bio_put(old_bio);
555 goto out; 429 goto out;
556 } 430 }
557 for (i = 0; i < sbio->count; ++i) { 431 for (i = 0; i < sbio->count; ++i) {
@@ -581,7 +455,6 @@ out:
581 sbio->next_free = sdev->first_free; 455 sbio->next_free = sdev->first_free;
582 sdev->first_free = sbio->index; 456 sdev->first_free = sbio->index;
583 spin_unlock(&sdev->list_lock); 457 spin_unlock(&sdev->list_lock);
584out_no_enqueue:
585 atomic_dec(&sdev->in_flight); 458 atomic_dec(&sdev->in_flight);
586 wake_up(&sdev->list_wait); 459 wake_up(&sdev->list_wait);
587} 460}