diff options
-rw-r--r-- | fs/btrfs/scrub.c | 287 |
1 files changed, 80 insertions, 207 deletions
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 70f9fa772ee9..6a50801ecfa0 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c | |||
@@ -50,7 +50,6 @@ | |||
50 | struct scrub_bio; | 50 | struct scrub_bio; |
51 | struct scrub_page; | 51 | struct scrub_page; |
52 | struct scrub_dev; | 52 | struct scrub_dev; |
53 | struct scrub_fixup; | ||
54 | static void scrub_bio_end_io(struct bio *bio, int err); | 53 | static void scrub_bio_end_io(struct bio *bio, int err); |
55 | static void scrub_checksum(struct btrfs_work *work); | 54 | static void scrub_checksum(struct btrfs_work *work); |
56 | static int scrub_checksum_data(struct scrub_dev *sdev, | 55 | static int scrub_checksum_data(struct scrub_dev *sdev, |
@@ -59,9 +58,11 @@ static int scrub_checksum_tree_block(struct scrub_dev *sdev, | |||
59 | struct scrub_page *spag, u64 logical, | 58 | struct scrub_page *spag, u64 logical, |
60 | void *buffer); | 59 | void *buffer); |
61 | static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer); | 60 | static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer); |
62 | static void scrub_recheck_end_io(struct bio *bio, int err); | 61 | static int scrub_fixup_check(struct scrub_bio *sbio, int ix); |
63 | static void scrub_fixup_worker(struct btrfs_work *work); | 62 | static void scrub_fixup_end_io(struct bio *bio, int err); |
64 | static void scrub_fixup(struct scrub_fixup *fixup); | 63 | static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector, |
64 | struct page *page); | ||
65 | static void scrub_fixup(struct scrub_bio *sbio, int ix); | ||
65 | 66 | ||
66 | #define SCRUB_PAGES_PER_BIO 16 /* 64k per bio */ | 67 | #define SCRUB_PAGES_PER_BIO 16 /* 64k per bio */ |
67 | #define SCRUB_BIOS_PER_DEV 16 /* 1 MB per device in flight */ | 68 | #define SCRUB_BIOS_PER_DEV 16 /* 1 MB per device in flight */ |
@@ -105,17 +106,6 @@ struct scrub_dev { | |||
105 | spinlock_t stat_lock; | 106 | spinlock_t stat_lock; |
106 | }; | 107 | }; |
107 | 108 | ||
108 | struct scrub_fixup { | ||
109 | struct scrub_dev *sdev; | ||
110 | struct bio *bio; | ||
111 | u64 logical; | ||
112 | u64 physical; | ||
113 | struct scrub_page spag; | ||
114 | struct btrfs_work work; | ||
115 | int err; | ||
116 | int recheck; | ||
117 | }; | ||
118 | |||
119 | static void scrub_free_csums(struct scrub_dev *sdev) | 109 | static void scrub_free_csums(struct scrub_dev *sdev) |
120 | { | 110 | { |
121 | while (!list_empty(&sdev->csum_list)) { | 111 | while (!list_empty(&sdev->csum_list)) { |
@@ -240,107 +230,34 @@ nomem: | |||
240 | */ | 230 | */ |
241 | static void scrub_recheck_error(struct scrub_bio *sbio, int ix) | 231 | static void scrub_recheck_error(struct scrub_bio *sbio, int ix) |
242 | { | 232 | { |
243 | struct scrub_dev *sdev = sbio->sdev; | 233 | if (sbio->err) { |
244 | struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; | 234 | if (scrub_fixup_io(READ, sbio->sdev->dev->bdev, |
245 | struct bio *bio = NULL; | 235 | (sbio->physical + ix * PAGE_SIZE) >> 9, |
246 | struct page *page = NULL; | 236 | sbio->bio->bi_io_vec[ix].bv_page) == 0) { |
247 | struct scrub_fixup *fixup = NULL; | 237 | if (scrub_fixup_check(sbio, ix) == 0) |
248 | int ret; | 238 | return; |
249 | 239 | } | |
250 | /* | ||
251 | * while we're in here we do not want the transaction to commit. | ||
252 | * To prevent it, we increment scrubs_running. scrub_pause will | ||
253 | * have to wait until we're finished | ||
254 | * we can safely increment scrubs_running here, because we're | ||
255 | * in the context of the original bio which is still marked in_flight | ||
256 | */ | ||
257 | atomic_inc(&fs_info->scrubs_running); | ||
258 | |||
259 | fixup = kzalloc(sizeof(*fixup), GFP_NOFS); | ||
260 | if (!fixup) | ||
261 | goto malloc_error; | ||
262 | |||
263 | fixup->logical = sbio->logical + ix * PAGE_SIZE; | ||
264 | fixup->physical = sbio->physical + ix * PAGE_SIZE; | ||
265 | fixup->spag = sbio->spag[ix]; | ||
266 | fixup->sdev = sdev; | ||
267 | |||
268 | bio = bio_alloc(GFP_NOFS, 1); | ||
269 | if (!bio) | ||
270 | goto malloc_error; | ||
271 | bio->bi_private = fixup; | ||
272 | bio->bi_size = 0; | ||
273 | bio->bi_bdev = sdev->dev->bdev; | ||
274 | fixup->bio = bio; | ||
275 | fixup->recheck = 0; | ||
276 | |||
277 | page = alloc_page(GFP_NOFS); | ||
278 | if (!page) | ||
279 | goto malloc_error; | ||
280 | |||
281 | ret = bio_add_page(bio, page, PAGE_SIZE, 0); | ||
282 | if (!ret) | ||
283 | goto malloc_error; | ||
284 | |||
285 | if (!sbio->err) { | ||
286 | /* | ||
287 | * shorter path: just a checksum error, go ahead and correct it | ||
288 | */ | ||
289 | scrub_fixup_worker(&fixup->work); | ||
290 | return; | ||
291 | } | 240 | } |
292 | 241 | ||
293 | /* | 242 | scrub_fixup(sbio, ix); |
294 | * an I/O-error occurred for one of the blocks in the bio, not | ||
295 | * necessarily for this one, so first try to read it separately | ||
296 | */ | ||
297 | fixup->work.func = scrub_fixup_worker; | ||
298 | fixup->recheck = 1; | ||
299 | bio->bi_end_io = scrub_recheck_end_io; | ||
300 | bio->bi_sector = fixup->physical >> 9; | ||
301 | bio->bi_bdev = sdev->dev->bdev; | ||
302 | submit_bio(0, bio); | ||
303 | |||
304 | return; | ||
305 | |||
306 | malloc_error: | ||
307 | if (bio) | ||
308 | bio_put(bio); | ||
309 | if (page) | ||
310 | __free_page(page); | ||
311 | kfree(fixup); | ||
312 | spin_lock(&sdev->stat_lock); | ||
313 | ++sdev->stat.malloc_errors; | ||
314 | spin_unlock(&sdev->stat_lock); | ||
315 | atomic_dec(&fs_info->scrubs_running); | ||
316 | wake_up(&fs_info->scrub_pause_wait); | ||
317 | } | 243 | } |
318 | 244 | ||
319 | static void scrub_recheck_end_io(struct bio *bio, int err) | 245 | static int scrub_fixup_check(struct scrub_bio *sbio, int ix) |
320 | { | ||
321 | struct scrub_fixup *fixup = bio->bi_private; | ||
322 | struct btrfs_fs_info *fs_info = fixup->sdev->dev->dev_root->fs_info; | ||
323 | |||
324 | fixup->err = err; | ||
325 | btrfs_queue_worker(&fs_info->scrub_workers, &fixup->work); | ||
326 | } | ||
327 | |||
328 | static int scrub_fixup_check(struct scrub_fixup *fixup) | ||
329 | { | 246 | { |
330 | int ret = 1; | 247 | int ret = 1; |
331 | struct page *page; | 248 | struct page *page; |
332 | void *buffer; | 249 | void *buffer; |
333 | u64 flags = fixup->spag.flags; | 250 | u64 flags = sbio->spag[ix].flags; |
334 | 251 | ||
335 | page = fixup->bio->bi_io_vec[0].bv_page; | 252 | page = sbio->bio->bi_io_vec[ix].bv_page; |
336 | buffer = kmap_atomic(page, KM_USER0); | 253 | buffer = kmap_atomic(page, KM_USER0); |
337 | if (flags & BTRFS_EXTENT_FLAG_DATA) { | 254 | if (flags & BTRFS_EXTENT_FLAG_DATA) { |
338 | ret = scrub_checksum_data(fixup->sdev, | 255 | ret = scrub_checksum_data(sbio->sdev, |
339 | &fixup->spag, buffer); | 256 | sbio->spag + ix, buffer); |
340 | } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | 257 | } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { |
341 | ret = scrub_checksum_tree_block(fixup->sdev, | 258 | ret = scrub_checksum_tree_block(sbio->sdev, |
342 | &fixup->spag, | 259 | sbio->spag + ix, |
343 | fixup->logical, | 260 | sbio->logical + ix * PAGE_SIZE, |
344 | buffer); | 261 | buffer); |
345 | } else { | 262 | } else { |
346 | WARN_ON(1); | 263 | WARN_ON(1); |
@@ -350,51 +267,25 @@ static int scrub_fixup_check(struct scrub_fixup *fixup) | |||
350 | return ret; | 267 | return ret; |
351 | } | 268 | } |
352 | 269 | ||
353 | static void scrub_fixup_worker(struct btrfs_work *work) | ||
354 | { | ||
355 | struct scrub_fixup *fixup; | ||
356 | struct btrfs_fs_info *fs_info; | ||
357 | u64 flags; | ||
358 | int ret = 1; | ||
359 | |||
360 | fixup = container_of(work, struct scrub_fixup, work); | ||
361 | fs_info = fixup->sdev->dev->dev_root->fs_info; | ||
362 | flags = fixup->spag.flags; | ||
363 | |||
364 | if (fixup->recheck && fixup->err == 0) | ||
365 | ret = scrub_fixup_check(fixup); | ||
366 | |||
367 | if (ret || fixup->err) | ||
368 | scrub_fixup(fixup); | ||
369 | |||
370 | __free_page(fixup->bio->bi_io_vec[0].bv_page); | ||
371 | bio_put(fixup->bio); | ||
372 | |||
373 | atomic_dec(&fs_info->scrubs_running); | ||
374 | wake_up(&fs_info->scrub_pause_wait); | ||
375 | |||
376 | kfree(fixup); | ||
377 | } | ||
378 | |||
379 | static void scrub_fixup_end_io(struct bio *bio, int err) | 270 | static void scrub_fixup_end_io(struct bio *bio, int err) |
380 | { | 271 | { |
381 | complete((struct completion *)bio->bi_private); | 272 | complete((struct completion *)bio->bi_private); |
382 | } | 273 | } |
383 | 274 | ||
384 | static void scrub_fixup(struct scrub_fixup *fixup) | 275 | static void scrub_fixup(struct scrub_bio *sbio, int ix) |
385 | { | 276 | { |
386 | struct scrub_dev *sdev = fixup->sdev; | 277 | struct scrub_dev *sdev = sbio->sdev; |
387 | struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; | 278 | struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; |
388 | struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; | 279 | struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; |
389 | struct btrfs_multi_bio *multi = NULL; | 280 | struct btrfs_multi_bio *multi = NULL; |
390 | struct bio *bio = fixup->bio; | 281 | u64 logical = sbio->logical + ix * PAGE_SIZE; |
391 | u64 length; | 282 | u64 length; |
392 | int i; | 283 | int i; |
393 | int ret; | 284 | int ret; |
394 | DECLARE_COMPLETION_ONSTACK(complete); | 285 | DECLARE_COMPLETION_ONSTACK(complete); |
395 | 286 | ||
396 | if ((fixup->spag.flags & BTRFS_EXTENT_FLAG_DATA) && | 287 | if ((sbio->spag[ix].flags & BTRFS_EXTENT_FLAG_DATA) && |
397 | (fixup->spag.have_csum == 0)) { | 288 | (sbio->spag[ix].have_csum == 0)) { |
398 | /* | 289 | /* |
399 | * nodatasum, don't try to fix anything | 290 | * nodatasum, don't try to fix anything |
400 | * FIXME: we can do better, open the inode and trigger a | 291 | * FIXME: we can do better, open the inode and trigger a |
@@ -404,71 +295,49 @@ static void scrub_fixup(struct scrub_fixup *fixup) | |||
404 | } | 295 | } |
405 | 296 | ||
406 | length = PAGE_SIZE; | 297 | length = PAGE_SIZE; |
407 | ret = btrfs_map_block(map_tree, REQ_WRITE, fixup->logical, &length, | 298 | ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length, |
408 | &multi, 0); | 299 | &multi, 0); |
409 | if (ret || !multi || length < PAGE_SIZE) { | 300 | if (ret || !multi || length < PAGE_SIZE) { |
410 | printk(KERN_ERR | 301 | printk(KERN_ERR |
411 | "scrub_fixup: btrfs_map_block failed us for %llu\n", | 302 | "scrub_fixup: btrfs_map_block failed us for %llu\n", |
412 | (unsigned long long)fixup->logical); | 303 | (unsigned long long)logical); |
413 | WARN_ON(1); | 304 | WARN_ON(1); |
414 | return; | 305 | return; |
415 | } | 306 | } |
416 | 307 | ||
417 | if (multi->num_stripes == 1) { | 308 | if (multi->num_stripes == 1) |
418 | /* there aren't any replicas */ | 309 | /* there aren't any replicas */ |
419 | goto uncorrectable; | 310 | goto uncorrectable; |
420 | } | ||
421 | 311 | ||
422 | /* | 312 | /* |
423 | * first find a good copy | 313 | * first find a good copy |
424 | */ | 314 | */ |
425 | for (i = 0; i < multi->num_stripes; ++i) { | 315 | for (i = 0; i < multi->num_stripes; ++i) { |
426 | if (i == fixup->spag.mirror_num) | 316 | if (i == sbio->spag[ix].mirror_num) |
427 | continue; | 317 | continue; |
428 | 318 | ||
429 | bio->bi_sector = multi->stripes[i].physical >> 9; | 319 | if (scrub_fixup_io(READ, multi->stripes[i].dev->bdev, |
430 | bio->bi_bdev = multi->stripes[i].dev->bdev; | 320 | multi->stripes[i].physical >> 9, |
431 | bio->bi_size = PAGE_SIZE; | 321 | sbio->bio->bi_io_vec[ix].bv_page)) { |
432 | bio->bi_next = NULL; | ||
433 | bio->bi_flags |= 1 << BIO_UPTODATE; | ||
434 | bio->bi_comp_cpu = -1; | ||
435 | bio->bi_end_io = scrub_fixup_end_io; | ||
436 | bio->bi_private = &complete; | ||
437 | |||
438 | submit_bio(0, bio); | ||
439 | |||
440 | wait_for_completion(&complete); | ||
441 | |||
442 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) | ||
443 | /* I/O-error, this is not a good copy */ | 322 | /* I/O-error, this is not a good copy */ |
444 | continue; | 323 | continue; |
324 | } | ||
445 | 325 | ||
446 | ret = scrub_fixup_check(fixup); | 326 | if (scrub_fixup_check(sbio, ix) == 0) |
447 | if (ret == 0) | ||
448 | break; | 327 | break; |
449 | } | 328 | } |
450 | if (i == multi->num_stripes) | 329 | if (i == multi->num_stripes) |
451 | goto uncorrectable; | 330 | goto uncorrectable; |
452 | 331 | ||
453 | /* | 332 | /* |
454 | * the bio now contains good data, write it back | 333 | * bi_io_vec[ix].bv_page now contains good data, write it back |
455 | */ | 334 | */ |
456 | bio->bi_sector = fixup->physical >> 9; | 335 | if (scrub_fixup_io(WRITE, sdev->dev->bdev, |
457 | bio->bi_bdev = sdev->dev->bdev; | 336 | (sbio->physical + ix * PAGE_SIZE) >> 9, |
458 | bio->bi_size = PAGE_SIZE; | 337 | sbio->bio->bi_io_vec[ix].bv_page)) { |
459 | bio->bi_next = NULL; | ||
460 | bio->bi_flags |= 1 << BIO_UPTODATE; | ||
461 | bio->bi_comp_cpu = -1; | ||
462 | bio->bi_end_io = scrub_fixup_end_io; | ||
463 | bio->bi_private = &complete; | ||
464 | |||
465 | submit_bio(REQ_WRITE, bio); | ||
466 | |||
467 | wait_for_completion(&complete); | ||
468 | |||
469 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) | ||
470 | /* I/O-error, writeback failed, give up */ | 338 | /* I/O-error, writeback failed, give up */ |
471 | goto uncorrectable; | 339 | goto uncorrectable; |
340 | } | ||
472 | 341 | ||
473 | kfree(multi); | 342 | kfree(multi); |
474 | spin_lock(&sdev->stat_lock); | 343 | spin_lock(&sdev->stat_lock); |
@@ -477,7 +346,7 @@ static void scrub_fixup(struct scrub_fixup *fixup) | |||
477 | 346 | ||
478 | if (printk_ratelimit()) | 347 | if (printk_ratelimit()) |
479 | printk(KERN_ERR "btrfs: fixed up at %llu\n", | 348 | printk(KERN_ERR "btrfs: fixed up at %llu\n", |
480 | (unsigned long long)fixup->logical); | 349 | (unsigned long long)logical); |
481 | return; | 350 | return; |
482 | 351 | ||
483 | uncorrectable: | 352 | uncorrectable: |
@@ -488,7 +357,32 @@ uncorrectable: | |||
488 | 357 | ||
489 | if (printk_ratelimit()) | 358 | if (printk_ratelimit()) |
490 | printk(KERN_ERR "btrfs: unable to fixup at %llu\n", | 359 | printk(KERN_ERR "btrfs: unable to fixup at %llu\n", |
491 | (unsigned long long)fixup->logical); | 360 | (unsigned long long)logical); |
361 | } | ||
362 | |||
363 | static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector, | ||
364 | struct page *page) | ||
365 | { | ||
366 | struct bio *bio = NULL; | ||
367 | int ret; | ||
368 | DECLARE_COMPLETION_ONSTACK(complete); | ||
369 | |||
370 | /* we are going to wait on this IO */ | ||
371 | rw |= REQ_SYNC | REQ_UNPLUG; | ||
372 | |||
373 | bio = bio_alloc(GFP_NOFS, 1); | ||
374 | bio->bi_bdev = bdev; | ||
375 | bio->bi_sector = sector; | ||
376 | bio_add_page(bio, page, PAGE_SIZE, 0); | ||
377 | bio->bi_end_io = scrub_fixup_end_io; | ||
378 | bio->bi_private = &complete; | ||
379 | submit_bio(rw, bio); | ||
380 | |||
381 | wait_for_completion(&complete); | ||
382 | |||
383 | ret = !test_bit(BIO_UPTODATE, &bio->bi_flags); | ||
384 | bio_put(bio); | ||
385 | return ret; | ||
492 | } | 386 | } |
493 | 387 | ||
494 | static void scrub_bio_end_io(struct bio *bio, int err) | 388 | static void scrub_bio_end_io(struct bio *bio, int err) |
@@ -514,44 +408,24 @@ static void scrub_checksum(struct btrfs_work *work) | |||
514 | int ret; | 408 | int ret; |
515 | 409 | ||
516 | if (sbio->err) { | 410 | if (sbio->err) { |
517 | struct bio *bio; | ||
518 | struct bio *old_bio; | ||
519 | |||
520 | for (i = 0; i < sbio->count; ++i) | 411 | for (i = 0; i < sbio->count; ++i) |
521 | scrub_recheck_error(sbio, i); | 412 | scrub_recheck_error(sbio, i); |
413 | |||
414 | sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1); | ||
415 | sbio->bio->bi_flags |= 1 << BIO_UPTODATE; | ||
416 | sbio->bio->bi_phys_segments = 0; | ||
417 | sbio->bio->bi_idx = 0; | ||
418 | |||
419 | for (i = 0; i < sbio->count; i++) { | ||
420 | struct bio_vec *bi; | ||
421 | bi = &sbio->bio->bi_io_vec[i]; | ||
422 | bi->bv_offset = 0; | ||
423 | bi->bv_len = PAGE_SIZE; | ||
424 | } | ||
425 | |||
522 | spin_lock(&sdev->stat_lock); | 426 | spin_lock(&sdev->stat_lock); |
523 | ++sdev->stat.read_errors; | 427 | ++sdev->stat.read_errors; |
524 | spin_unlock(&sdev->stat_lock); | 428 | spin_unlock(&sdev->stat_lock); |
525 | |||
526 | /* | ||
527 | * FIXME: allocate a new bio after a media error. I haven't | ||
528 | * figured out how to reuse this one | ||
529 | */ | ||
530 | old_bio = sbio->bio; | ||
531 | bio = bio_kmalloc(GFP_NOFS, SCRUB_PAGES_PER_BIO); | ||
532 | if (!bio) { | ||
533 | /* | ||
534 | * alloc failed. cancel the scrub and don't requeue | ||
535 | * this sbio | ||
536 | */ | ||
537 | printk(KERN_ERR "btrfs scrub: allocation failure, " | ||
538 | "cancelling scrub\n"); | ||
539 | atomic_inc(&sdev->dev->dev_root->fs_info-> | ||
540 | scrub_cancel_req); | ||
541 | goto out_no_enqueue; | ||
542 | } | ||
543 | sbio->bio = bio; | ||
544 | bio->bi_private = sbio; | ||
545 | bio->bi_end_io = scrub_bio_end_io; | ||
546 | bio->bi_sector = 0; | ||
547 | bio->bi_bdev = sbio->sdev->dev->bdev; | ||
548 | bio->bi_size = 0; | ||
549 | for (i = 0; i < SCRUB_PAGES_PER_BIO; ++i) { | ||
550 | struct page *page; | ||
551 | page = old_bio->bi_io_vec[i].bv_page; | ||
552 | bio_add_page(bio, page, PAGE_SIZE, 0); | ||
553 | } | ||
554 | bio_put(old_bio); | ||
555 | goto out; | 429 | goto out; |
556 | } | 430 | } |
557 | for (i = 0; i < sbio->count; ++i) { | 431 | for (i = 0; i < sbio->count; ++i) { |
@@ -581,7 +455,6 @@ out: | |||
581 | sbio->next_free = sdev->first_free; | 455 | sbio->next_free = sdev->first_free; |
582 | sdev->first_free = sbio->index; | 456 | sdev->first_free = sbio->index; |
583 | spin_unlock(&sdev->list_lock); | 457 | spin_unlock(&sdev->list_lock); |
584 | out_no_enqueue: | ||
585 | atomic_dec(&sdev->in_flight); | 458 | atomic_dec(&sdev->in_flight); |
586 | wake_up(&sdev->list_wait); | 459 | wake_up(&sdev->list_wait); |
587 | } | 460 | } |