aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/scrub.c
diff options
context:
space:
mode:
authorJan Schmidt <list.btrfs@jan-o-sch.net>2011-06-13 14:04:15 -0400
committerJan Schmidt <list.btrfs@jan-o-sch.net>2011-09-29 06:54:28 -0400
commit0ef8e45158f97dde4801b535e25f70f7caf01a27 (patch)
treed1c29055e475402613d529738225df79d8789d20 /fs/btrfs/scrub.c
parente12fa9cd390f8e93a9144bd99bd6f6ed316fbc1e (diff)
btrfs scrub: add fixup code for errors on nodatasum files
This removes a FIXME comment and introduces the first part of nodatasum fixup: It gets the corresponding inode for a logical address and triggers a regular readpage for the corrupted sector. Once we have on-the-fly error correction our error will be automatically corrected. The correction code is expected to clear the newly introduced EXTENT_DAMAGED flag, making scrub report that error as "corrected" instead of "uncorrectable" eventually. Signed-off-by: Jan Schmidt <list.btrfs@jan-o-sch.net>
Diffstat (limited to 'fs/btrfs/scrub.c')
-rw-r--r--fs/btrfs/scrub.c188
1 files changed, 182 insertions, 6 deletions
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 41a01147b959..db09f01c0e4f 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -22,6 +22,7 @@
22#include "volumes.h" 22#include "volumes.h"
23#include "disk-io.h" 23#include "disk-io.h"
24#include "ordered-data.h" 24#include "ordered-data.h"
25#include "transaction.h"
25#include "backref.h" 26#include "backref.h"
26 27
27/* 28/*
@@ -89,6 +90,7 @@ struct scrub_dev {
89 int first_free; 90 int first_free;
90 int curr; 91 int curr;
91 atomic_t in_flight; 92 atomic_t in_flight;
93 atomic_t fixup_cnt;
92 spinlock_t list_lock; 94 spinlock_t list_lock;
93 wait_queue_head_t list_wait; 95 wait_queue_head_t list_wait;
94 u16 csum_size; 96 u16 csum_size;
@@ -102,6 +104,14 @@ struct scrub_dev {
102 spinlock_t stat_lock; 104 spinlock_t stat_lock;
103}; 105};
104 106
107struct scrub_fixup_nodatasum {
108 struct scrub_dev *sdev;
109 u64 logical;
110 struct btrfs_root *root;
111 struct btrfs_work work;
112 int mirror_num;
113};
114
105struct scrub_warning { 115struct scrub_warning {
106 struct btrfs_path *path; 116 struct btrfs_path *path;
107 u64 extent_item_size; 117 u64 extent_item_size;
@@ -190,12 +200,13 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
190 200
191 if (i != SCRUB_BIOS_PER_DEV-1) 201 if (i != SCRUB_BIOS_PER_DEV-1)
192 sdev->bios[i]->next_free = i + 1; 202 sdev->bios[i]->next_free = i + 1;
193 else 203 else
194 sdev->bios[i]->next_free = -1; 204 sdev->bios[i]->next_free = -1;
195 } 205 }
196 sdev->first_free = 0; 206 sdev->first_free = 0;
197 sdev->curr = -1; 207 sdev->curr = -1;
198 atomic_set(&sdev->in_flight, 0); 208 atomic_set(&sdev->in_flight, 0);
209 atomic_set(&sdev->fixup_cnt, 0);
199 atomic_set(&sdev->cancel_req, 0); 210 atomic_set(&sdev->cancel_req, 0);
200 sdev->csum_size = btrfs_super_csum_size(&fs_info->super_copy); 211 sdev->csum_size = btrfs_super_csum_size(&fs_info->super_copy);
201 INIT_LIST_HEAD(&sdev->csum_list); 212 INIT_LIST_HEAD(&sdev->csum_list);
@@ -347,6 +358,151 @@ out:
347 kfree(swarn.msg_buf); 358 kfree(swarn.msg_buf);
348} 359}
349 360
361static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *ctx)
362{
363 struct page *page;
364 unsigned long index;
365 struct scrub_fixup_nodatasum *fixup = ctx;
366 int ret;
367 int corrected;
368 struct btrfs_key key;
369 struct inode *inode;
370 u64 end = offset + PAGE_SIZE - 1;
371 struct btrfs_root *local_root;
372
373 key.objectid = root;
374 key.type = BTRFS_ROOT_ITEM_KEY;
375 key.offset = (u64)-1;
376 local_root = btrfs_read_fs_root_no_name(fixup->root->fs_info, &key);
377 if (IS_ERR(local_root))
378 return PTR_ERR(local_root);
379
380 key.type = BTRFS_INODE_ITEM_KEY;
381 key.objectid = inum;
382 key.offset = 0;
383 inode = btrfs_iget(fixup->root->fs_info->sb, &key, local_root, NULL);
384 if (IS_ERR(inode))
385 return PTR_ERR(inode);
386
387 ret = set_extent_bit(&BTRFS_I(inode)->io_tree, offset, end,
388 EXTENT_DAMAGED, 0, NULL, NULL, GFP_NOFS);
389
390 /* set_extent_bit should either succeed or give proper error */
391 WARN_ON(ret > 0);
392 if (ret)
393 return ret < 0 ? ret : -EFAULT;
394
395 index = offset >> PAGE_CACHE_SHIFT;
396
397 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
398 if (!page)
399 return -ENOMEM;
400
401 ret = extent_read_full_page(&BTRFS_I(inode)->io_tree, page,
402 btrfs_get_extent, fixup->mirror_num);
403 wait_on_page_locked(page);
404 corrected = !test_range_bit(&BTRFS_I(inode)->io_tree, offset, end,
405 EXTENT_DAMAGED, 0, NULL);
406
407 if (corrected)
408 WARN_ON(!PageUptodate(page));
409 else
410 clear_extent_bit(&BTRFS_I(inode)->io_tree, offset, end,
411 EXTENT_DAMAGED, 0, 0, NULL, GFP_NOFS);
412
413 put_page(page);
414 iput(inode);
415
416 if (ret < 0)
417 return ret;
418
419 if (ret == 0 && corrected) {
420 /*
421 * we only need to call readpage for one of the inodes belonging
422 * to this extent. so make iterate_extent_inodes stop
423 */
424 return 1;
425 }
426
427 return -EIO;
428}
429
430static void scrub_fixup_nodatasum(struct btrfs_work *work)
431{
432 int ret;
433 struct scrub_fixup_nodatasum *fixup;
434 struct scrub_dev *sdev;
435 struct btrfs_trans_handle *trans = NULL;
436 struct btrfs_fs_info *fs_info;
437 struct btrfs_path *path;
438 int uncorrectable = 0;
439
440 fixup = container_of(work, struct scrub_fixup_nodatasum, work);
441 sdev = fixup->sdev;
442 fs_info = fixup->root->fs_info;
443
444 path = btrfs_alloc_path();
445 if (!path) {
446 spin_lock(&sdev->stat_lock);
447 ++sdev->stat.malloc_errors;
448 spin_unlock(&sdev->stat_lock);
449 uncorrectable = 1;
450 goto out;
451 }
452
453 trans = btrfs_join_transaction(fixup->root);
454 if (IS_ERR(trans)) {
455 uncorrectable = 1;
456 goto out;
457 }
458
459 /*
460 * the idea is to trigger a regular read through the standard path. we
461 * read a page from the (failed) logical address by specifying the
462 * corresponding copynum of the failed sector. thus, that readpage is
463 * expected to fail.
464 * that is the point where on-the-fly error correction will kick in
465 * (once it's finished) and rewrite the failed sector if a good copy
466 * can be found.
467 */
468 ret = iterate_inodes_from_logical(fixup->logical, fixup->root->fs_info,
469 path, scrub_fixup_readpage,
470 fixup);
471 if (ret < 0) {
472 uncorrectable = 1;
473 goto out;
474 }
475 WARN_ON(ret != 1);
476
477 spin_lock(&sdev->stat_lock);
478 ++sdev->stat.corrected_errors;
479 spin_unlock(&sdev->stat_lock);
480
481out:
482 if (trans && !IS_ERR(trans))
483 btrfs_end_transaction(trans, fixup->root);
484 if (uncorrectable) {
485 spin_lock(&sdev->stat_lock);
486 ++sdev->stat.uncorrectable_errors;
487 spin_unlock(&sdev->stat_lock);
488 printk_ratelimited(KERN_ERR "btrfs: unable to fixup "
489 "(nodatasum) error at logical %llu\n",
490 fixup->logical);
491 }
492
493 btrfs_free_path(path);
494 kfree(fixup);
495
496 /* see caller why we're pretending to be paused in the scrub counters */
497 mutex_lock(&fs_info->scrub_lock);
498 atomic_dec(&fs_info->scrubs_running);
499 atomic_dec(&fs_info->scrubs_paused);
500 mutex_unlock(&fs_info->scrub_lock);
501 atomic_dec(&sdev->fixup_cnt);
502 wake_up(&fs_info->scrub_pause_wait);
503 wake_up(&sdev->list_wait);
504}
505
350/* 506/*
351 * scrub_recheck_error gets called when either verification of the page 507 * scrub_recheck_error gets called when either verification of the page
352 * failed or the bio failed to read, e.g. with EIO. In the latter case, 508 * failed or the bio failed to read, e.g. with EIO. In the latter case,
@@ -417,6 +573,7 @@ static void scrub_fixup(struct scrub_bio *sbio, int ix)
417 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; 573 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
418 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; 574 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
419 struct btrfs_multi_bio *multi = NULL; 575 struct btrfs_multi_bio *multi = NULL;
576 struct scrub_fixup_nodatasum *fixup;
420 u64 logical = sbio->logical + ix * PAGE_SIZE; 577 u64 logical = sbio->logical + ix * PAGE_SIZE;
421 u64 length; 578 u64 length;
422 int i; 579 int i;
@@ -425,12 +582,30 @@ static void scrub_fixup(struct scrub_bio *sbio, int ix)
425 582
426 if ((sbio->spag[ix].flags & BTRFS_EXTENT_FLAG_DATA) && 583 if ((sbio->spag[ix].flags & BTRFS_EXTENT_FLAG_DATA) &&
427 (sbio->spag[ix].have_csum == 0)) { 584 (sbio->spag[ix].have_csum == 0)) {
585 fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
586 if (!fixup)
587 goto uncorrectable;
588 fixup->sdev = sdev;
589 fixup->logical = logical;
590 fixup->root = fs_info->extent_root;
591 fixup->mirror_num = sbio->spag[ix].mirror_num;
428 /* 592 /*
429 * nodatasum, don't try to fix anything 593 * increment scrubs_running to prevent cancel requests from
430 * FIXME: we can do better, open the inode and trigger a 594 * completing as long as a fixup worker is running. we must also
431 * writeback 595 * increment scrubs_paused to prevent deadlocking on pause
596 * requests used for transactions commits (as the worker uses a
597 * transaction context). it is safe to regard the fixup worker
598 * as paused for all matters practical. effectively, we only
599 * avoid cancellation requests from completing.
432 */ 600 */
433 goto uncorrectable; 601 mutex_lock(&fs_info->scrub_lock);
602 atomic_inc(&fs_info->scrubs_running);
603 atomic_inc(&fs_info->scrubs_paused);
604 mutex_unlock(&fs_info->scrub_lock);
605 atomic_inc(&sdev->fixup_cnt);
606 fixup->work.func = scrub_fixup_nodatasum;
607 btrfs_queue_worker(&fs_info->scrub_workers, &fixup->work);
608 return;
434 } 609 }
435 610
436 length = PAGE_SIZE; 611 length = PAGE_SIZE;
@@ -1425,10 +1600,11 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
1425 ret = scrub_enumerate_chunks(sdev, start, end); 1600 ret = scrub_enumerate_chunks(sdev, start, end);
1426 1601
1427 wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0); 1602 wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);
1428
1429 atomic_dec(&fs_info->scrubs_running); 1603 atomic_dec(&fs_info->scrubs_running);
1430 wake_up(&fs_info->scrub_pause_wait); 1604 wake_up(&fs_info->scrub_pause_wait);
1431 1605
1606 wait_event(sdev->list_wait, atomic_read(&sdev->fixup_cnt) == 0);
1607
1432 if (progress) 1608 if (progress)
1433 memcpy(progress, &sdev->stat, sizeof(*progress)); 1609 memcpy(progress, &sdev->stat, sizeof(*progress));
1434 1610