Diffstat (limited to 'fs/btrfs/scrub.c')
-rw-r--r--  fs/btrfs/scrub.c  476
1 files changed, 433 insertions, 43 deletions
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 5bc4ec827b3d..94cd3a19e9c8 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -17,10 +17,14 @@
17 */ 17 */
18 18
19#include <linux/blkdev.h> 19#include <linux/blkdev.h>
20#include <linux/ratelimit.h>
20#include "ctree.h" 21#include "ctree.h"
21#include "volumes.h" 22#include "volumes.h"
22#include "disk-io.h" 23#include "disk-io.h"
23#include "ordered-data.h" 24#include "ordered-data.h"
25#include "transaction.h"
26#include "backref.h"
27#include "extent_io.h"
24 28
25/* 29/*
26 * This is only the first step towards a full-featured scrub. It reads all 30 * This is only the first step towards a full-featured scrub. It reads all
@@ -60,7 +64,7 @@ static void scrub_fixup(struct scrub_bio *sbio, int ix);
60struct scrub_page { 64struct scrub_page {
61 u64 flags; /* extent flags */ 65 u64 flags; /* extent flags */
62 u64 generation; 66 u64 generation;
63 u64 mirror_num; 67 int mirror_num;
64 int have_csum; 68 int have_csum;
65 u8 csum[BTRFS_CSUM_SIZE]; 69 u8 csum[BTRFS_CSUM_SIZE];
66}; 70};
@@ -84,6 +88,7 @@ struct scrub_dev {
84 int first_free; 88 int first_free;
85 int curr; 89 int curr;
86 atomic_t in_flight; 90 atomic_t in_flight;
91 atomic_t fixup_cnt;
87 spinlock_t list_lock; 92 spinlock_t list_lock;
88 wait_queue_head_t list_wait; 93 wait_queue_head_t list_wait;
89 u16 csum_size; 94 u16 csum_size;
@@ -97,6 +102,27 @@ struct scrub_dev {
97 spinlock_t stat_lock; 102 spinlock_t stat_lock;
98}; 103};
99 104
105struct scrub_fixup_nodatasum {
106 struct scrub_dev *sdev;
107 u64 logical;
108 struct btrfs_root *root;
109 struct btrfs_work work;
110 int mirror_num;
111};
112
113struct scrub_warning {
114 struct btrfs_path *path;
115 u64 extent_item_size;
116 char *scratch_buf;
117 char *msg_buf;
118 const char *errstr;
119 sector_t sector;
120 u64 logical;
121 struct btrfs_device *dev;
122 int msg_bufsize;
123 int scratch_bufsize;
124};
125
100static void scrub_free_csums(struct scrub_dev *sdev) 126static void scrub_free_csums(struct scrub_dev *sdev)
101{ 127{
102 while (!list_empty(&sdev->csum_list)) { 128 while (!list_empty(&sdev->csum_list)) {
@@ -172,12 +198,13 @@ struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev)
172 198
173 if (i != SCRUB_BIOS_PER_DEV-1) 199 if (i != SCRUB_BIOS_PER_DEV-1)
174 sdev->bios[i]->next_free = i + 1; 200 sdev->bios[i]->next_free = i + 1;
175 else 201 else
176 sdev->bios[i]->next_free = -1; 202 sdev->bios[i]->next_free = -1;
177 } 203 }
178 sdev->first_free = 0; 204 sdev->first_free = 0;
179 sdev->curr = -1; 205 sdev->curr = -1;
180 atomic_set(&sdev->in_flight, 0); 206 atomic_set(&sdev->in_flight, 0);
207 atomic_set(&sdev->fixup_cnt, 0);
181 atomic_set(&sdev->cancel_req, 0); 208 atomic_set(&sdev->cancel_req, 0);
182 sdev->csum_size = btrfs_super_csum_size(fs_info->super_copy); 209 sdev->csum_size = btrfs_super_csum_size(fs_info->super_copy);
183 INIT_LIST_HEAD(&sdev->csum_list); 210 INIT_LIST_HEAD(&sdev->csum_list);
@@ -192,24 +219,361 @@ nomem:
192 return ERR_PTR(-ENOMEM); 219 return ERR_PTR(-ENOMEM);
193} 220}
194 221
222static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx)
223{
224 u64 isize;
225 u32 nlink;
226 int ret;
227 int i;
228 struct extent_buffer *eb;
229 struct btrfs_inode_item *inode_item;
230 struct scrub_warning *swarn = ctx;
231 struct btrfs_fs_info *fs_info = swarn->dev->dev_root->fs_info;
232 struct inode_fs_paths *ipath = NULL;
233 struct btrfs_root *local_root;
234 struct btrfs_key root_key;
235
236 root_key.objectid = root;
237 root_key.type = BTRFS_ROOT_ITEM_KEY;
238 root_key.offset = (u64)-1;
239 local_root = btrfs_read_fs_root_no_name(fs_info, &root_key);
240 if (IS_ERR(local_root)) {
241 ret = PTR_ERR(local_root);
242 goto err;
243 }
244
245 ret = inode_item_info(inum, 0, local_root, swarn->path);
246 if (ret) {
247 btrfs_release_path(swarn->path);
248 goto err;
249 }
250
251 eb = swarn->path->nodes[0];
252 inode_item = btrfs_item_ptr(eb, swarn->path->slots[0],
253 struct btrfs_inode_item);
254 isize = btrfs_inode_size(eb, inode_item);
255 nlink = btrfs_inode_nlink(eb, inode_item);
256 btrfs_release_path(swarn->path);
257
258 ipath = init_ipath(4096, local_root, swarn->path);
259 ret = paths_from_inode(inum, ipath);
260
261 if (ret < 0)
262 goto err;
263
264 /*
265 * we deliberately ignore the fact that ipath might have been too small to
266 * hold all of the paths here
267 */
268 for (i = 0; i < ipath->fspath->elem_cnt; ++i)
269 printk(KERN_WARNING "btrfs: %s at logical %llu on dev "
270 "%s, sector %llu, root %llu, inode %llu, offset %llu, "
271 "length %llu, links %u (path: %s)\n", swarn->errstr,
272 swarn->logical, swarn->dev->name,
273 (unsigned long long)swarn->sector, root, inum, offset,
274 min(isize - offset, (u64)PAGE_SIZE), nlink,
275 ipath->fspath->str[i]);
276
277 free_ipath(ipath);
278 return 0;
279
280err:
281 printk(KERN_WARNING "btrfs: %s at logical %llu on dev "
282 "%s, sector %llu, root %llu, inode %llu, offset %llu: path "
283 "resolving failed with ret=%d\n", swarn->errstr,
284 swarn->logical, swarn->dev->name,
285 (unsigned long long)swarn->sector, root, inum, offset, ret);
286
287 free_ipath(ipath);
288 return 0;
289}
290
291static void scrub_print_warning(const char *errstr, struct scrub_bio *sbio,
292 int ix)
293{
294 struct btrfs_device *dev = sbio->sdev->dev;
295 struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
296 struct btrfs_path *path;
297 struct btrfs_key found_key;
298 struct extent_buffer *eb;
299 struct btrfs_extent_item *ei;
300 struct scrub_warning swarn;
301 u32 item_size;
302 int ret;
303 u64 ref_root;
304 u8 ref_level;
305 unsigned long ptr = 0;
306 const int bufsize = 4096;
307 u64 extent_offset;
308
309 path = btrfs_alloc_path();
310
311 swarn.scratch_buf = kmalloc(bufsize, GFP_NOFS);
312 swarn.msg_buf = kmalloc(bufsize, GFP_NOFS);
313 swarn.sector = (sbio->physical + ix * PAGE_SIZE) >> 9;
314 swarn.logical = sbio->logical + ix * PAGE_SIZE;
315 swarn.errstr = errstr;
316 swarn.dev = dev;
317 swarn.msg_bufsize = bufsize;
318 swarn.scratch_bufsize = bufsize;
319
320 if (!path || !swarn.scratch_buf || !swarn.msg_buf)
321 goto out;
322
323 ret = extent_from_logical(fs_info, swarn.logical, path, &found_key);
324 if (ret < 0)
325 goto out;
326
327 extent_offset = swarn.logical - found_key.objectid;
328 swarn.extent_item_size = found_key.offset;
329
330 eb = path->nodes[0];
331 ei = btrfs_item_ptr(eb, path->slots[0], struct btrfs_extent_item);
332 item_size = btrfs_item_size_nr(eb, path->slots[0]);
333
334 if (ret & BTRFS_EXTENT_FLAG_TREE_BLOCK) {
335 do {
336 ret = tree_backref_for_extent(&ptr, eb, ei, item_size,
337 &ref_root, &ref_level);
338 printk(KERN_WARNING "%s at logical %llu on dev %s, "
339 "sector %llu: metadata %s (level %d) in tree "
340 "%llu\n", errstr, swarn.logical, dev->name,
341 (unsigned long long)swarn.sector,
342 ref_level ? "node" : "leaf",
343 ret < 0 ? -1 : ref_level,
344 ret < 0 ? -1 : ref_root);
345 } while (ret != 1);
346 } else {
347 swarn.path = path;
348 iterate_extent_inodes(fs_info, path, found_key.objectid,
349 extent_offset,
350 scrub_print_warning_inode, &swarn);
351 }
352
353out:
354 btrfs_free_path(path);
355 kfree(swarn.scratch_buf);
356 kfree(swarn.msg_buf);
357}
358
359static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *ctx)
360{
361 struct page *page = NULL;
362 unsigned long index;
363 struct scrub_fixup_nodatasum *fixup = ctx;
364 int ret;
365 int corrected = 0;
366 struct btrfs_key key;
367 struct inode *inode = NULL;
368 u64 end = offset + PAGE_SIZE - 1;
369 struct btrfs_root *local_root;
370
371 key.objectid = root;
372 key.type = BTRFS_ROOT_ITEM_KEY;
373 key.offset = (u64)-1;
374 local_root = btrfs_read_fs_root_no_name(fixup->root->fs_info, &key);
375 if (IS_ERR(local_root))
376 return PTR_ERR(local_root);
377
378 key.type = BTRFS_INODE_ITEM_KEY;
379 key.objectid = inum;
380 key.offset = 0;
381 inode = btrfs_iget(fixup->root->fs_info->sb, &key, local_root, NULL);
382 if (IS_ERR(inode))
383 return PTR_ERR(inode);
384
385 index = offset >> PAGE_CACHE_SHIFT;
386
387 page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
388 if (!page) {
389 ret = -ENOMEM;
390 goto out;
391 }
392
393 if (PageUptodate(page)) {
394 struct btrfs_mapping_tree *map_tree;
395 if (PageDirty(page)) {
396 /*
397 * we need to write the data to the defective sector. the
398 * data that was in that sector is not in memory,
399 * because the page was modified. we must not write the
400 * modified page to that sector.
401 *
402 * TODO: what could be done here: wait for the delalloc
403 * runner to write out that page (might involve
404 * COW) and see whether the sector is still
405 * referenced afterwards.
406 *
407 * For the meantime, we'll treat this error as
408 * uncorrectable, although there is a chance that a
409 * later scrub will find the bad sector again and that
410 * there's no dirty page in memory by then.
411 */
412 ret = -EIO;
413 goto out;
414 }
415 map_tree = &BTRFS_I(inode)->root->fs_info->mapping_tree;
416 ret = repair_io_failure(map_tree, offset, PAGE_SIZE,
417 fixup->logical, page,
418 fixup->mirror_num);
419 unlock_page(page);
420 corrected = !ret;
421 } else {
422 /*
423 * we need to get good data first. the general readpage path
424 * will call repair_io_failure for us, we just have to make
425 * sure we read the bad mirror.
426 */
427 ret = set_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
428 EXTENT_DAMAGED, GFP_NOFS);
429 if (ret) {
430 /* set_extent_bits should give proper error */
431 WARN_ON(ret > 0);
432 if (ret > 0)
433 ret = -EFAULT;
434 goto out;
435 }
436
437 ret = extent_read_full_page(&BTRFS_I(inode)->io_tree, page,
438 btrfs_get_extent,
439 fixup->mirror_num);
440 wait_on_page_locked(page);
441
442 corrected = !test_range_bit(&BTRFS_I(inode)->io_tree, offset,
443 end, EXTENT_DAMAGED, 0, NULL);
444 if (!corrected)
445 clear_extent_bits(&BTRFS_I(inode)->io_tree, offset, end,
446 EXTENT_DAMAGED, GFP_NOFS);
447 }
448
449out:
450 if (page)
451 put_page(page);
452 if (inode)
453 iput(inode);
454
455 if (ret < 0)
456 return ret;
457
458 if (ret == 0 && corrected) {
459 /*
460 * we only need to call readpage for one of the inodes belonging
461 * to this extent, so make iterate_extent_inodes stop
462 */
463 return 1;
464 }
465
466 return -EIO;
467}
468
469static void scrub_fixup_nodatasum(struct btrfs_work *work)
470{
471 int ret;
472 struct scrub_fixup_nodatasum *fixup;
473 struct scrub_dev *sdev;
474 struct btrfs_trans_handle *trans = NULL;
475 struct btrfs_fs_info *fs_info;
476 struct btrfs_path *path;
477 int uncorrectable = 0;
478
479 fixup = container_of(work, struct scrub_fixup_nodatasum, work);
480 sdev = fixup->sdev;
481 fs_info = fixup->root->fs_info;
482
483 path = btrfs_alloc_path();
484 if (!path) {
485 spin_lock(&sdev->stat_lock);
486 ++sdev->stat.malloc_errors;
487 spin_unlock(&sdev->stat_lock);
488 uncorrectable = 1;
489 goto out;
490 }
491
492 trans = btrfs_join_transaction(fixup->root);
493 if (IS_ERR(trans)) {
494 uncorrectable = 1;
495 goto out;
496 }
497
498 /*
499 * the idea is to trigger a regular read through the standard path. we
500 * read a page from the (failed) logical address by specifying the
501 * corresponding copynum of the failed sector. thus, that readpage is
502 * expected to fail.
503 * that is the point where on-the-fly error correction will kick in
504 * (once it's finished) and rewrite the failed sector if a good copy
505 * can be found.
506 */
507 ret = iterate_inodes_from_logical(fixup->logical, fixup->root->fs_info,
508 path, scrub_fixup_readpage,
509 fixup);
510 if (ret < 0) {
511 uncorrectable = 1;
512 goto out;
513 }
514 WARN_ON(ret != 1);
515
516 spin_lock(&sdev->stat_lock);
517 ++sdev->stat.corrected_errors;
518 spin_unlock(&sdev->stat_lock);
519
520out:
521 if (trans && !IS_ERR(trans))
522 btrfs_end_transaction(trans, fixup->root);
523 if (uncorrectable) {
524 spin_lock(&sdev->stat_lock);
525 ++sdev->stat.uncorrectable_errors;
526 spin_unlock(&sdev->stat_lock);
527 printk_ratelimited(KERN_ERR "btrfs: unable to fixup "
528 "(nodatasum) error at logical %llu\n",
529 fixup->logical);
530 }
531
532 btrfs_free_path(path);
533 kfree(fixup);
534
535 /* see the caller for why we're pretending to be paused in the scrub counters */
536 mutex_lock(&fs_info->scrub_lock);
537 atomic_dec(&fs_info->scrubs_running);
538 atomic_dec(&fs_info->scrubs_paused);
539 mutex_unlock(&fs_info->scrub_lock);
540 atomic_dec(&sdev->fixup_cnt);
541 wake_up(&fs_info->scrub_pause_wait);
542 wake_up(&sdev->list_wait);
543}
544
195/* 545/*
196 * scrub_recheck_error gets called when either verification of the page 546 * scrub_recheck_error gets called when either verification of the page
197 * failed or the bio failed to read, e.g. with EIO. In the latter case, 547 * failed or the bio failed to read, e.g. with EIO. In the latter case,
198 * recheck_error gets called for every page in the bio, even though only 548 * recheck_error gets called for every page in the bio, even though only
199 * one may be bad 549 * one may be bad
200 */ 550 */
201static void scrub_recheck_error(struct scrub_bio *sbio, int ix) 551static int scrub_recheck_error(struct scrub_bio *sbio, int ix)
202{ 552{
553 struct scrub_dev *sdev = sbio->sdev;
554 u64 sector = (sbio->physical + ix * PAGE_SIZE) >> 9;
555 static DEFINE_RATELIMIT_STATE(_rs, DEFAULT_RATELIMIT_INTERVAL,
556 DEFAULT_RATELIMIT_BURST);
557
203 if (sbio->err) { 558 if (sbio->err) {
204 if (scrub_fixup_io(READ, sbio->sdev->dev->bdev, 559 if (scrub_fixup_io(READ, sbio->sdev->dev->bdev, sector,
205 (sbio->physical + ix * PAGE_SIZE) >> 9,
206 sbio->bio->bi_io_vec[ix].bv_page) == 0) { 560 sbio->bio->bi_io_vec[ix].bv_page) == 0) {
207 if (scrub_fixup_check(sbio, ix) == 0) 561 if (scrub_fixup_check(sbio, ix) == 0)
208 return; 562 return 0;
209 } 563 }
564 if (__ratelimit(&_rs))
565 scrub_print_warning("i/o error", sbio, ix);
566 } else {
567 if (__ratelimit(&_rs))
568 scrub_print_warning("checksum error", sbio, ix);
210 } 569 }
211 570
571 spin_lock(&sdev->stat_lock);
572 ++sdev->stat.read_errors;
573 spin_unlock(&sdev->stat_lock);
574
212 scrub_fixup(sbio, ix); 575 scrub_fixup(sbio, ix);
576 return 1;
213} 577}
214 578
215static int scrub_fixup_check(struct scrub_bio *sbio, int ix) 579static int scrub_fixup_check(struct scrub_bio *sbio, int ix)
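
Reviewer's note on the counter handling above (a sketch, not part of the patch): the fixup worker bumps both scrubs_running and scrubs_paused and registers itself in fixup_cnt. This leans on the scrub pause protocol implemented elsewhere in scrub.c, which, roughly, only lets a pause request (e.g. from a transaction commit) proceed once every running scrub has also reported itself as paused:

	/*
	 * Sketch of the pause side this relies on; names and locking are
	 * recalled from the surrounding scrub.c, not taken from this hunk.
	 */
	atomic_inc(&fs_info->scrub_pause_req);
	wait_event(fs_info->scrub_pause_wait,
		   atomic_read(&fs_info->scrubs_paused) ==
		   atomic_read(&fs_info->scrubs_running));

Because a fixup worker increments both counters at once, it never holds up a commit, while the elevated scrubs_running still keeps cancellation from completing underneath it; only the fixup_cnt wait added to btrfs_scrub_dev() at the end of this patch finally reaps it.
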
@@ -247,7 +611,8 @@ static void scrub_fixup(struct scrub_bio *sbio, int ix)
247 struct scrub_dev *sdev = sbio->sdev; 611 struct scrub_dev *sdev = sbio->sdev;
248 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; 612 struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info;
249 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; 613 struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree;
250 struct btrfs_multi_bio *multi = NULL; 614 struct btrfs_bio *bbio = NULL;
615 struct scrub_fixup_nodatasum *fixup;
251 u64 logical = sbio->logical + ix * PAGE_SIZE; 616 u64 logical = sbio->logical + ix * PAGE_SIZE;
252 u64 length; 617 u64 length;
253 int i; 618 int i;
@@ -256,18 +621,36 @@ static void scrub_fixup(struct scrub_bio *sbio, int ix)
256 621
257 if ((sbio->spag[ix].flags & BTRFS_EXTENT_FLAG_DATA) && 622 if ((sbio->spag[ix].flags & BTRFS_EXTENT_FLAG_DATA) &&
258 (sbio->spag[ix].have_csum == 0)) { 623 (sbio->spag[ix].have_csum == 0)) {
624 fixup = kzalloc(sizeof(*fixup), GFP_NOFS);
625 if (!fixup)
626 goto uncorrectable;
627 fixup->sdev = sdev;
628 fixup->logical = logical;
629 fixup->root = fs_info->extent_root;
630 fixup->mirror_num = sbio->spag[ix].mirror_num;
259 /* 631 /*
260 * nodatasum, don't try to fix anything 632 * increment scrubs_running to prevent cancel requests from
261 * FIXME: we can do better, open the inode and trigger a 633 * completing as long as a fixup worker is running. we must also
262 * writeback 634 * increment scrubs_paused to prevent deadlocking on pause
635 * requests used for transaction commits (as the worker uses a
636 * transaction context). it is safe to regard the fixup worker
637 * as paused for all practical matters. effectively, we only
638 * prevent cancellation requests from completing.
263 */ 639 */
264 goto uncorrectable; 640 mutex_lock(&fs_info->scrub_lock);
641 atomic_inc(&fs_info->scrubs_running);
642 atomic_inc(&fs_info->scrubs_paused);
643 mutex_unlock(&fs_info->scrub_lock);
644 atomic_inc(&sdev->fixup_cnt);
645 fixup->work.func = scrub_fixup_nodatasum;
646 btrfs_queue_worker(&fs_info->scrub_workers, &fixup->work);
647 return;
265 } 648 }
266 649
267 length = PAGE_SIZE; 650 length = PAGE_SIZE;
268 ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length, 651 ret = btrfs_map_block(map_tree, REQ_WRITE, logical, &length,
269 &multi, 0); 652 &bbio, 0);
270 if (ret || !multi || length < PAGE_SIZE) { 653 if (ret || !bbio || length < PAGE_SIZE) {
271 printk(KERN_ERR 654 printk(KERN_ERR
272 "scrub_fixup: btrfs_map_block failed us for %llu\n", 655 "scrub_fixup: btrfs_map_block failed us for %llu\n",
273 (unsigned long long)logical); 656 (unsigned long long)logical);
@@ -275,19 +658,19 @@ static void scrub_fixup(struct scrub_bio *sbio, int ix)
275 return; 658 return;
276 } 659 }
277 660
278 if (multi->num_stripes == 1) 661 if (bbio->num_stripes == 1)
279 /* there aren't any replicas */ 662 /* there aren't any replicas */
280 goto uncorrectable; 663 goto uncorrectable;
281 664
282 /* 665 /*
283 * first find a good copy 666 * first find a good copy
284 */ 667 */
285 for (i = 0; i < multi->num_stripes; ++i) { 668 for (i = 0; i < bbio->num_stripes; ++i) {
286 if (i == sbio->spag[ix].mirror_num) 669 if (i + 1 == sbio->spag[ix].mirror_num)
287 continue; 670 continue;
288 671
289 if (scrub_fixup_io(READ, multi->stripes[i].dev->bdev, 672 if (scrub_fixup_io(READ, bbio->stripes[i].dev->bdev,
290 multi->stripes[i].physical >> 9, 673 bbio->stripes[i].physical >> 9,
291 sbio->bio->bi_io_vec[ix].bv_page)) { 674 sbio->bio->bi_io_vec[ix].bv_page)) {
292 /* I/O-error, this is not a good copy */ 675 /* I/O-error, this is not a good copy */
293 continue; 676 continue;
@@ -296,7 +679,7 @@ static void scrub_fixup(struct scrub_bio *sbio, int ix)
296 if (scrub_fixup_check(sbio, ix) == 0) 679 if (scrub_fixup_check(sbio, ix) == 0)
297 break; 680 break;
298 } 681 }
299 if (i == multi->num_stripes) 682 if (i == bbio->num_stripes)
300 goto uncorrectable; 683 goto uncorrectable;
301 684
302 if (!sdev->readonly) { 685 if (!sdev->readonly) {
@@ -311,25 +694,23 @@ static void scrub_fixup(struct scrub_bio *sbio, int ix)
311 } 694 }
312 } 695 }
313 696
314 kfree(multi); 697 kfree(bbio);
315 spin_lock(&sdev->stat_lock); 698 spin_lock(&sdev->stat_lock);
316 ++sdev->stat.corrected_errors; 699 ++sdev->stat.corrected_errors;
317 spin_unlock(&sdev->stat_lock); 700 spin_unlock(&sdev->stat_lock);
318 701
319 if (printk_ratelimit()) 702 printk_ratelimited(KERN_ERR "btrfs: fixed up error at logical %llu\n",
320 printk(KERN_ERR "btrfs: fixed up at %llu\n", 703 (unsigned long long)logical);
321 (unsigned long long)logical);
322 return; 704 return;
323 705
324uncorrectable: 706uncorrectable:
325 kfree(multi); 707 kfree(bbio);
326 spin_lock(&sdev->stat_lock); 708 spin_lock(&sdev->stat_lock);
327 ++sdev->stat.uncorrectable_errors; 709 ++sdev->stat.uncorrectable_errors;
328 spin_unlock(&sdev->stat_lock); 710 spin_unlock(&sdev->stat_lock);
329 711
330 if (printk_ratelimit()) 712 printk_ratelimited(KERN_ERR "btrfs: unable to fixup (regular) error at "
331 printk(KERN_ERR "btrfs: unable to fixup at %llu\n", 713 "logical %llu\n", (unsigned long long)logical);
332 (unsigned long long)logical);
333} 714}
334 715
335static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector, 716static int scrub_fixup_io(int rw, struct block_device *bdev, sector_t sector,
@@ -379,8 +760,14 @@ static void scrub_checksum(struct btrfs_work *work)
379 int ret; 760 int ret;
380 761
381 if (sbio->err) { 762 if (sbio->err) {
763 ret = 0;
382 for (i = 0; i < sbio->count; ++i) 764 for (i = 0; i < sbio->count; ++i)
383 scrub_recheck_error(sbio, i); 765 ret |= scrub_recheck_error(sbio, i);
766 if (!ret) {
767 spin_lock(&sdev->stat_lock);
768 ++sdev->stat.unverified_errors;
769 spin_unlock(&sdev->stat_lock);
770 }
384 771
385 sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1); 772 sbio->bio->bi_flags &= ~(BIO_POOL_MASK - 1);
386 sbio->bio->bi_flags |= 1 << BIO_UPTODATE; 773 sbio->bio->bi_flags |= 1 << BIO_UPTODATE;
@@ -393,10 +780,6 @@ static void scrub_checksum(struct btrfs_work *work)
393 bi->bv_offset = 0; 780 bi->bv_offset = 0;
394 bi->bv_len = PAGE_SIZE; 781 bi->bv_len = PAGE_SIZE;
395 } 782 }
396
397 spin_lock(&sdev->stat_lock);
398 ++sdev->stat.read_errors;
399 spin_unlock(&sdev->stat_lock);
400 goto out; 783 goto out;
401 } 784 }
402 for (i = 0; i < sbio->count; ++i) { 785 for (i = 0; i < sbio->count; ++i) {
@@ -417,8 +800,14 @@ static void scrub_checksum(struct btrfs_work *work)
417 WARN_ON(1); 800 WARN_ON(1);
418 } 801 }
419 kunmap_atomic(buffer, KM_USER0); 802 kunmap_atomic(buffer, KM_USER0);
420 if (ret) 803 if (ret) {
421 scrub_recheck_error(sbio, i); 804 ret = scrub_recheck_error(sbio, i);
805 if (!ret) {
806 spin_lock(&sdev->stat_lock);
807 ++sdev->stat.unverified_errors;
808 spin_unlock(&sdev->stat_lock);
809 }
810 }
422 } 811 }
423 812
424out: 813out:
@@ -601,7 +990,7 @@ nomem:
601} 990}
602 991
603static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len, 992static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len,
604 u64 physical, u64 flags, u64 gen, u64 mirror_num, 993 u64 physical, u64 flags, u64 gen, int mirror_num,
605 u8 *csum, int force) 994 u8 *csum, int force)
606{ 995{
607 struct scrub_bio *sbio; 996 struct scrub_bio *sbio;
@@ -698,7 +1087,7 @@ static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len,
698 1087
699/* scrub extent tries to collect up to 64 kB for each bio */ 1088/* scrub extent tries to collect up to 64 kB for each bio */
700static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len, 1089static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len,
701 u64 physical, u64 flags, u64 gen, u64 mirror_num) 1090 u64 physical, u64 flags, u64 gen, int mirror_num)
702{ 1091{
703 int ret; 1092 int ret;
704 u8 csum[BTRFS_CSUM_SIZE]; 1093 u8 csum[BTRFS_CSUM_SIZE];
@@ -743,7 +1132,7 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
743 u64 physical; 1132 u64 physical;
744 u64 logical; 1133 u64 logical;
745 u64 generation; 1134 u64 generation;
746 u64 mirror_num; 1135 int mirror_num;
747 struct reada_control *reada1; 1136 struct reada_control *reada1;
748 struct reada_control *reada2; 1137 struct reada_control *reada2;
749 struct btrfs_key key_start; 1138 struct btrfs_key key_start;
@@ -758,21 +1147,21 @@ static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev,
758 if (map->type & BTRFS_BLOCK_GROUP_RAID0) { 1147 if (map->type & BTRFS_BLOCK_GROUP_RAID0) {
759 offset = map->stripe_len * num; 1148 offset = map->stripe_len * num;
760 increment = map->stripe_len * map->num_stripes; 1149 increment = map->stripe_len * map->num_stripes;
761 mirror_num = 0; 1150 mirror_num = 1;
762 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { 1151 } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) {
763 int factor = map->num_stripes / map->sub_stripes; 1152 int factor = map->num_stripes / map->sub_stripes;
764 offset = map->stripe_len * (num / map->sub_stripes); 1153 offset = map->stripe_len * (num / map->sub_stripes);
765 increment = map->stripe_len * factor; 1154 increment = map->stripe_len * factor;
766 mirror_num = num % map->sub_stripes; 1155 mirror_num = num % map->sub_stripes + 1;
767 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { 1156 } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) {
768 increment = map->stripe_len; 1157 increment = map->stripe_len;
769 mirror_num = num % map->num_stripes; 1158 mirror_num = num % map->num_stripes + 1;
770 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { 1159 } else if (map->type & BTRFS_BLOCK_GROUP_DUP) {
771 increment = map->stripe_len; 1160 increment = map->stripe_len;
772 mirror_num = num % map->num_stripes; 1161 mirror_num = num % map->num_stripes + 1;
773 } else { 1162 } else {
774 increment = map->stripe_len; 1163 increment = map->stripe_len;
775 mirror_num = 0; 1164 mirror_num = 1;
776 } 1165 }
777 1166
778 path = btrfs_alloc_path(); 1167 path = btrfs_alloc_path();
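
Note on the mirror_num changes in this hunk (illustrative, not part of the patch): scrub now uses btrfs's 1-based mirror numbering, where 0 conventionally means "no preference, let btrfs_map_block() pick a copy". A quick worked example for a 2-copy RAID1 chunk (map->num_stripes == 2):

	/* worked example only */
	mirror_num = 0 % map->num_stripes + 1;	/* stripe 0 is mirror 1 */
	mirror_num = 1 % map->num_stripes + 1;	/* stripe 1 is mirror 2 */

This is also why scrub_fixup() above now skips the failing copy with "i + 1 == sbio->spag[ix].mirror_num" while probing the remaining stripes for a good one.
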
@@ -1241,10 +1630,11 @@ int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end,
1241 ret = scrub_enumerate_chunks(sdev, start, end); 1630 ret = scrub_enumerate_chunks(sdev, start, end);
1242 1631
1243 wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0); 1632 wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);
1244
1245 atomic_dec(&fs_info->scrubs_running); 1633 atomic_dec(&fs_info->scrubs_running);
1246 wake_up(&fs_info->scrub_pause_wait); 1634 wake_up(&fs_info->scrub_pause_wait);
1247 1635
1636 wait_event(sdev->list_wait, atomic_read(&sdev->fixup_cnt) == 0);
1637
1248 if (progress) 1638 if (progress)
1249 memcpy(progress, &sdev->stat, sizeof(*progress)); 1639 memcpy(progress, &sdev->stat, sizeof(*progress));
1250 1640
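
Design note on the teardown order at the end of btrfs_scrub_dev() (a recap of the two hunks involved, with the scrub_lock elided): scrubs_running for the main scrub loop is dropped as soon as regular I/O has drained, but outstanding fixup workers keep their own scrubs_running/scrubs_paused increments, so commits can proceed while cancellation cannot complete; the final fixup_cnt wait makes sure the function does not return (and sdev is not freed) before the last worker has finished.

	/* teardown side (this hunk) */
	wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0);
	atomic_dec(&fs_info->scrubs_running);
	wake_up(&fs_info->scrub_pause_wait);
	wait_event(sdev->list_wait, atomic_read(&sdev->fixup_cnt) == 0);

	/* worker side (end of scrub_fixup_nodatasum(), earlier in this patch) */
	atomic_dec(&fs_info->scrubs_running);
	atomic_dec(&fs_info->scrubs_paused);
	atomic_dec(&sdev->fixup_cnt);
	wake_up(&fs_info->scrub_pause_wait);
	wake_up(&sdev->list_wait);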