Diffstat (limited to 'fs/btrfs/scrub.c')
-rw-r--r-- | fs/btrfs/scrub.c | 883
1 file changed, 812 insertions, 71 deletions
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 460e30bb1884..61157a26cf2a 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -25,6 +25,7 @@ | |||
25 | #include "transaction.h" | 25 | #include "transaction.h" |
26 | #include "backref.h" | 26 | #include "backref.h" |
27 | #include "extent_io.h" | 27 | #include "extent_io.h" |
28 | #include "dev-replace.h" | ||
28 | #include "check-integrity.h" | 29 | #include "check-integrity.h" |
29 | #include "rcu-string.h" | 30 | #include "rcu-string.h" |
30 | 31 | ||
@@ -44,8 +45,15 @@ | |||
44 | struct scrub_block; | 45 | struct scrub_block; |
45 | struct scrub_ctx; | 46 | struct scrub_ctx; |
46 | 47 | ||
47 | #define SCRUB_PAGES_PER_BIO 16 /* 64k per bio */ | 48 | /* |
48 | #define SCRUB_BIOS_PER_CTX 16 /* 1 MB per device in flight */ | 49 | * the following three values only influence the performance. |
50 | * The last one configures the number of parallel and outstanding I/O | ||
51 | * operations. The first two values configure an upper limit for the number | ||
52 | * of (dynamically allocated) pages that are added to a bio. | ||
53 | */ | ||
54 | #define SCRUB_PAGES_PER_RD_BIO 32 /* 128k per bio */ | ||
55 | #define SCRUB_PAGES_PER_WR_BIO 32 /* 128k per bio */ | ||
56 | #define SCRUB_BIOS_PER_SCTX 64 /* 8MB per device in flight */ | ||
49 | 57 | ||
50 | /* | 58 | /* |
51 | * the following value times PAGE_SIZE needs to be large enough to match the | 59 | * the following value times PAGE_SIZE needs to be large enough to match the |
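For orientation, a minimal user-space sketch of the arithmetic behind the new limits above, assuming 4 KiB pages (the sizes quoted in the in-line comments); the program and its names are illustrative only and not part of the patch:

#include <stdio.h>

/* Values taken from the hunk above; the page size is an assumption. */
#define ASSUMED_PAGE_SIZE      4096ULL
#define SCRUB_PAGES_PER_RD_BIO 32	/* "128k per bio" */
#define SCRUB_PAGES_PER_WR_BIO 32	/* "128k per bio" */
#define SCRUB_BIOS_PER_SCTX    64	/* "8MB per device in flight" */

int main(void)
{
	unsigned long long rd_bio = SCRUB_PAGES_PER_RD_BIO * ASSUMED_PAGE_SIZE;
	unsigned long long wr_bio = SCRUB_PAGES_PER_WR_BIO * ASSUMED_PAGE_SIZE;
	unsigned long long in_flight = SCRUB_BIOS_PER_SCTX * rd_bio;

	/* 32 * 4 KiB = 128 KiB per bio, 64 * 128 KiB = 8 MiB per device */
	printf("read bio:  %llu KiB\n", rd_bio >> 10);
	printf("write bio: %llu KiB\n", wr_bio >> 10);
	printf("in flight: %llu MiB\n", in_flight >> 20);
	return 0;
}
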
@@ -62,6 +70,7 @@ struct scrub_page { | |||
62 | u64 generation; | 70 | u64 generation; |
63 | u64 logical; | 71 | u64 logical; |
64 | u64 physical; | 72 | u64 physical; |
73 | u64 physical_for_dev_replace; | ||
65 | atomic_t ref_count; | 74 | atomic_t ref_count; |
66 | struct { | 75 | struct { |
67 | unsigned int mirror_num:8; | 76 | unsigned int mirror_num:8; |
@@ -79,7 +88,11 @@ struct scrub_bio { | |||
79 | int err; | 88 | int err; |
80 | u64 logical; | 89 | u64 logical; |
81 | u64 physical; | 90 | u64 physical; |
82 | struct scrub_page *pagev[SCRUB_PAGES_PER_BIO]; | 91 | #if SCRUB_PAGES_PER_WR_BIO >= SCRUB_PAGES_PER_RD_BIO |
92 | struct scrub_page *pagev[SCRUB_PAGES_PER_WR_BIO]; | ||
93 | #else | ||
94 | struct scrub_page *pagev[SCRUB_PAGES_PER_RD_BIO]; | ||
95 | #endif | ||
83 | int page_count; | 96 | int page_count; |
84 | int next_free; | 97 | int next_free; |
85 | struct btrfs_work work; | 98 | struct btrfs_work work; |
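The #if/#else above sizes pagev[] for whichever per-bio page limit is larger, because the same struct scrub_bio now serves both the read and the write path. A stand-alone sketch of the same compile-time-max trick, with hypothetical names, assuming nothing beyond what the hunk shows:

#include <stdio.h>

#define PAGES_PER_RD_BIO 32
#define PAGES_PER_WR_BIO 32

#if PAGES_PER_WR_BIO >= PAGES_PER_RD_BIO
#define PAGES_PER_BIO_MAX PAGES_PER_WR_BIO
#else
#define PAGES_PER_BIO_MAX PAGES_PER_RD_BIO
#endif

struct demo_bio {
	void *pagev[PAGES_PER_BIO_MAX];	/* big enough for either path */
	int page_count;
};

int main(void)
{
	printf("pagev[] holds up to %d pages\n", PAGES_PER_BIO_MAX);
	return 0;
}
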
@@ -99,8 +112,16 @@ struct scrub_block { | |||
99 | }; | 112 | }; |
100 | }; | 113 | }; |
101 | 114 | ||
115 | struct scrub_wr_ctx { | ||
116 | struct scrub_bio *wr_curr_bio; | ||
117 | struct btrfs_device *tgtdev; | ||
118 | int pages_per_wr_bio; /* <= SCRUB_PAGES_PER_WR_BIO */ | ||
119 | atomic_t flush_all_writes; | ||
120 | struct mutex wr_lock; | ||
121 | }; | ||
122 | |||
102 | struct scrub_ctx { | 123 | struct scrub_ctx { |
103 | struct scrub_bio *bios[SCRUB_BIOS_PER_CTX]; | 124 | struct scrub_bio *bios[SCRUB_BIOS_PER_SCTX]; |
104 | struct btrfs_root *dev_root; | 125 | struct btrfs_root *dev_root; |
105 | int first_free; | 126 | int first_free; |
106 | int curr; | 127 | int curr; |
@@ -112,12 +133,13 @@ struct scrub_ctx { | |||
112 | struct list_head csum_list; | 133 | struct list_head csum_list; |
113 | atomic_t cancel_req; | 134 | atomic_t cancel_req; |
114 | int readonly; | 135 | int readonly; |
115 | int pages_per_bio; /* <= SCRUB_PAGES_PER_BIO */ | 136 | int pages_per_rd_bio; |
116 | u32 sectorsize; | 137 | u32 sectorsize; |
117 | u32 nodesize; | 138 | u32 nodesize; |
118 | u32 leafsize; | 139 | u32 leafsize; |
119 | 140 | ||
120 | int is_dev_replace; | 141 | int is_dev_replace; |
142 | struct scrub_wr_ctx wr_ctx; | ||
121 | 143 | ||
122 | /* | 144 | /* |
123 | * statistics | 145 | * statistics |
@@ -135,6 +157,15 @@ struct scrub_fixup_nodatasum { | |||
135 | int mirror_num; | 157 | int mirror_num; |
136 | }; | 158 | }; |
137 | 159 | ||
160 | struct scrub_copy_nocow_ctx { | ||
161 | struct scrub_ctx *sctx; | ||
162 | u64 logical; | ||
163 | u64 len; | ||
164 | int mirror_num; | ||
165 | u64 physical_for_dev_replace; | ||
166 | struct btrfs_work work; | ||
167 | }; | ||
168 | |||
138 | struct scrub_warning { | 169 | struct scrub_warning { |
139 | struct btrfs_path *path; | 170 | struct btrfs_path *path; |
140 | u64 extent_item_size; | 171 | u64 extent_item_size; |
@@ -156,8 +187,9 @@ static void scrub_pending_trans_workers_dec(struct scrub_ctx *sctx); | |||
156 | static int scrub_handle_errored_block(struct scrub_block *sblock_to_check); | 187 | static int scrub_handle_errored_block(struct scrub_block *sblock_to_check); |
157 | static int scrub_setup_recheck_block(struct scrub_ctx *sctx, | 188 | static int scrub_setup_recheck_block(struct scrub_ctx *sctx, |
158 | struct btrfs_fs_info *fs_info, | 189 | struct btrfs_fs_info *fs_info, |
190 | struct scrub_block *original_sblock, | ||
159 | u64 length, u64 logical, | 191 | u64 length, u64 logical, |
160 | struct scrub_block *sblock); | 192 | struct scrub_block *sblocks_for_recheck); |
161 | static void scrub_recheck_block(struct btrfs_fs_info *fs_info, | 193 | static void scrub_recheck_block(struct btrfs_fs_info *fs_info, |
162 | struct scrub_block *sblock, int is_metadata, | 194 | struct scrub_block *sblock, int is_metadata, |
163 | int have_csum, u8 *csum, u64 generation, | 195 | int have_csum, u8 *csum, u64 generation, |
@@ -174,6 +206,9 @@ static int scrub_repair_block_from_good_copy(struct scrub_block *sblock_bad, | |||
174 | static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, | 206 | static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, |
175 | struct scrub_block *sblock_good, | 207 | struct scrub_block *sblock_good, |
176 | int page_num, int force_write); | 208 | int page_num, int force_write); |
209 | static void scrub_write_block_to_dev_replace(struct scrub_block *sblock); | ||
210 | static int scrub_write_page_to_dev_replace(struct scrub_block *sblock, | ||
211 | int page_num); | ||
177 | static int scrub_checksum_data(struct scrub_block *sblock); | 212 | static int scrub_checksum_data(struct scrub_block *sblock); |
178 | static int scrub_checksum_tree_block(struct scrub_block *sblock); | 213 | static int scrub_checksum_tree_block(struct scrub_block *sblock); |
179 | static int scrub_checksum_super(struct scrub_block *sblock); | 214 | static int scrub_checksum_super(struct scrub_block *sblock); |
@@ -181,14 +216,38 @@ static void scrub_block_get(struct scrub_block *sblock); | |||
181 | static void scrub_block_put(struct scrub_block *sblock); | 216 | static void scrub_block_put(struct scrub_block *sblock); |
182 | static void scrub_page_get(struct scrub_page *spage); | 217 | static void scrub_page_get(struct scrub_page *spage); |
183 | static void scrub_page_put(struct scrub_page *spage); | 218 | static void scrub_page_put(struct scrub_page *spage); |
184 | static int scrub_add_page_to_bio(struct scrub_ctx *sctx, | 219 | static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx, |
185 | struct scrub_page *spage); | 220 | struct scrub_page *spage); |
186 | static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len, | 221 | static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len, |
187 | u64 physical, struct btrfs_device *dev, u64 flags, | 222 | u64 physical, struct btrfs_device *dev, u64 flags, |
188 | u64 gen, int mirror_num, u8 *csum, int force); | 223 | u64 gen, int mirror_num, u8 *csum, int force, |
224 | u64 physical_for_dev_replace); | ||
189 | static void scrub_bio_end_io(struct bio *bio, int err); | 225 | static void scrub_bio_end_io(struct bio *bio, int err); |
190 | static void scrub_bio_end_io_worker(struct btrfs_work *work); | 226 | static void scrub_bio_end_io_worker(struct btrfs_work *work); |
191 | static void scrub_block_complete(struct scrub_block *sblock); | 227 | static void scrub_block_complete(struct scrub_block *sblock); |
228 | static void scrub_remap_extent(struct btrfs_fs_info *fs_info, | ||
229 | u64 extent_logical, u64 extent_len, | ||
230 | u64 *extent_physical, | ||
231 | struct btrfs_device **extent_dev, | ||
232 | int *extent_mirror_num); | ||
233 | static int scrub_setup_wr_ctx(struct scrub_ctx *sctx, | ||
234 | struct scrub_wr_ctx *wr_ctx, | ||
235 | struct btrfs_fs_info *fs_info, | ||
236 | struct btrfs_device *dev, | ||
237 | int is_dev_replace); | ||
238 | static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx); | ||
239 | static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx, | ||
240 | struct scrub_page *spage); | ||
241 | static void scrub_wr_submit(struct scrub_ctx *sctx); | ||
242 | static void scrub_wr_bio_end_io(struct bio *bio, int err); | ||
243 | static void scrub_wr_bio_end_io_worker(struct btrfs_work *work); | ||
244 | static int write_page_nocow(struct scrub_ctx *sctx, | ||
245 | u64 physical_for_dev_replace, struct page *page); | ||
246 | static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, | ||
247 | void *ctx); | ||
248 | static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, | ||
249 | int mirror_num, u64 physical_for_dev_replace); | ||
250 | static void copy_nocow_pages_worker(struct btrfs_work *work); | ||
192 | 251 | ||
193 | 252 | ||
194 | static void scrub_pending_bio_inc(struct scrub_ctx *sctx) | 253 | static void scrub_pending_bio_inc(struct scrub_ctx *sctx) |
@@ -262,19 +321,20 @@ static noinline_for_stack void scrub_free_ctx(struct scrub_ctx *sctx) | |||
262 | if (!sctx) | 321 | if (!sctx) |
263 | return; | 322 | return; |
264 | 323 | ||
324 | scrub_free_wr_ctx(&sctx->wr_ctx); | ||
325 | |||
265 | /* this can happen when scrub is cancelled */ | 326 | /* this can happen when scrub is cancelled */ |
266 | if (sctx->curr != -1) { | 327 | if (sctx->curr != -1) { |
267 | struct scrub_bio *sbio = sctx->bios[sctx->curr]; | 328 | struct scrub_bio *sbio = sctx->bios[sctx->curr]; |
268 | 329 | ||
269 | for (i = 0; i < sbio->page_count; i++) { | 330 | for (i = 0; i < sbio->page_count; i++) { |
270 | BUG_ON(!sbio->pagev[i]); | 331 | WARN_ON(!sbio->pagev[i]->page); |
271 | BUG_ON(!sbio->pagev[i]->page); | ||
272 | scrub_block_put(sbio->pagev[i]->sblock); | 332 | scrub_block_put(sbio->pagev[i]->sblock); |
273 | } | 333 | } |
274 | bio_put(sbio->bio); | 334 | bio_put(sbio->bio); |
275 | } | 335 | } |
276 | 336 | ||
277 | for (i = 0; i < SCRUB_BIOS_PER_CTX; ++i) { | 337 | for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) { |
278 | struct scrub_bio *sbio = sctx->bios[i]; | 338 | struct scrub_bio *sbio = sctx->bios[i]; |
279 | 339 | ||
280 | if (!sbio) | 340 | if (!sbio) |
@@ -292,18 +352,29 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) | |||
292 | struct scrub_ctx *sctx; | 352 | struct scrub_ctx *sctx; |
293 | int i; | 353 | int i; |
294 | struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; | 354 | struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; |
295 | int pages_per_bio; | 355 | int pages_per_rd_bio; |
356 | int ret; | ||
296 | 357 | ||
297 | pages_per_bio = min_t(int, SCRUB_PAGES_PER_BIO, | 358 | /* |
298 | bio_get_nr_vecs(dev->bdev)); | 359 | * the setting of pages_per_rd_bio is correct for scrub but might |
360 | * be wrong for the dev_replace code where we might read from | ||
361 | * different devices in the initial huge bios. However, that | ||
362 | * code is able to correctly handle the case when adding a page | ||
363 | * to a bio fails. | ||
364 | */ | ||
365 | if (dev->bdev) | ||
366 | pages_per_rd_bio = min_t(int, SCRUB_PAGES_PER_RD_BIO, | ||
367 | bio_get_nr_vecs(dev->bdev)); | ||
368 | else | ||
369 | pages_per_rd_bio = SCRUB_PAGES_PER_RD_BIO; | ||
299 | sctx = kzalloc(sizeof(*sctx), GFP_NOFS); | 370 | sctx = kzalloc(sizeof(*sctx), GFP_NOFS); |
300 | if (!sctx) | 371 | if (!sctx) |
301 | goto nomem; | 372 | goto nomem; |
302 | sctx->is_dev_replace = is_dev_replace; | 373 | sctx->is_dev_replace = is_dev_replace; |
303 | sctx->pages_per_bio = pages_per_bio; | 374 | sctx->pages_per_rd_bio = pages_per_rd_bio; |
304 | sctx->curr = -1; | 375 | sctx->curr = -1; |
305 | sctx->dev_root = dev->dev_root; | 376 | sctx->dev_root = dev->dev_root; |
306 | for (i = 0; i < SCRUB_BIOS_PER_CTX; ++i) { | 377 | for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) { |
307 | struct scrub_bio *sbio; | 378 | struct scrub_bio *sbio; |
308 | 379 | ||
309 | sbio = kzalloc(sizeof(*sbio), GFP_NOFS); | 380 | sbio = kzalloc(sizeof(*sbio), GFP_NOFS); |
@@ -316,7 +387,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) | |||
316 | sbio->page_count = 0; | 387 | sbio->page_count = 0; |
317 | sbio->work.func = scrub_bio_end_io_worker; | 388 | sbio->work.func = scrub_bio_end_io_worker; |
318 | 389 | ||
319 | if (i != SCRUB_BIOS_PER_CTX - 1) | 390 | if (i != SCRUB_BIOS_PER_SCTX - 1) |
320 | sctx->bios[i]->next_free = i + 1; | 391 | sctx->bios[i]->next_free = i + 1; |
321 | else | 392 | else |
322 | sctx->bios[i]->next_free = -1; | 393 | sctx->bios[i]->next_free = -1; |
@@ -334,6 +405,13 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace) | |||
334 | spin_lock_init(&sctx->list_lock); | 405 | spin_lock_init(&sctx->list_lock); |
335 | spin_lock_init(&sctx->stat_lock); | 406 | spin_lock_init(&sctx->stat_lock); |
336 | init_waitqueue_head(&sctx->list_wait); | 407 | init_waitqueue_head(&sctx->list_wait); |
408 | |||
409 | ret = scrub_setup_wr_ctx(sctx, &sctx->wr_ctx, fs_info, | ||
410 | fs_info->dev_replace.tgtdev, is_dev_replace); | ||
411 | if (ret) { | ||
412 | scrub_free_ctx(sctx); | ||
413 | return ERR_PTR(ret); | ||
414 | } | ||
337 | return sctx; | 415 | return sctx; |
338 | 416 | ||
339 | nomem: | 417 | nomem: |
@@ -341,7 +419,8 @@ nomem: | |||
341 | return ERR_PTR(-ENOMEM); | 419 | return ERR_PTR(-ENOMEM); |
342 | } | 420 | } |
343 | 421 | ||
344 | static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx) | 422 | static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, |
423 | void *warn_ctx) | ||
345 | { | 424 | { |
346 | u64 isize; | 425 | u64 isize; |
347 | u32 nlink; | 426 | u32 nlink; |
@@ -349,7 +428,7 @@ static int scrub_print_warning_inode(u64 inum, u64 offset, u64 root, void *ctx) | |||
349 | int i; | 428 | int i; |
350 | struct extent_buffer *eb; | 429 | struct extent_buffer *eb; |
351 | struct btrfs_inode_item *inode_item; | 430 | struct btrfs_inode_item *inode_item; |
352 | struct scrub_warning *swarn = ctx; | 431 | struct scrub_warning *swarn = warn_ctx; |
353 | struct btrfs_fs_info *fs_info = swarn->dev->dev_root->fs_info; | 432 | struct btrfs_fs_info *fs_info = swarn->dev->dev_root->fs_info; |
354 | struct inode_fs_paths *ipath = NULL; | 433 | struct inode_fs_paths *ipath = NULL; |
355 | struct btrfs_root *local_root; | 434 | struct btrfs_root *local_root; |
@@ -492,11 +571,11 @@ out: | |||
492 | kfree(swarn.msg_buf); | 571 | kfree(swarn.msg_buf); |
493 | } | 572 | } |
494 | 573 | ||
495 | static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *ctx) | 574 | static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx) |
496 | { | 575 | { |
497 | struct page *page = NULL; | 576 | struct page *page = NULL; |
498 | unsigned long index; | 577 | unsigned long index; |
499 | struct scrub_fixup_nodatasum *fixup = ctx; | 578 | struct scrub_fixup_nodatasum *fixup = fixup_ctx; |
500 | int ret; | 579 | int ret; |
501 | int corrected = 0; | 580 | int corrected = 0; |
502 | struct btrfs_key key; | 581 | struct btrfs_key key; |
@@ -660,7 +739,9 @@ out: | |||
660 | spin_lock(&sctx->stat_lock); | 739 | spin_lock(&sctx->stat_lock); |
661 | ++sctx->stat.uncorrectable_errors; | 740 | ++sctx->stat.uncorrectable_errors; |
662 | spin_unlock(&sctx->stat_lock); | 741 | spin_unlock(&sctx->stat_lock); |
663 | 742 | btrfs_dev_replace_stats_inc( | |
743 | &sctx->dev_root->fs_info->dev_replace. | ||
744 | num_uncorrectable_read_errors); | ||
664 | printk_ratelimited_in_rcu(KERN_ERR | 745 | printk_ratelimited_in_rcu(KERN_ERR |
665 | "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n", | 746 | "btrfs: unable to fixup (nodatasum) error at logical %llu on dev %s\n", |
666 | (unsigned long long)fixup->logical, | 747 | (unsigned long long)fixup->logical, |
@@ -715,6 +796,11 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
715 | csum = sblock_to_check->pagev[0]->csum; | 796 | csum = sblock_to_check->pagev[0]->csum; |
716 | dev = sblock_to_check->pagev[0]->dev; | 797 | dev = sblock_to_check->pagev[0]->dev; |
717 | 798 | ||
799 | if (sctx->is_dev_replace && !is_metadata && !have_csum) { | ||
800 | sblocks_for_recheck = NULL; | ||
801 | goto nodatasum_case; | ||
802 | } | ||
803 | |||
718 | /* | 804 | /* |
719 | * read all mirrors one after the other. This includes to | 805 | * read all mirrors one after the other. This includes to |
720 | * re-read the extent or metadata block that failed (that was | 806 | * re-read the extent or metadata block that failed (that was |
@@ -758,7 +844,7 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
758 | } | 844 | } |
759 | 845 | ||
760 | /* setup the context, map the logical blocks and alloc the pages */ | 846 | /* setup the context, map the logical blocks and alloc the pages */ |
761 | ret = scrub_setup_recheck_block(sctx, fs_info, length, | 847 | ret = scrub_setup_recheck_block(sctx, fs_info, sblock_to_check, length, |
762 | logical, sblocks_for_recheck); | 848 | logical, sblocks_for_recheck); |
763 | if (ret) { | 849 | if (ret) { |
764 | spin_lock(&sctx->stat_lock); | 850 | spin_lock(&sctx->stat_lock); |
@@ -789,6 +875,8 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
789 | sctx->stat.unverified_errors++; | 875 | sctx->stat.unverified_errors++; |
790 | spin_unlock(&sctx->stat_lock); | 876 | spin_unlock(&sctx->stat_lock); |
791 | 877 | ||
878 | if (sctx->is_dev_replace) | ||
879 | scrub_write_block_to_dev_replace(sblock_bad); | ||
792 | goto out; | 880 | goto out; |
793 | } | 881 | } |
794 | 882 | ||
@@ -822,12 +910,15 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
822 | BTRFS_DEV_STAT_CORRUPTION_ERRS); | 910 | BTRFS_DEV_STAT_CORRUPTION_ERRS); |
823 | } | 911 | } |
824 | 912 | ||
825 | if (sctx->readonly) | 913 | if (sctx->readonly && !sctx->is_dev_replace) |
826 | goto did_not_correct_error; | 914 | goto did_not_correct_error; |
827 | 915 | ||
828 | if (!is_metadata && !have_csum) { | 916 | if (!is_metadata && !have_csum) { |
829 | struct scrub_fixup_nodatasum *fixup_nodatasum; | 917 | struct scrub_fixup_nodatasum *fixup_nodatasum; |
830 | 918 | ||
919 | nodatasum_case: | ||
920 | WARN_ON(sctx->is_dev_replace); | ||
921 | |||
831 | /* | 922 | /* |
832 | * !is_metadata and !have_csum, this means that the data | 923 | * !is_metadata and !have_csum, this means that the data |
833 | * might not be COW'ed, that it might be modified | 924 | * might not be COW'ed, that it might be modified |
@@ -883,18 +974,79 @@ static int scrub_handle_errored_block(struct scrub_block *sblock_to_check) | |||
883 | if (!sblock_other->header_error && | 974 | if (!sblock_other->header_error && |
884 | !sblock_other->checksum_error && | 975 | !sblock_other->checksum_error && |
885 | sblock_other->no_io_error_seen) { | 976 | sblock_other->no_io_error_seen) { |
886 | int force_write = is_metadata || have_csum; | 977 | if (sctx->is_dev_replace) { |
887 | 978 | scrub_write_block_to_dev_replace(sblock_other); | |
888 | ret = scrub_repair_block_from_good_copy(sblock_bad, | 979 | } else { |
889 | sblock_other, | 980 | int force_write = is_metadata || have_csum; |
890 | force_write); | 981 | |
982 | ret = scrub_repair_block_from_good_copy( | ||
983 | sblock_bad, sblock_other, | ||
984 | force_write); | ||
985 | } | ||
891 | if (0 == ret) | 986 | if (0 == ret) |
892 | goto corrected_error; | 987 | goto corrected_error; |
893 | } | 988 | } |
894 | } | 989 | } |
895 | 990 | ||
896 | /* | 991 | /* |
897 | * in case of I/O errors in the area that is supposed to be | 992 | * for dev_replace, pick good pages and write to the target device. |
993 | */ | ||
994 | if (sctx->is_dev_replace) { | ||
995 | success = 1; | ||
996 | for (page_num = 0; page_num < sblock_bad->page_count; | ||
997 | page_num++) { | ||
998 | int sub_success; | ||
999 | |||
1000 | sub_success = 0; | ||
1001 | for (mirror_index = 0; | ||
1002 | mirror_index < BTRFS_MAX_MIRRORS && | ||
1003 | sblocks_for_recheck[mirror_index].page_count > 0; | ||
1004 | mirror_index++) { | ||
1005 | struct scrub_block *sblock_other = | ||
1006 | sblocks_for_recheck + mirror_index; | ||
1007 | struct scrub_page *page_other = | ||
1008 | sblock_other->pagev[page_num]; | ||
1009 | |||
1010 | if (!page_other->io_error) { | ||
1011 | ret = scrub_write_page_to_dev_replace( | ||
1012 | sblock_other, page_num); | ||
1013 | if (ret == 0) { | ||
1014 | /* succeeded for this page */ | ||
1015 | sub_success = 1; | ||
1016 | break; | ||
1017 | } else { | ||
1018 | btrfs_dev_replace_stats_inc( | ||
1019 | &sctx->dev_root-> | ||
1020 | fs_info->dev_replace. | ||
1021 | num_write_errors); | ||
1022 | } | ||
1023 | } | ||
1024 | } | ||
1025 | |||
1026 | if (!sub_success) { | ||
1027 | /* | ||
1028 | * did not find a mirror to fetch the page | ||
1029 | * from. scrub_write_page_to_dev_replace() | ||
1030 | * handles this case (page->io_error), by | ||
1031 | * filling the block with zeros before | ||
1032 | * submitting the write request | ||
1033 | */ | ||
1034 | success = 0; | ||
1035 | ret = scrub_write_page_to_dev_replace( | ||
1036 | sblock_bad, page_num); | ||
1037 | if (ret) | ||
1038 | btrfs_dev_replace_stats_inc( | ||
1039 | &sctx->dev_root->fs_info-> | ||
1040 | dev_replace.num_write_errors); | ||
1041 | } | ||
1042 | } | ||
1043 | |||
1044 | goto out; | ||
1045 | } | ||
1046 | |||
1047 | /* | ||
1048 | * for regular scrub, repair those pages that are errored. | ||
1049 | * In case of I/O errors in the area that is supposed to be | ||
898 | * repaired, continue by picking good copies of those pages. | 1050 | * repaired, continue by picking good copies of those pages. |
899 | * Select the good pages from mirrors to rewrite bad pages from | 1051 | * Select the good pages from mirrors to rewrite bad pages from |
900 | * the area to fix. Afterwards verify the checksum of the block | 1052 | * the area to fix. Afterwards verify the checksum of the block |
@@ -1017,6 +1169,7 @@ out: | |||
1017 | 1169 | ||
1018 | static int scrub_setup_recheck_block(struct scrub_ctx *sctx, | 1170 | static int scrub_setup_recheck_block(struct scrub_ctx *sctx, |
1019 | struct btrfs_fs_info *fs_info, | 1171 | struct btrfs_fs_info *fs_info, |
1172 | struct scrub_block *original_sblock, | ||
1020 | u64 length, u64 logical, | 1173 | u64 length, u64 logical, |
1021 | struct scrub_block *sblocks_for_recheck) | 1174 | struct scrub_block *sblocks_for_recheck) |
1022 | { | 1175 | { |
@@ -1047,7 +1200,7 @@ static int scrub_setup_recheck_block(struct scrub_ctx *sctx, | |||
1047 | return -EIO; | 1200 | return -EIO; |
1048 | } | 1201 | } |
1049 | 1202 | ||
1050 | BUG_ON(page_index >= SCRUB_PAGES_PER_BIO); | 1203 | BUG_ON(page_index >= SCRUB_PAGES_PER_RD_BIO); |
1051 | for (mirror_index = 0; mirror_index < (int)bbio->num_stripes; | 1204 | for (mirror_index = 0; mirror_index < (int)bbio->num_stripes; |
1052 | mirror_index++) { | 1205 | mirror_index++) { |
1053 | struct scrub_block *sblock; | 1206 | struct scrub_block *sblock; |
@@ -1071,6 +1224,10 @@ leave_nomem: | |||
1071 | sblock->pagev[page_index] = page; | 1224 | sblock->pagev[page_index] = page; |
1072 | page->logical = logical; | 1225 | page->logical = logical; |
1073 | page->physical = bbio->stripes[mirror_index].physical; | 1226 | page->physical = bbio->stripes[mirror_index].physical; |
1227 | BUG_ON(page_index >= original_sblock->page_count); | ||
1228 | page->physical_for_dev_replace = | ||
1229 | original_sblock->pagev[page_index]-> | ||
1230 | physical_for_dev_replace; | ||
1074 | /* for missing devices, dev->bdev is NULL */ | 1231 | /* for missing devices, dev->bdev is NULL */ |
1075 | page->dev = bbio->stripes[mirror_index].dev; | 1232 | page->dev = bbio->stripes[mirror_index].dev; |
1076 | page->mirror_num = mirror_index + 1; | 1233 | page->mirror_num = mirror_index + 1; |
@@ -1249,6 +1406,12 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, | |||
1249 | int ret; | 1406 | int ret; |
1250 | DECLARE_COMPLETION_ONSTACK(complete); | 1407 | DECLARE_COMPLETION_ONSTACK(complete); |
1251 | 1408 | ||
1409 | if (!page_bad->dev->bdev) { | ||
1410 | printk_ratelimited(KERN_WARNING | ||
1411 | "btrfs: scrub_repair_page_from_good_copy(bdev == NULL) is unexpected!\n"); | ||
1412 | return -EIO; | ||
1413 | } | ||
1414 | |||
1252 | bio = bio_alloc(GFP_NOFS, 1); | 1415 | bio = bio_alloc(GFP_NOFS, 1); |
1253 | if (!bio) | 1416 | if (!bio) |
1254 | return -EIO; | 1417 | return -EIO; |
@@ -1269,6 +1432,9 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, | |||
1269 | if (!bio_flagged(bio, BIO_UPTODATE)) { | 1432 | if (!bio_flagged(bio, BIO_UPTODATE)) { |
1270 | btrfs_dev_stat_inc_and_print(page_bad->dev, | 1433 | btrfs_dev_stat_inc_and_print(page_bad->dev, |
1271 | BTRFS_DEV_STAT_WRITE_ERRS); | 1434 | BTRFS_DEV_STAT_WRITE_ERRS); |
1435 | btrfs_dev_replace_stats_inc( | ||
1436 | &sblock_bad->sctx->dev_root->fs_info-> | ||
1437 | dev_replace.num_write_errors); | ||
1272 | bio_put(bio); | 1438 | bio_put(bio); |
1273 | return -EIO; | 1439 | return -EIO; |
1274 | } | 1440 | } |
@@ -1278,7 +1444,168 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, | |||
1278 | return 0; | 1444 | return 0; |
1279 | } | 1445 | } |
1280 | 1446 | ||
1281 | static void scrub_checksum(struct scrub_block *sblock) | 1447 | static void scrub_write_block_to_dev_replace(struct scrub_block *sblock) |
1448 | { | ||
1449 | int page_num; | ||
1450 | |||
1451 | for (page_num = 0; page_num < sblock->page_count; page_num++) { | ||
1452 | int ret; | ||
1453 | |||
1454 | ret = scrub_write_page_to_dev_replace(sblock, page_num); | ||
1455 | if (ret) | ||
1456 | btrfs_dev_replace_stats_inc( | ||
1457 | &sblock->sctx->dev_root->fs_info->dev_replace. | ||
1458 | num_write_errors); | ||
1459 | } | ||
1460 | } | ||
1461 | |||
1462 | static int scrub_write_page_to_dev_replace(struct scrub_block *sblock, | ||
1463 | int page_num) | ||
1464 | { | ||
1465 | struct scrub_page *spage = sblock->pagev[page_num]; | ||
1466 | |||
1467 | BUG_ON(spage->page == NULL); | ||
1468 | if (spage->io_error) { | ||
1469 | void *mapped_buffer = kmap_atomic(spage->page); | ||
1470 | |||
1471 | memset(mapped_buffer, 0, PAGE_CACHE_SIZE); | ||
1472 | flush_dcache_page(spage->page); | ||
1473 | kunmap_atomic(mapped_buffer); | ||
1474 | } | ||
1475 | return scrub_add_page_to_wr_bio(sblock->sctx, spage); | ||
1476 | } | ||
1477 | |||
1478 | static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx, | ||
1479 | struct scrub_page *spage) | ||
1480 | { | ||
1481 | struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx; | ||
1482 | struct scrub_bio *sbio; | ||
1483 | int ret; | ||
1484 | |||
1485 | mutex_lock(&wr_ctx->wr_lock); | ||
1486 | again: | ||
1487 | if (!wr_ctx->wr_curr_bio) { | ||
1488 | wr_ctx->wr_curr_bio = kzalloc(sizeof(*wr_ctx->wr_curr_bio), | ||
1489 | GFP_NOFS); | ||
1490 | if (!wr_ctx->wr_curr_bio) { | ||
1491 | mutex_unlock(&wr_ctx->wr_lock); | ||
1492 | return -ENOMEM; | ||
1493 | } | ||
1494 | wr_ctx->wr_curr_bio->sctx = sctx; | ||
1495 | wr_ctx->wr_curr_bio->page_count = 0; | ||
1496 | } | ||
1497 | sbio = wr_ctx->wr_curr_bio; | ||
1498 | if (sbio->page_count == 0) { | ||
1499 | struct bio *bio; | ||
1500 | |||
1501 | sbio->physical = spage->physical_for_dev_replace; | ||
1502 | sbio->logical = spage->logical; | ||
1503 | sbio->dev = wr_ctx->tgtdev; | ||
1504 | bio = sbio->bio; | ||
1505 | if (!bio) { | ||
1506 | bio = bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio); | ||
1507 | if (!bio) { | ||
1508 | mutex_unlock(&wr_ctx->wr_lock); | ||
1509 | return -ENOMEM; | ||
1510 | } | ||
1511 | sbio->bio = bio; | ||
1512 | } | ||
1513 | |||
1514 | bio->bi_private = sbio; | ||
1515 | bio->bi_end_io = scrub_wr_bio_end_io; | ||
1516 | bio->bi_bdev = sbio->dev->bdev; | ||
1517 | bio->bi_sector = sbio->physical >> 9; | ||
1518 | sbio->err = 0; | ||
1519 | } else if (sbio->physical + sbio->page_count * PAGE_SIZE != | ||
1520 | spage->physical_for_dev_replace || | ||
1521 | sbio->logical + sbio->page_count * PAGE_SIZE != | ||
1522 | spage->logical) { | ||
1523 | scrub_wr_submit(sctx); | ||
1524 | goto again; | ||
1525 | } | ||
1526 | |||
1527 | ret = bio_add_page(sbio->bio, spage->page, PAGE_SIZE, 0); | ||
1528 | if (ret != PAGE_SIZE) { | ||
1529 | if (sbio->page_count < 1) { | ||
1530 | bio_put(sbio->bio); | ||
1531 | sbio->bio = NULL; | ||
1532 | mutex_unlock(&wr_ctx->wr_lock); | ||
1533 | return -EIO; | ||
1534 | } | ||
1535 | scrub_wr_submit(sctx); | ||
1536 | goto again; | ||
1537 | } | ||
1538 | |||
1539 | sbio->pagev[sbio->page_count] = spage; | ||
1540 | scrub_page_get(spage); | ||
1541 | sbio->page_count++; | ||
1542 | if (sbio->page_count == wr_ctx->pages_per_wr_bio) | ||
1543 | scrub_wr_submit(sctx); | ||
1544 | mutex_unlock(&wr_ctx->wr_lock); | ||
1545 | |||
1546 | return 0; | ||
1547 | } | ||
1548 | |||
1549 | static void scrub_wr_submit(struct scrub_ctx *sctx) | ||
1550 | { | ||
1551 | struct scrub_wr_ctx *wr_ctx = &sctx->wr_ctx; | ||
1552 | struct scrub_bio *sbio; | ||
1553 | |||
1554 | if (!wr_ctx->wr_curr_bio) | ||
1555 | return; | ||
1556 | |||
1557 | sbio = wr_ctx->wr_curr_bio; | ||
1558 | wr_ctx->wr_curr_bio = NULL; | ||
1559 | WARN_ON(!sbio->bio->bi_bdev); | ||
1560 | scrub_pending_bio_inc(sctx); | ||
1561 | /* process all writes in a single worker thread. Then the block layer | ||
1562 | * orders the requests before sending them to the driver which | ||
1563 | * doubled the write performance on spinning disks when measured | ||
1564 | * with Linux 3.5 */ | ||
1565 | btrfsic_submit_bio(WRITE, sbio->bio); | ||
1566 | } | ||
1567 | |||
1568 | static void scrub_wr_bio_end_io(struct bio *bio, int err) | ||
1569 | { | ||
1570 | struct scrub_bio *sbio = bio->bi_private; | ||
1571 | struct btrfs_fs_info *fs_info = sbio->dev->dev_root->fs_info; | ||
1572 | |||
1573 | sbio->err = err; | ||
1574 | sbio->bio = bio; | ||
1575 | |||
1576 | sbio->work.func = scrub_wr_bio_end_io_worker; | ||
1577 | btrfs_queue_worker(&fs_info->scrub_wr_completion_workers, &sbio->work); | ||
1578 | } | ||
1579 | |||
1580 | static void scrub_wr_bio_end_io_worker(struct btrfs_work *work) | ||
1581 | { | ||
1582 | struct scrub_bio *sbio = container_of(work, struct scrub_bio, work); | ||
1583 | struct scrub_ctx *sctx = sbio->sctx; | ||
1584 | int i; | ||
1585 | |||
1586 | WARN_ON(sbio->page_count > SCRUB_PAGES_PER_WR_BIO); | ||
1587 | if (sbio->err) { | ||
1588 | struct btrfs_dev_replace *dev_replace = | ||
1589 | &sbio->sctx->dev_root->fs_info->dev_replace; | ||
1590 | |||
1591 | for (i = 0; i < sbio->page_count; i++) { | ||
1592 | struct scrub_page *spage = sbio->pagev[i]; | ||
1593 | |||
1594 | spage->io_error = 1; | ||
1595 | btrfs_dev_replace_stats_inc(&dev_replace-> | ||
1596 | num_write_errors); | ||
1597 | } | ||
1598 | } | ||
1599 | |||
1600 | for (i = 0; i < sbio->page_count; i++) | ||
1601 | scrub_page_put(sbio->pagev[i]); | ||
1602 | |||
1603 | bio_put(sbio->bio); | ||
1604 | kfree(sbio); | ||
1605 | scrub_pending_bio_dec(sctx); | ||
1606 | } | ||
1607 | |||
1608 | static int scrub_checksum(struct scrub_block *sblock) | ||
1282 | { | 1609 | { |
1283 | u64 flags; | 1610 | u64 flags; |
1284 | int ret; | 1611 | int ret; |
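The hunk above adds the dev-replace write path: scrub_add_page_to_wr_bio() appends pages to the current write bio as long as they continue the physical and logical run, and scrub_wr_submit() pushes the bio out when the run breaks or the bio is full. A simplified user-space model of that batching logic, with hypothetical names and no kernel dependencies (illustrative only, not the kernel code):

#include <stdio.h>
#include <string.h>

#define PAGE_SZ       4096ULL
#define PAGES_PER_BIO 32

struct demo_page { unsigned long long logical, physical; };

struct demo_bio {
	unsigned long long logical, physical;	/* start of the current run */
	int page_count;
	struct demo_page pages[PAGES_PER_BIO];
};

static void submit(struct demo_bio *bio)
{
	if (!bio->page_count)
		return;
	printf("submit: physical=%llu, %d pages\n", bio->physical, bio->page_count);
	bio->page_count = 0;
}

static void add_page(struct demo_bio *bio, struct demo_page pg)
{
	/* next page does not continue the run: flush what we have first */
	if (bio->page_count &&
	    (bio->physical + bio->page_count * PAGE_SZ != pg.physical ||
	     bio->logical  + bio->page_count * PAGE_SZ != pg.logical))
		submit(bio);

	if (bio->page_count == 0) {
		bio->physical = pg.physical;	/* start a new run */
		bio->logical = pg.logical;
	}
	bio->pages[bio->page_count++] = pg;
	if (bio->page_count == PAGES_PER_BIO)
		submit(bio);			/* bio full: flush */
}

int main(void)
{
	struct demo_bio bio;

	memset(&bio, 0, sizeof(bio));
	for (int i = 0; i < 40; i++) {
		struct demo_page pg = { i * PAGE_SZ, 1000 * PAGE_SZ + i * PAGE_SZ };
		add_page(&bio, pg);
	}
	submit(&bio);				/* flush the tail */
	return 0;
}
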
@@ -1296,6 +1623,8 @@ static void scrub_checksum(struct scrub_block *sblock) | |||
1296 | WARN_ON(1); | 1623 | WARN_ON(1); |
1297 | if (ret) | 1624 | if (ret) |
1298 | scrub_handle_errored_block(sblock); | 1625 | scrub_handle_errored_block(sblock); |
1626 | |||
1627 | return ret; | ||
1299 | } | 1628 | } |
1300 | 1629 | ||
1301 | static int scrub_checksum_data(struct scrub_block *sblock) | 1630 | static int scrub_checksum_data(struct scrub_block *sblock) |
@@ -1386,7 +1715,7 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) | |||
1386 | BTRFS_UUID_SIZE)) | 1715 | BTRFS_UUID_SIZE)) |
1387 | ++fail; | 1716 | ++fail; |
1388 | 1717 | ||
1389 | BUG_ON(sctx->nodesize != sctx->leafsize); | 1718 | WARN_ON(sctx->nodesize != sctx->leafsize); |
1390 | len = sctx->nodesize - BTRFS_CSUM_SIZE; | 1719 | len = sctx->nodesize - BTRFS_CSUM_SIZE; |
1391 | mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; | 1720 | mapped_size = PAGE_SIZE - BTRFS_CSUM_SIZE; |
1392 | p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE; | 1721 | p = ((u8 *)mapped_buffer) + BTRFS_CSUM_SIZE; |
@@ -1534,11 +1863,24 @@ static void scrub_submit(struct scrub_ctx *sctx) | |||
1534 | sctx->curr = -1; | 1863 | sctx->curr = -1; |
1535 | scrub_pending_bio_inc(sctx); | 1864 | scrub_pending_bio_inc(sctx); |
1536 | 1865 | ||
1537 | btrfsic_submit_bio(READ, sbio->bio); | 1866 | if (!sbio->bio->bi_bdev) { |
1867 | /* | ||
1868 | * this case should not happen. If btrfs_map_block() is | ||
1869 | * wrong, it could happen for dev-replace operations on | ||
1870 | * missing devices when no mirrors are available, but in | ||
1871 | * this case it should already fail the mount. | ||
1872 | * This case is handled correctly (but _very_ slowly). | ||
1873 | */ | ||
1874 | printk_ratelimited(KERN_WARNING | ||
1875 | "btrfs: scrub_submit(bio bdev == NULL) is unexpected!\n"); | ||
1876 | bio_endio(sbio->bio, -EIO); | ||
1877 | } else { | ||
1878 | btrfsic_submit_bio(READ, sbio->bio); | ||
1879 | } | ||
1538 | } | 1880 | } |
1539 | 1881 | ||
1540 | static int scrub_add_page_to_bio(struct scrub_ctx *sctx, | 1882 | static int scrub_add_page_to_rd_bio(struct scrub_ctx *sctx, |
1541 | struct scrub_page *spage) | 1883 | struct scrub_page *spage) |
1542 | { | 1884 | { |
1543 | struct scrub_block *sblock = spage->sblock; | 1885 | struct scrub_block *sblock = spage->sblock; |
1544 | struct scrub_bio *sbio; | 1886 | struct scrub_bio *sbio; |
@@ -1570,7 +1912,7 @@ again: | |||
1570 | sbio->dev = spage->dev; | 1912 | sbio->dev = spage->dev; |
1571 | bio = sbio->bio; | 1913 | bio = sbio->bio; |
1572 | if (!bio) { | 1914 | if (!bio) { |
1573 | bio = bio_alloc(GFP_NOFS, sctx->pages_per_bio); | 1915 | bio = bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio); |
1574 | if (!bio) | 1916 | if (!bio) |
1575 | return -ENOMEM; | 1917 | return -ENOMEM; |
1576 | sbio->bio = bio; | 1918 | sbio->bio = bio; |
@@ -1602,10 +1944,10 @@ again: | |||
1602 | goto again; | 1944 | goto again; |
1603 | } | 1945 | } |
1604 | 1946 | ||
1605 | scrub_block_get(sblock); /* one for the added page */ | 1947 | scrub_block_get(sblock); /* one for the page added to the bio */ |
1606 | atomic_inc(&sblock->outstanding_pages); | 1948 | atomic_inc(&sblock->outstanding_pages); |
1607 | sbio->page_count++; | 1949 | sbio->page_count++; |
1608 | if (sbio->page_count == sctx->pages_per_bio) | 1950 | if (sbio->page_count == sctx->pages_per_rd_bio) |
1609 | scrub_submit(sctx); | 1951 | scrub_submit(sctx); |
1610 | 1952 | ||
1611 | return 0; | 1953 | return 0; |
@@ -1613,7 +1955,8 @@ again: | |||
1613 | 1955 | ||
1614 | static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len, | 1956 | static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len, |
1615 | u64 physical, struct btrfs_device *dev, u64 flags, | 1957 | u64 physical, struct btrfs_device *dev, u64 flags, |
1616 | u64 gen, int mirror_num, u8 *csum, int force) | 1958 | u64 gen, int mirror_num, u8 *csum, int force, |
1959 | u64 physical_for_dev_replace) | ||
1617 | { | 1960 | { |
1618 | struct scrub_block *sblock; | 1961 | struct scrub_block *sblock; |
1619 | int index; | 1962 | int index; |
@@ -1654,6 +1997,7 @@ leave_nomem: | |||
1654 | spage->generation = gen; | 1997 | spage->generation = gen; |
1655 | spage->logical = logical; | 1998 | spage->logical = logical; |
1656 | spage->physical = physical; | 1999 | spage->physical = physical; |
2000 | spage->physical_for_dev_replace = physical_for_dev_replace; | ||
1657 | spage->mirror_num = mirror_num; | 2001 | spage->mirror_num = mirror_num; |
1658 | if (csum) { | 2002 | if (csum) { |
1659 | spage->have_csum = 1; | 2003 | spage->have_csum = 1; |
@@ -1668,6 +2012,7 @@ leave_nomem: | |||
1668 | len -= l; | 2012 | len -= l; |
1669 | logical += l; | 2013 | logical += l; |
1670 | physical += l; | 2014 | physical += l; |
2015 | physical_for_dev_replace += l; | ||
1671 | } | 2016 | } |
1672 | 2017 | ||
1673 | WARN_ON(sblock->page_count == 0); | 2018 | WARN_ON(sblock->page_count == 0); |
@@ -1675,7 +2020,7 @@ leave_nomem: | |||
1675 | struct scrub_page *spage = sblock->pagev[index]; | 2020 | struct scrub_page *spage = sblock->pagev[index]; |
1676 | int ret; | 2021 | int ret; |
1677 | 2022 | ||
1678 | ret = scrub_add_page_to_bio(sctx, spage); | 2023 | ret = scrub_add_page_to_rd_bio(sctx, spage); |
1679 | if (ret) { | 2024 | if (ret) { |
1680 | scrub_block_put(sblock); | 2025 | scrub_block_put(sblock); |
1681 | return ret; | 2026 | return ret; |
@@ -1707,7 +2052,7 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work) | |||
1707 | struct scrub_ctx *sctx = sbio->sctx; | 2052 | struct scrub_ctx *sctx = sbio->sctx; |
1708 | int i; | 2053 | int i; |
1709 | 2054 | ||
1710 | BUG_ON(sbio->page_count > SCRUB_PAGES_PER_BIO); | 2055 | BUG_ON(sbio->page_count > SCRUB_PAGES_PER_RD_BIO); |
1711 | if (sbio->err) { | 2056 | if (sbio->err) { |
1712 | for (i = 0; i < sbio->page_count; i++) { | 2057 | for (i = 0; i < sbio->page_count; i++) { |
1713 | struct scrub_page *spage = sbio->pagev[i]; | 2058 | struct scrub_page *spage = sbio->pagev[i]; |
@@ -1733,15 +2078,30 @@ static void scrub_bio_end_io_worker(struct btrfs_work *work) | |||
1733 | sbio->next_free = sctx->first_free; | 2078 | sbio->next_free = sctx->first_free; |
1734 | sctx->first_free = sbio->index; | 2079 | sctx->first_free = sbio->index; |
1735 | spin_unlock(&sctx->list_lock); | 2080 | spin_unlock(&sctx->list_lock); |
2081 | |||
2082 | if (sctx->is_dev_replace && | ||
2083 | atomic_read(&sctx->wr_ctx.flush_all_writes)) { | ||
2084 | mutex_lock(&sctx->wr_ctx.wr_lock); | ||
2085 | scrub_wr_submit(sctx); | ||
2086 | mutex_unlock(&sctx->wr_ctx.wr_lock); | ||
2087 | } | ||
2088 | |||
1736 | scrub_pending_bio_dec(sctx); | 2089 | scrub_pending_bio_dec(sctx); |
1737 | } | 2090 | } |
1738 | 2091 | ||
1739 | static void scrub_block_complete(struct scrub_block *sblock) | 2092 | static void scrub_block_complete(struct scrub_block *sblock) |
1740 | { | 2093 | { |
1741 | if (!sblock->no_io_error_seen) | 2094 | if (!sblock->no_io_error_seen) { |
1742 | scrub_handle_errored_block(sblock); | 2095 | scrub_handle_errored_block(sblock); |
1743 | else | 2096 | } else { |
1744 | scrub_checksum(sblock); | 2097 | /* |
2098 | * If the block has a checksum error, the repair machinery writes | ||
2099 | * it to the dev-replace target; otherwise, in the dev-replace case, | ||
2100 | * it is written out here. | ||
2101 | */ | ||
2102 | if (!scrub_checksum(sblock) && sblock->sctx->is_dev_replace) | ||
2103 | scrub_write_block_to_dev_replace(sblock); | ||
2104 | } | ||
1745 | } | 2105 | } |
1746 | 2106 | ||
1747 | static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len, | 2107 | static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len, |
@@ -1786,7 +2146,7 @@ static int scrub_find_csum(struct scrub_ctx *sctx, u64 logical, u64 len, | |||
1786 | /* scrub extent tries to collect up to 64 kB for each bio */ | 2146 | /* scrub extent tries to collect up to 64 kB for each bio */ |
1787 | static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len, | 2147 | static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len, |
1788 | u64 physical, struct btrfs_device *dev, u64 flags, | 2148 | u64 physical, struct btrfs_device *dev, u64 flags, |
1789 | u64 gen, int mirror_num) | 2149 | u64 gen, int mirror_num, u64 physical_for_dev_replace) |
1790 | { | 2150 | { |
1791 | int ret; | 2151 | int ret; |
1792 | u8 csum[BTRFS_CSUM_SIZE]; | 2152 | u8 csum[BTRFS_CSUM_SIZE]; |
@@ -1799,7 +2159,7 @@ static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len, | |||
1799 | sctx->stat.data_bytes_scrubbed += len; | 2159 | sctx->stat.data_bytes_scrubbed += len; |
1800 | spin_unlock(&sctx->stat_lock); | 2160 | spin_unlock(&sctx->stat_lock); |
1801 | } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | 2161 | } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { |
1802 | BUG_ON(sctx->nodesize != sctx->leafsize); | 2162 | WARN_ON(sctx->nodesize != sctx->leafsize); |
1803 | blocksize = sctx->nodesize; | 2163 | blocksize = sctx->nodesize; |
1804 | spin_lock(&sctx->stat_lock); | 2164 | spin_lock(&sctx->stat_lock); |
1805 | sctx->stat.tree_extents_scrubbed++; | 2165 | sctx->stat.tree_extents_scrubbed++; |
@@ -1807,7 +2167,7 @@ static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len, | |||
1807 | spin_unlock(&sctx->stat_lock); | 2167 | spin_unlock(&sctx->stat_lock); |
1808 | } else { | 2168 | } else { |
1809 | blocksize = sctx->sectorsize; | 2169 | blocksize = sctx->sectorsize; |
1810 | BUG_ON(1); | 2170 | WARN_ON(1); |
1811 | } | 2171 | } |
1812 | 2172 | ||
1813 | while (len) { | 2173 | while (len) { |
@@ -1819,14 +2179,23 @@ static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len, | |||
1819 | have_csum = scrub_find_csum(sctx, logical, l, csum); | 2179 | have_csum = scrub_find_csum(sctx, logical, l, csum); |
1820 | if (have_csum == 0) | 2180 | if (have_csum == 0) |
1821 | ++sctx->stat.no_csum; | 2181 | ++sctx->stat.no_csum; |
2182 | if (sctx->is_dev_replace && !have_csum) { | ||
2183 | ret = copy_nocow_pages(sctx, logical, l, | ||
2184 | mirror_num, | ||
2185 | physical_for_dev_replace); | ||
2186 | goto behind_scrub_pages; | ||
2187 | } | ||
1822 | } | 2188 | } |
1823 | ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen, | 2189 | ret = scrub_pages(sctx, logical, l, physical, dev, flags, gen, |
1824 | mirror_num, have_csum ? csum : NULL, 0); | 2190 | mirror_num, have_csum ? csum : NULL, 0, |
2191 | physical_for_dev_replace); | ||
2192 | behind_scrub_pages: | ||
1825 | if (ret) | 2193 | if (ret) |
1826 | return ret; | 2194 | return ret; |
1827 | len -= l; | 2195 | len -= l; |
1828 | logical += l; | 2196 | logical += l; |
1829 | physical += l; | 2197 | physical += l; |
2198 | physical_for_dev_replace += l; | ||
1830 | } | 2199 | } |
1831 | return 0; | 2200 | return 0; |
1832 | } | 2201 | } |
@@ -1834,7 +2203,8 @@ static int scrub_extent(struct scrub_ctx *sctx, u64 logical, u64 len, | |||
1834 | static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | 2203 | static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, |
1835 | struct map_lookup *map, | 2204 | struct map_lookup *map, |
1836 | struct btrfs_device *scrub_dev, | 2205 | struct btrfs_device *scrub_dev, |
1837 | int num, u64 base, u64 length) | 2206 | int num, u64 base, u64 length, |
2207 | int is_dev_replace) | ||
1838 | { | 2208 | { |
1839 | struct btrfs_path *path; | 2209 | struct btrfs_path *path; |
1840 | struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; | 2210 | struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; |
@@ -1859,6 +2229,11 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | |||
1859 | struct btrfs_key key_end; | 2229 | struct btrfs_key key_end; |
1860 | u64 increment = map->stripe_len; | 2230 | u64 increment = map->stripe_len; |
1861 | u64 offset; | 2231 | u64 offset; |
2232 | u64 extent_logical; | ||
2233 | u64 extent_physical; | ||
2234 | u64 extent_len; | ||
2235 | struct btrfs_device *extent_dev; | ||
2236 | int extent_mirror_num; | ||
1862 | 2237 | ||
1863 | nstripes = length; | 2238 | nstripes = length; |
1864 | offset = 0; | 2239 | offset = 0; |
@@ -1966,9 +2341,14 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | |||
1966 | */ | 2341 | */ |
1967 | if (atomic_read(&fs_info->scrub_pause_req)) { | 2342 | if (atomic_read(&fs_info->scrub_pause_req)) { |
1968 | /* push queued extents */ | 2343 | /* push queued extents */ |
2344 | atomic_set(&sctx->wr_ctx.flush_all_writes, 1); | ||
1969 | scrub_submit(sctx); | 2345 | scrub_submit(sctx); |
2346 | mutex_lock(&sctx->wr_ctx.wr_lock); | ||
2347 | scrub_wr_submit(sctx); | ||
2348 | mutex_unlock(&sctx->wr_ctx.wr_lock); | ||
1970 | wait_event(sctx->list_wait, | 2349 | wait_event(sctx->list_wait, |
1971 | atomic_read(&sctx->bios_in_flight) == 0); | 2350 | atomic_read(&sctx->bios_in_flight) == 0); |
2351 | atomic_set(&sctx->wr_ctx.flush_all_writes, 0); | ||
1972 | atomic_inc(&fs_info->scrubs_paused); | 2352 | atomic_inc(&fs_info->scrubs_paused); |
1973 | wake_up(&fs_info->scrub_pause_wait); | 2353 | wake_up(&fs_info->scrub_pause_wait); |
1974 | mutex_lock(&fs_info->scrub_lock); | 2354 | mutex_lock(&fs_info->scrub_lock); |
@@ -2063,10 +2443,20 @@ static noinline_for_stack int scrub_stripe(struct scrub_ctx *sctx, | |||
2063 | key.objectid; | 2443 | key.objectid; |
2064 | } | 2444 | } |
2065 | 2445 | ||
2066 | ret = scrub_extent(sctx, key.objectid, key.offset, | 2446 | extent_logical = key.objectid; |
2067 | key.objectid - logical + physical, | 2447 | extent_physical = key.objectid - logical + physical; |
2068 | scrub_dev, flags, generation, | 2448 | extent_len = key.offset; |
2069 | mirror_num); | 2449 | extent_dev = scrub_dev; |
2450 | extent_mirror_num = mirror_num; | ||
2451 | if (is_dev_replace) | ||
2452 | scrub_remap_extent(fs_info, extent_logical, | ||
2453 | extent_len, &extent_physical, | ||
2454 | &extent_dev, | ||
2455 | &extent_mirror_num); | ||
2456 | ret = scrub_extent(sctx, extent_logical, extent_len, | ||
2457 | extent_physical, extent_dev, flags, | ||
2458 | generation, extent_mirror_num, | ||
2459 | key.objectid - logical + physical); | ||
2070 | if (ret) | 2460 | if (ret) |
2071 | goto out; | 2461 | goto out; |
2072 | 2462 | ||
@@ -2080,10 +2470,13 @@ next: | |||
2080 | sctx->stat.last_physical = physical; | 2470 | sctx->stat.last_physical = physical; |
2081 | spin_unlock(&sctx->stat_lock); | 2471 | spin_unlock(&sctx->stat_lock); |
2082 | } | 2472 | } |
2473 | out: | ||
2083 | /* push queued extents */ | 2474 | /* push queued extents */ |
2084 | scrub_submit(sctx); | 2475 | scrub_submit(sctx); |
2476 | mutex_lock(&sctx->wr_ctx.wr_lock); | ||
2477 | scrub_wr_submit(sctx); | ||
2478 | mutex_unlock(&sctx->wr_ctx.wr_lock); | ||
2085 | 2479 | ||
2086 | out: | ||
2087 | blk_finish_plug(&plug); | 2480 | blk_finish_plug(&plug); |
2088 | btrfs_free_path(path); | 2481 | btrfs_free_path(path); |
2089 | return ret < 0 ? ret : 0; | 2482 | return ret < 0 ? ret : 0; |
@@ -2093,14 +2486,14 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx, | |||
2093 | struct btrfs_device *scrub_dev, | 2486 | struct btrfs_device *scrub_dev, |
2094 | u64 chunk_tree, u64 chunk_objectid, | 2487 | u64 chunk_tree, u64 chunk_objectid, |
2095 | u64 chunk_offset, u64 length, | 2488 | u64 chunk_offset, u64 length, |
2096 | u64 dev_offset) | 2489 | u64 dev_offset, int is_dev_replace) |
2097 | { | 2490 | { |
2098 | struct btrfs_mapping_tree *map_tree = | 2491 | struct btrfs_mapping_tree *map_tree = |
2099 | &sctx->dev_root->fs_info->mapping_tree; | 2492 | &sctx->dev_root->fs_info->mapping_tree; |
2100 | struct map_lookup *map; | 2493 | struct map_lookup *map; |
2101 | struct extent_map *em; | 2494 | struct extent_map *em; |
2102 | int i; | 2495 | int i; |
2103 | int ret = -EINVAL; | 2496 | int ret = 0; |
2104 | 2497 | ||
2105 | read_lock(&map_tree->map_tree.lock); | 2498 | read_lock(&map_tree->map_tree.lock); |
2106 | em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); | 2499 | em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); |
@@ -2120,7 +2513,8 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx, | |||
2120 | if (map->stripes[i].dev->bdev == scrub_dev->bdev && | 2513 | if (map->stripes[i].dev->bdev == scrub_dev->bdev && |
2121 | map->stripes[i].physical == dev_offset) { | 2514 | map->stripes[i].physical == dev_offset) { |
2122 | ret = scrub_stripe(sctx, map, scrub_dev, i, | 2515 | ret = scrub_stripe(sctx, map, scrub_dev, i, |
2123 | chunk_offset, length); | 2516 | chunk_offset, length, |
2517 | is_dev_replace); | ||
2124 | if (ret) | 2518 | if (ret) |
2125 | goto out; | 2519 | goto out; |
2126 | } | 2520 | } |
@@ -2133,7 +2527,8 @@ out: | |||
2133 | 2527 | ||
2134 | static noinline_for_stack | 2528 | static noinline_for_stack |
2135 | int scrub_enumerate_chunks(struct scrub_ctx *sctx, | 2529 | int scrub_enumerate_chunks(struct scrub_ctx *sctx, |
2136 | struct btrfs_device *scrub_dev, u64 start, u64 end) | 2530 | struct btrfs_device *scrub_dev, u64 start, u64 end, |
2531 | int is_dev_replace) | ||
2137 | { | 2532 | { |
2138 | struct btrfs_dev_extent *dev_extent = NULL; | 2533 | struct btrfs_dev_extent *dev_extent = NULL; |
2139 | struct btrfs_path *path; | 2534 | struct btrfs_path *path; |
@@ -2149,6 +2544,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, | |||
2149 | struct btrfs_key key; | 2544 | struct btrfs_key key; |
2150 | struct btrfs_key found_key; | 2545 | struct btrfs_key found_key; |
2151 | struct btrfs_block_group_cache *cache; | 2546 | struct btrfs_block_group_cache *cache; |
2547 | struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; | ||
2152 | 2548 | ||
2153 | path = btrfs_alloc_path(); | 2549 | path = btrfs_alloc_path(); |
2154 | if (!path) | 2550 | if (!path) |
@@ -2214,11 +2610,61 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, | |||
2214 | ret = -ENOENT; | 2610 | ret = -ENOENT; |
2215 | break; | 2611 | break; |
2216 | } | 2612 | } |
2613 | dev_replace->cursor_right = found_key.offset + length; | ||
2614 | dev_replace->cursor_left = found_key.offset; | ||
2615 | dev_replace->item_needs_writeback = 1; | ||
2217 | ret = scrub_chunk(sctx, scrub_dev, chunk_tree, chunk_objectid, | 2616 | ret = scrub_chunk(sctx, scrub_dev, chunk_tree, chunk_objectid, |
2218 | chunk_offset, length, found_key.offset); | 2617 | chunk_offset, length, found_key.offset, |
2618 | is_dev_replace); | ||
2619 | |||
2620 | /* | ||
2621 | * flush, submit all pending read and write bios, afterwards | ||
2622 | * wait for them. | ||
2623 | * Note that in the dev replace case, a read request causes | ||
2624 | * write requests that are submitted in the read completion | ||
2625 | * worker. Therefore in the current situation, it is required | ||
2626 | * that all write requests are flushed, so that all read and | ||
2627 | * write requests are really completed when bios_in_flight | ||
2628 | * changes to 0. | ||
2629 | */ | ||
2630 | atomic_set(&sctx->wr_ctx.flush_all_writes, 1); | ||
2631 | scrub_submit(sctx); | ||
2632 | mutex_lock(&sctx->wr_ctx.wr_lock); | ||
2633 | scrub_wr_submit(sctx); | ||
2634 | mutex_unlock(&sctx->wr_ctx.wr_lock); | ||
2635 | |||
2636 | wait_event(sctx->list_wait, | ||
2637 | atomic_read(&sctx->bios_in_flight) == 0); | ||
2638 | atomic_set(&sctx->wr_ctx.flush_all_writes, 0); | ||
2639 | atomic_inc(&fs_info->scrubs_paused); | ||
2640 | wake_up(&fs_info->scrub_pause_wait); | ||
2641 | wait_event(sctx->list_wait, | ||
2642 | atomic_read(&sctx->workers_pending) == 0); | ||
2643 | |||
2644 | mutex_lock(&fs_info->scrub_lock); | ||
2645 | while (atomic_read(&fs_info->scrub_pause_req)) { | ||
2646 | mutex_unlock(&fs_info->scrub_lock); | ||
2647 | wait_event(fs_info->scrub_pause_wait, | ||
2648 | atomic_read(&fs_info->scrub_pause_req) == 0); | ||
2649 | mutex_lock(&fs_info->scrub_lock); | ||
2650 | } | ||
2651 | atomic_dec(&fs_info->scrubs_paused); | ||
2652 | mutex_unlock(&fs_info->scrub_lock); | ||
2653 | wake_up(&fs_info->scrub_pause_wait); | ||
2654 | |||
2655 | dev_replace->cursor_left = dev_replace->cursor_right; | ||
2656 | dev_replace->item_needs_writeback = 1; | ||
2219 | btrfs_put_block_group(cache); | 2657 | btrfs_put_block_group(cache); |
2220 | if (ret) | 2658 | if (ret) |
2221 | break; | 2659 | break; |
2660 | if (atomic64_read(&dev_replace->num_write_errors) > 0) { | ||
2661 | ret = -EIO; | ||
2662 | break; | ||
2663 | } | ||
2664 | if (sctx->stat.malloc_errors > 0) { | ||
2665 | ret = -ENOMEM; | ||
2666 | break; | ||
2667 | } | ||
2222 | 2668 | ||
2223 | key.offset = found_key.offset + length; | 2669 | key.offset = found_key.offset + length; |
2224 | btrfs_release_path(path); | 2670 | btrfs_release_path(path); |
@@ -2254,7 +2700,7 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx, | |||
2254 | 2700 | ||
2255 | ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr, | 2701 | ret = scrub_pages(sctx, bytenr, BTRFS_SUPER_INFO_SIZE, bytenr, |
2256 | scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i, | 2702 | scrub_dev, BTRFS_EXTENT_FLAG_SUPER, gen, i, |
2257 | NULL, 1); | 2703 | NULL, 1, bytenr); |
2258 | if (ret) | 2704 | if (ret) |
2259 | return ret; | 2705 | return ret; |
2260 | } | 2706 | } |
@@ -2266,18 +2712,38 @@ static noinline_for_stack int scrub_supers(struct scrub_ctx *sctx, | |||
2266 | /* | 2712 | /* |
2267 | * get a reference count on fs_info->scrub_workers. start worker if necessary | 2713 | * get a reference count on fs_info->scrub_workers. start worker if necessary |
2268 | */ | 2714 | */ |
2269 | static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info) | 2715 | static noinline_for_stack int scrub_workers_get(struct btrfs_fs_info *fs_info, |
2716 | int is_dev_replace) | ||
2270 | { | 2717 | { |
2271 | int ret = 0; | 2718 | int ret = 0; |
2272 | 2719 | ||
2273 | mutex_lock(&fs_info->scrub_lock); | 2720 | mutex_lock(&fs_info->scrub_lock); |
2274 | if (fs_info->scrub_workers_refcnt == 0) { | 2721 | if (fs_info->scrub_workers_refcnt == 0) { |
2275 | btrfs_init_workers(&fs_info->scrub_workers, "scrub", | 2722 | if (is_dev_replace) |
2276 | fs_info->thread_pool_size, &fs_info->generic_worker); | 2723 | btrfs_init_workers(&fs_info->scrub_workers, "scrub", 1, |
2724 | &fs_info->generic_worker); | ||
2725 | else | ||
2726 | btrfs_init_workers(&fs_info->scrub_workers, "scrub", | ||
2727 | fs_info->thread_pool_size, | ||
2728 | &fs_info->generic_worker); | ||
2277 | fs_info->scrub_workers.idle_thresh = 4; | 2729 | fs_info->scrub_workers.idle_thresh = 4; |
2278 | ret = btrfs_start_workers(&fs_info->scrub_workers); | 2730 | ret = btrfs_start_workers(&fs_info->scrub_workers); |
2279 | if (ret) | 2731 | if (ret) |
2280 | goto out; | 2732 | goto out; |
2733 | btrfs_init_workers(&fs_info->scrub_wr_completion_workers, | ||
2734 | "scrubwrc", | ||
2735 | fs_info->thread_pool_size, | ||
2736 | &fs_info->generic_worker); | ||
2737 | fs_info->scrub_wr_completion_workers.idle_thresh = 2; | ||
2738 | ret = btrfs_start_workers( | ||
2739 | &fs_info->scrub_wr_completion_workers); | ||
2740 | if (ret) | ||
2741 | goto out; | ||
2742 | btrfs_init_workers(&fs_info->scrub_nocow_workers, "scrubnc", 1, | ||
2743 | &fs_info->generic_worker); | ||
2744 | ret = btrfs_start_workers(&fs_info->scrub_nocow_workers); | ||
2745 | if (ret) | ||
2746 | goto out; | ||
2281 | } | 2747 | } |
2282 | ++fs_info->scrub_workers_refcnt; | 2748 | ++fs_info->scrub_workers_refcnt; |
2283 | out: | 2749 | out: |
@@ -2289,8 +2755,11 @@ out: | |||
2289 | static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info) | 2755 | static noinline_for_stack void scrub_workers_put(struct btrfs_fs_info *fs_info) |
2290 | { | 2756 | { |
2291 | mutex_lock(&fs_info->scrub_lock); | 2757 | mutex_lock(&fs_info->scrub_lock); |
2292 | if (--fs_info->scrub_workers_refcnt == 0) | 2758 | if (--fs_info->scrub_workers_refcnt == 0) { |
2293 | btrfs_stop_workers(&fs_info->scrub_workers); | 2759 | btrfs_stop_workers(&fs_info->scrub_workers); |
2760 | btrfs_stop_workers(&fs_info->scrub_wr_completion_workers); | ||
2761 | btrfs_stop_workers(&fs_info->scrub_nocow_workers); | ||
2762 | } | ||
2294 | WARN_ON(fs_info->scrub_workers_refcnt < 0); | 2763 | WARN_ON(fs_info->scrub_workers_refcnt < 0); |
2295 | mutex_unlock(&fs_info->scrub_lock); | 2764 | mutex_unlock(&fs_info->scrub_lock); |
2296 | } | 2765 | } |
@@ -2354,7 +2823,7 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, | |||
2354 | return -EINVAL; | 2823 | return -EINVAL; |
2355 | } | 2824 | } |
2356 | 2825 | ||
2357 | ret = scrub_workers_get(fs_info); | 2826 | ret = scrub_workers_get(fs_info, is_dev_replace); |
2358 | if (ret) | 2827 | if (ret) |
2359 | return ret; | 2828 | return ret; |
2360 | 2829 | ||
@@ -2394,12 +2863,15 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start, | |||
2394 | mutex_unlock(&fs_info->scrub_lock); | 2863 | mutex_unlock(&fs_info->scrub_lock); |
2395 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); | 2864 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); |
2396 | 2865 | ||
2397 | down_read(&fs_info->scrub_super_lock); | 2866 | if (!is_dev_replace) { |
2398 | ret = scrub_supers(sctx, dev); | 2867 | down_read(&fs_info->scrub_super_lock); |
2399 | up_read(&fs_info->scrub_super_lock); | 2868 | ret = scrub_supers(sctx, dev); |
2869 | up_read(&fs_info->scrub_super_lock); | ||
2870 | } | ||
2400 | 2871 | ||
2401 | if (!ret) | 2872 | if (!ret) |
2402 | ret = scrub_enumerate_chunks(sctx, dev, start, end); | 2873 | ret = scrub_enumerate_chunks(sctx, dev, start, end, |
2874 | is_dev_replace); | ||
2403 | 2875 | ||
2404 | wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0); | 2876 | wait_event(sctx->list_wait, atomic_read(&sctx->bios_in_flight) == 0); |
2405 | atomic_dec(&fs_info->scrubs_running); | 2877 | atomic_dec(&fs_info->scrubs_running); |
@@ -2537,3 +3009,272 @@ int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, | |||
2537 | 3009 | ||
2538 | return dev ? (sctx ? 0 : -ENOTCONN) : -ENODEV; | 3010 | return dev ? (sctx ? 0 : -ENOTCONN) : -ENODEV; |
2539 | } | 3011 | } |
3012 | |||
3013 | static void scrub_remap_extent(struct btrfs_fs_info *fs_info, | ||
3014 | u64 extent_logical, u64 extent_len, | ||
3015 | u64 *extent_physical, | ||
3016 | struct btrfs_device **extent_dev, | ||
3017 | int *extent_mirror_num) | ||
3018 | { | ||
3019 | u64 mapped_length; | ||
3020 | struct btrfs_bio *bbio = NULL; | ||
3021 | int ret; | ||
3022 | |||
3023 | mapped_length = extent_len; | ||
3024 | ret = btrfs_map_block(fs_info, READ, extent_logical, | ||
3025 | &mapped_length, &bbio, 0); | ||
3026 | if (ret || !bbio || mapped_length < extent_len || | ||
3027 | !bbio->stripes[0].dev->bdev) { | ||
3028 | kfree(bbio); | ||
3029 | return; | ||
3030 | } | ||
3031 | |||
3032 | *extent_physical = bbio->stripes[0].physical; | ||
3033 | *extent_mirror_num = bbio->mirror_num; | ||
3034 | *extent_dev = bbio->stripes[0].dev; | ||
3035 | kfree(bbio); | ||
3036 | } | ||
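scrub_remap_extent() resolves a logical extent to the first stripe returned by btrfs_map_block() and deliberately leaves its out parameters untouched when the mapping fails, is shorter than the extent, or has no usable bdev, so the caller keeps whatever values it passed in. A small userspace model of that out-parameter contract follows; the stripe table and all names are invented for the example.

	/*
	 * Userspace model of the "leave outputs unchanged on failure"
	 * contract: look a logical range up in an invented stripe table and
	 * only write the outputs when the whole range is covered.
	 */
	#include <stdint.h>
	#include <stdio.h>

	struct stripe { uint64_t logical, len, physical; int mirror; };

	static const struct stripe table[] = {
		{ .logical = 1 << 20, .len = 1 << 20, .physical = 8 << 20, .mirror = 1 },
	};

	static void remap_extent(uint64_t logical, uint64_t len,
				 uint64_t *physical, int *mirror)
	{
		for (size_t i = 0; i < sizeof(table) / sizeof(table[0]); i++) {
			const struct stripe *s = &table[i];

			if (logical < s->logical ||
			    logical + len > s->logical + s->len)
				continue;	/* mapping too short, skip it */
			*physical = s->physical + (logical - s->logical);
			*mirror = s->mirror;
			return;
		}
		/* no full mapping found: outputs deliberately untouched */
	}

	int main(void)
	{
		uint64_t physical = 0;
		int mirror = 0;

		remap_extent((1 << 20) + 4096, 4096, &physical, &mirror);
		printf("physical=%llu mirror=%d\n",
		       (unsigned long long)physical, mirror);
		return 0;
	}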
3037 | |||
3038 | static int scrub_setup_wr_ctx(struct scrub_ctx *sctx, | ||
3039 | struct scrub_wr_ctx *wr_ctx, | ||
3040 | struct btrfs_fs_info *fs_info, | ||
3041 | struct btrfs_device *dev, | ||
3042 | int is_dev_replace) | ||
3043 | { | ||
3044 | WARN_ON(wr_ctx->wr_curr_bio != NULL); | ||
3045 | |||
3046 | mutex_init(&wr_ctx->wr_lock); | ||
3047 | wr_ctx->wr_curr_bio = NULL; | ||
3048 | if (!is_dev_replace) | ||
3049 | return 0; | ||
3050 | |||
3051 | WARN_ON(!dev->bdev); | ||
3052 | wr_ctx->pages_per_wr_bio = min_t(int, SCRUB_PAGES_PER_WR_BIO, | ||
3053 | bio_get_nr_vecs(dev->bdev)); | ||
3054 | wr_ctx->tgtdev = dev; | ||
3055 | atomic_set(&wr_ctx->flush_all_writes, 0); | ||
3056 | return 0; | ||
3057 | } | ||
3058 | |||
3059 | static void scrub_free_wr_ctx(struct scrub_wr_ctx *wr_ctx) | ||
3060 | { | ||
3061 | mutex_lock(&wr_ctx->wr_lock); | ||
3062 | kfree(wr_ctx->wr_curr_bio); | ||
3063 | wr_ctx->wr_curr_bio = NULL; | ||
3064 | mutex_unlock(&wr_ctx->wr_lock); | ||
3065 | } | ||
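scrub_setup_wr_ctx() only does real work in dev-replace mode: it records the target device and clamps the per-bio page budget to the smaller of the compile-time cap and what the target block device accepts in one bio (bio_get_nr_vecs() in the kernel). The userspace sketch below models just that clamp; the function names and the device vec limits passed in main() are assumptions of the example.

	/* Userspace sketch of the write-context setup: in dev-replace mode the
	 * per-bio page budget is min(compile-time cap, device limit). */
	#include <stdio.h>

	#define SCRUB_PAGES_PER_WR_BIO 32

	struct wr_ctx { int pages_per_wr_bio; int have_tgtdev; };

	static int min_int(int a, int b) { return a < b ? a : b; }

	static void setup_wr_ctx(struct wr_ctx *wr, int dev_max_vecs,
				 int is_dev_replace)
	{
		wr->have_tgtdev = 0;
		wr->pages_per_wr_bio = 0;
		if (!is_dev_replace)
			return;		/* plain scrub never writes to a target device */
		wr->pages_per_wr_bio = min_int(SCRUB_PAGES_PER_WR_BIO, dev_max_vecs);
		wr->have_tgtdev = 1;
	}

	int main(void)
	{
		struct wr_ctx wr;

		setup_wr_ctx(&wr, 16, 1);	/* device taking only 16 vecs per bio */
		printf("pages per write bio: %d\n", wr.pages_per_wr_bio);	/* 16 */
		setup_wr_ctx(&wr, 256, 1);	/* roomier device, capped at 32 */
		printf("pages per write bio: %d\n", wr.pages_per_wr_bio);	/* 32 */
		return 0;
	}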
3066 | |||
3067 | static int copy_nocow_pages(struct scrub_ctx *sctx, u64 logical, u64 len, | ||
3068 | int mirror_num, u64 physical_for_dev_replace) | ||
3069 | { | ||
3070 | struct scrub_copy_nocow_ctx *nocow_ctx; | ||
3071 | struct btrfs_fs_info *fs_info = sctx->dev_root->fs_info; | ||
3072 | |||
3073 | nocow_ctx = kzalloc(sizeof(*nocow_ctx), GFP_NOFS); | ||
3074 | if (!nocow_ctx) { | ||
3075 | spin_lock(&sctx->stat_lock); | ||
3076 | sctx->stat.malloc_errors++; | ||
3077 | spin_unlock(&sctx->stat_lock); | ||
3078 | return -ENOMEM; | ||
3079 | } | ||
3080 | |||
3081 | scrub_pending_trans_workers_inc(sctx); | ||
3082 | |||
3083 | nocow_ctx->sctx = sctx; | ||
3084 | nocow_ctx->logical = logical; | ||
3085 | nocow_ctx->len = len; | ||
3086 | nocow_ctx->mirror_num = mirror_num; | ||
3087 | nocow_ctx->physical_for_dev_replace = physical_for_dev_replace; | ||
3088 | nocow_ctx->work.func = copy_nocow_pages_worker; | ||
3089 | btrfs_queue_worker(&fs_info->scrub_nocow_workers, | ||
3090 | &nocow_ctx->work); | ||
3091 | |||
3092 | return 0; | ||
3093 | } | ||
3094 | |||
3095 | static void copy_nocow_pages_worker(struct btrfs_work *work) | ||
3096 | { | ||
3097 | struct scrub_copy_nocow_ctx *nocow_ctx = | ||
3098 | container_of(work, struct scrub_copy_nocow_ctx, work); | ||
3099 | struct scrub_ctx *sctx = nocow_ctx->sctx; | ||
3100 | u64 logical = nocow_ctx->logical; | ||
3101 | u64 len = nocow_ctx->len; | ||
3102 | int mirror_num = nocow_ctx->mirror_num; | ||
3103 | u64 physical_for_dev_replace = nocow_ctx->physical_for_dev_replace; | ||
3104 | int ret; | ||
3105 | struct btrfs_trans_handle *trans = NULL; | ||
3106 | struct btrfs_fs_info *fs_info; | ||
3107 | struct btrfs_path *path; | ||
3108 | struct btrfs_root *root; | ||
3109 | int not_written = 0; | ||
3110 | |||
3111 | fs_info = sctx->dev_root->fs_info; | ||
3112 | root = fs_info->extent_root; | ||
3113 | |||
3114 | path = btrfs_alloc_path(); | ||
3115 | if (!path) { | ||
3116 | spin_lock(&sctx->stat_lock); | ||
3117 | sctx->stat.malloc_errors++; | ||
3118 | spin_unlock(&sctx->stat_lock); | ||
3119 | not_written = 1; | ||
3120 | goto out; | ||
3121 | } | ||
3122 | |||
3123 | trans = btrfs_join_transaction(root); | ||
3124 | if (IS_ERR(trans)) { | ||
3125 | not_written = 1; | ||
3126 | goto out; | ||
3127 | } | ||
3128 | |||
3129 | ret = iterate_inodes_from_logical(logical, fs_info, path, | ||
3130 | copy_nocow_pages_for_inode, | ||
3131 | nocow_ctx); | ||
3132 | if (ret != 0 && ret != -ENOENT) { | ||
3133 | pr_warn("iterate_inodes_from_logical() failed: log %llu, phys %llu, len %llu, mir %llu, ret %d\n", | ||
3134 | (unsigned long long)logical, | ||
3135 | (unsigned long long)physical_for_dev_replace, | ||
3136 | (unsigned long long)len, | ||
3137 | (unsigned long long)mirror_num, ret); | ||
3138 | not_written = 1; | ||
3139 | goto out; | ||
3140 | } | ||
3141 | |||
3142 | out: | ||
3143 | if (trans && !IS_ERR(trans)) | ||
3144 | btrfs_end_transaction(trans, root); | ||
3145 | if (not_written) | ||
3146 | btrfs_dev_replace_stats_inc(&fs_info->dev_replace. | ||
3147 | num_uncorrectable_read_errors); | ||
3148 | |||
3149 | btrfs_free_path(path); | ||
3150 | kfree(nocow_ctx); | ||
3151 | |||
3152 | scrub_pending_trans_workers_dec(sctx); | ||
3153 | } | ||
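copy_nocow_pages() only packages the parameters into a work item and queues it on the scrub_nocow_workers pool; copy_nocow_pages_worker() then walks every inode referencing the logical address via iterate_inodes_from_logical() and, if nothing could be written, accounts the range as an uncorrectable read error for the replace operation. A single-threaded userspace model of that hand-off and error accounting follows; the structure, the failing stub, and the counter name are invented for the sketch.

	/* Model of the submit/worker split: the submit side only records the
	 * parameters, the worker does the copy and counts failed ranges. */
	#include <stdint.h>
	#include <stdio.h>

	struct nocow_work { uint64_t logical, len, physical; int mirror; };

	static unsigned long num_uncorrectable_read_errors;

	static int copy_for_all_inodes(const struct nocow_work *w)
	{
		/* stands in for iterate_inodes_from_logical() + per-inode copy */
		return w->len ? 0 : -1;	/* pretend a zero-length request fails */
	}

	static void nocow_worker(const struct nocow_work *w)
	{
		if (copy_for_all_inodes(w))
			num_uncorrectable_read_errors++;	/* nothing was written */
	}

	int main(void)
	{
		struct nocow_work ok  = { .logical = 4096, .len = 4096,
					  .physical = 8192, .mirror = 1 };
		struct nocow_work bad = { .logical = 8192, .len = 0,
					  .physical = 12288, .mirror = 1 };

		nocow_worker(&ok);
		nocow_worker(&bad);
		printf("uncorrectable read errors: %lu\n",
		       num_uncorrectable_read_errors);
		return 0;
	}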
3154 | |||
3155 | static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root, void *ctx) | ||
3156 | { | ||
3157 | unsigned long index; | ||
3158 | struct scrub_copy_nocow_ctx *nocow_ctx = ctx; | ||
3159 | int ret = 0; | ||
3160 | struct btrfs_key key; | ||
3161 | struct inode *inode = NULL; | ||
3162 | struct btrfs_root *local_root; | ||
3163 | u64 physical_for_dev_replace; | ||
3164 | u64 len; | ||
3165 | struct btrfs_fs_info *fs_info = nocow_ctx->sctx->dev_root->fs_info; | ||
3166 | |||
3167 | key.objectid = root; | ||
3168 | key.type = BTRFS_ROOT_ITEM_KEY; | ||
3169 | key.offset = (u64)-1; | ||
3170 | local_root = btrfs_read_fs_root_no_name(fs_info, &key); | ||
3171 | if (IS_ERR(local_root)) | ||
3172 | return PTR_ERR(local_root); | ||
3173 | |||
3174 | key.type = BTRFS_INODE_ITEM_KEY; | ||
3175 | key.objectid = inum; | ||
3176 | key.offset = 0; | ||
3177 | inode = btrfs_iget(fs_info->sb, &key, local_root, NULL); | ||
3178 | if (IS_ERR(inode)) | ||
3179 | return PTR_ERR(inode); | ||
3180 | |||
3181 | physical_for_dev_replace = nocow_ctx->physical_for_dev_replace; | ||
3182 | len = nocow_ctx->len; | ||
3183 | while (len >= PAGE_CACHE_SIZE) { | ||
3184 | struct page *page = NULL; | ||
3185 | int ret_sub; | ||
3186 | |||
3187 | index = offset >> PAGE_CACHE_SHIFT; | ||
3188 | |||
3189 | page = find_or_create_page(inode->i_mapping, index, GFP_NOFS); | ||
3190 | if (!page) { | ||
3191 | pr_err("find_or_create_page() failed\n"); | ||
3192 | ret = -ENOMEM; | ||
3193 | goto next_page; | ||
3194 | } | ||
3195 | |||
3196 | if (PageUptodate(page)) { | ||
3197 | if (PageDirty(page)) | ||
3198 | goto next_page; | ||
3199 | } else { | ||
3200 | ClearPageError(page); | ||
3201 | ret_sub = extent_read_full_page(&BTRFS_I(inode)-> | ||
3202 | io_tree, | ||
3203 | page, btrfs_get_extent, | ||
3204 | nocow_ctx->mirror_num); | ||
3205 | if (ret_sub) { | ||
3206 | ret = ret_sub; | ||
3207 | goto next_page; | ||
3208 | } | ||
3209 | wait_on_page_locked(page); | ||
3210 | if (!PageUptodate(page)) { | ||
3211 | ret = -EIO; | ||
3212 | goto next_page; | ||
3213 | } | ||
3214 | } | ||
3215 | ret_sub = write_page_nocow(nocow_ctx->sctx, | ||
3216 | physical_for_dev_replace, page); | ||
3217 | if (ret_sub) { | ||
3218 | ret = ret_sub; | ||
3219 | goto next_page; | ||
3220 | } | ||
3221 | |||
3222 | next_page: | ||
3223 | if (page) { | ||
3224 | unlock_page(page); | ||
3225 | put_page(page); | ||
3226 | } | ||
3227 | offset += PAGE_CACHE_SIZE; | ||
3228 | physical_for_dev_replace += PAGE_CACHE_SIZE; | ||
3229 | len -= PAGE_CACHE_SIZE; | ||
3230 | } | ||
3231 | |||
3232 | if (inode) | ||
3233 | iput(inode); | ||
3234 | return ret; | ||
3235 | } | ||
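The per-inode loop above copies the range one page-cache page at a time: the file offset, the page index derived from it, and the physical address on the replacement device all advance by PAGE_CACHE_SIZE per iteration, and a failure on one page is remembered without aborting the walk. The userspace model below shows only that stride and error-carrying behaviour; the 4 KiB page size, starting offsets, and the page that fails are assumptions of the sketch.

	/* Model of the page walk: offset, index and target physical advance in
	 * lockstep, and a per-page error is kept without stopping the loop. */
	#include <stdint.h>
	#include <stdio.h>

	#define PAGE_SIZE 4096ULL

	static int copy_one_page(uint64_t index, uint64_t physical)
	{
		(void)physical;
		return index == 4 ? -5 : 0;	/* pretend page 4 fails with -EIO */
	}

	int main(void)
	{
		uint64_t offset = 3 * PAGE_SIZE;	/* extent start in the file */
		uint64_t physical = 128 * PAGE_SIZE;	/* target on the new device */
		uint64_t len = 4 * PAGE_SIZE;
		int ret = 0;

		while (len >= PAGE_SIZE) {
			uint64_t index = offset / PAGE_SIZE;	/* offset >> PAGE_SHIFT */
			int ret_sub = copy_one_page(index, physical);

			if (ret_sub)
				ret = ret_sub;	/* remember the error, keep going */
			printf("page %llu -> physical %llu (%d)\n",
			       (unsigned long long)index,
			       (unsigned long long)physical, ret_sub);
			offset += PAGE_SIZE;
			physical += PAGE_SIZE;
			len -= PAGE_SIZE;
		}
		printf("final status: %d\n", ret);
		return 0;
	}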
3236 | |||
3237 | static int write_page_nocow(struct scrub_ctx *sctx, | ||
3238 | u64 physical_for_dev_replace, struct page *page) | ||
3239 | { | ||
3240 | struct bio *bio; | ||
3241 | struct btrfs_device *dev; | ||
3242 | int ret; | ||
3243 | DECLARE_COMPLETION_ONSTACK(compl); | ||
3244 | |||
3245 | dev = sctx->wr_ctx.tgtdev; | ||
3246 | if (!dev) | ||
3247 | return -EIO; | ||
3248 | if (!dev->bdev) { | ||
3249 | printk_ratelimited(KERN_WARNING | ||
3250 | "btrfs: scrub write_page_nocow(bdev == NULL) is unexpected!\n"); | ||
3251 | return -EIO; | ||
3252 | } | ||
3253 | bio = bio_alloc(GFP_NOFS, 1); | ||
3254 | if (!bio) { | ||
3255 | spin_lock(&sctx->stat_lock); | ||
3256 | sctx->stat.malloc_errors++; | ||
3257 | spin_unlock(&sctx->stat_lock); | ||
3258 | return -ENOMEM; | ||
3259 | } | ||
3260 | bio->bi_private = &compl; | ||
3261 | bio->bi_end_io = scrub_complete_bio_end_io; | ||
3262 | bio->bi_size = 0; | ||
3263 | bio->bi_sector = physical_for_dev_replace >> 9; | ||
3264 | bio->bi_bdev = dev->bdev; | ||
3265 | ret = bio_add_page(bio, page, PAGE_CACHE_SIZE, 0); | ||
3266 | if (ret != PAGE_CACHE_SIZE) { | ||
3267 | leave_with_eio: | ||
3268 | bio_put(bio); | ||
3269 | btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS); | ||
3270 | return -EIO; | ||
3271 | } | ||
3272 | btrfsic_submit_bio(WRITE_SYNC, bio); | ||
3273 | wait_for_completion(&compl); | ||
3274 | |||
3275 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) | ||
3276 | goto leave_with_eio; | ||
3277 | |||
3278 | bio_put(bio); | ||
3279 | return 0; | ||
3280 | } | ||
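write_page_nocow() performs a fully synchronous one-page write to the replacement device: the physical byte offset is converted to a 512-byte sector for bi_sector, the bio is submitted with WRITE_SYNC, and anything short of a complete page counts as -EIO. As a rough userspace analogue, with pwrite() standing in for building and submitting the bio and a placeholder target path, the same contract could be sketched like this:

	/* Rough userspace analogue: write exactly one page at a byte offset on
	 * the target and treat a short or failed write as an I/O error. */
	#include <errno.h>
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>

	#define PAGE_SIZE 4096

	static int write_page_at(int fd, unsigned long long physical,
				 const void *page)
	{
		ssize_t ret = pwrite(fd, page, PAGE_SIZE, (off_t)physical);

		if (ret != PAGE_SIZE)
			return -EIO;	/* short or failed write is an error */
		return 0;
	}

	int main(void)
	{
		static char page[PAGE_SIZE];
		int fd = open("/tmp/replace-target.img", O_RDWR | O_CREAT, 0600);

		if (fd < 0)
			return 1;
		memset(page, 0xab, sizeof(page));
		/* write the page at "physical" offset 64 KiB on the target */
		if (write_page_at(fd, 64 * 1024, page))
			fprintf(stderr, "write failed\n");
		close(fd);
		return 0;
	}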