Diffstat (limited to 'fs/btrfs/scrub.c')
-rw-r--r--	fs/btrfs/scrub.c	1492
1 file changed, 1492 insertions, 0 deletions
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
new file mode 100644
index 000000000000..70f9fa772ee9
--- /dev/null
+++ b/fs/btrfs/scrub.c
@@ -0,0 +1,1492 @@
1 | /* | ||
2 | * Copyright (C) 2011 STRATO. All rights reserved. | ||
3 | * | ||
4 | * This program is free software; you can redistribute it and/or | ||
5 | * modify it under the terms of the GNU General Public | ||
6 | * License v2 as published by the Free Software Foundation. | ||
7 | * | ||
8 | * This program is distributed in the hope that it will be useful, | ||
9 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
10 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
11 | * General Public License for more details. | ||
12 | * | ||
13 | * You should have received a copy of the GNU General Public | ||
14 | * License along with this program; if not, write to the | ||
15 | * Free Software Foundation, Inc., 59 Temple Place - Suite 330, | ||
16 | * Boston, MA 02111-1307, USA. | ||
17 | */ | ||
18 | |||
19 | #include <linux/sched.h> | ||
20 | #include <linux/pagemap.h> | ||
21 | #include <linux/writeback.h> | ||
22 | #include <linux/blkdev.h> | ||
23 | #include <linux/rbtree.h> | ||
24 | #include <linux/slab.h> | ||
25 | #include <linux/workqueue.h> | ||
26 | #include "ctree.h" | ||
27 | #include "volumes.h" | ||
28 | #include "disk-io.h" | ||
29 | #include "ordered-data.h" | ||
30 | |||
31 | /* | ||
32 | * This is only the first step towards a full-featured scrub. It reads all | ||
33 | * extents and super blocks and verifies the checksums. In case a bad checksum | ||
34 | * is found or the extent cannot be read, good data will be written back if | ||
35 | * any can be found. | ||
36 | * | ||
37 | * Future enhancements: | ||
38 | * - To enhance the performance, better read-ahead strategies for the | ||
39 | * extent-tree can be employed. | ||
40 | * - In case an unrepairable extent is encountered, track which files are | ||
41 | * affected and report them | ||
42 | * - In case of a read error on files with nodatasum, map the file and read | ||
43 | * the extent to trigger a writeback of the good copy | ||
44 | * - track and record media errors, throw out bad devices | ||
45 | * - add a readonly mode | ||
46 | * - add a mode to also read unallocated space | ||
47 | * - make the prefetch cancellable | ||
48 | */ | ||
49 | |||
50 | struct scrub_bio; | ||
51 | struct scrub_page; | ||
52 | struct scrub_dev; | ||
53 | struct scrub_fixup; | ||
54 | static void scrub_bio_end_io(struct bio *bio, int err); | ||
55 | static void scrub_checksum(struct btrfs_work *work); | ||
56 | static int scrub_checksum_data(struct scrub_dev *sdev, | ||
57 | struct scrub_page *spag, void *buffer); | ||
58 | static int scrub_checksum_tree_block(struct scrub_dev *sdev, | ||
59 | struct scrub_page *spag, u64 logical, | ||
60 | void *buffer); | ||
61 | static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer); | ||
62 | static void scrub_recheck_end_io(struct bio *bio, int err); | ||
63 | static void scrub_fixup_worker(struct btrfs_work *work); | ||
64 | static void scrub_fixup(struct scrub_fixup *fixup); | ||
65 | |||
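/*
 * with a 4 KiB PAGE_SIZE (btrfs_scrub_dev() checks sectorsize == PAGE_SIZE)
 * the two limits below give 16 x 4 KiB = 64 KiB per bio and
 * 16 x 64 KiB = 1 MiB of read I/O in flight per scrubbed device
 */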
66 | #define SCRUB_PAGES_PER_BIO 16 /* 64k per bio */ | ||
67 | #define SCRUB_BIOS_PER_DEV 16 /* 1 MB per device in flight */ | ||
68 | |||
69 | struct scrub_page { | ||
70 | u64 flags; /* extent flags */ | ||
71 | u64 generation; | ||
72 | u64 mirror_num; | ||
73 | int have_csum; | ||
74 | u8 csum[BTRFS_CSUM_SIZE]; | ||
75 | }; | ||
76 | |||
77 | struct scrub_bio { | ||
78 | int index; | ||
79 | struct scrub_dev *sdev; | ||
80 | struct bio *bio; | ||
81 | int err; | ||
82 | u64 logical; | ||
83 | u64 physical; | ||
84 | struct scrub_page spag[SCRUB_PAGES_PER_BIO]; | ||
85 | u64 count; | ||
86 | int next_free; | ||
87 | struct btrfs_work work; | ||
88 | }; | ||
89 | |||
90 | struct scrub_dev { | ||
91 | struct scrub_bio *bios[SCRUB_BIOS_PER_DEV]; | ||
92 | struct btrfs_device *dev; | ||
93 | int first_free; | ||
94 | int curr; | ||
95 | atomic_t in_flight; | ||
96 | spinlock_t list_lock; | ||
97 | wait_queue_head_t list_wait; | ||
98 | u16 csum_size; | ||
99 | struct list_head csum_list; | ||
100 | atomic_t cancel_req; | ||
101 | /* | ||
102 | * statistics | ||
103 | */ | ||
104 | struct btrfs_scrub_progress stat; | ||
105 | spinlock_t stat_lock; | ||
106 | }; | ||
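The bios[] array together with first_free, next_free and curr forms a small index-based free list: scrub_page() takes the bio at first_free under list_lock and makes it the current one, scrub_submit() sends it off, and scrub_checksum() pushes the finished bio back onto the list and wakes list_wait. A minimal user-space sketch of that free-list discipline; the pool/slot names and helpers below are illustrative only, not btrfs API:

#include <stdio.h>

#define NBIOS 16

struct slot { int next_free; int count; };

struct pool {
	struct slot slots[NBIOS];
	int first_free;		/* head of the free list, -1 when empty */
	int curr;		/* slot currently being filled, -1 if none */
};

static void pool_init(struct pool *p)
{
	for (int i = 0; i < NBIOS; ++i)
		p->slots[i].next_free = (i == NBIOS - 1) ? -1 : i + 1;
	p->first_free = 0;
	p->curr = -1;
}

/* grab a free slot to fill, as scrub_page() does with sdev->curr */
static int pool_get(struct pool *p)
{
	if (p->first_free == -1)
		return -1;	/* the kernel code would wait on list_wait here */
	p->curr = p->first_free;
	p->first_free = p->slots[p->curr].next_free;
	p->slots[p->curr].next_free = -1;
	p->slots[p->curr].count = 0;
	return p->curr;
}

/* return a completed slot, as scrub_checksum() does after the bio ends */
static void pool_put(struct pool *p, int ix)
{
	p->slots[ix].next_free = p->first_free;
	p->first_free = ix;
}

int main(void)
{
	struct pool p;

	pool_init(&p);
	int ix = pool_get(&p);
	printf("got slot %d, next free slot %d\n", ix, p.first_free);
	pool_put(&p, ix);
	printf("returned slot, free list head %d\n", p.first_free);
	return 0;
}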
107 | |||
108 | struct scrub_fixup { | ||
109 | struct scrub_dev *sdev; | ||
110 | struct bio *bio; | ||
111 | u64 logical; | ||
112 | u64 physical; | ||
113 | struct scrub_page spag; | ||
114 | struct btrfs_work work; | ||
115 | int err; | ||
116 | int recheck; | ||
117 | }; | ||
118 | |||
119 | static void scrub_free_csums(struct scrub_dev *sdev) | ||
120 | { | ||
121 | while (!list_empty(&sdev->csum_list)) { | ||
122 | struct btrfs_ordered_sum *sum; | ||
123 | sum = list_first_entry(&sdev->csum_list, | ||
124 | struct btrfs_ordered_sum, list); | ||
125 | list_del(&sum->list); | ||
126 | kfree(sum); | ||
127 | } | ||
128 | } | ||
129 | |||
130 | static noinline_for_stack void scrub_free_dev(struct scrub_dev *sdev) | ||
131 | { | ||
132 | int i; | ||
133 | int j; | ||
134 | struct page *last_page; | ||
135 | |||
136 | if (!sdev) | ||
137 | return; | ||
138 | |||
139 | for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { | ||
140 | struct scrub_bio *sbio = sdev->bios[i]; | ||
141 | struct bio *bio; | ||
142 | |||
143 | if (!sbio) | ||
144 | break; | ||
145 | |||
146 | bio = sbio->bio; | ||
147 | if (bio) { | ||
148 | last_page = NULL; | ||
149 | for (j = 0; j < bio->bi_vcnt; ++j) { | ||
150 | if (bio->bi_io_vec[j].bv_page == last_page) | ||
151 | continue; | ||
152 | last_page = bio->bi_io_vec[j].bv_page; | ||
153 | __free_page(last_page); | ||
154 | } | ||
155 | bio_put(bio); | ||
156 | } | ||
157 | kfree(sbio); | ||
158 | } | ||
159 | |||
160 | scrub_free_csums(sdev); | ||
161 | kfree(sdev); | ||
162 | } | ||
163 | |||
164 | static noinline_for_stack | ||
165 | struct scrub_dev *scrub_setup_dev(struct btrfs_device *dev) | ||
166 | { | ||
167 | struct scrub_dev *sdev; | ||
168 | int i; | ||
169 | int j; | ||
170 | int ret; | ||
171 | struct btrfs_fs_info *fs_info = dev->dev_root->fs_info; | ||
172 | |||
173 | sdev = kzalloc(sizeof(*sdev), GFP_NOFS); | ||
174 | if (!sdev) | ||
175 | goto nomem; | ||
176 | sdev->dev = dev; | ||
177 | for (i = 0; i < SCRUB_BIOS_PER_DEV; ++i) { | ||
178 | struct bio *bio; | ||
179 | struct scrub_bio *sbio; | ||
180 | |||
181 | sbio = kzalloc(sizeof(*sbio), GFP_NOFS); | ||
182 | if (!sbio) | ||
183 | goto nomem; | ||
184 | sdev->bios[i] = sbio; | ||
185 | |||
186 | bio = bio_kmalloc(GFP_NOFS, SCRUB_PAGES_PER_BIO); | ||
187 | if (!bio) | ||
188 | goto nomem; | ||
189 | |||
190 | sbio->index = i; | ||
191 | sbio->sdev = sdev; | ||
192 | sbio->bio = bio; | ||
193 | sbio->count = 0; | ||
194 | sbio->work.func = scrub_checksum; | ||
195 | bio->bi_private = sdev->bios[i]; | ||
196 | bio->bi_end_io = scrub_bio_end_io; | ||
197 | bio->bi_sector = 0; | ||
198 | bio->bi_bdev = dev->bdev; | ||
199 | bio->bi_size = 0; | ||
200 | |||
201 | for (j = 0; j < SCRUB_PAGES_PER_BIO; ++j) { | ||
202 | struct page *page; | ||
203 | page = alloc_page(GFP_NOFS); | ||
204 | if (!page) | ||
205 | goto nomem; | ||
206 | |||
207 | ret = bio_add_page(bio, page, PAGE_SIZE, 0); | ||
208 | if (!ret) | ||
209 | goto nomem; | ||
210 | } | ||
211 | WARN_ON(bio->bi_vcnt != SCRUB_PAGES_PER_BIO); | ||
212 | |||
213 | if (i != SCRUB_BIOS_PER_DEV-1) | ||
214 | sdev->bios[i]->next_free = i + 1; | ||
215 | else | ||
216 | sdev->bios[i]->next_free = -1; | ||
217 | } | ||
218 | sdev->first_free = 0; | ||
219 | sdev->curr = -1; | ||
220 | atomic_set(&sdev->in_flight, 0); | ||
221 | atomic_set(&sdev->cancel_req, 0); | ||
222 | sdev->csum_size = btrfs_super_csum_size(&fs_info->super_copy); | ||
223 | INIT_LIST_HEAD(&sdev->csum_list); | ||
224 | |||
225 | spin_lock_init(&sdev->list_lock); | ||
226 | spin_lock_init(&sdev->stat_lock); | ||
227 | init_waitqueue_head(&sdev->list_wait); | ||
228 | return sdev; | ||
229 | |||
230 | nomem: | ||
231 | scrub_free_dev(sdev); | ||
232 | return ERR_PTR(-ENOMEM); | ||
233 | } | ||
234 | |||
235 | /* | ||
236 | * scrub_recheck_error gets called when either verification of the page | ||
237 | * failed or the bio failed to read, e.g. with EIO. In the latter case, | ||
238 | * recheck_error gets called for every page in the bio, even though only | ||
239 | * one may be bad | ||
240 | */ | ||
241 | static void scrub_recheck_error(struct scrub_bio *sbio, int ix) | ||
242 | { | ||
243 | struct scrub_dev *sdev = sbio->sdev; | ||
244 | struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; | ||
245 | struct bio *bio = NULL; | ||
246 | struct page *page = NULL; | ||
247 | struct scrub_fixup *fixup = NULL; | ||
248 | int ret; | ||
249 | |||
250 | /* | ||
251 | * while we're in here we do not want the transaction to commit. | ||
252 | * To prevent it, we increment scrubs_running. scrub_pause will | ||
253 | * have to wait until we're finished. | ||
254 | * We can safely increment scrubs_running here, because we're | ||
255 | * in the context of the original bio which is still marked in_flight | ||
256 | */ | ||
257 | atomic_inc(&fs_info->scrubs_running); | ||
258 | |||
259 | fixup = kzalloc(sizeof(*fixup), GFP_NOFS); | ||
260 | if (!fixup) | ||
261 | goto malloc_error; | ||
262 | |||
263 | fixup->logical = sbio->logical + ix * PAGE_SIZE; | ||
264 | fixup->physical = sbio->physical + ix * PAGE_SIZE; | ||
265 | fixup->spag = sbio->spag[ix]; | ||
266 | fixup->sdev = sdev; | ||
267 | |||
268 | bio = bio_alloc(GFP_NOFS, 1); | ||
269 | if (!bio) | ||
270 | goto malloc_error; | ||
271 | bio->bi_private = fixup; | ||
272 | bio->bi_size = 0; | ||
273 | bio->bi_bdev = sdev->dev->bdev; | ||
274 | fixup->bio = bio; | ||
275 | fixup->recheck = 0; | ||
276 | |||
277 | page = alloc_page(GFP_NOFS); | ||
278 | if (!page) | ||
279 | goto malloc_error; | ||
280 | |||
281 | ret = bio_add_page(bio, page, PAGE_SIZE, 0); | ||
282 | if (!ret) | ||
283 | goto malloc_error; | ||
284 | |||
285 | if (!sbio->err) { | ||
286 | /* | ||
287 | * shorter path: just a checksum error, go ahead and correct it | ||
288 | */ | ||
289 | scrub_fixup_worker(&fixup->work); | ||
290 | return; | ||
291 | } | ||
292 | |||
293 | /* | ||
294 | * an I/O-error occurred for one of the blocks in the bio, not | ||
295 | * necessarily for this one, so first try to read it separately | ||
296 | */ | ||
297 | fixup->work.func = scrub_fixup_worker; | ||
298 | fixup->recheck = 1; | ||
299 | bio->bi_end_io = scrub_recheck_end_io; | ||
300 | bio->bi_sector = fixup->physical >> 9; | ||
301 | bio->bi_bdev = sdev->dev->bdev; | ||
302 | submit_bio(0, bio); | ||
303 | |||
304 | return; | ||
305 | |||
306 | malloc_error: | ||
307 | if (bio) | ||
308 | bio_put(bio); | ||
309 | if (page) | ||
310 | __free_page(page); | ||
311 | kfree(fixup); | ||
312 | spin_lock(&sdev->stat_lock); | ||
313 | ++sdev->stat.malloc_errors; | ||
314 | spin_unlock(&sdev->stat_lock); | ||
315 | atomic_dec(&fs_info->scrubs_running); | ||
316 | wake_up(&fs_info->scrub_pause_wait); | ||
317 | } | ||
318 | |||
319 | static void scrub_recheck_end_io(struct bio *bio, int err) | ||
320 | { | ||
321 | struct scrub_fixup *fixup = bio->bi_private; | ||
322 | struct btrfs_fs_info *fs_info = fixup->sdev->dev->dev_root->fs_info; | ||
323 | |||
324 | fixup->err = err; | ||
325 | btrfs_queue_worker(&fs_info->scrub_workers, &fixup->work); | ||
326 | } | ||
327 | |||
328 | static int scrub_fixup_check(struct scrub_fixup *fixup) | ||
329 | { | ||
330 | int ret = 1; | ||
331 | struct page *page; | ||
332 | void *buffer; | ||
333 | u64 flags = fixup->spag.flags; | ||
334 | |||
335 | page = fixup->bio->bi_io_vec[0].bv_page; | ||
336 | buffer = kmap_atomic(page, KM_USER0); | ||
337 | if (flags & BTRFS_EXTENT_FLAG_DATA) { | ||
338 | ret = scrub_checksum_data(fixup->sdev, | ||
339 | &fixup->spag, buffer); | ||
340 | } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | ||
341 | ret = scrub_checksum_tree_block(fixup->sdev, | ||
342 | &fixup->spag, | ||
343 | fixup->logical, | ||
344 | buffer); | ||
345 | } else { | ||
346 | WARN_ON(1); | ||
347 | } | ||
348 | kunmap_atomic(buffer, KM_USER0); | ||
349 | |||
350 | return ret; | ||
351 | } | ||
352 | |||
353 | static void scrub_fixup_worker(struct btrfs_work *work) | ||
354 | { | ||
355 | struct scrub_fixup *fixup; | ||
356 | struct btrfs_fs_info *fs_info; | ||
357 | u64 flags; | ||
358 | int ret = 1; | ||
359 | |||
360 | fixup = container_of(work, struct scrub_fixup, work); | ||
361 | fs_info = fixup->sdev->dev->dev_root->fs_info; | ||
362 | flags = fixup->spag.flags; | ||
363 | |||
364 | if (fixup->recheck && fixup->err == 0) | ||
365 | ret = scrub_fixup_check(fixup); | ||
366 | |||
367 | if (ret || fixup->err) | ||
368 | scrub_fixup(fixup); | ||
369 | |||
370 | __free_page(fixup->bio->bi_io_vec[0].bv_page); | ||
371 | bio_put(fixup->bio); | ||
372 | |||
373 | atomic_dec(&fs_info->scrubs_running); | ||
374 | wake_up(&fs_info->scrub_pause_wait); | ||
375 | |||
376 | kfree(fixup); | ||
377 | } | ||
378 | |||
379 | static void scrub_fixup_end_io(struct bio *bio, int err) | ||
380 | { | ||
381 | complete((struct completion *)bio->bi_private); | ||
382 | } | ||
383 | |||
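/*
 * scrub_fixup tries to repair the page described by @fixup: it maps the
 * logical address to all mirrors with btrfs_map_block(), reads the page
 * from each mirror except the one being scrubbed until scrub_fixup_check()
 * accepts a copy, and then writes that good copy back to the original
 * physical location on the scrubbed device. Data extents without a
 * checksum (nodatasum) and single-copy chunks are counted as uncorrectable.
 */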
384 | static void scrub_fixup(struct scrub_fixup *fixup) | ||
385 | { | ||
386 | struct scrub_dev *sdev = fixup->sdev; | ||
387 | struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; | ||
388 | struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; | ||
389 | struct btrfs_multi_bio *multi = NULL; | ||
390 | struct bio *bio = fixup->bio; | ||
391 | u64 length; | ||
392 | int i; | ||
393 | int ret; | ||
394 | DECLARE_COMPLETION_ONSTACK(complete); | ||
395 | |||
396 | if ((fixup->spag.flags & BTRFS_EXTENT_FLAG_DATA) && | ||
397 | (fixup->spag.have_csum == 0)) { | ||
398 | /* | ||
399 | * nodatasum, don't try to fix anything | ||
400 | * FIXME: we can do better, open the inode and trigger a | ||
401 | * writeback | ||
402 | */ | ||
403 | goto uncorrectable; | ||
404 | } | ||
405 | |||
406 | length = PAGE_SIZE; | ||
407 | ret = btrfs_map_block(map_tree, REQ_WRITE, fixup->logical, &length, | ||
408 | &multi, 0); | ||
409 | if (ret || !multi || length < PAGE_SIZE) { | ||
410 | printk(KERN_ERR | ||
411 | "scrub_fixup: btrfs_map_block failed us for %llu\n", | ||
412 | (unsigned long long)fixup->logical); | ||
413 | WARN_ON(1); | ||
414 | return; | ||
415 | } | ||
416 | |||
417 | if (multi->num_stripes == 1) { | ||
418 | /* there aren't any replicas */ | ||
419 | goto uncorrectable; | ||
420 | } | ||
421 | |||
422 | /* | ||
423 | * first find a good copy | ||
424 | */ | ||
425 | for (i = 0; i < multi->num_stripes; ++i) { | ||
426 | if (i == fixup->spag.mirror_num) | ||
427 | continue; | ||
428 | |||
429 | bio->bi_sector = multi->stripes[i].physical >> 9; | ||
430 | bio->bi_bdev = multi->stripes[i].dev->bdev; | ||
431 | bio->bi_size = PAGE_SIZE; | ||
432 | bio->bi_next = NULL; | ||
433 | bio->bi_flags |= 1 << BIO_UPTODATE; | ||
434 | bio->bi_comp_cpu = -1; | ||
435 | bio->bi_end_io = scrub_fixup_end_io; | ||
436 | bio->bi_private = &complete; | ||
437 | |||
438 | submit_bio(0, bio); | ||
439 | |||
440 | wait_for_completion(&complete); | ||
441 | |||
442 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) | ||
443 | /* I/O-error, this is not a good copy */ | ||
444 | continue; | ||
445 | |||
446 | ret = scrub_fixup_check(fixup); | ||
447 | if (ret == 0) | ||
448 | break; | ||
449 | } | ||
450 | if (i == multi->num_stripes) | ||
451 | goto uncorrectable; | ||
452 | |||
453 | /* | ||
454 | * the bio now contains good data, write it back | ||
455 | */ | ||
456 | bio->bi_sector = fixup->physical >> 9; | ||
457 | bio->bi_bdev = sdev->dev->bdev; | ||
458 | bio->bi_size = PAGE_SIZE; | ||
459 | bio->bi_next = NULL; | ||
460 | bio->bi_flags |= 1 << BIO_UPTODATE; | ||
461 | bio->bi_comp_cpu = -1; | ||
462 | bio->bi_end_io = scrub_fixup_end_io; | ||
463 | bio->bi_private = &complete; | ||
464 | |||
465 | submit_bio(REQ_WRITE, bio); | ||
466 | |||
467 | wait_for_completion(&complete); | ||
468 | |||
469 | if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) | ||
470 | /* I/O-error, writeback failed, give up */ | ||
471 | goto uncorrectable; | ||
472 | |||
473 | kfree(multi); | ||
474 | spin_lock(&sdev->stat_lock); | ||
475 | ++sdev->stat.corrected_errors; | ||
476 | spin_unlock(&sdev->stat_lock); | ||
477 | |||
478 | if (printk_ratelimit()) | ||
479 | printk(KERN_ERR "btrfs: fixed up at %llu\n", | ||
480 | (unsigned long long)fixup->logical); | ||
481 | return; | ||
482 | |||
483 | uncorrectable: | ||
484 | kfree(multi); | ||
485 | spin_lock(&sdev->stat_lock); | ||
486 | ++sdev->stat.uncorrectable_errors; | ||
487 | spin_unlock(&sdev->stat_lock); | ||
488 | |||
489 | if (printk_ratelimit()) | ||
490 | printk(KERN_ERR "btrfs: unable to fixup at %llu\n", | ||
491 | (unsigned long long)fixup->logical); | ||
492 | } | ||
493 | |||
494 | static void scrub_bio_end_io(struct bio *bio, int err) | ||
495 | { | ||
496 | struct scrub_bio *sbio = bio->bi_private; | ||
497 | struct scrub_dev *sdev = sbio->sdev; | ||
498 | struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; | ||
499 | |||
500 | sbio->err = err; | ||
501 | |||
502 | btrfs_queue_worker(&fs_info->scrub_workers, &sbio->work); | ||
503 | } | ||
504 | |||
505 | static void scrub_checksum(struct btrfs_work *work) | ||
506 | { | ||
507 | struct scrub_bio *sbio = container_of(work, struct scrub_bio, work); | ||
508 | struct scrub_dev *sdev = sbio->sdev; | ||
509 | struct page *page; | ||
510 | void *buffer; | ||
511 | int i; | ||
512 | u64 flags; | ||
513 | u64 logical; | ||
514 | int ret; | ||
515 | |||
516 | if (sbio->err) { | ||
517 | struct bio *bio; | ||
518 | struct bio *old_bio; | ||
519 | |||
520 | for (i = 0; i < sbio->count; ++i) | ||
521 | scrub_recheck_error(sbio, i); | ||
522 | spin_lock(&sdev->stat_lock); | ||
523 | ++sdev->stat.read_errors; | ||
524 | spin_unlock(&sdev->stat_lock); | ||
525 | |||
526 | /* | ||
527 | * FIXME: allocate a new bio after a media error. I haven't | ||
528 | * figured out how to reuse this one | ||
529 | */ | ||
530 | old_bio = sbio->bio; | ||
531 | bio = bio_kmalloc(GFP_NOFS, SCRUB_PAGES_PER_BIO); | ||
532 | if (!bio) { | ||
533 | /* | ||
534 | * alloc failed. cancel the scrub and don't requeue | ||
535 | * this sbio | ||
536 | */ | ||
537 | printk(KERN_ERR "btrfs scrub: allocation failure, " | ||
538 | "cancelling scrub\n"); | ||
539 | atomic_inc(&sdev->dev->dev_root->fs_info-> | ||
540 | scrub_cancel_req); | ||
541 | goto out_no_enqueue; | ||
542 | } | ||
543 | sbio->bio = bio; | ||
544 | bio->bi_private = sbio; | ||
545 | bio->bi_end_io = scrub_bio_end_io; | ||
546 | bio->bi_sector = 0; | ||
547 | bio->bi_bdev = sbio->sdev->dev->bdev; | ||
548 | bio->bi_size = 0; | ||
549 | for (i = 0; i < SCRUB_PAGES_PER_BIO; ++i) { | ||
550 | struct page *page; | ||
551 | page = old_bio->bi_io_vec[i].bv_page; | ||
552 | bio_add_page(bio, page, PAGE_SIZE, 0); | ||
553 | } | ||
554 | bio_put(old_bio); | ||
555 | goto out; | ||
556 | } | ||
557 | for (i = 0; i < sbio->count; ++i) { | ||
558 | page = sbio->bio->bi_io_vec[i].bv_page; | ||
559 | buffer = kmap_atomic(page, KM_USER0); | ||
560 | flags = sbio->spag[i].flags; | ||
561 | logical = sbio->logical + i * PAGE_SIZE; | ||
562 | ret = 0; | ||
563 | if (flags & BTRFS_EXTENT_FLAG_DATA) { | ||
564 | ret = scrub_checksum_data(sdev, sbio->spag + i, buffer); | ||
565 | } else if (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK) { | ||
566 | ret = scrub_checksum_tree_block(sdev, sbio->spag + i, | ||
567 | logical, buffer); | ||
568 | } else if (flags & BTRFS_EXTENT_FLAG_SUPER) { | ||
569 | BUG_ON(i); | ||
570 | (void)scrub_checksum_super(sbio, buffer); | ||
571 | } else { | ||
572 | WARN_ON(1); | ||
573 | } | ||
574 | kunmap_atomic(buffer, KM_USER0); | ||
575 | if (ret) | ||
576 | scrub_recheck_error(sbio, i); | ||
577 | } | ||
578 | |||
579 | out: | ||
580 | spin_lock(&sdev->list_lock); | ||
581 | sbio->next_free = sdev->first_free; | ||
582 | sdev->first_free = sbio->index; | ||
583 | spin_unlock(&sdev->list_lock); | ||
584 | out_no_enqueue: | ||
585 | atomic_dec(&sdev->in_flight); | ||
586 | wake_up(&sdev->list_wait); | ||
587 | } | ||
588 | |||
589 | static int scrub_checksum_data(struct scrub_dev *sdev, | ||
590 | struct scrub_page *spag, void *buffer) | ||
591 | { | ||
592 | u8 csum[BTRFS_CSUM_SIZE]; | ||
593 | u32 crc = ~(u32)0; | ||
594 | int fail = 0; | ||
595 | struct btrfs_root *root = sdev->dev->dev_root; | ||
596 | |||
597 | if (!spag->have_csum) | ||
598 | return 0; | ||
599 | |||
600 | crc = btrfs_csum_data(root, buffer, crc, PAGE_SIZE); | ||
601 | btrfs_csum_final(crc, csum); | ||
602 | if (memcmp(csum, spag->csum, sdev->csum_size)) | ||
603 | fail = 1; | ||
604 | |||
605 | spin_lock(&sdev->stat_lock); | ||
606 | ++sdev->stat.data_extents_scrubbed; | ||
607 | sdev->stat.data_bytes_scrubbed += PAGE_SIZE; | ||
608 | if (fail) | ||
609 | ++sdev->stat.csum_errors; | ||
610 | spin_unlock(&sdev->stat_lock); | ||
611 | |||
612 | return fail; | ||
613 | } | ||
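For data extents the check is a plain crc32c over the whole page: btrfs_csum_data() accumulates the crc with an all-ones seed, btrfs_csum_final() inverts it and stores it little-endian, and the first csum_size bytes (4 for crc32c) are compared against the checksum carried in spag->csum. A self-contained user-space sketch of the same verification, assuming the default crc32c checksum type and using a simple bitwise implementation instead of the kernel helpers:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* bitwise CRC32C (Castagnoli, reflected), as used for btrfs data checksums */
static uint32_t crc32c(uint32_t crc, const void *buf, size_t len)
{
	const uint8_t *p = buf;

	while (len--) {
		crc ^= *p++;
		for (int i = 0; i < 8; i++)
			crc = (crc >> 1) ^ (0x82F63B78 & -(crc & 1));
	}
	return crc;
}

int main(void)
{
	uint8_t page[4096] = { 0 };	/* stand-in for one scrubbed page */
	uint8_t stored[4];		/* csum as found in the csum tree */
	uint32_t crc;

	/* what the writer did: crc32c with all-ones seed, then final inversion */
	crc = ~crc32c(~0u, page, sizeof(page));
	memcpy(stored, &crc, sizeof(stored));	/* on-disk csum (little-endian host) */

	/* what scrub_checksum_data() does: recompute and memcmp csum_size bytes */
	crc = ~crc32c(~0u, page, sizeof(page));
	printf("csum %s\n", memcmp(stored, &crc, sizeof(stored)) ? "BAD" : "ok");
	return 0;
}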
614 | |||
615 | static int scrub_checksum_tree_block(struct scrub_dev *sdev, | ||
616 | struct scrub_page *spag, u64 logical, | ||
617 | void *buffer) | ||
618 | { | ||
619 | struct btrfs_header *h; | ||
620 | struct btrfs_root *root = sdev->dev->dev_root; | ||
621 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
622 | u8 csum[BTRFS_CSUM_SIZE]; | ||
623 | u32 crc = ~(u32)0; | ||
624 | int fail = 0; | ||
625 | int crc_fail = 0; | ||
626 | |||
627 | /* | ||
628 | * we don't use the getter functions here, as we | ||
629 | * a) don't have an extent buffer and | ||
630 | * b) the page is already kmapped | ||
631 | */ | ||
632 | h = (struct btrfs_header *)buffer; | ||
633 | |||
634 | if (logical != le64_to_cpu(h->bytenr)) | ||
635 | ++fail; | ||
636 | |||
637 | if (spag->generation != le64_to_cpu(h->generation)) | ||
638 | ++fail; | ||
639 | |||
640 | if (memcmp(h->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) | ||
641 | ++fail; | ||
642 | |||
643 | if (memcmp(h->chunk_tree_uuid, fs_info->chunk_tree_uuid, | ||
644 | BTRFS_UUID_SIZE)) | ||
645 | ++fail; | ||
646 | |||
647 | crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc, | ||
648 | PAGE_SIZE - BTRFS_CSUM_SIZE); | ||
649 | btrfs_csum_final(crc, csum); | ||
650 | if (memcmp(csum, h->csum, sdev->csum_size)) | ||
651 | ++crc_fail; | ||
652 | |||
653 | spin_lock(&sdev->stat_lock); | ||
654 | ++sdev->stat.tree_extents_scrubbed; | ||
655 | sdev->stat.tree_bytes_scrubbed += PAGE_SIZE; | ||
656 | if (crc_fail) | ||
657 | ++sdev->stat.csum_errors; | ||
658 | if (fail) | ||
659 | ++sdev->stat.verify_errors; | ||
660 | spin_unlock(&sdev->stat_lock); | ||
661 | |||
662 | return fail || crc_fail; | ||
663 | } | ||
664 | |||
665 | static int scrub_checksum_super(struct scrub_bio *sbio, void *buffer) | ||
666 | { | ||
667 | struct btrfs_super_block *s; | ||
668 | u64 logical; | ||
669 | struct scrub_dev *sdev = sbio->sdev; | ||
670 | struct btrfs_root *root = sdev->dev->dev_root; | ||
671 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
672 | u8 csum[BTRFS_CSUM_SIZE]; | ||
673 | u32 crc = ~(u32)0; | ||
674 | int fail = 0; | ||
675 | |||
676 | s = (struct btrfs_super_block *)buffer; | ||
677 | logical = sbio->logical; | ||
678 | |||
679 | if (logical != le64_to_cpu(s->bytenr)) | ||
680 | ++fail; | ||
681 | |||
682 | if (sbio->spag[0].generation != le64_to_cpu(s->generation)) | ||
683 | ++fail; | ||
684 | |||
685 | if (memcmp(s->fsid, fs_info->fsid, BTRFS_UUID_SIZE)) | ||
686 | ++fail; | ||
687 | |||
688 | crc = btrfs_csum_data(root, buffer + BTRFS_CSUM_SIZE, crc, | ||
689 | PAGE_SIZE - BTRFS_CSUM_SIZE); | ||
690 | btrfs_csum_final(crc, csum); | ||
691 | if (memcmp(csum, s->csum, sbio->sdev->csum_size)) | ||
692 | ++fail; | ||
693 | |||
694 | if (fail) { | ||
695 | /* | ||
696 | * if we find an error in a super block, we just report it. | ||
697 | * They will get written with the next transaction commit | ||
698 | * anyway | ||
699 | */ | ||
700 | spin_lock(&sdev->stat_lock); | ||
701 | ++sdev->stat.super_errors; | ||
702 | spin_unlock(&sdev->stat_lock); | ||
703 | } | ||
704 | |||
705 | return fail; | ||
706 | } | ||
707 | |||
708 | static int scrub_submit(struct scrub_dev *sdev) | ||
709 | { | ||
710 | struct scrub_bio *sbio; | ||
711 | |||
712 | if (sdev->curr == -1) | ||
713 | return 0; | ||
714 | |||
715 | sbio = sdev->bios[sdev->curr]; | ||
716 | |||
717 | sbio->bio->bi_sector = sbio->physical >> 9; | ||
718 | sbio->bio->bi_size = sbio->count * PAGE_SIZE; | ||
719 | sbio->bio->bi_next = NULL; | ||
720 | sbio->bio->bi_flags |= 1 << BIO_UPTODATE; | ||
721 | sbio->bio->bi_comp_cpu = -1; | ||
722 | sbio->bio->bi_bdev = sdev->dev->bdev; | ||
723 | sbio->err = 0; | ||
724 | sdev->curr = -1; | ||
725 | atomic_inc(&sdev->in_flight); | ||
726 | |||
727 | submit_bio(0, sbio->bio); | ||
728 | |||
729 | return 0; | ||
730 | } | ||
731 | |||
732 | static int scrub_page(struct scrub_dev *sdev, u64 logical, u64 len, | ||
733 | u64 physical, u64 flags, u64 gen, u64 mirror_num, | ||
734 | u8 *csum, int force) | ||
735 | { | ||
736 | struct scrub_bio *sbio; | ||
737 | |||
738 | again: | ||
739 | /* | ||
740 | * grab a fresh bio or wait for one to become available | ||
741 | */ | ||
742 | while (sdev->curr == -1) { | ||
743 | spin_lock(&sdev->list_lock); | ||
744 | sdev->curr = sdev->first_free; | ||
745 | if (sdev->curr != -1) { | ||
746 | sdev->first_free = sdev->bios[sdev->curr]->next_free; | ||
747 | sdev->bios[sdev->curr]->next_free = -1; | ||
748 | sdev->bios[sdev->curr]->count = 0; | ||
749 | spin_unlock(&sdev->list_lock); | ||
750 | } else { | ||
751 | spin_unlock(&sdev->list_lock); | ||
752 | wait_event(sdev->list_wait, sdev->first_free != -1); | ||
753 | } | ||
754 | } | ||
755 | sbio = sdev->bios[sdev->curr]; | ||
756 | if (sbio->count == 0) { | ||
757 | sbio->physical = physical; | ||
758 | sbio->logical = logical; | ||
759 | } else if (sbio->physical + sbio->count * PAGE_SIZE != physical) { | ||
760 | scrub_submit(sdev); | ||
761 | goto again; | ||
762 | } | ||
763 | sbio->spag[sbio->count].flags = flags; | ||
764 | sbio->spag[sbio->count].generation = gen; | ||
765 | sbio->spag[sbio->count].have_csum = 0; | ||
766 | sbio->spag[sbio->count].mirror_num = mirror_num; | ||
767 | if (csum) { | ||
768 | sbio->spag[sbio->count].have_csum = 1; | ||
769 | memcpy(sbio->spag[sbio->count].csum, csum, sdev->csum_size); | ||
770 | } | ||
771 | ++sbio->count; | ||
772 | if (sbio->count == SCRUB_PAGES_PER_BIO || force) | ||
773 | scrub_submit(sdev); | ||
774 | |||
775 | return 0; | ||
776 | } | ||
777 | |||
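/*
 * look up the data checksum for @logical in the sums that scrub_stripe()
 * preloaded into sdev->csum_list via btrfs_lookup_csums_range(). Entries
 * that end at or before @logical can never match again and are dropped
 * (counted as csum_discards). Returns 1 and copies the checksum into
 * @csum on success, 0 if no checksum is known for this block.
 */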
778 | static int scrub_find_csum(struct scrub_dev *sdev, u64 logical, u64 len, | ||
779 | u8 *csum) | ||
780 | { | ||
781 | struct btrfs_ordered_sum *sum = NULL; | ||
782 | int ret = 0; | ||
783 | unsigned long i; | ||
784 | unsigned long num_sectors; | ||
785 | u32 sectorsize = sdev->dev->dev_root->sectorsize; | ||
786 | |||
787 | while (!list_empty(&sdev->csum_list)) { | ||
788 | sum = list_first_entry(&sdev->csum_list, | ||
789 | struct btrfs_ordered_sum, list); | ||
790 | if (sum->bytenr > logical) | ||
791 | return 0; | ||
792 | if (sum->bytenr + sum->len > logical) | ||
793 | break; | ||
794 | |||
795 | ++sdev->stat.csum_discards; | ||
796 | list_del(&sum->list); | ||
797 | kfree(sum); | ||
798 | sum = NULL; | ||
799 | } | ||
800 | if (!sum) | ||
801 | return 0; | ||
802 | |||
803 | num_sectors = sum->len / sectorsize; | ||
804 | for (i = 0; i < num_sectors; ++i) { | ||
805 | if (sum->sums[i].bytenr == logical) { | ||
806 | memcpy(csum, &sum->sums[i].sum, sdev->csum_size); | ||
807 | ret = 1; | ||
808 | break; | ||
809 | } | ||
810 | } | ||
811 | if (ret && i == num_sectors - 1) { | ||
812 | list_del(&sum->list); | ||
813 | kfree(sum); | ||
814 | } | ||
815 | return ret; | ||
816 | } | ||
817 | |||
818 | /* scrub_extent tries to collect up to 64 kB for each bio */ | ||
819 | static int scrub_extent(struct scrub_dev *sdev, u64 logical, u64 len, | ||
820 | u64 physical, u64 flags, u64 gen, u64 mirror_num) | ||
821 | { | ||
822 | int ret; | ||
823 | u8 csum[BTRFS_CSUM_SIZE]; | ||
824 | |||
825 | while (len) { | ||
826 | u64 l = min_t(u64, len, PAGE_SIZE); | ||
827 | int have_csum = 0; | ||
828 | |||
829 | if (flags & BTRFS_EXTENT_FLAG_DATA) { | ||
830 | /* push csums to sbio */ | ||
831 | have_csum = scrub_find_csum(sdev, logical, l, csum); | ||
832 | if (have_csum == 0) | ||
833 | ++sdev->stat.no_csum; | ||
834 | } | ||
835 | ret = scrub_page(sdev, logical, l, physical, flags, gen, | ||
836 | mirror_num, have_csum ? csum : NULL, 0); | ||
837 | if (ret) | ||
838 | return ret; | ||
839 | len -= l; | ||
840 | logical += l; | ||
841 | physical += l; | ||
842 | } | ||
843 | return 0; | ||
844 | } | ||
845 | |||
846 | static noinline_for_stack int scrub_stripe(struct scrub_dev *sdev, | ||
847 | struct map_lookup *map, int num, u64 base, u64 length) | ||
848 | { | ||
849 | struct btrfs_path *path; | ||
850 | struct btrfs_fs_info *fs_info = sdev->dev->dev_root->fs_info; | ||
851 | struct btrfs_root *root = fs_info->extent_root; | ||
852 | struct btrfs_root *csum_root = fs_info->csum_root; | ||
853 | struct btrfs_extent_item *extent; | ||
854 | u64 flags; | ||
855 | int ret; | ||
856 | int slot; | ||
857 | int i; | ||
858 | u64 nstripes; | ||
859 | int start_stripe; | ||
860 | struct extent_buffer *l; | ||
861 | struct btrfs_key key; | ||
862 | u64 physical; | ||
863 | u64 logical; | ||
864 | u64 generation; | ||
865 | u64 mirror_num; | ||
866 | |||
867 | u64 increment = map->stripe_len; | ||
868 | u64 offset; | ||
869 | |||
870 | nstripes = length; | ||
871 | offset = 0; | ||
872 | do_div(nstripes, map->stripe_len); | ||
873 | if (map->type & BTRFS_BLOCK_GROUP_RAID0) { | ||
874 | offset = map->stripe_len * num; | ||
875 | increment = map->stripe_len * map->num_stripes; | ||
876 | mirror_num = 0; | ||
877 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID10) { | ||
878 | int factor = map->num_stripes / map->sub_stripes; | ||
879 | offset = map->stripe_len * (num / map->sub_stripes); | ||
880 | increment = map->stripe_len * factor; | ||
881 | mirror_num = num % map->sub_stripes; | ||
882 | } else if (map->type & BTRFS_BLOCK_GROUP_RAID1) { | ||
883 | increment = map->stripe_len; | ||
884 | mirror_num = num % map->num_stripes; | ||
885 | } else if (map->type & BTRFS_BLOCK_GROUP_DUP) { | ||
886 | increment = map->stripe_len; | ||
887 | mirror_num = num % map->num_stripes; | ||
888 | } else { | ||
889 | increment = map->stripe_len; | ||
890 | mirror_num = 0; | ||
891 | } | ||
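	/*
	 * illustrative example (values made up, not from a real filesystem):
	 * for a RAID10 chunk with num_stripes = 4, sub_stripes = 2 and
	 * stripe_len = 64 KiB, device index num = 3 gets factor = 2,
	 * offset = 64 KiB, increment = 128 KiB and mirror_num = 1, i.e. it
	 * scrubs the second copy of every other 64 KiB stripe, starting
	 * 64 KiB into the chunk's logical range.
	 */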
892 | |||
893 | path = btrfs_alloc_path(); | ||
894 | if (!path) | ||
895 | return -ENOMEM; | ||
896 | |||
897 | path->reada = 2; | ||
898 | path->search_commit_root = 1; | ||
899 | path->skip_locking = 1; | ||
900 | |||
901 | /* | ||
902 | * find all extents for each stripe and just read them to get | ||
903 | * them into the page cache | ||
904 | * FIXME: we can do better. build a more intelligent prefetching | ||
905 | */ | ||
906 | logical = base + offset; | ||
907 | physical = map->stripes[num].physical; | ||
908 | ret = 0; | ||
909 | for (i = 0; i < nstripes; ++i) { | ||
910 | key.objectid = logical; | ||
911 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
912 | key.offset = (u64)0; | ||
913 | |||
914 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
915 | if (ret < 0) | ||
916 | goto out; | ||
917 | |||
918 | l = path->nodes[0]; | ||
919 | slot = path->slots[0]; | ||
920 | btrfs_item_key_to_cpu(l, &key, slot); | ||
921 | if (key.objectid != logical) { | ||
922 | ret = btrfs_previous_item(root, path, 0, | ||
923 | BTRFS_EXTENT_ITEM_KEY); | ||
924 | if (ret < 0) | ||
925 | goto out; | ||
926 | } | ||
927 | |||
928 | while (1) { | ||
929 | l = path->nodes[0]; | ||
930 | slot = path->slots[0]; | ||
931 | if (slot >= btrfs_header_nritems(l)) { | ||
932 | ret = btrfs_next_leaf(root, path); | ||
933 | if (ret == 0) | ||
934 | continue; | ||
935 | if (ret < 0) | ||
936 | goto out; | ||
937 | |||
938 | break; | ||
939 | } | ||
940 | btrfs_item_key_to_cpu(l, &key, slot); | ||
941 | |||
942 | if (key.objectid >= logical + map->stripe_len) | ||
943 | break; | ||
944 | |||
945 | path->slots[0]++; | ||
946 | } | ||
947 | btrfs_release_path(root, path); | ||
948 | logical += increment; | ||
949 | physical += map->stripe_len; | ||
950 | cond_resched(); | ||
951 | } | ||
952 | |||
953 | /* | ||
954 | * collect all data csums for the stripe to avoid seeking during | ||
955 | * the scrub. This might currently (crc32) end up being about 1 MB | ||
956 | */ | ||
957 | start_stripe = 0; | ||
958 | again: | ||
959 | logical = base + offset + start_stripe * increment; | ||
960 | for (i = start_stripe; i < nstripes; ++i) { | ||
961 | ret = btrfs_lookup_csums_range(csum_root, logical, | ||
962 | logical + map->stripe_len - 1, | ||
963 | &sdev->csum_list, 1); | ||
964 | if (ret) | ||
965 | goto out; | ||
966 | |||
967 | logical += increment; | ||
968 | cond_resched(); | ||
969 | } | ||
970 | /* | ||
971 | * now find all extents for each stripe and scrub them | ||
972 | */ | ||
973 | logical = base + offset + start_stripe * increment; | ||
974 | physical = map->stripes[num].physical + start_stripe * map->stripe_len; | ||
975 | ret = 0; | ||
976 | for (i = start_stripe; i < nstripes; ++i) { | ||
977 | /* | ||
978 | * canceled? | ||
979 | */ | ||
980 | if (atomic_read(&fs_info->scrub_cancel_req) || | ||
981 | atomic_read(&sdev->cancel_req)) { | ||
982 | ret = -ECANCELED; | ||
983 | goto out; | ||
984 | } | ||
985 | /* | ||
986 | * check to see if we have to pause | ||
987 | */ | ||
988 | if (atomic_read(&fs_info->scrub_pause_req)) { | ||
989 | /* push queued extents */ | ||
990 | scrub_submit(sdev); | ||
991 | wait_event(sdev->list_wait, | ||
992 | atomic_read(&sdev->in_flight) == 0); | ||
993 | atomic_inc(&fs_info->scrubs_paused); | ||
994 | wake_up(&fs_info->scrub_pause_wait); | ||
995 | mutex_lock(&fs_info->scrub_lock); | ||
996 | while (atomic_read(&fs_info->scrub_pause_req)) { | ||
997 | mutex_unlock(&fs_info->scrub_lock); | ||
998 | wait_event(fs_info->scrub_pause_wait, | ||
999 | atomic_read(&fs_info->scrub_pause_req) == 0); | ||
1000 | mutex_lock(&fs_info->scrub_lock); | ||
1001 | } | ||
1002 | atomic_dec(&fs_info->scrubs_paused); | ||
1003 | mutex_unlock(&fs_info->scrub_lock); | ||
1004 | wake_up(&fs_info->scrub_pause_wait); | ||
1005 | scrub_free_csums(sdev); | ||
1006 | start_stripe = i; | ||
1007 | goto again; | ||
1008 | } | ||
1009 | |||
1010 | key.objectid = logical; | ||
1011 | key.type = BTRFS_EXTENT_ITEM_KEY; | ||
1012 | key.offset = (u64)0; | ||
1013 | |||
1014 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
1015 | if (ret < 0) | ||
1016 | goto out; | ||
1017 | |||
1018 | l = path->nodes[0]; | ||
1019 | slot = path->slots[0]; | ||
1020 | btrfs_item_key_to_cpu(l, &key, slot); | ||
1021 | if (key.objectid != logical) { | ||
1022 | ret = btrfs_previous_item(root, path, 0, | ||
1023 | BTRFS_EXTENT_ITEM_KEY); | ||
1024 | if (ret < 0) | ||
1025 | goto out; | ||
1026 | } | ||
1027 | |||
1028 | while (1) { | ||
1029 | l = path->nodes[0]; | ||
1030 | slot = path->slots[0]; | ||
1031 | if (slot >= btrfs_header_nritems(l)) { | ||
1032 | ret = btrfs_next_leaf(root, path); | ||
1033 | if (ret == 0) | ||
1034 | continue; | ||
1035 | if (ret < 0) | ||
1036 | goto out; | ||
1037 | |||
1038 | break; | ||
1039 | } | ||
1040 | btrfs_item_key_to_cpu(l, &key, slot); | ||
1041 | |||
1042 | if (key.objectid + key.offset <= logical) | ||
1043 | goto next; | ||
1044 | |||
1045 | if (key.objectid >= logical + map->stripe_len) | ||
1046 | break; | ||
1047 | |||
1048 | if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY) | ||
1049 | goto next; | ||
1050 | |||
1051 | extent = btrfs_item_ptr(l, slot, | ||
1052 | struct btrfs_extent_item); | ||
1053 | flags = btrfs_extent_flags(l, extent); | ||
1054 | generation = btrfs_extent_generation(l, extent); | ||
1055 | |||
1056 | if (key.objectid < logical && | ||
1057 | (flags & BTRFS_EXTENT_FLAG_TREE_BLOCK)) { | ||
1058 | printk(KERN_ERR | ||
1059 | "btrfs scrub: tree block %llu spanning " | ||
1060 | "stripes, ignored. logical=%llu\n", | ||
1061 | (unsigned long long)key.objectid, | ||
1062 | (unsigned long long)logical); | ||
1063 | goto next; | ||
1064 | } | ||
1065 | |||
1066 | /* | ||
1067 | * trim extent to this stripe | ||
1068 | */ | ||
1069 | if (key.objectid < logical) { | ||
1070 | key.offset -= logical - key.objectid; | ||
1071 | key.objectid = logical; | ||
1072 | } | ||
1073 | if (key.objectid + key.offset > | ||
1074 | logical + map->stripe_len) { | ||
1075 | key.offset = logical + map->stripe_len - | ||
1076 | key.objectid; | ||
1077 | } | ||
1078 | |||
1079 | ret = scrub_extent(sdev, key.objectid, key.offset, | ||
1080 | key.objectid - logical + physical, | ||
1081 | flags, generation, mirror_num); | ||
1082 | if (ret) | ||
1083 | goto out; | ||
1084 | |||
1085 | next: | ||
1086 | path->slots[0]++; | ||
1087 | } | ||
1088 | btrfs_release_path(root, path); | ||
1089 | logical += increment; | ||
1090 | physical += map->stripe_len; | ||
1091 | spin_lock(&sdev->stat_lock); | ||
1092 | sdev->stat.last_physical = physical; | ||
1093 | spin_unlock(&sdev->stat_lock); | ||
1094 | } | ||
1095 | /* push queued extents */ | ||
1096 | scrub_submit(sdev); | ||
1097 | |||
1098 | out: | ||
1099 | btrfs_free_path(path); | ||
1100 | return ret < 0 ? ret : 0; | ||
1101 | } | ||
1102 | |||
1103 | static noinline_for_stack int scrub_chunk(struct scrub_dev *sdev, | ||
1104 | u64 chunk_tree, u64 chunk_objectid, u64 chunk_offset, u64 length) | ||
1105 | { | ||
1106 | struct btrfs_mapping_tree *map_tree = | ||
1107 | &sdev->dev->dev_root->fs_info->mapping_tree; | ||
1108 | struct map_lookup *map; | ||
1109 | struct extent_map *em; | ||
1110 | int i; | ||
1111 | int ret = -EINVAL; | ||
1112 | |||
1113 | read_lock(&map_tree->map_tree.lock); | ||
1114 | em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); | ||
1115 | read_unlock(&map_tree->map_tree.lock); | ||
1116 | |||
1117 | if (!em) | ||
1118 | return -EINVAL; | ||
1119 | |||
1120 | map = (struct map_lookup *)em->bdev; | ||
1121 | if (em->start != chunk_offset) | ||
1122 | goto out; | ||
1123 | |||
1124 | if (em->len < length) | ||
1125 | goto out; | ||
1126 | |||
1127 | for (i = 0; i < map->num_stripes; ++i) { | ||
1128 | if (map->stripes[i].dev == sdev->dev) { | ||
1129 | ret = scrub_stripe(sdev, map, i, chunk_offset, length); | ||
1130 | if (ret) | ||
1131 | goto out; | ||
1132 | } | ||
1133 | } | ||
1134 | out: | ||
1135 | free_extent_map(em); | ||
1136 | |||
1137 | return ret; | ||
1138 | } | ||
1139 | |||
1140 | static noinline_for_stack | ||
1141 | int scrub_enumerate_chunks(struct scrub_dev *sdev, u64 start, u64 end) | ||
1142 | { | ||
1143 | struct btrfs_dev_extent *dev_extent = NULL; | ||
1144 | struct btrfs_path *path; | ||
1145 | struct btrfs_root *root = sdev->dev->dev_root; | ||
1146 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
1147 | u64 length; | ||
1148 | u64 chunk_tree; | ||
1149 | u64 chunk_objectid; | ||
1150 | u64 chunk_offset; | ||
1151 | int ret; | ||
1152 | int slot; | ||
1153 | struct extent_buffer *l; | ||
1154 | struct btrfs_key key; | ||
1155 | struct btrfs_key found_key; | ||
1156 | struct btrfs_block_group_cache *cache; | ||
1157 | |||
1158 | path = btrfs_alloc_path(); | ||
1159 | if (!path) | ||
1160 | return -ENOMEM; | ||
1161 | |||
1162 | path->reada = 2; | ||
1163 | path->search_commit_root = 1; | ||
1164 | path->skip_locking = 1; | ||
1165 | |||
1166 | key.objectid = sdev->dev->devid; | ||
1167 | key.offset = 0ull; | ||
1168 | key.type = BTRFS_DEV_EXTENT_KEY; | ||
1169 | |||
1170 | |||
1171 | while (1) { | ||
1172 | ret = btrfs_search_slot(NULL, root, &key, path, 0, 0); | ||
1173 | if (ret < 0) | ||
1174 | goto out; | ||
1175 | ret = 0; | ||
1176 | |||
1177 | l = path->nodes[0]; | ||
1178 | slot = path->slots[0]; | ||
1179 | |||
1180 | btrfs_item_key_to_cpu(l, &found_key, slot); | ||
1181 | |||
1182 | if (found_key.objectid != sdev->dev->devid) | ||
1183 | break; | ||
1184 | |||
1185 | if (btrfs_key_type(&key) != BTRFS_DEV_EXTENT_KEY) | ||
1186 | break; | ||
1187 | |||
1188 | if (found_key.offset >= end) | ||
1189 | break; | ||
1190 | |||
1191 | if (found_key.offset < key.offset) | ||
1192 | break; | ||
1193 | |||
1194 | dev_extent = btrfs_item_ptr(l, slot, struct btrfs_dev_extent); | ||
1195 | length = btrfs_dev_extent_length(l, dev_extent); | ||
1196 | |||
1197 | if (found_key.offset + length <= start) { | ||
1198 | key.offset = found_key.offset + length; | ||
1199 | btrfs_release_path(root, path); | ||
1200 | continue; | ||
1201 | } | ||
1202 | |||
1203 | chunk_tree = btrfs_dev_extent_chunk_tree(l, dev_extent); | ||
1204 | chunk_objectid = btrfs_dev_extent_chunk_objectid(l, dev_extent); | ||
1205 | chunk_offset = btrfs_dev_extent_chunk_offset(l, dev_extent); | ||
1206 | |||
1207 | /* | ||
1208 | * get a reference on the corresponding block group to prevent | ||
1209 | * the chunk from going away while we scrub it | ||
1210 | */ | ||
1211 | cache = btrfs_lookup_block_group(fs_info, chunk_offset); | ||
1212 | if (!cache) { | ||
1213 | ret = -ENOENT; | ||
1214 | goto out; | ||
1215 | } | ||
1216 | ret = scrub_chunk(sdev, chunk_tree, chunk_objectid, | ||
1217 | chunk_offset, length); | ||
1218 | btrfs_put_block_group(cache); | ||
1219 | if (ret) | ||
1220 | break; | ||
1221 | |||
1222 | key.offset = found_key.offset + length; | ||
1223 | btrfs_release_path(root, path); | ||
1224 | } | ||
1225 | |||
1226 | out: | ||
1227 | btrfs_free_path(path); | ||
1228 | return ret; | ||
1229 | } | ||
1230 | |||
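/*
 * scrub all BTRFS_SUPER_MIRROR_MAX (3) superblock copies of this device.
 * btrfs_sb_offset() returns their fixed device offsets (64 KiB, 64 MiB,
 * 256 GiB); copies beyond the end of the device are skipped. The expected
 * generation is the last committed transaction, matching the commit roots
 * that the rest of the scrub reads.
 */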
1231 | static noinline_for_stack int scrub_supers(struct scrub_dev *sdev) | ||
1232 | { | ||
1233 | int i; | ||
1234 | u64 bytenr; | ||
1235 | u64 gen; | ||
1236 | int ret; | ||
1237 | struct btrfs_device *device = sdev->dev; | ||
1238 | struct btrfs_root *root = device->dev_root; | ||
1239 | |||
1240 | gen = root->fs_info->last_trans_committed; | ||
1241 | |||
1242 | for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) { | ||
1243 | bytenr = btrfs_sb_offset(i); | ||
1244 | if (bytenr + BTRFS_SUPER_INFO_SIZE >= device->total_bytes) | ||
1245 | break; | ||
1246 | |||
1247 | ret = scrub_page(sdev, bytenr, PAGE_SIZE, bytenr, | ||
1248 | BTRFS_EXTENT_FLAG_SUPER, gen, i, NULL, 1); | ||
1249 | if (ret) | ||
1250 | return ret; | ||
1251 | } | ||
1252 | wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0); | ||
1253 | |||
1254 | return 0; | ||
1255 | } | ||
1256 | |||
1257 | /* | ||
1258 | * get a reference count on fs_info->scrub_workers. start worker if necessary | ||
1259 | */ | ||
1260 | static noinline_for_stack int scrub_workers_get(struct btrfs_root *root) | ||
1261 | { | ||
1262 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
1263 | |||
1264 | mutex_lock(&fs_info->scrub_lock); | ||
1265 | if (fs_info->scrub_workers_refcnt == 0) | ||
1266 | btrfs_start_workers(&fs_info->scrub_workers, 1); | ||
1267 | ++fs_info->scrub_workers_refcnt; | ||
1268 | mutex_unlock(&fs_info->scrub_lock); | ||
1269 | |||
1270 | return 0; | ||
1271 | } | ||
1272 | |||
1273 | static noinline_for_stack void scrub_workers_put(struct btrfs_root *root) | ||
1274 | { | ||
1275 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
1276 | |||
1277 | mutex_lock(&fs_info->scrub_lock); | ||
1278 | if (--fs_info->scrub_workers_refcnt == 0) | ||
1279 | btrfs_stop_workers(&fs_info->scrub_workers); | ||
1280 | WARN_ON(fs_info->scrub_workers_refcnt < 0); | ||
1281 | mutex_unlock(&fs_info->scrub_lock); | ||
1282 | } | ||
1283 | |||
1284 | |||
1285 | int btrfs_scrub_dev(struct btrfs_root *root, u64 devid, u64 start, u64 end, | ||
1286 | struct btrfs_scrub_progress *progress) | ||
1287 | { | ||
1288 | struct scrub_dev *sdev; | ||
1289 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
1290 | int ret; | ||
1291 | struct btrfs_device *dev; | ||
1292 | |||
1293 | if (root->fs_info->closing) | ||
1294 | return -EINVAL; | ||
1295 | |||
1296 | /* | ||
1297 | * check some assumptions | ||
1298 | */ | ||
1299 | if (root->sectorsize != PAGE_SIZE || | ||
1300 | root->sectorsize != root->leafsize || | ||
1301 | root->sectorsize != root->nodesize) { | ||
1302 | printk(KERN_ERR "btrfs_scrub: size assumptions fail\n"); | ||
1303 | return -EINVAL; | ||
1304 | } | ||
1305 | |||
1306 | ret = scrub_workers_get(root); | ||
1307 | if (ret) | ||
1308 | return ret; | ||
1309 | |||
1310 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | ||
1311 | dev = btrfs_find_device(root, devid, NULL, NULL); | ||
1312 | if (!dev || dev->missing) { | ||
1313 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
1314 | scrub_workers_put(root); | ||
1315 | return -ENODEV; | ||
1316 | } | ||
1317 | mutex_lock(&fs_info->scrub_lock); | ||
1318 | |||
1319 | if (!dev->in_fs_metadata) { | ||
1320 | mutex_unlock(&fs_info->scrub_lock); | ||
1321 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
1322 | scrub_workers_put(root); | ||
1323 | return -ENODEV; | ||
1324 | } | ||
1325 | |||
1326 | if (dev->scrub_device) { | ||
1327 | mutex_unlock(&fs_info->scrub_lock); | ||
1328 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
1329 | scrub_workers_put(root); | ||
1330 | return -EINPROGRESS; | ||
1331 | } | ||
1332 | sdev = scrub_setup_dev(dev); | ||
1333 | if (IS_ERR(sdev)) { | ||
1334 | mutex_unlock(&fs_info->scrub_lock); | ||
1335 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
1336 | scrub_workers_put(root); | ||
1337 | return PTR_ERR(sdev); | ||
1338 | } | ||
1339 | dev->scrub_device = sdev; | ||
1340 | |||
1341 | atomic_inc(&fs_info->scrubs_running); | ||
1342 | mutex_unlock(&fs_info->scrub_lock); | ||
1343 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
1344 | |||
1345 | down_read(&fs_info->scrub_super_lock); | ||
1346 | ret = scrub_supers(sdev); | ||
1347 | up_read(&fs_info->scrub_super_lock); | ||
1348 | |||
1349 | if (!ret) | ||
1350 | ret = scrub_enumerate_chunks(sdev, start, end); | ||
1351 | |||
1352 | wait_event(sdev->list_wait, atomic_read(&sdev->in_flight) == 0); | ||
1353 | |||
1354 | atomic_dec(&fs_info->scrubs_running); | ||
1355 | wake_up(&fs_info->scrub_pause_wait); | ||
1356 | |||
1357 | if (progress) | ||
1358 | memcpy(progress, &sdev->stat, sizeof(*progress)); | ||
1359 | |||
1360 | mutex_lock(&fs_info->scrub_lock); | ||
1361 | dev->scrub_device = NULL; | ||
1362 | mutex_unlock(&fs_info->scrub_lock); | ||
1363 | |||
1364 | scrub_free_dev(sdev); | ||
1365 | scrub_workers_put(root); | ||
1366 | |||
1367 | return ret; | ||
1368 | } | ||
1369 | |||
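/*
 * block until all running scrubs have reached their pause point.
 * btrfs_scrub_pause() raises scrub_pause_req and waits until scrubs_paused
 * equals scrubs_running; each scrub thread notices the request at a stripe
 * boundary in scrub_stripe(), flushes its queued bios, bumps scrubs_paused
 * and sleeps until btrfs_scrub_continue() drops scrub_pause_req again.
 * scrub_recheck_error() bumps scrubs_running for the same reason: a pending
 * fixup must hold off the pause until it has finished.
 */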
1370 | int btrfs_scrub_pause(struct btrfs_root *root) | ||
1371 | { | ||
1372 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
1373 | |||
1374 | mutex_lock(&fs_info->scrub_lock); | ||
1375 | atomic_inc(&fs_info->scrub_pause_req); | ||
1376 | while (atomic_read(&fs_info->scrubs_paused) != | ||
1377 | atomic_read(&fs_info->scrubs_running)) { | ||
1378 | mutex_unlock(&fs_info->scrub_lock); | ||
1379 | wait_event(fs_info->scrub_pause_wait, | ||
1380 | atomic_read(&fs_info->scrubs_paused) == | ||
1381 | atomic_read(&fs_info->scrubs_running)); | ||
1382 | mutex_lock(&fs_info->scrub_lock); | ||
1383 | } | ||
1384 | mutex_unlock(&fs_info->scrub_lock); | ||
1385 | |||
1386 | return 0; | ||
1387 | } | ||
1388 | |||
1389 | int btrfs_scrub_continue(struct btrfs_root *root) | ||
1390 | { | ||
1391 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
1392 | |||
1393 | atomic_dec(&fs_info->scrub_pause_req); | ||
1394 | wake_up(&fs_info->scrub_pause_wait); | ||
1395 | return 0; | ||
1396 | } | ||
1397 | |||
1398 | int btrfs_scrub_pause_super(struct btrfs_root *root) | ||
1399 | { | ||
1400 | down_write(&root->fs_info->scrub_super_lock); | ||
1401 | return 0; | ||
1402 | } | ||
1403 | |||
1404 | int btrfs_scrub_continue_super(struct btrfs_root *root) | ||
1405 | { | ||
1406 | up_write(&root->fs_info->scrub_super_lock); | ||
1407 | return 0; | ||
1408 | } | ||
1409 | |||
1410 | int btrfs_scrub_cancel(struct btrfs_root *root) | ||
1411 | { | ||
1412 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
1413 | |||
1414 | mutex_lock(&fs_info->scrub_lock); | ||
1415 | if (!atomic_read(&fs_info->scrubs_running)) { | ||
1416 | mutex_unlock(&fs_info->scrub_lock); | ||
1417 | return -ENOTCONN; | ||
1418 | } | ||
1419 | |||
1420 | atomic_inc(&fs_info->scrub_cancel_req); | ||
1421 | while (atomic_read(&fs_info->scrubs_running)) { | ||
1422 | mutex_unlock(&fs_info->scrub_lock); | ||
1423 | wait_event(fs_info->scrub_pause_wait, | ||
1424 | atomic_read(&fs_info->scrubs_running) == 0); | ||
1425 | mutex_lock(&fs_info->scrub_lock); | ||
1426 | } | ||
1427 | atomic_dec(&fs_info->scrub_cancel_req); | ||
1428 | mutex_unlock(&fs_info->scrub_lock); | ||
1429 | |||
1430 | return 0; | ||
1431 | } | ||
1432 | |||
1433 | int btrfs_scrub_cancel_dev(struct btrfs_root *root, struct btrfs_device *dev) | ||
1434 | { | ||
1435 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
1436 | struct scrub_dev *sdev; | ||
1437 | |||
1438 | mutex_lock(&fs_info->scrub_lock); | ||
1439 | sdev = dev->scrub_device; | ||
1440 | if (!sdev) { | ||
1441 | mutex_unlock(&fs_info->scrub_lock); | ||
1442 | return -ENOTCONN; | ||
1443 | } | ||
1444 | atomic_inc(&sdev->cancel_req); | ||
1445 | while (dev->scrub_device) { | ||
1446 | mutex_unlock(&fs_info->scrub_lock); | ||
1447 | wait_event(fs_info->scrub_pause_wait, | ||
1448 | dev->scrub_device == NULL); | ||
1449 | mutex_lock(&fs_info->scrub_lock); | ||
1450 | } | ||
1451 | mutex_unlock(&fs_info->scrub_lock); | ||
1452 | |||
1453 | return 0; | ||
1454 | } | ||
1455 | int btrfs_scrub_cancel_devid(struct btrfs_root *root, u64 devid) | ||
1456 | { | ||
1457 | struct btrfs_fs_info *fs_info = root->fs_info; | ||
1458 | struct btrfs_device *dev; | ||
1459 | int ret; | ||
1460 | |||
1461 | /* | ||
1462 | * we have to hold the device_list_mutex here so the device | ||
1463 | * does not go away in cancel_dev. FIXME: find a better solution | ||
1464 | */ | ||
1465 | mutex_lock(&fs_info->fs_devices->device_list_mutex); | ||
1466 | dev = btrfs_find_device(root, devid, NULL, NULL); | ||
1467 | if (!dev) { | ||
1468 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); | ||
1469 | return -ENODEV; | ||
1470 | } | ||
1471 | ret = btrfs_scrub_cancel_dev(root, dev); | ||
1472 | mutex_unlock(&fs_info->fs_devices->device_list_mutex); | ||
1473 | |||
1474 | return ret; | ||
1475 | } | ||
1476 | |||
1477 | int btrfs_scrub_progress(struct btrfs_root *root, u64 devid, | ||
1478 | struct btrfs_scrub_progress *progress) | ||
1479 | { | ||
1480 | struct btrfs_device *dev; | ||
1481 | struct scrub_dev *sdev = NULL; | ||
1482 | |||
1483 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | ||
1484 | dev = btrfs_find_device(root, devid, NULL, NULL); | ||
1485 | if (dev) | ||
1486 | sdev = dev->scrub_device; | ||
1487 | if (sdev) | ||
1488 | memcpy(progress, &sdev->stat, sizeof(*progress)); | ||
1489 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
1490 | |||
1491 | return dev ? (sdev ? 0 : -ENOTCONN) : -ENODEV; | ||
1492 | } | ||