diff options
Diffstat (limited to 'fs/btrfs/disk-io.c')
-rw-r--r-- | fs/btrfs/disk-io.c | 661 |
1 files changed, 368 insertions, 293 deletions
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d24a841a8722..09ef25f0c6c7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c | |||
@@ -48,20 +48,19 @@ | |||
48 | static struct extent_io_ops btree_extent_io_ops; | 48 | static struct extent_io_ops btree_extent_io_ops; |
49 | static void end_workqueue_fn(struct btrfs_work *work); | 49 | static void end_workqueue_fn(struct btrfs_work *work); |
50 | static void free_fs_root(struct btrfs_root *root); | 50 | static void free_fs_root(struct btrfs_root *root); |
51 | static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | 51 | static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, |
52 | int read_only); | 52 | int read_only); |
53 | static int btrfs_destroy_ordered_operations(struct btrfs_root *root); | 53 | static void btrfs_destroy_ordered_operations(struct btrfs_root *root); |
54 | static int btrfs_destroy_ordered_extents(struct btrfs_root *root); | 54 | static void btrfs_destroy_ordered_extents(struct btrfs_root *root); |
55 | static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | 55 | static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, |
56 | struct btrfs_root *root); | 56 | struct btrfs_root *root); |
57 | static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t); | 57 | static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t); |
58 | static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root); | 58 | static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root); |
59 | static int btrfs_destroy_marked_extents(struct btrfs_root *root, | 59 | static int btrfs_destroy_marked_extents(struct btrfs_root *root, |
60 | struct extent_io_tree *dirty_pages, | 60 | struct extent_io_tree *dirty_pages, |
61 | int mark); | 61 | int mark); |
62 | static int btrfs_destroy_pinned_extent(struct btrfs_root *root, | 62 | static int btrfs_destroy_pinned_extent(struct btrfs_root *root, |
63 | struct extent_io_tree *pinned_extents); | 63 | struct extent_io_tree *pinned_extents); |
64 | static int btrfs_cleanup_transaction(struct btrfs_root *root); | ||
65 | 64 | ||
66 | /* | 65 | /* |
67 | * end_io_wq structs are used to do processing in task context when an IO is | 66 | * end_io_wq structs are used to do processing in task context when an IO is |
@@ -99,6 +98,7 @@ struct async_submit_bio { | |||
99 | */ | 98 | */ |
100 | u64 bio_offset; | 99 | u64 bio_offset; |
101 | struct btrfs_work work; | 100 | struct btrfs_work work; |
101 | int error; | ||
102 | }; | 102 | }; |
103 | 103 | ||
104 | /* | 104 | /* |
@@ -332,8 +332,8 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, | |||
332 | return 0; | 332 | return 0; |
333 | 333 | ||
334 | lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1, | 334 | lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1, |
335 | 0, &cached_state, GFP_NOFS); | 335 | 0, &cached_state); |
336 | if (extent_buffer_uptodate(io_tree, eb, cached_state) && | 336 | if (extent_buffer_uptodate(eb) && |
337 | btrfs_header_generation(eb) == parent_transid) { | 337 | btrfs_header_generation(eb) == parent_transid) { |
338 | ret = 0; | 338 | ret = 0; |
339 | goto out; | 339 | goto out; |
@@ -344,7 +344,7 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, | |||
344 | (unsigned long long)parent_transid, | 344 | (unsigned long long)parent_transid, |
345 | (unsigned long long)btrfs_header_generation(eb)); | 345 | (unsigned long long)btrfs_header_generation(eb)); |
346 | ret = 1; | 346 | ret = 1; |
347 | clear_extent_buffer_uptodate(io_tree, eb, &cached_state); | 347 | clear_extent_buffer_uptodate(eb); |
348 | out: | 348 | out: |
349 | unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1, | 349 | unlock_extent_cached(io_tree, eb->start, eb->start + eb->len - 1, |
350 | &cached_state, GFP_NOFS); | 350 | &cached_state, GFP_NOFS); |
@@ -360,9 +360,11 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, | |||
360 | u64 start, u64 parent_transid) | 360 | u64 start, u64 parent_transid) |
361 | { | 361 | { |
362 | struct extent_io_tree *io_tree; | 362 | struct extent_io_tree *io_tree; |
363 | int failed = 0; | ||
363 | int ret; | 364 | int ret; |
364 | int num_copies = 0; | 365 | int num_copies = 0; |
365 | int mirror_num = 0; | 366 | int mirror_num = 0; |
367 | int failed_mirror = 0; | ||
366 | 368 | ||
367 | clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); | 369 | clear_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags); |
368 | io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; | 370 | io_tree = &BTRFS_I(root->fs_info->btree_inode)->io_tree; |
@@ -370,9 +372,8 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, | |||
370 | ret = read_extent_buffer_pages(io_tree, eb, start, | 372 | ret = read_extent_buffer_pages(io_tree, eb, start, |
371 | WAIT_COMPLETE, | 373 | WAIT_COMPLETE, |
372 | btree_get_extent, mirror_num); | 374 | btree_get_extent, mirror_num); |
373 | if (!ret && | 375 | if (!ret && !verify_parent_transid(io_tree, eb, parent_transid)) |
374 | !verify_parent_transid(io_tree, eb, parent_transid)) | 376 | break; |
375 | return ret; | ||
376 | 377 | ||
377 | /* | 378 | /* |
378 | * This buffer's crc is fine, but its contents are corrupted, so | 379 | * This buffer's crc is fine, but its contents are corrupted, so |
@@ -380,18 +381,31 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, | |||
380 | * any less wrong. | 381 | * any less wrong. |
381 | */ | 382 | */ |
382 | if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags)) | 383 | if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags)) |
383 | return ret; | 384 | break; |
385 | |||
386 | if (!failed_mirror) { | ||
387 | failed = 1; | ||
388 | printk(KERN_ERR "failed mirror was %d\n", eb->failed_mirror); | ||
389 | failed_mirror = eb->failed_mirror; | ||
390 | } | ||
384 | 391 | ||
385 | num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, | 392 | num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, |
386 | eb->start, eb->len); | 393 | eb->start, eb->len); |
387 | if (num_copies == 1) | 394 | if (num_copies == 1) |
388 | return ret; | 395 | break; |
389 | 396 | ||
390 | mirror_num++; | 397 | mirror_num++; |
398 | if (mirror_num == failed_mirror) | ||
399 | mirror_num++; | ||
400 | |||
391 | if (mirror_num > num_copies) | 401 | if (mirror_num > num_copies) |
392 | return ret; | 402 | break; |
393 | } | 403 | } |
394 | return -EIO; | 404 | |
405 | if (failed && !ret) | ||
406 | repair_eb_io_failure(root, eb, failed_mirror); | ||
407 | |||
408 | return ret; | ||
395 | } | 409 | } |
396 | 410 | ||
397 | /* | 411 | /* |
@@ -404,50 +418,27 @@ static int csum_dirty_buffer(struct btrfs_root *root, struct page *page) | |||
404 | struct extent_io_tree *tree; | 418 | struct extent_io_tree *tree; |
405 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; | 419 | u64 start = (u64)page->index << PAGE_CACHE_SHIFT; |
406 | u64 found_start; | 420 | u64 found_start; |
407 | unsigned long len; | ||
408 | struct extent_buffer *eb; | 421 | struct extent_buffer *eb; |
409 | int ret; | ||
410 | 422 | ||
411 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 423 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
412 | 424 | ||
413 | if (page->private == EXTENT_PAGE_PRIVATE) { | 425 | eb = (struct extent_buffer *)page->private; |
414 | WARN_ON(1); | 426 | if (page != eb->pages[0]) |
415 | goto out; | 427 | return 0; |
416 | } | ||
417 | if (!page->private) { | ||
418 | WARN_ON(1); | ||
419 | goto out; | ||
420 | } | ||
421 | len = page->private >> 2; | ||
422 | WARN_ON(len == 0); | ||
423 | |||
424 | eb = alloc_extent_buffer(tree, start, len, page); | ||
425 | if (eb == NULL) { | ||
426 | WARN_ON(1); | ||
427 | goto out; | ||
428 | } | ||
429 | ret = btree_read_extent_buffer_pages(root, eb, start + PAGE_CACHE_SIZE, | ||
430 | btrfs_header_generation(eb)); | ||
431 | BUG_ON(ret); | ||
432 | WARN_ON(!btrfs_header_flag(eb, BTRFS_HEADER_FLAG_WRITTEN)); | ||
433 | |||
434 | found_start = btrfs_header_bytenr(eb); | 428 | found_start = btrfs_header_bytenr(eb); |
435 | if (found_start != start) { | 429 | if (found_start != start) { |
436 | WARN_ON(1); | 430 | WARN_ON(1); |
437 | goto err; | 431 | return 0; |
438 | } | 432 | } |
439 | if (eb->first_page != page) { | 433 | if (eb->pages[0] != page) { |
440 | WARN_ON(1); | 434 | WARN_ON(1); |
441 | goto err; | 435 | return 0; |
442 | } | 436 | } |
443 | if (!PageUptodate(page)) { | 437 | if (!PageUptodate(page)) { |
444 | WARN_ON(1); | 438 | WARN_ON(1); |
445 | goto err; | 439 | return 0; |
446 | } | 440 | } |
447 | csum_tree_block(root, eb, 0); | 441 | csum_tree_block(root, eb, 0); |
448 | err: | ||
449 | free_extent_buffer(eb); | ||
450 | out: | ||
451 | return 0; | 442 | return 0; |
452 | } | 443 | } |
453 | 444 | ||
@@ -537,34 +528,74 @@ static noinline int check_leaf(struct btrfs_root *root, | |||
537 | return 0; | 528 | return 0; |
538 | } | 529 | } |
539 | 530 | ||
531 | struct extent_buffer *find_eb_for_page(struct extent_io_tree *tree, | ||
532 | struct page *page, int max_walk) | ||
533 | { | ||
534 | struct extent_buffer *eb; | ||
535 | u64 start = page_offset(page); | ||
536 | u64 target = start; | ||
537 | u64 min_start; | ||
538 | |||
539 | if (start < max_walk) | ||
540 | min_start = 0; | ||
541 | else | ||
542 | min_start = start - max_walk; | ||
543 | |||
544 | while (start >= min_start) { | ||
545 | eb = find_extent_buffer(tree, start, 0); | ||
546 | if (eb) { | ||
547 | /* | ||
548 | * we found an extent buffer and it contains our page | ||
549 | * hooray! | ||
550 | */ | ||
551 | if (eb->start <= target && | ||
552 | eb->start + eb->len > target) | ||
553 | return eb; | ||
554 | |||
555 | /* we found an extent buffer that wasn't for us */ | ||
556 | free_extent_buffer(eb); | ||
557 | return NULL; | ||
558 | } | ||
559 | if (start == 0) | ||
560 | break; | ||
561 | start -= PAGE_CACHE_SIZE; | ||
562 | } | ||
563 | return NULL; | ||
564 | } | ||
565 | |||
540 | static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | 566 | static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, |
541 | struct extent_state *state) | 567 | struct extent_state *state) |
542 | { | 568 | { |
543 | struct extent_io_tree *tree; | 569 | struct extent_io_tree *tree; |
544 | u64 found_start; | 570 | u64 found_start; |
545 | int found_level; | 571 | int found_level; |
546 | unsigned long len; | ||
547 | struct extent_buffer *eb; | 572 | struct extent_buffer *eb; |
548 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; | 573 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; |
549 | int ret = 0; | 574 | int ret = 0; |
575 | int reads_done; | ||
550 | 576 | ||
551 | tree = &BTRFS_I(page->mapping->host)->io_tree; | ||
552 | if (page->private == EXTENT_PAGE_PRIVATE) | ||
553 | goto out; | ||
554 | if (!page->private) | 577 | if (!page->private) |
555 | goto out; | 578 | goto out; |
556 | 579 | ||
557 | len = page->private >> 2; | 580 | tree = &BTRFS_I(page->mapping->host)->io_tree; |
558 | WARN_ON(len == 0); | 581 | eb = (struct extent_buffer *)page->private; |
582 | |||
583 | /* the pending IO might have been the only thing that kept this buffer | ||
584 | * in memory. Make sure we have a ref for all these other checks | ||
585 | */ | ||
586 | extent_buffer_get(eb); | ||
587 | |||
588 | reads_done = atomic_dec_and_test(&eb->io_pages); | ||
589 | if (!reads_done) | ||
590 | goto err; | ||
559 | 591 | ||
560 | eb = alloc_extent_buffer(tree, start, len, page); | 592 | if (test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) { |
561 | if (eb == NULL) { | ||
562 | ret = -EIO; | 593 | ret = -EIO; |
563 | goto out; | 594 | goto err; |
564 | } | 595 | } |
565 | 596 | ||
566 | found_start = btrfs_header_bytenr(eb); | 597 | found_start = btrfs_header_bytenr(eb); |
567 | if (found_start != start) { | 598 | if (found_start != eb->start) { |
568 | printk_ratelimited(KERN_INFO "btrfs bad tree block start " | 599 | printk_ratelimited(KERN_INFO "btrfs bad tree block start " |
569 | "%llu %llu\n", | 600 | "%llu %llu\n", |
570 | (unsigned long long)found_start, | 601 | (unsigned long long)found_start, |
@@ -572,13 +603,6 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
572 | ret = -EIO; | 603 | ret = -EIO; |
573 | goto err; | 604 | goto err; |
574 | } | 605 | } |
575 | if (eb->first_page != page) { | ||
576 | printk(KERN_INFO "btrfs bad first page %lu %lu\n", | ||
577 | eb->first_page->index, page->index); | ||
578 | WARN_ON(1); | ||
579 | ret = -EIO; | ||
580 | goto err; | ||
581 | } | ||
582 | if (check_tree_block_fsid(root, eb)) { | 606 | if (check_tree_block_fsid(root, eb)) { |
583 | printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n", | 607 | printk_ratelimited(KERN_INFO "btrfs bad fsid on block %llu\n", |
584 | (unsigned long long)eb->start); | 608 | (unsigned long long)eb->start); |
@@ -606,48 +630,31 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, | |||
606 | ret = -EIO; | 630 | ret = -EIO; |
607 | } | 631 | } |
608 | 632 | ||
609 | end = min_t(u64, eb->len, PAGE_CACHE_SIZE); | 633 | if (!ret) |
610 | end = eb->start + end - 1; | 634 | set_extent_buffer_uptodate(eb); |
611 | err: | 635 | err: |
612 | if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) { | 636 | if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) { |
613 | clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags); | 637 | clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags); |
614 | btree_readahead_hook(root, eb, eb->start, ret); | 638 | btree_readahead_hook(root, eb, eb->start, ret); |
615 | } | 639 | } |
616 | 640 | ||
641 | if (ret) | ||
642 | clear_extent_buffer_uptodate(eb); | ||
617 | free_extent_buffer(eb); | 643 | free_extent_buffer(eb); |
618 | out: | 644 | out: |
619 | return ret; | 645 | return ret; |
620 | } | 646 | } |
621 | 647 | ||
622 | static int btree_io_failed_hook(struct bio *failed_bio, | 648 | static int btree_io_failed_hook(struct page *page, int failed_mirror) |
623 | struct page *page, u64 start, u64 end, | ||
624 | int mirror_num, struct extent_state *state) | ||
625 | { | 649 | { |
626 | struct extent_io_tree *tree; | ||
627 | unsigned long len; | ||
628 | struct extent_buffer *eb; | 650 | struct extent_buffer *eb; |
629 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; | 651 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; |
630 | 652 | ||
631 | tree = &BTRFS_I(page->mapping->host)->io_tree; | 653 | eb = (struct extent_buffer *)page->private; |
632 | if (page->private == EXTENT_PAGE_PRIVATE) | 654 | set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); |
633 | goto out; | 655 | eb->failed_mirror = failed_mirror; |
634 | if (!page->private) | 656 | if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) |
635 | goto out; | ||
636 | |||
637 | len = page->private >> 2; | ||
638 | WARN_ON(len == 0); | ||
639 | |||
640 | eb = alloc_extent_buffer(tree, start, len, page); | ||
641 | if (eb == NULL) | ||
642 | goto out; | ||
643 | |||
644 | if (test_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) { | ||
645 | clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags); | ||
646 | btree_readahead_hook(root, eb, eb->start, -EIO); | 657 | btree_readahead_hook(root, eb, eb->start, -EIO); |
647 | } | ||
648 | free_extent_buffer(eb); | ||
649 | |||
650 | out: | ||
651 | return -EIO; /* we fixed nothing */ | 658 | return -EIO; /* we fixed nothing */ |
652 | } | 659 | } |
653 | 660 | ||
@@ -719,11 +726,14 @@ unsigned long btrfs_async_submit_limit(struct btrfs_fs_info *info) | |||
719 | static void run_one_async_start(struct btrfs_work *work) | 726 | static void run_one_async_start(struct btrfs_work *work) |
720 | { | 727 | { |
721 | struct async_submit_bio *async; | 728 | struct async_submit_bio *async; |
729 | int ret; | ||
722 | 730 | ||
723 | async = container_of(work, struct async_submit_bio, work); | 731 | async = container_of(work, struct async_submit_bio, work); |
724 | async->submit_bio_start(async->inode, async->rw, async->bio, | 732 | ret = async->submit_bio_start(async->inode, async->rw, async->bio, |
725 | async->mirror_num, async->bio_flags, | 733 | async->mirror_num, async->bio_flags, |
726 | async->bio_offset); | 734 | async->bio_offset); |
735 | if (ret) | ||
736 | async->error = ret; | ||
727 | } | 737 | } |
728 | 738 | ||
729 | static void run_one_async_done(struct btrfs_work *work) | 739 | static void run_one_async_done(struct btrfs_work *work) |
@@ -744,6 +754,12 @@ static void run_one_async_done(struct btrfs_work *work) | |||
744 | waitqueue_active(&fs_info->async_submit_wait)) | 754 | waitqueue_active(&fs_info->async_submit_wait)) |
745 | wake_up(&fs_info->async_submit_wait); | 755 | wake_up(&fs_info->async_submit_wait); |
746 | 756 | ||
757 | /* If an error occurred we just want to clean up the bio and move on */ | ||
758 | if (async->error) { | ||
759 | bio_endio(async->bio, async->error); | ||
760 | return; | ||
761 | } | ||
762 | |||
747 | async->submit_bio_done(async->inode, async->rw, async->bio, | 763 | async->submit_bio_done(async->inode, async->rw, async->bio, |
748 | async->mirror_num, async->bio_flags, | 764 | async->mirror_num, async->bio_flags, |
749 | async->bio_offset); | 765 | async->bio_offset); |
@@ -785,6 +801,8 @@ int btrfs_wq_submit_bio(struct btrfs_fs_info *fs_info, struct inode *inode, | |||
785 | async->bio_flags = bio_flags; | 801 | async->bio_flags = bio_flags; |
786 | async->bio_offset = bio_offset; | 802 | async->bio_offset = bio_offset; |
787 | 803 | ||
804 | async->error = 0; | ||
805 | |||
788 | atomic_inc(&fs_info->nr_async_submits); | 806 | atomic_inc(&fs_info->nr_async_submits); |
789 | 807 | ||
790 | if (rw & REQ_SYNC) | 808 | if (rw & REQ_SYNC) |
@@ -806,15 +824,18 @@ static int btree_csum_one_bio(struct bio *bio) | |||
806 | struct bio_vec *bvec = bio->bi_io_vec; | 824 | struct bio_vec *bvec = bio->bi_io_vec; |
807 | int bio_index = 0; | 825 | int bio_index = 0; |
808 | struct btrfs_root *root; | 826 | struct btrfs_root *root; |
827 | int ret = 0; | ||
809 | 828 | ||
810 | WARN_ON(bio->bi_vcnt <= 0); | 829 | WARN_ON(bio->bi_vcnt <= 0); |
811 | while (bio_index < bio->bi_vcnt) { | 830 | while (bio_index < bio->bi_vcnt) { |
812 | root = BTRFS_I(bvec->bv_page->mapping->host)->root; | 831 | root = BTRFS_I(bvec->bv_page->mapping->host)->root; |
813 | csum_dirty_buffer(root, bvec->bv_page); | 832 | ret = csum_dirty_buffer(root, bvec->bv_page); |
833 | if (ret) | ||
834 | break; | ||
814 | bio_index++; | 835 | bio_index++; |
815 | bvec++; | 836 | bvec++; |
816 | } | 837 | } |
817 | return 0; | 838 | return ret; |
818 | } | 839 | } |
819 | 840 | ||
820 | static int __btree_submit_bio_start(struct inode *inode, int rw, | 841 | static int __btree_submit_bio_start(struct inode *inode, int rw, |
@@ -826,8 +847,7 @@ static int __btree_submit_bio_start(struct inode *inode, int rw, | |||
826 | * when we're called for a write, we're already in the async | 847 | * when we're called for a write, we're already in the async |
827 | * submission context. Just jump into btrfs_map_bio | 848 | * submission context. Just jump into btrfs_map_bio |
828 | */ | 849 | */ |
829 | btree_csum_one_bio(bio); | 850 | return btree_csum_one_bio(bio); |
830 | return 0; | ||
831 | } | 851 | } |
832 | 852 | ||
833 | static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, | 853 | static int __btree_submit_bio_done(struct inode *inode, int rw, struct bio *bio, |
@@ -847,15 +867,16 @@ static int btree_submit_bio_hook(struct inode *inode, int rw, struct bio *bio, | |||
847 | { | 867 | { |
848 | int ret; | 868 | int ret; |
849 | 869 | ||
850 | ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info, | ||
851 | bio, 1); | ||
852 | BUG_ON(ret); | ||
853 | |||
854 | if (!(rw & REQ_WRITE)) { | 870 | if (!(rw & REQ_WRITE)) { |
871 | |||
855 | /* | 872 | /* |
856 | * called for a read, do the setup so that checksum validation | 873 | * called for a read, do the setup so that checksum validation |
857 | * can happen in the async kernel threads | 874 | * can happen in the async kernel threads |
858 | */ | 875 | */ |
876 | ret = btrfs_bio_wq_end_io(BTRFS_I(inode)->root->fs_info, | ||
877 | bio, 1); | ||
878 | if (ret) | ||
879 | return ret; | ||
859 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, | 880 | return btrfs_map_bio(BTRFS_I(inode)->root, rw, bio, |
860 | mirror_num, 0); | 881 | mirror_num, 0); |
861 | } | 882 | } |
@@ -893,34 +914,6 @@ static int btree_migratepage(struct address_space *mapping, | |||
893 | } | 914 | } |
894 | #endif | 915 | #endif |
895 | 916 | ||
896 | static int btree_writepage(struct page *page, struct writeback_control *wbc) | ||
897 | { | ||
898 | struct extent_io_tree *tree; | ||
899 | struct btrfs_root *root = BTRFS_I(page->mapping->host)->root; | ||
900 | struct extent_buffer *eb; | ||
901 | int was_dirty; | ||
902 | |||
903 | tree = &BTRFS_I(page->mapping->host)->io_tree; | ||
904 | if (!(current->flags & PF_MEMALLOC)) { | ||
905 | return extent_write_full_page(tree, page, | ||
906 | btree_get_extent, wbc); | ||
907 | } | ||
908 | |||
909 | redirty_page_for_writepage(wbc, page); | ||
910 | eb = btrfs_find_tree_block(root, page_offset(page), PAGE_CACHE_SIZE); | ||
911 | WARN_ON(!eb); | ||
912 | |||
913 | was_dirty = test_and_set_bit(EXTENT_BUFFER_DIRTY, &eb->bflags); | ||
914 | if (!was_dirty) { | ||
915 | spin_lock(&root->fs_info->delalloc_lock); | ||
916 | root->fs_info->dirty_metadata_bytes += PAGE_CACHE_SIZE; | ||
917 | spin_unlock(&root->fs_info->delalloc_lock); | ||
918 | } | ||
919 | free_extent_buffer(eb); | ||
920 | |||
921 | unlock_page(page); | ||
922 | return 0; | ||
923 | } | ||
924 | 917 | ||
925 | static int btree_writepages(struct address_space *mapping, | 918 | static int btree_writepages(struct address_space *mapping, |
926 | struct writeback_control *wbc) | 919 | struct writeback_control *wbc) |
@@ -940,7 +933,7 @@ static int btree_writepages(struct address_space *mapping, | |||
940 | if (num_dirty < thresh) | 933 | if (num_dirty < thresh) |
941 | return 0; | 934 | return 0; |
942 | } | 935 | } |
943 | return extent_writepages(tree, mapping, btree_get_extent, wbc); | 936 | return btree_write_cache_pages(mapping, wbc); |
944 | } | 937 | } |
945 | 938 | ||
946 | static int btree_readpage(struct file *file, struct page *page) | 939 | static int btree_readpage(struct file *file, struct page *page) |
@@ -952,16 +945,8 @@ static int btree_readpage(struct file *file, struct page *page) | |||
952 | 945 | ||
953 | static int btree_releasepage(struct page *page, gfp_t gfp_flags) | 946 | static int btree_releasepage(struct page *page, gfp_t gfp_flags) |
954 | { | 947 | { |
955 | struct extent_io_tree *tree; | ||
956 | struct extent_map_tree *map; | ||
957 | int ret; | ||
958 | |||
959 | if (PageWriteback(page) || PageDirty(page)) | 948 | if (PageWriteback(page) || PageDirty(page)) |
960 | return 0; | 949 | return 0; |
961 | |||
962 | tree = &BTRFS_I(page->mapping->host)->io_tree; | ||
963 | map = &BTRFS_I(page->mapping->host)->extent_tree; | ||
964 | |||
965 | /* | 950 | /* |
966 | * We need to mask out eg. __GFP_HIGHMEM and __GFP_DMA32 as we're doing | 951 | * We need to mask out eg. __GFP_HIGHMEM and __GFP_DMA32 as we're doing |
967 | * slab allocation from alloc_extent_state down the callchain where | 952 | * slab allocation from alloc_extent_state down the callchain where |
@@ -969,18 +954,7 @@ static int btree_releasepage(struct page *page, gfp_t gfp_flags) | |||
969 | */ | 954 | */ |
970 | gfp_flags &= ~GFP_SLAB_BUG_MASK; | 955 | gfp_flags &= ~GFP_SLAB_BUG_MASK; |
971 | 956 | ||
972 | ret = try_release_extent_state(map, tree, page, gfp_flags); | 957 | return try_release_extent_buffer(page, gfp_flags); |
973 | if (!ret) | ||
974 | return 0; | ||
975 | |||
976 | ret = try_release_extent_buffer(tree, page); | ||
977 | if (ret == 1) { | ||
978 | ClearPagePrivate(page); | ||
979 | set_page_private(page, 0); | ||
980 | page_cache_release(page); | ||
981 | } | ||
982 | |||
983 | return ret; | ||
984 | } | 958 | } |
985 | 959 | ||
986 | static void btree_invalidatepage(struct page *page, unsigned long offset) | 960 | static void btree_invalidatepage(struct page *page, unsigned long offset) |
@@ -998,15 +972,28 @@ static void btree_invalidatepage(struct page *page, unsigned long offset) | |||
998 | } | 972 | } |
999 | } | 973 | } |
1000 | 974 | ||
975 | static int btree_set_page_dirty(struct page *page) | ||
976 | { | ||
977 | struct extent_buffer *eb; | ||
978 | |||
979 | BUG_ON(!PagePrivate(page)); | ||
980 | eb = (struct extent_buffer *)page->private; | ||
981 | BUG_ON(!eb); | ||
982 | BUG_ON(!test_bit(EXTENT_BUFFER_DIRTY, &eb->bflags)); | ||
983 | BUG_ON(!atomic_read(&eb->refs)); | ||
984 | btrfs_assert_tree_locked(eb); | ||
985 | return __set_page_dirty_nobuffers(page); | ||
986 | } | ||
987 | |||
1001 | static const struct address_space_operations btree_aops = { | 988 | static const struct address_space_operations btree_aops = { |
1002 | .readpage = btree_readpage, | 989 | .readpage = btree_readpage, |
1003 | .writepage = btree_writepage, | ||
1004 | .writepages = btree_writepages, | 990 | .writepages = btree_writepages, |
1005 | .releasepage = btree_releasepage, | 991 | .releasepage = btree_releasepage, |
1006 | .invalidatepage = btree_invalidatepage, | 992 | .invalidatepage = btree_invalidatepage, |
1007 | #ifdef CONFIG_MIGRATION | 993 | #ifdef CONFIG_MIGRATION |
1008 | .migratepage = btree_migratepage, | 994 | .migratepage = btree_migratepage, |
1009 | #endif | 995 | #endif |
996 | .set_page_dirty = btree_set_page_dirty, | ||
1010 | }; | 997 | }; |
1011 | 998 | ||
1012 | int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, | 999 | int readahead_tree_block(struct btrfs_root *root, u64 bytenr, u32 blocksize, |
@@ -1049,7 +1036,7 @@ int reada_tree_block_flagged(struct btrfs_root *root, u64 bytenr, u32 blocksize, | |||
1049 | if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) { | 1036 | if (test_bit(EXTENT_BUFFER_CORRUPT, &buf->bflags)) { |
1050 | free_extent_buffer(buf); | 1037 | free_extent_buffer(buf); |
1051 | return -EIO; | 1038 | return -EIO; |
1052 | } else if (extent_buffer_uptodate(io_tree, buf, NULL)) { | 1039 | } else if (extent_buffer_uptodate(buf)) { |
1053 | *eb = buf; | 1040 | *eb = buf; |
1054 | } else { | 1041 | } else { |
1055 | free_extent_buffer(buf); | 1042 | free_extent_buffer(buf); |
@@ -1074,20 +1061,20 @@ struct extent_buffer *btrfs_find_create_tree_block(struct btrfs_root *root, | |||
1074 | struct extent_buffer *eb; | 1061 | struct extent_buffer *eb; |
1075 | 1062 | ||
1076 | eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree, | 1063 | eb = alloc_extent_buffer(&BTRFS_I(btree_inode)->io_tree, |
1077 | bytenr, blocksize, NULL); | 1064 | bytenr, blocksize); |
1078 | return eb; | 1065 | return eb; |
1079 | } | 1066 | } |
1080 | 1067 | ||
1081 | 1068 | ||
1082 | int btrfs_write_tree_block(struct extent_buffer *buf) | 1069 | int btrfs_write_tree_block(struct extent_buffer *buf) |
1083 | { | 1070 | { |
1084 | return filemap_fdatawrite_range(buf->first_page->mapping, buf->start, | 1071 | return filemap_fdatawrite_range(buf->pages[0]->mapping, buf->start, |
1085 | buf->start + buf->len - 1); | 1072 | buf->start + buf->len - 1); |
1086 | } | 1073 | } |
1087 | 1074 | ||
1088 | int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) | 1075 | int btrfs_wait_tree_block_writeback(struct extent_buffer *buf) |
1089 | { | 1076 | { |
1090 | return filemap_fdatawait_range(buf->first_page->mapping, | 1077 | return filemap_fdatawait_range(buf->pages[0]->mapping, |
1091 | buf->start, buf->start + buf->len - 1); | 1078 | buf->start, buf->start + buf->len - 1); |
1092 | } | 1079 | } |
1093 | 1080 | ||
@@ -1102,17 +1089,13 @@ struct extent_buffer *read_tree_block(struct btrfs_root *root, u64 bytenr, | |||
1102 | return NULL; | 1089 | return NULL; |
1103 | 1090 | ||
1104 | ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); | 1091 | ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); |
1105 | |||
1106 | if (ret == 0) | ||
1107 | set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags); | ||
1108 | return buf; | 1092 | return buf; |
1109 | 1093 | ||
1110 | } | 1094 | } |
1111 | 1095 | ||
1112 | int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, | 1096 | void clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, |
1113 | struct extent_buffer *buf) | 1097 | struct extent_buffer *buf) |
1114 | { | 1098 | { |
1115 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
1116 | if (btrfs_header_generation(buf) == | 1099 | if (btrfs_header_generation(buf) == |
1117 | root->fs_info->running_transaction->transid) { | 1100 | root->fs_info->running_transaction->transid) { |
1118 | btrfs_assert_tree_locked(buf); | 1101 | btrfs_assert_tree_locked(buf); |
@@ -1121,23 +1104,27 @@ int clean_tree_block(struct btrfs_trans_handle *trans, struct btrfs_root *root, | |||
1121 | spin_lock(&root->fs_info->delalloc_lock); | 1104 | spin_lock(&root->fs_info->delalloc_lock); |
1122 | if (root->fs_info->dirty_metadata_bytes >= buf->len) | 1105 | if (root->fs_info->dirty_metadata_bytes >= buf->len) |
1123 | root->fs_info->dirty_metadata_bytes -= buf->len; | 1106 | root->fs_info->dirty_metadata_bytes -= buf->len; |
1124 | else | 1107 | else { |
1125 | WARN_ON(1); | 1108 | spin_unlock(&root->fs_info->delalloc_lock); |
1109 | btrfs_panic(root->fs_info, -EOVERFLOW, | ||
1110 | "Can't clear %lu bytes from " | ||
1111 | " dirty_mdatadata_bytes (%lu)", | ||
1112 | buf->len, | ||
1113 | root->fs_info->dirty_metadata_bytes); | ||
1114 | } | ||
1126 | spin_unlock(&root->fs_info->delalloc_lock); | 1115 | spin_unlock(&root->fs_info->delalloc_lock); |
1127 | } | 1116 | } |
1128 | 1117 | ||
1129 | /* ugh, clear_extent_buffer_dirty needs to lock the page */ | 1118 | /* ugh, clear_extent_buffer_dirty needs to lock the page */ |
1130 | btrfs_set_lock_blocking(buf); | 1119 | btrfs_set_lock_blocking(buf); |
1131 | clear_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, | 1120 | clear_extent_buffer_dirty(buf); |
1132 | buf); | ||
1133 | } | 1121 | } |
1134 | return 0; | ||
1135 | } | 1122 | } |
1136 | 1123 | ||
1137 | static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | 1124 | static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, |
1138 | u32 stripesize, struct btrfs_root *root, | 1125 | u32 stripesize, struct btrfs_root *root, |
1139 | struct btrfs_fs_info *fs_info, | 1126 | struct btrfs_fs_info *fs_info, |
1140 | u64 objectid) | 1127 | u64 objectid) |
1141 | { | 1128 | { |
1142 | root->node = NULL; | 1129 | root->node = NULL; |
1143 | root->commit_root = NULL; | 1130 | root->commit_root = NULL; |
@@ -1189,13 +1176,12 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize, | |||
1189 | root->defrag_running = 0; | 1176 | root->defrag_running = 0; |
1190 | root->root_key.objectid = objectid; | 1177 | root->root_key.objectid = objectid; |
1191 | root->anon_dev = 0; | 1178 | root->anon_dev = 0; |
1192 | return 0; | ||
1193 | } | 1179 | } |
1194 | 1180 | ||
1195 | static int find_and_setup_root(struct btrfs_root *tree_root, | 1181 | static int __must_check find_and_setup_root(struct btrfs_root *tree_root, |
1196 | struct btrfs_fs_info *fs_info, | 1182 | struct btrfs_fs_info *fs_info, |
1197 | u64 objectid, | 1183 | u64 objectid, |
1198 | struct btrfs_root *root) | 1184 | struct btrfs_root *root) |
1199 | { | 1185 | { |
1200 | int ret; | 1186 | int ret; |
1201 | u32 blocksize; | 1187 | u32 blocksize; |
@@ -1208,7 +1194,8 @@ static int find_and_setup_root(struct btrfs_root *tree_root, | |||
1208 | &root->root_item, &root->root_key); | 1194 | &root->root_item, &root->root_key); |
1209 | if (ret > 0) | 1195 | if (ret > 0) |
1210 | return -ENOENT; | 1196 | return -ENOENT; |
1211 | BUG_ON(ret); | 1197 | else if (ret < 0) |
1198 | return ret; | ||
1212 | 1199 | ||
1213 | generation = btrfs_root_generation(&root->root_item); | 1200 | generation = btrfs_root_generation(&root->root_item); |
1214 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); | 1201 | blocksize = btrfs_level_size(root, btrfs_root_level(&root->root_item)); |
@@ -1377,7 +1364,7 @@ struct btrfs_root *btrfs_read_fs_root_no_radix(struct btrfs_root *tree_root, | |||
1377 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), | 1364 | root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), |
1378 | blocksize, generation); | 1365 | blocksize, generation); |
1379 | root->commit_root = btrfs_root_node(root); | 1366 | root->commit_root = btrfs_root_node(root); |
1380 | BUG_ON(!root->node); | 1367 | BUG_ON(!root->node); /* -ENOMEM */ |
1381 | out: | 1368 | out: |
1382 | if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { | 1369 | if (location->objectid != BTRFS_TREE_LOG_OBJECTID) { |
1383 | root->ref_cows = 1; | 1370 | root->ref_cows = 1; |
@@ -1513,41 +1500,6 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi) | |||
1513 | return 0; | 1500 | return 0; |
1514 | } | 1501 | } |
1515 | 1502 | ||
1516 | static int bio_ready_for_csum(struct bio *bio) | ||
1517 | { | ||
1518 | u64 length = 0; | ||
1519 | u64 buf_len = 0; | ||
1520 | u64 start = 0; | ||
1521 | struct page *page; | ||
1522 | struct extent_io_tree *io_tree = NULL; | ||
1523 | struct bio_vec *bvec; | ||
1524 | int i; | ||
1525 | int ret; | ||
1526 | |||
1527 | bio_for_each_segment(bvec, bio, i) { | ||
1528 | page = bvec->bv_page; | ||
1529 | if (page->private == EXTENT_PAGE_PRIVATE) { | ||
1530 | length += bvec->bv_len; | ||
1531 | continue; | ||
1532 | } | ||
1533 | if (!page->private) { | ||
1534 | length += bvec->bv_len; | ||
1535 | continue; | ||
1536 | } | ||
1537 | length = bvec->bv_len; | ||
1538 | buf_len = page->private >> 2; | ||
1539 | start = page_offset(page) + bvec->bv_offset; | ||
1540 | io_tree = &BTRFS_I(page->mapping->host)->io_tree; | ||
1541 | } | ||
1542 | /* are we fully contained in this bio? */ | ||
1543 | if (buf_len <= length) | ||
1544 | return 1; | ||
1545 | |||
1546 | ret = extent_range_uptodate(io_tree, start + length, | ||
1547 | start + buf_len - 1); | ||
1548 | return ret; | ||
1549 | } | ||
1550 | |||
1551 | /* | 1503 | /* |
1552 | * called by the kthread helper functions to finally call the bio end_io | 1504 | * called by the kthread helper functions to finally call the bio end_io |
1553 | * functions. This is where read checksum verification actually happens | 1505 | * functions. This is where read checksum verification actually happens |
@@ -1563,17 +1515,6 @@ static void end_workqueue_fn(struct btrfs_work *work) | |||
1563 | bio = end_io_wq->bio; | 1515 | bio = end_io_wq->bio; |
1564 | fs_info = end_io_wq->info; | 1516 | fs_info = end_io_wq->info; |
1565 | 1517 | ||
1566 | /* metadata bio reads are special because the whole tree block must | ||
1567 | * be checksummed at once. This makes sure the entire block is in | ||
1568 | * ram and up to date before trying to verify things. For | ||
1569 | * blocksize <= pagesize, it is basically a noop | ||
1570 | */ | ||
1571 | if (!(bio->bi_rw & REQ_WRITE) && end_io_wq->metadata && | ||
1572 | !bio_ready_for_csum(bio)) { | ||
1573 | btrfs_queue_worker(&fs_info->endio_meta_workers, | ||
1574 | &end_io_wq->work); | ||
1575 | return; | ||
1576 | } | ||
1577 | error = end_io_wq->error; | 1518 | error = end_io_wq->error; |
1578 | bio->bi_private = end_io_wq->private; | 1519 | bio->bi_private = end_io_wq->private; |
1579 | bio->bi_end_io = end_io_wq->end_io; | 1520 | bio->bi_end_io = end_io_wq->end_io; |
@@ -1614,9 +1555,10 @@ static int transaction_kthread(void *arg) | |||
1614 | u64 transid; | 1555 | u64 transid; |
1615 | unsigned long now; | 1556 | unsigned long now; |
1616 | unsigned long delay; | 1557 | unsigned long delay; |
1617 | int ret; | 1558 | bool cannot_commit; |
1618 | 1559 | ||
1619 | do { | 1560 | do { |
1561 | cannot_commit = false; | ||
1620 | delay = HZ * 30; | 1562 | delay = HZ * 30; |
1621 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); | 1563 | vfs_check_frozen(root->fs_info->sb, SB_FREEZE_WRITE); |
1622 | mutex_lock(&root->fs_info->transaction_kthread_mutex); | 1564 | mutex_lock(&root->fs_info->transaction_kthread_mutex); |
@@ -1638,11 +1580,14 @@ static int transaction_kthread(void *arg) | |||
1638 | transid = cur->transid; | 1580 | transid = cur->transid; |
1639 | spin_unlock(&root->fs_info->trans_lock); | 1581 | spin_unlock(&root->fs_info->trans_lock); |
1640 | 1582 | ||
1583 | /* If the file system is aborted, this will always fail. */ | ||
1641 | trans = btrfs_join_transaction(root); | 1584 | trans = btrfs_join_transaction(root); |
1642 | BUG_ON(IS_ERR(trans)); | 1585 | if (IS_ERR(trans)) { |
1586 | cannot_commit = true; | ||
1587 | goto sleep; | ||
1588 | } | ||
1643 | if (transid == trans->transid) { | 1589 | if (transid == trans->transid) { |
1644 | ret = btrfs_commit_transaction(trans, root); | 1590 | btrfs_commit_transaction(trans, root); |
1645 | BUG_ON(ret); | ||
1646 | } else { | 1591 | } else { |
1647 | btrfs_end_transaction(trans, root); | 1592 | btrfs_end_transaction(trans, root); |
1648 | } | 1593 | } |
@@ -1653,7 +1598,8 @@ sleep: | |||
1653 | if (!try_to_freeze()) { | 1598 | if (!try_to_freeze()) { |
1654 | set_current_state(TASK_INTERRUPTIBLE); | 1599 | set_current_state(TASK_INTERRUPTIBLE); |
1655 | if (!kthread_should_stop() && | 1600 | if (!kthread_should_stop() && |
1656 | !btrfs_transaction_blocked(root->fs_info)) | 1601 | (!btrfs_transaction_blocked(root->fs_info) || |
1602 | cannot_commit)) | ||
1657 | schedule_timeout(delay); | 1603 | schedule_timeout(delay); |
1658 | __set_current_state(TASK_RUNNING); | 1604 | __set_current_state(TASK_RUNNING); |
1659 | } | 1605 | } |
@@ -2042,6 +1988,7 @@ int open_ctree(struct super_block *sb, | |||
2042 | RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node); | 1988 | RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node); |
2043 | extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, | 1989 | extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree, |
2044 | fs_info->btree_inode->i_mapping); | 1990 | fs_info->btree_inode->i_mapping); |
1991 | BTRFS_I(fs_info->btree_inode)->io_tree.track_uptodate = 0; | ||
2045 | extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree); | 1992 | extent_map_tree_init(&BTRFS_I(fs_info->btree_inode)->extent_tree); |
2046 | 1993 | ||
2047 | BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; | 1994 | BTRFS_I(fs_info->btree_inode)->io_tree.ops = &btree_extent_io_ops; |
@@ -2084,6 +2031,7 @@ int open_ctree(struct super_block *sb, | |||
2084 | __setup_root(4096, 4096, 4096, 4096, tree_root, | 2031 | __setup_root(4096, 4096, 4096, 4096, tree_root, |
2085 | fs_info, BTRFS_ROOT_TREE_OBJECTID); | 2032 | fs_info, BTRFS_ROOT_TREE_OBJECTID); |
2086 | 2033 | ||
2034 | invalidate_bdev(fs_devices->latest_bdev); | ||
2087 | bh = btrfs_read_dev_super(fs_devices->latest_bdev); | 2035 | bh = btrfs_read_dev_super(fs_devices->latest_bdev); |
2088 | if (!bh) { | 2036 | if (!bh) { |
2089 | err = -EINVAL; | 2037 | err = -EINVAL; |
@@ -2104,7 +2052,12 @@ int open_ctree(struct super_block *sb, | |||
2104 | /* check FS state, whether FS is broken. */ | 2052 | /* check FS state, whether FS is broken. */ |
2105 | fs_info->fs_state |= btrfs_super_flags(disk_super); | 2053 | fs_info->fs_state |= btrfs_super_flags(disk_super); |
2106 | 2054 | ||
2107 | btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); | 2055 | ret = btrfs_check_super_valid(fs_info, sb->s_flags & MS_RDONLY); |
2056 | if (ret) { | ||
2057 | printk(KERN_ERR "btrfs: superblock contains fatal errors\n"); | ||
2058 | err = ret; | ||
2059 | goto fail_alloc; | ||
2060 | } | ||
2108 | 2061 | ||
2109 | /* | 2062 | /* |
2110 | * run through our array of backup supers and setup | 2063 | * run through our array of backup supers and setup |
@@ -2135,10 +2088,55 @@ int open_ctree(struct super_block *sb, | |||
2135 | goto fail_alloc; | 2088 | goto fail_alloc; |
2136 | } | 2089 | } |
2137 | 2090 | ||
2091 | if (btrfs_super_leafsize(disk_super) != | ||
2092 | btrfs_super_nodesize(disk_super)) { | ||
2093 | printk(KERN_ERR "BTRFS: couldn't mount because metadata " | ||
2094 | "blocksizes don't match. node %d leaf %d\n", | ||
2095 | btrfs_super_nodesize(disk_super), | ||
2096 | btrfs_super_leafsize(disk_super)); | ||
2097 | err = -EINVAL; | ||
2098 | goto fail_alloc; | ||
2099 | } | ||
2100 | if (btrfs_super_leafsize(disk_super) > BTRFS_MAX_METADATA_BLOCKSIZE) { | ||
2101 | printk(KERN_ERR "BTRFS: couldn't mount because metadata " | ||
2102 | "blocksize (%d) was too large\n", | ||
2103 | btrfs_super_leafsize(disk_super)); | ||
2104 | err = -EINVAL; | ||
2105 | goto fail_alloc; | ||
2106 | } | ||
2107 | |||
2138 | features = btrfs_super_incompat_flags(disk_super); | 2108 | features = btrfs_super_incompat_flags(disk_super); |
2139 | features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; | 2109 | features |= BTRFS_FEATURE_INCOMPAT_MIXED_BACKREF; |
2140 | if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO) | 2110 | if (tree_root->fs_info->compress_type & BTRFS_COMPRESS_LZO) |
2141 | features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; | 2111 | features |= BTRFS_FEATURE_INCOMPAT_COMPRESS_LZO; |
2112 | |||
2113 | /* | ||
2114 | * flag our filesystem as having big metadata blocks if | ||
2115 | * they are bigger than the page size | ||
2116 | */ | ||
2117 | if (btrfs_super_leafsize(disk_super) > PAGE_CACHE_SIZE) { | ||
2118 | if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA)) | ||
2119 | printk(KERN_INFO "btrfs flagging fs with big metadata feature\n"); | ||
2120 | features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA; | ||
2121 | } | ||
2122 | |||
2123 | nodesize = btrfs_super_nodesize(disk_super); | ||
2124 | leafsize = btrfs_super_leafsize(disk_super); | ||
2125 | sectorsize = btrfs_super_sectorsize(disk_super); | ||
2126 | stripesize = btrfs_super_stripesize(disk_super); | ||
2127 | |||
2128 | /* | ||
2129 | * mixed block groups end up with duplicate but slightly offset | ||
2130 | * extent buffers for the same range. It leads to corruptions | ||
2131 | */ | ||
2132 | if ((features & BTRFS_FEATURE_INCOMPAT_MIXED_GROUPS) && | ||
2133 | (sectorsize != leafsize)) { | ||
2134 | printk(KERN_WARNING "btrfs: unequal leaf/node/sector sizes " | ||
2135 | "are not allowed for mixed block groups on %s\n", | ||
2136 | sb->s_id); | ||
2137 | goto fail_alloc; | ||
2138 | } | ||
2139 | |||
2142 | btrfs_set_super_incompat_flags(disk_super, features); | 2140 | btrfs_set_super_incompat_flags(disk_super, features); |
2143 | 2141 | ||
2144 | features = btrfs_super_compat_ro_flags(disk_super) & | 2142 | features = btrfs_super_compat_ro_flags(disk_super) & |
@@ -2242,10 +2240,6 @@ int open_ctree(struct super_block *sb, | |||
2242 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, | 2240 | fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages, |
2243 | 4 * 1024 * 1024 / PAGE_CACHE_SIZE); | 2241 | 4 * 1024 * 1024 / PAGE_CACHE_SIZE); |
2244 | 2242 | ||
2245 | nodesize = btrfs_super_nodesize(disk_super); | ||
2246 | leafsize = btrfs_super_leafsize(disk_super); | ||
2247 | sectorsize = btrfs_super_sectorsize(disk_super); | ||
2248 | stripesize = btrfs_super_stripesize(disk_super); | ||
2249 | tree_root->nodesize = nodesize; | 2243 | tree_root->nodesize = nodesize; |
2250 | tree_root->leafsize = leafsize; | 2244 | tree_root->leafsize = leafsize; |
2251 | tree_root->sectorsize = sectorsize; | 2245 | tree_root->sectorsize = sectorsize; |
@@ -2260,6 +2254,12 @@ int open_ctree(struct super_block *sb, | |||
2260 | goto fail_sb_buffer; | 2254 | goto fail_sb_buffer; |
2261 | } | 2255 | } |
2262 | 2256 | ||
2257 | if (sectorsize < PAGE_SIZE) { | ||
2258 | printk(KERN_WARNING "btrfs: Incompatible sector size " | ||
2259 | "found on %s\n", sb->s_id); | ||
2260 | goto fail_sb_buffer; | ||
2261 | } | ||
2262 | |||
2263 | mutex_lock(&fs_info->chunk_mutex); | 2263 | mutex_lock(&fs_info->chunk_mutex); |
2264 | ret = btrfs_read_sys_array(tree_root); | 2264 | ret = btrfs_read_sys_array(tree_root); |
2265 | mutex_unlock(&fs_info->chunk_mutex); | 2265 | mutex_unlock(&fs_info->chunk_mutex); |
@@ -2279,7 +2279,7 @@ int open_ctree(struct super_block *sb, | |||
2279 | chunk_root->node = read_tree_block(chunk_root, | 2279 | chunk_root->node = read_tree_block(chunk_root, |
2280 | btrfs_super_chunk_root(disk_super), | 2280 | btrfs_super_chunk_root(disk_super), |
2281 | blocksize, generation); | 2281 | blocksize, generation); |
2282 | BUG_ON(!chunk_root->node); | 2282 | BUG_ON(!chunk_root->node); /* -ENOMEM */ |
2283 | if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { | 2283 | if (!test_bit(EXTENT_BUFFER_UPTODATE, &chunk_root->node->bflags)) { |
2284 | printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n", | 2284 | printk(KERN_WARNING "btrfs: failed to read chunk root on %s\n", |
2285 | sb->s_id); | 2285 | sb->s_id); |
@@ -2301,6 +2301,12 @@ int open_ctree(struct super_block *sb, | |||
2301 | 2301 | ||
2302 | btrfs_close_extra_devices(fs_devices); | 2302 | btrfs_close_extra_devices(fs_devices); |
2303 | 2303 | ||
2304 | if (!fs_devices->latest_bdev) { | ||
2305 | printk(KERN_CRIT "btrfs: failed to read devices on %s\n", | ||
2306 | sb->s_id); | ||
2307 | goto fail_tree_roots; | ||
2308 | } | ||
2309 | |||
2304 | retry_root_backup: | 2310 | retry_root_backup: |
2305 | blocksize = btrfs_level_size(tree_root, | 2311 | blocksize = btrfs_level_size(tree_root, |
2306 | btrfs_super_root_level(disk_super)); | 2312 | btrfs_super_root_level(disk_super)); |
@@ -2413,21 +2419,31 @@ retry_root_backup: | |||
2413 | log_tree_root->node = read_tree_block(tree_root, bytenr, | 2419 | log_tree_root->node = read_tree_block(tree_root, bytenr, |
2414 | blocksize, | 2420 | blocksize, |
2415 | generation + 1); | 2421 | generation + 1); |
2422 | /* returns with log_tree_root freed on success */ | ||
2416 | ret = btrfs_recover_log_trees(log_tree_root); | 2423 | ret = btrfs_recover_log_trees(log_tree_root); |
2417 | BUG_ON(ret); | 2424 | if (ret) { |
2425 | btrfs_error(tree_root->fs_info, ret, | ||
2426 | "Failed to recover log tree"); | ||
2427 | free_extent_buffer(log_tree_root->node); | ||
2428 | kfree(log_tree_root); | ||
2429 | goto fail_trans_kthread; | ||
2430 | } | ||
2418 | 2431 | ||
2419 | if (sb->s_flags & MS_RDONLY) { | 2432 | if (sb->s_flags & MS_RDONLY) { |
2420 | ret = btrfs_commit_super(tree_root); | 2433 | ret = btrfs_commit_super(tree_root); |
2421 | BUG_ON(ret); | 2434 | if (ret) |
2435 | goto fail_trans_kthread; | ||
2422 | } | 2436 | } |
2423 | } | 2437 | } |
2424 | 2438 | ||
2425 | ret = btrfs_find_orphan_roots(tree_root); | 2439 | ret = btrfs_find_orphan_roots(tree_root); |
2426 | BUG_ON(ret); | 2440 | if (ret) |
2441 | goto fail_trans_kthread; | ||
2427 | 2442 | ||
2428 | if (!(sb->s_flags & MS_RDONLY)) { | 2443 | if (!(sb->s_flags & MS_RDONLY)) { |
2429 | ret = btrfs_cleanup_fs_roots(fs_info); | 2444 | ret = btrfs_cleanup_fs_roots(fs_info); |
2430 | BUG_ON(ret); | 2445 | if (ret) { |
2446 | } | ||
2431 | 2447 | ||
2432 | ret = btrfs_recover_relocation(tree_root); | 2448 | ret = btrfs_recover_relocation(tree_root); |
2433 | if (ret < 0) { | 2449 | if (ret < 0) { |
@@ -2847,6 +2863,8 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) | |||
2847 | if (total_errors > max_errors) { | 2863 | if (total_errors > max_errors) { |
2848 | printk(KERN_ERR "btrfs: %d errors while writing supers\n", | 2864 | printk(KERN_ERR "btrfs: %d errors while writing supers\n", |
2849 | total_errors); | 2865 | total_errors); |
2866 | |||
2867 | /* This shouldn't happen. FUA is masked off if unsupported */ | ||
2850 | BUG(); | 2868 | BUG(); |
2851 | } | 2869 | } |
2852 | 2870 | ||
@@ -2863,9 +2881,9 @@ int write_all_supers(struct btrfs_root *root, int max_mirrors) | |||
2863 | } | 2881 | } |
2864 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | 2882 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); |
2865 | if (total_errors > max_errors) { | 2883 | if (total_errors > max_errors) { |
2866 | printk(KERN_ERR "btrfs: %d errors while writing supers\n", | 2884 | btrfs_error(root->fs_info, -EIO, |
2867 | total_errors); | 2885 | "%d errors while writing supers", total_errors); |
2868 | BUG(); | 2886 | return -EIO; |
2869 | } | 2887 | } |
2870 | return 0; | 2888 | return 0; |
2871 | } | 2889 | } |
@@ -2879,7 +2897,20 @@ int write_ctree_super(struct btrfs_trans_handle *trans, | |||
2879 | return ret; | 2897 | return ret; |
2880 | } | 2898 | } |
2881 | 2899 | ||
2882 | int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) | 2900 | /* Kill all outstanding I/O */ |
2901 | void btrfs_abort_devices(struct btrfs_root *root) | ||
2902 | { | ||
2903 | struct list_head *head; | ||
2904 | struct btrfs_device *dev; | ||
2905 | mutex_lock(&root->fs_info->fs_devices->device_list_mutex); | ||
2906 | head = &root->fs_info->fs_devices->devices; | ||
2907 | list_for_each_entry_rcu(dev, head, dev_list) { | ||
2908 | blk_abort_queue(dev->bdev->bd_disk->queue); | ||
2909 | } | ||
2910 | mutex_unlock(&root->fs_info->fs_devices->device_list_mutex); | ||
2911 | } | ||
2912 | |||
2913 | void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) | ||
2883 | { | 2914 | { |
2884 | spin_lock(&fs_info->fs_roots_radix_lock); | 2915 | spin_lock(&fs_info->fs_roots_radix_lock); |
2885 | radix_tree_delete(&fs_info->fs_roots_radix, | 2916 | radix_tree_delete(&fs_info->fs_roots_radix, |
@@ -2892,7 +2923,6 @@ int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root) | |||
2892 | __btrfs_remove_free_space_cache(root->free_ino_pinned); | 2923 | __btrfs_remove_free_space_cache(root->free_ino_pinned); |
2893 | __btrfs_remove_free_space_cache(root->free_ino_ctl); | 2924 | __btrfs_remove_free_space_cache(root->free_ino_ctl); |
2894 | free_fs_root(root); | 2925 | free_fs_root(root); |
2895 | return 0; | ||
2896 | } | 2926 | } |
2897 | 2927 | ||
2898 | static void free_fs_root(struct btrfs_root *root) | 2928 | static void free_fs_root(struct btrfs_root *root) |
@@ -2909,7 +2939,7 @@ static void free_fs_root(struct btrfs_root *root) | |||
2909 | kfree(root); | 2939 | kfree(root); |
2910 | } | 2940 | } |
2911 | 2941 | ||
2912 | static int del_fs_roots(struct btrfs_fs_info *fs_info) | 2942 | static void del_fs_roots(struct btrfs_fs_info *fs_info) |
2913 | { | 2943 | { |
2914 | int ret; | 2944 | int ret; |
2915 | struct btrfs_root *gang[8]; | 2945 | struct btrfs_root *gang[8]; |
@@ -2938,7 +2968,6 @@ static int del_fs_roots(struct btrfs_fs_info *fs_info) | |||
2938 | for (i = 0; i < ret; i++) | 2968 | for (i = 0; i < ret; i++) |
2939 | btrfs_free_fs_root(fs_info, gang[i]); | 2969 | btrfs_free_fs_root(fs_info, gang[i]); |
2940 | } | 2970 | } |
2941 | return 0; | ||
2942 | } | 2971 | } |
2943 | 2972 | ||
2944 | int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) | 2973 | int btrfs_cleanup_fs_roots(struct btrfs_fs_info *fs_info) |
@@ -2987,14 +3016,21 @@ int btrfs_commit_super(struct btrfs_root *root) | |||
2987 | if (IS_ERR(trans)) | 3016 | if (IS_ERR(trans)) |
2988 | return PTR_ERR(trans); | 3017 | return PTR_ERR(trans); |
2989 | ret = btrfs_commit_transaction(trans, root); | 3018 | ret = btrfs_commit_transaction(trans, root); |
2990 | BUG_ON(ret); | 3019 | if (ret) |
3020 | return ret; | ||
2991 | /* run commit again to drop the original snapshot */ | 3021 | /* run commit again to drop the original snapshot */ |
2992 | trans = btrfs_join_transaction(root); | 3022 | trans = btrfs_join_transaction(root); |
2993 | if (IS_ERR(trans)) | 3023 | if (IS_ERR(trans)) |
2994 | return PTR_ERR(trans); | 3024 | return PTR_ERR(trans); |
2995 | btrfs_commit_transaction(trans, root); | 3025 | ret = btrfs_commit_transaction(trans, root); |
3026 | if (ret) | ||
3027 | return ret; | ||
2996 | ret = btrfs_write_and_wait_transaction(NULL, root); | 3028 | ret = btrfs_write_and_wait_transaction(NULL, root); |
2997 | BUG_ON(ret); | 3029 | if (ret) { |
3030 | btrfs_error(root->fs_info, ret, | ||
3031 | "Failed to sync btree inode to disk."); | ||
3032 | return ret; | ||
3033 | } | ||
2998 | 3034 | ||
2999 | ret = write_ctree_super(NULL, root, 0); | 3035 | ret = write_ctree_super(NULL, root, 0); |
3000 | return ret; | 3036 | return ret; |
@@ -3110,10 +3146,9 @@ int close_ctree(struct btrfs_root *root) | |||
3110 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid) | 3146 | int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid) |
3111 | { | 3147 | { |
3112 | int ret; | 3148 | int ret; |
3113 | struct inode *btree_inode = buf->first_page->mapping->host; | 3149 | struct inode *btree_inode = buf->pages[0]->mapping->host; |
3114 | 3150 | ||
3115 | ret = extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, buf, | 3151 | ret = extent_buffer_uptodate(buf); |
3116 | NULL); | ||
3117 | if (!ret) | 3152 | if (!ret) |
3118 | return ret; | 3153 | return ret; |
3119 | 3154 | ||
@@ -3124,16 +3159,13 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid) | |||
3124 | 3159 | ||
3125 | int btrfs_set_buffer_uptodate(struct extent_buffer *buf) | 3160 | int btrfs_set_buffer_uptodate(struct extent_buffer *buf) |
3126 | { | 3161 | { |
3127 | struct inode *btree_inode = buf->first_page->mapping->host; | 3162 | return set_extent_buffer_uptodate(buf); |
3128 | return set_extent_buffer_uptodate(&BTRFS_I(btree_inode)->io_tree, | ||
3129 | buf); | ||
3130 | } | 3163 | } |
3131 | 3164 | ||
3132 | void btrfs_mark_buffer_dirty(struct extent_buffer *buf) | 3165 | void btrfs_mark_buffer_dirty(struct extent_buffer *buf) |
3133 | { | 3166 | { |
3134 | struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; | 3167 | struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; |
3135 | u64 transid = btrfs_header_generation(buf); | 3168 | u64 transid = btrfs_header_generation(buf); |
3136 | struct inode *btree_inode = root->fs_info->btree_inode; | ||
3137 | int was_dirty; | 3169 | int was_dirty; |
3138 | 3170 | ||
3139 | btrfs_assert_tree_locked(buf); | 3171 | btrfs_assert_tree_locked(buf); |
@@ -3145,8 +3177,7 @@ void btrfs_mark_buffer_dirty(struct extent_buffer *buf) | |||
3145 | (unsigned long long)root->fs_info->generation); | 3177 | (unsigned long long)root->fs_info->generation); |
3146 | WARN_ON(1); | 3178 | WARN_ON(1); |
3147 | } | 3179 | } |
3148 | was_dirty = set_extent_buffer_dirty(&BTRFS_I(btree_inode)->io_tree, | 3180 | was_dirty = set_extent_buffer_dirty(buf); |
3149 | buf); | ||
3150 | if (!was_dirty) { | 3181 | if (!was_dirty) { |
3151 | spin_lock(&root->fs_info->delalloc_lock); | 3182 | spin_lock(&root->fs_info->delalloc_lock); |
3152 | root->fs_info->dirty_metadata_bytes += buf->len; | 3183 | root->fs_info->dirty_metadata_bytes += buf->len; |
@@ -3200,12 +3231,8 @@ void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr) | |||
3200 | 3231 | ||
3201 | int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) | 3232 | int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid) |
3202 | { | 3233 | { |
3203 | struct btrfs_root *root = BTRFS_I(buf->first_page->mapping->host)->root; | 3234 | struct btrfs_root *root = BTRFS_I(buf->pages[0]->mapping->host)->root; |
3204 | int ret; | 3235 | return btree_read_extent_buffer_pages(root, buf, 0, parent_transid); |
3205 | ret = btree_read_extent_buffer_pages(root, buf, 0, parent_transid); | ||
3206 | if (ret == 0) | ||
3207 | set_bit(EXTENT_BUFFER_UPTODATE, &buf->bflags); | ||
3208 | return ret; | ||
3209 | } | 3236 | } |
3210 | 3237 | ||
3211 | static int btree_lock_page_hook(struct page *page, void *data, | 3238 | static int btree_lock_page_hook(struct page *page, void *data, |
@@ -3213,17 +3240,21 @@ static int btree_lock_page_hook(struct page *page, void *data, | |||
3213 | { | 3240 | { |
3214 | struct inode *inode = page->mapping->host; | 3241 | struct inode *inode = page->mapping->host; |
3215 | struct btrfs_root *root = BTRFS_I(inode)->root; | 3242 | struct btrfs_root *root = BTRFS_I(inode)->root; |
3216 | struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree; | ||
3217 | struct extent_buffer *eb; | 3243 | struct extent_buffer *eb; |
3218 | unsigned long len; | ||
3219 | u64 bytenr = page_offset(page); | ||
3220 | 3244 | ||
3221 | if (page->private == EXTENT_PAGE_PRIVATE) | 3245 | /* |
3246 | * We culled this eb but the page is still hanging out on the mapping, | ||
3247 | * carry on. | ||
3248 | */ | ||
3249 | if (!PagePrivate(page)) | ||
3222 | goto out; | 3250 | goto out; |
3223 | 3251 | ||
3224 | len = page->private >> 2; | 3252 | eb = (struct extent_buffer *)page->private; |
3225 | eb = find_extent_buffer(io_tree, bytenr, len); | 3253 | if (!eb) { |
3226 | if (!eb) | 3254 | WARN_ON(1); |
3255 | goto out; | ||
3256 | } | ||
3257 | if (page != eb->pages[0]) | ||
3227 | goto out; | 3258 | goto out; |
3228 | 3259 | ||
3229 | if (!btrfs_try_tree_write_lock(eb)) { | 3260 | if (!btrfs_try_tree_write_lock(eb)) { |
@@ -3242,7 +3273,6 @@ static int btree_lock_page_hook(struct page *page, void *data, | |||
3242 | } | 3273 | } |
3243 | 3274 | ||
3244 | btrfs_tree_unlock(eb); | 3275 | btrfs_tree_unlock(eb); |
3245 | free_extent_buffer(eb); | ||
3246 | out: | 3276 | out: |
3247 | if (!trylock_page(page)) { | 3277 | if (!trylock_page(page)) { |
3248 | flush_fn(data); | 3278 | flush_fn(data); |
@@ -3251,15 +3281,23 @@ out: | |||
3251 | return 0; | 3281 | return 0; |
3252 | } | 3282 | } |
3253 | 3283 | ||
3254 | static void btrfs_check_super_valid(struct btrfs_fs_info *fs_info, | 3284 | static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info, |
3255 | int read_only) | 3285 | int read_only) |
3256 | { | 3286 | { |
3287 | if (btrfs_super_csum_type(fs_info->super_copy) >= ARRAY_SIZE(btrfs_csum_sizes)) { | ||
3288 | printk(KERN_ERR "btrfs: unsupported checksum algorithm\n"); | ||
3289 | return -EINVAL; | ||
3290 | } | ||
3291 | |||
3257 | if (read_only) | 3292 | if (read_only) |
3258 | return; | 3293 | return 0; |
3259 | 3294 | ||
3260 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) | 3295 | if (fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { |
3261 | printk(KERN_WARNING "warning: mount fs with errors, " | 3296 | printk(KERN_WARNING "warning: mount fs with errors, " |
3262 | "running btrfsck is recommended\n"); | 3297 | "running btrfsck is recommended\n"); |
3298 | } | ||
3299 | |||
3300 | return 0; | ||
3263 | } | 3301 | } |
3264 | 3302 | ||
3265 | int btrfs_error_commit_super(struct btrfs_root *root) | 3303 | int btrfs_error_commit_super(struct btrfs_root *root) |
@@ -3281,7 +3319,7 @@ int btrfs_error_commit_super(struct btrfs_root *root) | |||
3281 | return ret; | 3319 | return ret; |
3282 | } | 3320 | } |
3283 | 3321 | ||
3284 | static int btrfs_destroy_ordered_operations(struct btrfs_root *root) | 3322 | static void btrfs_destroy_ordered_operations(struct btrfs_root *root) |
3285 | { | 3323 | { |
3286 | struct btrfs_inode *btrfs_inode; | 3324 | struct btrfs_inode *btrfs_inode; |
3287 | struct list_head splice; | 3325 | struct list_head splice; |
@@ -3303,11 +3341,9 @@ static int btrfs_destroy_ordered_operations(struct btrfs_root *root) | |||
3303 | 3341 | ||
3304 | spin_unlock(&root->fs_info->ordered_extent_lock); | 3342 | spin_unlock(&root->fs_info->ordered_extent_lock); |
3305 | mutex_unlock(&root->fs_info->ordered_operations_mutex); | 3343 | mutex_unlock(&root->fs_info->ordered_operations_mutex); |
3306 | |||
3307 | return 0; | ||
3308 | } | 3344 | } |
3309 | 3345 | ||
3310 | static int btrfs_destroy_ordered_extents(struct btrfs_root *root) | 3346 | static void btrfs_destroy_ordered_extents(struct btrfs_root *root) |
3311 | { | 3347 | { |
3312 | struct list_head splice; | 3348 | struct list_head splice; |
3313 | struct btrfs_ordered_extent *ordered; | 3349 | struct btrfs_ordered_extent *ordered; |
@@ -3339,12 +3375,10 @@ static int btrfs_destroy_ordered_extents(struct btrfs_root *root) | |||
3339 | } | 3375 | } |
3340 | 3376 | ||
3341 | spin_unlock(&root->fs_info->ordered_extent_lock); | 3377 | spin_unlock(&root->fs_info->ordered_extent_lock); |
3342 | |||
3343 | return 0; | ||
3344 | } | 3378 | } |
3345 | 3379 | ||
3346 | static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | 3380 | int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, |
3347 | struct btrfs_root *root) | 3381 | struct btrfs_root *root) |
3348 | { | 3382 | { |
3349 | struct rb_node *node; | 3383 | struct rb_node *node; |
3350 | struct btrfs_delayed_ref_root *delayed_refs; | 3384 | struct btrfs_delayed_ref_root *delayed_refs; |
@@ -3353,6 +3387,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | |||
3353 | 3387 | ||
3354 | delayed_refs = &trans->delayed_refs; | 3388 | delayed_refs = &trans->delayed_refs; |
3355 | 3389 | ||
3390 | again: | ||
3356 | spin_lock(&delayed_refs->lock); | 3391 | spin_lock(&delayed_refs->lock); |
3357 | if (delayed_refs->num_entries == 0) { | 3392 | if (delayed_refs->num_entries == 0) { |
3358 | spin_unlock(&delayed_refs->lock); | 3393 | spin_unlock(&delayed_refs->lock); |
@@ -3374,6 +3409,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | |||
3374 | struct btrfs_delayed_ref_head *head; | 3409 | struct btrfs_delayed_ref_head *head; |
3375 | 3410 | ||
3376 | head = btrfs_delayed_node_to_head(ref); | 3411 | head = btrfs_delayed_node_to_head(ref); |
3412 | spin_unlock(&delayed_refs->lock); | ||
3377 | mutex_lock(&head->mutex); | 3413 | mutex_lock(&head->mutex); |
3378 | kfree(head->extent_op); | 3414 | kfree(head->extent_op); |
3379 | delayed_refs->num_heads--; | 3415 | delayed_refs->num_heads--; |
@@ -3381,8 +3417,9 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | |||
3381 | delayed_refs->num_heads_ready--; | 3417 | delayed_refs->num_heads_ready--; |
3382 | list_del_init(&head->cluster); | 3418 | list_del_init(&head->cluster); |
3383 | mutex_unlock(&head->mutex); | 3419 | mutex_unlock(&head->mutex); |
3420 | btrfs_put_delayed_ref(ref); | ||
3421 | goto again; | ||
3384 | } | 3422 | } |
3385 | |||
3386 | spin_unlock(&delayed_refs->lock); | 3423 | spin_unlock(&delayed_refs->lock); |
3387 | btrfs_put_delayed_ref(ref); | 3424 | btrfs_put_delayed_ref(ref); |
3388 | 3425 | ||
@@ -3395,7 +3432,7 @@ static int btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, | |||
3395 | return ret; | 3432 | return ret; |
3396 | } | 3433 | } |
3397 | 3434 | ||
3398 | static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t) | 3435 | static void btrfs_destroy_pending_snapshots(struct btrfs_transaction *t) |
3399 | { | 3436 | { |
3400 | struct btrfs_pending_snapshot *snapshot; | 3437 | struct btrfs_pending_snapshot *snapshot; |
3401 | struct list_head splice; | 3438 | struct list_head splice; |
@@ -3413,11 +3450,9 @@ static int btrfs_destroy_pending_snapshots(struct btrfs_transaction *t) | |||
3413 | 3450 | ||
3414 | kfree(snapshot); | 3451 | kfree(snapshot); |
3415 | } | 3452 | } |
3416 | |||
3417 | return 0; | ||
3418 | } | 3453 | } |
3419 | 3454 | ||
3420 | static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root) | 3455 | static void btrfs_destroy_delalloc_inodes(struct btrfs_root *root) |
3421 | { | 3456 | { |
3422 | struct btrfs_inode *btrfs_inode; | 3457 | struct btrfs_inode *btrfs_inode; |
3423 | struct list_head splice; | 3458 | struct list_head splice; |
@@ -3437,8 +3472,6 @@ static int btrfs_destroy_delalloc_inodes(struct btrfs_root *root) | |||
3437 | } | 3472 | } |
3438 | 3473 | ||
3439 | spin_unlock(&root->fs_info->delalloc_lock); | 3474 | spin_unlock(&root->fs_info->delalloc_lock); |
3440 | |||
3441 | return 0; | ||
3442 | } | 3475 | } |
3443 | 3476 | ||
3444 | static int btrfs_destroy_marked_extents(struct btrfs_root *root, | 3477 | static int btrfs_destroy_marked_extents(struct btrfs_root *root, |
@@ -3529,13 +3562,43 @@ static int btrfs_destroy_pinned_extent(struct btrfs_root *root, | |||
3529 | return 0; | 3562 | return 0; |
3530 | } | 3563 | } |
3531 | 3564 | ||
3532 | static int btrfs_cleanup_transaction(struct btrfs_root *root) | 3565 | void btrfs_cleanup_one_transaction(struct btrfs_transaction *cur_trans, |
3566 | struct btrfs_root *root) | ||
3567 | { | ||
3568 | btrfs_destroy_delayed_refs(cur_trans, root); | ||
3569 | btrfs_block_rsv_release(root, &root->fs_info->trans_block_rsv, | ||
3570 | cur_trans->dirty_pages.dirty_bytes); | ||
3571 | |||
3572 | /* FIXME: cleanup wait for commit */ | ||
3573 | cur_trans->in_commit = 1; | ||
3574 | cur_trans->blocked = 1; | ||
3575 | if (waitqueue_active(&root->fs_info->transaction_blocked_wait)) | ||
3576 | wake_up(&root->fs_info->transaction_blocked_wait); | ||
3577 | |||
3578 | cur_trans->blocked = 0; | ||
3579 | if (waitqueue_active(&root->fs_info->transaction_wait)) | ||
3580 | wake_up(&root->fs_info->transaction_wait); | ||
3581 | |||
3582 | cur_trans->commit_done = 1; | ||
3583 | if (waitqueue_active(&cur_trans->commit_wait)) | ||
3584 | wake_up(&cur_trans->commit_wait); | ||
3585 | |||
3586 | btrfs_destroy_pending_snapshots(cur_trans); | ||
3587 | |||
3588 | btrfs_destroy_marked_extents(root, &cur_trans->dirty_pages, | ||
3589 | EXTENT_DIRTY); | ||
3590 | |||
3591 | /* | ||
3592 | memset(cur_trans, 0, sizeof(*cur_trans)); | ||
3593 | kmem_cache_free(btrfs_transaction_cachep, cur_trans); | ||
3594 | */ | ||
3595 | } | ||
3596 | |||
3597 | int btrfs_cleanup_transaction(struct btrfs_root *root) | ||
3533 | { | 3598 | { |
3534 | struct btrfs_transaction *t; | 3599 | struct btrfs_transaction *t; |
3535 | LIST_HEAD(list); | 3600 | LIST_HEAD(list); |
3536 | 3601 | ||
3537 | WARN_ON(1); | ||
3538 | |||
3539 | mutex_lock(&root->fs_info->transaction_kthread_mutex); | 3602 | mutex_lock(&root->fs_info->transaction_kthread_mutex); |
3540 | 3603 | ||
3541 | spin_lock(&root->fs_info->trans_lock); | 3604 | spin_lock(&root->fs_info->trans_lock); |
@@ -3600,6 +3663,17 @@ static int btrfs_cleanup_transaction(struct btrfs_root *root) | |||
3600 | return 0; | 3663 | return 0; |
3601 | } | 3664 | } |
3602 | 3665 | ||
3666 | static int btree_writepage_io_failed_hook(struct bio *bio, struct page *page, | ||
3667 | u64 start, u64 end, | ||
3668 | struct extent_state *state) | ||
3669 | { | ||
3670 | struct super_block *sb = page->mapping->host->i_sb; | ||
3671 | struct btrfs_fs_info *fs_info = btrfs_sb(sb); | ||
3672 | btrfs_error(fs_info, -EIO, | ||
3673 | "Error occured while writing out btree at %llu", start); | ||
3674 | return -EIO; | ||
3675 | } | ||
3676 | |||
3603 | static struct extent_io_ops btree_extent_io_ops = { | 3677 | static struct extent_io_ops btree_extent_io_ops = { |
3604 | .write_cache_pages_lock_hook = btree_lock_page_hook, | 3678 | .write_cache_pages_lock_hook = btree_lock_page_hook, |
3605 | .readpage_end_io_hook = btree_readpage_end_io_hook, | 3679 | .readpage_end_io_hook = btree_readpage_end_io_hook, |
@@ -3607,4 +3681,5 @@ static struct extent_io_ops btree_extent_io_ops = { | |||
3607 | .submit_bio_hook = btree_submit_bio_hook, | 3681 | .submit_bio_hook = btree_submit_bio_hook, |
3608 | /* note we're sharing with inode.c for the merge bio hook */ | 3682 | /* note we're sharing with inode.c for the merge bio hook */ |
3609 | .merge_bio_hook = btrfs_merge_bio_hook, | 3683 | .merge_bio_hook = btrfs_merge_bio_hook, |
3684 | .writepage_io_failed_hook = btree_writepage_io_failed_hook, | ||
3610 | }; | 3685 | }; |