Diffstat (limited to 'fs/btrfs/free-space-cache.c')
 fs/btrfs/free-space-cache.c | 713 +++++++++++++++++++++++++++++--------------
 1 file changed, 449 insertions(+), 264 deletions(-)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index a0390657451b..11d2e9cea09e 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -24,6 +24,7 @@
 #include "free-space-cache.h"
 #include "transaction.h"
 #include "disk-io.h"
+#include "extent_io.h"
 
 #define BITS_PER_BITMAP		(PAGE_CACHE_SIZE * 8)
 #define MAX_CACHE_BYTES_PER_GIG	(32 * 1024)
@@ -81,6 +82,8 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
 		return ERR_PTR(-ENOENT);
 	}
 
+	inode->i_mapping->flags &= ~__GFP_FS;
+
 	spin_lock(&block_group->lock);
 	if (!root->fs_info->closing) {
 		block_group->inode = igrab(inode);
@@ -222,6 +225,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
 	u64 num_entries;
 	u64 num_bitmaps;
 	u64 generation;
+	u64 used = btrfs_block_group_used(&block_group->item);
 	u32 cur_crc = ~(u32)0;
 	pgoff_t index = 0;
 	unsigned long first_page_offset;
@@ -393,7 +397,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
 			break;
 
 		need_loop = 1;
-		e = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
+		e = kmem_cache_zalloc(btrfs_free_space_cachep,
+				      GFP_NOFS);
 		if (!e) {
 			kunmap(page);
 			unlock_page(page);
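This hunk, and the matching kmem_cache_free() conversions throughout the rest of the patch, move struct btrfs_free_space allocations from kzalloc()/kfree() onto a dedicated slab cache, btrfs_free_space_cachep. The cache itself is created outside this file, so its setup is not part of this diff; a sketch of the usual module-init pattern it relies on is below (the init/exit function names and the flag choice here are illustrative assumptions, not taken from this patch):

/* Sketch only: the real cache creation lives outside this diff. */
struct kmem_cache *btrfs_free_space_cachep;

static int __init free_space_cache_init(void)
{
	btrfs_free_space_cachep = kmem_cache_create("btrfs_free_space",
			sizeof(struct btrfs_free_space), 0,
			SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, NULL);
	if (!btrfs_free_space_cachep)
		return -ENOMEM;
	return 0;
}

static void __exit free_space_cache_exit(void)
{
	kmem_cache_destroy(btrfs_free_space_cachep);
}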
@@ -405,7 +410,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
 		e->bytes = le64_to_cpu(entry->bytes);
 		if (!e->bytes) {
 			kunmap(page);
-			kfree(e);
+			kmem_cache_free(btrfs_free_space_cachep, e);
 			unlock_page(page);
 			page_cache_release(page);
 			goto free_cache;
@@ -420,7 +425,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
 			e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
 			if (!e->bitmap) {
 				kunmap(page);
-				kfree(e);
+				kmem_cache_free(
+						btrfs_free_space_cachep, e);
 				unlock_page(page);
 				page_cache_release(page);
 				goto free_cache;
@@ -465,6 +471,17 @@ next:
 		index++;
 	}
 
+	spin_lock(&block_group->tree_lock);
+	if (block_group->free_space != (block_group->key.offset - used -
+					block_group->bytes_super)) {
+		spin_unlock(&block_group->tree_lock);
+		printk(KERN_ERR "block group %llu has a wrong amount of free "
+		       "space\n", block_group->key.objectid);
+		ret = 0;
+		goto free_cache;
+	}
+	spin_unlock(&block_group->tree_lock);
+
 	ret = 1;
 out:
 	kfree(checksums);
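The check added here enforces a simple invariant before trusting the on-disk cache: the free space recorded in the cache must equal the block group's size (key.offset) minus its allocated bytes minus the space reserved for superblock mirrors (bytes_super). A minimal userspace model of that arithmetic, with illustrative names:

#include <stdint.h>
#include <stdio.h>

/* Userspace model of the consistency check; all names are illustrative. */
static int free_space_consistent(uint64_t bg_size, uint64_t used,
				 uint64_t bytes_super, uint64_t cached_free)
{
	return cached_free == bg_size - used - bytes_super;
}

int main(void)
{
	/* 1 GiB block group, 256 MiB used, 2 MiB reserved for superblocks */
	uint64_t size = 1ULL << 30, used = 256ULL << 20, super = 2ULL << 20;
	uint64_t good = size - used - super;

	printf("consistent: %d\n",
	       free_space_consistent(size, used, super, good));
	printf("stale cache: %d\n",
	       free_space_consistent(size, used, super, good - 4096));
	return 0;
}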
@@ -491,18 +508,23 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 	struct inode *inode;
 	struct rb_node *node;
 	struct list_head *pos, *n;
+	struct page **pages;
 	struct page *page;
 	struct extent_state *cached_state = NULL;
+	struct btrfs_free_cluster *cluster = NULL;
+	struct extent_io_tree *unpin = NULL;
 	struct list_head bitmap_list;
 	struct btrfs_key key;
+	u64 start, end, len;
 	u64 bytes = 0;
 	u32 *crc, *checksums;
-	pgoff_t index = 0, last_index = 0;
 	unsigned long first_page_offset;
-	int num_checksums;
+	int index = 0, num_pages = 0;
 	int entries = 0;
 	int bitmaps = 0;
 	int ret = 0;
+	bool next_page = false;
+	bool out_of_space = false;
 
 	root = root->fs_info->tree_root;
 
@@ -530,24 +552,43 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 		return 0;
 	}
 
-	last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+	num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
+		PAGE_CACHE_SHIFT;
 	filemap_write_and_wait(inode->i_mapping);
 	btrfs_wait_ordered_range(inode, inode->i_size &
 				 ~(root->sectorsize - 1), (u64)-1);
 
 	/* We need a checksum per page. */
-	num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE;
-	crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
+	crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
 	if (!crc) {
 		iput(inode);
 		return 0;
 	}
 
+	pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
+	if (!pages) {
+		kfree(crc);
+		iput(inode);
+		return 0;
+	}
+
 	/* Since the first page has all of our checksums and our generation we
 	 * need to calculate the offset into the page that we can start writing
 	 * our entries.
 	 */
-	first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64);
+	first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
+
+	/* Get the cluster for this block_group if it exists */
+	if (!list_empty(&block_group->cluster_list))
+		cluster = list_entry(block_group->cluster_list.next,
+				     struct btrfs_free_cluster,
+				     block_group_list);
+
+	/*
+	 * We shouldn't have switched the pinned extents yet so this is the
+	 * right one
+	 */
+	unpin = root->fs_info->pinned_extents;
 
 	/*
 	 * Lock all pages first so we can lock the extent safely.
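The page math above is the core of this hunk: the old code computed i_size / PAGE_CACHE_SIZE, which under-counts checksums when i_size is not page-aligned; the new code rounds up, and first_page_offset then reserves one u32 CRC per page plus a u64 generation at the front of page 0. A userspace sketch of the layout arithmetic (4 KiB pages assumed):

#include <stdint.h>
#include <stdio.h>

#define PAGE_CACHE_SIZE 4096ULL

int main(void)
{
	uint64_t i_size = 9 * PAGE_CACHE_SIZE + 123;	/* any cache file size */

	/* round up, as the new code does; the old '/ PAGE_CACHE_SIZE' truncated */
	uint64_t num_pages = (i_size + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE;

	/* one u32 checksum per page, then a u64 generation stamp */
	uint64_t first_page_offset = sizeof(uint32_t) * num_pages +
				     sizeof(uint64_t);

	printf("pages=%llu, entries start at offset %llu in page 0\n",
	       (unsigned long long)num_pages,
	       (unsigned long long)first_page_offset);
	return 0;
}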
@@ -557,20 +598,18 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 	 * after find_get_page at this point.  Just putting this here so people
 	 * know and don't freak out.
 	 */
-	while (index <= last_index) {
+	while (index < num_pages) {
 		page = grab_cache_page(inode->i_mapping, index);
 		if (!page) {
-			pgoff_t i = 0;
+			int i;
 
-			while (i < index) {
-				page = find_get_page(inode->i_mapping, i);
-				unlock_page(page);
-				page_cache_release(page);
-				page_cache_release(page);
-				i++;
+			for (i = 0; i < num_pages; i++) {
+				unlock_page(pages[i]);
+				page_cache_release(pages[i]);
 			}
 			goto out_free;
 		}
+		pages[index] = page;
 		index++;
 	}
 
@@ -578,6 +617,12 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 	lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
 			 0, &cached_state, GFP_NOFS);
 
+	/*
+	 * When searching for pinned extents, we need to start at our start
+	 * offset.
+	 */
+	start = block_group->key.objectid;
+
 	/* Write out the extent entries */
 	do {
 		struct btrfs_free_space_entry *entry;
@@ -585,18 +630,25 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 		unsigned long offset = 0;
 		unsigned long start_offset = 0;
 
+		next_page = false;
+
 		if (index == 0) {
 			start_offset = first_page_offset;
 			offset = start_offset;
 		}
 
-		page = find_get_page(inode->i_mapping, index);
+		if (index >= num_pages) {
+			out_of_space = true;
+			break;
+		}
+
+		page = pages[index];
 
 		addr = kmap(page);
 		entry = addr + start_offset;
 
 		memset(addr, 0, PAGE_CACHE_SIZE);
-		while (1) {
+		while (node && !next_page) {
 			struct btrfs_free_space *e;
 
 			e = rb_entry(node, struct btrfs_free_space, offset_index);
@@ -612,12 +664,49 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 				entry->type = BTRFS_FREE_SPACE_EXTENT;
 			}
 			node = rb_next(node);
-			if (!node)
-				break;
+			if (!node && cluster) {
+				node = rb_first(&cluster->root);
+				cluster = NULL;
+			}
 			offset += sizeof(struct btrfs_free_space_entry);
 			if (offset + sizeof(struct btrfs_free_space_entry) >=
 			    PAGE_CACHE_SIZE)
+				next_page = true;
+			entry++;
+		}
+
+		/*
+		 * We want to add any pinned extents to our free space cache
+		 * so we don't leak the space
+		 */
+		while (!next_page && (start < block_group->key.objectid +
+				      block_group->key.offset)) {
+			ret = find_first_extent_bit(unpin, start, &start, &end,
+						    EXTENT_DIRTY);
+			if (ret) {
+				ret = 0;
+				break;
+			}
+
+			/* This pinned extent is out of our range */
+			if (start >= block_group->key.objectid +
+			    block_group->key.offset)
 				break;
+
+			len = block_group->key.objectid +
+			      block_group->key.offset - start;
+			len = min(len, end + 1 - start);
+
+			entries++;
+			entry->offset = cpu_to_le64(start);
+			entry->bytes = cpu_to_le64(len);
+			entry->type = BTRFS_FREE_SPACE_EXTENT;
+
+			start = end + 1;
+			offset += sizeof(struct btrfs_free_space_entry);
+			if (offset + sizeof(struct btrfs_free_space_entry) >=
+			    PAGE_CACHE_SIZE)
+				next_page = true;
 			entry++;
 		}
 		*crc = ~(u32)0;
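Each record written above is a struct btrfs_free_space_entry: two little-endian u64s (offset, bytes) and a one-byte type, and the writer raises next_page as soon as one more record would cross the page boundary. A self-contained userspace model of that pagination (a little-endian host is assumed for the memcpy; the buffer handling is illustrative):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define PAGE_CACHE_SIZE 4096
#define FREE_SPACE_EXTENT 1

/* mirrors struct btrfs_free_space_entry: __le64 offset, __le64 bytes, u8 type */
struct entry {
	uint64_t offset;
	uint64_t bytes;
	uint8_t type;
} __attribute__((packed));

int main(void)
{
	unsigned char page[PAGE_CACHE_SIZE] = {0};
	size_t offset = 0;
	int n = 0;

	/* pack entries until the next one would cross the page boundary,
	 * which is where the kernel loop sets next_page and moves on */
	while (offset + sizeof(struct entry) < PAGE_CACHE_SIZE) {
		struct entry e = { 4096ULL * n, 4096, FREE_SPACE_EXTENT };

		memcpy(page + offset, &e, sizeof(e));
		offset += sizeof(e);
		n++;
	}
	printf("%d entries of %zu bytes fit in one page\n",
	       n, sizeof(struct entry));
	return 0;
}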
@@ -630,25 +719,8 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 
 		bytes += PAGE_CACHE_SIZE;
 
-		ClearPageChecked(page);
-		set_page_extent_mapped(page);
-		SetPageUptodate(page);
-		set_page_dirty(page);
-
-		/*
-		 * We need to release our reference we got for grab_cache_page,
-		 * except for the first page which will hold our checksums, we
-		 * do that below.
-		 */
-		if (index != 0) {
-			unlock_page(page);
-			page_cache_release(page);
-		}
-
-		page_cache_release(page);
-
 		index++;
-	} while (node);
+	} while (node || next_page);
 
 	/* Write out the bitmaps */
 	list_for_each_safe(pos, n, &bitmap_list) {
@@ -656,7 +728,11 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 		struct btrfs_free_space *entry =
 			list_entry(pos, struct btrfs_free_space, list);
 
-		page = find_get_page(inode->i_mapping, index);
+		if (index >= num_pages) {
+			out_of_space = true;
+			break;
+		}
+		page = pages[index];
 
 		addr = kmap(page);
 		memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
@@ -667,64 +743,58 @@ int btrfs_write_out_cache(struct btrfs_root *root,
 		crc++;
 		bytes += PAGE_CACHE_SIZE;
 
-		ClearPageChecked(page);
-		set_page_extent_mapped(page);
-		SetPageUptodate(page);
-		set_page_dirty(page);
-		unlock_page(page);
-		page_cache_release(page);
-		page_cache_release(page);
 		list_del_init(&entry->list);
 		index++;
 	}
 
+	if (out_of_space) {
+		btrfs_drop_pages(pages, num_pages);
+		unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
+				     i_size_read(inode) - 1, &cached_state,
+				     GFP_NOFS);
+		ret = 0;
+		goto out_free;
+	}
+
 	/* Zero out the rest of the pages just to make sure */
-	while (index <= last_index) {
+	while (index < num_pages) {
 		void *addr;
 
-		page = find_get_page(inode->i_mapping, index);
-
+		page = pages[index];
 		addr = kmap(page);
 		memset(addr, 0, PAGE_CACHE_SIZE);
 		kunmap(page);
-		ClearPageChecked(page);
-		set_page_extent_mapped(page);
-		SetPageUptodate(page);
-		set_page_dirty(page);
-		unlock_page(page);
-		page_cache_release(page);
-		page_cache_release(page);
 		bytes += PAGE_CACHE_SIZE;
 		index++;
 	}
 
-	btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);
-
 	/* Write the checksums and trans id to the first page */
 	{
 		void *addr;
 		u64 *gen;
 
-		page = find_get_page(inode->i_mapping, 0);
+		page = pages[0];
 
 		addr = kmap(page);
-		memcpy(addr, checksums, sizeof(u32) * num_checksums);
-		gen = addr + (sizeof(u32) * num_checksums);
+		memcpy(addr, checksums, sizeof(u32) * num_pages);
+		gen = addr + (sizeof(u32) * num_pages);
 		*gen = trans->transid;
 		kunmap(page);
-		ClearPageChecked(page);
-		set_page_extent_mapped(page);
-		SetPageUptodate(page);
-		set_page_dirty(page);
-		unlock_page(page);
-		page_cache_release(page);
-		page_cache_release(page);
 	}
-	BTRFS_I(inode)->generation = trans->transid;
 
+	ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0,
+				bytes, &cached_state);
+	btrfs_drop_pages(pages, num_pages);
 	unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
 			     i_size_read(inode) - 1, &cached_state, GFP_NOFS);
 
+	if (ret) {
+		ret = 0;
+		goto out_free;
+	}
+
+	BTRFS_I(inode)->generation = trans->transid;
+
 	filemap_write_and_wait(inode->i_mapping);
 
 	key.objectid = BTRFS_FREE_SPACE_OBJECTID;
@@ -775,6 +845,7 @@ out_free:
 		BTRFS_I(inode)->generation = 0;
 	}
 	kfree(checksums);
+	kfree(pages);
 	btrfs_update_inode(trans, root, inode);
 	iput(inode);
 	return ret;
@@ -1187,7 +1258,7 @@ static void free_bitmap(struct btrfs_block_group_cache *block_group,
 {
 	unlink_free_space(block_group, bitmap_info);
 	kfree(bitmap_info->bitmap);
-	kfree(bitmap_info);
+	kmem_cache_free(btrfs_free_space_cachep, bitmap_info);
 	block_group->total_bitmaps--;
 	recalculate_thresholds(block_group);
 }
@@ -1285,9 +1356,22 @@ static int insert_into_bitmap(struct btrfs_block_group_cache *block_group,
 	 * If we are below the extents threshold then we can add this as an
 	 * extent, and don't have to deal with the bitmap
 	 */
-	if (block_group->free_extents < block_group->extents_thresh &&
-	    info->bytes > block_group->sectorsize * 4)
-		return 0;
+	if (block_group->free_extents < block_group->extents_thresh) {
+		/*
+		 * If this block group has some small extents we don't want to
+		 * use up all of our free slots in the cache with them, we want
+		 * to reserve them to larger extents, however if we have plenty
+		 * of cache left then go ahead and add them, no sense in adding
+		 * the overhead of a bitmap if we don't have to.
+		 */
+		if (info->bytes <= block_group->sectorsize * 4) {
+			if (block_group->free_extents * 2 <=
+			    block_group->extents_thresh)
+				return 0;
+		} else {
+			return 0;
+		}
+	}
 
 	/*
 	 * some block groups are so tiny they can't be enveloped by a bitmap, so
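The rewritten gate above only pushes a small extent (at most four sectors) into a bitmap once more than half of the extent slots are already taken; larger extents below the threshold always stay as plain entries. A standalone model of the decision (function name hypothetical):

#include <stdbool.h>
#include <stdio.h>

/* Illustrative model of the insert_into_bitmap gate above. */
static bool use_bitmap(unsigned free_extents, unsigned extents_thresh,
		       unsigned long long bytes, unsigned sectorsize)
{
	if (free_extents < extents_thresh) {
		/* small extent: bitmap it only once half the slots are used */
		if (bytes <= (unsigned long long)sectorsize * 4)
			return free_extents * 2 > extents_thresh;
		return false;	/* large extent, slots to spare: plain entry */
	}
	return true;		/* over the threshold: use a bitmap */
}

int main(void)
{
	printf("%d\n", use_bitmap(10, 100, 4096, 4096));  /* 0: stays an extent */
	printf("%d\n", use_bitmap(60, 100, 4096, 4096));  /* 1: small, slots scarce */
	printf("%d\n", use_bitmap(60, 100, 65536, 4096)); /* 0: large extent */
	return 0;
}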
@@ -1342,8 +1426,8 @@ new_bitmap:
 
 		/* no pre-allocated info, allocate a new one */
 		if (!info) {
-			info = kzalloc(sizeof(struct btrfs_free_space),
-				       GFP_NOFS);
+			info = kmem_cache_zalloc(btrfs_free_space_cachep,
+						 GFP_NOFS);
 			if (!info) {
 				spin_lock(&block_group->tree_lock);
 				ret = -ENOMEM;
@@ -1365,7 +1449,7 @@ out:
 	if (info) {
 		if (info->bitmap)
 			kfree(info->bitmap);
-		kfree(info);
+		kmem_cache_free(btrfs_free_space_cachep, info);
 	}
 
 	return ret;
@@ -1398,7 +1482,7 @@ bool try_merge_free_space(struct btrfs_block_group_cache *block_group,
 		else
 			__unlink_free_space(block_group, right_info);
 		info->bytes += right_info->bytes;
-		kfree(right_info);
+		kmem_cache_free(btrfs_free_space_cachep, right_info);
 		merged = true;
 	}
 
@@ -1410,7 +1494,7 @@ bool try_merge_free_space(struct btrfs_block_group_cache *block_group,
 			__unlink_free_space(block_group, left_info);
 		info->offset = left_info->offset;
 		info->bytes += left_info->bytes;
-		kfree(left_info);
+		kmem_cache_free(btrfs_free_space_cachep, left_info);
 		merged = true;
 	}
 
@@ -1423,7 +1507,7 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
 	struct btrfs_free_space *info;
 	int ret = 0;
 
-	info = kzalloc(sizeof(struct btrfs_free_space), GFP_NOFS);
+	info = kmem_cache_zalloc(btrfs_free_space_cachep, GFP_NOFS);
 	if (!info)
 		return -ENOMEM;
 
@@ -1450,7 +1534,7 @@ int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
 link:
 	ret = link_free_space(block_group, info);
 	if (ret)
-		kfree(info);
+		kmem_cache_free(btrfs_free_space_cachep, info);
 out:
 	spin_unlock(&block_group->tree_lock);
 
@@ -1520,7 +1604,7 @@ again:
 			kfree(info->bitmap);
 			block_group->total_bitmaps--;
 		}
-		kfree(info);
+		kmem_cache_free(btrfs_free_space_cachep, info);
 		goto out_lock;
 	}
 
@@ -1556,7 +1640,7 @@ again:
 			/* the hole we're creating ends at the end
 			 * of the info struct, just free the info
 			 */
-			kfree(info);
+			kmem_cache_free(btrfs_free_space_cachep, info);
 		}
 		spin_unlock(&block_group->tree_lock);
 
@@ -1629,30 +1713,28 @@ __btrfs_return_cluster_to_free_space(
 {
 	struct btrfs_free_space *entry;
 	struct rb_node *node;
-	bool bitmap;
 
 	spin_lock(&cluster->lock);
 	if (cluster->block_group != block_group)
 		goto out;
 
-	bitmap = cluster->points_to_bitmap;
 	cluster->block_group = NULL;
 	cluster->window_start = 0;
 	list_del_init(&cluster->block_group_list);
-	cluster->points_to_bitmap = false;
-
-	if (bitmap)
-		goto out;
 
 	node = rb_first(&cluster->root);
 	while (node) {
+		bool bitmap;
+
 		entry = rb_entry(node, struct btrfs_free_space, offset_index);
 		node = rb_next(&entry->offset_index);
 		rb_erase(&entry->offset_index, &cluster->root);
-		BUG_ON(entry->bitmap);
-		try_merge_free_space(block_group, entry, false);
+
+		bitmap = (entry->bitmap != NULL);
+		if (!bitmap)
+			try_merge_free_space(block_group, entry, false);
 		tree_insert_offset(&block_group->free_space_offset,
-				   entry->offset, &entry->offset_index, 0);
+				   entry->offset, &entry->offset_index, bitmap);
 	}
 	cluster->root = RB_ROOT;
 
@@ -1689,7 +1771,7 @@ void btrfs_remove_free_space_cache(struct btrfs_block_group_cache *block_group)
 		unlink_free_space(block_group, info);
 		if (info->bitmap)
 			kfree(info->bitmap);
-		kfree(info);
+		kmem_cache_free(btrfs_free_space_cachep, info);
 		if (need_resched()) {
 			spin_unlock(&block_group->tree_lock);
 			cond_resched();
@@ -1722,7 +1804,7 @@ u64 btrfs_find_space_for_alloc(struct btrfs_block_group_cache *block_group,
 			entry->offset += bytes;
 			entry->bytes -= bytes;
 			if (!entry->bytes)
-				kfree(entry);
+				kmem_cache_free(btrfs_free_space_cachep, entry);
 			else
 				link_free_space(block_group, entry);
 		}
@@ -1775,50 +1857,24 @@
 
 static u64 btrfs_alloc_from_bitmap(struct btrfs_block_group_cache *block_group,
 				   struct btrfs_free_cluster *cluster,
+				   struct btrfs_free_space *entry,
 				   u64 bytes, u64 min_start)
 {
-	struct btrfs_free_space *entry;
 	int err;
 	u64 search_start = cluster->window_start;
 	u64 search_bytes = bytes;
 	u64 ret = 0;
 
-	spin_lock(&block_group->tree_lock);
-	spin_lock(&cluster->lock);
-
-	if (!cluster->points_to_bitmap)
-		goto out;
-
-	if (cluster->block_group != block_group)
-		goto out;
-
-	/*
-	 * search_start is the beginning of the bitmap, but at some point it may
-	 * be a good idea to point to the actual start of the free area in the
-	 * bitmap, so do the offset_to_bitmap trick anyway, and set bitmap_only
-	 * to 1 to make sure we get the bitmap entry
-	 */
-	entry = tree_search_offset(block_group,
-				   offset_to_bitmap(block_group, search_start),
-				   1, 0);
-	if (!entry || !entry->bitmap)
-		goto out;
-
 	search_start = min_start;
 	search_bytes = bytes;
 
 	err = search_bitmap(block_group, entry, &search_start,
 			    &search_bytes);
 	if (err)
-		goto out;
+		return 0;
 
 	ret = search_start;
 	bitmap_clear_bits(block_group, entry, ret, bytes);
-	if (entry->bytes == 0)
-		free_bitmap(block_group, entry);
-out:
-	spin_unlock(&cluster->lock);
-	spin_unlock(&block_group->tree_lock);
 
 	return ret;
 }
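With the cluster now holding the bitmap entry itself, btrfs_alloc_from_bitmap() reduces to a search_bitmap() call plus bitmap_clear_bits(). The core of that search is finding a long-enough run of set bits; a self-contained userspace model (helper name hypothetical):

#include <stdint.h>
#include <stdio.h>

/* Find the first run of >= want set bits; returns the start bit or -1.
 * Models what search_bitmap() does over a block group bitmap. */
static long find_free_run(const uint8_t *bm, unsigned nbits, unsigned want)
{
	unsigned run = 0;

	for (unsigned i = 0; i < nbits; i++) {
		if (bm[i / 8] & (1u << (i % 8))) {
			if (++run == want)
				return (long)(i - want + 1);
		} else {
			run = 0;
		}
	}
	return -1;
}

int main(void)
{
	uint8_t bm[8] = {0};

	for (unsigned i = 20; i < 36; i++)	/* mark bits 20..35 free */
		bm[i / 8] |= 1u << (i % 8);

	printf("run of 8 starts at bit %ld\n", find_free_run(bm, 64, 8));
	return 0;
}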
@@ -1836,10 +1892,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group_cache *block_group,
 	struct rb_node *node;
 	u64 ret = 0;
 
-	if (cluster->points_to_bitmap)
-		return btrfs_alloc_from_bitmap(block_group, cluster, bytes,
-					       min_start);
-
 	spin_lock(&cluster->lock);
 	if (bytes > cluster->max_size)
 		goto out;
@@ -1852,9 +1904,9 @@
 		goto out;
 
 	entry = rb_entry(node, struct btrfs_free_space, offset_index);
-
 	while(1) {
-		if (entry->bytes < bytes || entry->offset < min_start) {
+		if (entry->bytes < bytes ||
+		    (!entry->bitmap && entry->offset < min_start)) {
 			struct rb_node *node;
 
 			node = rb_next(&entry->offset_index);
@@ -1864,10 +1916,27 @@
 					 offset_index);
 			continue;
 		}
-		ret = entry->offset;
 
-		entry->offset += bytes;
-		entry->bytes -= bytes;
+		if (entry->bitmap) {
+			ret = btrfs_alloc_from_bitmap(block_group,
+						      cluster, entry, bytes,
+						      min_start);
+			if (ret == 0) {
+				struct rb_node *node;
+				node = rb_next(&entry->offset_index);
+				if (!node)
+					break;
+				entry = rb_entry(node, struct btrfs_free_space,
+						 offset_index);
+				continue;
+			}
+		} else {
+
+			ret = entry->offset;
+
+			entry->offset += bytes;
+			entry->bytes -= bytes;
+		}
 
 		if (entry->bytes == 0)
 			rb_erase(&entry->offset_index, &cluster->root);
@@ -1884,7 +1953,12 @@ out:
 	block_group->free_space -= bytes;
 	if (entry->bytes == 0) {
 		block_group->free_extents--;
-		kfree(entry);
+		if (entry->bitmap) {
+			kfree(entry->bitmap);
+			block_group->total_bitmaps--;
+			recalculate_thresholds(block_group);
+		}
+		kmem_cache_free(btrfs_free_space_cachep, entry);
 	}
 
 	spin_unlock(&block_group->tree_lock);
@@ -1904,12 +1978,13 @@ static int btrfs_bitmap_cluster(struct btrfs_block_group_cache *block_group,
 	unsigned long found_bits;
 	unsigned long start = 0;
 	unsigned long total_found = 0;
+	int ret;
 	bool found = false;
 
 	i = offset_to_bit(entry->offset, block_group->sectorsize,
 			  max_t(u64, offset, entry->offset));
-	search_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
-	total_bits = bytes_to_bits(bytes, block_group->sectorsize);
+	search_bits = bytes_to_bits(bytes, block_group->sectorsize);
+	total_bits = bytes_to_bits(min_bytes, block_group->sectorsize);
 
 again:
 	found_bits = 0;
@@ -1926,7 +2001,7 @@ again:
 	}
 
 	if (!found_bits)
-		return -1;
+		return -ENOSPC;
 
 	if (!found) {
 		start = i;
@@ -1950,189 +2025,208 @@
 
 	cluster->window_start = start * block_group->sectorsize +
 		entry->offset;
-	cluster->points_to_bitmap = true;
+	rb_erase(&entry->offset_index, &block_group->free_space_offset);
+	ret = tree_insert_offset(&cluster->root, entry->offset,
+				 &entry->offset_index, 1);
+	BUG_ON(ret);
 
 	return 0;
 }
 
 /*
- * here we try to find a cluster of blocks in a block group.  The goal
- * is to find at least bytes free and up to empty_size + bytes free.
- * We might not find them all in one contiguous area.
- *
- * returns zero and sets up cluster if things worked out, otherwise
- * it returns -enospc
+ * This searches the block group for just extents to fill the cluster with.
  */
-int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
-			     struct btrfs_root *root,
-			     struct btrfs_block_group_cache *block_group,
-			     struct btrfs_free_cluster *cluster,
-			     u64 offset, u64 bytes, u64 empty_size)
+static int setup_cluster_no_bitmap(struct btrfs_block_group_cache *block_group,
+				   struct btrfs_free_cluster *cluster,
+				   u64 offset, u64 bytes, u64 min_bytes)
 {
+	struct btrfs_free_space *first = NULL;
 	struct btrfs_free_space *entry = NULL;
+	struct btrfs_free_space *prev = NULL;
+	struct btrfs_free_space *last;
 	struct rb_node *node;
-	struct btrfs_free_space *next;
-	struct btrfs_free_space *last = NULL;
-	u64 min_bytes;
 	u64 window_start;
 	u64 window_free;
-	u64 max_extent = 0;
-	bool found_bitmap = false;
-	int ret;
+	u64 max_extent;
+	u64 max_gap = 128 * 1024;
 
-	/* for metadata, allow allocates with more holes */
-	if (btrfs_test_opt(root, SSD_SPREAD)) {
-		min_bytes = bytes + empty_size;
-	} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
-		/*
-		 * we want to do larger allocations when we are
-		 * flushing out the delayed refs, it helps prevent
-		 * making more work as we go along.
-		 */
-		if (trans->transaction->delayed_refs.flushing)
-			min_bytes = max(bytes, (bytes + empty_size) >> 1);
-		else
-			min_bytes = max(bytes, (bytes + empty_size) >> 4);
-	} else
-		min_bytes = max(bytes, (bytes + empty_size) >> 2);
-
-	spin_lock(&block_group->tree_lock);
-	spin_lock(&cluster->lock);
-
-	/* someone already found a cluster, hooray */
-	if (cluster->block_group) {
-		ret = 0;
-		goto out;
-	}
-again:
-	entry = tree_search_offset(block_group, offset, found_bitmap, 1);
-	if (!entry) {
-		ret = -ENOSPC;
-		goto out;
-	}
+	entry = tree_search_offset(block_group, offset, 0, 1);
+	if (!entry)
+		return -ENOSPC;
 
 	/*
-	 * If found_bitmap is true, we exhausted our search for extent entries,
-	 * and we just want to search all of the bitmaps that we can find, and
-	 * ignore any extent entries we find.
+	 * We don't want bitmaps, so just move along until we find a normal
+	 * extent entry.
 	 */
-	while (entry->bitmap || found_bitmap ||
-	       (!entry->bitmap && entry->bytes < min_bytes)) {
-		struct rb_node *node = rb_next(&entry->offset_index);
-
-		if (entry->bitmap && entry->bytes > bytes + empty_size) {
-			ret = btrfs_bitmap_cluster(block_group, entry, cluster,
-						   offset, bytes + empty_size,
-						   min_bytes);
-			if (!ret)
-				goto got_it;
-		}
-
-		if (!node) {
-			ret = -ENOSPC;
-			goto out;
-		}
+	while (entry->bitmap) {
+		node = rb_next(&entry->offset_index);
+		if (!node)
+			return -ENOSPC;
 		entry = rb_entry(node, struct btrfs_free_space, offset_index);
 	}
 
-	/*
-	 * We already searched all the extent entries from the passed in offset
-	 * to the end and didn't find enough space for the cluster, and we also
-	 * didn't find any bitmaps that met our criteria, just go ahead and exit
-	 */
-	if (found_bitmap) {
-		ret = -ENOSPC;
-		goto out;
-	}
-
-	cluster->points_to_bitmap = false;
 	window_start = entry->offset;
 	window_free = entry->bytes;
-	last = entry;
 	max_extent = entry->bytes;
+	first = entry;
+	last = entry;
+	prev = entry;
 
-	while (1) {
-		/* out window is just right, lets fill it */
-		if (window_free >= bytes + empty_size)
-			break;
-
-		node = rb_next(&last->offset_index);
-		if (!node) {
-			if (found_bitmap)
-				goto again;
-			ret = -ENOSPC;
-			goto out;
-		}
-		next = rb_entry(node, struct btrfs_free_space, offset_index);
+	while (window_free <= min_bytes) {
+		node = rb_next(&entry->offset_index);
+		if (!node)
+			return -ENOSPC;
+		entry = rb_entry(node, struct btrfs_free_space, offset_index);
 
-		/*
-		 * we found a bitmap, so if this search doesn't result in a
-		 * cluster, we know to go and search again for the bitmaps and
-		 * start looking for space there
-		 */
-		if (next->bitmap) {
-			if (!found_bitmap)
-				offset = next->offset;
-			found_bitmap = true;
-			last = next;
+		if (entry->bitmap)
 			continue;
-		}
-
 		/*
 		 * we haven't filled the empty size and the window is
 		 * very large.  reset and try again
 		 */
-		if (next->offset - (last->offset + last->bytes) > 128 * 1024 ||
-		    next->offset - window_start > (bytes + empty_size) * 2) {
-			entry = next;
+		if (entry->offset - (prev->offset + prev->bytes) > max_gap ||
+		    entry->offset - window_start > (min_bytes * 2)) {
+			first = entry;
 			window_start = entry->offset;
 			window_free = entry->bytes;
 			last = entry;
 			max_extent = entry->bytes;
 		} else {
-			last = next;
-			window_free += next->bytes;
+			last = entry;
+			window_free += entry->bytes;
 			if (entry->bytes > max_extent)
 				max_extent = entry->bytes;
 		}
+		prev = entry;
 	}
 
-	cluster->window_start = entry->offset;
+	cluster->window_start = first->offset;
+
+	node = &first->offset_index;
 
 	/*
 	 * now we've found our entries, pull them out of the free space
 	 * cache and put them into the cluster rbtree
-	 *
-	 * The cluster includes an rbtree, but only uses the offset index
-	 * of each free space cache entry.
 	 */
-	while (1) {
+	do {
+		int ret;
+
+		entry = rb_entry(node, struct btrfs_free_space, offset_index);
 		node = rb_next(&entry->offset_index);
-		if (entry->bitmap && node) {
-			entry = rb_entry(node, struct btrfs_free_space,
-					 offset_index);
+		if (entry->bitmap)
 			continue;
-		} else if (entry->bitmap && !node) {
-			break;
-		}
 
 		rb_erase(&entry->offset_index, &block_group->free_space_offset);
 		ret = tree_insert_offset(&cluster->root, entry->offset,
 					 &entry->offset_index, 0);
 		BUG_ON(ret);
+	} while (node && entry != last);
 
-		if (!node || entry == last)
-			break;
+	cluster->max_size = max_extent;
 
-		entry = rb_entry(node, struct btrfs_free_space, offset_index);
+	return 0;
+}
+
+/*
+ * This specifically looks for bitmaps that may work in the cluster, we assume
+ * that we have already failed to find extents that will work.
+ */
+static int setup_cluster_bitmap(struct btrfs_block_group_cache *block_group,
+				struct btrfs_free_cluster *cluster,
+				u64 offset, u64 bytes, u64 min_bytes)
+{
+	struct btrfs_free_space *entry;
+	struct rb_node *node;
+	int ret = -ENOSPC;
+
+	if (block_group->total_bitmaps == 0)
+		return -ENOSPC;
+
+	entry = tree_search_offset(block_group,
+				   offset_to_bitmap(block_group, offset),
+				   0, 1);
+	if (!entry)
+		return -ENOSPC;
+
+	node = &entry->offset_index;
+	do {
+		entry = rb_entry(node, struct btrfs_free_space, offset_index);
+		node = rb_next(&entry->offset_index);
+		if (!entry->bitmap)
+			continue;
+		if (entry->bytes < min_bytes)
+			continue;
+		ret = btrfs_bitmap_cluster(block_group, entry, cluster, offset,
+					   bytes, min_bytes);
+	} while (ret && node);
+
+	return ret;
+}
+
+/*
+ * here we try to find a cluster of blocks in a block group.  The goal
+ * is to find at least bytes free and up to empty_size + bytes free.
+ * We might not find them all in one contiguous area.
+ *
+ * returns zero and sets up cluster if things worked out, otherwise
+ * it returns -enospc
+ */
+int btrfs_find_space_cluster(struct btrfs_trans_handle *trans,
+			     struct btrfs_root *root,
+			     struct btrfs_block_group_cache *block_group,
+			     struct btrfs_free_cluster *cluster,
+			     u64 offset, u64 bytes, u64 empty_size)
+{
+	u64 min_bytes;
+	int ret;
+
+	/* for metadata, allow allocates with more holes */
+	if (btrfs_test_opt(root, SSD_SPREAD)) {
+		min_bytes = bytes + empty_size;
+	} else if (block_group->flags & BTRFS_BLOCK_GROUP_METADATA) {
+		/*
+		 * we want to do larger allocations when we are
+		 * flushing out the delayed refs, it helps prevent
+		 * making more work as we go along.
+		 */
+		if (trans->transaction->delayed_refs.flushing)
+			min_bytes = max(bytes, (bytes + empty_size) >> 1);
+		else
+			min_bytes = max(bytes, (bytes + empty_size) >> 4);
+	} else
+		min_bytes = max(bytes, (bytes + empty_size) >> 2);
+
+	spin_lock(&block_group->tree_lock);
+
+	/*
+	 * If we know we don't have enough space to make a cluster don't even
+	 * bother doing all the work to try and find one.
+	 */
+	if (block_group->free_space < min_bytes) {
+		spin_unlock(&block_group->tree_lock);
+		return -ENOSPC;
 	}
 
-	cluster->max_size = max_extent;
-got_it:
-	ret = 0;
-	atomic_inc(&block_group->count);
-	list_add_tail(&cluster->block_group_list, &block_group->cluster_list);
-	cluster->block_group = block_group;
+	spin_lock(&cluster->lock);
+
+	/* someone already found a cluster, hooray */
+	if (cluster->block_group) {
+		ret = 0;
+		goto out;
+	}
+
+	ret = setup_cluster_no_bitmap(block_group, cluster, offset, bytes,
+				      min_bytes);
+	if (ret)
+		ret = setup_cluster_bitmap(block_group, cluster, offset,
+					   bytes, min_bytes);
+
+	if (!ret) {
+		atomic_inc(&block_group->count);
+		list_add_tail(&cluster->block_group_list,
+			      &block_group->cluster_list);
+		cluster->block_group = block_group;
+	}
 out:
 	spin_unlock(&cluster->lock);
 	spin_unlock(&block_group->tree_lock);
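setup_cluster_no_bitmap() above implements a sliding window over offset-sorted extent entries: the window resets whenever the gap to the previous entry exceeds max_gap (128 KiB) or the window span passes 2 * min_bytes, and the search succeeds once the accumulated free bytes exceed min_bytes. A compact userspace model of that window logic (a sorted array stands in for the rbtree; names illustrative):

#include <stdint.h>
#include <stdio.h>

struct ext { uint64_t offset, bytes; };

/* Returns the index of the first window entry, or -1. Illustrative only. */
static int find_window(const struct ext *e, int n, uint64_t min_bytes,
		       uint64_t max_gap)
{
	int first = 0;
	uint64_t window_start = e[0].offset, window_free = e[0].bytes;

	for (int i = 1; i < n && window_free <= min_bytes; i++) {
		if (e[i].offset - (e[i - 1].offset + e[i - 1].bytes) > max_gap ||
		    e[i].offset - window_start > min_bytes * 2) {
			first = i;			/* reset the window */
			window_start = e[i].offset;
			window_free = e[i].bytes;
		} else {
			window_free += e[i].bytes;
		}
	}
	return window_free > min_bytes ? first : -1;
}

int main(void)
{
	struct ext e[] = {
		{ 0, 4096 }, { 1 << 20, 8192 },		/* 1 MiB gap: reset */
		{ (1 << 20) + 8192, 61440 },
	};

	printf("window starts at entry %d\n",
	       find_window(e, 3, 64 * 1024, 128 * 1024));
	return 0;
}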
@@ -2149,8 +2243,99 @@ void btrfs_init_free_cluster(struct btrfs_free_cluster *cluster)
 	spin_lock_init(&cluster->refill_lock);
 	cluster->root = RB_ROOT;
 	cluster->max_size = 0;
-	cluster->points_to_bitmap = false;
 	INIT_LIST_HEAD(&cluster->block_group_list);
 	cluster->block_group = NULL;
 }
 
+int btrfs_trim_block_group(struct btrfs_block_group_cache *block_group,
+			   u64 *trimmed, u64 start, u64 end, u64 minlen)
+{
+	struct btrfs_free_space *entry = NULL;
+	struct btrfs_fs_info *fs_info = block_group->fs_info;
+	u64 bytes = 0;
+	u64 actually_trimmed;
+	int ret = 0;
+
+	*trimmed = 0;
+
+	while (start < end) {
+		spin_lock(&block_group->tree_lock);
+
+		if (block_group->free_space < minlen) {
+			spin_unlock(&block_group->tree_lock);
+			break;
+		}
+
+		entry = tree_search_offset(block_group, start, 0, 1);
+		if (!entry)
+			entry = tree_search_offset(block_group,
+						   offset_to_bitmap(block_group,
+								    start),
+						   1, 1);
+
+		if (!entry || entry->offset >= end) {
+			spin_unlock(&block_group->tree_lock);
+			break;
+		}
+
+		if (entry->bitmap) {
+			ret = search_bitmap(block_group, entry, &start, &bytes);
+			if (!ret) {
+				if (start >= end) {
+					spin_unlock(&block_group->tree_lock);
+					break;
+				}
+				bytes = min(bytes, end - start);
+				bitmap_clear_bits(block_group, entry,
+						  start, bytes);
+				if (entry->bytes == 0)
+					free_bitmap(block_group, entry);
+			} else {
+				start = entry->offset + BITS_PER_BITMAP *
+					block_group->sectorsize;
+				spin_unlock(&block_group->tree_lock);
+				ret = 0;
+				continue;
+			}
+		} else {
+			start = entry->offset;
+			bytes = min(entry->bytes, end - start);
+			unlink_free_space(block_group, entry);
+			kfree(entry);
+		}
+
+		spin_unlock(&block_group->tree_lock);
+
+		if (bytes >= minlen) {
+			int update_ret;
+			update_ret = btrfs_update_reserved_bytes(block_group,
+								 bytes, 1, 1);
+
+			ret = btrfs_error_discard_extent(fs_info->extent_root,
+							 start,
+							 bytes,
+							 &actually_trimmed);
+
+			btrfs_add_free_space(block_group,
+					     start, bytes);
+			if (!update_ret)
+				btrfs_update_reserved_bytes(block_group,
+							    bytes, 0, 1);
+
+			if (ret)
+				break;
+			*trimmed += actually_trimmed;
+		}
+		start += bytes;
+		bytes = 0;
+
+		if (fatal_signal_pending(current)) {
+			ret = -ERESTARTSYS;
+			break;
+		}
+
+		cond_resched();
+	}
+
+	return ret;
+}
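The new btrfs_trim_block_group() walks [start, end), discards each free range of at least minlen bytes, and gives the space back to the cache afterward. A stripped-down userspace sketch of the iteration pattern, with the actual discard replaced by a stub (all names illustrative):

#include <stdint.h>
#include <stdio.h>

struct range { uint64_t start, len; };

/* stand-in for btrfs_error_discard_extent(): report what would be trimmed */
static uint64_t discard(uint64_t start, uint64_t len)
{
	printf("discard [%llu, %llu)\n", (unsigned long long)start,
	       (unsigned long long)(start + len));
	return len;	/* pretend everything was trimmed */
}

int main(void)
{
	struct range free_ranges[] = {
		{ 0, 512 }, { 4096, 16384 }, { 65536, 4096 },
	};
	uint64_t minlen = 4096, trimmed = 0;

	for (int i = 0; i < 3; i++) {
		/* skip ranges shorter than minlen, like the kernel loop */
		if (free_ranges[i].len < minlen)
			continue;
		trimmed += discard(free_ranges[i].start, free_ranges[i].len);
	}
	printf("trimmed %llu bytes\n", (unsigned long long)trimmed);
	return 0;
}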