about | summary | refs | log | tree | commit | diff | stats
path: root/fs/btrfs/free-space-cache.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/free-space-cache.c')
-rw-r--r-- fs/btrfs/free-space-cache.c | 203
1 files changed, 136 insertions, 67 deletions
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 0037427d8a9d..11d2e9cea09e 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -24,6 +24,7 @@
24#include "free-space-cache.h" 24#include "free-space-cache.h"
25#include "transaction.h" 25#include "transaction.h"
26#include "disk-io.h" 26#include "disk-io.h"
27#include "extent_io.h"
27 28
28#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8) 29#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
29#define MAX_CACHE_BYTES_PER_GIG (32 * 1024) 30#define MAX_CACHE_BYTES_PER_GIG (32 * 1024)
@@ -81,6 +82,8 @@ struct inode *lookup_free_space_inode(struct btrfs_root *root,
81 return ERR_PTR(-ENOENT); 82 return ERR_PTR(-ENOENT);
82 } 83 }
83 84
85 inode->i_mapping->flags &= ~__GFP_FS;
86
84 spin_lock(&block_group->lock); 87 spin_lock(&block_group->lock);
85 if (!root->fs_info->closing) { 88 if (!root->fs_info->closing) {
86 block_group->inode = igrab(inode); 89 block_group->inode = igrab(inode);
@@ -222,6 +225,7 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info,
222 u64 num_entries; 225 u64 num_entries;
223 u64 num_bitmaps; 226 u64 num_bitmaps;
224 u64 generation; 227 u64 generation;
228 u64 used = btrfs_block_group_used(&block_group->item);
225 u32 cur_crc = ~(u32)0; 229 u32 cur_crc = ~(u32)0;
226 pgoff_t index = 0; 230 pgoff_t index = 0;
227 unsigned long first_page_offset; 231 unsigned long first_page_offset;
@@ -467,6 +471,17 @@ next:
467 index++; 471 index++;
468 } 472 }
469 473
474 spin_lock(&block_group->tree_lock);
475 if (block_group->free_space != (block_group->key.offset - used -
476 block_group->bytes_super)) {
477 spin_unlock(&block_group->tree_lock);
478 printk(KERN_ERR "block group %llu has an wrong amount of free "
479 "space\n", block_group->key.objectid);
480 ret = 0;
481 goto free_cache;
482 }
483 spin_unlock(&block_group->tree_lock);
484
470 ret = 1; 485 ret = 1;
471out: 486out:
472 kfree(checksums); 487 kfree(checksums);
@@ -493,18 +508,23 @@ int btrfs_write_out_cache(struct btrfs_root *root,
493 struct inode *inode; 508 struct inode *inode;
494 struct rb_node *node; 509 struct rb_node *node;
495 struct list_head *pos, *n; 510 struct list_head *pos, *n;
511 struct page **pages;
496 struct page *page; 512 struct page *page;
497 struct extent_state *cached_state = NULL; 513 struct extent_state *cached_state = NULL;
514 struct btrfs_free_cluster *cluster = NULL;
515 struct extent_io_tree *unpin = NULL;
498 struct list_head bitmap_list; 516 struct list_head bitmap_list;
499 struct btrfs_key key; 517 struct btrfs_key key;
518 u64 start, end, len;
500 u64 bytes = 0; 519 u64 bytes = 0;
501 u32 *crc, *checksums; 520 u32 *crc, *checksums;
502 pgoff_t index = 0, last_index = 0;
503 unsigned long first_page_offset; 521 unsigned long first_page_offset;
504 int num_checksums; 522 int index = 0, num_pages = 0;
505 int entries = 0; 523 int entries = 0;
506 int bitmaps = 0; 524 int bitmaps = 0;
507 int ret = 0; 525 int ret = 0;
526 bool next_page = false;
527 bool out_of_space = false;
508 528
509 root = root->fs_info->tree_root; 529 root = root->fs_info->tree_root;
510 530
@@ -532,24 +552,43 @@ int btrfs_write_out_cache(struct btrfs_root *root,
532 return 0; 552 return 0;
533 } 553 }
534 554
535 last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT; 555 num_pages = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
556 PAGE_CACHE_SHIFT;
536 filemap_write_and_wait(inode->i_mapping); 557 filemap_write_and_wait(inode->i_mapping);
537 btrfs_wait_ordered_range(inode, inode->i_size & 558 btrfs_wait_ordered_range(inode, inode->i_size &
538 ~(root->sectorsize - 1), (u64)-1); 559 ~(root->sectorsize - 1), (u64)-1);
539 560
540 /* We need a checksum per page. */ 561 /* We need a checksum per page. */
541 num_checksums = i_size_read(inode) / PAGE_CACHE_SIZE; 562 crc = checksums = kzalloc(sizeof(u32) * num_pages, GFP_NOFS);
542 crc = checksums = kzalloc(sizeof(u32) * num_checksums, GFP_NOFS);
543 if (!crc) { 563 if (!crc) {
544 iput(inode); 564 iput(inode);
545 return 0; 565 return 0;
546 } 566 }
547 567
568 pages = kzalloc(sizeof(struct page *) * num_pages, GFP_NOFS);
569 if (!pages) {
570 kfree(crc);
571 iput(inode);
572 return 0;
573 }
574
548 /* Since the first page has all of our checksums and our generation we 575 /* Since the first page has all of our checksums and our generation we
549 * need to calculate the offset into the page that we can start writing 576 * need to calculate the offset into the page that we can start writing
550 * our entries. 577 * our entries.
551 */ 578 */
552 first_page_offset = (sizeof(u32) * num_checksums) + sizeof(u64); 579 first_page_offset = (sizeof(u32) * num_pages) + sizeof(u64);
580
581 /* Get the cluster for this block_group if it exists */
582 if (!list_empty(&block_group->cluster_list))
583 cluster = list_entry(block_group->cluster_list.next,
584 struct btrfs_free_cluster,
585 block_group_list);
586
587 /*
588 * We shouldn't have switched the pinned extents yet so this is the
589 * right one
590 */
591 unpin = root->fs_info->pinned_extents;
553 592
554 /* 593 /*
555 * Lock all pages first so we can lock the extent safely. 594 * Lock all pages first so we can lock the extent safely.
@@ -559,20 +598,18 @@ int btrfs_write_out_cache(struct btrfs_root *root,
559 * after find_get_page at this point. Just putting this here so people 598 * after find_get_page at this point. Just putting this here so people
560 * know and don't freak out. 599 * know and don't freak out.
561 */ 600 */
562 while (index <= last_index) { 601 while (index < num_pages) {
563 page = grab_cache_page(inode->i_mapping, index); 602 page = grab_cache_page(inode->i_mapping, index);
564 if (!page) { 603 if (!page) {
565 pgoff_t i = 0; 604 int i;
566 605
567 while (i < index) { 606 for (i = 0; i < num_pages; i++) {
568 page = find_get_page(inode->i_mapping, i); 607 unlock_page(pages[i]);
569 unlock_page(page); 608 page_cache_release(pages[i]);
570 page_cache_release(page);
571 page_cache_release(page);
572 i++;
573 } 609 }
574 goto out_free; 610 goto out_free;
575 } 611 }
612 pages[index] = page;
576 index++; 613 index++;
577 } 614 }
578 615
@@ -580,6 +617,12 @@ int btrfs_write_out_cache(struct btrfs_root *root,
580 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1, 617 lock_extent_bits(&BTRFS_I(inode)->io_tree, 0, i_size_read(inode) - 1,
581 0, &cached_state, GFP_NOFS); 618 0, &cached_state, GFP_NOFS);
582 619
620 /*
621 * When searching for pinned extents, we need to start at our start
622 * offset.
623 */
624 start = block_group->key.objectid;
625
583 /* Write out the extent entries */ 626 /* Write out the extent entries */
584 do { 627 do {
585 struct btrfs_free_space_entry *entry; 628 struct btrfs_free_space_entry *entry;
@@ -587,18 +630,25 @@ int btrfs_write_out_cache(struct btrfs_root *root,
587 unsigned long offset = 0; 630 unsigned long offset = 0;
588 unsigned long start_offset = 0; 631 unsigned long start_offset = 0;
589 632
633 next_page = false;
634
590 if (index == 0) { 635 if (index == 0) {
591 start_offset = first_page_offset; 636 start_offset = first_page_offset;
592 offset = start_offset; 637 offset = start_offset;
593 } 638 }
594 639
595 page = find_get_page(inode->i_mapping, index); 640 if (index >= num_pages) {
641 out_of_space = true;
642 break;
643 }
644
645 page = pages[index];
596 646
597 addr = kmap(page); 647 addr = kmap(page);
598 entry = addr + start_offset; 648 entry = addr + start_offset;
599 649
600 memset(addr, 0, PAGE_CACHE_SIZE); 650 memset(addr, 0, PAGE_CACHE_SIZE);
601 while (1) { 651 while (node && !next_page) {
602 struct btrfs_free_space *e; 652 struct btrfs_free_space *e;
603 653
604 e = rb_entry(node, struct btrfs_free_space, offset_index); 654 e = rb_entry(node, struct btrfs_free_space, offset_index);
@@ -614,12 +664,49 @@ int btrfs_write_out_cache(struct btrfs_root *root,
614 entry->type = BTRFS_FREE_SPACE_EXTENT; 664 entry->type = BTRFS_FREE_SPACE_EXTENT;
615 } 665 }
616 node = rb_next(node); 666 node = rb_next(node);
617 if (!node) 667 if (!node && cluster) {
618 break; 668 node = rb_first(&cluster->root);
669 cluster = NULL;
670 }
619 offset += sizeof(struct btrfs_free_space_entry); 671 offset += sizeof(struct btrfs_free_space_entry);
620 if (offset + sizeof(struct btrfs_free_space_entry) >= 672 if (offset + sizeof(struct btrfs_free_space_entry) >=
621 PAGE_CACHE_SIZE) 673 PAGE_CACHE_SIZE)
674 next_page = true;
675 entry++;
676 }
677
678 /*
679 * We want to add any pinned extents to our free space cache
680 * so we don't leak the space
681 */
682 while (!next_page && (start < block_group->key.objectid +
683 block_group->key.offset)) {
684 ret = find_first_extent_bit(unpin, start, &start, &end,
685 EXTENT_DIRTY);
686 if (ret) {
687 ret = 0;
622 break; 688 break;
689 }
690
691 /* This pinned extent is out of our range */
692 if (start >= block_group->key.objectid +
693 block_group->key.offset)
694 break;
695
696 len = block_group->key.objectid +
697 block_group->key.offset - start;
698 len = min(len, end + 1 - start);
699
700 entries++;
701 entry->offset = cpu_to_le64(start);
702 entry->bytes = cpu_to_le64(len);
703 entry->type = BTRFS_FREE_SPACE_EXTENT;
704
705 start = end + 1;
706 offset += sizeof(struct btrfs_free_space_entry);
707 if (offset + sizeof(struct btrfs_free_space_entry) >=
708 PAGE_CACHE_SIZE)
709 next_page = true;
623 entry++; 710 entry++;
624 } 711 }
625 *crc = ~(u32)0; 712 *crc = ~(u32)0;
@@ -632,25 +719,8 @@ int btrfs_write_out_cache(struct btrfs_root *root,
632 719
633 bytes += PAGE_CACHE_SIZE; 720 bytes += PAGE_CACHE_SIZE;
634 721
635 ClearPageChecked(page);
636 set_page_extent_mapped(page);
637 SetPageUptodate(page);
638 set_page_dirty(page);
639
640 /*
641 * We need to release our reference we got for grab_cache_page,
642 * except for the first page which will hold our checksums, we
643 * do that below.
644 */
645 if (index != 0) {
646 unlock_page(page);
647 page_cache_release(page);
648 }
649
650 page_cache_release(page);
651
652 index++; 722 index++;
653 } while (node); 723 } while (node || next_page);
654 724
655 /* Write out the bitmaps */ 725 /* Write out the bitmaps */
656 list_for_each_safe(pos, n, &bitmap_list) { 726 list_for_each_safe(pos, n, &bitmap_list) {
@@ -658,7 +728,11 @@ int btrfs_write_out_cache(struct btrfs_root *root,
658 struct btrfs_free_space *entry = 728 struct btrfs_free_space *entry =
659 list_entry(pos, struct btrfs_free_space, list); 729 list_entry(pos, struct btrfs_free_space, list);
660 730
661 page = find_get_page(inode->i_mapping, index); 731 if (index >= num_pages) {
732 out_of_space = true;
733 break;
734 }
735 page = pages[index];
662 736
663 addr = kmap(page); 737 addr = kmap(page);
664 memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE); 738 memcpy(addr, entry->bitmap, PAGE_CACHE_SIZE);
@@ -669,64 +743,58 @@ int btrfs_write_out_cache(struct btrfs_root *root,
669 crc++; 743 crc++;
670 bytes += PAGE_CACHE_SIZE; 744 bytes += PAGE_CACHE_SIZE;
671 745
672 ClearPageChecked(page);
673 set_page_extent_mapped(page);
674 SetPageUptodate(page);
675 set_page_dirty(page);
676 unlock_page(page);
677 page_cache_release(page);
678 page_cache_release(page);
679 list_del_init(&entry->list); 746 list_del_init(&entry->list);
680 index++; 747 index++;
681 } 748 }
682 749
750 if (out_of_space) {
751 btrfs_drop_pages(pages, num_pages);
752 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
753 i_size_read(inode) - 1, &cached_state,
754 GFP_NOFS);
755 ret = 0;
756 goto out_free;
757 }
758
683 /* Zero out the rest of the pages just to make sure */ 759 /* Zero out the rest of the pages just to make sure */
684 while (index <= last_index) { 760 while (index < num_pages) {
685 void *addr; 761 void *addr;
686 762
687 page = find_get_page(inode->i_mapping, index); 763 page = pages[index];
688
689 addr = kmap(page); 764 addr = kmap(page);
690 memset(addr, 0, PAGE_CACHE_SIZE); 765 memset(addr, 0, PAGE_CACHE_SIZE);
691 kunmap(page); 766 kunmap(page);
692 ClearPageChecked(page);
693 set_page_extent_mapped(page);
694 SetPageUptodate(page);
695 set_page_dirty(page);
696 unlock_page(page);
697 page_cache_release(page);
698 page_cache_release(page);
699 bytes += PAGE_CACHE_SIZE; 767 bytes += PAGE_CACHE_SIZE;
700 index++; 768 index++;
701 } 769 }
702 770
703 btrfs_set_extent_delalloc(inode, 0, bytes - 1, &cached_state);
704
705 /* Write the checksums and trans id to the first page */ 771 /* Write the checksums and trans id to the first page */
706 { 772 {
707 void *addr; 773 void *addr;
708 u64 *gen; 774 u64 *gen;
709 775
710 page = find_get_page(inode->i_mapping, 0); 776 page = pages[0];
711 777
712 addr = kmap(page); 778 addr = kmap(page);
713 memcpy(addr, checksums, sizeof(u32) * num_checksums); 779 memcpy(addr, checksums, sizeof(u32) * num_pages);
714 gen = addr + (sizeof(u32) * num_checksums); 780 gen = addr + (sizeof(u32) * num_pages);
715 *gen = trans->transid; 781 *gen = trans->transid;
716 kunmap(page); 782 kunmap(page);
717 ClearPageChecked(page);
718 set_page_extent_mapped(page);
719 SetPageUptodate(page);
720 set_page_dirty(page);
721 unlock_page(page);
722 page_cache_release(page);
723 page_cache_release(page);
724 } 783 }
725 BTRFS_I(inode)->generation = trans->transid;
726 784
785 ret = btrfs_dirty_pages(root, inode, pages, num_pages, 0,
786 bytes, &cached_state);
787 btrfs_drop_pages(pages, num_pages);
727 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0, 788 unlock_extent_cached(&BTRFS_I(inode)->io_tree, 0,
728 i_size_read(inode) - 1, &cached_state, GFP_NOFS); 789 i_size_read(inode) - 1, &cached_state, GFP_NOFS);
729 790
791 if (ret) {
792 ret = 0;
793 goto out_free;
794 }
795
796 BTRFS_I(inode)->generation = trans->transid;
797
730 filemap_write_and_wait(inode->i_mapping); 798 filemap_write_and_wait(inode->i_mapping);
731 799
732 key.objectid = BTRFS_FREE_SPACE_OBJECTID; 800 key.objectid = BTRFS_FREE_SPACE_OBJECTID;
@@ -777,6 +845,7 @@ out_free:
777 BTRFS_I(inode)->generation = 0; 845 BTRFS_I(inode)->generation = 0;
778 } 846 }
779 kfree(checksums); 847 kfree(checksums);
848 kfree(pages);
780 btrfs_update_inode(trans, root, inode); 849 btrfs_update_inode(trans, root, inode);
781 iput(inode); 850 iput(inode);
782 return ret; 851 return ret;