Diffstat (limited to 'fs/btrfs/ctree.h')
-rw-r--r--  fs/btrfs/ctree.h  155
1 files changed, 105 insertions, 50 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 5e1d4e30e9d8..ad96495dedc5 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -45,6 +45,13 @@ struct btrfs_ordered_sum;
 
 #define BTRFS_MAX_LEVEL 8
 
+/*
+ * files bigger than this get some pre-flushing when they are added
+ * to the ordered operations list. That way we limit the total
+ * work done by the commit
+ */
+#define BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT (8 * 1024 * 1024)
+
 /* holds pointers to all of the tree roots */
 #define BTRFS_ROOT_TREE_OBJECTID 1ULL
 
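For illustration only, a hedged sketch (not code from this patch) of how a limit like this is typically applied: files past the threshold get an asynchronous pre-flush when they are noted for the next commit, so the commit itself has less dirty data left to push. The helper name is hypothetical.

#include <linux/fs.h>

/* hypothetical sketch: pre-flush large files before tracking them */
static void sketch_note_ordered_operation(struct inode *inode)
{
	if (i_size_read(inode) > BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT)
		filemap_flush(inode->i_mapping);	/* start writeback, do not wait */

	/* the inode would then be linked onto fs_info->ordered_operations */
}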
@@ -136,12 +143,15 @@ static int btrfs_csum_sizes[] = { 4, 0 };
 #define BTRFS_FT_MAX	9
 
 /*
- * the key defines the order in the tree, and so it also defines (optimal)
- * block layout. objectid corresonds to the inode number. The flags
- * tells us things about the object, and is a kind of stream selector.
- * so for a given inode, keys with flags of 1 might refer to the inode
- * data, flags of 2 may point to file data in the btree and flags == 3
- * may point to extents.
+ * The key defines the order in the tree, and so it also defines (optimal)
+ * block layout.
+ *
+ * objectid corresponds to the inode number.
+ *
+ * type tells us things about the object, and is a kind of stream selector.
+ * so for a given inode, keys with type of 1 might refer to the inode data,
+ * type of 2 may point to file data in the btree and type == 3 may point to
+ * extents.
  *
  * offset is the starting byte offset for this key in the stream.
  *
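As a worked illustration of the ordering described above (objectid first, then type, then offset), a comparator would look roughly like this. This is a sketch of the concept only, not the kernel's key comparison routine, and the struct is a stand-in for the real key.

#include <linux/types.h>

/* illustrative only: keys sort by objectid, then type, then offset */
struct sketch_key {
	u64 objectid;
	u8 type;
	u64 offset;
};

static int sketch_comp_keys(const struct sketch_key *a,
			    const struct sketch_key *b)
{
	if (a->objectid != b->objectid)
		return a->objectid < b->objectid ? -1 : 1;
	if (a->type != b->type)
		return a->type < b->type ? -1 : 1;
	if (a->offset != b->offset)
		return a->offset < b->offset ? -1 : 1;
	return 0;
}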
@@ -193,7 +203,7 @@ struct btrfs_dev_item {
 
 	/*
 	 * starting byte of this partition on the device,
-	 * to allowr for stripe alignment in the future
+	 * to allow for stripe alignment in the future
 	 */
 	__le64 start_offset;
 
@@ -401,15 +411,16 @@ struct btrfs_path {
 	int locks[BTRFS_MAX_LEVEL];
 	int reada;
 	/* keep some upper locks as we walk down */
-	int keep_locks;
-	int skip_locking;
 	int lowest_level;
 
 	/*
 	 * set by btrfs_split_item, tells search_slot to keep all locks
 	 * and to force calls to keep space in the nodes
 	 */
-	int search_for_split;
+	unsigned int search_for_split:1;
+	unsigned int keep_locks:1;
+	unsigned int skip_locking:1;
+	unsigned int leave_spinning:1;
 };
 
 /*
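The per-path flags become single-bit fields here, but callers set them exactly as before. A hypothetical caller, assuming the existing btrfs_alloc_path/btrfs_free_path/btrfs_search_slot declarations from this header:

/* sketch of a read-only search using the new bitfield flags */
static int sketch_search_example(struct btrfs_root *root,
				 struct btrfs_key *key)
{
	struct btrfs_path *path;
	int ret;

	path = btrfs_alloc_path();
	if (!path)
		return -ENOMEM;
	path->keep_locks = 1;		/* keep upper-level locks while walking down */
	path->leave_spinning = 1;	/* come back holding spinning locks */
	ret = btrfs_search_slot(NULL, root, key, path, 0, 0);
	btrfs_free_path(path);
	return ret;
}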
@@ -625,18 +636,35 @@ struct btrfs_space_info {
 	struct rw_semaphore groups_sem;
 };
 
-struct btrfs_free_space {
-	struct rb_node bytes_index;
-	struct rb_node offset_index;
-	u64 offset;
-	u64 bytes;
+/*
+ * free clusters are used to claim free space in relatively large chunks,
+ * allowing us to do less seeky writes. They are used for all metadata
+ * allocations and data allocations in ssd mode.
+ */
+struct btrfs_free_cluster {
+	spinlock_t lock;
+	spinlock_t refill_lock;
+	struct rb_root root;
+
+	/* largest extent in this cluster */
+	u64 max_size;
+
+	/* first extent starting offset */
+	u64 window_start;
+
+	struct btrfs_block_group_cache *block_group;
+	/*
+	 * when a cluster is allocated from a block group, we put the
+	 * cluster onto a list in the block group so that it can
+	 * be freed before the block group is freed.
+	 */
+	struct list_head block_group_list;
 };
 
 struct btrfs_block_group_cache {
 	struct btrfs_key key;
 	struct btrfs_block_group_item item;
 	spinlock_t lock;
-	struct mutex alloc_mutex;
 	struct mutex cache_mutex;
 	u64 pinned;
 	u64 reserved;
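A conceptual sketch of what a free cluster buys: one large window of free space is claimed up front, and small allocations are then carved from it so writes stay contiguous. The struct and helper below are hypothetical, used only to illustrate the idea; they are not the allocator added by this patch.

#include <linux/types.h>

/* hypothetical model of a pre-claimed allocation window */
struct sketch_cluster {
	u64 window_start;	/* first free byte of the claimed window */
	u64 window_free;	/* bytes still unclaimed in the window */
};

static u64 sketch_carve(struct sketch_cluster *c, u64 bytes)
{
	u64 start;

	if (bytes > c->window_free)
		return 0;	/* caller falls back to the free-space tree */
	start = c->window_start;
	c->window_start += bytes;
	c->window_free -= bytes;
	return start;
}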
@@ -648,6 +676,7 @@ struct btrfs_block_group_cache {
 	struct btrfs_space_info *space_info;
 
 	/* free space cache stuff */
+	spinlock_t tree_lock;
 	struct rb_root free_space_bytes;
 	struct rb_root free_space_offset;
 
@@ -659,6 +688,11 @@ struct btrfs_block_group_cache {
 
 	/* usage count */
 	atomic_t count;
+
+	/* List of struct btrfs_free_clusters for this block group.
+	 * Today it will only have one thing on it, but that may change
+	 */
+	struct list_head cluster_list;
 };
 
 struct btrfs_leaf_ref_tree {
@@ -688,15 +722,18 @@ struct btrfs_fs_info {
 	struct rb_root block_group_cache_tree;
 
 	struct extent_io_tree pinned_extents;
-	struct extent_io_tree pending_del;
-	struct extent_io_tree extent_ins;
 
 	/* logical->physical extent mapping */
 	struct btrfs_mapping_tree mapping_tree;
 
 	u64 generation;
 	u64 last_trans_committed;
-	u64 last_trans_new_blockgroup;
+
+	/*
+	 * this is updated to the current trans every time a full commit
+	 * is required instead of the faster short fsync log commits
+	 */
+	u64 last_trans_log_full_commit;
 	u64 open_ioctl_trans;
 	unsigned long mount_opt;
 	u64 max_extent;
@@ -717,12 +754,20 @@ struct btrfs_fs_info {
 	struct mutex tree_log_mutex;
 	struct mutex transaction_kthread_mutex;
 	struct mutex cleaner_mutex;
-	struct mutex extent_ins_mutex;
-	struct mutex pinned_mutex;
 	struct mutex chunk_mutex;
 	struct mutex drop_mutex;
 	struct mutex volume_mutex;
 	struct mutex tree_reloc_mutex;
+
+	/*
+	 * this protects the ordered operations list only while we are
+	 * processing all of the entries on it. This way we make
+	 * sure the commit code doesn't find the list temporarily empty
+	 * because another function happens to be doing non-waiting preflush
+	 * before jumping into the main commit.
+	 */
+	struct mutex ordered_operations_mutex;
+
 	struct list_head trans_list;
 	struct list_head hashers;
 	struct list_head dead_roots;
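A hedged sketch of the locking pattern the new comment describes: hold the mutex across the whole walk so a concurrent non-waiting pre-flush cannot make the list look empty to the commit code part-way through. The function name is hypothetical, and it assumes the ordered_extent_lock spinlock also guards list membership; the real processing logic lives elsewhere.

#include <linux/list.h>
#include <linux/mutex.h>

/* hypothetical sketch of draining fs_info->ordered_operations */
static void sketch_run_ordered_operations(struct btrfs_fs_info *info)
{
	struct list_head splice;

	INIT_LIST_HEAD(&splice);

	mutex_lock(&info->ordered_operations_mutex);
	spin_lock(&info->ordered_extent_lock);
	list_splice_init(&info->ordered_operations, &splice);
	spin_unlock(&info->ordered_extent_lock);

	/*
	 * walk 'splice' here, starting or waiting for writeback on each
	 * inode and re-adding any entry that still needs work
	 */

	mutex_unlock(&info->ordered_operations_mutex);
}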
@@ -737,10 +782,29 @@ struct btrfs_fs_info {
 	 * ordered extents
 	 */
 	spinlock_t ordered_extent_lock;
+
+	/*
+	 * all of the data=ordered extents pending writeback
+	 * these can span multiple transactions and basically include
+	 * every dirty data page that isn't from nodatacow
+	 */
 	struct list_head ordered_extents;
+
+	/*
+	 * all of the inodes that have delalloc bytes. It is possible for
+	 * this list to be empty even when there is still dirty data=ordered
+	 * extents waiting to finish IO.
+	 */
 	struct list_head delalloc_inodes;
 
 	/*
+	 * special rename and truncate targets that must be on disk before
+	 * we're allowed to commit. This is basically the ext3 style
+	 * data=ordered list.
+	 */
+	struct list_head ordered_operations;
+
+	/*
 	 * there is a pool of worker threads for checksumming during writes
 	 * and a pool for checksumming after reads. This is because readers
 	 * can run with FS locks held, and the writers may be waiting for
@@ -781,6 +845,11 @@ struct btrfs_fs_info {
 	atomic_t throttle_gen;
 
 	u64 total_pinned;
+
+	/* protected by the delalloc lock, used to keep from writing
+	 * metadata until there is a nice batch
+	 */
+	u64 dirty_metadata_bytes;
 	struct list_head dirty_cowonly_roots;
 
 	struct btrfs_fs_devices *fs_devices;
@@ -795,8 +864,12 @@ struct btrfs_fs_info {
 	spinlock_t delalloc_lock;
 	spinlock_t new_trans_lock;
 	u64 delalloc_bytes;
-	u64 last_alloc;
-	u64 last_data_alloc;
+
+	/* data_alloc_cluster is only used in ssd mode */
+	struct btrfs_free_cluster data_alloc_cluster;
+
+	/* all metadata allocations go through this cluster */
+	struct btrfs_free_cluster meta_alloc_cluster;
 
 	spinlock_t ref_cache_lock;
 	u64 total_ref_cache_size;
@@ -888,7 +961,6 @@ struct btrfs_root {
 };
 
 /*
-
  * inode items have the data typically returned from stat and store other
  * info about object characteristics. There is one for every file and dir in
  * the FS
@@ -919,7 +991,7 @@ struct btrfs_root {
 #define BTRFS_EXTENT_CSUM_KEY	128
 
 /*
- * root items point to tree roots. There are typically in the root
+ * root items point to tree roots. They are typically in the root
  * tree used by the super block to find all the other trees
  */
 #define BTRFS_ROOT_ITEM_KEY	132
@@ -966,6 +1038,8 @@ struct btrfs_root {
 #define BTRFS_MOUNT_SSD			(1 << 3)
 #define BTRFS_MOUNT_DEGRADED		(1 << 4)
 #define BTRFS_MOUNT_COMPRESS		(1 << 5)
+#define BTRFS_MOUNT_NOTREELOG		(1 << 6)
+#define BTRFS_MOUNT_FLUSHONCOMMIT	(1 << 7)
 
 #define btrfs_clear_opt(o, opt)		((o) &= ~BTRFS_MOUNT_##opt)
 #define btrfs_set_opt(o, opt)		((o) |= BTRFS_MOUNT_##opt)
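The new flags pair with the existing set/clear helpers shown above. A small illustrative caller, assuming the companion btrfs_test_opt() macro defined alongside these in this header; the actual option parsing lives in super.c.

/* sketch only: wiring the new mount flags through the helpers */
static void sketch_apply_options(struct btrfs_root *root)
{
	struct btrfs_fs_info *info = root->fs_info;

	btrfs_set_opt(info->mount_opt, NOTREELOG);	/* -o notreelog */
	btrfs_set_opt(info->mount_opt, FLUSHONCOMMIT);	/* -o flushoncommit */

	if (btrfs_test_opt(root, NOTREELOG)) {
		/* fsync will fall back to full transaction commits */
	}
}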
@@ -1704,18 +1778,16 @@ static inline struct dentry *fdentry(struct file *file)
 }
 
 /* extent-tree.c */
+void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
+int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
+			   struct btrfs_root *root, unsigned long count);
 int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
-int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root, u64 bytenr,
-			    u64 num_bytes, u32 *refs);
 int btrfs_update_pinned_extents(struct btrfs_root *root,
 				u64 bytenr, u64 num, int pin);
 int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
 			struct btrfs_root *root, struct extent_buffer *leaf);
 int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
 			  struct btrfs_root *root, u64 objectid, u64 bytenr);
-int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
-			 struct btrfs_root *root);
 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy);
 struct btrfs_block_group_cache *btrfs_lookup_block_group(
 						 struct btrfs_fs_info *info,
@@ -1777,7 +1849,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
 			 u64 root_objectid, u64 ref_generation,
 			 u64 owner_objectid);
 int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
-			    struct btrfs_root *root, u64 bytenr,
+			    struct btrfs_root *root, u64 bytenr, u64 num_bytes,
 			    u64 orig_parent, u64 parent,
 			    u64 root_objectid, u64 ref_generation,
 			    u64 owner_objectid);
@@ -1838,7 +1910,7 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
 int btrfs_cow_block(struct btrfs_trans_handle *trans,
 		    struct btrfs_root *root, struct extent_buffer *buf,
 		    struct extent_buffer *parent, int parent_slot,
-		    struct extent_buffer **cow_ret, u64 prealloc_dest);
+		    struct extent_buffer **cow_ret);
 int btrfs_copy_root(struct btrfs_trans_handle *trans,
 		    struct btrfs_root *root,
 		    struct extent_buffer *buf,
@@ -2060,7 +2132,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
 unsigned long btrfs_force_ra(struct address_space *mapping,
 			     struct file_ra_state *ra, struct file *file,
 			     pgoff_t offset, pgoff_t last_index);
-int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page);
+int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
 int btrfs_readpage(struct file *file, struct page *page);
 void btrfs_delete_inode(struct inode *inode);
 void btrfs_put_inode(struct inode *inode);
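The prototype change above moves btrfs_page_mkwrite to the calling convention where the faulting page arrives through a struct vm_fault. A skeleton of that convention only, with a hypothetical name; it is not the body of the real handler.

#include <linux/mm.h>

/* sketch: the ->page_mkwrite style this prototype now matches */
static int sketch_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	struct page *page = vmf->page;			/* page comes from the fault descriptor */
	struct inode *inode = vma->vm_file->f_mapping->host;

	(void)page;
	(void)inode;
	/* ... reserve delalloc space, lock and dirty the page ... */
	return 0;
}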
@@ -2133,21 +2205,4 @@ int btrfs_check_acl(struct inode *inode, int mask);
 int btrfs_init_acl(struct inode *inode, struct inode *dir);
 int btrfs_acl_chmod(struct inode *inode);
 
-/* free-space-cache.c */
-int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
-			 u64 bytenr, u64 size);
-int btrfs_add_free_space_lock(struct btrfs_block_group_cache *block_group,
-			      u64 offset, u64 bytes);
-int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
-			    u64 bytenr, u64 size);
-int btrfs_remove_free_space_lock(struct btrfs_block_group_cache *block_group,
-				 u64 offset, u64 bytes);
-void btrfs_remove_free_space_cache(struct btrfs_block_group_cache
-				   *block_group);
-struct btrfs_free_space *btrfs_find_free_space(struct btrfs_block_group_cache
-					       *block_group, u64 offset,
-					       u64 bytes);
-void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
-			   u64 bytes);
-u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group);
 #endif