about summary refs log tree commit diff stats
path: root/fs/btrfs/ctree.h
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/ctree.h')
-rw-r--r-- fs/btrfs/ctree.h 155
1 files changed, 105 insertions, 50 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 5e1d4e30e9d8..ad96495dedc5 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -45,6 +45,13 @@ struct btrfs_ordered_sum;
45 45
46#define BTRFS_MAX_LEVEL 8 46#define BTRFS_MAX_LEVEL 8
47 47
48/*
49 * files bigger than this get some pre-flushing when they are added
50 * to the ordered operations list. That way we limit the total
51 * work done by the commit
52 */
53#define BTRFS_ORDERED_OPERATIONS_FLUSH_LIMIT (8 * 1024 * 1024)
54
48/* holds pointers to all of the tree roots */ 55/* holds pointers to all of the tree roots */
49#define BTRFS_ROOT_TREE_OBJECTID 1ULL 56#define BTRFS_ROOT_TREE_OBJECTID 1ULL
50 57
@@ -136,12 +143,15 @@ static int btrfs_csum_sizes[] = { 4, 0 };
136#define BTRFS_FT_MAX 9 143#define BTRFS_FT_MAX 9
137 144
138/* 145/*
139 * the key defines the order in the tree, and so it also defines (optimal) 146 * The key defines the order in the tree, and so it also defines (optimal)
140 * block layout. objectid corresonds to the inode number. The flags 147 * block layout.
141 * tells us things about the object, and is a kind of stream selector. 148 *
142 * so for a given inode, keys with flags of 1 might refer to the inode 149 * objectid corresponds to the inode number.
143 * data, flags of 2 may point to file data in the btree and flags == 3 150 *
144 * may point to extents. 151 * type tells us things about the object, and is a kind of stream selector.
152 * so for a given inode, keys with type of 1 might refer to the inode data,
153 * type of 2 may point to file data in the btree and type == 3 may point to
154 * extents.
145 * 155 *
146 * offset is the starting byte offset for this key in the stream. 156 * offset is the starting byte offset for this key in the stream.
147 * 157 *
@@ -193,7 +203,7 @@ struct btrfs_dev_item {
193 203
194 /* 204 /*
195 * starting byte of this partition on the device, 205 * starting byte of this partition on the device,
196 * to allowr for stripe alignment in the future 206 * to allow for stripe alignment in the future
197 */ 207 */
198 __le64 start_offset; 208 __le64 start_offset;
199 209
@@ -401,15 +411,16 @@ struct btrfs_path {
401 int locks[BTRFS_MAX_LEVEL]; 411 int locks[BTRFS_MAX_LEVEL];
402 int reada; 412 int reada;
403 /* keep some upper locks as we walk down */ 413 /* keep some upper locks as we walk down */
404 int keep_locks;
405 int skip_locking;
406 int lowest_level; 414 int lowest_level;
407 415
408 /* 416 /*
409 * set by btrfs_split_item, tells search_slot to keep all locks 417 * set by btrfs_split_item, tells search_slot to keep all locks
410 * and to force calls to keep space in the nodes 418 * and to force calls to keep space in the nodes
411 */ 419 */
412 int search_for_split; 420 unsigned int search_for_split:1;
421 unsigned int keep_locks:1;
422 unsigned int skip_locking:1;
423 unsigned int leave_spinning:1;
413}; 424};
414 425
415/* 426/*
@@ -625,18 +636,35 @@ struct btrfs_space_info {
625 struct rw_semaphore groups_sem; 636 struct rw_semaphore groups_sem;
626}; 637};
627 638
628struct btrfs_free_space { 639/*
629 struct rb_node bytes_index; 640 * free clusters are used to claim free space in relatively large chunks,
630 struct rb_node offset_index; 641 * allowing us to do less seeky writes. They are used for all metadata
631 u64 offset; 642 * allocations and data allocations in ssd mode.
632 u64 bytes; 643 */
644struct btrfs_free_cluster {
645 spinlock_t lock;
646 spinlock_t refill_lock;
647 struct rb_root root;
648
649 /* largest extent in this cluster */
650 u64 max_size;
651
652 /* first extent starting offset */
653 u64 window_start;
654
655 struct btrfs_block_group_cache *block_group;
656 /*
657 * when a cluster is allocated from a block group, we put the
658 * cluster onto a list in the block group so that it can
659 * be freed before the block group is freed.
660 */
661 struct list_head block_group_list;
633}; 662};
634 663
635struct btrfs_block_group_cache { 664struct btrfs_block_group_cache {
636 struct btrfs_key key; 665 struct btrfs_key key;
637 struct btrfs_block_group_item item; 666 struct btrfs_block_group_item item;
638 spinlock_t lock; 667 spinlock_t lock;
639 struct mutex alloc_mutex;
640 struct mutex cache_mutex; 668 struct mutex cache_mutex;
641 u64 pinned; 669 u64 pinned;
642 u64 reserved; 670 u64 reserved;
@@ -648,6 +676,7 @@ struct btrfs_block_group_cache {
648 struct btrfs_space_info *space_info; 676 struct btrfs_space_info *space_info;
649 677
650 /* free space cache stuff */ 678 /* free space cache stuff */
679 spinlock_t tree_lock;
651 struct rb_root free_space_bytes; 680 struct rb_root free_space_bytes;
652 struct rb_root free_space_offset; 681 struct rb_root free_space_offset;
653 682
@@ -659,6 +688,11 @@ struct btrfs_block_group_cache {
659 688
660 /* usage count */ 689 /* usage count */
661 atomic_t count; 690 atomic_t count;
691
692 /* List of struct btrfs_free_clusters for this block group.
693 * Today it will only have one thing on it, but that may change
694 */
695 struct list_head cluster_list;
662}; 696};
663 697
664struct btrfs_leaf_ref_tree { 698struct btrfs_leaf_ref_tree {
@@ -688,15 +722,18 @@ struct btrfs_fs_info {
688 struct rb_root block_group_cache_tree; 722 struct rb_root block_group_cache_tree;
689 723
690 struct extent_io_tree pinned_extents; 724 struct extent_io_tree pinned_extents;
691 struct extent_io_tree pending_del;
692 struct extent_io_tree extent_ins;
693 725
694 /* logical->physical extent mapping */ 726 /* logical->physical extent mapping */
695 struct btrfs_mapping_tree mapping_tree; 727 struct btrfs_mapping_tree mapping_tree;
696 728
697 u64 generation; 729 u64 generation;
698 u64 last_trans_committed; 730 u64 last_trans_committed;
699 u64 last_trans_new_blockgroup; 731
732 /*
733 * this is updated to the current trans every time a full commit
734 * is required instead of the faster short fsync log commits
735 */
736 u64 last_trans_log_full_commit;
700 u64 open_ioctl_trans; 737 u64 open_ioctl_trans;
701 unsigned long mount_opt; 738 unsigned long mount_opt;
702 u64 max_extent; 739 u64 max_extent;
@@ -717,12 +754,20 @@ struct btrfs_fs_info {
717 struct mutex tree_log_mutex; 754 struct mutex tree_log_mutex;
718 struct mutex transaction_kthread_mutex; 755 struct mutex transaction_kthread_mutex;
719 struct mutex cleaner_mutex; 756 struct mutex cleaner_mutex;
720 struct mutex extent_ins_mutex;
721 struct mutex pinned_mutex;
722 struct mutex chunk_mutex; 757 struct mutex chunk_mutex;
723 struct mutex drop_mutex; 758 struct mutex drop_mutex;
724 struct mutex volume_mutex; 759 struct mutex volume_mutex;
725 struct mutex tree_reloc_mutex; 760 struct mutex tree_reloc_mutex;
761
762 /*
763 * this protects the ordered operations list only while we are
764 * processing all of the entries on it. This way we make
765 * sure the commit code doesn't find the list temporarily empty
766 * because another function happens to be doing non-waiting preflush
767 * before jumping into the main commit.
768 */
769 struct mutex ordered_operations_mutex;
770
726 struct list_head trans_list; 771 struct list_head trans_list;
727 struct list_head hashers; 772 struct list_head hashers;
728 struct list_head dead_roots; 773 struct list_head dead_roots;
@@ -737,10 +782,29 @@ struct btrfs_fs_info {
737 * ordered extents 782 * ordered extents
738 */ 783 */
739 spinlock_t ordered_extent_lock; 784 spinlock_t ordered_extent_lock;
785
786 /*
787 * all of the data=ordered extents pending writeback
788 * these can span multiple transactions and basically include
789 * every dirty data page that isn't from nodatacow
790 */
740 struct list_head ordered_extents; 791 struct list_head ordered_extents;
792
793 /*
794 * all of the inodes that have delalloc bytes. It is possible for
795 * this list to be empty even when there is still dirty data=ordered
796 * extents waiting to finish IO.
797 */
741 struct list_head delalloc_inodes; 798 struct list_head delalloc_inodes;
742 799
743 /* 800 /*
801 * special rename and truncate targets that must be on disk before
802 * we're allowed to commit. This is basically the ext3 style
803 * data=ordered list.
804 */
805 struct list_head ordered_operations;
806
807 /*
744 * there is a pool of worker threads for checksumming during writes 808 * there is a pool of worker threads for checksumming during writes
745 * and a pool for checksumming after reads. This is because readers 809 * and a pool for checksumming after reads. This is because readers
746 * can run with FS locks held, and the writers may be waiting for 810 * can run with FS locks held, and the writers may be waiting for
@@ -781,6 +845,11 @@ struct btrfs_fs_info {
781 atomic_t throttle_gen; 845 atomic_t throttle_gen;
782 846
783 u64 total_pinned; 847 u64 total_pinned;
848
849 /* protected by the delalloc lock, used to keep from writing
850 * metadata until there is a nice batch
851 */
852 u64 dirty_metadata_bytes;
784 struct list_head dirty_cowonly_roots; 853 struct list_head dirty_cowonly_roots;
785 854
786 struct btrfs_fs_devices *fs_devices; 855 struct btrfs_fs_devices *fs_devices;
@@ -795,8 +864,12 @@ struct btrfs_fs_info {
795 spinlock_t delalloc_lock; 864 spinlock_t delalloc_lock;
796 spinlock_t new_trans_lock; 865 spinlock_t new_trans_lock;
797 u64 delalloc_bytes; 866 u64 delalloc_bytes;
798 u64 last_alloc; 867
799 u64 last_data_alloc; 868 /* data_alloc_cluster is only used in ssd mode */
869 struct btrfs_free_cluster data_alloc_cluster;
870
871 /* all metadata allocations go through this cluster */
872 struct btrfs_free_cluster meta_alloc_cluster;
800 873
801 spinlock_t ref_cache_lock; 874 spinlock_t ref_cache_lock;
802 u64 total_ref_cache_size; 875 u64 total_ref_cache_size;
@@ -888,7 +961,6 @@ struct btrfs_root {
888}; 961};
889 962
890/* 963/*
891
892 * inode items have the data typically returned from stat and store other 964 * inode items have the data typically returned from stat and store other
893 * info about object characteristics. There is one for every file and dir in 965 * info about object characteristics. There is one for every file and dir in
894 * the FS 966 * the FS
@@ -919,7 +991,7 @@ struct btrfs_root {
919#define BTRFS_EXTENT_CSUM_KEY 128 991#define BTRFS_EXTENT_CSUM_KEY 128
920 992
921/* 993/*
922 * root items point to tree roots. There are typically in the root 994 * root items point to tree roots. They are typically in the root
923 * tree used by the super block to find all the other trees 995 * tree used by the super block to find all the other trees
924 */ 996 */
925#define BTRFS_ROOT_ITEM_KEY 132 997#define BTRFS_ROOT_ITEM_KEY 132
@@ -966,6 +1038,8 @@ struct btrfs_root {
966#define BTRFS_MOUNT_SSD (1 << 3) 1038#define BTRFS_MOUNT_SSD (1 << 3)
967#define BTRFS_MOUNT_DEGRADED (1 << 4) 1039#define BTRFS_MOUNT_DEGRADED (1 << 4)
968#define BTRFS_MOUNT_COMPRESS (1 << 5) 1040#define BTRFS_MOUNT_COMPRESS (1 << 5)
1041#define BTRFS_MOUNT_NOTREELOG (1 << 6)
1042#define BTRFS_MOUNT_FLUSHONCOMMIT (1 << 7)
969 1043
970#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt) 1044#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
971#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt) 1045#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -1704,18 +1778,16 @@ static inline struct dentry *fdentry(struct file *file)
1704} 1778}
1705 1779
1706/* extent-tree.c */ 1780/* extent-tree.c */
1781void btrfs_put_block_group(struct btrfs_block_group_cache *cache);
1782int btrfs_run_delayed_refs(struct btrfs_trans_handle *trans,
1783 struct btrfs_root *root, unsigned long count);
1707int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len); 1784int btrfs_lookup_extent(struct btrfs_root *root, u64 start, u64 len);
1708int btrfs_lookup_extent_ref(struct btrfs_trans_handle *trans,
1709 struct btrfs_root *root, u64 bytenr,
1710 u64 num_bytes, u32 *refs);
1711int btrfs_update_pinned_extents(struct btrfs_root *root, 1785int btrfs_update_pinned_extents(struct btrfs_root *root,
1712 u64 bytenr, u64 num, int pin); 1786 u64 bytenr, u64 num, int pin);
1713int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans, 1787int btrfs_drop_leaf_ref(struct btrfs_trans_handle *trans,
1714 struct btrfs_root *root, struct extent_buffer *leaf); 1788 struct btrfs_root *root, struct extent_buffer *leaf);
1715int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, 1789int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans,
1716 struct btrfs_root *root, u64 objectid, u64 bytenr); 1790 struct btrfs_root *root, u64 objectid, u64 bytenr);
1717int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
1718 struct btrfs_root *root);
1719int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy); 1791int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy);
1720struct btrfs_block_group_cache *btrfs_lookup_block_group( 1792struct btrfs_block_group_cache *btrfs_lookup_block_group(
1721 struct btrfs_fs_info *info, 1793 struct btrfs_fs_info *info,
@@ -1777,7 +1849,7 @@ int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
1777 u64 root_objectid, u64 ref_generation, 1849 u64 root_objectid, u64 ref_generation,
1778 u64 owner_objectid); 1850 u64 owner_objectid);
1779int btrfs_update_extent_ref(struct btrfs_trans_handle *trans, 1851int btrfs_update_extent_ref(struct btrfs_trans_handle *trans,
1780 struct btrfs_root *root, u64 bytenr, 1852 struct btrfs_root *root, u64 bytenr, u64 num_bytes,
1781 u64 orig_parent, u64 parent, 1853 u64 orig_parent, u64 parent,
1782 u64 root_objectid, u64 ref_generation, 1854 u64 root_objectid, u64 ref_generation,
1783 u64 owner_objectid); 1855 u64 owner_objectid);
@@ -1838,7 +1910,7 @@ int btrfs_search_forward(struct btrfs_root *root, struct btrfs_key *min_key,
1838int btrfs_cow_block(struct btrfs_trans_handle *trans, 1910int btrfs_cow_block(struct btrfs_trans_handle *trans,
1839 struct btrfs_root *root, struct extent_buffer *buf, 1911 struct btrfs_root *root, struct extent_buffer *buf,
1840 struct extent_buffer *parent, int parent_slot, 1912 struct extent_buffer *parent, int parent_slot,
1841 struct extent_buffer **cow_ret, u64 prealloc_dest); 1913 struct extent_buffer **cow_ret);
1842int btrfs_copy_root(struct btrfs_trans_handle *trans, 1914int btrfs_copy_root(struct btrfs_trans_handle *trans,
1843 struct btrfs_root *root, 1915 struct btrfs_root *root,
1844 struct extent_buffer *buf, 1916 struct extent_buffer *buf,
@@ -2060,7 +2132,7 @@ int btrfs_merge_bio_hook(struct page *page, unsigned long offset,
2060unsigned long btrfs_force_ra(struct address_space *mapping, 2132unsigned long btrfs_force_ra(struct address_space *mapping,
2061 struct file_ra_state *ra, struct file *file, 2133 struct file_ra_state *ra, struct file *file,
2062 pgoff_t offset, pgoff_t last_index); 2134 pgoff_t offset, pgoff_t last_index);
2063int btrfs_page_mkwrite(struct vm_area_struct *vma, struct page *page); 2135int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf);
2064int btrfs_readpage(struct file *file, struct page *page); 2136int btrfs_readpage(struct file *file, struct page *page);
2065void btrfs_delete_inode(struct inode *inode); 2137void btrfs_delete_inode(struct inode *inode);
2066void btrfs_put_inode(struct inode *inode); 2138void btrfs_put_inode(struct inode *inode);
@@ -2133,21 +2205,4 @@ int btrfs_check_acl(struct inode *inode, int mask);
2133int btrfs_init_acl(struct inode *inode, struct inode *dir); 2205int btrfs_init_acl(struct inode *inode, struct inode *dir);
2134int btrfs_acl_chmod(struct inode *inode); 2206int btrfs_acl_chmod(struct inode *inode);
2135 2207
2136/* free-space-cache.c */
2137int btrfs_add_free_space(struct btrfs_block_group_cache *block_group,
2138 u64 bytenr, u64 size);
2139int btrfs_add_free_space_lock(struct btrfs_block_group_cache *block_group,
2140 u64 offset, u64 bytes);
2141int btrfs_remove_free_space(struct btrfs_block_group_cache *block_group,
2142 u64 bytenr, u64 size);
2143int btrfs_remove_free_space_lock(struct btrfs_block_group_cache *block_group,
2144 u64 offset, u64 bytes);
2145void btrfs_remove_free_space_cache(struct btrfs_block_group_cache
2146 *block_group);
2147struct btrfs_free_space *btrfs_find_free_space(struct btrfs_block_group_cache
2148 *block_group, u64 offset,
2149 u64 bytes);
2150void btrfs_dump_free_space(struct btrfs_block_group_cache *block_group,
2151 u64 bytes);
2152u64 btrfs_block_group_free_space(struct btrfs_block_group_cache *block_group);
2153#endif 2208#endif