diff options
author | Chris Mason <chris.mason@oracle.com> | 2008-07-17 12:53:50 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2008-09-25 11:04:04 -0400 |
commit | e6dcd2dc9c489108648e2ed543315dd134d50a9a (patch) | |
tree | cddf6f588b65e28c5feb8bff89b22d8ff70f8a50 /fs/btrfs/extent-tree.c | |
parent | 77a41afb7d0dd0f27b6f2f1a5bc701929c7034de (diff) |
Btrfs: New data=ordered implementation
The old data=ordered code would force commit to wait until
all the data extents from the transaction were fully on disk. This
introduced large latencies into the commit and stalled new writers
in the transaction for a long time.
The new code changes the way data allocations and extents work:
* When delayed allocation is filled, data extents are reserved, and
the extent bit EXTENT_ORDERED is set on the entire range of the extent.
A struct btrfs_ordered_extent is allocated and inserted into a per-inode
rbtree to track the pending extents.
* As each page is written, EXTENT_ORDERED is cleared on the bytes corresponding
to that page.
* When all of the bytes corresponding to a single struct btrfs_ordered_extent
are written, the previously reserved extent is inserted into the FS
btree and into the extent allocation trees. The checksums for the file
data are also updated.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/extent-tree.c')
-rw-r--r-- | fs/btrfs/extent-tree.c | 132 |
1 files changed, 93 insertions, 39 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 8ebfa6be0790..343d1101c31c 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c | |||
@@ -1895,36 +1895,17 @@ error: | |||
1895 | return ret; | 1895 | return ret; |
1896 | } | 1896 | } |
1897 | 1897 | ||
1898 | /* | 1898 | static int __btrfs_reserve_extent(struct btrfs_trans_handle *trans, |
1899 | * finds a free extent and does all the dirty work required for allocation | 1899 | struct btrfs_root *root, |
1900 | * returns the key for the extent through ins, and a tree buffer for | 1900 | u64 num_bytes, u64 min_alloc_size, |
1901 | * the first block of the extent through buf. | 1901 | u64 empty_size, u64 hint_byte, |
1902 | * | 1902 | u64 search_end, struct btrfs_key *ins, |
1903 | * returns 0 if everything worked, non-zero otherwise. | 1903 | u64 data) |
1904 | */ | ||
1905 | int btrfs_alloc_extent(struct btrfs_trans_handle *trans, | ||
1906 | struct btrfs_root *root, | ||
1907 | u64 num_bytes, u64 min_alloc_size, | ||
1908 | u64 root_objectid, u64 ref_generation, | ||
1909 | u64 owner, u64 owner_offset, | ||
1910 | u64 empty_size, u64 hint_byte, | ||
1911 | u64 search_end, struct btrfs_key *ins, u64 data) | ||
1912 | { | 1904 | { |
1913 | int ret; | 1905 | int ret; |
1914 | int pending_ret; | ||
1915 | u64 super_used; | ||
1916 | u64 root_used; | ||
1917 | u64 search_start = 0; | 1906 | u64 search_start = 0; |
1918 | u64 alloc_profile; | 1907 | u64 alloc_profile; |
1919 | u32 sizes[2]; | ||
1920 | struct btrfs_fs_info *info = root->fs_info; | 1908 | struct btrfs_fs_info *info = root->fs_info; |
1921 | struct btrfs_root *extent_root = info->extent_root; | ||
1922 | struct btrfs_extent_item *extent_item; | ||
1923 | struct btrfs_extent_ref *ref; | ||
1924 | struct btrfs_path *path; | ||
1925 | struct btrfs_key keys[2]; | ||
1926 | |||
1927 | maybe_lock_mutex(root); | ||
1928 | 1909 | ||
1929 | if (data) { | 1910 | if (data) { |
1930 | alloc_profile = info->avail_data_alloc_bits & | 1911 | alloc_profile = info->avail_data_alloc_bits & |
@@ -1974,11 +1955,48 @@ again: | |||
1974 | } | 1955 | } |
1975 | if (ret) { | 1956 | if (ret) { |
1976 | printk("allocation failed flags %Lu\n", data); | 1957 | printk("allocation failed flags %Lu\n", data); |
1977 | } | ||
1978 | if (ret) { | ||
1979 | BUG(); | 1958 | BUG(); |
1980 | goto out; | ||
1981 | } | 1959 | } |
1960 | clear_extent_dirty(&root->fs_info->free_space_cache, | ||
1961 | ins->objectid, ins->objectid + ins->offset - 1, | ||
1962 | GFP_NOFS); | ||
1963 | return 0; | ||
1964 | } | ||
1965 | |||
1966 | int btrfs_reserve_extent(struct btrfs_trans_handle *trans, | ||
1967 | struct btrfs_root *root, | ||
1968 | u64 num_bytes, u64 min_alloc_size, | ||
1969 | u64 empty_size, u64 hint_byte, | ||
1970 | u64 search_end, struct btrfs_key *ins, | ||
1971 | u64 data) | ||
1972 | { | ||
1973 | int ret; | ||
1974 | maybe_lock_mutex(root); | ||
1975 | ret = __btrfs_reserve_extent(trans, root, num_bytes, min_alloc_size, | ||
1976 | empty_size, hint_byte, search_end, ins, | ||
1977 | data); | ||
1978 | maybe_unlock_mutex(root); | ||
1979 | return ret; | ||
1980 | } | ||
1981 | |||
1982 | static int __btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, | ||
1983 | struct btrfs_root *root, | ||
1984 | u64 root_objectid, u64 ref_generation, | ||
1985 | u64 owner, u64 owner_offset, | ||
1986 | struct btrfs_key *ins) | ||
1987 | { | ||
1988 | int ret; | ||
1989 | int pending_ret; | ||
1990 | u64 super_used; | ||
1991 | u64 root_used; | ||
1992 | u64 num_bytes = ins->offset; | ||
1993 | u32 sizes[2]; | ||
1994 | struct btrfs_fs_info *info = root->fs_info; | ||
1995 | struct btrfs_root *extent_root = info->extent_root; | ||
1996 | struct btrfs_extent_item *extent_item; | ||
1997 | struct btrfs_extent_ref *ref; | ||
1998 | struct btrfs_path *path; | ||
1999 | struct btrfs_key keys[2]; | ||
1982 | 2000 | ||
1983 | /* block accounting for super block */ | 2001 | /* block accounting for super block */ |
1984 | spin_lock_irq(&info->delalloc_lock); | 2002 | spin_lock_irq(&info->delalloc_lock); |
@@ -1990,10 +2008,6 @@ again: | |||
1990 | root_used = btrfs_root_used(&root->root_item); | 2008 | root_used = btrfs_root_used(&root->root_item); |
1991 | btrfs_set_root_used(&root->root_item, root_used + num_bytes); | 2009 | btrfs_set_root_used(&root->root_item, root_used + num_bytes); |
1992 | 2010 | ||
1993 | clear_extent_dirty(&root->fs_info->free_space_cache, | ||
1994 | ins->objectid, ins->objectid + ins->offset - 1, | ||
1995 | GFP_NOFS); | ||
1996 | |||
1997 | if (root == extent_root) { | 2011 | if (root == extent_root) { |
1998 | set_extent_bits(&root->fs_info->extent_ins, ins->objectid, | 2012 | set_extent_bits(&root->fs_info->extent_ins, ins->objectid, |
1999 | ins->objectid + ins->offset - 1, | 2013 | ins->objectid + ins->offset - 1, |
@@ -2001,10 +2015,6 @@ again: | |||
2001 | goto update_block; | 2015 | goto update_block; |
2002 | } | 2016 | } |
2003 | 2017 | ||
2004 | WARN_ON(trans->alloc_exclude_nr); | ||
2005 | trans->alloc_exclude_start = ins->objectid; | ||
2006 | trans->alloc_exclude_nr = ins->offset; | ||
2007 | |||
2008 | memcpy(&keys[0], ins, sizeof(*ins)); | 2018 | memcpy(&keys[0], ins, sizeof(*ins)); |
2009 | keys[1].offset = hash_extent_ref(root_objectid, ref_generation, | 2019 | keys[1].offset = hash_extent_ref(root_objectid, ref_generation, |
2010 | owner, owner_offset); | 2020 | owner, owner_offset); |
@@ -2054,6 +2064,51 @@ update_block: | |||
2054 | BUG(); | 2064 | BUG(); |
2055 | } | 2065 | } |
2056 | out: | 2066 | out: |
2067 | return ret; | ||
2068 | } | ||
2069 | |||
2070 | int btrfs_alloc_reserved_extent(struct btrfs_trans_handle *trans, | ||
2071 | struct btrfs_root *root, | ||
2072 | u64 root_objectid, u64 ref_generation, | ||
2073 | u64 owner, u64 owner_offset, | ||
2074 | struct btrfs_key *ins) | ||
2075 | { | ||
2076 | int ret; | ||
2077 | maybe_lock_mutex(root); | ||
2078 | ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid, | ||
2079 | ref_generation, owner, | ||
2080 | owner_offset, ins); | ||
2081 | maybe_unlock_mutex(root); | ||
2082 | return ret; | ||
2083 | } | ||
2084 | /* | ||
2085 | * finds a free extent and does all the dirty work required for allocation | ||
2086 | * returns the key for the extent through ins, and a tree buffer for | ||
2087 | * the first block of the extent through buf. | ||
2088 | * | ||
2089 | * returns 0 if everything worked, non-zero otherwise. | ||
2090 | */ | ||
2091 | int btrfs_alloc_extent(struct btrfs_trans_handle *trans, | ||
2092 | struct btrfs_root *root, | ||
2093 | u64 num_bytes, u64 min_alloc_size, | ||
2094 | u64 root_objectid, u64 ref_generation, | ||
2095 | u64 owner, u64 owner_offset, | ||
2096 | u64 empty_size, u64 hint_byte, | ||
2097 | u64 search_end, struct btrfs_key *ins, u64 data) | ||
2098 | { | ||
2099 | int ret; | ||
2100 | |||
2101 | maybe_lock_mutex(root); | ||
2102 | |||
2103 | ret = __btrfs_reserve_extent(trans, root, num_bytes, | ||
2104 | min_alloc_size, empty_size, hint_byte, | ||
2105 | search_end, ins, data); | ||
2106 | BUG_ON(ret); | ||
2107 | ret = __btrfs_alloc_reserved_extent(trans, root, root_objectid, | ||
2108 | ref_generation, owner, | ||
2109 | owner_offset, ins); | ||
2110 | BUG_ON(ret); | ||
2111 | |||
2057 | maybe_unlock_mutex(root); | 2112 | maybe_unlock_mutex(root); |
2058 | return ret; | 2113 | return ret; |
2059 | } | 2114 | } |
@@ -2288,8 +2343,8 @@ static int noinline walk_down_tree(struct btrfs_trans_handle *trans, | |||
2288 | mutex_lock(&root->fs_info->alloc_mutex); | 2343 | mutex_lock(&root->fs_info->alloc_mutex); |
2289 | 2344 | ||
2290 | /* we've dropped the lock, double check */ | 2345 | /* we've dropped the lock, double check */ |
2291 | ret = drop_snap_lookup_refcount(root, bytenr, | 2346 | ret = lookup_extent_ref(NULL, root, bytenr, blocksize, |
2292 | blocksize, &refs); | 2347 | &refs); |
2293 | BUG_ON(ret); | 2348 | BUG_ON(ret); |
2294 | if (refs != 1) { | 2349 | if (refs != 1) { |
2295 | parent = path->nodes[*level]; | 2350 | parent = path->nodes[*level]; |
@@ -2584,7 +2639,6 @@ out_unlock: | |||
2584 | kfree(ra); | 2639 | kfree(ra); |
2585 | trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); | 2640 | trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1); |
2586 | if (trans) { | 2641 | if (trans) { |
2587 | btrfs_add_ordered_inode(inode); | ||
2588 | btrfs_end_transaction(trans, BTRFS_I(inode)->root); | 2642 | btrfs_end_transaction(trans, BTRFS_I(inode)->root); |
2589 | mark_inode_dirty(inode); | 2643 | mark_inode_dirty(inode); |
2590 | } | 2644 | } |