aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
authorFilipe David Borba Manana <fdmanana@gmail.com>2014-01-07 06:42:27 -0500
committerChris Mason <clm@fb.com>2014-01-28 16:20:23 -0500
commit1acae57b161ef1282f565ef907f72aeed0eb71d9 (patch)
tree7234dacef63e67640d780cfb82742762b5e00bfd /fs
parent90515e7f5d7d24cbb2a4038a3f1b5cfa2921aa17 (diff)
Btrfs: faster file extent item replace operations
When writing to a file we drop existing file extent items that cover the write range and then add a new file extent item that represents that write range. Before this change we were doing a tree lookup to remove the file extent items, and then after we did another tree lookup to insert the new file extent item. Most of the time all the file extent items we need to drop are located within a single leaf - this is the leaf where our new file extent item ends up at. Therefore, in this common case just combine these 2 operations into a single one. By avoiding the second btree navigation for insertion of the new file extent item, we reduce btree node/leaf lock acquisitions/releases, btree block/leaf COW operations, CPU time on btree node/leaf key binary searches, etc. Besides for file writes, this is an operation that happens for file fsync's as well. However log btrees are much less likely to big as big as regular fs btrees, therefore the impact of this change is smaller. The following benchmark was performed against an SSD drive and a HDD drive, both for random and sequential writes: sysbench --test=fileio --file-num=4096 --file-total-size=8G \ --file-test-mode=[rndwr|seqwr] --num-threads=512 \ --file-block-size=8192 \ --max-requests=1000000 \ --file-fsync-freq=0 --file-io-mode=sync [prepare|run] All results below are averages of 10 runs of the respective test. ** SSD sequential writes Before this change: 225.88 Mb/sec After this change: 277.26 Mb/sec ** SSD random writes Before this change: 49.91 Mb/sec After this change: 56.39 Mb/sec ** HDD sequential writes Before this change: 68.53 Mb/sec After this change: 69.87 Mb/sec ** HDD random writes Before this change: 13.04 Mb/sec After this change: 14.39 Mb/sec Signed-off-by: Filipe David Borba Manana <fdmanana@gmail.com> Signed-off-by: Josef Bacik <jbacik@fb.com> Signed-off-by: Chris Mason <clm@fb.com>
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/ctree.h5
-rw-r--r--fs/btrfs/file.c44
-rw-r--r--fs/btrfs/inode.c87
-rw-r--r--fs/btrfs/tree-log.c24
4 files changed, 114 insertions, 46 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 5be778e62a29..f52a60b9eba5 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -3772,7 +3772,10 @@ extern const struct file_operations btrfs_file_operations;
3772int __btrfs_drop_extents(struct btrfs_trans_handle *trans, 3772int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
3773 struct btrfs_root *root, struct inode *inode, 3773 struct btrfs_root *root, struct inode *inode,
3774 struct btrfs_path *path, u64 start, u64 end, 3774 struct btrfs_path *path, u64 start, u64 end,
3775 u64 *drop_end, int drop_cache); 3775 u64 *drop_end, int drop_cache,
3776 int replace_extent,
3777 u32 extent_item_size,
3778 int *key_inserted);
3776int btrfs_drop_extents(struct btrfs_trans_handle *trans, 3779int btrfs_drop_extents(struct btrfs_trans_handle *trans,
3777 struct btrfs_root *root, struct inode *inode, u64 start, 3780 struct btrfs_root *root, struct inode *inode, u64 start,
3778 u64 end, int drop_cache); 3781 u64 end, int drop_cache);
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 35bf83876f3f..030012e1710c 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -692,7 +692,10 @@ next:
692int __btrfs_drop_extents(struct btrfs_trans_handle *trans, 692int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
693 struct btrfs_root *root, struct inode *inode, 693 struct btrfs_root *root, struct inode *inode,
694 struct btrfs_path *path, u64 start, u64 end, 694 struct btrfs_path *path, u64 start, u64 end,
695 u64 *drop_end, int drop_cache) 695 u64 *drop_end, int drop_cache,
696 int replace_extent,
697 u32 extent_item_size,
698 int *key_inserted)
696{ 699{
697 struct extent_buffer *leaf; 700 struct extent_buffer *leaf;
698 struct btrfs_file_extent_item *fi; 701 struct btrfs_file_extent_item *fi;
@@ -712,6 +715,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
712 int modify_tree = -1; 715 int modify_tree = -1;
713 int update_refs = (root->ref_cows || root == root->fs_info->tree_root); 716 int update_refs = (root->ref_cows || root == root->fs_info->tree_root);
714 int found = 0; 717 int found = 0;
718 int leafs_visited = 0;
715 719
716 if (drop_cache) 720 if (drop_cache)
717 btrfs_drop_extent_cache(inode, start, end - 1, 0); 721 btrfs_drop_extent_cache(inode, start, end - 1, 0);
@@ -733,6 +737,7 @@ int __btrfs_drop_extents(struct btrfs_trans_handle *trans,
733 path->slots[0]--; 737 path->slots[0]--;
734 } 738 }
735 ret = 0; 739 ret = 0;
740 leafs_visited++;
736next_slot: 741next_slot:
737 leaf = path->nodes[0]; 742 leaf = path->nodes[0];
738 if (path->slots[0] >= btrfs_header_nritems(leaf)) { 743 if (path->slots[0] >= btrfs_header_nritems(leaf)) {
@@ -744,6 +749,7 @@ next_slot:
744 ret = 0; 749 ret = 0;
745 break; 750 break;
746 } 751 }
752 leafs_visited++;
747 leaf = path->nodes[0]; 753 leaf = path->nodes[0];
748 recow = 1; 754 recow = 1;
749 } 755 }
@@ -927,14 +933,44 @@ next_slot:
927 } 933 }
928 934
929 if (!ret && del_nr > 0) { 935 if (!ret && del_nr > 0) {
936 /*
937 * Set path->slots[0] to first slot, so that after the delete
938 * if items are move off from our leaf to its immediate left or
939 * right neighbor leafs, we end up with a correct and adjusted
940 * path->slots[0] for our insertion.
941 */
942 path->slots[0] = del_slot;
930 ret = btrfs_del_items(trans, root, path, del_slot, del_nr); 943 ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
931 if (ret) 944 if (ret)
932 btrfs_abort_transaction(trans, root, ret); 945 btrfs_abort_transaction(trans, root, ret);
946
947 leaf = path->nodes[0];
948 /*
949 * leaf eb has flag EXTENT_BUFFER_STALE if it was deleted (that
950 * is, its contents got pushed to its neighbors), in which case
951 * it means path->locks[0] == 0
952 */
953 if (!ret && replace_extent && leafs_visited == 1 &&
954 path->locks[0] &&
955 btrfs_leaf_free_space(root, leaf) >=
956 sizeof(struct btrfs_item) + extent_item_size) {
957
958 key.objectid = ino;
959 key.type = BTRFS_EXTENT_DATA_KEY;
960 key.offset = start;
961 setup_items_for_insert(root, path, &key,
962 &extent_item_size,
963 extent_item_size,
964 sizeof(struct btrfs_item) +
965 extent_item_size, 1);
966 *key_inserted = 1;
967 }
933 } 968 }
934 969
970 if (!replace_extent || !(*key_inserted))
971 btrfs_release_path(path);
935 if (drop_end) 972 if (drop_end)
936 *drop_end = found ? min(end, extent_end) : end; 973 *drop_end = found ? min(end, extent_end) : end;
937 btrfs_release_path(path);
938 return ret; 974 return ret;
939} 975}
940 976
@@ -949,7 +985,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
949 if (!path) 985 if (!path)
950 return -ENOMEM; 986 return -ENOMEM;
951 ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL, 987 ret = __btrfs_drop_extents(trans, root, inode, path, start, end, NULL,
952 drop_cache); 988 drop_cache, 0, 0, NULL);
953 btrfs_free_path(path); 989 btrfs_free_path(path);
954 return ret; 990 return ret;
955} 991}
@@ -2219,7 +2255,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
2219 while (cur_offset < lockend) { 2255 while (cur_offset < lockend) {
2220 ret = __btrfs_drop_extents(trans, root, inode, path, 2256 ret = __btrfs_drop_extents(trans, root, inode, path,
2221 cur_offset, lockend + 1, 2257 cur_offset, lockend + 1,
2222 &drop_end, 1); 2258 &drop_end, 1, 0, 0, NULL);
2223 if (ret != -ENOSPC) 2259 if (ret != -ENOSPC)
2224 break; 2260 break;
2225 2261
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index ffb23e506762..23f18eb5fb55 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -125,13 +125,12 @@ static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
125 * no overlapping inline items exist in the btree 125 * no overlapping inline items exist in the btree
126 */ 126 */
127static noinline int insert_inline_extent(struct btrfs_trans_handle *trans, 127static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
128 struct btrfs_path *path, int extent_inserted,
128 struct btrfs_root *root, struct inode *inode, 129 struct btrfs_root *root, struct inode *inode,
129 u64 start, size_t size, size_t compressed_size, 130 u64 start, size_t size, size_t compressed_size,
130 int compress_type, 131 int compress_type,
131 struct page **compressed_pages) 132 struct page **compressed_pages)
132{ 133{
133 struct btrfs_key key;
134 struct btrfs_path *path;
135 struct extent_buffer *leaf; 134 struct extent_buffer *leaf;
136 struct page *page = NULL; 135 struct page *page = NULL;
137 char *kaddr; 136 char *kaddr;
@@ -140,29 +139,29 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
140 int err = 0; 139 int err = 0;
141 int ret; 140 int ret;
142 size_t cur_size = size; 141 size_t cur_size = size;
143 size_t datasize;
144 unsigned long offset; 142 unsigned long offset;
145 143
146 if (compressed_size && compressed_pages) 144 if (compressed_size && compressed_pages)
147 cur_size = compressed_size; 145 cur_size = compressed_size;
148 146
149 path = btrfs_alloc_path(); 147 inode_add_bytes(inode, size);
150 if (!path)
151 return -ENOMEM;
152 148
153 path->leave_spinning = 1; 149 if (!extent_inserted) {
150 struct btrfs_key key;
151 size_t datasize;
154 152
155 key.objectid = btrfs_ino(inode); 153 key.objectid = btrfs_ino(inode);
156 key.offset = start; 154 key.offset = start;
157 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY); 155 btrfs_set_key_type(&key, BTRFS_EXTENT_DATA_KEY);
158 datasize = btrfs_file_extent_calc_inline_size(cur_size);
159 156
160 inode_add_bytes(inode, size); 157 datasize = btrfs_file_extent_calc_inline_size(cur_size);
161 ret = btrfs_insert_empty_item(trans, root, path, &key, 158 path->leave_spinning = 1;
162 datasize); 159 ret = btrfs_insert_empty_item(trans, root, path, &key,
163 if (ret) { 160 datasize);
164 err = ret; 161 if (ret) {
165 goto fail; 162 err = ret;
163 goto fail;
164 }
166 } 165 }
167 leaf = path->nodes[0]; 166 leaf = path->nodes[0];
168 ei = btrfs_item_ptr(leaf, path->slots[0], 167 ei = btrfs_item_ptr(leaf, path->slots[0],
@@ -203,7 +202,7 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
203 page_cache_release(page); 202 page_cache_release(page);
204 } 203 }
205 btrfs_mark_buffer_dirty(leaf); 204 btrfs_mark_buffer_dirty(leaf);
206 btrfs_free_path(path); 205 btrfs_release_path(path);
207 206
208 /* 207 /*
209 * we're an inline extent, so nobody can 208 * we're an inline extent, so nobody can
@@ -219,7 +218,6 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
219 218
220 return ret; 219 return ret;
221fail: 220fail:
222 btrfs_free_path(path);
223 return err; 221 return err;
224} 222}
225 223
@@ -242,6 +240,9 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
242 u64 aligned_end = ALIGN(end, root->sectorsize); 240 u64 aligned_end = ALIGN(end, root->sectorsize);
243 u64 data_len = inline_len; 241 u64 data_len = inline_len;
244 int ret; 242 int ret;
243 struct btrfs_path *path;
244 int extent_inserted = 0;
245 u32 extent_item_size;
245 246
246 if (compressed_size) 247 if (compressed_size)
247 data_len = compressed_size; 248 data_len = compressed_size;
@@ -256,12 +257,27 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
256 return 1; 257 return 1;
257 } 258 }
258 259
260 path = btrfs_alloc_path();
261 if (!path)
262 return -ENOMEM;
263
259 trans = btrfs_join_transaction(root); 264 trans = btrfs_join_transaction(root);
260 if (IS_ERR(trans)) 265 if (IS_ERR(trans)) {
266 btrfs_free_path(path);
261 return PTR_ERR(trans); 267 return PTR_ERR(trans);
268 }
262 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 269 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
263 270
264 ret = btrfs_drop_extents(trans, root, inode, start, aligned_end, 1); 271 if (compressed_size && compressed_pages)
272 extent_item_size = btrfs_file_extent_calc_inline_size(
273 compressed_size);
274 else
275 extent_item_size = btrfs_file_extent_calc_inline_size(
276 inline_len);
277
278 ret = __btrfs_drop_extents(trans, root, inode, path,
279 start, aligned_end, NULL,
280 1, 1, extent_item_size, &extent_inserted);
265 if (ret) { 281 if (ret) {
266 btrfs_abort_transaction(trans, root, ret); 282 btrfs_abort_transaction(trans, root, ret);
267 goto out; 283 goto out;
@@ -269,7 +285,8 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
269 285
270 if (isize > actual_end) 286 if (isize > actual_end)
271 inline_len = min_t(u64, isize, actual_end); 287 inline_len = min_t(u64, isize, actual_end);
272 ret = insert_inline_extent(trans, root, inode, start, 288 ret = insert_inline_extent(trans, path, extent_inserted,
289 root, inode, start,
273 inline_len, compressed_size, 290 inline_len, compressed_size,
274 compress_type, compressed_pages); 291 compress_type, compressed_pages);
275 if (ret && ret != -ENOSPC) { 292 if (ret && ret != -ENOSPC) {
@@ -284,6 +301,7 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
284 btrfs_delalloc_release_metadata(inode, end + 1 - start); 301 btrfs_delalloc_release_metadata(inode, end + 1 - start);
285 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0); 302 btrfs_drop_extent_cache(inode, start, aligned_end - 1, 0);
286out: 303out:
304 btrfs_free_path(path);
287 btrfs_end_transaction(trans, root); 305 btrfs_end_transaction(trans, root);
288 return ret; 306 return ret;
289} 307}
@@ -1841,14 +1859,13 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1841 struct btrfs_path *path; 1859 struct btrfs_path *path;
1842 struct extent_buffer *leaf; 1860 struct extent_buffer *leaf;
1843 struct btrfs_key ins; 1861 struct btrfs_key ins;
1862 int extent_inserted = 0;
1844 int ret; 1863 int ret;
1845 1864
1846 path = btrfs_alloc_path(); 1865 path = btrfs_alloc_path();
1847 if (!path) 1866 if (!path)
1848 return -ENOMEM; 1867 return -ENOMEM;
1849 1868
1850 path->leave_spinning = 1;
1851
1852 /* 1869 /*
1853 * we may be replacing one extent in the tree with another. 1870 * we may be replacing one extent in the tree with another.
1854 * The new extent is pinned in the extent map, and we don't want 1871 * The new extent is pinned in the extent map, and we don't want
@@ -1858,17 +1875,23 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
1858 * the caller is expected to unpin it and allow it to be merged 1875 * the caller is expected to unpin it and allow it to be merged
1859 * with the others. 1876 * with the others.
1860 */ 1877 */
1861 ret = btrfs_drop_extents(trans, root, inode, file_pos, 1878 ret = __btrfs_drop_extents(trans, root, inode, path, file_pos,
1862 file_pos + num_bytes, 0); 1879 file_pos + num_bytes, NULL, 0,
1880 1, sizeof(*fi), &extent_inserted);
1863 if (ret) 1881 if (ret)
1864 goto out; 1882 goto out;
1865 1883
1866 ins.objectid = btrfs_ino(inode); 1884 if (!extent_inserted) {
1867 ins.offset = file_pos; 1885 ins.objectid = btrfs_ino(inode);
1868 ins.type = BTRFS_EXTENT_DATA_KEY; 1886 ins.offset = file_pos;
1869 ret = btrfs_insert_empty_item(trans, root, path, &ins, sizeof(*fi)); 1887 ins.type = BTRFS_EXTENT_DATA_KEY;
1870 if (ret) 1888
1871 goto out; 1889 path->leave_spinning = 1;
1890 ret = btrfs_insert_empty_item(trans, root, path, &ins,
1891 sizeof(*fi));
1892 if (ret)
1893 goto out;
1894 }
1872 leaf = path->nodes[0]; 1895 leaf = path->nodes[0];
1873 fi = btrfs_item_ptr(leaf, path->slots[0], 1896 fi = btrfs_item_ptr(leaf, path->slots[0],
1874 struct btrfs_file_extent_item); 1897 struct btrfs_file_extent_item);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index ba2f15109dac..b561e7a4007d 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -3495,21 +3495,27 @@ static int log_one_extent(struct btrfs_trans_handle *trans,
3495 int ret; 3495 int ret;
3496 int index = log->log_transid % 2; 3496 int index = log->log_transid % 2;
3497 bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; 3497 bool skip_csum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
3498 3498 int extent_inserted = 0;
3499 ret = __btrfs_drop_extents(trans, log, inode, path, em->start,
3500 em->start + em->len, NULL, 0);
3501 if (ret)
3502 return ret;
3503 3499
3504 INIT_LIST_HEAD(&ordered_sums); 3500 INIT_LIST_HEAD(&ordered_sums);
3505 btrfs_init_map_token(&token); 3501 btrfs_init_map_token(&token);
3506 key.objectid = btrfs_ino(inode);
3507 key.type = BTRFS_EXTENT_DATA_KEY;
3508 key.offset = em->start;
3509 3502
3510 ret = btrfs_insert_empty_item(trans, log, path, &key, sizeof(*fi)); 3503 ret = __btrfs_drop_extents(trans, log, inode, path, em->start,
3504 em->start + em->len, NULL, 0, 1,
3505 sizeof(*fi), &extent_inserted);
3511 if (ret) 3506 if (ret)
3512 return ret; 3507 return ret;
3508
3509 if (!extent_inserted) {
3510 key.objectid = btrfs_ino(inode);
3511 key.type = BTRFS_EXTENT_DATA_KEY;
3512 key.offset = em->start;
3513
3514 ret = btrfs_insert_empty_item(trans, log, path, &key,
3515 sizeof(*fi));
3516 if (ret)
3517 return ret;
3518 }
3513 leaf = path->nodes[0]; 3519 leaf = path->nodes[0];
3514 fi = btrfs_item_ptr(leaf, path->slots[0], 3520 fi = btrfs_item_ptr(leaf, path->slots[0],
3515 struct btrfs_file_extent_item); 3521 struct btrfs_file_extent_item);