aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Zach Brown <zab@redhat.com> 2012-11-15 19:04:43 -0500
committer Chris Mason <chris.mason@fusionio.com> 2013-02-20 14:06:25 -0500
commit 24542bf7ea5e4fdfdb5157ff544c093fa4dcb536 (patch)
tree f4367b2f82fb247baa5be1066850ed5f7cd34a48
parent 1cba0cdf5e4dbcd9e5fa5b54d7a028e55e2ca057 (diff)
btrfs: limit fallocate extent reservation to 256MB
Very large fallocate requests are CPU-bound and result in extents with a repeating pattern of ever decreasing size:

$ time fallocate -l 1T file
real 0m13.039s

( an excerpt of the extents from btrfs-debug-tree: )
prealloc data disk byte 1536292564992 nr 397312
prealloc data disk byte 1536292962304 nr 196608
prealloc data disk byte 1536293158912 nr 98304
prealloc data disk byte 1536293257216 nr 49152
prealloc data disk byte 1536293306368 nr 24576
prealloc data disk byte 1536293330944 nr 12288
prealloc data disk byte 1536293343232 nr 8192
prealloc data disk byte 1536293351424 nr 4096
prealloc data disk byte 1536293355520 nr 4096
prealloc data disk byte 1536293359616 nr 4096

The excessive CPU use comes from __btrfs_prealloc_file_range() trying to allocate the entire remaining size after each extent is allocated. btrfs_reserve_extent() repeatedly cuts this requested size in half until it gets down to the size that the allocators can return. We limit the problem for now by capping each reservation at 256 meg.

The small extents come from a masking bug when decreasing the requested reservation size. The high 32 bits are cleared and the remaining low bits might happen to reserve a small size. Fix this by using round_down() which properly casts the mask.

After these fixes huge fallocate requests are fast and result in nice large extents:

$ time fallocate -l 1T file
real 0m0.082s

prealloc data disk byte 1112425889792 nr 268435456
prealloc data disk byte 1112694325248 nr 268435456
prealloc data disk byte 1112962760704 nr 268435456

Reported-by: Eric Sandeen <sandeen@redhat.com>
Signed-off-by: Zach Brown <zab@redhat.com>
Signed-off-by: Chris Mason <chris.mason@fusionio.com>
-rw-r--r-- fs/btrfs/extent-tree.c | 2
-rw-r--r-- fs/btrfs/inode.c | 5
2 files changed, 4 insertions, 3 deletions
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index b3ecca447ddf..d2b3a5e9a621 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -6143,7 +6143,7 @@ again:
6143 if (ret == -ENOSPC) { 6143 if (ret == -ENOSPC) {
6144 if (!final_tried) { 6144 if (!final_tried) {
6145 num_bytes = num_bytes >> 1; 6145 num_bytes = num_bytes >> 1;
6146 num_bytes = num_bytes & ~(root->sectorsize - 1); 6146 num_bytes = round_down(num_bytes, root->sectorsize);
6147 num_bytes = max(num_bytes, min_alloc_size); 6147 num_bytes = max(num_bytes, min_alloc_size);
6148 if (num_bytes == min_alloc_size) 6148 if (num_bytes == min_alloc_size)
6149 final_tried = true; 6149 final_tried = true;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 4e6a11c2cfdd..3bc62b181ef8 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7894,8 +7894,9 @@ static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
7894 } 7894 }
7895 } 7895 }
7896 7896
7897 ret = btrfs_reserve_extent(trans, root, num_bytes, min_size, 7897 ret = btrfs_reserve_extent(trans, root,
7898 0, *alloc_hint, &ins, 1); 7898 min(num_bytes, 256ULL * 1024 * 1024),
7899 min_size, 0, *alloc_hint, &ins, 1);
7899 if (ret) { 7900 if (ret) {
7900 if (own_trans) 7901 if (own_trans)
7901 btrfs_end_transaction(trans, root); 7902 btrfs_end_transaction(trans, root);