aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/file.c
diff options
context:
space:
mode:
authorJosef Bacik <josef@redhat.com>2009-09-11 16:12:44 -0400
committerChris Mason <chris.mason@oracle.com>2009-09-28 16:29:42 -0400
commit9ed74f2dba6ebf9f30b80554290bfc73cc3ef083 (patch)
tree763d58a4a11ceca26dcdaedefb1fd662c4e2fa8b /fs/btrfs/file.c
parentc65ddb52dc412c9b67681b1aa16cd1bac8434e24 (diff)
Btrfs: proper -ENOSPC handling
At the start of a transaction we do a btrfs_reserve_metadata_space() and specify how many items we plan on modifying. Then once we've done our modifications and such, just call btrfs_unreserve_metadata_space() for the same number of items we reserved. For keeping track of metadata needed for data I've had to add an extent_io op for when we merge extents. This lets us track space properly when we are doing sequential writes, so we don't end up reserving way more metadata space than what we need. The only place where the metadata space accounting is not done is in the relocation code. This is because Yan is going to be reworking that code in the near future, so running btrfs-vol -b could still possibly result in a ENOSPC related panic. This patch also turns off the metadata_ratio stuff in order to allow users to more efficiently use their disk space. This patch makes it so we track how much metadata we need for an inode's delayed allocation extents by tracking how many extents are currently waiting for allocation. It introduces two new callbacks for the extent_io tree's, merge_extent_hook and split_extent_hook. These help us keep track of when we merge delalloc extents together and split them up. Reservations are handled prior to any actually dirty'ing occurs, and then we unreserve after we dirty. btrfs_unreserve_metadata_for_delalloc() will make the appropriate unreservations as needed based on the number of reservations we currently have and the number of extents we currently have. Doing the reservation outside of doing any of the actual dirty'ing lets us do things like filemap_flush() the inode to try and force delalloc to happen, or as a last resort actually start allocation on all delalloc inodes in the fs. This has survived dbench, fs_mark and an fsx torture test. Signed-off-by: Josef Bacik <jbacik@redhat.com> Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r--fs/btrfs/file.c11
1 files changed, 10 insertions, 1 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 571ad3c13b47..1be96ba6f6bb 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -123,7 +123,10 @@ static noinline int dirty_and_release_pages(struct btrfs_trans_handle *trans,
123 root->sectorsize - 1) & ~((u64)root->sectorsize - 1); 123 root->sectorsize - 1) & ~((u64)root->sectorsize - 1);
124 124
125 end_of_last_block = start_pos + num_bytes - 1; 125 end_of_last_block = start_pos + num_bytes - 1;
126 btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block); 126 err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block);
127 if (err)
128 return err;
129
127 for (i = 0; i < num_pages; i++) { 130 for (i = 0; i < num_pages; i++) {
128 struct page *p = pages[i]; 131 struct page *p = pages[i];
129 SetPageUptodate(p); 132 SetPageUptodate(p);
@@ -927,6 +930,11 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf,
927 err = file_remove_suid(file); 930 err = file_remove_suid(file);
928 if (err) 931 if (err)
929 goto out_nolock; 932 goto out_nolock;
933
934 err = btrfs_reserve_metadata_for_delalloc(root, inode, 1);
935 if (err)
936 goto out_nolock;
937
930 file_update_time(file); 938 file_update_time(file);
931 939
932 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); 940 pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL);
@@ -1028,6 +1036,7 @@ out:
1028 mutex_unlock(&inode->i_mutex); 1036 mutex_unlock(&inode->i_mutex);
1029 if (ret) 1037 if (ret)
1030 err = ret; 1038 err = ret;
1039 btrfs_unreserve_metadata_for_delalloc(root, inode, 1);
1031 1040
1032out_nolock: 1041out_nolock:
1033 kfree(pages); 1042 kfree(pages);