diff options
author | Josef Bacik <josef@redhat.com> | 2009-09-11 16:12:44 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2009-09-28 16:29:42 -0400 |
commit | 9ed74f2dba6ebf9f30b80554290bfc73cc3ef083 (patch) | |
tree | 763d58a4a11ceca26dcdaedefb1fd662c4e2fa8b /fs/btrfs/ctree.h | |
parent | c65ddb52dc412c9b67681b1aa16cd1bac8434e24 (diff) |
Btrfs: proper -ENOSPC handling
At the start of a transaction we do a btrfs_reserve_metadata_space() and
specify how many items we plan on modifying. Then once we've done our
modifications and such, just call btrfs_unreserve_metadata_space() for
the same number of items we reserved.
For keeping track of metadata needed for data I've had to add an extent_io op
for when we merge extents. This lets us track space properly when we are doing
sequential writes, so we don't end up reserving way more metadata space than
what we need.
The only place where the metadata space accounting is not done is in the
relocation code. This is because Yan is going to be reworking that code in the
near future, so running btrfs-vol -b could still possibly result in an
ENOSPC-related panic. This patch also turns off the metadata_ratio stuff in order to
allow users to more efficiently use their disk space.
This patch makes it so we track how much metadata we need for an inode's
delayed allocation extents by tracking how many extents are currently
waiting for allocation. It introduces two new callbacks for the
extent_io trees, merge_extent_hook and split_extent_hook. These help
us keep track of when we merge delalloc extents together and split them
up. Reservations are handled before any actual dirtying occurs,
and then we unreserve after we dirty.
btrfs_unreserve_metadata_for_delalloc() will make the appropriate
unreservations as needed based on the number of reservations we
currently have and the number of extents we currently have. Doing the
reservation separately from any of the actual dirtying lets us do
things like filemap_flush() the inode to try and force delalloc to
happen, or as a last resort actually start allocation on all delalloc
inodes in the fs. This has survived dbench, fs_mark and an fsx torture
test.
Signed-off-by: Josef Bacik <jbacik@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs/btrfs/ctree.h')
-rw-r--r-- | fs/btrfs/ctree.h | 23 |
1 files changed, 16 insertions, 7 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 80599b4e42bd..b3959a150c3b 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -675,18 +675,19 @@ struct btrfs_space_info { | |||
675 | current allocations */ | 675 | current allocations */ |
676 | u64 bytes_readonly; /* total bytes that are read only */ | 676 | u64 bytes_readonly; /* total bytes that are read only */ |
677 | u64 bytes_super; /* total bytes reserved for the super blocks */ | 677 | u64 bytes_super; /* total bytes reserved for the super blocks */ |
678 | 678 | u64 bytes_root; /* the number of bytes needed to commit a | |
679 | /* delalloc accounting */ | 679 | transaction */ |
680 | u64 bytes_delalloc; /* number of bytes reserved for allocation, | ||
681 | this space is not necessarily reserved yet | ||
682 | by the allocator */ | ||
683 | u64 bytes_may_use; /* number of bytes that may be used for | 680 | u64 bytes_may_use; /* number of bytes that may be used for |
684 | delalloc */ | 681 | delalloc/allocations */ |
682 | u64 bytes_delalloc; /* number of bytes currently reserved for | ||
683 | delayed allocation */ | ||
685 | 684 | ||
686 | int full; /* indicates that we cannot allocate any more | 685 | int full; /* indicates that we cannot allocate any more |
687 | chunks for this space */ | 686 | chunks for this space */ |
688 | int force_alloc; /* set if we need to force a chunk alloc for | 687 | int force_alloc; /* set if we need to force a chunk alloc for |
689 | this space */ | 688 | this space */ |
689 | int force_delalloc; /* make people start doing filemap_flush until | ||
690 | we're under a threshold */ | ||
690 | 691 | ||
691 | struct list_head list; | 692 | struct list_head list; |
692 | 693 | ||
@@ -695,6 +696,9 @@ struct btrfs_space_info { | |||
695 | spinlock_t lock; | 696 | spinlock_t lock; |
696 | struct rw_semaphore groups_sem; | 697 | struct rw_semaphore groups_sem; |
697 | atomic_t caching_threads; | 698 | atomic_t caching_threads; |
699 | |||
700 | int allocating_chunk; | ||
701 | wait_queue_head_t wait; | ||
698 | }; | 702 | }; |
699 | 703 | ||
700 | /* | 704 | /* |
@@ -2022,7 +2026,12 @@ u64 btrfs_reduce_alloc_profile(struct btrfs_root *root, u64 flags); | |||
2022 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); | 2026 | void btrfs_set_inode_space_info(struct btrfs_root *root, struct inode *ionde); |
2023 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); | 2027 | void btrfs_clear_space_info_full(struct btrfs_fs_info *info); |
2024 | 2028 | ||
2025 | int btrfs_check_metadata_free_space(struct btrfs_root *root); | 2029 | int btrfs_reserve_metadata_space(struct btrfs_root *root, int num_items); |
2030 | int btrfs_unreserve_metadata_space(struct btrfs_root *root, int num_items); | ||
2031 | int btrfs_unreserve_metadata_for_delalloc(struct btrfs_root *root, | ||
2032 | struct inode *inode, int num_items); | ||
2033 | int btrfs_reserve_metadata_for_delalloc(struct btrfs_root *root, | ||
2034 | struct inode *inode, int num_items); | ||
2026 | int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, | 2035 | int btrfs_check_data_free_space(struct btrfs_root *root, struct inode *inode, |
2027 | u64 bytes); | 2036 | u64 bytes); |
2028 | void btrfs_free_reserved_data_space(struct btrfs_root *root, | 2037 | void btrfs_free_reserved_data_space(struct btrfs_root *root, |