diff options
author | Chris Mason <chris.mason@oracle.com> | 2010-05-25 20:56:50 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2010-05-25 21:52:08 -0400 |
commit | 4845e44ffdb26be9b25610664228e8ecaf949a0d (patch) | |
tree | 8852e175b6b02a36df6b47c54d574f3365ddb34f /fs | |
parent | eaf25d933e64c2bf3c79b83e8820404f36fdfc52 (diff) |
Btrfs: rework O_DIRECT enospc handling
This changes O_DIRECT write code to mark extents as delalloc
while it is processing them. Yan Zheng has reworked the
enospc accounting based on tracking delalloc extents and
this makes it much easier to track enospc in the O_DIRECT code.
There are a few special cases with the O_DIRECT code though:
it only sets the EXTENT_DELALLOC bits, instead of doing
EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE, because
we don't want to mess with clearing the dirty and uptodate
bits when things go wrong. This is important because there
are no pages in the page cache, so any extent state structs
that we put in the tree won't get freed by releasepage. We have
to clear them ourselves as the DIO ends.
With this commit, we reserve space in btrfs_file_aio_write,
and then as each btrfs_direct_IO call progresses it sets
EXTENT_DELALLOC on the range.
btrfs_get_blocks_direct is responsible for clearing the delalloc
at the same time it drops the extent lock.
Signed-off-by: Chris Mason <chris.mason@oracle.com>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/btrfs/extent_io.c | 9 | ||||
-rw-r--r-- | fs/btrfs/extent_io.h | 4 | ||||
-rw-r--r-- | fs/btrfs/file.c | 14 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 52 |
4 files changed, 49 insertions, 30 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 15392af21bf..a4080c21ec5 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c | |||
@@ -135,7 +135,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask) | |||
135 | return state; | 135 | return state; |
136 | } | 136 | } |
137 | 137 | ||
138 | static void free_extent_state(struct extent_state *state) | 138 | void free_extent_state(struct extent_state *state) |
139 | { | 139 | { |
140 | if (!state) | 140 | if (!state) |
141 | return; | 141 | return; |
@@ -745,10 +745,9 @@ static void cache_state(struct extent_state *state, | |||
745 | * [start, end] is inclusive This takes the tree lock. | 745 | * [start, end] is inclusive This takes the tree lock. |
746 | */ | 746 | */ |
747 | 747 | ||
748 | static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | 748 | int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, |
749 | int bits, int exclusive_bits, u64 *failed_start, | 749 | int bits, int exclusive_bits, u64 *failed_start, |
750 | struct extent_state **cached_state, | 750 | struct extent_state **cached_state, gfp_t mask) |
751 | gfp_t mask) | ||
752 | { | 751 | { |
753 | struct extent_state *state; | 752 | struct extent_state *state; |
754 | struct extent_state *prealloc = NULL; | 753 | struct extent_state *prealloc = NULL; |
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 86c7b341d07..5691c7b590d 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h | |||
@@ -178,6 +178,7 @@ u64 count_range_bits(struct extent_io_tree *tree, | |||
178 | u64 *start, u64 search_end, | 178 | u64 *start, u64 search_end, |
179 | u64 max_bytes, unsigned long bits); | 179 | u64 max_bytes, unsigned long bits); |
180 | 180 | ||
181 | void free_extent_state(struct extent_state *state); | ||
181 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, | 182 | int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end, |
182 | int bits, int filled, struct extent_state *cached_state); | 183 | int bits, int filled, struct extent_state *cached_state); |
183 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 184 | int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
@@ -187,6 +188,9 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | |||
187 | gfp_t mask); | 188 | gfp_t mask); |
188 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, | 189 | int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, |
189 | int bits, gfp_t mask); | 190 | int bits, gfp_t mask); |
191 | int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, | ||
192 | int bits, int exclusive_bits, u64 *failed_start, | ||
193 | struct extent_state **cached_state, gfp_t mask); | ||
190 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, | 194 | int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end, |
191 | gfp_t mask); | 195 | gfp_t mask); |
192 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, | 196 | int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end, |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 233aea2e5ef..54556cae449 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -909,13 +909,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
909 | } | 909 | } |
910 | 910 | ||
911 | if (num_written < 0) { | 911 | if (num_written < 0) { |
912 | if (num_written != -EIOCBQUEUED) { | ||
913 | /* | ||
914 | * aio land will take care of releasing the | ||
915 | * delalloc | ||
916 | */ | ||
917 | btrfs_delalloc_release_space(inode, count); | ||
918 | } | ||
919 | ret = num_written; | 912 | ret = num_written; |
920 | num_written = 0; | 913 | num_written = 0; |
921 | goto out; | 914 | goto out; |
@@ -924,13 +917,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb, | |||
924 | pos = *ppos; | 917 | pos = *ppos; |
925 | goto out; | 918 | goto out; |
926 | } | 919 | } |
927 | |||
928 | /* | ||
929 | * the buffered IO will reserve bytes for the rest of the | ||
930 | * range, don't double count them here | ||
931 | */ | ||
932 | btrfs_delalloc_release_space(inode, count - num_written); | ||
933 | |||
934 | /* | 920 | /* |
935 | * We are going to do buffered for the rest of the range, so we | 921 | * We are going to do buffered for the rest of the range, so we |
936 | * need to make sure to invalidate the buffered pages when we're | 922 | * need to make sure to invalidate the buffered pages when we're |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 13a4aa22286..00aefbdcc2d 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -5327,8 +5327,9 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | |||
5327 | return PTR_ERR(em); | 5327 | return PTR_ERR(em); |
5328 | len = min(len, em->block_len); | 5328 | len = min(len, em->block_len); |
5329 | } | 5329 | } |
5330 | unlock_extent(&BTRFS_I(inode)->io_tree, start, start + len - 1, | 5330 | clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1, |
5331 | GFP_NOFS); | 5331 | EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1, |
5332 | 0, NULL, GFP_NOFS); | ||
5332 | map: | 5333 | map: |
5333 | bh_result->b_blocknr = (em->block_start + (start - em->start)) >> | 5334 | bh_result->b_blocknr = (em->block_start + (start - em->start)) >> |
5334 | inode->i_blkbits; | 5335 | inode->i_blkbits; |
@@ -5596,14 +5597,18 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | |||
5596 | struct file *file = iocb->ki_filp; | 5597 | struct file *file = iocb->ki_filp; |
5597 | struct inode *inode = file->f_mapping->host; | 5598 | struct inode *inode = file->f_mapping->host; |
5598 | struct btrfs_ordered_extent *ordered; | 5599 | struct btrfs_ordered_extent *ordered; |
5600 | struct extent_state *cached_state = NULL; | ||
5599 | u64 lockstart, lockend; | 5601 | u64 lockstart, lockend; |
5600 | ssize_t ret; | 5602 | ssize_t ret; |
5603 | int writing = rw & WRITE; | ||
5604 | int write_bits = 0; | ||
5601 | 5605 | ||
5602 | lockstart = offset; | 5606 | lockstart = offset; |
5603 | lockend = offset + iov_length(iov, nr_segs) - 1; | 5607 | lockend = offset + iov_length(iov, nr_segs) - 1; |
5608 | |||
5604 | while (1) { | 5609 | while (1) { |
5605 | lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend, | 5610 | lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend, |
5606 | GFP_NOFS); | 5611 | 0, &cached_state, GFP_NOFS); |
5607 | /* | 5612 | /* |
5608 | * We're concerned with the entire range that we're going to be | 5613 | * We're concerned with the entire range that we're going to be |
5609 | * doing DIO to, so we need to make sure theres no ordered | 5614 | * doing DIO to, so we need to make sure theres no ordered |
@@ -5613,29 +5618,54 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | |||
5613 | lockend - lockstart + 1); | 5618 | lockend - lockstart + 1); |
5614 | if (!ordered) | 5619 | if (!ordered) |
5615 | break; | 5620 | break; |
5616 | unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend, | 5621 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend, |
5617 | GFP_NOFS); | 5622 | &cached_state, GFP_NOFS); |
5618 | btrfs_start_ordered_extent(inode, ordered, 1); | 5623 | btrfs_start_ordered_extent(inode, ordered, 1); |
5619 | btrfs_put_ordered_extent(ordered); | 5624 | btrfs_put_ordered_extent(ordered); |
5620 | cond_resched(); | 5625 | cond_resched(); |
5621 | } | 5626 | } |
5622 | 5627 | ||
5628 | /* | ||
5629 | * we don't use btrfs_set_extent_delalloc because we don't want | ||
5630 | * the dirty or uptodate bits | ||
5631 | */ | ||
5632 | if (writing) { | ||
5633 | write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING; | ||
5634 | ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
5635 | EXTENT_DELALLOC, 0, NULL, &cached_state, | ||
5636 | GFP_NOFS); | ||
5637 | if (ret) { | ||
5638 | clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, | ||
5639 | lockend, EXTENT_LOCKED | write_bits, | ||
5640 | 1, 0, &cached_state, GFP_NOFS); | ||
5641 | goto out; | ||
5642 | } | ||
5643 | } | ||
5644 | |||
5645 | free_extent_state(cached_state); | ||
5646 | cached_state = NULL; | ||
5647 | |||
5623 | ret = __blockdev_direct_IO(rw, iocb, inode, NULL, iov, offset, nr_segs, | 5648 | ret = __blockdev_direct_IO(rw, iocb, inode, NULL, iov, offset, nr_segs, |
5624 | btrfs_get_blocks_direct, NULL, | 5649 | btrfs_get_blocks_direct, NULL, |
5625 | btrfs_submit_direct, 0); | 5650 | btrfs_submit_direct, 0); |
5626 | 5651 | ||
5627 | if (ret < 0 && ret != -EIOCBQUEUED) { | 5652 | if (ret < 0 && ret != -EIOCBQUEUED) { |
5628 | unlock_extent(&BTRFS_I(inode)->io_tree, offset, | 5653 | clear_extent_bit(&BTRFS_I(inode)->io_tree, offset, |
5629 | offset + iov_length(iov, nr_segs) - 1, GFP_NOFS); | 5654 | offset + iov_length(iov, nr_segs) - 1, |
5655 | EXTENT_LOCKED | write_bits, 1, 0, | ||
5656 | &cached_state, GFP_NOFS); | ||
5630 | } else if (ret >= 0 && ret < iov_length(iov, nr_segs)) { | 5657 | } else if (ret >= 0 && ret < iov_length(iov, nr_segs)) { |
5631 | /* | 5658 | /* |
5632 | * We're falling back to buffered, unlock the section we didn't | 5659 | * We're falling back to buffered, unlock the section we didn't |
5633 | * do IO on. | 5660 | * do IO on. |
5634 | */ | 5661 | */ |
5635 | unlock_extent(&BTRFS_I(inode)->io_tree, offset + ret, | 5662 | clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret, |
5636 | offset + iov_length(iov, nr_segs) - 1, GFP_NOFS); | 5663 | offset + iov_length(iov, nr_segs) - 1, |
5664 | EXTENT_LOCKED | write_bits, 1, 0, | ||
5665 | &cached_state, GFP_NOFS); | ||
5637 | } | 5666 | } |
5638 | 5667 | out: | |
5668 | free_extent_state(cached_state); | ||
5639 | return ret; | 5669 | return ret; |
5640 | } | 5670 | } |
5641 | 5671 | ||