author    Chris Mason <chris.mason@oracle.com>    2010-05-25 20:56:50 -0400
committer Chris Mason <chris.mason@oracle.com>    2010-05-25 21:52:08 -0400
commit    4845e44ffdb26be9b25610664228e8ecaf949a0d (patch)
tree      8852e175b6b02a36df6b47c54d574f3365ddb34f /fs
parent    eaf25d933e64c2bf3c79b83e8820404f36fdfc52 (diff)
Btrfs: rework O_DIRECT enospc handling
This changes the O_DIRECT write code to mark extents as delalloc while it is processing them. Yan Zheng has reworked the enospc accounting based on tracking delalloc extents, and this makes it much easier to track enospc in the O_DIRECT code.

There are a few special cases in the O_DIRECT code, though: it only sets the EXTENT_DELALLOC bits, instead of doing EXTENT_DELALLOC | EXTENT_DIRTY | EXTENT_UPTODATE, because we don't want to mess with clearing the dirty and uptodate bits when things go wrong. This is important because there are no pages in the page cache, so any extent state structs that we put in the tree won't get freed by releasepage. We have to clear them ourselves as the DIO ends.

With this commit, we reserve space in btrfs_file_aio_write, and then as each btrfs_direct_IO call progresses it sets EXTENT_DELALLOC on the range. btrfs_get_blocks_direct is responsible for clearing the delalloc at the same time it drops the extent lock.

Signed-off-by: Chris Mason <chris.mason@oracle.com>
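Not part of the commit itself, but as an illustration of the bookkeeping the message describes: the DIO path sets only EXTENT_DELALLOC on the locked range and must clear both the lock and the delalloc bits itself, on success and on error, because no page-cache releasepage will ever free that extent state. The standalone C sketch below models that lifecycle with hypothetical toy_* names; it is a userspace toy, not btrfs code.

/*
 * Toy model (userspace, not kernel code) of the bit lifecycle described
 * above: the DIO path sets only DELALLOC on the locked range, never DIRTY
 * or UPTODATE, and because no page-cache releasepage will clean up, the
 * DIO path itself must clear LOCKED and DELALLOC whether the IO succeeds
 * or fails.  All toy_* names are hypothetical, not btrfs APIs.
 */
#include <stdbool.h>
#include <stdio.h>

#define TOY_LOCKED   (1u << 0)
#define TOY_DELALLOC (1u << 1)
#define TOY_DIRTY    (1u << 2)	/* deliberately never set by the DIO path */

struct toy_range {
	unsigned long long start;
	unsigned long long end;		/* inclusive, like [lockstart, lockend] */
	unsigned int bits;
};

static void toy_set_bits(struct toy_range *r, unsigned int bits)
{
	r->bits |= bits;
}

static void toy_clear_bits(struct toy_range *r, unsigned int bits)
{
	r->bits &= ~bits;
}

/* Stand-in for the actual direct IO: bytes "written" or negative errno. */
static long long toy_direct_io(const struct toy_range *r, bool fail)
{
	return fail ? -5 /* toy -EIO */ : (long long)(r->end - r->start + 1);
}

static long long toy_dio_write(unsigned long long offset,
			       unsigned long long len, bool fail)
{
	struct toy_range r = { offset, offset + len - 1, 0 };
	long long ret;

	/* Lock the range, then mark it delalloc only (no DIRTY/UPTODATE). */
	toy_set_bits(&r, TOY_LOCKED | TOY_DELALLOC);

	ret = toy_direct_io(&r, fail);

	/*
	 * Nothing else will free this state for us, so clear both bits here
	 * on success and on error.  The real patch splits this between
	 * btrfs_get_blocks_direct (success) and clear_extent_bit (errors).
	 */
	toy_clear_bits(&r, TOY_LOCKED | TOY_DELALLOC);

	printf("offset=%llu len=%llu ret=%lld leftover bits=0x%x\n",
	       offset, len, ret, r.bits);
	return ret;
}

int main(void)
{
	toy_dio_write(0, 4096, false);	/* clean write, bits end up 0 */
	toy_dio_write(4096, 4096, true);	/* failed write still cleans up */
	return 0;
}

The toy mirrors the cleanup obligation in the patch: the error and fallback paths in btrfs_direct_IO clear EXTENT_LOCKED | write_bits explicitly, while on the success path btrfs_get_blocks_direct clears the delalloc as it drops the extent lock.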
Diffstat (limited to 'fs')
-rw-r--r--  fs/btrfs/extent_io.c |  9
-rw-r--r--  fs/btrfs/extent_io.h |  4
-rw-r--r--  fs/btrfs/file.c      | 14
-rw-r--r--  fs/btrfs/inode.c     | 52
4 files changed, 49 insertions(+), 30 deletions(-)
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 15392af21bf..a4080c21ec5 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -135,7 +135,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
 	return state;
 }
 
-static void free_extent_state(struct extent_state *state)
+void free_extent_state(struct extent_state *state)
 {
 	if (!state)
 		return;
@@ -745,10 +745,9 @@ static void cache_state(struct extent_state *state,
  * [start, end] is inclusive This takes the tree lock.
  */
 
-static int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
-			  int bits, int exclusive_bits, u64 *failed_start,
-			  struct extent_state **cached_state,
-			  gfp_t mask)
+int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+		   int bits, int exclusive_bits, u64 *failed_start,
+		   struct extent_state **cached_state, gfp_t mask)
 {
 	struct extent_state *state;
 	struct extent_state *prealloc = NULL;
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 86c7b341d07..5691c7b590d 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -178,6 +178,7 @@ u64 count_range_bits(struct extent_io_tree *tree,
 		     u64 *start, u64 search_end,
 		     u64 max_bytes, unsigned long bits);
 
+void free_extent_state(struct extent_state *state);
 int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
 		   int bits, int filled, struct extent_state *cached_state);
 int clear_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
@@ -187,6 +188,9 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
 		     gfp_t mask);
 int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end,
 		    int bits, gfp_t mask);
+int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end,
+		   int bits, int exclusive_bits, u64 *failed_start,
+		   struct extent_state **cached_state, gfp_t mask);
 int set_extent_uptodate(struct extent_io_tree *tree, u64 start, u64 end,
 			gfp_t mask);
 int set_extent_new(struct extent_io_tree *tree, u64 start, u64 end,
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 233aea2e5ef..54556cae449 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -909,13 +909,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 	}
 
 	if (num_written < 0) {
-		if (num_written != -EIOCBQUEUED) {
-			/*
-			 * aio land will take care of releasing the
-			 * delalloc
-			 */
-			btrfs_delalloc_release_space(inode, count);
-		}
 		ret = num_written;
 		num_written = 0;
 		goto out;
@@ -924,13 +917,6 @@ static ssize_t btrfs_file_aio_write(struct kiocb *iocb,
 		pos = *ppos;
 		goto out;
 	}
-
-	/*
-	 * the buffered IO will reserve bytes for the rest of the
-	 * range, don't double count them here
-	 */
-	btrfs_delalloc_release_space(inode, count - num_written);
-
 	/*
 	 * We are going to do buffered for the rest of the range, so we
 	 * need to make sure to invalidate the buffered pages when we're
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 13a4aa22286..00aefbdcc2d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5327,8 +5327,9 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 			return PTR_ERR(em);
 		len = min(len, em->block_len);
 	}
-	unlock_extent(&BTRFS_I(inode)->io_tree, start, start + len - 1,
-		      GFP_NOFS);
+	clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1,
+			 EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1,
+			 0, NULL, GFP_NOFS);
 map:
 	bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
 			inode->i_blkbits;
@@ -5596,14 +5597,18 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
 	struct btrfs_ordered_extent *ordered;
+	struct extent_state *cached_state = NULL;
 	u64 lockstart, lockend;
 	ssize_t ret;
+	int writing = rw & WRITE;
+	int write_bits = 0;
 
 	lockstart = offset;
 	lockend = offset + iov_length(iov, nr_segs) - 1;
+
 	while (1) {
-		lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-			    GFP_NOFS);
+		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+				 0, &cached_state, GFP_NOFS);
 		/*
 		 * We're concerned with the entire range that we're going to be
 		 * doing DIO to, so we need to make sure theres no ordered
@@ -5613,29 +5618,54 @@ static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 						     lockend - lockstart + 1);
 		if (!ordered)
 			break;
-		unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-			      GFP_NOFS);
+		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+				     &cached_state, GFP_NOFS);
 		btrfs_start_ordered_extent(inode, ordered, 1);
 		btrfs_put_ordered_extent(ordered);
 		cond_resched();
 	}
 
+	/*
+	 * we don't use btrfs_set_extent_delalloc because we don't want
+	 * the dirty or uptodate bits
+	 */
+	if (writing) {
+		write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING;
+		ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+				     EXTENT_DELALLOC, 0, NULL, &cached_state,
+				     GFP_NOFS);
+		if (ret) {
+			clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+					 lockend, EXTENT_LOCKED | write_bits,
+					 1, 0, &cached_state, GFP_NOFS);
+			goto out;
+		}
+	}
+
+	free_extent_state(cached_state);
+	cached_state = NULL;
+
 	ret = __blockdev_direct_IO(rw, iocb, inode, NULL, iov, offset, nr_segs,
 				   btrfs_get_blocks_direct, NULL,
 				   btrfs_submit_direct, 0);
 
 	if (ret < 0 && ret != -EIOCBQUEUED) {
-		unlock_extent(&BTRFS_I(inode)->io_tree, offset,
-			      offset + iov_length(iov, nr_segs) - 1, GFP_NOFS);
+		clear_extent_bit(&BTRFS_I(inode)->io_tree, offset,
+			      offset + iov_length(iov, nr_segs) - 1,
+			      EXTENT_LOCKED | write_bits, 1, 0,
+			      &cached_state, GFP_NOFS);
 	} else if (ret >= 0 && ret < iov_length(iov, nr_segs)) {
 		/*
 		 * We're falling back to buffered, unlock the section we didn't
 		 * do IO on.
 		 */
-		unlock_extent(&BTRFS_I(inode)->io_tree, offset + ret,
-			      offset + iov_length(iov, nr_segs) - 1, GFP_NOFS);
+		clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret,
+			      offset + iov_length(iov, nr_segs) - 1,
+			      EXTENT_LOCKED | write_bits, 1, 0,
+			      &cached_state, GFP_NOFS);
 	}
+out:
+	free_extent_state(cached_state);
 	return ret;
 }
 