Diffstat (limited to 'fs')
 fs/btrfs/inode.c | 256 ++++++++++++++++++++++++++++++-----------------------
 1 file changed, 127 insertions(+), 129 deletions(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index dac1fc21d809..09182449cbdf 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5773,18 +5773,109 @@ out:
 	return ret;
 }
 
+static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
+			      struct extent_state **cached_state, int writing)
+{
+	struct btrfs_ordered_extent *ordered;
+	int ret = 0;
+
+	while (1) {
+		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+				 0, cached_state);
+		/*
+		 * We're concerned with the entire range that we're going to be
+		 * doing DIO to, so we need to make sure there are no ordered
+		 * extents in this range.
+		 */
+		ordered = btrfs_lookup_ordered_range(inode, lockstart,
+						     lockend - lockstart + 1);
+
+		/*
+		 * We need to make sure there are no buffered pages in this
+		 * range either; we could have raced between the invalidate in
+		 * generic_file_direct_write and locking the extent. The
+		 * invalidate needs to happen so that reads after a write do not
+		 * get stale data.
+		 */
+		if (!ordered && (!writing ||
+		    !test_range_bit(&BTRFS_I(inode)->io_tree,
+				    lockstart, lockend, EXTENT_UPTODATE, 0,
+				    *cached_state)))
+			break;
+
+		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+				     cached_state, GFP_NOFS);
+
+		if (ordered) {
+			btrfs_start_ordered_extent(inode, ordered, 1);
+			btrfs_put_ordered_extent(ordered);
+		} else {
+			/* Screw you mmap */
+			ret = filemap_write_and_wait_range(inode->i_mapping,
+							   lockstart,
+							   lockend);
+			if (ret)
+				break;
+
+			/*
+			 * If we found a page that couldn't be invalidated just
+			 * fall back to buffered.
+			 */
+			ret = invalidate_inode_pages2_range(inode->i_mapping,
+					lockstart >> PAGE_CACHE_SHIFT,
+					lockend >> PAGE_CACHE_SHIFT);
+			if (ret)
+				break;
+		}
+
+		cond_resched();
+	}
+
+	return ret;
+}
+
 static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 				   struct buffer_head *bh_result, int create)
 {
 	struct extent_map *em;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct extent_state *cached_state = NULL;
 	u64 start = iblock << inode->i_blkbits;
+	u64 lockstart, lockend;
 	u64 len = bh_result->b_size;
 	struct btrfs_trans_handle *trans;
+	int unlock_bits = EXTENT_LOCKED;
+	int ret;
+
+	lockstart = start;
+	lockend = start + len - 1;
+	if (create) {
+		ret = btrfs_delalloc_reserve_space(inode, len);
+		if (ret)
+			return ret;
+		unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
+	}
+
+	/*
+	 * If this errors out it's because we couldn't invalidate pagecache for
+	 * this range and we need to fall back to buffered.
+	 */
+	if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create))
+		return -ENOTBLK;
+
+	if (create) {
+		ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+				     lockend, EXTENT_DELALLOC, NULL,
+				     &cached_state, GFP_NOFS);
+		if (ret)
+			goto unlock_err;
+	}
 
 	em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
-	if (IS_ERR(em))
-		return PTR_ERR(em);
+	if (IS_ERR(em)) {
+		ret = PTR_ERR(em);
+		goto unlock_err;
+	}
 
 	/*
 	 * Ok for INLINE and COMPRESSED extents we need to fallback on buffered
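The helper added above treats [lockstart, lockend] as an inclusive byte range: btrfs_lookup_ordered_range() takes a length, hence the lockend - lockstart + 1, while invalidate_inode_pages2_range() takes inclusive page indexes, hence the shifts by PAGE_CACHE_SHIFT. A minimal user-space sketch of that arithmetic, assuming 4 KiB pages (the kernel's actual PAGE_CACHE_SHIFT is configuration-dependent); the offsets are invented for illustration:

#include <stdio.h>
#include <stdint.h>

#define PAGE_CACHE_SHIFT 12	/* assumed: 4 KiB pages */

int main(void)
{
	uint64_t lockstart = 8192;	/* example byte offsets */
	uint64_t lockend   = 20479;	/* inclusive end, as in the patch */

	/* btrfs_lookup_ordered_range() wants a length, hence the +1 */
	uint64_t len = lockend - lockstart + 1;

	/* invalidate_inode_pages2_range() wants inclusive page indexes */
	uint64_t first_page = lockstart >> PAGE_CACHE_SHIFT;
	uint64_t last_page  = lockend >> PAGE_CACHE_SHIFT;

	printf("len=%llu pages=%llu..%llu\n",
	       (unsigned long long)len,
	       (unsigned long long)first_page,
	       (unsigned long long)last_page);	/* len=12288 pages=2..4 */
	return 0;
}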
@@ -5803,17 +5894,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 	if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
 	    em->block_start == EXTENT_MAP_INLINE) {
 		free_extent_map(em);
-		return -ENOTBLK;
+		ret = -ENOTBLK;
+		goto unlock_err;
 	}
 
 	/* Just a good old fashioned hole, return */
 	if (!create && (em->block_start == EXTENT_MAP_HOLE ||
 			test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
 		free_extent_map(em);
-		/* DIO will do one hole at a time, so just unlock a sector */
-		unlock_extent(&BTRFS_I(inode)->io_tree, start,
-			      start + root->sectorsize - 1);
-		return 0;
+		ret = 0;
+		goto unlock_err;
 	}
 
 	/*
@@ -5826,8 +5916,9 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 	 *
 	 */
 	if (!create) {
-		len = em->len - (start - em->start);
-		goto map;
+		len = min(len, em->len - (start - em->start));
+		lockstart = start + len;
+		goto unlock;
 	}
 
 	if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
@@ -5859,7 +5950,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 		btrfs_end_transaction(trans, root);
 		if (ret) {
 			free_extent_map(em);
-			return ret;
+			goto unlock_err;
 		}
 		goto unlock;
 	}
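These hunks convert early returns into jumps to a single cleanup label, since every failure path now has to drop the extent lock taken at the top of btrfs_get_blocks_direct. A generic sketch of that idiom; take_lock, step_one, step_two, and drop_lock are hypothetical stand-ins, not kernel functions:

#include <stdio.h>

/* Hypothetical stand-ins for the lock and work steps. */
static int take_lock(void)  { return 0; }
static int step_one(void)   { return 0; }
static int step_two(void)   { return -1; }	/* simulate a failure */
static void drop_lock(void) { }

static int do_mapped_work(void)
{
	int ret;

	ret = take_lock();
	if (ret)
		return ret;		/* nothing to undo yet */

	ret = step_one();
	if (ret)
		goto unlock_err;	/* every later failure funnels here */

	ret = step_two();
	if (ret)
		goto unlock_err;

	drop_lock();			/* success path drops the lock too */
	return 0;

unlock_err:
	drop_lock();			/* one place to undo the lock on error */
	return ret;
}

int main(void)
{
	printf("ret=%d\n", do_mapped_work());	/* prints ret=-1 */
	return 0;
}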
@@ -5872,14 +5963,12 @@ must_cow:
 	 */
 	len = bh_result->b_size;
 	em = btrfs_new_extent_direct(inode, em, start, len);
-	if (IS_ERR(em))
-		return PTR_ERR(em);
+	if (IS_ERR(em)) {
+		ret = PTR_ERR(em);
+		goto unlock_err;
+	}
 	len = min(len, em->len - (start - em->start));
 unlock:
-	clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1,
-			 EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1,
-			 0, NULL, GFP_NOFS);
-map:
 	bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
 		inode->i_blkbits;
 	bh_result->b_size = len;
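The b_blocknr computation above translates the logical offset start into a physical block number: the offset into the extent (start - em->start) is added to the extent's physical start, then shifted back down to block units. A user-space sketch with invented offsets, assuming a 4096-byte block size (i_blkbits = 12):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	unsigned int i_blkbits = 12;		/* assumed 4096-byte blocks */
	uint64_t em_start = 1048576;		/* logical start of the extent */
	uint64_t em_block_start = 8388608;	/* physical start on disk */
	uint64_t start = 1056768;		/* logical byte being mapped */

	uint64_t b_blocknr =
		(em_block_start + (start - em_start)) >> i_blkbits;

	/* (8388608 + 8192) >> 12 = 2050 */
	printf("b_blocknr=%llu\n", (unsigned long long)b_blocknr);
	return 0;
}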
@@ -5897,9 +5986,28 @@ map:
 		i_size_write(inode, start + len);
 	}
 
+	/*
+	 * In the case of a write we need to clear and unlock the entire
+	 * range; in the case of a read we only need to unlock the end area
+	 * that we aren't using, if there is any leftover space.
+	 */
+	if (lockstart < lockend)
+		clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+				 unlock_bits, 1, 0, &cached_state, GFP_NOFS);
+	else
+		free_extent_state(cached_state);
+
 	free_extent_map(em);
 
 	return 0;
+
+unlock_err:
+	if (create)
+		unlock_bits |= EXTENT_DO_ACCOUNTING;
+
+	clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+			 unlock_bits, 1, 0, &cached_state, GFP_NOFS);
+	return ret;
 }
 
 struct btrfs_dio_private {
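On the read side, len was clamped to the extent and lockstart advanced to start + len, so the lockstart < lockend test above unlocks only whatever tail of the originally locked range the mapping did not cover; if the mapping consumed the whole range, only the cached state is freed. A user-space sketch of that arithmetic, with invented offsets:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	uint64_t start = 0, len = 65536;	/* requested mapping */
	uint64_t em_start = 0, em_len = 16384;	/* extent found on disk */
	uint64_t lockstart = start;
	uint64_t lockend = start + len - 1;

	/* clamp to what the extent actually covers, as in the read path */
	if (len > em_len - (start - em_start))
		len = em_len - (start - em_start);
	lockstart = start + len;

	if (lockstart < lockend)
		printf("unlock leftover [%llu, %llu]\n",
		       (unsigned long long)lockstart,
		       (unsigned long long)lockend);	/* [16384, 65535] */
	else
		printf("whole range consumed, just free cached state\n");
	return 0;
}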
@@ -6340,132 +6448,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
 out:
 	return retval;
 }
+
 static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 			const struct iovec *iov, loff_t offset,
 			unsigned long nr_segs)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
-	struct btrfs_ordered_extent *ordered;
-	struct extent_state *cached_state = NULL;
-	u64 lockstart, lockend;
-	ssize_t ret;
-	int writing = rw & WRITE;
-	int write_bits = 0;
-	size_t count = iov_length(iov, nr_segs);
 
 	if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
-			    offset, nr_segs)) {
+			    offset, nr_segs))
 		return 0;
-	}
-
-	lockstart = offset;
-	lockend = offset + count - 1;
-
-	if (writing) {
-		ret = btrfs_delalloc_reserve_space(inode, count);
-		if (ret)
-			goto out;
-	}
 
-	while (1) {
-		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-				 0, &cached_state);
-		/*
-		 * We're concerned with the entire range that we're going to be
-		 * doing DIO to, so we need to make sure theres no ordered
-		 * extents in this range.
-		 */
-		ordered = btrfs_lookup_ordered_range(inode, lockstart,
-						     lockend - lockstart + 1);
-
-		/*
-		 * We need to make sure there are no buffered pages in this
-		 * range either, we could have raced between the invalidate in
-		 * generic_file_direct_write and locking the extent. The
-		 * invalidate needs to happen so that reads after a write do not
-		 * get stale data.
-		 */
-		if (!ordered && (!writing ||
-		    !test_range_bit(&BTRFS_I(inode)->io_tree,
-				    lockstart, lockend, EXTENT_UPTODATE, 0,
-				    cached_state)))
-			break;
-
-		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-				     &cached_state, GFP_NOFS);
-
-		if (ordered) {
-			btrfs_start_ordered_extent(inode, ordered, 1);
-			btrfs_put_ordered_extent(ordered);
-		} else {
-			/* Screw you mmap */
-			ret = filemap_write_and_wait_range(file->f_mapping,
-							   lockstart,
-							   lockend);
-			if (ret)
-				goto out;
-
-			/*
-			 * If we found a page that couldn't be invalidated just
-			 * fall back to buffered.
-			 */
-			ret = invalidate_inode_pages2_range(file->f_mapping,
-					lockstart >> PAGE_CACHE_SHIFT,
-					lockend >> PAGE_CACHE_SHIFT);
-			if (ret) {
-				if (ret == -EBUSY)
-					ret = 0;
-				goto out;
-			}
-		}
-
-		cond_resched();
-	}
-
-	/*
-	 * we don't use btrfs_set_extent_delalloc because we don't want
-	 * the dirty or uptodate bits
-	 */
-	if (writing) {
-		write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING;
-		ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-				     EXTENT_DELALLOC, NULL, &cached_state,
-				     GFP_NOFS);
-		if (ret) {
-			clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
-					 lockend, EXTENT_LOCKED | write_bits,
-					 1, 0, &cached_state, GFP_NOFS);
-			goto out;
-		}
-	}
-
-	free_extent_state(cached_state);
-	cached_state = NULL;
-
-	ret = __blockdev_direct_IO(rw, iocb, inode,
+	return __blockdev_direct_IO(rw, iocb, inode,
 		   BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
 		   iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
 		   btrfs_submit_direct, 0);
-
-	if (ret < 0 && ret != -EIOCBQUEUED) {
-		clear_extent_bit(&BTRFS_I(inode)->io_tree, offset,
-			      offset + iov_length(iov, nr_segs) - 1,
-			      EXTENT_LOCKED | write_bits, 1, 0,
-			      &cached_state, GFP_NOFS);
-	} else if (ret >= 0 && ret < iov_length(iov, nr_segs)) {
-		/*
-		 * We're falling back to buffered, unlock the section we didn't
-		 * do IO on.
-		 */
-		clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret,
-			      offset + iov_length(iov, nr_segs) - 1,
-			      EXTENT_LOCKED | write_bits, 1, 0,
-			      &cached_state, GFP_NOFS);
-	}
-out:
-	free_extent_state(cached_state);
-	return ret;
 }
 
 static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
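For orientation, a rough sketch of the direct-IO call flow after this patch, using only functions that appear in the hunks above (the generic DIO internals are elided):

/*
 * btrfs_direct_IO()
 *   -> __blockdev_direct_IO()              generic DIO driver
 *        -> btrfs_get_blocks_direct()      called for each mapping
 *             -> lock_extent_direct()      lock range, wait out ordered
 *                                          extents, kick out buffered pages
 *             -> btrfs_get_extent() /
 *                btrfs_new_extent_direct() look up or allocate blocks
 *             -> clear_extent_bit()        writes: clear and unlock the
 *                                          range; reads: unlock the
 *                                          unused tail
 *        -> btrfs_submit_direct()          submit the actual bios
 */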