Diffstat (limited to 'fs')
-rw-r--r--	fs/btrfs/inode.c	256
1 file changed, 127 insertions, 129 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index dac1fc21d809..09182449cbdf 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5773,18 +5773,109 @@ out:
 	return ret;
 }
 
+static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
+			      struct extent_state **cached_state, int writing)
+{
+	struct btrfs_ordered_extent *ordered;
+	int ret = 0;
+
+	while (1) {
+		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+				 0, cached_state);
+		/*
+		 * We're concerned with the entire range that we're going to be
+		 * doing DIO to, so we need to make sure theres no ordered
+		 * extents in this range.
+		 */
+		ordered = btrfs_lookup_ordered_range(inode, lockstart,
+						     lockend - lockstart + 1);
+
+		/*
+		 * We need to make sure there are no buffered pages in this
+		 * range either, we could have raced between the invalidate in
+		 * generic_file_direct_write and locking the extent. The
+		 * invalidate needs to happen so that reads after a write do not
+		 * get stale data.
+		 */
+		if (!ordered && (!writing ||
+		    !test_range_bit(&BTRFS_I(inode)->io_tree,
+				    lockstart, lockend, EXTENT_UPTODATE, 0,
+				    *cached_state)))
+			break;
+
+		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+				     cached_state, GFP_NOFS);
+
+		if (ordered) {
+			btrfs_start_ordered_extent(inode, ordered, 1);
+			btrfs_put_ordered_extent(ordered);
+		} else {
+			/* Screw you mmap */
+			ret = filemap_write_and_wait_range(inode->i_mapping,
+							   lockstart,
+							   lockend);
+			if (ret)
+				break;
+
+			/*
+			 * If we found a page that couldn't be invalidated just
+			 * fall back to buffered.
+			 */
+			ret = invalidate_inode_pages2_range(inode->i_mapping,
+					lockstart >> PAGE_CACHE_SHIFT,
+					lockend >> PAGE_CACHE_SHIFT);
+			if (ret)
+				break;
+		}
+
+		cond_resched();
+	}
+
+	return ret;
+}
+
 static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 				   struct buffer_head *bh_result, int create)
 {
 	struct extent_map *em;
 	struct btrfs_root *root = BTRFS_I(inode)->root;
+	struct extent_state *cached_state = NULL;
 	u64 start = iblock << inode->i_blkbits;
+	u64 lockstart, lockend;
 	u64 len = bh_result->b_size;
 	struct btrfs_trans_handle *trans;
+	int unlock_bits = EXTENT_LOCKED;
+	int ret;
+
+	lockstart = start;
+	lockend = start + len - 1;
+	if (create) {
+		ret = btrfs_delalloc_reserve_space(inode, len);
+		if (ret)
+			return ret;
+		unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
+	}
+
+	/*
+	 * If this errors out it's because we couldn't invalidate pagecache for
+	 * this range and we need to fallback to buffered.
+	 */
+	if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create))
+		return -ENOTBLK;
+
+	if (create) {
+		ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+				     lockend, EXTENT_DELALLOC, NULL,
+				     &cached_state, GFP_NOFS);
+		if (ret)
+			goto unlock_err;
+	}
 
 	em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
-	if (IS_ERR(em))
-		return PTR_ERR(em);
+	if (IS_ERR(em)) {
+		ret = PTR_ERR(em);
+		goto unlock_err;
+	}
 
 	/*
 	 * Ok for INLINE and COMPRESSED extents we need to fallback on buffered
@@ -5803,17 +5894,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 	if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
 	    em->block_start == EXTENT_MAP_INLINE) {
 		free_extent_map(em);
-		return -ENOTBLK;
+		ret = -ENOTBLK;
+		goto unlock_err;
 	}
 
 	/* Just a good old fashioned hole, return */
 	if (!create && (em->block_start == EXTENT_MAP_HOLE ||
 			test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
 		free_extent_map(em);
-		/* DIO will do one hole at a time, so just unlock a sector */
-		unlock_extent(&BTRFS_I(inode)->io_tree, start,
-			      start + root->sectorsize - 1);
-		return 0;
+		ret = 0;
+		goto unlock_err;
 	}
 
 	/*
@@ -5826,8 +5916,9 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 	 *
 	 */
 	if (!create) {
-		len = em->len - (start - em->start);
-		goto map;
+		len = min(len, em->len - (start - em->start));
+		lockstart = start + len;
+		goto unlock;
 	}
 
 	if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
@@ -5859,7 +5950,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
 		btrfs_end_transaction(trans, root);
 		if (ret) {
 			free_extent_map(em);
-			return ret;
+			goto unlock_err;
 		}
 		goto unlock;
 	}
@@ -5872,14 +5963,12 @@ must_cow:
 	 */
 	len = bh_result->b_size;
 	em = btrfs_new_extent_direct(inode, em, start, len);
-	if (IS_ERR(em))
-		return PTR_ERR(em);
+	if (IS_ERR(em)) {
+		ret = PTR_ERR(em);
+		goto unlock_err;
+	}
 	len = min(len, em->len - (start - em->start));
 unlock:
-	clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1,
-			 EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1,
-			 0, NULL, GFP_NOFS);
-map:
 	bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
 		inode->i_blkbits;
 	bh_result->b_size = len;
@@ -5897,9 +5986,28 @@ map:
 		i_size_write(inode, start + len);
 	}
 
+	/*
+	 * In the case of write we need to clear and unlock the entire range,
+	 * in the case of read we need to unlock only the end area that we
+	 * aren't using if there is any left over space.
+	 */
+	if (lockstart < lockend)
+		clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+				 unlock_bits, 1, 0, &cached_state, GFP_NOFS);
+	else
+		free_extent_state(cached_state);
+
 	free_extent_map(em);
 
 	return 0;
+
+unlock_err:
+	if (create)
+		unlock_bits |= EXTENT_DO_ACCOUNTING;
+
+	clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+			 unlock_bits, 1, 0, &cached_state, GFP_NOFS);
+	return ret;
 }
 
 struct btrfs_dio_private {
@@ -6340,132 +6448,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
 out:
 	return retval;
 }
+
 static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
 			const struct iovec *iov, loff_t offset,
 			unsigned long nr_segs)
 {
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
-	struct btrfs_ordered_extent *ordered;
-	struct extent_state *cached_state = NULL;
-	u64 lockstart, lockend;
-	ssize_t ret;
-	int writing = rw & WRITE;
-	int write_bits = 0;
-	size_t count = iov_length(iov, nr_segs);
 
 	if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
-			    offset, nr_segs)) {
+			    offset, nr_segs))
 		return 0;
-	}
-
-	lockstart = offset;
-	lockend = offset + count - 1;
-
-	if (writing) {
-		ret = btrfs_delalloc_reserve_space(inode, count);
-		if (ret)
-			goto out;
-	}
 
-	while (1) {
-		lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-				 0, &cached_state);
-		/*
-		 * We're concerned with the entire range that we're going to be
-		 * doing DIO to, so we need to make sure theres no ordered
-		 * extents in this range.
-		 */
-		ordered = btrfs_lookup_ordered_range(inode, lockstart,
-						     lockend - lockstart + 1);
-
-		/*
-		 * We need to make sure there are no buffered pages in this
-		 * range either, we could have raced between the invalidate in
-		 * generic_file_direct_write and locking the extent. The
-		 * invalidate needs to happen so that reads after a write do not
-		 * get stale data.
-		 */
-		if (!ordered && (!writing ||
-		    !test_range_bit(&BTRFS_I(inode)->io_tree,
-				    lockstart, lockend, EXTENT_UPTODATE, 0,
-				    cached_state)))
-			break;
-
-		unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-				     &cached_state, GFP_NOFS);
-
-		if (ordered) {
-			btrfs_start_ordered_extent(inode, ordered, 1);
-			btrfs_put_ordered_extent(ordered);
-		} else {
-			/* Screw you mmap */
-			ret = filemap_write_and_wait_range(file->f_mapping,
-							   lockstart,
-							   lockend);
-			if (ret)
-				goto out;
-
-			/*
-			 * If we found a page that couldn't be invalidated just
-			 * fall back to buffered.
-			 */
-			ret = invalidate_inode_pages2_range(file->f_mapping,
-					lockstart >> PAGE_CACHE_SHIFT,
-					lockend >> PAGE_CACHE_SHIFT);
-			if (ret) {
-				if (ret == -EBUSY)
-					ret = 0;
-				goto out;
-			}
-		}
-
-		cond_resched();
-	}
-
-	/*
-	 * we don't use btrfs_set_extent_delalloc because we don't want
-	 * the dirty or uptodate bits
-	 */
-	if (writing) {
-		write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING;
-		ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-				     EXTENT_DELALLOC, NULL, &cached_state,
-				     GFP_NOFS);
-		if (ret) {
-			clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
-					 lockend, EXTENT_LOCKED | write_bits,
-					 1, 0, &cached_state, GFP_NOFS);
-			goto out;
-		}
-	}
-
-	free_extent_state(cached_state);
-	cached_state = NULL;
-
-	ret = __blockdev_direct_IO(rw, iocb, inode,
+	return __blockdev_direct_IO(rw, iocb, inode,
 			BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
 			iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
 			btrfs_submit_direct, 0);
-
-	if (ret < 0 && ret != -EIOCBQUEUED) {
-		clear_extent_bit(&BTRFS_I(inode)->io_tree, offset,
-				 offset + iov_length(iov, nr_segs) - 1,
-				 EXTENT_LOCKED | write_bits, 1, 0,
-				 &cached_state, GFP_NOFS);
-	} else if (ret >= 0 && ret < iov_length(iov, nr_segs)) {
-		/*
-		 * We're falling back to buffered, unlock the section we didn't
-		 * do IO on.
-		 */
-		clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret,
-				 offset + iov_length(iov, nr_segs) - 1,
-				 EXTENT_LOCKED | write_bits, 1, 0,
-				 &cached_state, GFP_NOFS);
-	}
-out:
-	free_extent_state(cached_state);
-	return ret;
 }
 
 static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,