Diffstat (limited to 'fs/btrfs/inode.c')

-rw-r--r--   fs/btrfs/inode.c   329
1 file changed, 166 insertions(+), 163 deletions(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 83baec24946d..ec154f954646 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -324,7 +324,8 @@ static noinline int add_async_extent(struct async_cow *cow,
  * If this code finds it can't get good compression, it puts an
  * entry onto the work queue to write the uncompressed bytes. This
  * makes sure that both compressed inodes and uncompressed inodes
- * are written in the same order that pdflush sent them down.
+ * are written in the same order that the flusher thread sent them
+ * down.
  */
 static noinline int compress_file_range(struct inode *inode,
                                         struct page *locked_page,
@@ -1007,9 +1008,7 @@ static noinline void async_cow_submit(struct btrfs_work *work)
         nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >>
                 PAGE_CACHE_SHIFT;
 
-        atomic_sub(nr_pages, &root->fs_info->async_delalloc_pages);
-
-        if (atomic_read(&root->fs_info->async_delalloc_pages) <
+        if (atomic_sub_return(nr_pages, &root->fs_info->async_delalloc_pages) <
             5 * 1024 * 1024 &&
             waitqueue_active(&root->fs_info->async_submit_wait))
                 wake_up(&root->fs_info->async_submit_wait);
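
The wakeup test is the interesting part of this hunk. The old code decremented async_delalloc_pages with atomic_sub() and then re-read the counter with a separate atomic_read() before comparing against the threshold; between those two operations another thread could change the counter, so the comparison could act on a value unrelated to this decrement. atomic_sub_return() folds the update and the read into one atomic operation, so the threshold check sees exactly the post-decrement value. A minimal standalone sketch of the pattern, using C11 atomics in place of the kernel's atomic_t (the limit and helper names are illustrative, not btrfs symbols):

    #include <stdatomic.h>
    #include <stdio.h>

    #define WAKE_LIMIT (5 * 1024 * 1024)

    static atomic_long pages_in_flight;

    /* stand-in for waitqueue_active() + wake_up() on async_submit_wait */
    static void wake_waiters(void)
    {
        puts("waking throttled submitters");
    }

    static void pages_completed(long nr)
    {
        /*
         * atomic_fetch_sub() returns the value *before* the subtraction,
         * so subtract nr once more to get the post-decrement value, the
         * same thing the kernel's atomic_sub_return() hands back.
         */
        long now = atomic_fetch_sub(&pages_in_flight, nr) - nr;

        /*
         * Test 'now' rather than doing a fresh atomic load: the wakeup
         * decision is tied to this thread's own decrement, with no window
         * for a concurrent update to slip in between.
         */
        if (now < WAKE_LIMIT)
            wake_waiters();
    }

    int main(void)
    {
        atomic_store(&pages_in_flight, WAKE_LIMIT + 100);
        pages_completed(200);   /* crosses the limit: wake waiters */
        return 0;
    }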
@@ -1884,8 +1883,11 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
                 trans = btrfs_join_transaction_nolock(root);
         else
                 trans = btrfs_join_transaction(root);
-        if (IS_ERR(trans))
-                return PTR_ERR(trans);
+        if (IS_ERR(trans)) {
+                ret = PTR_ERR(trans);
+                trans = NULL;
+                goto out;
+        }
         trans->block_rsv = &root->fs_info->delalloc_block_rsv;
         ret = btrfs_update_inode_fallback(trans, root, inode);
         if (ret) /* -ENOMEM or corruption */
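
Before this hunk, a failed btrfs_join_transaction() returned PTR_ERR() directly and skipped the cleanup that btrfs_finish_ordered_io() performs at its out label. Routing the error through out, with trans explicitly set to NULL so the exit path's test on trans does not see the ERR_PTR-encoded value, gives the function a single exit that always runs the shared cleanup. A compact sketch of the idiom under hypothetical names (none of these are btrfs functions):

    #include <errno.h>
    #include <stddef.h>

    struct txn { int unused; };

    /* hypothetical: returns NULL on failure, playing the ERR_PTR role */
    static struct txn *txn_join(int fail)
    {
        static struct txn t;
        return fail ? NULL : &t;
    }

    static void txn_end(struct txn *t)  { (void)t; }
    static void shared_cleanup(int ret) { (void)ret; }

    static int finish_ordered(int fail)
    {
        struct txn *t;
        int ret = 0;

        t = txn_join(fail);
        if (!t) {
            ret = -ENOMEM;
            t = NULL;   /* in the kernel, t would hold an ERR_PTR, i.e.
                           non-NULL garbage, hence the explicit nulling */
            goto out;
        }
        /* ... work that may also 'goto out' with t still live ... */
    out:
        if (t)
            txn_end(t);
        shared_cleanup(ret);    /* the old early 'return' skipped this */
        return ret;
    }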
@@ -3173,7 +3175,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
         btrfs_i_size_write(dir, dir->i_size - name_len * 2);
         inode_inc_iversion(dir);
         dir->i_mtime = dir->i_ctime = CURRENT_TIME;
-        ret = btrfs_update_inode(trans, root, dir);
+        ret = btrfs_update_inode_fallback(trans, root, dir);
         if (ret)
                 btrfs_abort_transaction(trans, root, ret);
 out:
@@ -5773,18 +5775,112 @@ out:
         return ret;
 }
 
+static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
+                              struct extent_state **cached_state, int writing)
+{
+        struct btrfs_ordered_extent *ordered;
+        int ret = 0;
+
+        while (1) {
+                lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+                                 0, cached_state);
+                /*
+                 * We're concerned with the entire range that we're going to be
+                 * doing DIO to, so we need to make sure there are no ordered
+                 * extents in this range.
+                 */
+                ordered = btrfs_lookup_ordered_range(inode, lockstart,
+                                                     lockend - lockstart + 1);
+
+                /*
+                 * We need to make sure there are no buffered pages in this
+                 * range either, we could have raced between the invalidate in
+                 * generic_file_direct_write and locking the extent. The
+                 * invalidate needs to happen so that reads after a write do not
+                 * get stale data.
+                 */
+                if (!ordered && (!writing ||
+                    !test_range_bit(&BTRFS_I(inode)->io_tree,
+                                    lockstart, lockend, EXTENT_UPTODATE, 0,
+                                    *cached_state)))
+                        break;
+
+                unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+                                     cached_state, GFP_NOFS);
+
+                if (ordered) {
+                        btrfs_start_ordered_extent(inode, ordered, 1);
+                        btrfs_put_ordered_extent(ordered);
+                } else {
+                        /* Screw you mmap */
+                        ret = filemap_write_and_wait_range(inode->i_mapping,
+                                                           lockstart,
+                                                           lockend);
+                        if (ret)
+                                break;
+
+                        /*
+                         * If we found a page that couldn't be invalidated,
+                         * just fall back to buffered.
+                         */
+                        ret = invalidate_inode_pages2_range(inode->i_mapping,
+                                        lockstart >> PAGE_CACHE_SHIFT,
+                                        lockend >> PAGE_CACHE_SHIFT);
+                        if (ret)
+                                break;
+                }
+
+                cond_resched();
+        }
+
+        return ret;
+}
+
 static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
                                    struct buffer_head *bh_result, int create)
 {
         struct extent_map *em;
         struct btrfs_root *root = BTRFS_I(inode)->root;
+        struct extent_state *cached_state = NULL;
         u64 start = iblock << inode->i_blkbits;
+        u64 lockstart, lockend;
         u64 len = bh_result->b_size;
         struct btrfs_trans_handle *trans;
+        int unlock_bits = EXTENT_LOCKED;
+        int ret;
+
+        if (create) {
+                ret = btrfs_delalloc_reserve_space(inode, len);
+                if (ret)
+                        return ret;
+                unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
+        } else {
+                len = min_t(u64, len, root->sectorsize);
+        }
+
+        lockstart = start;
+        lockend = start + len - 1;
+
+        /*
+         * If this errors out it's because we couldn't invalidate pagecache for
+         * this range and we need to fall back to buffered.
+         */
+        if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create))
+                return -ENOTBLK;
+
+        if (create) {
+                ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+                                     lockend, EXTENT_DELALLOC, NULL,
+                                     &cached_state, GFP_NOFS);
+                if (ret)
+                        goto unlock_err;
+        }
 
         em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
-        if (IS_ERR(em))
-                return PTR_ERR(em);
+        if (IS_ERR(em)) {
+                ret = PTR_ERR(em);
+                goto unlock_err;
+        }
 
         /*
          * Ok for INLINE and COMPRESSED extents we need to fallback on buffered
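
Design note on the two additions above: lock_extent_direct() follows the standard optimistic lock/validate/back-off/retry shape. It takes the extent lock, then checks for the two things that would make direct I/O on the range unsafe: a pending ordered extent, or (for writes) pagecache pages that became uptodate after generic_file_direct_write()'s invalidation. If either is found, the lock must be dropped before waiting, because completing an ordered extent or flushing the pagecache itself needs the range; then the loop retries from the top. In btrfs_get_blocks_direct(), space reservation and range locking now happen per call (note the lock range is inclusive, hence lockend = start + len - 1), which is what allows btrfs_direct_IO() at the bottom of this patch to drop all of its own locking and reservation code.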
@@ -5803,17 +5899,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
         if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
             em->block_start == EXTENT_MAP_INLINE) {
                 free_extent_map(em);
-                return -ENOTBLK;
+                ret = -ENOTBLK;
+                goto unlock_err;
         }
 
         /* Just a good old fashioned hole, return */
         if (!create && (em->block_start == EXTENT_MAP_HOLE ||
                         test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
                 free_extent_map(em);
-                /* DIO will do one hole at a time, so just unlock a sector */
-                unlock_extent(&BTRFS_I(inode)->io_tree, start,
-                              start + root->sectorsize - 1);
-                return 0;
+                ret = 0;
+                goto unlock_err;
         }
 
         /*
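
One subtlety in the hole/prealloc case: ret = 0 followed by goto unlock_err is not an error in the usual sense. The label is reused purely for its unlock effect, clearing the whole remaining locked range instead of the single sector the old code unlocked, and the function still returns 0 with bh_result left unmapped, which is how a hole is reported to the DIO code.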
@@ -5826,8 +5921,9 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
          *
          */
         if (!create) {
-                len = em->len - (start - em->start);
-                goto map;
+                len = min(len, em->len - (start - em->start));
+                lockstart = start + len;
+                goto unlock;
         }
 
         if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
@@ -5859,7 +5955,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
                 btrfs_end_transaction(trans, root);
                 if (ret) {
                         free_extent_map(em);
-                        return ret;
+                        goto unlock_err;
                 }
                 goto unlock;
         }
@@ -5872,14 +5968,12 @@ must_cow:
          */
         len = bh_result->b_size;
         em = btrfs_new_extent_direct(inode, em, start, len);
-        if (IS_ERR(em))
-                return PTR_ERR(em);
+        if (IS_ERR(em)) {
+                ret = PTR_ERR(em);
+                goto unlock_err;
+        }
         len = min(len, em->len - (start - em->start));
 unlock:
-        clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1,
-                         EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1,
-                         0, NULL, GFP_NOFS);
-map:
         bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
                 inode->i_blkbits;
         bh_result->b_size = len;
@@ -5897,9 +5991,44 @@ map:
                 i_size_write(inode, start + len);
         }
 
+        /*
+         * In the case of a write we need to clear and unlock the entire range;
+         * in the case of a read we need to unlock only the tail area that we
+         * aren't using, if there is any leftover space.
+         */
+        if (lockstart < lockend) {
+                if (create && len < lockend - lockstart) {
+                        clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+                                         lockstart + len - 1, unlock_bits, 1, 0,
+                                         &cached_state, GFP_NOFS);
+                        /*
+                         * Besides unlocking, we also need to clean up reserved
+                         * space for the leftover range by attaching
+                         * EXTENT_DO_ACCOUNTING.
+                         */
+                        clear_extent_bit(&BTRFS_I(inode)->io_tree,
+                                         lockstart + len, lockend,
+                                         unlock_bits | EXTENT_DO_ACCOUNTING,
+                                         1, 0, NULL, GFP_NOFS);
+                } else {
+                        clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+                                         lockend, unlock_bits, 1, 0,
+                                         &cached_state, GFP_NOFS);
+                }
+        } else {
+                free_extent_state(cached_state);
+        }
+
         free_extent_map(em);
 
         return 0;
+
+unlock_err:
+        if (create)
+                unlock_bits |= EXTENT_DO_ACCOUNTING;
+
+        clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+                         unlock_bits, 1, 0, &cached_state, GFP_NOFS);
+        return ret;
 }
 
 struct btrfs_dio_private {
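
The unlock logic above is easy to misread because extent ranges are inclusive. On a write that mapped fewer bytes than were locked, the used part [lockstart, lockstart + len - 1] is simply unlocked, while the leftover [lockstart + len, lockend] is also cleared with EXTENT_DO_ACCOUNTING so the delalloc reservation taken for those bytes is given back rather than leaked. On a read, the earlier lockstart = start + len advance makes lockstart run past lockend when the whole range was consumed, so the lockstart < lockend test skips the clear entirely. A tiny standalone sketch of the same inclusive-range split, with made-up values:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t lockstart = 0;        /* locked range: [0, 16383] */
        uint64_t lockend   = 16383;
        uint64_t len       = 4096;     /* bytes actually mapped    */

        if (lockstart < lockend) {
            if (len < lockend - lockstart) {
                /* used part: unlock only */
                printf("unlock         [%" PRIu64 ", %" PRIu64 "]\n",
                       lockstart, lockstart + len - 1);
                /* leftover: unlock and release the reservation */
                printf("unlock+account [%" PRIu64 ", %" PRIu64 "]\n",
                       lockstart + len, lockend);
            } else {
                printf("unlock         [%" PRIu64 ", %" PRIu64 "]\n",
                       lockstart, lockend);
            }
        }
        return 0;
    }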
@@ -5907,7 +6036,6 @@ struct btrfs_dio_private {
         u64 logical_offset;
         u64 disk_bytenr;
         u64 bytes;
-        u32 *csums;
         void *private;
 
         /* number of bios pending for this dio */
@@ -5927,7 +6055,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
         struct inode *inode = dip->inode;
         struct btrfs_root *root = BTRFS_I(inode)->root;
         u64 start;
-        u32 *private = dip->csums;
 
         start = dip->logical_offset;
         do {
@@ -5935,8 +6062,12 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
                         struct page *page = bvec->bv_page;
                         char *kaddr;
                         u32 csum = ~(u32)0;
+                        u64 private = ~(u32)0;
                         unsigned long flags;
 
+                        if (get_state_private(&BTRFS_I(inode)->io_tree,
+                                              start, &private))
+                                goto failed;
                         local_irq_save(flags);
                         kaddr = kmap_atomic(page);
                         csum = btrfs_csum_data(root, kaddr + bvec->bv_offset,
@@ -5946,18 +6077,18 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
                         local_irq_restore(flags);
 
                         flush_dcache_page(bvec->bv_page);
-                        if (csum != *private) {
+                        if (csum != private) {
+failed:
                                 printk(KERN_ERR "btrfs csum failed ino %llu off"
                                        " %llu csum %u private %u\n",
                                        (unsigned long long)btrfs_ino(inode),
                                        (unsigned long long)start,
-                                       csum, *private);
+                                       csum, (unsigned)private);
                                 err = -EIO;
                         }
                 }
 
                 start += bvec->bv_len;
-                private++;
                 bvec++;
         } while (bvec <= bvec_end);
 
@@ -5965,7 +6096,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
                       dip->logical_offset + dip->bytes - 1);
         bio->bi_private = dip->private;
 
-        kfree(dip->csums);
         kfree(dip);
 
         /* If we had a csum failure make sure to clear the uptodate flag */
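
These hunks change where a direct read gets its expected checksums. Previously btrfs_lookup_bio_sums_dio() copied them into the dip->csums array at submit time and the end_io handler walked that array; now the lookup stores each sector's checksum as the per-range private value in the inode's io_tree, and the end_io handler fetches it on demand with get_state_private(). That removes the kmalloc() (and its ENOMEM failure path) from the submit side. A standalone sketch of the verification loop's shape, with a toy checksum standing in for btrfs_csum_data() (the kernel really uses crc32c here) and a plain table standing in for get_state_private(); all names are illustrative:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    #define SECTORSIZE 4096
    #define NSECTORS   4

    /* toy checksum; btrfs uses crc32c in btrfs_csum_data() */
    static uint32_t toy_csum(const uint8_t *data, size_t len)
    {
        uint32_t c = ~(uint32_t)0;
        while (len--)
            c = (c << 5) + c + *data++;
        return c;
    }

    /* stand-in for get_state_private(): expected csum for a byte offset */
    static int lookup_expected(uint64_t start, uint64_t *priv,
                               const uint32_t *table)
    {
        if (start / SECTORSIZE >= NSECTORS)
            return -1;              /* nothing stored: fail the read */
        *priv = table[start / SECTORSIZE];
        return 0;
    }

    static int verify(uint8_t buf[NSECTORS][SECTORSIZE],
                      const uint32_t *expected)
    {
        uint64_t start = 0;
        int err = 0;

        for (int i = 0; i < NSECTORS; i++) {
            uint64_t private;

            /* a missing csum and a csum mismatch share the failure path,
               mirroring the 'goto failed' into the mismatch branch */
            if (lookup_expected(start, &private, expected) ||
                toy_csum(buf[i], SECTORSIZE) != (uint32_t)private)
                err = -1;
            start += SECTORSIZE;
        }
        return err;
    }

    int main(void)
    {
        static uint8_t buf[NSECTORS][SECTORSIZE];
        uint32_t expected[NSECTORS];

        memset(buf, 0xab, sizeof(buf));
        for (int i = 0; i < NSECTORS; i++)
            expected[i] = toy_csum(buf[i], SECTORSIZE);

        printf("verify: %s\n", verify(buf, expected) ? "FAILED" : "ok");
        return 0;
    }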
@@ -6071,7 +6201,7 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
 
 static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
                                          int rw, u64 file_offset, int skip_sum,
-                                         u32 *csums, int async_submit)
+                                         int async_submit)
 {
         int write = rw & REQ_WRITE;
         struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -6104,8 +6234,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
                 if (ret)
                         goto err;
         } else if (!skip_sum) {
-                ret = btrfs_lookup_bio_sums_dio(root, inode, bio,
-                                                file_offset, csums);
+                ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset);
                 if (ret)
                         goto err;
         }
@@ -6131,10 +6260,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
         u64 submit_len = 0;
         u64 map_length;
         int nr_pages = 0;
-        u32 *csums = dip->csums;
         int ret = 0;
         int async_submit = 0;
-        int write = rw & REQ_WRITE;
 
         map_length = orig_bio->bi_size;
         ret = btrfs_map_block(map_tree, READ, start_sector << 9,
@@ -6170,16 +6297,13 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
                         atomic_inc(&dip->pending_bios);
                         ret = __btrfs_submit_dio_bio(bio, inode, rw,
                                                      file_offset, skip_sum,
-                                                     csums, async_submit);
+                                                     async_submit);
                         if (ret) {
                                 bio_put(bio);
                                 atomic_dec(&dip->pending_bios);
                                 goto out_err;
                         }
 
-                        /* Write's use the ordered csums */
-                        if (!write && !skip_sum)
-                                csums = csums + nr_pages;
                         start_sector += submit_len >> 9;
                         file_offset += submit_len;
 
@@ -6209,7 +6333,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
 
 submit:
         ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
-                                     csums, async_submit);
+                                     async_submit);
         if (!ret)
                 return 0;
 
@@ -6245,17 +6369,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
                 ret = -ENOMEM;
                 goto free_ordered;
         }
-        dip->csums = NULL;
-
-        /* Write's use the ordered csum stuff, so we don't need dip->csums */
-        if (!write && !skip_sum) {
-                dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
-                if (!dip->csums) {
-                        kfree(dip);
-                        ret = -ENOMEM;
-                        goto free_ordered;
-                }
-        }
 
         dip->private = bio->bi_private;
         dip->inode = inode;
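
With the submit path no longer pre-fetching checksums into a side array, btrfs_dio_private loses its csums field, the write variable in btrfs_submit_direct_hook() loses its only remaining user (the csums advance removed above), and this allocation, along with its ENOMEM unwinding, disappears. Writes were never affected: they compute checksums through the ordered-extent machinery, which is why the old code only allocated the array for !write && !skip_sum reads.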
@@ -6340,132 +6453,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
 out:
         return retval;
 }
+
 static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
                         const struct iovec *iov, loff_t offset,
                         unsigned long nr_segs)
 {
         struct file *file = iocb->ki_filp;
         struct inode *inode = file->f_mapping->host;
-        struct btrfs_ordered_extent *ordered;
-        struct extent_state *cached_state = NULL;
-        u64 lockstart, lockend;
-        ssize_t ret;
-        int writing = rw & WRITE;
-        int write_bits = 0;
-        size_t count = iov_length(iov, nr_segs);
 
         if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
-                            offset, nr_segs)) {
+                            offset, nr_segs))
                 return 0;
-        }
-
-        lockstart = offset;
-        lockend = offset + count - 1;
-
-        if (writing) {
-                ret = btrfs_delalloc_reserve_space(inode, count);
-                if (ret)
-                        goto out;
-        }
-
-        while (1) {
-                lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-                                 0, &cached_state);
-                /*
-                 * We're concerned with the entire range that we're going to be
-                 * doing DIO to, so we need to make sure theres no ordered
-                 * extents in this range.
-                 */
-                ordered = btrfs_lookup_ordered_range(inode, lockstart,
-                                                     lockend - lockstart + 1);
-
-                /*
-                 * We need to make sure there are no buffered pages in this
-                 * range either, we could have raced between the invalidate in
-                 * generic_file_direct_write and locking the extent. The
-                 * invalidate needs to happen so that reads after a write do not
-                 * get stale data.
-                 */
-                if (!ordered && (!writing ||
-                    !test_range_bit(&BTRFS_I(inode)->io_tree,
-                                    lockstart, lockend, EXTENT_UPTODATE, 0,
-                                    cached_state)))
-                        break;
-
-                unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-                                     &cached_state, GFP_NOFS);
-
-                if (ordered) {
-                        btrfs_start_ordered_extent(inode, ordered, 1);
-                        btrfs_put_ordered_extent(ordered);
-                } else {
-                        /* Screw you mmap */
-                        ret = filemap_write_and_wait_range(file->f_mapping,
-                                                           lockstart,
-                                                           lockend);
-                        if (ret)
-                                goto out;
-
-                        /*
-                         * If we found a page that couldn't be invalidated just
-                         * fall back to buffered.
-                         */
-                        ret = invalidate_inode_pages2_range(file->f_mapping,
-                                        lockstart >> PAGE_CACHE_SHIFT,
-                                        lockend >> PAGE_CACHE_SHIFT);
-                        if (ret) {
-                                if (ret == -EBUSY)
-                                        ret = 0;
-                                goto out;
-                        }
-                }
-
-                cond_resched();
-        }
 
-        /*
-         * we don't use btrfs_set_extent_delalloc because we don't want
-         * the dirty or uptodate bits
-         */
-        if (writing) {
-                write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING;
-                ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-                                     EXTENT_DELALLOC, NULL, &cached_state,
-                                     GFP_NOFS);
-                if (ret) {
-                        clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
-                                         lockend, EXTENT_LOCKED | write_bits,
-                                         1, 0, &cached_state, GFP_NOFS);
-                        goto out;
-                }
-        }
-
-        free_extent_state(cached_state);
-        cached_state = NULL;
-
-        ret = __blockdev_direct_IO(rw, iocb, inode,
+        return __blockdev_direct_IO(rw, iocb, inode,
                         BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
                         iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
                         btrfs_submit_direct, 0);
-
-        if (ret < 0 && ret != -EIOCBQUEUED) {
-                clear_extent_bit(&BTRFS_I(inode)->io_tree, offset,
-                                 offset + iov_length(iov, nr_segs) - 1,
-                                 EXTENT_LOCKED | write_bits, 1, 0,
-                                 &cached_state, GFP_NOFS);
-        } else if (ret >= 0 && ret < iov_length(iov, nr_segs)) {
-                /*
-                 * We're falling back to buffered, unlock the section we didn't
-                 * do IO on.
-                 */
-                clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret,
-                                 offset + iov_length(iov, nr_segs) - 1,
-                                 EXTENT_LOCKED | write_bits, 1, 0,
-                                 &cached_state, GFP_NOFS);
-        }
-out:
-        free_extent_state(cached_state);
-        return ret;
 }
 
 static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
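
Net effect of this final hunk: everything btrfs_direct_IO() previously did by hand, reserving space, looping to lock the range against ordered extents and racing pagecache, tagging EXTENT_DELALLOC, and unwinding the extent bits when __blockdev_direct_IO() failed or did a short transfer, now lives in btrfs_get_blocks_direct() and its unlock/unlock_err paths, leaving the entry point as a validity check plus the __blockdev_direct_IO() call. One behavioral detail worth noting: the old loop turned -EBUSY from invalidate_inode_pages2_range() into a silent return 0, while the new lock_extent_direct() propagates the failure and btrfs_get_blocks_direct() maps it to -ENOTBLK, which (as the comment in that function says) is the cue for the DIO layer to fall back to buffered I/O.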