Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r--  fs/btrfs/inode.c  329
1 files changed, 166 insertions(+), 163 deletions(-)
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 83baec24946d..ec154f954646 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -324,7 +324,8 @@ static noinline int add_async_extent(struct async_cow *cow,
  * If this code finds it can't get good compression, it puts an
  * entry onto the work queue to write the uncompressed bytes. This
  * makes sure that both compressed inodes and uncompressed inodes
- * are written in the same order that pdflush sent them down.
+ * are written in the same order that the flusher thread sent them
+ * down.
  */
 static noinline int compress_file_range(struct inode *inode,
                                         struct page *locked_page,
@@ -1007,9 +1008,7 @@ static noinline void async_cow_submit(struct btrfs_work *work)
         nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >>
                 PAGE_CACHE_SHIFT;
 
-        atomic_sub(nr_pages, &root->fs_info->async_delalloc_pages);
-
-        if (atomic_read(&root->fs_info->async_delalloc_pages) <
+        if (atomic_sub_return(nr_pages, &root->fs_info->async_delalloc_pages) <
             5 * 1024 * 1024 &&
             waitqueue_active(&root->fs_info->async_submit_wait))
                 wake_up(&root->fs_info->async_submit_wait);
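Folding the atomic_sub() and the separate atomic_read() into one atomic_sub_return() saves an atomic operation and closes the window in which another CPU could change async_delalloc_pages between the subtraction and the threshold test, so the wakeup decision is made on a value this caller actually produced. A userspace sketch of the same idiom using C11 atomics (the counter name and threshold are invented for illustration):

/* Userspace analogue of the atomic_sub_return() change; the counter
 * name and the threshold are invented for illustration. */
#include <stdatomic.h>
#include <stdio.h>

static atomic_int pending = 10;

static void finish_work(int n)
{
        /*
         * The old two-step shape was:
         *         atomic_fetch_sub(&pending, n);
         *         if (atomic_load(&pending) < 5) ...
         * Another thread can change 'pending' between the two calls, so
         * the test may act on a count no caller produced and a wakeup
         * can be missed.  One atomic op returns the exact result of our
         * own subtraction (fetch_sub returns the old value, hence -n):
         */
        if (atomic_fetch_sub(&pending, n) - n < 5)
                printf("below threshold, wake up waiters\n");
}

int main(void)
{
        finish_work(7);         /* 10 - 7 = 3, under the threshold */
        return 0;
}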
@@ -1884,8 +1883,11 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
                 trans = btrfs_join_transaction_nolock(root);
         else
                 trans = btrfs_join_transaction(root);
-        if (IS_ERR(trans))
-                return PTR_ERR(trans);
+        if (IS_ERR(trans)) {
+                ret = PTR_ERR(trans);
+                trans = NULL;
+                goto out;
+        }
         trans->block_rsv = &root->fs_info->delalloc_block_rsv;
         ret = btrfs_update_inode_fallback(trans, root, inode);
         if (ret) /* -ENOMEM or corruption */
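The early return used to skip the cleanup at the function's out: label; the fix funnels the failure through it and NULLs trans so the shared exit path can tell whether there is a live transaction handle to end. A self-contained sketch of that goto-out idiom, with toy stand-ins for IS_ERR()/PTR_ERR() (every name here is invented, not the btrfs functions):

/* Sketch of the goto-out idiom the fix adopts; is_err()/ptr_err() are
 * toy versions of the kernel's IS_ERR()/PTR_ERR(). */
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct handle { int id; };

static int is_err(const void *p)
{
        return (uintptr_t)p >= (uintptr_t)-4095;        /* kernel-style */
}

static long ptr_err(const void *p)
{
        return (long)(intptr_t)p;
}

static struct handle *handle_open(void)
{
        return (struct handle *)(intptr_t)-ENOMEM;      /* always fails */
}

static int do_update(void)
{
        struct handle *h;
        int ret = 0;

        h = handle_open();
        if (is_err(h)) {
                ret = (int)ptr_err(h);
                h = NULL;       /* exit path must not touch a bogus pointer */
                goto out;
        }
        /* ... later steps may also 'goto out' with a live handle ... */
out:
        if (h)                  /* NULL-safe unified cleanup */
                free(h);
        return ret;
}

int main(void)
{
        printf("do_update() = %d\n", do_update());      /* prints -12 */
        return 0;
}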
@@ -3173,7 +3175,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
         btrfs_i_size_write(dir, dir->i_size - name_len * 2);
         inode_inc_iversion(dir);
         dir->i_mtime = dir->i_ctime = CURRENT_TIME;
-        ret = btrfs_update_inode(trans, root, dir);
+        ret = btrfs_update_inode_fallback(trans, root, dir);
         if (ret)
                 btrfs_abort_transaction(trans, root, ret);
 out:
@@ -5773,18 +5775,112 @@ out:
         return ret;
 }
 
+static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
+                              struct extent_state **cached_state, int writing)
+{
+        struct btrfs_ordered_extent *ordered;
+        int ret = 0;
+
+        while (1) {
+                lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+                                 0, cached_state);
+                /*
+                 * We're concerned with the entire range that we're going to be
+                 * doing DIO to, so we need to make sure theres no ordered
+                 * extents in this range.
+                 */
+                ordered = btrfs_lookup_ordered_range(inode, lockstart,
+                                                     lockend - lockstart + 1);
+
+                /*
+                 * We need to make sure there are no buffered pages in this
+                 * range either, we could have raced between the invalidate in
+                 * generic_file_direct_write and locking the extent.  The
+                 * invalidate needs to happen so that reads after a write do not
+                 * get stale data.
+                 */
+                if (!ordered && (!writing ||
+                    !test_range_bit(&BTRFS_I(inode)->io_tree,
+                                    lockstart, lockend, EXTENT_UPTODATE, 0,
+                                    *cached_state)))
+                        break;
+
+                unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+                                     cached_state, GFP_NOFS);
+
+                if (ordered) {
+                        btrfs_start_ordered_extent(inode, ordered, 1);
+                        btrfs_put_ordered_extent(ordered);
+                } else {
+                        /* Screw you mmap */
+                        ret = filemap_write_and_wait_range(inode->i_mapping,
+                                                           lockstart,
+                                                           lockend);
+                        if (ret)
+                                break;
+
+                        /*
+                         * If we found a page that couldn't be invalidated just
+                         * fall back to buffered.
+                         */
+                        ret = invalidate_inode_pages2_range(inode->i_mapping,
+                                        lockstart >> PAGE_CACHE_SHIFT,
+                                        lockend >> PAGE_CACHE_SHIFT);
+                        if (ret)
+                                break;
+                }
+
+                cond_resched();
+        }
+
+        return ret;
+}
+
 static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
                                    struct buffer_head *bh_result, int create)
 {
         struct extent_map *em;
         struct btrfs_root *root = BTRFS_I(inode)->root;
+        struct extent_state *cached_state = NULL;
         u64 start = iblock << inode->i_blkbits;
+        u64 lockstart, lockend;
         u64 len = bh_result->b_size;
         struct btrfs_trans_handle *trans;
+        int unlock_bits = EXTENT_LOCKED;
+        int ret;
+
+        if (create) {
+                ret = btrfs_delalloc_reserve_space(inode, len);
+                if (ret)
+                        return ret;
+                unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
+        } else {
+                len = min_t(u64, len, root->sectorsize);
+        }
+
+        lockstart = start;
+        lockend = start + len - 1;
+
+        /*
+         * If this errors out it's because we couldn't invalidate pagecache for
+         * this range and we need to fallback to buffered.
+         */
+        if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create))
+                return -ENOTBLK;
+
+        if (create) {
+                ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+                                     lockend, EXTENT_DELALLOC, NULL,
+                                     &cached_state, GFP_NOFS);
+                if (ret)
+                        goto unlock_err;
+        }
 
         em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
-        if (IS_ERR(em))
-                return PTR_ERR(em);
+        if (IS_ERR(em)) {
+                ret = PTR_ERR(em);
+                goto unlock_err;
+        }
 
         /*
          * Ok for INLINE and COMPRESSED extents we need to fallback on buffered
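The new lock_extent_direct() helper is an optimistic lock/validate/retry loop: take the extent range lock, check for conflicts (ordered extents; for writes, stale uptodate pages), and on a conflict drop the lock, resolve it while unlocked, then try again, returning nonzero only when the caller should fall back to buffered IO. A runnable, single-threaded toy of just that control flow, with a mutex and a counter standing in for the extent lock and the conflicts (all names invented):

/* Toy of the lock -> validate -> resolve -> retry control flow; a
 * mutex and a counter stand in for the extent lock and for ordered
 * extents / stale pages. */
#include <pthread.h>
#include <sched.h>
#include <stdio.h>

static pthread_mutex_t range_lock = PTHREAD_MUTEX_INITIALIZER;
static int conflicts = 2;       /* pretend two conflicts must drain first */

static int resolve_one_conflict(void)
{
        conflicts--;            /* e.g. wait out ordered IO, flush pages */
        return 0;               /* nonzero would mean: fall back to buffered */
}

static int lock_range_for_dio(void)
{
        int ret = 0;

        for (;;) {
                pthread_mutex_lock(&range_lock);
                if (conflicts == 0)
                        break;  /* success: return with the lock held */

                /* Drop the lock before any blocking resolution step,
                 * exactly as lock_extent_direct() does. */
                pthread_mutex_unlock(&range_lock);
                ret = resolve_one_conflict();
                if (ret)
                        break;  /* error: lock already dropped, give up */
                sched_yield();  /* cond_resched() analogue */
        }
        return ret;
}

int main(void)
{
        if (lock_range_for_dio() == 0) {
                printf("range locked, no conflicts left\n");
                pthread_mutex_unlock(&range_lock);
        }
        return 0;
}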
@@ -5803,17 +5899,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
         if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
             em->block_start == EXTENT_MAP_INLINE) {
                 free_extent_map(em);
-                return -ENOTBLK;
+                ret = -ENOTBLK;
+                goto unlock_err;
         }
 
         /* Just a good old fashioned hole, return */
         if (!create && (em->block_start == EXTENT_MAP_HOLE ||
                         test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
                 free_extent_map(em);
-                /* DIO will do one hole at a time, so just unlock a sector */
-                unlock_extent(&BTRFS_I(inode)->io_tree, start,
-                              start + root->sectorsize - 1);
-                return 0;
+                ret = 0;
+                goto unlock_err;
         }
 
         /*
@@ -5826,8 +5921,9 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
          *
          */
         if (!create) {
-                len = em->len - (start - em->start);
-                goto map;
+                len = min(len, em->len - (start - em->start));
+                lockstart = start + len;
+                goto unlock;
         }
 
         if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
@@ -5859,7 +5955,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
                 btrfs_end_transaction(trans, root);
                 if (ret) {
                         free_extent_map(em);
-                        return ret;
+                        goto unlock_err;
                 }
                 goto unlock;
         }
@@ -5872,14 +5968,12 @@ must_cow:
          */
         len = bh_result->b_size;
         em = btrfs_new_extent_direct(inode, em, start, len);
-        if (IS_ERR(em))
-                return PTR_ERR(em);
+        if (IS_ERR(em)) {
+                ret = PTR_ERR(em);
+                goto unlock_err;
+        }
         len = min(len, em->len - (start - em->start));
 unlock:
-        clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1,
-                         EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1,
-                         0, NULL, GFP_NOFS);
-map:
         bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
                 inode->i_blkbits;
         bh_result->b_size = len;
@@ -5897,9 +5991,44 @@ map:
                 i_size_write(inode, start + len);
         }
 
+        /*
+         * In the case of write we need to clear and unlock the entire range,
+         * in the case of read we need to unlock only the end area that we
+         * aren't using if there is any left over space.
+         */
+        if (lockstart < lockend) {
+                if (create && len < lockend - lockstart) {
+                        clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+                                         lockstart + len - 1, unlock_bits, 1, 0,
+                                         &cached_state, GFP_NOFS);
+                        /*
+                         * Beside unlock, we also need to cleanup reserved space
+                         * for the left range by attaching EXTENT_DO_ACCOUNTING.
+                         */
+                        clear_extent_bit(&BTRFS_I(inode)->io_tree,
+                                         lockstart + len, lockend,
+                                         unlock_bits | EXTENT_DO_ACCOUNTING,
+                                         1, 0, NULL, GFP_NOFS);
+                } else {
+                        clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
+                                         lockend, unlock_bits, 1, 0,
+                                         &cached_state, GFP_NOFS);
+                }
+        } else {
+                free_extent_state(cached_state);
+        }
+
         free_extent_map(em);
 
         return 0;
+
+unlock_err:
+        if (create)
+                unlock_bits |= EXTENT_DO_ACCOUNTING;
+
+        clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
+                         unlock_bits, 1, 0, &cached_state, GFP_NOFS);
+        return ret;
 }
 
 struct btrfs_dio_private {
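The unlock bookkeeping just added is easy to get wrong at the boundaries: when a write maps fewer bytes than were locked, the mapped head [lockstart, lockstart + len - 1] only unlocks, while the unused tail [lockstart + len, lockend] must also hand back its delalloc reservation, which is why EXTENT_DO_ACCOUNTING is attached to the second clear only. A tiny demo of that split arithmetic (sizes invented; inclusive end as in the patch):

/* Demo of the write-path split: head keeps its reservation, tail
 * releases it. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint64_t lockstart = 0;
        uint64_t lockend = 16383;       /* 16K locked, inclusive end */
        uint64_t len = 4096;            /* but only 4K actually mapped */

        if (len < lockend - lockstart) {
                printf("unlock only:      [%llu, %llu]\n",
                       (unsigned long long)lockstart,
                       (unsigned long long)(lockstart + len - 1));
                printf("unlock + account: [%llu, %llu]\n",
                       (unsigned long long)(lockstart + len),
                       (unsigned long long)lockend);
        }
        return 0;
}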
@@ -5907,7 +6036,6 @@ struct btrfs_dio_private {
         u64 logical_offset;
         u64 disk_bytenr;
         u64 bytes;
-        u32 *csums;
         void *private;
 
         /* number of bios pending for this dio */
@@ -5927,7 +6055,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
         struct inode *inode = dip->inode;
         struct btrfs_root *root = BTRFS_I(inode)->root;
         u64 start;
-        u32 *private = dip->csums;
 
         start = dip->logical_offset;
         do {
@@ -5935,8 +6062,12 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
                         struct page *page = bvec->bv_page;
                         char *kaddr;
                         u32 csum = ~(u32)0;
+                        u64 private = ~(u32)0;
                         unsigned long flags;
 
+                        if (get_state_private(&BTRFS_I(inode)->io_tree,
+                                              start, &private))
+                                goto failed;
                         local_irq_save(flags);
                         kaddr = kmap_atomic(page);
                         csum = btrfs_csum_data(root, kaddr + bvec->bv_offset,
@@ -5946,18 +6077,18 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
                         local_irq_restore(flags);
 
                         flush_dcache_page(bvec->bv_page);
-                        if (csum != *private) {
+                        if (csum != private) {
+failed:
                                 printk(KERN_ERR "btrfs csum failed ino %llu off"
                                        " %llu csum %u private %u\n",
                                        (unsigned long long)btrfs_ino(inode),
                                        (unsigned long long)start,
-                                       csum, *private);
+                                       csum, (unsigned)private);
                                 err = -EIO;
                         }
                 }
 
                 start += bvec->bv_len;
-                private++;
                 bvec++;
         } while (bvec <= bvec_end);
 
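With dip->csums gone, the read-completion loop now fetches the expected checksum for each file offset on demand via get_state_private() rather than marching a cursor through a preallocated array in bio order. A toy rendition of offset-keyed verification; the hash function and the per-offset store are invented stand-ins, not the btrfs APIs:

/* Toy of offset-keyed checksum verification. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define NBLOCKS 4
#define BLKSZ   4096

static uint32_t expected[NBLOCKS];      /* csum for offset i * BLKSZ */

static uint32_t toy_csum(const uint8_t *data, size_t len)
{
        uint32_t c = ~(uint32_t)0;      /* same seed as the kernel loop */
        while (len--)
                c = (c << 5) + c + *data++;
        return c;
}

/* get_state_private() analogue: expected csum for a file offset. */
static uint32_t lookup_csum(uint64_t start)
{
        return expected[start / BLKSZ];
}

int main(void)
{
        static uint8_t blocks[NBLOCKS][BLKSZ];
        uint64_t start = 0;
        int err = 0, i;

        for (i = 0; i < NBLOCKS; i++) {
                memset(blocks[i], i, BLKSZ);
                expected[i] = toy_csum(blocks[i], BLKSZ);
        }

        blocks[2][7] ^= 0xff;           /* corrupt one block */

        /* The csum comes from the offset, not from a cursor, so bios
         * can be checked in any order. */
        for (i = 0; i < NBLOCKS; i++, start += BLKSZ) {
                if (toy_csum(blocks[i], BLKSZ) != lookup_csum(start)) {
                        fprintf(stderr, "csum failed off %llu\n",
                                (unsigned long long)start);
                        err = -5;       /* -EIO */
                }
        }
        return err ? 1 : 0;
}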
@@ -5965,7 +6096,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
                       dip->logical_offset + dip->bytes - 1);
         bio->bi_private = dip->private;
 
-        kfree(dip->csums);
         kfree(dip);
 
         /* If we had a csum failure make sure to clear the uptodate flag */
@@ -6071,7 +6201,7 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
 
 static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
                                          int rw, u64 file_offset, int skip_sum,
-                                         u32 *csums, int async_submit)
+                                         int async_submit)
 {
         int write = rw & REQ_WRITE;
         struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -6104,8 +6234,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
                 if (ret)
                         goto err;
         } else if (!skip_sum) {
-                ret = btrfs_lookup_bio_sums_dio(root, inode, bio,
-                                                file_offset, csums);
+                ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset);
                 if (ret)
                         goto err;
         }
@@ -6131,10 +6260,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
         u64 submit_len = 0;
         u64 map_length;
         int nr_pages = 0;
-        u32 *csums = dip->csums;
         int ret = 0;
         int async_submit = 0;
-        int write = rw & REQ_WRITE;
 
         map_length = orig_bio->bi_size;
         ret = btrfs_map_block(map_tree, READ, start_sector << 9,
@@ -6170,16 +6297,13 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
                         atomic_inc(&dip->pending_bios);
                         ret = __btrfs_submit_dio_bio(bio, inode, rw,
                                                      file_offset, skip_sum,
-                                                     csums, async_submit);
+                                                     async_submit);
                         if (ret) {
                                 bio_put(bio);
                                 atomic_dec(&dip->pending_bios);
                                 goto out_err;
                         }
 
-                        /* Write's use the ordered csums */
-                        if (!write && !skip_sum)
-                                csums = csums + nr_pages;
                         start_sector += submit_len >> 9;
                         file_offset += submit_len;
 
@@ -6209,7 +6333,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
 
 submit:
         ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
-                                     csums, async_submit);
+                                     async_submit);
         if (!ret)
                 return 0;
 
@@ -6245,17 +6369,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
                 ret = -ENOMEM;
                 goto free_ordered;
         }
-        dip->csums = NULL;
-
-        /* Write's use the ordered csum stuff, so we don't need dip->csums */
-        if (!write && !skip_sum) {
-                dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
-                if (!dip->csums) {
-                        kfree(dip);
-                        ret = -ENOMEM;
-                        goto free_ordered;
-                }
-        }
 
         dip->private = bio->bi_private;
         dip->inode = inode;
@@ -6340,132 +6453,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
 out:
         return retval;
 }
+
 static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
                         const struct iovec *iov, loff_t offset,
                         unsigned long nr_segs)
 {
         struct file *file = iocb->ki_filp;
         struct inode *inode = file->f_mapping->host;
-        struct btrfs_ordered_extent *ordered;
-        struct extent_state *cached_state = NULL;
-        u64 lockstart, lockend;
-        ssize_t ret;
-        int writing = rw & WRITE;
-        int write_bits = 0;
-        size_t count = iov_length(iov, nr_segs);
 
         if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
-                            offset, nr_segs)) {
+                            offset, nr_segs))
                 return 0;
-        }
-
-        lockstart = offset;
-        lockend = offset + count - 1;
-
-        if (writing) {
-                ret = btrfs_delalloc_reserve_space(inode, count);
-                if (ret)
-                        goto out;
-        }
-
-        while (1) {
-                lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-                                 0, &cached_state);
-                /*
-                 * We're concerned with the entire range that we're going to be
-                 * doing DIO to, so we need to make sure theres no ordered
-                 * extents in this range.
-                 */
-                ordered = btrfs_lookup_ordered_range(inode, lockstart,
-                                                     lockend - lockstart + 1);
-
-                /*
-                 * We need to make sure there are no buffered pages in this
-                 * range either, we could have raced between the invalidate in
-                 * generic_file_direct_write and locking the extent.  The
-                 * invalidate needs to happen so that reads after a write do not
-                 * get stale data.
-                 */
-                if (!ordered && (!writing ||
-                    !test_range_bit(&BTRFS_I(inode)->io_tree,
-                                    lockstart, lockend, EXTENT_UPTODATE, 0,
-                                    cached_state)))
-                        break;
-
-                unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-                                     &cached_state, GFP_NOFS);
-
-                if (ordered) {
-                        btrfs_start_ordered_extent(inode, ordered, 1);
-                        btrfs_put_ordered_extent(ordered);
-                } else {
-                        /* Screw you mmap */
-                        ret = filemap_write_and_wait_range(file->f_mapping,
-                                                           lockstart,
-                                                           lockend);
-                        if (ret)
-                                goto out;
-
-                        /*
-                         * If we found a page that couldn't be invalidated just
-                         * fall back to buffered.
-                         */
-                        ret = invalidate_inode_pages2_range(file->f_mapping,
-                                        lockstart >> PAGE_CACHE_SHIFT,
-                                        lockend >> PAGE_CACHE_SHIFT);
-                        if (ret) {
-                                if (ret == -EBUSY)
-                                        ret = 0;
-                                goto out;
-                        }
-                }
-
-                cond_resched();
-        }
 
-        /*
-         * we don't use btrfs_set_extent_delalloc because we don't want
-         * the dirty or uptodate bits
-         */
-        if (writing) {
-                write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING;
-                ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
-                                     EXTENT_DELALLOC, NULL, &cached_state,
-                                     GFP_NOFS);
-                if (ret) {
-                        clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
-                                         lockend, EXTENT_LOCKED | write_bits,
-                                         1, 0, &cached_state, GFP_NOFS);
-                        goto out;
-                }
-        }
-
-        free_extent_state(cached_state);
-        cached_state = NULL;
-
-        ret = __blockdev_direct_IO(rw, iocb, inode,
+        return __blockdev_direct_IO(rw, iocb, inode,
                 BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
                 iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
                 btrfs_submit_direct, 0);
-
-        if (ret < 0 && ret != -EIOCBQUEUED) {
-                clear_extent_bit(&BTRFS_I(inode)->io_tree, offset,
-                                 offset + iov_length(iov, nr_segs) - 1,
-                                 EXTENT_LOCKED | write_bits, 1, 0,
-                                 &cached_state, GFP_NOFS);
-        } else if (ret >= 0 && ret < iov_length(iov, nr_segs)) {
-                /*
-                 * We're falling back to buffered, unlock the section we didn't
-                 * do IO on.
-                 */
-                clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret,
-                                 offset + iov_length(iov, nr_segs) - 1,
-                                 EXTENT_LOCKED | write_bits, 1, 0,
-                                 &cached_state, GFP_NOFS);
-        }
-out:
-        free_extent_state(cached_state);
-        return ret;
 }
 
 static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,