aboutsummaryrefslogtreecommitdiffstats
path: root/fs/btrfs/inode.c
diff options
context:
space:
mode:
author Olof Johansson <olof@lixom.net> 2012-09-16 21:31:37 -0400
committer Olof Johansson <olof@lixom.net> 2012-09-16 21:31:37 -0400
commit 2e6185f1fea6cf88e9ce25cde1d6291ddfb3d4f0 (patch)
tree c45ae7bace055c258fba5c4c6c0340b1e3f17f05 /fs/btrfs/inode.c
parent 7405a749ae14f846cc2892c36d1a9343b0264b7c (diff)
parent fd301cc4e5ba839050be135a178031bcd0d363a5 (diff)
Merge tag 'tegra-for-3.7-drivers-i2c' of git://git.kernel.org/pub/scm/linux/kernel/git/swarren/linux-tegra into next/drivers
From Stephen Warren:

ARM: tegra: i2c driver enhancements mostly related to clocking

This branch contains a number of fixes and cleanups to the Tegra I2C
driver related to clocks. These are based on the common clock conversion
in order to avoid duplicating the clock driver changes before and after
the conversion.

Finally, a bug-fix related to I2C_M_NOSTART is included.

This branch is based on previous pull request tegra-for-3.7-common-clk.

* tag 'tegra-for-3.7-drivers-i2c' of git://git.kernel.org/pub/scm/linux/kernel/git/swarren/linux-tegra:
  i2c: tegra: dynamically control fast clk
  i2c: tegra: I2_M_NOSTART functionality not supported in Tegra20
  ARM: tegra: clock: remove unused clock entry for i2c
  ARM: tegra: clock: add connection name in i2c clock entry
  i2c: tegra: pass proper name for getting clock
  ARM: tegra: clock: add i2c fast clock entry in clock table
  ARM: Tegra: Add smp_twd clock for Tegra20
  ARM: tegra: cpu-tegra: explicitly manage re-parenting
  ARM: tegra: fix overflow in tegra20_pll_clk_round_rate()
  ARM: tegra: Fix data type for io address
  ARM: tegra: remove tegra_timer from tegra_list_clks
  ARM: tegra30: clocks: fix the wrong tegra_audio_sync_clk_ops name
  ARM: tegra: clocks: separate tegra_clk_32k_ops from Tegra20 and Tegra30
  ARM: tegra: Remove duplicate code
  ARM: tegra: Port tegra to generic clock framework
  ARM: tegra: Add clk_tegra structure and helper functions
  ARM: tegra: Rename tegra20 clock file
  ARM: tegra20: Separate out clk ops and clk data
  ARM: tegra30: Separate out clk ops and clk data
  ARM: tegra: fix U16 divider range check
  ...
  + sync to v3.6-rc4

Resolved remove/modify conflict in arch/arm/mach-sa1100/leds-hackkit.c
caused by the sync with v3.6-rc4.

Signed-off-by: Olof Johansson <olof@lixom.net>
Diffstat (limited to 'fs/btrfs/inode.c')
-rw-r--r-- fs/btrfs/inode.c 329
1 files changed, 166 insertions, 163 deletions
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 83baec24946d..ec154f954646 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -324,7 +324,8 @@ static noinline int add_async_extent(struct async_cow *cow,
324 * If this code finds it can't get good compression, it puts an 324 * If this code finds it can't get good compression, it puts an
325 * entry onto the work queue to write the uncompressed bytes. This 325 * entry onto the work queue to write the uncompressed bytes. This
326 * makes sure that both compressed inodes and uncompressed inodes 326 * makes sure that both compressed inodes and uncompressed inodes
327 * are written in the same order that pdflush sent them down. 327 * are written in the same order that the flusher thread sent them
328 * down.
328 */ 329 */
329static noinline int compress_file_range(struct inode *inode, 330static noinline int compress_file_range(struct inode *inode,
330 struct page *locked_page, 331 struct page *locked_page,
@@ -1007,9 +1008,7 @@ static noinline void async_cow_submit(struct btrfs_work *work)
1007 nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >> 1008 nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >>
1008 PAGE_CACHE_SHIFT; 1009 PAGE_CACHE_SHIFT;
1009 1010
1010 atomic_sub(nr_pages, &root->fs_info->async_delalloc_pages); 1011 if (atomic_sub_return(nr_pages, &root->fs_info->async_delalloc_pages) <
1011
1012 if (atomic_read(&root->fs_info->async_delalloc_pages) <
1013 5 * 1024 * 1024 && 1012 5 * 1024 * 1024 &&
1014 waitqueue_active(&root->fs_info->async_submit_wait)) 1013 waitqueue_active(&root->fs_info->async_submit_wait))
1015 wake_up(&root->fs_info->async_submit_wait); 1014 wake_up(&root->fs_info->async_submit_wait);
@@ -1884,8 +1883,11 @@ static int btrfs_finish_ordered_io(struct btrfs_ordered_extent *ordered_extent)
1884 trans = btrfs_join_transaction_nolock(root); 1883 trans = btrfs_join_transaction_nolock(root);
1885 else 1884 else
1886 trans = btrfs_join_transaction(root); 1885 trans = btrfs_join_transaction(root);
1887 if (IS_ERR(trans)) 1886 if (IS_ERR(trans)) {
1888 return PTR_ERR(trans); 1887 ret = PTR_ERR(trans);
1888 trans = NULL;
1889 goto out;
1890 }
1889 trans->block_rsv = &root->fs_info->delalloc_block_rsv; 1891 trans->block_rsv = &root->fs_info->delalloc_block_rsv;
1890 ret = btrfs_update_inode_fallback(trans, root, inode); 1892 ret = btrfs_update_inode_fallback(trans, root, inode);
1891 if (ret) /* -ENOMEM or corruption */ 1893 if (ret) /* -ENOMEM or corruption */
@@ -3173,7 +3175,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
3173 btrfs_i_size_write(dir, dir->i_size - name_len * 2); 3175 btrfs_i_size_write(dir, dir->i_size - name_len * 2);
3174 inode_inc_iversion(dir); 3176 inode_inc_iversion(dir);
3175 dir->i_mtime = dir->i_ctime = CURRENT_TIME; 3177 dir->i_mtime = dir->i_ctime = CURRENT_TIME;
3176 ret = btrfs_update_inode(trans, root, dir); 3178 ret = btrfs_update_inode_fallback(trans, root, dir);
3177 if (ret) 3179 if (ret)
3178 btrfs_abort_transaction(trans, root, ret); 3180 btrfs_abort_transaction(trans, root, ret);
3179out: 3181out:
@@ -5773,18 +5775,112 @@ out:
5773 return ret; 5775 return ret;
5774} 5776}
5775 5777
5778static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
5779 struct extent_state **cached_state, int writing)
5780{
5781 struct btrfs_ordered_extent *ordered;
5782 int ret = 0;
5783
5784 while (1) {
5785 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
5786 0, cached_state);
5787 /*
5788 * We're concerned with the entire range that we're going to be
5789 * doing DIO to, so we need to make sure theres no ordered
5790 * extents in this range.
5791 */
5792 ordered = btrfs_lookup_ordered_range(inode, lockstart,
5793 lockend - lockstart + 1);
5794
5795 /*
5796 * We need to make sure there are no buffered pages in this
5797 * range either, we could have raced between the invalidate in
5798 * generic_file_direct_write and locking the extent. The
5799 * invalidate needs to happen so that reads after a write do not
5800 * get stale data.
5801 */
5802 if (!ordered && (!writing ||
5803 !test_range_bit(&BTRFS_I(inode)->io_tree,
5804 lockstart, lockend, EXTENT_UPTODATE, 0,
5805 *cached_state)))
5806 break;
5807
5808 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
5809 cached_state, GFP_NOFS);
5810
5811 if (ordered) {
5812 btrfs_start_ordered_extent(inode, ordered, 1);
5813 btrfs_put_ordered_extent(ordered);
5814 } else {
5815 /* Screw you mmap */
5816 ret = filemap_write_and_wait_range(inode->i_mapping,
5817 lockstart,
5818 lockend);
5819 if (ret)
5820 break;
5821
5822 /*
5823 * If we found a page that couldn't be invalidated just
5824 * fall back to buffered.
5825 */
5826 ret = invalidate_inode_pages2_range(inode->i_mapping,
5827 lockstart >> PAGE_CACHE_SHIFT,
5828 lockend >> PAGE_CACHE_SHIFT);
5829 if (ret)
5830 break;
5831 }
5832
5833 cond_resched();
5834 }
5835
5836 return ret;
5837}
5838
5776static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, 5839static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
5777 struct buffer_head *bh_result, int create) 5840 struct buffer_head *bh_result, int create)
5778{ 5841{
5779 struct extent_map *em; 5842 struct extent_map *em;
5780 struct btrfs_root *root = BTRFS_I(inode)->root; 5843 struct btrfs_root *root = BTRFS_I(inode)->root;
5844 struct extent_state *cached_state = NULL;
5781 u64 start = iblock << inode->i_blkbits; 5845 u64 start = iblock << inode->i_blkbits;
5846 u64 lockstart, lockend;
5782 u64 len = bh_result->b_size; 5847 u64 len = bh_result->b_size;
5783 struct btrfs_trans_handle *trans; 5848 struct btrfs_trans_handle *trans;
5849 int unlock_bits = EXTENT_LOCKED;
5850 int ret;
5851
5852 if (create) {
5853 ret = btrfs_delalloc_reserve_space(inode, len);
5854 if (ret)
5855 return ret;
5856 unlock_bits |= EXTENT_DELALLOC | EXTENT_DIRTY;
5857 } else {
5858 len = min_t(u64, len, root->sectorsize);
5859 }
5860
5861 lockstart = start;
5862 lockend = start + len - 1;
5863
5864 /*
5865 * If this errors out it's because we couldn't invalidate pagecache for
5866 * this range and we need to fallback to buffered.
5867 */
5868 if (lock_extent_direct(inode, lockstart, lockend, &cached_state, create))
5869 return -ENOTBLK;
5870
5871 if (create) {
5872 ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
5873 lockend, EXTENT_DELALLOC, NULL,
5874 &cached_state, GFP_NOFS);
5875 if (ret)
5876 goto unlock_err;
5877 }
5784 5878
5785 em = btrfs_get_extent(inode, NULL, 0, start, len, 0); 5879 em = btrfs_get_extent(inode, NULL, 0, start, len, 0);
5786 if (IS_ERR(em)) 5880 if (IS_ERR(em)) {
5787 return PTR_ERR(em); 5881 ret = PTR_ERR(em);
5882 goto unlock_err;
5883 }
5788 5884
5789 /* 5885 /*
5790 * Ok for INLINE and COMPRESSED extents we need to fallback on buffered 5886 * Ok for INLINE and COMPRESSED extents we need to fallback on buffered
@@ -5803,17 +5899,16 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
5803 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) || 5899 if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) ||
5804 em->block_start == EXTENT_MAP_INLINE) { 5900 em->block_start == EXTENT_MAP_INLINE) {
5805 free_extent_map(em); 5901 free_extent_map(em);
5806 return -ENOTBLK; 5902 ret = -ENOTBLK;
5903 goto unlock_err;
5807 } 5904 }
5808 5905
5809 /* Just a good old fashioned hole, return */ 5906 /* Just a good old fashioned hole, return */
5810 if (!create && (em->block_start == EXTENT_MAP_HOLE || 5907 if (!create && (em->block_start == EXTENT_MAP_HOLE ||
5811 test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { 5908 test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
5812 free_extent_map(em); 5909 free_extent_map(em);
5813 /* DIO will do one hole at a time, so just unlock a sector */ 5910 ret = 0;
5814 unlock_extent(&BTRFS_I(inode)->io_tree, start, 5911 goto unlock_err;
5815 start + root->sectorsize - 1);
5816 return 0;
5817 } 5912 }
5818 5913
5819 /* 5914 /*
@@ -5826,8 +5921,9 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
5826 * 5921 *
5827 */ 5922 */
5828 if (!create) { 5923 if (!create) {
5829 len = em->len - (start - em->start); 5924 len = min(len, em->len - (start - em->start));
5830 goto map; 5925 lockstart = start + len;
5926 goto unlock;
5831 } 5927 }
5832 5928
5833 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) || 5929 if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) ||
@@ -5859,7 +5955,7 @@ static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock,
5859 btrfs_end_transaction(trans, root); 5955 btrfs_end_transaction(trans, root);
5860 if (ret) { 5956 if (ret) {
5861 free_extent_map(em); 5957 free_extent_map(em);
5862 return ret; 5958 goto unlock_err;
5863 } 5959 }
5864 goto unlock; 5960 goto unlock;
5865 } 5961 }
@@ -5872,14 +5968,12 @@ must_cow:
5872 */ 5968 */
5873 len = bh_result->b_size; 5969 len = bh_result->b_size;
5874 em = btrfs_new_extent_direct(inode, em, start, len); 5970 em = btrfs_new_extent_direct(inode, em, start, len);
5875 if (IS_ERR(em)) 5971 if (IS_ERR(em)) {
5876 return PTR_ERR(em); 5972 ret = PTR_ERR(em);
5973 goto unlock_err;
5974 }
5877 len = min(len, em->len - (start - em->start)); 5975 len = min(len, em->len - (start - em->start));
5878unlock: 5976unlock:
5879 clear_extent_bit(&BTRFS_I(inode)->io_tree, start, start + len - 1,
5880 EXTENT_LOCKED | EXTENT_DELALLOC | EXTENT_DIRTY, 1,
5881 0, NULL, GFP_NOFS);
5882map:
5883 bh_result->b_blocknr = (em->block_start + (start - em->start)) >> 5977 bh_result->b_blocknr = (em->block_start + (start - em->start)) >>
5884 inode->i_blkbits; 5978 inode->i_blkbits;
5885 bh_result->b_size = len; 5979 bh_result->b_size = len;
@@ -5897,9 +5991,44 @@ map:
5897 i_size_write(inode, start + len); 5991 i_size_write(inode, start + len);
5898 } 5992 }
5899 5993
5994 /*
5995 * In the case of write we need to clear and unlock the entire range,
5996 * in the case of read we need to unlock only the end area that we
5997 * aren't using if there is any left over space.
5998 */
5999 if (lockstart < lockend) {
6000 if (create && len < lockend - lockstart) {
6001 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
6002 lockstart + len - 1, unlock_bits, 1, 0,
6003 &cached_state, GFP_NOFS);
6004 /*
6005 * Beside unlock, we also need to cleanup reserved space
6006 * for the left range by attaching EXTENT_DO_ACCOUNTING.
6007 */
6008 clear_extent_bit(&BTRFS_I(inode)->io_tree,
6009 lockstart + len, lockend,
6010 unlock_bits | EXTENT_DO_ACCOUNTING,
6011 1, 0, NULL, GFP_NOFS);
6012 } else {
6013 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
6014 lockend, unlock_bits, 1, 0,
6015 &cached_state, GFP_NOFS);
6016 }
6017 } else {
6018 free_extent_state(cached_state);
6019 }
6020
5900 free_extent_map(em); 6021 free_extent_map(em);
5901 6022
5902 return 0; 6023 return 0;
6024
6025unlock_err:
6026 if (create)
6027 unlock_bits |= EXTENT_DO_ACCOUNTING;
6028
6029 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
6030 unlock_bits, 1, 0, &cached_state, GFP_NOFS);
6031 return ret;
5903} 6032}
5904 6033
5905struct btrfs_dio_private { 6034struct btrfs_dio_private {
@@ -5907,7 +6036,6 @@ struct btrfs_dio_private {
5907 u64 logical_offset; 6036 u64 logical_offset;
5908 u64 disk_bytenr; 6037 u64 disk_bytenr;
5909 u64 bytes; 6038 u64 bytes;
5910 u32 *csums;
5911 void *private; 6039 void *private;
5912 6040
5913 /* number of bios pending for this dio */ 6041 /* number of bios pending for this dio */
@@ -5927,7 +6055,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
5927 struct inode *inode = dip->inode; 6055 struct inode *inode = dip->inode;
5928 struct btrfs_root *root = BTRFS_I(inode)->root; 6056 struct btrfs_root *root = BTRFS_I(inode)->root;
5929 u64 start; 6057 u64 start;
5930 u32 *private = dip->csums;
5931 6058
5932 start = dip->logical_offset; 6059 start = dip->logical_offset;
5933 do { 6060 do {
@@ -5935,8 +6062,12 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
5935 struct page *page = bvec->bv_page; 6062 struct page *page = bvec->bv_page;
5936 char *kaddr; 6063 char *kaddr;
5937 u32 csum = ~(u32)0; 6064 u32 csum = ~(u32)0;
6065 u64 private = ~(u32)0;
5938 unsigned long flags; 6066 unsigned long flags;
5939 6067
6068 if (get_state_private(&BTRFS_I(inode)->io_tree,
6069 start, &private))
6070 goto failed;
5940 local_irq_save(flags); 6071 local_irq_save(flags);
5941 kaddr = kmap_atomic(page); 6072 kaddr = kmap_atomic(page);
5942 csum = btrfs_csum_data(root, kaddr + bvec->bv_offset, 6073 csum = btrfs_csum_data(root, kaddr + bvec->bv_offset,
@@ -5946,18 +6077,18 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
5946 local_irq_restore(flags); 6077 local_irq_restore(flags);
5947 6078
5948 flush_dcache_page(bvec->bv_page); 6079 flush_dcache_page(bvec->bv_page);
5949 if (csum != *private) { 6080 if (csum != private) {
6081failed:
5950 printk(KERN_ERR "btrfs csum failed ino %llu off" 6082 printk(KERN_ERR "btrfs csum failed ino %llu off"
5951 " %llu csum %u private %u\n", 6083 " %llu csum %u private %u\n",
5952 (unsigned long long)btrfs_ino(inode), 6084 (unsigned long long)btrfs_ino(inode),
5953 (unsigned long long)start, 6085 (unsigned long long)start,
5954 csum, *private); 6086 csum, (unsigned)private);
5955 err = -EIO; 6087 err = -EIO;
5956 } 6088 }
5957 } 6089 }
5958 6090
5959 start += bvec->bv_len; 6091 start += bvec->bv_len;
5960 private++;
5961 bvec++; 6092 bvec++;
5962 } while (bvec <= bvec_end); 6093 } while (bvec <= bvec_end);
5963 6094
@@ -5965,7 +6096,6 @@ static void btrfs_endio_direct_read(struct bio *bio, int err)
5965 dip->logical_offset + dip->bytes - 1); 6096 dip->logical_offset + dip->bytes - 1);
5966 bio->bi_private = dip->private; 6097 bio->bi_private = dip->private;
5967 6098
5968 kfree(dip->csums);
5969 kfree(dip); 6099 kfree(dip);
5970 6100
5971 /* If we had a csum failure make sure to clear the uptodate flag */ 6101 /* If we had a csum failure make sure to clear the uptodate flag */
@@ -6071,7 +6201,7 @@ static struct bio *btrfs_dio_bio_alloc(struct block_device *bdev,
6071 6201
6072static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode, 6202static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
6073 int rw, u64 file_offset, int skip_sum, 6203 int rw, u64 file_offset, int skip_sum,
6074 u32 *csums, int async_submit) 6204 int async_submit)
6075{ 6205{
6076 int write = rw & REQ_WRITE; 6206 int write = rw & REQ_WRITE;
6077 struct btrfs_root *root = BTRFS_I(inode)->root; 6207 struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -6104,8 +6234,7 @@ static inline int __btrfs_submit_dio_bio(struct bio *bio, struct inode *inode,
6104 if (ret) 6234 if (ret)
6105 goto err; 6235 goto err;
6106 } else if (!skip_sum) { 6236 } else if (!skip_sum) {
6107 ret = btrfs_lookup_bio_sums_dio(root, inode, bio, 6237 ret = btrfs_lookup_bio_sums_dio(root, inode, bio, file_offset);
6108 file_offset, csums);
6109 if (ret) 6238 if (ret)
6110 goto err; 6239 goto err;
6111 } 6240 }
@@ -6131,10 +6260,8 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6131 u64 submit_len = 0; 6260 u64 submit_len = 0;
6132 u64 map_length; 6261 u64 map_length;
6133 int nr_pages = 0; 6262 int nr_pages = 0;
6134 u32 *csums = dip->csums;
6135 int ret = 0; 6263 int ret = 0;
6136 int async_submit = 0; 6264 int async_submit = 0;
6137 int write = rw & REQ_WRITE;
6138 6265
6139 map_length = orig_bio->bi_size; 6266 map_length = orig_bio->bi_size;
6140 ret = btrfs_map_block(map_tree, READ, start_sector << 9, 6267 ret = btrfs_map_block(map_tree, READ, start_sector << 9,
@@ -6170,16 +6297,13 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6170 atomic_inc(&dip->pending_bios); 6297 atomic_inc(&dip->pending_bios);
6171 ret = __btrfs_submit_dio_bio(bio, inode, rw, 6298 ret = __btrfs_submit_dio_bio(bio, inode, rw,
6172 file_offset, skip_sum, 6299 file_offset, skip_sum,
6173 csums, async_submit); 6300 async_submit);
6174 if (ret) { 6301 if (ret) {
6175 bio_put(bio); 6302 bio_put(bio);
6176 atomic_dec(&dip->pending_bios); 6303 atomic_dec(&dip->pending_bios);
6177 goto out_err; 6304 goto out_err;
6178 } 6305 }
6179 6306
6180 /* Write's use the ordered csums */
6181 if (!write && !skip_sum)
6182 csums = csums + nr_pages;
6183 start_sector += submit_len >> 9; 6307 start_sector += submit_len >> 9;
6184 file_offset += submit_len; 6308 file_offset += submit_len;
6185 6309
@@ -6209,7 +6333,7 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
6209 6333
6210submit: 6334submit:
6211 ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum, 6335 ret = __btrfs_submit_dio_bio(bio, inode, rw, file_offset, skip_sum,
6212 csums, async_submit); 6336 async_submit);
6213 if (!ret) 6337 if (!ret)
6214 return 0; 6338 return 0;
6215 6339
@@ -6245,17 +6369,6 @@ static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode,
6245 ret = -ENOMEM; 6369 ret = -ENOMEM;
6246 goto free_ordered; 6370 goto free_ordered;
6247 } 6371 }
6248 dip->csums = NULL;
6249
6250 /* Write's use the ordered csum stuff, so we don't need dip->csums */
6251 if (!write && !skip_sum) {
6252 dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS);
6253 if (!dip->csums) {
6254 kfree(dip);
6255 ret = -ENOMEM;
6256 goto free_ordered;
6257 }
6258 }
6259 6372
6260 dip->private = bio->bi_private; 6373 dip->private = bio->bi_private;
6261 dip->inode = inode; 6374 dip->inode = inode;
@@ -6340,132 +6453,22 @@ static ssize_t check_direct_IO(struct btrfs_root *root, int rw, struct kiocb *io
6340out: 6453out:
6341 return retval; 6454 return retval;
6342} 6455}
6456
6343static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, 6457static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb,
6344 const struct iovec *iov, loff_t offset, 6458 const struct iovec *iov, loff_t offset,
6345 unsigned long nr_segs) 6459 unsigned long nr_segs)
6346{ 6460{
6347 struct file *file = iocb->ki_filp; 6461 struct file *file = iocb->ki_filp;
6348 struct inode *inode = file->f_mapping->host; 6462 struct inode *inode = file->f_mapping->host;
6349 struct btrfs_ordered_extent *ordered;
6350 struct extent_state *cached_state = NULL;
6351 u64 lockstart, lockend;
6352 ssize_t ret;
6353 int writing = rw & WRITE;
6354 int write_bits = 0;
6355 size_t count = iov_length(iov, nr_segs);
6356 6463
6357 if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov, 6464 if (check_direct_IO(BTRFS_I(inode)->root, rw, iocb, iov,
6358 offset, nr_segs)) { 6465 offset, nr_segs))
6359 return 0; 6466 return 0;
6360 }
6361
6362 lockstart = offset;
6363 lockend = offset + count - 1;
6364
6365 if (writing) {
6366 ret = btrfs_delalloc_reserve_space(inode, count);
6367 if (ret)
6368 goto out;
6369 }
6370
6371 while (1) {
6372 lock_extent_bits(&BTRFS_I(inode)->io_tree, lockstart, lockend,
6373 0, &cached_state);
6374 /*
6375 * We're concerned with the entire range that we're going to be
6376 * doing DIO to, so we need to make sure theres no ordered
6377 * extents in this range.
6378 */
6379 ordered = btrfs_lookup_ordered_range(inode, lockstart,
6380 lockend - lockstart + 1);
6381
6382 /*
6383 * We need to make sure there are no buffered pages in this
6384 * range either, we could have raced between the invalidate in
6385 * generic_file_direct_write and locking the extent. The
6386 * invalidate needs to happen so that reads after a write do not
6387 * get stale data.
6388 */
6389 if (!ordered && (!writing ||
6390 !test_range_bit(&BTRFS_I(inode)->io_tree,
6391 lockstart, lockend, EXTENT_UPTODATE, 0,
6392 cached_state)))
6393 break;
6394
6395 unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
6396 &cached_state, GFP_NOFS);
6397
6398 if (ordered) {
6399 btrfs_start_ordered_extent(inode, ordered, 1);
6400 btrfs_put_ordered_extent(ordered);
6401 } else {
6402 /* Screw you mmap */
6403 ret = filemap_write_and_wait_range(file->f_mapping,
6404 lockstart,
6405 lockend);
6406 if (ret)
6407 goto out;
6408
6409 /*
6410 * If we found a page that couldn't be invalidated just
6411 * fall back to buffered.
6412 */
6413 ret = invalidate_inode_pages2_range(file->f_mapping,
6414 lockstart >> PAGE_CACHE_SHIFT,
6415 lockend >> PAGE_CACHE_SHIFT);
6416 if (ret) {
6417 if (ret == -EBUSY)
6418 ret = 0;
6419 goto out;
6420 }
6421 }
6422
6423 cond_resched();
6424 }
6425 6467
6426 /* 6468 return __blockdev_direct_IO(rw, iocb, inode,
6427 * we don't use btrfs_set_extent_delalloc because we don't want
6428 * the dirty or uptodate bits
6429 */
6430 if (writing) {
6431 write_bits = EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING;
6432 ret = set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart, lockend,
6433 EXTENT_DELALLOC, NULL, &cached_state,
6434 GFP_NOFS);
6435 if (ret) {
6436 clear_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
6437 lockend, EXTENT_LOCKED | write_bits,
6438 1, 0, &cached_state, GFP_NOFS);
6439 goto out;
6440 }
6441 }
6442
6443 free_extent_state(cached_state);
6444 cached_state = NULL;
6445
6446 ret = __blockdev_direct_IO(rw, iocb, inode,
6447 BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev, 6469 BTRFS_I(inode)->root->fs_info->fs_devices->latest_bdev,
6448 iov, offset, nr_segs, btrfs_get_blocks_direct, NULL, 6470 iov, offset, nr_segs, btrfs_get_blocks_direct, NULL,
6449 btrfs_submit_direct, 0); 6471 btrfs_submit_direct, 0);
6450
6451 if (ret < 0 && ret != -EIOCBQUEUED) {
6452 clear_extent_bit(&BTRFS_I(inode)->io_tree, offset,
6453 offset + iov_length(iov, nr_segs) - 1,
6454 EXTENT_LOCKED | write_bits, 1, 0,
6455 &cached_state, GFP_NOFS);
6456 } else if (ret >= 0 && ret < iov_length(iov, nr_segs)) {
6457 /*
6458 * We're falling back to buffered, unlock the section we didn't
6459 * do IO on.
6460 */
6461 clear_extent_bit(&BTRFS_I(inode)->io_tree, offset + ret,
6462 offset + iov_length(iov, nr_segs) - 1,
6463 EXTENT_LOCKED | write_bits, 1, 0,
6464 &cached_state, GFP_NOFS);
6465 }
6466out:
6467 free_extent_state(cached_state);
6468 return ret;
6469} 6472}
6470 6473
6471static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, 6474static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,