diff options
-rw-r--r-- | fs/btrfs/ctree.h | 2 | ||||
-rw-r--r-- | fs/btrfs/file-item.c | 25 | ||||
-rw-r--r-- | fs/btrfs/file.c | 69 | ||||
-rw-r--r-- | fs/btrfs/inode.c | 487 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.c | 75 | ||||
-rw-r--r-- | fs/btrfs/ordered-data.h | 9 |
6 files changed, 631 insertions, 36 deletions
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5ed0223d1cbe..e9bf86415e86 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h | |||
@@ -2317,6 +2317,8 @@ int btrfs_del_csums(struct btrfs_trans_handle *trans, | |||
2317 | struct btrfs_root *root, u64 bytenr, u64 len); | 2317 | struct btrfs_root *root, u64 bytenr, u64 len); |
2318 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | 2318 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, |
2319 | struct bio *bio, u32 *dst); | 2319 | struct bio *bio, u32 *dst); |
2320 | int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, | ||
2321 | struct bio *bio, u64 logical_offset, u32 *dst); | ||
2320 | int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, | 2322 | int btrfs_insert_file_extent(struct btrfs_trans_handle *trans, |
2321 | struct btrfs_root *root, | 2323 | struct btrfs_root *root, |
2322 | u64 objectid, u64 pos, | 2324 | u64 objectid, u64 pos, |
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c index 21aead39a76c..a562a250ae77 100644 --- a/fs/btrfs/file-item.c +++ b/fs/btrfs/file-item.c | |||
@@ -149,13 +149,14 @@ int btrfs_lookup_file_extent(struct btrfs_trans_handle *trans, | |||
149 | } | 149 | } |
150 | 150 | ||
151 | 151 | ||
152 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | 152 | static int __btrfs_lookup_bio_sums(struct btrfs_root *root, |
153 | struct bio *bio, u32 *dst) | 153 | struct inode *inode, struct bio *bio, |
154 | u64 logical_offset, u32 *dst, int dio) | ||
154 | { | 155 | { |
155 | u32 sum; | 156 | u32 sum; |
156 | struct bio_vec *bvec = bio->bi_io_vec; | 157 | struct bio_vec *bvec = bio->bi_io_vec; |
157 | int bio_index = 0; | 158 | int bio_index = 0; |
158 | u64 offset; | 159 | u64 offset = 0; |
159 | u64 item_start_offset = 0; | 160 | u64 item_start_offset = 0; |
160 | u64 item_last_offset = 0; | 161 | u64 item_last_offset = 0; |
161 | u64 disk_bytenr; | 162 | u64 disk_bytenr; |
@@ -174,8 +175,11 @@ int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | |||
174 | WARN_ON(bio->bi_vcnt <= 0); | 175 | WARN_ON(bio->bi_vcnt <= 0); |
175 | 176 | ||
176 | disk_bytenr = (u64)bio->bi_sector << 9; | 177 | disk_bytenr = (u64)bio->bi_sector << 9; |
178 | if (dio) | ||
179 | offset = logical_offset; | ||
177 | while (bio_index < bio->bi_vcnt) { | 180 | while (bio_index < bio->bi_vcnt) { |
178 | offset = page_offset(bvec->bv_page) + bvec->bv_offset; | 181 | if (!dio) |
182 | offset = page_offset(bvec->bv_page) + bvec->bv_offset; | ||
179 | ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum); | 183 | ret = btrfs_find_ordered_sum(inode, offset, disk_bytenr, &sum); |
180 | if (ret == 0) | 184 | if (ret == 0) |
181 | goto found; | 185 | goto found; |
@@ -238,6 +242,7 @@ found: | |||
238 | else | 242 | else |
239 | set_state_private(io_tree, offset, sum); | 243 | set_state_private(io_tree, offset, sum); |
240 | disk_bytenr += bvec->bv_len; | 244 | disk_bytenr += bvec->bv_len; |
245 | offset += bvec->bv_len; | ||
241 | bio_index++; | 246 | bio_index++; |
242 | bvec++; | 247 | bvec++; |
243 | } | 248 | } |
@@ -245,6 +250,18 @@ found: | |||
245 | return 0; | 250 | return 0; |
246 | } | 251 | } |
247 | 252 | ||
253 | int btrfs_lookup_bio_sums(struct btrfs_root *root, struct inode *inode, | ||
254 | struct bio *bio, u32 *dst) | ||
255 | { | ||
256 | return __btrfs_lookup_bio_sums(root, inode, bio, 0, dst, 0); | ||
257 | } | ||
258 | |||
259 | int btrfs_lookup_bio_sums_dio(struct btrfs_root *root, struct inode *inode, | ||
260 | struct bio *bio, u64 offset, u32 *dst) | ||
261 | { | ||
262 | return __btrfs_lookup_bio_sums(root, inode, bio, offset, dst, 1); | ||
263 | } | ||
264 | |||
248 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, | 265 | int btrfs_lookup_csums_range(struct btrfs_root *root, u64 start, u64 end, |
249 | struct list_head *list) | 266 | struct list_head *list) |
250 | { | 267 | { |
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 6d8f817eadb5..a28810abfb98 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c | |||
@@ -822,6 +822,47 @@ again: | |||
822 | return 0; | 822 | return 0; |
823 | } | 823 | } |
824 | 824 | ||
825 | /* Copied from read-write.c */ | ||
826 | static void wait_on_retry_sync_kiocb(struct kiocb *iocb) | ||
827 | { | ||
828 | set_current_state(TASK_UNINTERRUPTIBLE); | ||
829 | if (!kiocbIsKicked(iocb)) | ||
830 | schedule(); | ||
831 | else | ||
832 | kiocbClearKicked(iocb); | ||
833 | __set_current_state(TASK_RUNNING); | ||
834 | } | ||
835 | |||
836 | /* | ||
837 | * Just a copy of what do_sync_write does. | ||
838 | */ | ||
839 | static ssize_t __btrfs_direct_write(struct file *file, const char __user *buf, | ||
840 | size_t count, loff_t pos, loff_t *ppos) | ||
841 | { | ||
842 | struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = count }; | ||
843 | unsigned long nr_segs = 1; | ||
844 | struct kiocb kiocb; | ||
845 | ssize_t ret; | ||
846 | |||
847 | init_sync_kiocb(&kiocb, file); | ||
848 | kiocb.ki_pos = pos; | ||
849 | kiocb.ki_left = count; | ||
850 | kiocb.ki_nbytes = count; | ||
851 | |||
852 | while (1) { | ||
853 | ret = generic_file_direct_write(&kiocb, &iov, &nr_segs, pos, | ||
854 | ppos, count, count); | ||
855 | if (ret != -EIOCBRETRY) | ||
856 | break; | ||
857 | wait_on_retry_sync_kiocb(&kiocb); | ||
858 | } | ||
859 | |||
860 | if (ret == -EIOCBQUEUED) | ||
861 | ret = wait_on_sync_kiocb(&kiocb); | ||
862 | *ppos = kiocb.ki_pos; | ||
863 | return ret; | ||
864 | } | ||
865 | |||
825 | static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | 866 | static ssize_t btrfs_file_write(struct file *file, const char __user *buf, |
826 | size_t count, loff_t *ppos) | 867 | size_t count, loff_t *ppos) |
827 | { | 868 | { |
@@ -838,12 +879,11 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
838 | unsigned long first_index; | 879 | unsigned long first_index; |
839 | unsigned long last_index; | 880 | unsigned long last_index; |
840 | int will_write; | 881 | int will_write; |
882 | int buffered = 0; | ||
841 | 883 | ||
842 | will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || | 884 | will_write = ((file->f_flags & O_DSYNC) || IS_SYNC(inode) || |
843 | (file->f_flags & O_DIRECT)); | 885 | (file->f_flags & O_DIRECT)); |
844 | 886 | ||
845 | nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, | ||
846 | PAGE_CACHE_SIZE / (sizeof(struct page *))); | ||
847 | pinned[0] = NULL; | 887 | pinned[0] = NULL; |
848 | pinned[1] = NULL; | 888 | pinned[1] = NULL; |
849 | 889 | ||
@@ -867,13 +907,34 @@ static ssize_t btrfs_file_write(struct file *file, const char __user *buf, | |||
867 | goto out; | 907 | goto out; |
868 | 908 | ||
869 | file_update_time(file); | 909 | file_update_time(file); |
910 | BTRFS_I(inode)->sequence++; | ||
911 | |||
912 | if (unlikely(file->f_flags & O_DIRECT)) { | ||
913 | num_written = __btrfs_direct_write(file, buf, count, pos, | ||
914 | ppos); | ||
915 | pos += num_written; | ||
916 | count -= num_written; | ||
917 | |||
918 | /* We've written everything we wanted to, exit */ | ||
919 | if (num_written < 0 || !count) | ||
920 | goto out; | ||
870 | 921 | ||
922 | /* | ||
923 | * We are going to do buffered for the rest of the range, so we | ||
924 | * need to make sure to invalidate the buffered pages when we're | ||
925 | * done. | ||
926 | */ | ||
927 | buffered = 1; | ||
928 | buf += num_written; | ||
929 | } | ||
930 | |||
931 | nrptrs = min((count + PAGE_CACHE_SIZE - 1) / PAGE_CACHE_SIZE, | ||
932 | PAGE_CACHE_SIZE / (sizeof(struct page *))); | ||
871 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); | 933 | pages = kmalloc(nrptrs * sizeof(struct page *), GFP_KERNEL); |
872 | 934 | ||
873 | /* generic_write_checks can change our pos */ | 935 | /* generic_write_checks can change our pos */ |
874 | start_pos = pos; | 936 | start_pos = pos; |
875 | 937 | ||
876 | BTRFS_I(inode)->sequence++; | ||
877 | first_index = pos >> PAGE_CACHE_SHIFT; | 938 | first_index = pos >> PAGE_CACHE_SHIFT; |
878 | last_index = (pos + count) >> PAGE_CACHE_SHIFT; | 939 | last_index = (pos + count) >> PAGE_CACHE_SHIFT; |
879 | 940 | ||
@@ -1007,7 +1068,7 @@ out: | |||
1007 | btrfs_end_transaction(trans, root); | 1068 | btrfs_end_transaction(trans, root); |
1008 | } | 1069 | } |
1009 | } | 1070 | } |
1010 | if (file->f_flags & O_DIRECT) { | 1071 | if (file->f_flags & O_DIRECT && buffered) { |
1011 | invalidate_mapping_pages(inode->i_mapping, | 1072 | invalidate_mapping_pages(inode->i_mapping, |
1012 | start_pos >> PAGE_CACHE_SHIFT, | 1073 | start_pos >> PAGE_CACHE_SHIFT, |
1013 | (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); | 1074 | (start_pos + num_written - 1) >> PAGE_CACHE_SHIFT); |
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 460dd512eebd..1695440a59a4 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c | |||
@@ -698,6 +698,38 @@ retry: | |||
698 | return 0; | 698 | return 0; |
699 | } | 699 | } |
700 | 700 | ||
701 | static u64 get_extent_allocation_hint(struct inode *inode, u64 start, | ||
702 | u64 num_bytes) | ||
703 | { | ||
704 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
705 | struct extent_map *em; | ||
706 | u64 alloc_hint = 0; | ||
707 | |||
708 | read_lock(&em_tree->lock); | ||
709 | em = search_extent_mapping(em_tree, start, num_bytes); | ||
710 | if (em) { | ||
711 | /* | ||
712 | * if block start isn't an actual block number then find the | ||
713 | * first block in this inode and use that as a hint. If that | ||
714 | * block is also bogus then just don't worry about it. | ||
715 | */ | ||
716 | if (em->block_start >= EXTENT_MAP_LAST_BYTE) { | ||
717 | free_extent_map(em); | ||
718 | em = search_extent_mapping(em_tree, 0, 0); | ||
719 | if (em && em->block_start < EXTENT_MAP_LAST_BYTE) | ||
720 | alloc_hint = em->block_start; | ||
721 | if (em) | ||
722 | free_extent_map(em); | ||
723 | } else { | ||
724 | alloc_hint = em->block_start; | ||
725 | free_extent_map(em); | ||
726 | } | ||
727 | } | ||
728 | read_unlock(&em_tree->lock); | ||
729 | |||
730 | return alloc_hint; | ||
731 | } | ||
732 | |||
701 | /* | 733 | /* |
702 | * when extent_io.c finds a delayed allocation range in the file, | 734 | * when extent_io.c finds a delayed allocation range in the file, |
703 | * the call backs end up in this code. The basic idea is to | 735 | * the call backs end up in this code. The basic idea is to |
@@ -770,29 +802,7 @@ static noinline int cow_file_range(struct inode *inode, | |||
770 | BUG_ON(disk_num_bytes > | 802 | BUG_ON(disk_num_bytes > |
771 | btrfs_super_total_bytes(&root->fs_info->super_copy)); | 803 | btrfs_super_total_bytes(&root->fs_info->super_copy)); |
772 | 804 | ||
773 | 805 | alloc_hint = get_extent_allocation_hint(inode, start, num_bytes); | |
774 | read_lock(&BTRFS_I(inode)->extent_tree.lock); | ||
775 | em = search_extent_mapping(&BTRFS_I(inode)->extent_tree, | ||
776 | start, num_bytes); | ||
777 | if (em) { | ||
778 | /* | ||
779 | * if block start isn't an actual block number then find the | ||
780 | * first block in this inode and use that as a hint. If that | ||
781 | * block is also bogus then just don't worry about it. | ||
782 | */ | ||
783 | if (em->block_start >= EXTENT_MAP_LAST_BYTE) { | ||
784 | free_extent_map(em); | ||
785 | em = search_extent_mapping(em_tree, 0, 0); | ||
786 | if (em && em->block_start < EXTENT_MAP_LAST_BYTE) | ||
787 | alloc_hint = em->block_start; | ||
788 | if (em) | ||
789 | free_extent_map(em); | ||
790 | } else { | ||
791 | alloc_hint = em->block_start; | ||
792 | free_extent_map(em); | ||
793 | } | ||
794 | } | ||
795 | read_unlock(&BTRFS_I(inode)->extent_tree.lock); | ||
796 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); | 806 | btrfs_drop_extent_cache(inode, start, start + num_bytes - 1, 0); |
797 | 807 | ||
798 | while (disk_num_bytes > 0) { | 808 | while (disk_num_bytes > 0) { |
@@ -5171,11 +5181,440 @@ out: | |||
5171 | return em; | 5181 | return em; |
5172 | } | 5182 | } |
5173 | 5183 | ||
5184 | static struct extent_map *btrfs_new_extent_direct(struct inode *inode, | ||
5185 | u64 start, u64 len) | ||
5186 | { | ||
5187 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5188 | struct btrfs_trans_handle *trans; | ||
5189 | struct extent_map *em; | ||
5190 | struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree; | ||
5191 | struct btrfs_key ins; | ||
5192 | u64 alloc_hint; | ||
5193 | int ret; | ||
5194 | |||
5195 | btrfs_drop_extent_cache(inode, start, start + len - 1, 0); | ||
5196 | |||
5197 | trans = btrfs_join_transaction(root, 0); | ||
5198 | if (!trans) | ||
5199 | return ERR_PTR(-ENOMEM); | ||
5200 | |||
5201 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
5202 | |||
5203 | alloc_hint = get_extent_allocation_hint(inode, start, len); | ||
5204 | ret = btrfs_reserve_extent(trans, root, len, root->sectorsize, 0, | ||
5205 | alloc_hint, (u64)-1, &ins, 1); | ||
5206 | if (ret) { | ||
5207 | em = ERR_PTR(ret); | ||
5208 | goto out; | ||
5209 | } | ||
5210 | |||
5211 | em = alloc_extent_map(GFP_NOFS); | ||
5212 | if (!em) { | ||
5213 | em = ERR_PTR(-ENOMEM); | ||
5214 | goto out; | ||
5215 | } | ||
5216 | |||
5217 | em->start = start; | ||
5218 | em->orig_start = em->start; | ||
5219 | em->len = ins.offset; | ||
5220 | |||
5221 | em->block_start = ins.objectid; | ||
5222 | em->block_len = ins.offset; | ||
5223 | em->bdev = root->fs_info->fs_devices->latest_bdev; | ||
5224 | set_bit(EXTENT_FLAG_PINNED, &em->flags); | ||
5225 | |||
5226 | while (1) { | ||
5227 | write_lock(&em_tree->lock); | ||
5228 | ret = add_extent_mapping(em_tree, em); | ||
5229 | write_unlock(&em_tree->lock); | ||
5230 | if (ret != -EEXIST) | ||
5231 | break; | ||
5232 | btrfs_drop_extent_cache(inode, start, start + em->len - 1, 0); | ||
5233 | } | ||
5234 | |||
5235 | ret = btrfs_add_ordered_extent_dio(inode, start, ins.objectid, | ||
5236 | ins.offset, ins.offset, 0); | ||
5237 | if (ret) { | ||
5238 | btrfs_free_reserved_extent(root, ins.objectid, ins.offset); | ||
5239 | em = ERR_PTR(ret); | ||
5240 | } | ||
5241 | out: | ||
5242 | btrfs_end_transaction(trans, root); | ||
5243 | return em; | ||
5244 | } | ||
5245 | |||
5246 | static int btrfs_get_blocks_direct(struct inode *inode, sector_t iblock, | ||
5247 | struct buffer_head *bh_result, int create) | ||
5248 | { | ||
5249 | struct extent_map *em; | ||
5250 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5251 | u64 start = iblock << inode->i_blkbits; | ||
5252 | u64 len = bh_result->b_size; | ||
5253 | |||
5254 | em = btrfs_get_extent(inode, NULL, 0, start, len, 0); | ||
5255 | if (IS_ERR(em)) | ||
5256 | return PTR_ERR(em); | ||
5257 | |||
5258 | /* | ||
5259 | * Ok for INLINE and COMPRESSED extents we need to fallback on buffered | ||
5260 | * io. INLINE is special, and we could probably kludge it in here, but | ||
5261 | * it's still buffered so for safety lets just fall back to the generic | ||
5262 | * buffered path. | ||
5263 | * | ||
5264 | * For COMPRESSED we _have_ to read the entire extent in so we can | ||
5265 | * decompress it, so there will be buffering required no matter what we | ||
5266 | * do, so go ahead and fallback to buffered. | ||
5267 | * | ||
5268 | * We return -ENOTBLK because thats what makes DIO go ahead and go back | ||
5269 | * to buffered IO. Don't blame me, this is the price we pay for using | ||
5270 | * the generic code. | ||
5271 | */ | ||
5272 | if (test_bit(EXTENT_FLAG_COMPRESSED, &em->flags) || | ||
5273 | em->block_start == EXTENT_MAP_INLINE) { | ||
5274 | free_extent_map(em); | ||
5275 | return -ENOTBLK; | ||
5276 | } | ||
5277 | |||
5278 | /* Just a good old fashioned hole, return */ | ||
5279 | if (!create && (em->block_start == EXTENT_MAP_HOLE || | ||
5280 | test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) { | ||
5281 | free_extent_map(em); | ||
5282 | /* DIO will do one hole at a time, so just unlock a sector */ | ||
5283 | unlock_extent(&BTRFS_I(inode)->io_tree, start, | ||
5284 | start + root->sectorsize - 1, GFP_NOFS); | ||
5285 | return 0; | ||
5286 | } | ||
5287 | |||
5288 | /* | ||
5289 | * We don't allocate a new extent in the following cases | ||
5290 | * | ||
5291 | * 1) The inode is marked as NODATACOW. In this case we'll just use the | ||
5292 | * existing extent. | ||
5293 | * 2) The extent is marked as PREALLOC. We're good to go here and can | ||
5294 | * just use the extent. | ||
5295 | * | ||
5296 | */ | ||
5297 | if (!create) | ||
5298 | goto map; | ||
5299 | |||
5300 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags) || | ||
5301 | ((BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW) && | ||
5302 | em->block_start != EXTENT_MAP_HOLE)) { | ||
5303 | u64 block_start; | ||
5304 | int type; | ||
5305 | int ret; | ||
5306 | |||
5307 | if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) | ||
5308 | type = BTRFS_ORDERED_PREALLOC; | ||
5309 | else | ||
5310 | type = BTRFS_ORDERED_NOCOW; | ||
5311 | len = min(len, em->block_len - (start - em->start)); | ||
5312 | block_start = em->block_start + (start - em->start); | ||
5313 | ret = btrfs_add_ordered_extent_dio(inode, start, | ||
5314 | start, len, len, type); | ||
5315 | if (ret) { | ||
5316 | free_extent_map(em); | ||
5317 | return ret; | ||
5318 | } | ||
5319 | } else { | ||
5320 | free_extent_map(em); | ||
5321 | em = btrfs_new_extent_direct(inode, start, len); | ||
5322 | if (IS_ERR(em)) | ||
5323 | return PTR_ERR(em); | ||
5324 | len = min(len, em->block_len); | ||
5325 | } | ||
5326 | unlock_extent(&BTRFS_I(inode)->io_tree, start, start + len - 1, | ||
5327 | GFP_NOFS); | ||
5328 | map: | ||
5329 | bh_result->b_blocknr = (em->block_start + (start - em->start)) >> | ||
5330 | inode->i_blkbits; | ||
5331 | bh_result->b_size = em->len - (start - em->start); | ||
5332 | bh_result->b_bdev = em->bdev; | ||
5333 | set_buffer_mapped(bh_result); | ||
5334 | if (create && !test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) | ||
5335 | set_buffer_new(bh_result); | ||
5336 | |||
5337 | free_extent_map(em); | ||
5338 | |||
5339 | return 0; | ||
5340 | } | ||
5341 | |||
5342 | struct btrfs_dio_private { | ||
5343 | struct inode *inode; | ||
5344 | u64 logical_offset; | ||
5345 | u64 disk_bytenr; | ||
5346 | u64 bytes; | ||
5347 | u32 *csums; | ||
5348 | void *private; | ||
5349 | }; | ||
5350 | |||
5351 | static void btrfs_endio_direct_read(struct bio *bio, int err) | ||
5352 | { | ||
5353 | struct bio_vec *bvec_end = bio->bi_io_vec + bio->bi_vcnt - 1; | ||
5354 | struct bio_vec *bvec = bio->bi_io_vec; | ||
5355 | struct btrfs_dio_private *dip = bio->bi_private; | ||
5356 | struct inode *inode = dip->inode; | ||
5357 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5358 | u64 start; | ||
5359 | u32 *private = dip->csums; | ||
5360 | |||
5361 | start = dip->logical_offset; | ||
5362 | do { | ||
5363 | if (!(BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)) { | ||
5364 | struct page *page = bvec->bv_page; | ||
5365 | char *kaddr; | ||
5366 | u32 csum = ~(u32)0; | ||
5367 | unsigned long flags; | ||
5368 | |||
5369 | local_irq_save(flags); | ||
5370 | kaddr = kmap_atomic(page, KM_IRQ0); | ||
5371 | csum = btrfs_csum_data(root, kaddr + bvec->bv_offset, | ||
5372 | csum, bvec->bv_len); | ||
5373 | btrfs_csum_final(csum, (char *)&csum); | ||
5374 | kunmap_atomic(kaddr, KM_IRQ0); | ||
5375 | local_irq_restore(flags); | ||
5376 | |||
5377 | flush_dcache_page(bvec->bv_page); | ||
5378 | if (csum != *private) { | ||
5379 | printk(KERN_ERR "btrfs csum failed ino %lu off" | ||
5380 | " %llu csum %u private %u\n", | ||
5381 | inode->i_ino, (unsigned long long)start, | ||
5382 | csum, *private); | ||
5383 | err = -EIO; | ||
5384 | } | ||
5385 | } | ||
5386 | |||
5387 | start += bvec->bv_len; | ||
5388 | private++; | ||
5389 | bvec++; | ||
5390 | } while (bvec <= bvec_end); | ||
5391 | |||
5392 | unlock_extent(&BTRFS_I(inode)->io_tree, dip->logical_offset, | ||
5393 | dip->logical_offset + dip->bytes - 1, GFP_NOFS); | ||
5394 | bio->bi_private = dip->private; | ||
5395 | |||
5396 | kfree(dip->csums); | ||
5397 | kfree(dip); | ||
5398 | dio_end_io(bio, err); | ||
5399 | } | ||
5400 | |||
5401 | static void btrfs_endio_direct_write(struct bio *bio, int err) | ||
5402 | { | ||
5403 | struct btrfs_dio_private *dip = bio->bi_private; | ||
5404 | struct inode *inode = dip->inode; | ||
5405 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5406 | struct btrfs_trans_handle *trans; | ||
5407 | struct btrfs_ordered_extent *ordered = NULL; | ||
5408 | struct extent_state *cached_state = NULL; | ||
5409 | int ret; | ||
5410 | |||
5411 | if (err) | ||
5412 | goto out_done; | ||
5413 | |||
5414 | ret = btrfs_dec_test_ordered_pending(inode, &ordered, | ||
5415 | dip->logical_offset, dip->bytes); | ||
5416 | if (!ret) | ||
5417 | goto out_done; | ||
5418 | |||
5419 | BUG_ON(!ordered); | ||
5420 | |||
5421 | trans = btrfs_join_transaction(root, 1); | ||
5422 | if (!trans) { | ||
5423 | err = -ENOMEM; | ||
5424 | goto out; | ||
5425 | } | ||
5426 | trans->block_rsv = &root->fs_info->delalloc_block_rsv; | ||
5427 | |||
5428 | if (test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) { | ||
5429 | ret = btrfs_ordered_update_i_size(inode, 0, ordered); | ||
5430 | if (!ret) | ||
5431 | ret = btrfs_update_inode(trans, root, inode); | ||
5432 | err = ret; | ||
5433 | goto out; | ||
5434 | } | ||
5435 | |||
5436 | lock_extent_bits(&BTRFS_I(inode)->io_tree, ordered->file_offset, | ||
5437 | ordered->file_offset + ordered->len - 1, 0, | ||
5438 | &cached_state, GFP_NOFS); | ||
5439 | |||
5440 | if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags)) { | ||
5441 | ret = btrfs_mark_extent_written(trans, inode, | ||
5442 | ordered->file_offset, | ||
5443 | ordered->file_offset + | ||
5444 | ordered->len); | ||
5445 | if (ret) { | ||
5446 | err = ret; | ||
5447 | goto out_unlock; | ||
5448 | } | ||
5449 | } else { | ||
5450 | ret = insert_reserved_file_extent(trans, inode, | ||
5451 | ordered->file_offset, | ||
5452 | ordered->start, | ||
5453 | ordered->disk_len, | ||
5454 | ordered->len, | ||
5455 | ordered->len, | ||
5456 | 0, 0, 0, | ||
5457 | BTRFS_FILE_EXTENT_REG); | ||
5458 | unpin_extent_cache(&BTRFS_I(inode)->extent_tree, | ||
5459 | ordered->file_offset, ordered->len); | ||
5460 | if (ret) { | ||
5461 | err = ret; | ||
5462 | WARN_ON(1); | ||
5463 | goto out_unlock; | ||
5464 | } | ||
5465 | } | ||
5466 | |||
5467 | add_pending_csums(trans, inode, ordered->file_offset, &ordered->list); | ||
5468 | btrfs_ordered_update_i_size(inode, 0, ordered); | ||
5469 | btrfs_update_inode(trans, root, inode); | ||
5470 | out_unlock: | ||
5471 | unlock_extent_cached(&BTRFS_I(inode)->io_tree, ordered->file_offset, | ||
5472 | ordered->file_offset + ordered->len - 1, | ||
5473 | &cached_state, GFP_NOFS); | ||
5474 | out: | ||
5475 | btrfs_delalloc_release_metadata(inode, ordered->len); | ||
5476 | btrfs_end_transaction(trans, root); | ||
5477 | btrfs_put_ordered_extent(ordered); | ||
5478 | btrfs_put_ordered_extent(ordered); | ||
5479 | out_done: | ||
5480 | bio->bi_private = dip->private; | ||
5481 | |||
5482 | kfree(dip->csums); | ||
5483 | kfree(dip); | ||
5484 | dio_end_io(bio, err); | ||
5485 | } | ||
5486 | |||
5487 | static void btrfs_submit_direct(int rw, struct bio *bio, struct inode *inode, | ||
5488 | loff_t file_offset) | ||
5489 | { | ||
5490 | struct btrfs_root *root = BTRFS_I(inode)->root; | ||
5491 | struct btrfs_dio_private *dip; | ||
5492 | struct bio_vec *bvec = bio->bi_io_vec; | ||
5493 | u64 start; | ||
5494 | int skip_sum; | ||
5495 | int write = rw & (1 << BIO_RW); | ||
5496 | int ret = 0; | ||
5497 | |||
5498 | skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM; | ||
5499 | |||
5500 | dip = kmalloc(sizeof(*dip), GFP_NOFS); | ||
5501 | if (!dip) { | ||
5502 | ret = -ENOMEM; | ||
5503 | goto free_ordered; | ||
5504 | } | ||
5505 | dip->csums = NULL; | ||
5506 | |||
5507 | if (!skip_sum) { | ||
5508 | dip->csums = kmalloc(sizeof(u32) * bio->bi_vcnt, GFP_NOFS); | ||
5509 | if (!dip->csums) { | ||
5510 | ret = -ENOMEM; | ||
5511 | goto free_ordered; | ||
5512 | } | ||
5513 | } | ||
5514 | |||
5515 | dip->private = bio->bi_private; | ||
5516 | dip->inode = inode; | ||
5517 | dip->logical_offset = file_offset; | ||
5518 | |||
5519 | start = dip->logical_offset; | ||
5520 | dip->bytes = 0; | ||
5521 | do { | ||
5522 | dip->bytes += bvec->bv_len; | ||
5523 | bvec++; | ||
5524 | } while (bvec <= (bio->bi_io_vec + bio->bi_vcnt - 1)); | ||
5525 | |||
5526 | dip->disk_bytenr = bio->bi_sector << 9; | ||
5527 | bio->bi_private = dip; | ||
5528 | |||
5529 | if (write) | ||
5530 | bio->bi_end_io = btrfs_endio_direct_write; | ||
5531 | else | ||
5532 | bio->bi_end_io = btrfs_endio_direct_read; | ||
5533 | |||
5534 | ret = btrfs_bio_wq_end_io(root->fs_info, bio, 0); | ||
5535 | if (ret) | ||
5536 | goto out_err; | ||
5537 | |||
5538 | if (write && !skip_sum) | ||
5539 | btrfs_csum_one_bio(root, inode, bio, dip->logical_offset, 1); | ||
5540 | else if (!skip_sum) | ||
5541 | btrfs_lookup_bio_sums_dio(root, inode, bio, | ||
5542 | dip->logical_offset, dip->csums); | ||
5543 | |||
5544 | ret = btrfs_map_bio(root, rw, bio, 0, 0); | ||
5545 | if (ret) | ||
5546 | goto out_err; | ||
5547 | return; | ||
5548 | out_err: | ||
5549 | kfree(dip->csums); | ||
5550 | kfree(dip); | ||
5551 | free_ordered: | ||
5552 | /* | ||
5553 | * If this is a write, we need to clean up the reserved space and kill | ||
5554 | * the ordered extent. | ||
5555 | */ | ||
5556 | if (write) { | ||
5557 | struct btrfs_ordered_extent *ordered; | ||
5558 | ordered = btrfs_lookup_ordered_extent(inode, | ||
5559 | dip->logical_offset); | ||
5560 | if (!test_bit(BTRFS_ORDERED_PREALLOC, &ordered->flags) && | ||
5561 | !test_bit(BTRFS_ORDERED_NOCOW, &ordered->flags)) | ||
5562 | btrfs_free_reserved_extent(root, ordered->start, | ||
5563 | ordered->disk_len); | ||
5564 | btrfs_put_ordered_extent(ordered); | ||
5565 | btrfs_put_ordered_extent(ordered); | ||
5566 | } | ||
5567 | bio_endio(bio, ret); | ||
5568 | } | ||
5569 | |||
5174 | static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, | 5570 | static ssize_t btrfs_direct_IO(int rw, struct kiocb *iocb, |
5175 | const struct iovec *iov, loff_t offset, | 5571 | const struct iovec *iov, loff_t offset, |
5176 | unsigned long nr_segs) | 5572 | unsigned long nr_segs) |
5177 | { | 5573 | { |
5178 | return -EINVAL; | 5574 | struct file *file = iocb->ki_filp; |
5575 | struct inode *inode = file->f_mapping->host; | ||
5576 | struct btrfs_ordered_extent *ordered; | ||
5577 | u64 lockstart, lockend; | ||
5578 | ssize_t ret; | ||
5579 | |||
5580 | lockstart = offset; | ||
5581 | lockend = offset + iov_length(iov, nr_segs) - 1; | ||
5582 | while (1) { | ||
5583 | lock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
5584 | GFP_NOFS); | ||
5585 | /* | ||
5586 | * We're concerned with the entire range that we're going to be | ||
5587 | * doing DIO to, so we need to make sure theres no ordered | ||
5588 | * extents in this range. | ||
5589 | */ | ||
5590 | ordered = btrfs_lookup_ordered_range(inode, lockstart, | ||
5591 | lockend - lockstart + 1); | ||
5592 | if (!ordered) | ||
5593 | break; | ||
5594 | unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend, | ||
5595 | GFP_NOFS); | ||
5596 | btrfs_start_ordered_extent(inode, ordered, 1); | ||
5597 | btrfs_put_ordered_extent(ordered); | ||
5598 | cond_resched(); | ||
5599 | } | ||
5600 | |||
5601 | ret = __blockdev_direct_IO(rw, iocb, inode, NULL, iov, offset, nr_segs, | ||
5602 | btrfs_get_blocks_direct, NULL, | ||
5603 | btrfs_submit_direct, 0); | ||
5604 | |||
5605 | if (ret < 0 && ret != -EIOCBQUEUED) { | ||
5606 | unlock_extent(&BTRFS_I(inode)->io_tree, offset, | ||
5607 | offset + iov_length(iov, nr_segs) - 1, GFP_NOFS); | ||
5608 | } else if (ret >= 0 && ret < iov_length(iov, nr_segs)) { | ||
5609 | /* | ||
5610 | * We're falling back to buffered, unlock the section we didn't | ||
5611 | * do IO on. | ||
5612 | */ | ||
5613 | unlock_extent(&BTRFS_I(inode)->io_tree, offset + ret, | ||
5614 | offset + iov_length(iov, nr_segs) - 1, GFP_NOFS); | ||
5615 | } | ||
5616 | |||
5617 | return ret; | ||
5179 | } | 5618 | } |
5180 | 5619 | ||
5181 | static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, | 5620 | static int btrfs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo, |
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index c9f1020572f2..e56c72bc5add 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c | |||
@@ -124,6 +124,15 @@ static int offset_in_entry(struct btrfs_ordered_extent *entry, u64 file_offset) | |||
124 | return 1; | 124 | return 1; |
125 | } | 125 | } |
126 | 126 | ||
127 | static int range_overlaps(struct btrfs_ordered_extent *entry, u64 file_offset, | ||
128 | u64 len) | ||
129 | { | ||
130 | if (file_offset + len <= entry->file_offset || | ||
131 | entry->file_offset + entry->len <= file_offset) | ||
132 | return 0; | ||
133 | return 1; | ||
134 | } | ||
135 | |||
127 | /* | 136 | /* |
128 | * look find the first ordered struct that has this offset, otherwise | 137 | * look find the first ordered struct that has this offset, otherwise |
129 | * the first one less than this offset | 138 | * the first one less than this offset |
@@ -161,8 +170,9 @@ static inline struct rb_node *tree_search(struct btrfs_ordered_inode_tree *tree, | |||
161 | * The tree is given a single reference on the ordered extent that was | 170 | * The tree is given a single reference on the ordered extent that was |
162 | * inserted. | 171 | * inserted. |
163 | */ | 172 | */ |
164 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | 173 | static int __btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, |
165 | u64 start, u64 len, u64 disk_len, int type) | 174 | u64 start, u64 len, u64 disk_len, |
175 | int type, int dio) | ||
166 | { | 176 | { |
167 | struct btrfs_ordered_inode_tree *tree; | 177 | struct btrfs_ordered_inode_tree *tree; |
168 | struct rb_node *node; | 178 | struct rb_node *node; |
@@ -182,6 +192,9 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
182 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) | 192 | if (type != BTRFS_ORDERED_IO_DONE && type != BTRFS_ORDERED_COMPLETE) |
183 | set_bit(type, &entry->flags); | 193 | set_bit(type, &entry->flags); |
184 | 194 | ||
195 | if (dio) | ||
196 | set_bit(BTRFS_ORDERED_DIRECT, &entry->flags); | ||
197 | |||
185 | /* one ref for the tree */ | 198 | /* one ref for the tree */ |
186 | atomic_set(&entry->refs, 1); | 199 | atomic_set(&entry->refs, 1); |
187 | init_waitqueue_head(&entry->wait); | 200 | init_waitqueue_head(&entry->wait); |
@@ -203,6 +216,20 @@ int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | |||
203 | return 0; | 216 | return 0; |
204 | } | 217 | } |
205 | 218 | ||
219 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | ||
220 | u64 start, u64 len, u64 disk_len, int type) | ||
221 | { | ||
222 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | ||
223 | disk_len, type, 0); | ||
224 | } | ||
225 | |||
226 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, | ||
227 | u64 start, u64 len, u64 disk_len, int type) | ||
228 | { | ||
229 | return __btrfs_add_ordered_extent(inode, file_offset, start, len, | ||
230 | disk_len, type, 1); | ||
231 | } | ||
232 | |||
206 | /* | 233 | /* |
207 | * Add a struct btrfs_ordered_sum into the list of checksums to be inserted | 234 | * Add a struct btrfs_ordered_sum into the list of checksums to be inserted |
208 | * when an ordered extent is finished. If the list covers more than one | 235 | * when an ordered extent is finished. If the list covers more than one |
@@ -484,7 +511,8 @@ void btrfs_start_ordered_extent(struct inode *inode, | |||
484 | * start IO on any dirty ones so the wait doesn't stall waiting | 511 | * start IO on any dirty ones so the wait doesn't stall waiting |
485 | * for pdflush to find them | 512 | * for pdflush to find them |
486 | */ | 513 | */ |
487 | filemap_fdatawrite_range(inode->i_mapping, start, end); | 514 | if (!test_bit(BTRFS_ORDERED_DIRECT, &entry->flags)) |
515 | filemap_fdatawrite_range(inode->i_mapping, start, end); | ||
488 | if (wait) { | 516 | if (wait) { |
489 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, | 517 | wait_event(entry->wait, test_bit(BTRFS_ORDERED_COMPLETE, |
490 | &entry->flags)); | 518 | &entry->flags)); |
@@ -581,6 +609,47 @@ out: | |||
581 | return entry; | 609 | return entry; |
582 | } | 610 | } |
583 | 611 | ||
612 | /* Since the DIO code tries to lock a wide area we need to look for any ordered | ||
613 | * extents that exist in the range, rather than just the start of the range. | ||
614 | */ | ||
615 | struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, | ||
616 | u64 file_offset, | ||
617 | u64 len) | ||
618 | { | ||
619 | struct btrfs_ordered_inode_tree *tree; | ||
620 | struct rb_node *node; | ||
621 | struct btrfs_ordered_extent *entry = NULL; | ||
622 | |||
623 | tree = &BTRFS_I(inode)->ordered_tree; | ||
624 | spin_lock(&tree->lock); | ||
625 | node = tree_search(tree, file_offset); | ||
626 | if (!node) { | ||
627 | node = tree_search(tree, file_offset + len); | ||
628 | if (!node) | ||
629 | goto out; | ||
630 | } | ||
631 | |||
632 | while (1) { | ||
633 | entry = rb_entry(node, struct btrfs_ordered_extent, rb_node); | ||
634 | if (range_overlaps(entry, file_offset, len)) | ||
635 | break; | ||
636 | |||
637 | if (entry->file_offset >= file_offset + len) { | ||
638 | entry = NULL; | ||
639 | break; | ||
640 | } | ||
641 | entry = NULL; | ||
642 | node = rb_next(node); | ||
643 | if (!node) | ||
644 | break; | ||
645 | } | ||
646 | out: | ||
647 | if (entry) | ||
648 | atomic_inc(&entry->refs); | ||
649 | spin_unlock(&tree->lock); | ||
650 | return entry; | ||
651 | } | ||
652 | |||
584 | /* | 653 | /* |
585 | * lookup and return any extent before 'file_offset'. NULL is returned | 654 | * lookup and return any extent before 'file_offset'. NULL is returned |
586 | * if none is found | 655 | * if none is found |
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h index c82f76a9f040..8ac365492a3f 100644 --- a/fs/btrfs/ordered-data.h +++ b/fs/btrfs/ordered-data.h | |||
@@ -72,6 +72,8 @@ struct btrfs_ordered_sum { | |||
72 | 72 | ||
73 | #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ | 73 | #define BTRFS_ORDERED_PREALLOC 4 /* set when writing to prealloced extent */ |
74 | 74 | ||
75 | #define BTRFS_ORDERED_DIRECT 5 /* set when we're doing DIO with this extent */ | ||
76 | |||
75 | struct btrfs_ordered_extent { | 77 | struct btrfs_ordered_extent { |
76 | /* logical offset in the file */ | 78 | /* logical offset in the file */ |
77 | u64 file_offset; | 79 | u64 file_offset; |
@@ -140,7 +142,9 @@ int btrfs_dec_test_ordered_pending(struct inode *inode, | |||
140 | struct btrfs_ordered_extent **cached, | 142 | struct btrfs_ordered_extent **cached, |
141 | u64 file_offset, u64 io_size); | 143 | u64 file_offset, u64 io_size); |
142 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, | 144 | int btrfs_add_ordered_extent(struct inode *inode, u64 file_offset, |
143 | u64 start, u64 len, u64 disk_len, int tyep); | 145 | u64 start, u64 len, u64 disk_len, int type); |
146 | int btrfs_add_ordered_extent_dio(struct inode *inode, u64 file_offset, | ||
147 | u64 start, u64 len, u64 disk_len, int type); | ||
144 | int btrfs_add_ordered_sum(struct inode *inode, | 148 | int btrfs_add_ordered_sum(struct inode *inode, |
145 | struct btrfs_ordered_extent *entry, | 149 | struct btrfs_ordered_extent *entry, |
146 | struct btrfs_ordered_sum *sum); | 150 | struct btrfs_ordered_sum *sum); |
@@ -151,6 +155,9 @@ void btrfs_start_ordered_extent(struct inode *inode, | |||
151 | int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); | 155 | int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len); |
152 | struct btrfs_ordered_extent * | 156 | struct btrfs_ordered_extent * |
153 | btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); | 157 | btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset); |
158 | struct btrfs_ordered_extent *btrfs_lookup_ordered_range(struct inode *inode, | ||
159 | u64 file_offset, | ||
160 | u64 len); | ||
154 | int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, | 161 | int btrfs_ordered_update_i_size(struct inode *inode, u64 offset, |
155 | struct btrfs_ordered_extent *ordered); | 162 | struct btrfs_ordered_extent *ordered); |
156 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); | 163 | int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum); |