diff options
Diffstat (limited to 'fs/direct-io.c')
-rw-r--r-- | fs/direct-io.c | 103 |
1 files changed, 69 insertions, 34 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c index e82adc2debb7..51f270b479b6 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -82,6 +82,8 @@ struct dio { | |||
82 | int reap_counter; /* rate limit reaping */ | 82 | int reap_counter; /* rate limit reaping */ |
83 | get_block_t *get_block; /* block mapping function */ | 83 | get_block_t *get_block; /* block mapping function */ |
84 | dio_iodone_t *end_io; /* IO completion function */ | 84 | dio_iodone_t *end_io; /* IO completion function */ |
85 | dio_submit_t *submit_io; /* IO submission function */ | ||
86 | loff_t logical_offset_in_bio; /* current first logical block in bio */ | ||
85 | sector_t final_block_in_bio; /* current final block in bio + 1 */ | 87 | sector_t final_block_in_bio; /* current final block in bio + 1 */ |
86 | sector_t next_block_for_io; /* next block to be put under IO, | 88 | sector_t next_block_for_io; /* next block to be put under IO, |
87 | in dio_blocks units */ | 89 | in dio_blocks units */ |
@@ -96,6 +98,7 @@ struct dio { | |||
96 | unsigned cur_page_offset; /* Offset into it, in bytes */ | 98 | unsigned cur_page_offset; /* Offset into it, in bytes */ |
97 | unsigned cur_page_len; /* Nr of bytes at cur_page_offset */ | 99 | unsigned cur_page_len; /* Nr of bytes at cur_page_offset */ |
98 | sector_t cur_page_block; /* Where it starts */ | 100 | sector_t cur_page_block; /* Where it starts */ |
101 | loff_t cur_page_fs_offset; /* Offset in file */ | ||
99 | 102 | ||
100 | /* BIO completion state */ | 103 | /* BIO completion state */ |
101 | spinlock_t bio_lock; /* protects BIO fields below */ | 104 | spinlock_t bio_lock; /* protects BIO fields below */ |
@@ -215,7 +218,7 @@ static struct page *dio_get_page(struct dio *dio) | |||
215 | * filesystems can use it to hold additional state between get_block calls and | 218 | * filesystems can use it to hold additional state between get_block calls and |
216 | * dio_complete. | 219 | * dio_complete. |
217 | */ | 220 | */ |
218 | static int dio_complete(struct dio *dio, loff_t offset, int ret) | 221 | static int dio_complete(struct dio *dio, loff_t offset, int ret, bool is_async) |
219 | { | 222 | { |
220 | ssize_t transferred = 0; | 223 | ssize_t transferred = 0; |
221 | 224 | ||
@@ -236,14 +239,6 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret) | |||
236 | transferred = dio->i_size - offset; | 239 | transferred = dio->i_size - offset; |
237 | } | 240 | } |
238 | 241 | ||
239 | if (dio->end_io && dio->result) | ||
240 | dio->end_io(dio->iocb, offset, transferred, | ||
241 | dio->map_bh.b_private); | ||
242 | |||
243 | if (dio->flags & DIO_LOCKING) | ||
244 | /* lockdep: non-owner release */ | ||
245 | up_read_non_owner(&dio->inode->i_alloc_sem); | ||
246 | |||
247 | if (ret == 0) | 242 | if (ret == 0) |
248 | ret = dio->page_errors; | 243 | ret = dio->page_errors; |
249 | if (ret == 0) | 244 | if (ret == 0) |
@@ -251,6 +246,17 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret) | |||
251 | if (ret == 0) | 246 | if (ret == 0) |
252 | ret = transferred; | 247 | ret = transferred; |
253 | 248 | ||
249 | if (dio->end_io && dio->result) { | ||
250 | dio->end_io(dio->iocb, offset, transferred, | ||
251 | dio->map_bh.b_private, ret, is_async); | ||
252 | } else if (is_async) { | ||
253 | aio_complete(dio->iocb, ret, 0); | ||
254 | } | ||
255 | |||
256 | if (dio->flags & DIO_LOCKING) | ||
257 | /* lockdep: non-owner release */ | ||
258 | up_read_non_owner(&dio->inode->i_alloc_sem); | ||
259 | |||
254 | return ret; | 260 | return ret; |
255 | } | 261 | } |
256 | 262 | ||
@@ -274,8 +280,7 @@ static void dio_bio_end_aio(struct bio *bio, int error) | |||
274 | spin_unlock_irqrestore(&dio->bio_lock, flags); | 280 | spin_unlock_irqrestore(&dio->bio_lock, flags); |
275 | 281 | ||
276 | if (remaining == 0) { | 282 | if (remaining == 0) { |
277 | int ret = dio_complete(dio, dio->iocb->ki_pos, 0); | 283 | dio_complete(dio, dio->iocb->ki_pos, 0, true); |
278 | aio_complete(dio->iocb, ret, 0); | ||
279 | kfree(dio); | 284 | kfree(dio); |
280 | } | 285 | } |
281 | } | 286 | } |
@@ -300,6 +305,26 @@ static void dio_bio_end_io(struct bio *bio, int error) | |||
300 | spin_unlock_irqrestore(&dio->bio_lock, flags); | 305 | spin_unlock_irqrestore(&dio->bio_lock, flags); |
301 | } | 306 | } |
302 | 307 | ||
308 | /** | ||
309 | * dio_end_io - handle the end io action for the given bio | ||
310 | * @bio: The direct io bio that's being completed | ||
311 | * @error: Error if there was one | ||
312 | * | ||
313 | * This is meant to be called by any filesystem that uses its own dio_submit_t | ||
314 | * so that the DIO specific endio actions are dealt with after the filesystem | ||
315 | * has done its completion work. | ||
316 | */ | ||
317 | void dio_end_io(struct bio *bio, int error) | ||
318 | { | ||
319 | struct dio *dio = bio->bi_private; | ||
320 | |||
321 | if (dio->is_async) | ||
322 | dio_bio_end_aio(bio, error); | ||
323 | else | ||
324 | dio_bio_end_io(bio, error); | ||
325 | } | ||
326 | EXPORT_SYMBOL_GPL(dio_end_io); | ||
327 | |||
303 | static int | 328 | static int |
304 | dio_bio_alloc(struct dio *dio, struct block_device *bdev, | 329 | dio_bio_alloc(struct dio *dio, struct block_device *bdev, |
305 | sector_t first_sector, int nr_vecs) | 330 | sector_t first_sector, int nr_vecs) |
@@ -316,6 +341,7 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev, | |||
316 | bio->bi_end_io = dio_bio_end_io; | 341 | bio->bi_end_io = dio_bio_end_io; |
317 | 342 | ||
318 | dio->bio = bio; | 343 | dio->bio = bio; |
344 | dio->logical_offset_in_bio = dio->cur_page_fs_offset; | ||
319 | return 0; | 345 | return 0; |
320 | } | 346 | } |
321 | 347 | ||
@@ -340,10 +366,15 @@ static void dio_bio_submit(struct dio *dio) | |||
340 | if (dio->is_async && dio->rw == READ) | 366 | if (dio->is_async && dio->rw == READ) |
341 | bio_set_pages_dirty(bio); | 367 | bio_set_pages_dirty(bio); |
342 | 368 | ||
343 | submit_bio(dio->rw, bio); | 369 | if (dio->submit_io) |
370 | dio->submit_io(dio->rw, bio, dio->inode, | ||
371 | dio->logical_offset_in_bio); | ||
372 | else | ||
373 | submit_bio(dio->rw, bio); | ||
344 | 374 | ||
345 | dio->bio = NULL; | 375 | dio->bio = NULL; |
346 | dio->boundary = 0; | 376 | dio->boundary = 0; |
377 | dio->logical_offset_in_bio = 0; | ||
347 | } | 378 | } |
348 | 379 | ||
349 | /* | 380 | /* |
@@ -603,10 +634,26 @@ static int dio_send_cur_page(struct dio *dio) | |||
603 | int ret = 0; | 634 | int ret = 0; |
604 | 635 | ||
605 | if (dio->bio) { | 636 | if (dio->bio) { |
637 | loff_t cur_offset = dio->block_in_file << dio->blkbits; | ||
638 | loff_t bio_next_offset = dio->logical_offset_in_bio + | ||
639 | dio->bio->bi_size; | ||
640 | |||
606 | /* | 641 | /* |
607 | * See whether this new request is contiguous with the old | 642 | * See whether this new request is contiguous with the old. |
643 | * | ||
644 | * Btrfs cannot handle having logically non-contiguous requests | ||
645 | * submitted. For example if you have | ||
646 | * | ||
647 | * Logical: [0-4095][HOLE][8192-12287] | ||
648 | * Physical: [0-4095] [4096-8191] | ||
649 | * | ||
650 | * We cannot submit those pages together as one BIO. So if our | ||
651 | * current logical offset in the file does not equal what would | ||
652 | * be the next logical offset in the bio, submit the bio we | ||
653 | * have. | ||
608 | */ | 654 | */ |
609 | if (dio->final_block_in_bio != dio->cur_page_block) | 655 | if (dio->final_block_in_bio != dio->cur_page_block || |
656 | cur_offset != bio_next_offset) | ||
610 | dio_bio_submit(dio); | 657 | dio_bio_submit(dio); |
611 | /* | 658 | /* |
612 | * Submit now if the underlying fs is about to perform a | 659 | * Submit now if the underlying fs is about to perform a |
@@ -701,6 +748,7 @@ submit_page_section(struct dio *dio, struct page *page, | |||
701 | dio->cur_page_offset = offset; | 748 | dio->cur_page_offset = offset; |
702 | dio->cur_page_len = len; | 749 | dio->cur_page_len = len; |
703 | dio->cur_page_block = blocknr; | 750 | dio->cur_page_block = blocknr; |
751 | dio->cur_page_fs_offset = dio->block_in_file << dio->blkbits; | ||
704 | out: | 752 | out: |
705 | return ret; | 753 | return ret; |
706 | } | 754 | } |
@@ -935,7 +983,7 @@ static ssize_t | |||
935 | direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | 983 | direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, |
936 | const struct iovec *iov, loff_t offset, unsigned long nr_segs, | 984 | const struct iovec *iov, loff_t offset, unsigned long nr_segs, |
937 | unsigned blkbits, get_block_t get_block, dio_iodone_t end_io, | 985 | unsigned blkbits, get_block_t get_block, dio_iodone_t end_io, |
938 | struct dio *dio) | 986 | dio_submit_t submit_io, struct dio *dio) |
939 | { | 987 | { |
940 | unsigned long user_addr; | 988 | unsigned long user_addr; |
941 | unsigned long flags; | 989 | unsigned long flags; |
@@ -952,6 +1000,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
952 | 1000 | ||
953 | dio->get_block = get_block; | 1001 | dio->get_block = get_block; |
954 | dio->end_io = end_io; | 1002 | dio->end_io = end_io; |
1003 | dio->submit_io = submit_io; | ||
955 | dio->final_block_in_bio = -1; | 1004 | dio->final_block_in_bio = -1; |
956 | dio->next_block_for_io = -1; | 1005 | dio->next_block_for_io = -1; |
957 | 1006 | ||
@@ -1008,7 +1057,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
1008 | } | 1057 | } |
1009 | } /* end iovec loop */ | 1058 | } /* end iovec loop */ |
1010 | 1059 | ||
1011 | if (ret == -ENOTBLK && (rw & WRITE)) { | 1060 | if (ret == -ENOTBLK) { |
1012 | /* | 1061 | /* |
1013 | * The remaining part of the request will be | 1062 | * The remaining part of the request will be |
1014 | * be handled by buffered I/O when we return | 1063 | * be handled by buffered I/O when we return |
@@ -1079,7 +1128,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
1079 | spin_unlock_irqrestore(&dio->bio_lock, flags); | 1128 | spin_unlock_irqrestore(&dio->bio_lock, flags); |
1080 | 1129 | ||
1081 | if (ret2 == 0) { | 1130 | if (ret2 == 0) { |
1082 | ret = dio_complete(dio, offset, ret); | 1131 | ret = dio_complete(dio, offset, ret, false); |
1083 | kfree(dio); | 1132 | kfree(dio); |
1084 | } else | 1133 | } else |
1085 | BUG_ON(ret != -EIOCBQUEUED); | 1134 | BUG_ON(ret != -EIOCBQUEUED); |
@@ -1110,7 +1159,7 @@ ssize_t | |||
1110 | __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | 1159 | __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, |
1111 | struct block_device *bdev, const struct iovec *iov, loff_t offset, | 1160 | struct block_device *bdev, const struct iovec *iov, loff_t offset, |
1112 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, | 1161 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, |
1113 | int flags) | 1162 | dio_submit_t submit_io, int flags) |
1114 | { | 1163 | { |
1115 | int seg; | 1164 | int seg; |
1116 | size_t size; | 1165 | size_t size; |
@@ -1197,22 +1246,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
1197 | (end > i_size_read(inode))); | 1246 | (end > i_size_read(inode))); |
1198 | 1247 | ||
1199 | retval = direct_io_worker(rw, iocb, inode, iov, offset, | 1248 | retval = direct_io_worker(rw, iocb, inode, iov, offset, |
1200 | nr_segs, blkbits, get_block, end_io, dio); | 1249 | nr_segs, blkbits, get_block, end_io, |
1201 | 1250 | submit_io, dio); | |
1202 | /* | ||
1203 | * In case of error extending write may have instantiated a few | ||
1204 | * blocks outside i_size. Trim these off again for DIO_LOCKING. | ||
1205 | * | ||
1206 | * NOTE: filesystems with their own locking have to handle this | ||
1207 | * on their own. | ||
1208 | */ | ||
1209 | if (flags & DIO_LOCKING) { | ||
1210 | if (unlikely((rw & WRITE) && retval < 0)) { | ||
1211 | loff_t isize = i_size_read(inode); | ||
1212 | if (end > isize) | ||
1213 | vmtruncate(inode, isize); | ||
1214 | } | ||
1215 | } | ||
1216 | 1251 | ||
1217 | out: | 1252 | out: |
1218 | return retval; | 1253 | return retval; |