aboutsummaryrefslogtreecommitdiffstats
path: root/fs/direct-io.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/direct-io.c')
-rw-r--r--fs/direct-io.c103
1 files changed, 69 insertions, 34 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c
index e82adc2debb7..51f270b479b6 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -82,6 +82,8 @@ struct dio {
82 int reap_counter; /* rate limit reaping */ 82 int reap_counter; /* rate limit reaping */
83 get_block_t *get_block; /* block mapping function */ 83 get_block_t *get_block; /* block mapping function */
84 dio_iodone_t *end_io; /* IO completion function */ 84 dio_iodone_t *end_io; /* IO completion function */
85 dio_submit_t *submit_io; /* IO submission function */
86 loff_t logical_offset_in_bio; /* current first logical block in bio */
85 sector_t final_block_in_bio; /* current final block in bio + 1 */ 87 sector_t final_block_in_bio; /* current final block in bio + 1 */
86 sector_t next_block_for_io; /* next block to be put under IO, 88 sector_t next_block_for_io; /* next block to be put under IO,
87 in dio_blocks units */ 89 in dio_blocks units */
@@ -96,6 +98,7 @@ struct dio {
96 unsigned cur_page_offset; /* Offset into it, in bytes */ 98 unsigned cur_page_offset; /* Offset into it, in bytes */
97 unsigned cur_page_len; /* Nr of bytes at cur_page_offset */ 99 unsigned cur_page_len; /* Nr of bytes at cur_page_offset */
98 sector_t cur_page_block; /* Where it starts */ 100 sector_t cur_page_block; /* Where it starts */
101 loff_t cur_page_fs_offset; /* Offset in file */
99 102
100 /* BIO completion state */ 103 /* BIO completion state */
101 spinlock_t bio_lock; /* protects BIO fields below */ 104 spinlock_t bio_lock; /* protects BIO fields below */
@@ -215,7 +218,7 @@ static struct page *dio_get_page(struct dio *dio)
215 * filesystems can use it to hold additional state between get_block calls and 218 * filesystems can use it to hold additional state between get_block calls and
216 * dio_complete. 219 * dio_complete.
217 */ 220 */
218static int dio_complete(struct dio *dio, loff_t offset, int ret) 221static int dio_complete(struct dio *dio, loff_t offset, int ret, bool is_async)
219{ 222{
220 ssize_t transferred = 0; 223 ssize_t transferred = 0;
221 224
@@ -236,14 +239,6 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
236 transferred = dio->i_size - offset; 239 transferred = dio->i_size - offset;
237 } 240 }
238 241
239 if (dio->end_io && dio->result)
240 dio->end_io(dio->iocb, offset, transferred,
241 dio->map_bh.b_private);
242
243 if (dio->flags & DIO_LOCKING)
244 /* lockdep: non-owner release */
245 up_read_non_owner(&dio->inode->i_alloc_sem);
246
247 if (ret == 0) 242 if (ret == 0)
248 ret = dio->page_errors; 243 ret = dio->page_errors;
249 if (ret == 0) 244 if (ret == 0)
@@ -251,6 +246,17 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret)
251 if (ret == 0) 246 if (ret == 0)
252 ret = transferred; 247 ret = transferred;
253 248
249 if (dio->end_io && dio->result) {
250 dio->end_io(dio->iocb, offset, transferred,
251 dio->map_bh.b_private, ret, is_async);
252 } else if (is_async) {
253 aio_complete(dio->iocb, ret, 0);
254 }
255
256 if (dio->flags & DIO_LOCKING)
257 /* lockdep: non-owner release */
258 up_read_non_owner(&dio->inode->i_alloc_sem);
259
254 return ret; 260 return ret;
255} 261}
256 262
@@ -274,8 +280,7 @@ static void dio_bio_end_aio(struct bio *bio, int error)
274 spin_unlock_irqrestore(&dio->bio_lock, flags); 280 spin_unlock_irqrestore(&dio->bio_lock, flags);
275 281
276 if (remaining == 0) { 282 if (remaining == 0) {
277 int ret = dio_complete(dio, dio->iocb->ki_pos, 0); 283 dio_complete(dio, dio->iocb->ki_pos, 0, true);
278 aio_complete(dio->iocb, ret, 0);
279 kfree(dio); 284 kfree(dio);
280 } 285 }
281} 286}
@@ -300,6 +305,26 @@ static void dio_bio_end_io(struct bio *bio, int error)
300 spin_unlock_irqrestore(&dio->bio_lock, flags); 305 spin_unlock_irqrestore(&dio->bio_lock, flags);
301} 306}
302 307
308/**
309 * dio_end_io - handle the end io action for the given bio
310 * @bio: The direct io bio that's being completed
311 * @error: Error if there was one
312 *
313 * This is meant to be called by any filesystem that uses their own dio_submit_t
314 * so that the DIO specific endio actions are dealt with after the filesystem
315 * has done its completion work.
316 */
317void dio_end_io(struct bio *bio, int error)
318{
319 struct dio *dio = bio->bi_private;
320
321 if (dio->is_async)
322 dio_bio_end_aio(bio, error);
323 else
324 dio_bio_end_io(bio, error);
325}
326EXPORT_SYMBOL_GPL(dio_end_io);
327
303static int 328static int
304dio_bio_alloc(struct dio *dio, struct block_device *bdev, 329dio_bio_alloc(struct dio *dio, struct block_device *bdev,
305 sector_t first_sector, int nr_vecs) 330 sector_t first_sector, int nr_vecs)
@@ -316,6 +341,7 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev,
316 bio->bi_end_io = dio_bio_end_io; 341 bio->bi_end_io = dio_bio_end_io;
317 342
318 dio->bio = bio; 343 dio->bio = bio;
344 dio->logical_offset_in_bio = dio->cur_page_fs_offset;
319 return 0; 345 return 0;
320} 346}
321 347
@@ -340,10 +366,15 @@ static void dio_bio_submit(struct dio *dio)
340 if (dio->is_async && dio->rw == READ) 366 if (dio->is_async && dio->rw == READ)
341 bio_set_pages_dirty(bio); 367 bio_set_pages_dirty(bio);
342 368
343 submit_bio(dio->rw, bio); 369 if (dio->submit_io)
370 dio->submit_io(dio->rw, bio, dio->inode,
371 dio->logical_offset_in_bio);
372 else
373 submit_bio(dio->rw, bio);
344 374
345 dio->bio = NULL; 375 dio->bio = NULL;
346 dio->boundary = 0; 376 dio->boundary = 0;
377 dio->logical_offset_in_bio = 0;
347} 378}
348 379
349/* 380/*
@@ -603,10 +634,26 @@ static int dio_send_cur_page(struct dio *dio)
603 int ret = 0; 634 int ret = 0;
604 635
605 if (dio->bio) { 636 if (dio->bio) {
637 loff_t cur_offset = dio->block_in_file << dio->blkbits;
638 loff_t bio_next_offset = dio->logical_offset_in_bio +
639 dio->bio->bi_size;
640
606 /* 641 /*
607 * See whether this new request is contiguous with the old 642 * See whether this new request is contiguous with the old.
643 *
644 * Btrfs cannot handle having logically non-contiguous requests
645 * submitted. For example, if you have
646 *
647 * Logical: [0-4095][HOLE][8192-12287]
648 * Physical: [0-4095] [4096-8191]
649 *
650 * We cannot submit those pages together as one BIO. So if our
651 * current logical offset in the file does not equal what would
652 * be the next logical offset in the bio, submit the bio we
653 * have.
608 */ 654 */
609 if (dio->final_block_in_bio != dio->cur_page_block) 655 if (dio->final_block_in_bio != dio->cur_page_block ||
656 cur_offset != bio_next_offset)
610 dio_bio_submit(dio); 657 dio_bio_submit(dio);
611 /* 658 /*
612 * Submit now if the underlying fs is about to perform a 659 * Submit now if the underlying fs is about to perform a
@@ -701,6 +748,7 @@ submit_page_section(struct dio *dio, struct page *page,
701 dio->cur_page_offset = offset; 748 dio->cur_page_offset = offset;
702 dio->cur_page_len = len; 749 dio->cur_page_len = len;
703 dio->cur_page_block = blocknr; 750 dio->cur_page_block = blocknr;
751 dio->cur_page_fs_offset = dio->block_in_file << dio->blkbits;
704out: 752out:
705 return ret; 753 return ret;
706} 754}
@@ -935,7 +983,7 @@ static ssize_t
935direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, 983direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
936 const struct iovec *iov, loff_t offset, unsigned long nr_segs, 984 const struct iovec *iov, loff_t offset, unsigned long nr_segs,
937 unsigned blkbits, get_block_t get_block, dio_iodone_t end_io, 985 unsigned blkbits, get_block_t get_block, dio_iodone_t end_io,
938 struct dio *dio) 986 dio_submit_t submit_io, struct dio *dio)
939{ 987{
940 unsigned long user_addr; 988 unsigned long user_addr;
941 unsigned long flags; 989 unsigned long flags;
@@ -952,6 +1000,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
952 1000
953 dio->get_block = get_block; 1001 dio->get_block = get_block;
954 dio->end_io = end_io; 1002 dio->end_io = end_io;
1003 dio->submit_io = submit_io;
955 dio->final_block_in_bio = -1; 1004 dio->final_block_in_bio = -1;
956 dio->next_block_for_io = -1; 1005 dio->next_block_for_io = -1;
957 1006
@@ -1008,7 +1057,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1008 } 1057 }
1009 } /* end iovec loop */ 1058 } /* end iovec loop */
1010 1059
1011 if (ret == -ENOTBLK && (rw & WRITE)) { 1060 if (ret == -ENOTBLK) {
1012 /* 1061 /*
1013 * The remaining part of the request will be 1062 * The remaining part of the request will be
1014 * handled by buffered I/O when we return 1063 * handled by buffered I/O when we return
@@ -1079,7 +1128,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1079 spin_unlock_irqrestore(&dio->bio_lock, flags); 1128 spin_unlock_irqrestore(&dio->bio_lock, flags);
1080 1129
1081 if (ret2 == 0) { 1130 if (ret2 == 0) {
1082 ret = dio_complete(dio, offset, ret); 1131 ret = dio_complete(dio, offset, ret, false);
1083 kfree(dio); 1132 kfree(dio);
1084 } else 1133 } else
1085 BUG_ON(ret != -EIOCBQUEUED); 1134 BUG_ON(ret != -EIOCBQUEUED);
@@ -1110,7 +1159,7 @@ ssize_t
1110__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, 1159__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1111 struct block_device *bdev, const struct iovec *iov, loff_t offset, 1160 struct block_device *bdev, const struct iovec *iov, loff_t offset,
1112 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, 1161 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
1113 int flags) 1162 dio_submit_t submit_io, int flags)
1114{ 1163{
1115 int seg; 1164 int seg;
1116 size_t size; 1165 size_t size;
@@ -1197,22 +1246,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1197 (end > i_size_read(inode))); 1246 (end > i_size_read(inode)));
1198 1247
1199 retval = direct_io_worker(rw, iocb, inode, iov, offset, 1248 retval = direct_io_worker(rw, iocb, inode, iov, offset,
1200 nr_segs, blkbits, get_block, end_io, dio); 1249 nr_segs, blkbits, get_block, end_io,
1201 1250 submit_io, dio);
1202 /*
1203 * In case of error extending write may have instantiated a few
1204 * blocks outside i_size. Trim these off again for DIO_LOCKING.
1205 *
1206 * NOTE: filesystems with their own locking have to handle this
1207 * on their own.
1208 */
1209 if (flags & DIO_LOCKING) {
1210 if (unlikely((rw & WRITE) && retval < 0)) {
1211 loff_t isize = i_size_read(inode);
1212 if (end > isize)
1213 vmtruncate(inode, isize);
1214 }
1215 }
1216 1251
1217out: 1252out:
1218 return retval; 1253 return retval;