aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Josef Bacik <josef@redhat.com> 2010-05-23 11:00:55 -0400
committer: Chris Mason <chris.mason@oracle.com> 2010-05-25 10:34:55 -0400
commit: facd07b07d2a7988f5ce849558838cc953847637 (patch)
tree: 269200329390f450d2bc7f0858a8ed114a418374
parent: 66f998f611897319b555364cefd5d6e88a205866 (diff)
direct-io: add a hook for the fs to provide its own submit_bio function
Because BTRFS can do RAID and such, we need our own submit hook so we can set up the bios in the correct fashion and handle checksum errors properly. So there are a few changes here: 1) The submit_io hook. This is straightforward: just call this instead of submit_bio. 2) Allow the fs to return -ENOTBLK for reads. Usually this has only worked for writes, since writes can fall back to buffered IO. But BTRFS needs the option of falling back to buffered IO if it encounters a compressed extent, since we need to read the entire extent in and decompress it. So if we get -ENOTBLK back from get_block, we'll return and fall back to buffered IO, just like the write case. I've tested these changes with fsx and everything seems to work. Thanks, Signed-off-by: Josef Bacik <josef@redhat.com> Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- fs/direct-io.c 42
-rw-r--r-- include/linux/fs.h 11
2 files changed, 45 insertions, 8 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c
index e82adc2debb7..5949947b060a 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -82,6 +82,8 @@ struct dio {
82 int reap_counter; /* rate limit reaping */ 82 int reap_counter; /* rate limit reaping */
83 get_block_t *get_block; /* block mapping function */ 83 get_block_t *get_block; /* block mapping function */
84 dio_iodone_t *end_io; /* IO completion function */ 84 dio_iodone_t *end_io; /* IO completion function */
85 dio_submit_t *submit_io; /* IO submition function */
86 loff_t logical_offset_in_bio; /* current first logical block in bio */
85 sector_t final_block_in_bio; /* current final block in bio + 1 */ 87 sector_t final_block_in_bio; /* current final block in bio + 1 */
86 sector_t next_block_for_io; /* next block to be put under IO, 88 sector_t next_block_for_io; /* next block to be put under IO,
87 in dio_blocks units */ 89 in dio_blocks units */
@@ -96,6 +98,7 @@ struct dio {
96 unsigned cur_page_offset; /* Offset into it, in bytes */ 98 unsigned cur_page_offset; /* Offset into it, in bytes */
97 unsigned cur_page_len; /* Nr of bytes at cur_page_offset */ 99 unsigned cur_page_len; /* Nr of bytes at cur_page_offset */
98 sector_t cur_page_block; /* Where it starts */ 100 sector_t cur_page_block; /* Where it starts */
101 loff_t cur_page_fs_offset; /* Offset in file */
99 102
100 /* BIO completion state */ 103 /* BIO completion state */
101 spinlock_t bio_lock; /* protects BIO fields below */ 104 spinlock_t bio_lock; /* protects BIO fields below */
@@ -300,6 +303,26 @@ static void dio_bio_end_io(struct bio *bio, int error)
300 spin_unlock_irqrestore(&dio->bio_lock, flags); 303 spin_unlock_irqrestore(&dio->bio_lock, flags);
301} 304}
302 305
306/**
307 * dio_end_io - handle the end io action for the given bio
308 * @bio: The direct io bio that's being completed
309 * @error: Error if there was one
310 *
311 * This is meant to be called by any filesystem that uses its own dio_submit_t
312 * so that the DIO specific endio actions are dealt with after the filesystem
313 * has done its completion work.
314 */
315void dio_end_io(struct bio *bio, int error)
316{
317 struct dio *dio = bio->bi_private;
318
319 if (dio->is_async)
320 dio_bio_end_aio(bio, error);
321 else
322 dio_bio_end_io(bio, error);
323}
324EXPORT_SYMBOL_GPL(dio_end_io);
325
303static int 326static int
304dio_bio_alloc(struct dio *dio, struct block_device *bdev, 327dio_bio_alloc(struct dio *dio, struct block_device *bdev,
305 sector_t first_sector, int nr_vecs) 328 sector_t first_sector, int nr_vecs)
@@ -316,6 +339,7 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev,
316 bio->bi_end_io = dio_bio_end_io; 339 bio->bi_end_io = dio_bio_end_io;
317 340
318 dio->bio = bio; 341 dio->bio = bio;
342 dio->logical_offset_in_bio = dio->cur_page_fs_offset;
319 return 0; 343 return 0;
320} 344}
321 345
@@ -340,10 +364,15 @@ static void dio_bio_submit(struct dio *dio)
340 if (dio->is_async && dio->rw == READ) 364 if (dio->is_async && dio->rw == READ)
341 bio_set_pages_dirty(bio); 365 bio_set_pages_dirty(bio);
342 366
343 submit_bio(dio->rw, bio); 367 if (dio->submit_io)
368 dio->submit_io(dio->rw, bio, dio->inode,
369 dio->logical_offset_in_bio);
370 else
371 submit_bio(dio->rw, bio);
344 372
345 dio->bio = NULL; 373 dio->bio = NULL;
346 dio->boundary = 0; 374 dio->boundary = 0;
375 dio->logical_offset_in_bio = 0;
347} 376}
348 377
349/* 378/*
@@ -701,6 +730,7 @@ submit_page_section(struct dio *dio, struct page *page,
701 dio->cur_page_offset = offset; 730 dio->cur_page_offset = offset;
702 dio->cur_page_len = len; 731 dio->cur_page_len = len;
703 dio->cur_page_block = blocknr; 732 dio->cur_page_block = blocknr;
733 dio->cur_page_fs_offset = dio->block_in_file << dio->blkbits;
704out: 734out:
705 return ret; 735 return ret;
706} 736}
@@ -935,7 +965,7 @@ static ssize_t
935direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, 965direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
936 const struct iovec *iov, loff_t offset, unsigned long nr_segs, 966 const struct iovec *iov, loff_t offset, unsigned long nr_segs,
937 unsigned blkbits, get_block_t get_block, dio_iodone_t end_io, 967 unsigned blkbits, get_block_t get_block, dio_iodone_t end_io,
938 struct dio *dio) 968 dio_submit_t submit_io, struct dio *dio)
939{ 969{
940 unsigned long user_addr; 970 unsigned long user_addr;
941 unsigned long flags; 971 unsigned long flags;
@@ -952,6 +982,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
952 982
953 dio->get_block = get_block; 983 dio->get_block = get_block;
954 dio->end_io = end_io; 984 dio->end_io = end_io;
985 dio->submit_io = submit_io;
955 dio->final_block_in_bio = -1; 986 dio->final_block_in_bio = -1;
956 dio->next_block_for_io = -1; 987 dio->next_block_for_io = -1;
957 988
@@ -1008,7 +1039,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
1008 } 1039 }
1009 } /* end iovec loop */ 1040 } /* end iovec loop */
1010 1041
1011 if (ret == -ENOTBLK && (rw & WRITE)) { 1042 if (ret == -ENOTBLK) {
1012 /* 1043 /*
1013 * The remaining part of the request will be 1044 * The remaining part of the request will be
1014 * be handled by buffered I/O when we return 1045 * be handled by buffered I/O when we return
@@ -1110,7 +1141,7 @@ ssize_t
1110__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, 1141__blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1111 struct block_device *bdev, const struct iovec *iov, loff_t offset, 1142 struct block_device *bdev, const struct iovec *iov, loff_t offset,
1112 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, 1143 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
1113 int flags) 1144 dio_submit_t submit_io, int flags)
1114{ 1145{
1115 int seg; 1146 int seg;
1116 size_t size; 1147 size_t size;
@@ -1197,7 +1228,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
1197 (end > i_size_read(inode))); 1228 (end > i_size_read(inode)));
1198 1229
1199 retval = direct_io_worker(rw, iocb, inode, iov, offset, 1230 retval = direct_io_worker(rw, iocb, inode, iov, offset,
1200 nr_segs, blkbits, get_block, end_io, dio); 1231 nr_segs, blkbits, get_block, end_io,
1232 submit_io, dio);
1201 1233
1202 /* 1234 /*
1203 * In case of error extending write may have instantiated a few 1235 * In case of error extending write may have instantiated a few
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 44f35aea2f1f..10704f0086c8 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2250,10 +2250,15 @@ static inline int xip_truncate_page(struct address_space *mapping, loff_t from)
2250#endif 2250#endif
2251 2251
2252#ifdef CONFIG_BLOCK 2252#ifdef CONFIG_BLOCK
2253struct bio;
2254typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode,
2255 loff_t file_offset);
2256void dio_end_io(struct bio *bio, int error);
2257
2253ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, 2258ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
2254 struct block_device *bdev, const struct iovec *iov, loff_t offset, 2259 struct block_device *bdev, const struct iovec *iov, loff_t offset,
2255 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, 2260 unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
2256 int lock_type); 2261 dio_submit_t submit_io, int lock_type);
2257 2262
2258enum { 2263enum {
2259 /* need locking between buffered and direct access */ 2264 /* need locking between buffered and direct access */
@@ -2269,7 +2274,7 @@ static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb,
2269 dio_iodone_t end_io) 2274 dio_iodone_t end_io)
2270{ 2275{
2271 return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, 2276 return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
2272 nr_segs, get_block, end_io, 2277 nr_segs, get_block, end_io, NULL,
2273 DIO_LOCKING | DIO_SKIP_HOLES); 2278 DIO_LOCKING | DIO_SKIP_HOLES);
2274} 2279}
2275 2280
@@ -2279,7 +2284,7 @@ static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb,
2279 dio_iodone_t end_io) 2284 dio_iodone_t end_io)
2280{ 2285{
2281 return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, 2286 return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset,
2282 nr_segs, get_block, end_io, 0); 2287 nr_segs, get_block, end_io, NULL, 0);
2283} 2288}
2284#endif 2289#endif
2285 2290