diff options
author | Josef Bacik <josef@redhat.com> | 2010-05-23 11:00:55 -0400 |
---|---|---|
committer | Chris Mason <chris.mason@oracle.com> | 2010-05-25 10:34:55 -0400 |
commit | facd07b07d2a7988f5ce849558838cc953847637 (patch) | |
tree | 269200329390f450d2bc7f0858a8ed114a418374 | |
parent | 66f998f611897319b555364cefd5d6e88a205866 (diff) |
direct-io: add a hook for the fs to provide its own submit_bio function
Because BTRFS can do RAID and such, we need our own submit hook so we can set up
the bios in the correct fashion and handle checksum errors properly. So there
are a few changes here:
1) The submit_io hook. This is straightforward: when the filesystem supplies a
submit_io hook, it is called instead of submit_bio.
2) Allow the fs to return -ENOTBLK for reads. Until now this has only worked for
writes, since writes can fall back on buffered IO. But BTRFS needs the option
of falling back on buffered IO if it encounters a compressed extent, since we
need to read the entire extent in and decompress it. So if we get -ENOTBLK back
from get_block, we'll return and fall back on buffered IO just like the write
case.
I've tested these changes with fsx and everything seems to work. Thanks,
Signed-off-by: Josef Bacik <josef@redhat.com>
Signed-off-by: Chris Mason <chris.mason@oracle.com>
-rw-r--r-- | fs/direct-io.c | 42 | ||||
-rw-r--r-- | include/linux/fs.h | 11 |
2 files changed, 45 insertions, 8 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c index e82adc2debb7..5949947b060a 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -82,6 +82,8 @@ struct dio { | |||
82 | int reap_counter; /* rate limit reaping */ | 82 | int reap_counter; /* rate limit reaping */ |
83 | get_block_t *get_block; /* block mapping function */ | 83 | get_block_t *get_block; /* block mapping function */ |
84 | dio_iodone_t *end_io; /* IO completion function */ | 84 | dio_iodone_t *end_io; /* IO completion function */ |
85 | dio_submit_t *submit_io; /* IO submition function */ | ||
86 | loff_t logical_offset_in_bio; /* current first logical block in bio */ | ||
85 | sector_t final_block_in_bio; /* current final block in bio + 1 */ | 87 | sector_t final_block_in_bio; /* current final block in bio + 1 */ |
86 | sector_t next_block_for_io; /* next block to be put under IO, | 88 | sector_t next_block_for_io; /* next block to be put under IO, |
87 | in dio_blocks units */ | 89 | in dio_blocks units */ |
@@ -96,6 +98,7 @@ struct dio { | |||
96 | unsigned cur_page_offset; /* Offset into it, in bytes */ | 98 | unsigned cur_page_offset; /* Offset into it, in bytes */ |
97 | unsigned cur_page_len; /* Nr of bytes at cur_page_offset */ | 99 | unsigned cur_page_len; /* Nr of bytes at cur_page_offset */ |
98 | sector_t cur_page_block; /* Where it starts */ | 100 | sector_t cur_page_block; /* Where it starts */ |
101 | loff_t cur_page_fs_offset; /* Offset in file */ | ||
99 | 102 | ||
100 | /* BIO completion state */ | 103 | /* BIO completion state */ |
101 | spinlock_t bio_lock; /* protects BIO fields below */ | 104 | spinlock_t bio_lock; /* protects BIO fields below */ |
@@ -300,6 +303,26 @@ static void dio_bio_end_io(struct bio *bio, int error) | |||
300 | spin_unlock_irqrestore(&dio->bio_lock, flags); | 303 | spin_unlock_irqrestore(&dio->bio_lock, flags); |
301 | } | 304 | } |
302 | 305 | ||
306 | /** | |
307 | * dio_end_io - handle the end io action for the given bio | |
308 | * @bio: The direct io bio that is being completed | |
309 | * @error: Error if there was one | |
310 | * | |
311 | * This is meant to be called by any filesystem that uses its own dio_submit_t | |
312 | * so that the DIO specific endio actions are dealt with after the filesystem | |
313 | * has done its completion work. | |
314 | */ | |
315 | void dio_end_io(struct bio *bio, int error) | |
316 | { | |
317 | struct dio *dio = bio->bi_private; | |
318 | |||
319 | if (dio->is_async) | |
320 | dio_bio_end_aio(bio, error); | |
321 | else | |
322 | dio_bio_end_io(bio, error); | |
323 | } | |
324 | EXPORT_SYMBOL_GPL(dio_end_io); | ||
325 | |||
303 | static int | 326 | static int |
304 | dio_bio_alloc(struct dio *dio, struct block_device *bdev, | 327 | dio_bio_alloc(struct dio *dio, struct block_device *bdev, |
305 | sector_t first_sector, int nr_vecs) | 328 | sector_t first_sector, int nr_vecs) |
@@ -316,6 +339,7 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev, | |||
316 | bio->bi_end_io = dio_bio_end_io; | 339 | bio->bi_end_io = dio_bio_end_io; |
317 | 340 | ||
318 | dio->bio = bio; | 341 | dio->bio = bio; |
342 | dio->logical_offset_in_bio = dio->cur_page_fs_offset; | ||
319 | return 0; | 343 | return 0; |
320 | } | 344 | } |
321 | 345 | ||
@@ -340,10 +364,15 @@ static void dio_bio_submit(struct dio *dio) | |||
340 | if (dio->is_async && dio->rw == READ) | 364 | if (dio->is_async && dio->rw == READ) |
341 | bio_set_pages_dirty(bio); | 365 | bio_set_pages_dirty(bio); |
342 | 366 | ||
343 | submit_bio(dio->rw, bio); | 367 | if (dio->submit_io) |
368 | dio->submit_io(dio->rw, bio, dio->inode, | ||
369 | dio->logical_offset_in_bio); | ||
370 | else | ||
371 | submit_bio(dio->rw, bio); | ||
344 | 372 | ||
345 | dio->bio = NULL; | 373 | dio->bio = NULL; |
346 | dio->boundary = 0; | 374 | dio->boundary = 0; |
375 | dio->logical_offset_in_bio = 0; | ||
347 | } | 376 | } |
348 | 377 | ||
349 | /* | 378 | /* |
@@ -701,6 +730,7 @@ submit_page_section(struct dio *dio, struct page *page, | |||
701 | dio->cur_page_offset = offset; | 730 | dio->cur_page_offset = offset; |
702 | dio->cur_page_len = len; | 731 | dio->cur_page_len = len; |
703 | dio->cur_page_block = blocknr; | 732 | dio->cur_page_block = blocknr; |
733 | dio->cur_page_fs_offset = dio->block_in_file << dio->blkbits; | ||
704 | out: | 734 | out: |
705 | return ret; | 735 | return ret; |
706 | } | 736 | } |
@@ -935,7 +965,7 @@ static ssize_t | |||
935 | direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | 965 | direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, |
936 | const struct iovec *iov, loff_t offset, unsigned long nr_segs, | 966 | const struct iovec *iov, loff_t offset, unsigned long nr_segs, |
937 | unsigned blkbits, get_block_t get_block, dio_iodone_t end_io, | 967 | unsigned blkbits, get_block_t get_block, dio_iodone_t end_io, |
938 | struct dio *dio) | 968 | dio_submit_t submit_io, struct dio *dio) |
939 | { | 969 | { |
940 | unsigned long user_addr; | 970 | unsigned long user_addr; |
941 | unsigned long flags; | 971 | unsigned long flags; |
@@ -952,6 +982,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
952 | 982 | ||
953 | dio->get_block = get_block; | 983 | dio->get_block = get_block; |
954 | dio->end_io = end_io; | 984 | dio->end_io = end_io; |
985 | dio->submit_io = submit_io; | ||
955 | dio->final_block_in_bio = -1; | 986 | dio->final_block_in_bio = -1; |
956 | dio->next_block_for_io = -1; | 987 | dio->next_block_for_io = -1; |
957 | 988 | ||
@@ -1008,7 +1039,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
1008 | } | 1039 | } |
1009 | } /* end iovec loop */ | 1040 | } /* end iovec loop */ |
1010 | 1041 | ||
1011 | if (ret == -ENOTBLK && (rw & WRITE)) { | 1042 | if (ret == -ENOTBLK) { |
1012 | /* | 1043 | /* |
1013 | * The remaining part of the request will be | 1044 | * The remaining part of the request will be |
1014 | * be handled by buffered I/O when we return | 1045 | * be handled by buffered I/O when we return |
@@ -1110,7 +1141,7 @@ ssize_t | |||
1110 | __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | 1141 | __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, |
1111 | struct block_device *bdev, const struct iovec *iov, loff_t offset, | 1142 | struct block_device *bdev, const struct iovec *iov, loff_t offset, |
1112 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, | 1143 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, |
1113 | int flags) | 1144 | dio_submit_t submit_io, int flags) |
1114 | { | 1145 | { |
1115 | int seg; | 1146 | int seg; |
1116 | size_t size; | 1147 | size_t size; |
@@ -1197,7 +1228,8 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | |||
1197 | (end > i_size_read(inode))); | 1228 | (end > i_size_read(inode))); |
1198 | 1229 | ||
1199 | retval = direct_io_worker(rw, iocb, inode, iov, offset, | 1230 | retval = direct_io_worker(rw, iocb, inode, iov, offset, |
1200 | nr_segs, blkbits, get_block, end_io, dio); | 1231 | nr_segs, blkbits, get_block, end_io, |
1232 | submit_io, dio); | ||
1201 | 1233 | ||
1202 | /* | 1234 | /* |
1203 | * In case of error extending write may have instantiated a few | 1235 | * In case of error extending write may have instantiated a few |
diff --git a/include/linux/fs.h b/include/linux/fs.h index 44f35aea2f1f..10704f0086c8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h | |||
@@ -2250,10 +2250,15 @@ static inline int xip_truncate_page(struct address_space *mapping, loff_t from) | |||
2250 | #endif | 2250 | #endif |
2251 | 2251 | ||
2252 | #ifdef CONFIG_BLOCK | 2252 | #ifdef CONFIG_BLOCK |
2253 | struct bio; | ||
2254 | typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode, | ||
2255 | loff_t file_offset); | ||
2256 | void dio_end_io(struct bio *bio, int error); | ||
2257 | |||
2253 | ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, | 2258 | ssize_t __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode, |
2254 | struct block_device *bdev, const struct iovec *iov, loff_t offset, | 2259 | struct block_device *bdev, const struct iovec *iov, loff_t offset, |
2255 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, | 2260 | unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io, |
2256 | int lock_type); | 2261 | dio_submit_t submit_io, int lock_type); |
2257 | 2262 | ||
2258 | enum { | 2263 | enum { |
2259 | /* need locking between buffered and direct access */ | 2264 | /* need locking between buffered and direct access */ |
@@ -2269,7 +2274,7 @@ static inline ssize_t blockdev_direct_IO(int rw, struct kiocb *iocb, | |||
2269 | dio_iodone_t end_io) | 2274 | dio_iodone_t end_io) |
2270 | { | 2275 | { |
2271 | return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, | 2276 | return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, |
2272 | nr_segs, get_block, end_io, | 2277 | nr_segs, get_block, end_io, NULL, |
2273 | DIO_LOCKING | DIO_SKIP_HOLES); | 2278 | DIO_LOCKING | DIO_SKIP_HOLES); |
2274 | } | 2279 | } |
2275 | 2280 | ||
@@ -2279,7 +2284,7 @@ static inline ssize_t blockdev_direct_IO_no_locking(int rw, struct kiocb *iocb, | |||
2279 | dio_iodone_t end_io) | 2284 | dio_iodone_t end_io) |
2280 | { | 2285 | { |
2281 | return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, | 2286 | return __blockdev_direct_IO(rw, iocb, inode, bdev, iov, offset, |
2282 | nr_segs, get_block, end_io, 0); | 2287 | nr_segs, get_block, end_io, NULL, 0); |
2283 | } | 2288 | } |
2284 | #endif | 2289 | #endif |
2285 | 2290 | ||