Diffstat (limited to 'fs/direct-io.c')
 fs/direct-io.c | 40 ++++++++++++++++++++++++----------------
 1 file changed, 24 insertions(+), 16 deletions(-)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 27f3e787faca..910a8ed74b5d 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -86,12 +86,12 @@ struct dio {
 	unsigned first_block_in_page;	/* doesn't change, Used only once */
 	int boundary;			/* prev block is at a boundary */
 	int reap_counter;		/* rate limit reaping */
-	get_blocks_t *get_blocks;	/* block mapping function */
+	get_block_t *get_block;		/* block mapping function */
 	dio_iodone_t *end_io;		/* IO completion function */
 	sector_t final_block_in_bio;	/* current final block in bio + 1 */
 	sector_t next_block_for_io;	/* next block to be put under IO,
 					   in dio_blocks units */
-	struct buffer_head map_bh;	/* last get_blocks() result */
+	struct buffer_head map_bh;	/* last get_block() result */
 
 	/*
 	 * Deferred addition of a page to the dio.  These variables are
@@ -129,6 +129,7 @@ struct dio {
 	/* AIO related stuff */
 	struct kiocb *iocb;		/* kiocb */
 	int is_async;			/* is IO async ? */
+	int io_error;			/* IO error in completion path */
 	ssize_t result;			/* IO result */
 };
 
@@ -210,9 +211,9 @@ static struct page *dio_get_page(struct dio *dio)
 
 /*
  * Called when all DIO BIO I/O has been completed - let the filesystem
- * know, if it registered an interest earlier via get_blocks.  Pass the
+ * know, if it registered an interest earlier via get_block.  Pass the
  * private field of the map buffer_head so that filesystems can use it
- * to hold additional state between get_blocks calls and dio_complete.
+ * to hold additional state between get_block calls and dio_complete.
  */
 static void dio_complete(struct dio *dio, loff_t offset, ssize_t bytes)
 {
@@ -250,6 +251,10 @@ static void finished_one_bio(struct dio *dio)
 			    ((offset + transferred) > dio->i_size))
 				transferred = dio->i_size - offset;
 
+			/* check for error in completion path */
+			if (dio->io_error)
+				transferred = dio->io_error;
+
 			dio_complete(dio, offset, transferred);
 
 			/* Complete AIO later if falling back to buffered i/o */
@@ -406,7 +411,7 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
 	int page_no;
 
 	if (!uptodate)
-		dio->result = -EIO;
+		dio->io_error = -EIO;
 
 	if (dio->is_async && dio->rw == READ) {
 		bio_check_pages_dirty(bio);	/* transfers ownership */
@@ -488,7 +493,7 @@ static int dio_bio_reap(struct dio *dio)
  * The fs is allowed to map lots of blocks at once.  If it wants to do that,
  * it uses the passed inode-relative block number as the file offset, as usual.
  *
- * get_blocks() is passed the number of i_blkbits-sized blocks which direct_io
+ * get_block() is passed the number of i_blkbits-sized blocks which direct_io
  * has remaining to do.  The fs should not map more than this number of blocks.
 *
 * If the fs has mapped a lot of blocks, it should populate bh->b_size to
@@ -501,7 +506,7 @@ static int dio_bio_reap(struct dio *dio)
 * In the case of filesystem holes: the fs may return an arbitrarily-large
 * hole by returning an appropriate value in b_size and by clearing
 * buffer_mapped().  However the direct-io code will only process holes one
- * block at a time - it will repeatedly call get_blocks() as it walks the hole.
+ * block at a time - it will repeatedly call get_block() as it walks the hole.
 */
 static int get_more_blocks(struct dio *dio)
 {
@@ -519,8 +524,6 @@ static int get_more_blocks(struct dio *dio)
 	 */
 	ret = dio->page_errors;
 	if (ret == 0) {
-		map_bh->b_state = 0;
-		map_bh->b_size = 0;
 		BUG_ON(dio->block_in_file >= dio->final_block_in_request);
 		fs_startblk = dio->block_in_file >> dio->blkfactor;
 		dio_count = dio->final_block_in_request - dio->block_in_file;
@@ -529,6 +532,9 @@ static int get_more_blocks(struct dio *dio)
 		if (dio_count & blkmask)
 			fs_count++;
 
+		map_bh->b_state = 0;
+		map_bh->b_size = fs_count << dio->inode->i_blkbits;
+
 		create = dio->rw == WRITE;
 		if (dio->lock_type == DIO_LOCKING) {
 			if (dio->block_in_file < (i_size_read(dio->inode) >>
@@ -537,13 +543,14 @@ static int get_more_blocks(struct dio *dio)
 		} else if (dio->lock_type == DIO_NO_LOCKING) {
 			create = 0;
 		}
+
 		/*
 		 * For writes inside i_size we forbid block creations: only
 		 * overwrites are permitted.  We fall back to buffered writes
 		 * at a higher level for inside-i_size block-instantiating
 		 * writes.
 		 */
-		ret = (*dio->get_blocks)(dio->inode, fs_startblk, fs_count,
+		ret = (*dio->get_block)(dio->inode, fs_startblk,
 						map_bh, create);
 	}
 	return ret;
@@ -778,11 +785,11 @@ static void dio_zero_block(struct dio *dio, int end)
 * happily perform page-sized but 512-byte aligned IOs.  It is important that
 * blockdev IO be able to have fine alignment and large sizes.
 *
- * So what we do is to permit the ->get_blocks function to populate bh.b_size
+ * So what we do is to permit the ->get_block function to populate bh.b_size
 * with the size of IO which is permitted at this offset and this i_blkbits.
 *
 * For best results, the blockdev should be set up with 512-byte i_blkbits and
- * it should set b_size to PAGE_SIZE or more inside get_blocks().  This gives
+ * it should set b_size to PAGE_SIZE or more inside get_block().  This gives
 * fine alignment but still allows this function to work in PAGE_SIZE units.
 */
 static int do_direct_IO(struct dio *dio)
@@ -942,7 +949,7 @@ out:
 static ssize_t
 direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 	const struct iovec *iov, loff_t offset, unsigned long nr_segs,
-	unsigned blkbits, get_blocks_t get_blocks, dio_iodone_t end_io,
+	unsigned blkbits, get_block_t get_block, dio_iodone_t end_io,
 	struct dio *dio)
 {
 	unsigned long user_addr;
@@ -964,13 +971,14 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 
 	dio->boundary = 0;
 	dio->reap_counter = 0;
-	dio->get_blocks = get_blocks;
+	dio->get_block = get_block;
 	dio->end_io = end_io;
 	dio->map_bh.b_private = NULL;
 	dio->final_block_in_bio = -1;
 	dio->next_block_for_io = -1;
 
 	dio->page_errors = 0;
+	dio->io_error = 0;
 	dio->result = 0;
 	dio->iocb = iocb;
 	dio->i_size = i_size_read(inode);
@@ -1171,7 +1179,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 ssize_t
 __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 	struct block_device *bdev, const struct iovec *iov, loff_t offset,
-	unsigned long nr_segs, get_blocks_t get_blocks, dio_iodone_t end_io,
+	unsigned long nr_segs, get_block_t get_block, dio_iodone_t end_io,
 	int dio_lock_type)
 {
 	int seg;
@@ -1267,7 +1275,7 @@ __blockdev_direct_IO(int rw, struct kiocb *iocb, struct inode *inode,
 					(end > i_size_read(inode)));
 
 	retval = direct_io_worker(rw, iocb, inode, iov, offset,
-				nr_segs, blkbits, get_blocks, end_io, dio);
+				nr_segs, blkbits, get_block, end_io, dio);
 
 	if (rw == READ && dio_lock_type == DIO_LOCKING)
 		release_i_mutex = 0;
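
For context: with this patch the direct-io code primes map_bh->b_state and map_bh->b_size before each mapping call, so ->get_block() learns how many bytes the caller would like mapped from bh_result->b_size instead of the old fs_count argument, and reports the extent it actually mapped back through the same field. Below is a minimal, hypothetical sketch of a get_block_t routine written against that contract; example_find_extent() is a made-up placeholder for the filesystem's own block/extent lookup, not a real kernel helper.

#include <linux/fs.h>
#include <linux/buffer_head.h>

/* placeholder: maps up to max_blocks fs blocks starting at iblock,
 * returns the number of blocks mapped (0 for a hole) and the start
 * of the physical extent through *phys */
static unsigned long example_find_extent(struct inode *inode, sector_t iblock,
					 unsigned long max_blocks, int create,
					 sector_t *phys);

static int example_get_block(struct inode *inode, sector_t iblock,
			     struct buffer_head *bh_result, int create)
{
	unsigned blkbits = inode->i_blkbits;
	/* direct-io pre-loads b_size with how much it wants mapped */
	unsigned long max_blocks = bh_result->b_size >> blkbits;
	unsigned long mapped;
	sector_t phys;

	/* look up (or allocate, when create != 0) blocks at iblock */
	mapped = example_find_extent(inode, iblock, max_blocks, create, &phys);
	if (!mapped)
		return 0;	/* hole: leave the buffer unmapped */

	map_bh(bh_result, inode->i_sb, phys);
	/* report how much was really mapped; may be less than requested */
	bh_result->b_size = mapped << blkbits;
	return 0;
}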
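
Such a routine is handed straight to the direct-io entry points. Assuming the companion fs.h change keeps the blockdev_direct_IO() wrapper (which supplies DIO_LOCKING to __blockdev_direct_IO()) on the same get_block_t signature, a caller-side sketch would look roughly like this:

static ssize_t
example_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov,
		  loff_t offset, unsigned long nr_segs)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file->f_mapping->host;

	/* no special completion handling here, so end_io is NULL */
	return blockdev_direct_IO(rw, iocb, inode, inode->i_sb->s_bdev, iov,
				  offset, nr_segs, example_get_block, NULL);
}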