Diffstat (limited to 'fs/direct-io.c')
 -rw-r--r--  fs/direct-io.c | 323
 1 file changed, 144 insertions(+), 179 deletions(-)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index 5981e17f46f0..d9d0833444f5 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -27,6 +27,7 @@
 #include <linux/slab.h>
 #include <linux/highmem.h>
 #include <linux/pagemap.h>
+#include <linux/task_io_accounting_ops.h>
 #include <linux/bio.h>
 #include <linux/wait.h>
 #include <linux/err.h>
@@ -121,8 +122,7 @@ struct dio {
 
 	/* BIO completion state */
 	spinlock_t bio_lock;		/* protects BIO fields below */
-	int bio_count;			/* nr bios to be completed */
-	int bios_in_flight;		/* nr bios in flight */
+	unsigned long refcount;		/* direct_io_worker() and bios */
 	struct bio *bio_list;		/* singly linked via bi_private */
 	struct task_struct *waiter;	/* waiting task (NULL if none) */
 
@@ -209,76 +209,55 @@ static struct page *dio_get_page(struct dio *dio)
 	return dio->pages[dio->head++];
 }
 
-/*
- * Called when all DIO BIO I/O has been completed - let the filesystem
- * know, if it registered an interest earlier via get_block.  Pass the
- * private field of the map buffer_head so that filesystems can use it
- * to hold additional state between get_block calls and dio_complete.
- */
-static void dio_complete(struct dio *dio, loff_t offset, ssize_t bytes)
-{
-	if (dio->end_io && dio->result)
-		dio->end_io(dio->iocb, offset, bytes, dio->map_bh.b_private);
-	if (dio->lock_type == DIO_LOCKING)
-		/* lockdep: non-owner release */
-		up_read_non_owner(&dio->inode->i_alloc_sem);
-}
-
-/*
- * Called when a BIO has been processed.  If the count goes to zero then IO is
- * complete and we can signal this to the AIO layer.
+/**
+ * dio_complete() - called when all DIO BIO I/O has been completed
+ * @offset: the byte offset in the file of the completed operation
+ *
+ * This releases locks as dictated by the locking type, lets interested parties
+ * know that a DIO operation has completed, and calculates the resulting return
+ * code for the operation.
+ *
+ * It lets the filesystem know if it registered an interest earlier via
+ * get_block.  Pass the private field of the map buffer_head so that
+ * filesystems can use it to hold additional state between get_block calls and
+ * dio_complete.
  */
-static void finished_one_bio(struct dio *dio)
+static int dio_complete(struct dio *dio, loff_t offset, int ret)
 {
-	unsigned long flags;
+	ssize_t transferred = 0;
 
-	spin_lock_irqsave(&dio->bio_lock, flags);
-	if (dio->bio_count == 1) {
-		if (dio->is_async) {
-			ssize_t transferred;
-			loff_t offset;
-
-			/*
-			 * Last reference to the dio is going away.
-			 * Drop spinlock and complete the DIO.
-			 */
-			spin_unlock_irqrestore(&dio->bio_lock, flags);
+	/*
+	 * AIO submission can race with bio completion to get here while
+	 * expecting to have the last io completed by bio completion.
+	 * In that case -EIOCBQUEUED is in fact not an error we want
+	 * to preserve through this call.
+	 */
+	if (ret == -EIOCBQUEUED)
+		ret = 0;
 
-			/* Check for short read case */
-			transferred = dio->result;
-			offset = dio->iocb->ki_pos;
+	if (dio->result) {
+		transferred = dio->result;
 
-			if ((dio->rw == READ) &&
-			    ((offset + transferred) > dio->i_size))
-				transferred = dio->i_size - offset;
+		/* Check for short read case */
+		if ((dio->rw == READ) && ((offset + transferred) > dio->i_size))
+			transferred = dio->i_size - offset;
+	}
 
-			/* check for error in completion path */
-			if (dio->io_error)
-				transferred = dio->io_error;
+	if (dio->end_io && dio->result)
+		dio->end_io(dio->iocb, offset, transferred,
+			    dio->map_bh.b_private);
+	if (dio->lock_type == DIO_LOCKING)
+		/* lockdep: non-owner release */
+		up_read_non_owner(&dio->inode->i_alloc_sem);
 
-			dio_complete(dio, offset, transferred);
+	if (ret == 0)
+		ret = dio->page_errors;
+	if (ret == 0)
+		ret = dio->io_error;
+	if (ret == 0)
+		ret = transferred;
 
-			/* Complete AIO later if falling back to buffered i/o */
-			if (dio->result == dio->size ||
-				((dio->rw == READ) && dio->result)) {
-				aio_complete(dio->iocb, transferred, 0);
-				kfree(dio);
-				return;
-			} else {
-				/*
-				 * Falling back to buffered
-				 */
-				spin_lock_irqsave(&dio->bio_lock, flags);
-				dio->bio_count--;
-				if (dio->waiter)
-					wake_up_process(dio->waiter);
-				spin_unlock_irqrestore(&dio->bio_lock, flags);
-				return;
-			}
-		}
-	}
-	dio->bio_count--;
-	spin_unlock_irqrestore(&dio->bio_lock, flags);
+	return ret;
 }
 
 static int dio_bio_complete(struct dio *dio, struct bio *bio);
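Note: the consolidated dio_complete() above folds every possible outcome into one return value with a fixed precedence: an -EIOCBQUEUED handed in by a racing submission path is neutralized first, then page_errors (faults hit while pinning user pages), then io_error (bio failures), and only if all of those are zero does the byte count win. A minimal, self-contained restatement of that precedence follows; it is illustrative only, the function name is made up, and EIOCBQUEUED is a kernel-internal errno redefined here purely for the sketch.

#include <sys/types.h>

#define EIOCBQUEUED 529	/* kernel-internal errno, defined here only for the sketch */

/* Same precedence as the new dio_complete(): the queued-AIO marker is not an
 * error, then page errors, then bio errors, then the transferred byte count. */
static ssize_t fold_dio_result(ssize_t ret, int page_errors, int io_error,
			       ssize_t transferred)
{
	if (ret == -EIOCBQUEUED)
		ret = 0;
	if (ret == 0)
		ret = page_errors;
	if (ret == 0)
		ret = io_error;
	if (ret == 0)
		ret = transferred;
	return ret;
}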
@@ -288,12 +267,27 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio);
 static int dio_bio_end_aio(struct bio *bio, unsigned int bytes_done, int error)
 {
 	struct dio *dio = bio->bi_private;
+	unsigned long remaining;
+	unsigned long flags;
 
 	if (bio->bi_size)
 		return 1;
 
 	/* cleanup the bio */
 	dio_bio_complete(dio, bio);
+
+	spin_lock_irqsave(&dio->bio_lock, flags);
+	remaining = --dio->refcount;
+	if (remaining == 1 && dio->waiter)
+		wake_up_process(dio->waiter);
+	spin_unlock_irqrestore(&dio->bio_lock, flags);
+
+	if (remaining == 0) {
+		int ret = dio_complete(dio, dio->iocb->ki_pos, 0);
+		aio_complete(dio->iocb, ret, 0);
+		kfree(dio);
+	}
+
 	return 0;
 }
 
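Note: dio_bio_end_aio() is an instance of a small but easy-to-get-wrong pattern: drop a reference under the lock, wake the waiter when only its own reference remains, and tear the object down only when the count reaches zero. A hedged sketch of that pattern with invented names (struct op and op_put, not the actual dio code) looks roughly like this:

#include <linux/spinlock.h>
#include <linux/sched.h>
#include <linux/slab.h>

struct op {				/* stand-in for struct dio */
	spinlock_t lock;
	unsigned long refcount;		/* submitter + each in-flight unit */
	struct task_struct *waiter;	/* submitter, if it is sleeping */
};

static void op_put(struct op *op)
{
	unsigned long remaining;
	unsigned long flags;

	spin_lock_irqsave(&op->lock, flags);
	remaining = --op->refcount;
	if (remaining == 1 && op->waiter)	/* only the submitter is left */
		wake_up_process(op->waiter);
	spin_unlock_irqrestore(&op->lock, flags);

	if (remaining == 0)			/* last reference: finish and free */
		kfree(op);
}

The important property is that the wakeup decision and the refcount change happen under the same lock, which is what the dio_await_one() rework further down relies on.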
@@ -315,8 +309,7 @@ static int dio_bio_end_io(struct bio *bio, unsigned int bytes_done, int error)
 	spin_lock_irqsave(&dio->bio_lock, flags);
 	bio->bi_private = dio->bio_list;
 	dio->bio_list = bio;
-	dio->bios_in_flight--;
-	if (dio->waiter && dio->bios_in_flight == 0)
+	if (--dio->refcount == 1 && dio->waiter)
 		wake_up_process(dio->waiter);
 	spin_unlock_irqrestore(&dio->bio_lock, flags);
 	return 0;
@@ -347,6 +340,8 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev,
  * In the AIO read case we speculatively dirty the pages before starting IO.
  * During IO completion, any of these pages which happen to have been written
  * back will be redirtied by bio_check_pages_dirty().
+ *
+ * bios hold a dio reference between submit_bio and ->end_io.
  */
 static void dio_bio_submit(struct dio *dio)
 {
@@ -354,12 +349,14 @@ static void dio_bio_submit(struct dio *dio)
 	unsigned long flags;
 
 	bio->bi_private = dio;
+
 	spin_lock_irqsave(&dio->bio_lock, flags);
-	dio->bio_count++;
-	dio->bios_in_flight++;
+	dio->refcount++;
 	spin_unlock_irqrestore(&dio->bio_lock, flags);
+
 	if (dio->is_async && dio->rw == READ)
 		bio_set_pages_dirty(bio);
+
 	submit_bio(dio->rw, bio);
 
 	dio->bio = NULL;
@@ -376,28 +373,37 @@ static void dio_cleanup(struct dio *dio)
 }
 
 /*
- * Wait for the next BIO to complete.  Remove it and return it.
+ * Wait for the next BIO to complete.  Remove it and return it.  NULL is
+ * returned once all BIOs have been completed.  This must only be called once
+ * all bios have been issued so that dio->refcount can only decrease.  This
+ * requires that that the caller hold a reference on the dio.
  */
 static struct bio *dio_await_one(struct dio *dio)
 {
 	unsigned long flags;
-	struct bio *bio;
+	struct bio *bio = NULL;
 
 	spin_lock_irqsave(&dio->bio_lock, flags);
-	while (dio->bio_list == NULL) {
-		set_current_state(TASK_UNINTERRUPTIBLE);
-		if (dio->bio_list == NULL) {
-			dio->waiter = current;
-			spin_unlock_irqrestore(&dio->bio_lock, flags);
-			blk_run_address_space(dio->inode->i_mapping);
-			io_schedule();
-			spin_lock_irqsave(&dio->bio_lock, flags);
-			dio->waiter = NULL;
-		}
-		set_current_state(TASK_RUNNING);
+
+	/*
+	 * Wait as long as the list is empty and there are bios in flight.  bio
+	 * completion drops the count, maybe adds to the list, and wakes while
+	 * holding the bio_lock so we don't need set_current_state()'s barrier
+	 * and can call it after testing our condition.
+	 */
+	while (dio->refcount > 1 && dio->bio_list == NULL) {
+		__set_current_state(TASK_UNINTERRUPTIBLE);
+		dio->waiter = current;
+		spin_unlock_irqrestore(&dio->bio_lock, flags);
+		io_schedule();
+		/* wake up sets us TASK_RUNNING */
+		spin_lock_irqsave(&dio->bio_lock, flags);
+		dio->waiter = NULL;
+	}
+	if (dio->bio_list) {
+		bio = dio->bio_list;
+		dio->bio_list = bio->bi_private;
 	}
-	bio = dio->bio_list;
-	dio->bio_list = bio->bi_private;
 	spin_unlock_irqrestore(&dio->bio_lock, flags);
 	return bio;
 }
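Note: the new comment in dio_await_one() is worth dwelling on. Both completion paths decrement refcount, append to bio_list and read waiter while holding bio_lock, so the sleeper's condition check is already ordered against the wakeup by the lock itself; the cheaper __set_current_state() (no memory barrier) is therefore enough. A self-contained sketch of the sleeping side, with invented names (struct op, op_wait_for_work) and assuming the waker runs entirely under the same spinlock:

#include <linux/spinlock.h>
#include <linux/sched.h>

struct op {				/* stand-in for struct dio */
	spinlock_t lock;
	unsigned long refcount;
	void *pending;			/* stand-in for dio->bio_list */
	struct task_struct *waiter;
};

/* Sleep until work is queued or only our own reference remains. */
static void op_wait_for_work(struct op *op)
{
	unsigned long flags;

	spin_lock_irqsave(&op->lock, flags);
	while (op->refcount > 1 && op->pending == NULL) {
		__set_current_state(TASK_UNINTERRUPTIBLE);
		op->waiter = current;
		spin_unlock_irqrestore(&op->lock, flags);
		io_schedule();		/* wake_up_process() leaves us TASK_RUNNING */
		spin_lock_irqsave(&op->lock, flags);
		op->waiter = NULL;
	}
	spin_unlock_irqrestore(&op->lock, flags);
}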
@@ -426,34 +432,24 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
 		}
 		bio_put(bio);
 	}
-	finished_one_bio(dio);
 	return uptodate ? 0 : -EIO;
 }
 
 /*
- * Wait on and process all in-flight BIOs.
+ * Wait on and process all in-flight BIOs.  This must only be called once
+ * all bios have been issued so that the refcount can only decrease.
+ * This just waits for all bios to make it through dio_bio_complete.  IO
+ * errors are propogated through dio->io_error and should be propogated via
+ * dio_complete().
  */
-static int dio_await_completion(struct dio *dio)
+static void dio_await_completion(struct dio *dio)
 {
-	int ret = 0;
-
-	if (dio->bio)
-		dio_bio_submit(dio);
-
-	/*
-	 * The bio_lock is not held for the read of bio_count.
-	 * This is ok since it is the dio_bio_complete() that changes
-	 * bio_count.
-	 */
-	while (dio->bio_count) {
-		struct bio *bio = dio_await_one(dio);
-		int ret2;
-
-		ret2 = dio_bio_complete(dio, bio);
-		if (ret == 0)
-			ret = ret2;
-	}
-	return ret;
+	struct bio *bio;
+	do {
+		bio = dio_await_one(dio);
+		if (bio)
+			dio_bio_complete(dio, bio);
+	} while (bio);
 }
 
 /*
@@ -675,6 +671,13 @@ submit_page_section(struct dio *dio, struct page *page,
 {
 	int ret = 0;
 
+	if (dio->rw & WRITE) {
+		/*
+		 * Read accounting is performed in submit_bio()
+		 */
+		task_io_account_write(len);
+	}
+
 	/*
 	 * Can we just grow the current page's presence in the dio?
 	 */
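Note: with this hunk, bytes queued for O_DIRECT writes are charged to the submitting task as pages are handed to the dio, while reads are accounted later in submit_bio(). On kernels built with CONFIG_TASK_IO_ACCOUNTING these per-task counters are normally exposed through /proc/<pid>/io; the tiny userspace check below just dumps that file and is an assumption about the procfs interface, not part of this patch.

#include <stdio.h>

int main(void)
{
	char line[256];
	FILE *f = fopen("/proc/self/io", "r");	/* absent without CONFIG_TASK_IO_ACCOUNTING */

	if (!f)
		return 1;
	while (fgets(line, sizeof(line), f))
		fputs(line, stdout);		/* look for the write_bytes line */
	fclose(f);
	return 0;
}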
@@ -953,6 +956,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 	struct dio *dio)
 {
 	unsigned long user_addr;
+	unsigned long flags;
 	int seg;
 	ssize_t ret = 0;
 	ssize_t ret2;
@@ -983,17 +987,8 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 	dio->iocb = iocb;
 	dio->i_size = i_size_read(inode);
 
-	/*
-	 * BIO completion state.
-	 *
-	 * ->bio_count starts out at one, and we decrement it to zero after all
-	 * BIOs are submitted.  This to avoid the situation where a really fast
-	 * (or synchronous) device could take the count to zero while we're
-	 * still submitting BIOs.
-	 */
-	dio->bio_count = 1;
-	dio->bios_in_flight = 0;
 	spin_lock_init(&dio->bio_lock);
+	dio->refcount = 1;
 	dio->bio_list = NULL;
 	dio->waiter = NULL;
 
@@ -1069,6 +1064,9 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 	if (dio->bio)
 		dio_bio_submit(dio);
 
+	/* All IO is now issued, send it on its way */
+	blk_run_address_space(inode->i_mapping);
+
 	/*
 	 * It is possible that, we return short IO due to end of file.
 	 * In that case, we need to release all the pages we got hold on.
@@ -1084,74 +1082,41 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode,
 		mutex_unlock(&dio->inode->i_mutex);
 
 	/*
-	 * OK, all BIOs are submitted, so we can decrement bio_count to truly
-	 * reflect the number of to-be-processed BIOs.
+	 * The only time we want to leave bios in flight is when a successful
+	 * partial aio read or full aio write have been setup.  In that case
+	 * bio completion will call aio_complete.  The only time it's safe to
+	 * call aio_complete is when we return -EIOCBQUEUED, so we key on that.
+	 * This had *better* be the only place that raises -EIOCBQUEUED.
 	 */
-	if (dio->is_async) {
-		int should_wait = 0;
+	BUG_ON(ret == -EIOCBQUEUED);
+	if (dio->is_async && ret == 0 && dio->result &&
+	    ((rw & READ) || (dio->result == dio->size)))
+		ret = -EIOCBQUEUED;
 
-		if (dio->result < dio->size && (rw & WRITE)) {
-			dio->waiter = current;
-			should_wait = 1;
-		}
-		if (ret == 0)
-			ret = dio->result;
-		finished_one_bio(dio);		/* This can free the dio */
-		blk_run_address_space(inode->i_mapping);
-		if (should_wait) {
-			unsigned long flags;
-			/*
-			 * Wait for already issued I/O to drain out and
-			 * release its references to user-space pages
-			 * before returning to fallback on buffered I/O
-			 */
-
-			spin_lock_irqsave(&dio->bio_lock, flags);
-			set_current_state(TASK_UNINTERRUPTIBLE);
-			while (dio->bio_count) {
-				spin_unlock_irqrestore(&dio->bio_lock, flags);
-				io_schedule();
-				spin_lock_irqsave(&dio->bio_lock, flags);
-				set_current_state(TASK_UNINTERRUPTIBLE);
-			}
-			spin_unlock_irqrestore(&dio->bio_lock, flags);
-			set_current_state(TASK_RUNNING);
-			kfree(dio);
-		}
-	} else {
-		ssize_t transferred = 0;
-
-		finished_one_bio(dio);
-		ret2 = dio_await_completion(dio);
-		if (ret == 0)
-			ret = ret2;
-		if (ret == 0)
-			ret = dio->page_errors;
-		if (dio->result) {
-			loff_t i_size = i_size_read(inode);
-
-			transferred = dio->result;
-			/*
-			 * Adjust the return value if the read crossed a
-			 * non-block-aligned EOF.
-			 */
-			if (rw == READ && (offset + transferred > i_size))
-				transferred = i_size - offset;
-		}
-		dio_complete(dio, offset, transferred);
-		if (ret == 0)
-			ret = transferred;
+	if (ret != -EIOCBQUEUED)
+		dio_await_completion(dio);
 
-		/* We could have also come here on an AIO file extend */
-		if (!is_sync_kiocb(iocb) && (rw & WRITE) &&
-			ret >= 0 && dio->result == dio->size)
-			/*
-			 * For AIO writes where we have completed the
-			 * i/o, we have to mark the the aio complete.
-			 */
-			aio_complete(iocb, ret, 0);
+	/*
+	 * Sync will always be dropping the final ref and completing the
+	 * operation.  AIO can if it was a broken operation described above or
+	 * in fact if all the bios race to complete before we get here.  In
+	 * that case dio_complete() translates the EIOCBQUEUED into the proper
+	 * return code that the caller will hand to aio_complete().
+	 *
+	 * This is managed by the bio_lock instead of being an atomic_t so that
+	 * completion paths can drop their ref and use the remaining count to
+	 * decide to wake the submission path atomically.
+	 */
+	spin_lock_irqsave(&dio->bio_lock, flags);
+	ret2 = --dio->refcount;
+	spin_unlock_irqrestore(&dio->bio_lock, flags);
+	BUG_ON(!dio->is_async && ret2 != 0);
+	if (ret2 == 0) {
+		ret = dio_complete(dio, offset, ret);
 		kfree(dio);
-	}
+	} else
+		BUG_ON(ret != -EIOCBQUEUED);
+
 	return ret;
 }
 
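Note: after this rework, direct_io_worker() drops its own reference last and -EIOCBQUEUED is raised in exactly one place: bios are left in flight, and completion is allowed to call aio_complete(), only for a successful partial AIO read or a fully-mapped AIO write. A compact restatement of that decision follows; it is illustrative only, the parameters stand in for dio fields, and EIOCBQUEUED is a kernel-internal errno defined here just for the sketch.

#define EIOCBQUEUED 529	/* kernel-internal errno, for the sketch only */

static long submit_side_return(int is_async, int is_read,
			       long result, long size, long ret)
{
	/* Only a successful partial AIO read or full AIO write may leave
	 * bios in flight; everything else waits and returns directly. */
	if (is_async && ret == 0 && result && (is_read || result == size))
		return -EIOCBQUEUED;
	return ret;
}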
