diff options
-rw-r--r-- | fs/direct-io.c | 140 |
1 files changed, 66 insertions, 74 deletions
diff --git a/fs/direct-io.c b/fs/direct-io.c index b296942ff7d5..bc1cbf9149f7 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c | |||
@@ -121,9 +121,8 @@ struct dio { | |||
121 | int page_errors; /* errno from get_user_pages() */ | 121 | int page_errors; /* errno from get_user_pages() */ |
122 | 122 | ||
123 | /* BIO completion state */ | 123 | /* BIO completion state */ |
124 | atomic_t refcount; /* direct_io_worker() and bios */ | ||
124 | spinlock_t bio_lock; /* protects BIO fields below */ | 125 | spinlock_t bio_lock; /* protects BIO fields below */ |
125 | int bio_count; /* nr bios to be completed */ | ||
126 | int bios_in_flight; /* nr bios in flight */ | ||
127 | struct bio *bio_list; /* singly linked via bi_private */ | 126 | struct bio *bio_list; /* singly linked via bi_private */ |
128 | struct task_struct *waiter; /* waiting task (NULL if none) */ | 127 | struct task_struct *waiter; /* waiting task (NULL if none) */ |
129 | 128 | ||
@@ -256,44 +255,27 @@ static int dio_complete(struct dio *dio, loff_t offset, int ret) | |||
256 | * Called when a BIO has been processed. If the count goes to zero then IO is | 255 | * Called when a BIO has been processed. If the count goes to zero then IO is |
257 | * complete and we can signal this to the AIO layer. | 256 | * complete and we can signal this to the AIO layer. |
258 | */ | 257 | */ |
259 | static void finished_one_bio(struct dio *dio) | 258 | static void dio_complete_aio(struct dio *dio) |
260 | { | 259 | { |
261 | unsigned long flags; | 260 | unsigned long flags; |
261 | int ret; | ||
262 | 262 | ||
263 | spin_lock_irqsave(&dio->bio_lock, flags); | 263 | ret = dio_complete(dio, dio->iocb->ki_pos, 0); |
264 | if (dio->bio_count == 1) { | ||
265 | if (dio->is_async) { | ||
266 | int ret; | ||
267 | |||
268 | /* | ||
269 | * Last reference to the dio is going away. | ||
270 | * Drop spinlock and complete the DIO. | ||
271 | */ | ||
272 | spin_unlock_irqrestore(&dio->bio_lock, flags); | ||
273 | |||
274 | ret = dio_complete(dio, dio->iocb->ki_pos, 0); | ||
275 | 264 | ||
276 | /* Complete AIO later if falling back to buffered i/o */ | 265 | /* Complete AIO later if falling back to buffered i/o */ |
277 | if (dio->result == dio->size || | 266 | if (dio->result == dio->size || |
278 | ((dio->rw == READ) && dio->result)) { | 267 | ((dio->rw == READ) && dio->result)) { |
279 | aio_complete(dio->iocb, ret, 0); | 268 | aio_complete(dio->iocb, ret, 0); |
280 | kfree(dio); | 269 | kfree(dio); |
281 | return; | 270 | } else { |
282 | } else { | 271 | /* |
283 | /* | 272 | * Falling back to buffered |
284 | * Falling back to buffered | 273 | */ |
285 | */ | 274 | spin_lock_irqsave(&dio->bio_lock, flags); |
286 | spin_lock_irqsave(&dio->bio_lock, flags); | 275 | if (dio->waiter) |
287 | dio->bio_count--; | 276 | wake_up_process(dio->waiter); |
288 | if (dio->waiter) | 277 | spin_unlock_irqrestore(&dio->bio_lock, flags); |
289 | wake_up_process(dio->waiter); | ||
290 | spin_unlock_irqrestore(&dio->bio_lock, flags); | ||
291 | return; | ||
292 | } | ||
293 | } | ||
294 | } | 278 | } |
295 | dio->bio_count--; | ||
296 | spin_unlock_irqrestore(&dio->bio_lock, flags); | ||
297 | } | 279 | } |
298 | 280 | ||
299 | static int dio_bio_complete(struct dio *dio, struct bio *bio); | 281 | static int dio_bio_complete(struct dio *dio, struct bio *bio); |
@@ -309,6 +291,10 @@ static int dio_bio_end_aio(struct bio *bio, unsigned int bytes_done, int error) | |||
309 | 291 | ||
310 | /* cleanup the bio */ | 292 | /* cleanup the bio */ |
311 | dio_bio_complete(dio, bio); | 293 | dio_bio_complete(dio, bio); |
294 | |||
295 | if (atomic_dec_and_test(&dio->refcount)) | ||
296 | dio_complete_aio(dio); | ||
297 | |||
312 | return 0; | 298 | return 0; |
313 | } | 299 | } |
314 | 300 | ||
@@ -330,8 +316,7 @@ static int dio_bio_end_io(struct bio *bio, unsigned int bytes_done, int error) | |||
330 | spin_lock_irqsave(&dio->bio_lock, flags); | 316 | spin_lock_irqsave(&dio->bio_lock, flags); |
331 | bio->bi_private = dio->bio_list; | 317 | bio->bi_private = dio->bio_list; |
332 | dio->bio_list = bio; | 318 | dio->bio_list = bio; |
333 | dio->bios_in_flight--; | 319 | if ((atomic_sub_return(1, &dio->refcount) == 1) && dio->waiter) |
334 | if (dio->waiter && dio->bios_in_flight == 0) | ||
335 | wake_up_process(dio->waiter); | 320 | wake_up_process(dio->waiter); |
336 | spin_unlock_irqrestore(&dio->bio_lock, flags); | 321 | spin_unlock_irqrestore(&dio->bio_lock, flags); |
337 | return 0; | 322 | return 0; |
@@ -362,17 +347,15 @@ dio_bio_alloc(struct dio *dio, struct block_device *bdev, | |||
362 | * In the AIO read case we speculatively dirty the pages before starting IO. | 347 | * In the AIO read case we speculatively dirty the pages before starting IO. |
363 | * During IO completion, any of these pages which happen to have been written | 348 | * During IO completion, any of these pages which happen to have been written |
364 | * back will be redirtied by bio_check_pages_dirty(). | 349 | * back will be redirtied by bio_check_pages_dirty(). |
350 | * | ||
351 | * bios hold a dio reference between submit_bio and ->end_io. | ||
365 | */ | 352 | */ |
366 | static void dio_bio_submit(struct dio *dio) | 353 | static void dio_bio_submit(struct dio *dio) |
367 | { | 354 | { |
368 | struct bio *bio = dio->bio; | 355 | struct bio *bio = dio->bio; |
369 | unsigned long flags; | ||
370 | 356 | ||
371 | bio->bi_private = dio; | 357 | bio->bi_private = dio; |
372 | spin_lock_irqsave(&dio->bio_lock, flags); | 358 | atomic_inc(&dio->refcount); |
373 | dio->bio_count++; | ||
374 | dio->bios_in_flight++; | ||
375 | spin_unlock_irqrestore(&dio->bio_lock, flags); | ||
376 | if (dio->is_async && dio->rw == READ) | 359 | if (dio->is_async && dio->rw == READ) |
377 | bio_set_pages_dirty(bio); | 360 | bio_set_pages_dirty(bio); |
378 | submit_bio(dio->rw, bio); | 361 | submit_bio(dio->rw, bio); |
@@ -390,18 +373,28 @@ static void dio_cleanup(struct dio *dio) | |||
390 | page_cache_release(dio_get_page(dio)); | 373 | page_cache_release(dio_get_page(dio)); |
391 | } | 374 | } |
392 | 375 | ||
376 | static int wait_for_more_bios(struct dio *dio) | ||
377 | { | ||
378 | assert_spin_locked(&dio->bio_lock); | ||
379 | |||
380 | return (atomic_read(&dio->refcount) > 1) && (dio->bio_list == NULL); | ||
381 | } | ||
382 | |||
393 | /* | 383 | /* |
394 | * Wait for the next BIO to complete. Remove it and return it. | 384 | * Wait for the next BIO to complete. Remove it and return it. NULL is |
385 | * returned once all BIOs have been completed. This must only be called once | ||
386 | * all bios have been issued so that dio->refcount can only decrease. This | ||
387 | * requires that the caller hold a reference on the dio. | ||
395 | */ | 388 | */ |
396 | static struct bio *dio_await_one(struct dio *dio) | 389 | static struct bio *dio_await_one(struct dio *dio) |
397 | { | 390 | { |
398 | unsigned long flags; | 391 | unsigned long flags; |
399 | struct bio *bio; | 392 | struct bio *bio = NULL; |
400 | 393 | ||
401 | spin_lock_irqsave(&dio->bio_lock, flags); | 394 | spin_lock_irqsave(&dio->bio_lock, flags); |
402 | while (dio->bio_list == NULL) { | 395 | while (wait_for_more_bios(dio)) { |
403 | set_current_state(TASK_UNINTERRUPTIBLE); | 396 | set_current_state(TASK_UNINTERRUPTIBLE); |
404 | if (dio->bio_list == NULL) { | 397 | if (wait_for_more_bios(dio)) { |
405 | dio->waiter = current; | 398 | dio->waiter = current; |
406 | spin_unlock_irqrestore(&dio->bio_lock, flags); | 399 | spin_unlock_irqrestore(&dio->bio_lock, flags); |
407 | io_schedule(); | 400 | io_schedule(); |
@@ -410,8 +403,10 @@ static struct bio *dio_await_one(struct dio *dio) | |||
410 | } | 403 | } |
411 | set_current_state(TASK_RUNNING); | 404 | set_current_state(TASK_RUNNING); |
412 | } | 405 | } |
413 | bio = dio->bio_list; | 406 | if (dio->bio_list) { |
414 | dio->bio_list = bio->bi_private; | 407 | bio = dio->bio_list; |
408 | dio->bio_list = bio->bi_private; | ||
409 | } | ||
415 | spin_unlock_irqrestore(&dio->bio_lock, flags); | 410 | spin_unlock_irqrestore(&dio->bio_lock, flags); |
416 | return bio; | 411 | return bio; |
417 | } | 412 | } |
@@ -440,25 +435,24 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio) | |||
440 | } | 435 | } |
441 | bio_put(bio); | 436 | bio_put(bio); |
442 | } | 437 | } |
443 | finished_one_bio(dio); | ||
444 | return uptodate ? 0 : -EIO; | 438 | return uptodate ? 0 : -EIO; |
445 | } | 439 | } |
446 | 440 | ||
447 | /* | 441 | /* |
448 | * Wait on and process all in-flight BIOs. | 442 | * Wait on and process all in-flight BIOs. This must only be called once |
443 | * all bios have been issued so that the refcount can only decrease. | ||
444 | * This just waits for all bios to make it through dio_bio_complete. IO | ||
445 | * errors are propagated through dio->io_error and should be propagated via | ||
446 | * dio_complete(). | ||
449 | */ | 447 | */ |
450 | static void dio_await_completion(struct dio *dio) | 448 | static void dio_await_completion(struct dio *dio) |
451 | { | 449 | { |
452 | /* | 450 | struct bio *bio; |
453 | * The bio_lock is not held for the read of bio_count. | 451 | do { |
454 | * This is ok since it is the dio_bio_complete() that changes | 452 | bio = dio_await_one(dio); |
455 | * bio_count. | 453 | if (bio) |
456 | */ | 454 | dio_bio_complete(dio, bio); |
457 | while (dio->bio_count) { | 455 | } while (bio); |
458 | struct bio *bio = dio_await_one(dio); | ||
459 | /* io errors are propagated through dio->io_error */ | ||
460 | dio_bio_complete(dio, bio); | ||
461 | } | ||
462 | } | 456 | } |
463 | 457 | ||
464 | /* | 458 | /* |
@@ -995,16 +989,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
995 | dio->iocb = iocb; | 989 | dio->iocb = iocb; |
996 | dio->i_size = i_size_read(inode); | 990 | dio->i_size = i_size_read(inode); |
997 | 991 | ||
998 | /* | 992 | atomic_set(&dio->refcount, 1); |
999 | * BIO completion state. | ||
1000 | * | ||
1001 | * ->bio_count starts out at one, and we decrement it to zero after all | ||
1002 | * BIOs are submitted. This to avoid the situation where a really fast | ||
1003 | * (or synchronous) device could take the count to zero while we're | ||
1004 | * still submitting BIOs. | ||
1005 | */ | ||
1006 | dio->bio_count = 1; | ||
1007 | dio->bios_in_flight = 0; | ||
1008 | spin_lock_init(&dio->bio_lock); | 993 | spin_lock_init(&dio->bio_lock); |
1009 | dio->bio_list = NULL; | 994 | dio->bio_list = NULL; |
1010 | dio->waiter = NULL; | 995 | dio->waiter = NULL; |
@@ -1111,7 +1096,11 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
1111 | } | 1096 | } |
1112 | if (ret == 0) | 1097 | if (ret == 0) |
1113 | ret = dio->result; | 1098 | ret = dio->result; |
1114 | finished_one_bio(dio); /* This can free the dio */ | 1099 | |
1100 | /* this can free the dio */ | ||
1101 | if (atomic_dec_and_test(&dio->refcount)) | ||
1102 | dio_complete_aio(dio); | ||
1103 | |||
1115 | if (should_wait) { | 1104 | if (should_wait) { |
1116 | unsigned long flags; | 1105 | unsigned long flags; |
1117 | /* | 1106 | /* |
@@ -1122,7 +1111,7 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
1122 | 1111 | ||
1123 | spin_lock_irqsave(&dio->bio_lock, flags); | 1112 | spin_lock_irqsave(&dio->bio_lock, flags); |
1124 | set_current_state(TASK_UNINTERRUPTIBLE); | 1113 | set_current_state(TASK_UNINTERRUPTIBLE); |
1125 | while (dio->bio_count) { | 1114 | while (atomic_read(&dio->refcount)) { |
1126 | spin_unlock_irqrestore(&dio->bio_lock, flags); | 1115 | spin_unlock_irqrestore(&dio->bio_lock, flags); |
1127 | io_schedule(); | 1116 | io_schedule(); |
1128 | spin_lock_irqsave(&dio->bio_lock, flags); | 1117 | spin_lock_irqsave(&dio->bio_lock, flags); |
@@ -1133,7 +1122,6 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
1133 | kfree(dio); | 1122 | kfree(dio); |
1134 | } | 1123 | } |
1135 | } else { | 1124 | } else { |
1136 | finished_one_bio(dio); | ||
1137 | dio_await_completion(dio); | 1125 | dio_await_completion(dio); |
1138 | 1126 | ||
1139 | ret = dio_complete(dio, offset, ret); | 1127 | ret = dio_complete(dio, offset, ret); |
@@ -1146,7 +1134,11 @@ direct_io_worker(int rw, struct kiocb *iocb, struct inode *inode, | |||
1146 | * i/o, we have to mark the aio complete. | 1134 | * i/o, we have to mark the aio complete. |
1147 | */ | 1135 | */ |
1148 | aio_complete(iocb, ret, 0); | 1136 | aio_complete(iocb, ret, 0); |
1149 | kfree(dio); | 1137 | |
1138 | if (atomic_dec_and_test(&dio->refcount)) | ||
1139 | kfree(dio); | ||
1140 | else | ||
1141 | BUG(); | ||
1150 | } | 1142 | } |
1151 | return ret; | 1143 | return ret; |
1152 | } | 1144 | } |